simul_these/Modules/ado/plus/m/micombine.ado

438 lines
13 KiB
Plaintext

*! version 1.1.1 PR 28nov2005
/*
History
1.1.1 28nov2005 eform and eform() option styles now allowed.
nowarning option to suppress warning message about supported regression cmds.
1.1.0 30sep2005 Generalization of supported commands (cmdchk.ado also modified).
Version <=7 no longer supported.
1.0.9 18apr2005 Default rowid variable becomes _i.
If not found, take from char _dta[MI_obsid] then char _dta[mi_id].
Default impid variable is _j. If not found, take from _dta[MI_impid].
These changes are for future compatibility with MItools.
1.0.8 04mar2005 Fixed problem with eform on redisplay
1.0.7 28jan2005 Fixed problem with null model estimation
Fixed bug with noconstant option
1.0.6 25jan2005 e(sample) now correct for all imputations, via ereturn post.
Add d.f. quantities from Barnard & Rubin (1999) Biometrika 86:948-955 eq (3)-(5).
Minor additions to ereturn quantities for compatibility with misw.
Allow null model and model only with constant for benefit of misw.
1.0.5 16nov2004 Update calculations of quantities stored for Li et al (1991) F test.
1.0.4 13oct2004 Change e(cmd) to `cmd', make e(cmd2)="micombine" (for use after mfx).
Implementation of mean log likelihood and chisquare statistic saved
(note undocumented -noclear- but v. useful option to ereturn post/estimates post).
*/
program define micombine, eclass
if _caller()<=7 {
di as error "version 7 and earlier not supported"
exit 9
}
if replay() {
if `"`e(cmd2)'"'!="micombine" {
error 301
}
syntax[, EForm EForm2(string)]
if "`eform2'"!="" {
if "`eform'"!="" di as err "[eform ignored]"
local eform eform(`eform2')
}
else if "`eform'"!="" local eform eform("exp(b)")
di as text _n "Multiple imputation parameter estimates (" as res e(m) as text " imputations)"
capture ereturn display, `eform'
local rc=_rc
if `rc'>0 {
* Null model, or model with cc() only
if `"`e(cmd)'"'!="stcox" {
`e(cmd)' `0'
}
else di as text "[Null model - no estimates]"
}
else ereturn display, `eform'
di as result e(N) as text " observations."
exit
}
gettoken cmd 0 : 0
if "`cmd'"=="stpm" {
local dist 7
local bad 0
}
else {
cmdchk `cmd'
local cmdnotknown `s(bad)'
/*
dist=0 (normal), 1 (binomial), 2 (poisson), 3 (cox), 4 (glm),
5 (xtgee), 6(ereg/weibull).
*/
local dist `s(dist)'
}
syntax [anything] [if] [in] [aw fw pw iw] , [ IMPid(string) BR CC(varlist) noCONStant ///
DEAD(varname) DETail EForm EForm2(string) GENxb(string) LRR noWARning OBSid(string) * ]
if `cmdnotknown' & "`warning'"!="nowarning" {
di as err _n "Warning: " as inp "`cmd'" as err " is not a certified regression command for micombine."
di as err "micombine will continue mechanically, but correct results are not guaranteed."
di as err "You must take responsibility that Rubin's rules are appropriate here." _n
}
if "`eform2'"!="" {
if "`eform'"!="" di as err "[eform ignored]"
local eform eform(`eform2')
}
else if "`eform'"!="" local eform eform("exp(b)")
* Check for possible impid characteristic. If it does not exist use default _j
if "`impid'"=="" {
local impid: char _dta[MI_impid]
if "`impid'"=="" local impid _j
}
cap confirm var `impid'
if _rc {
di as err "imputation identifier `impid' not found"
exit 601
}
* Check for possible obsid chars. If none exist use default _i
if "`obsid'"=="" {
local I: char _dta[MI_obsid]
if "`I'"=="" local I: char _dta[mi_id]
if "`I'"=="" local I _i
}
else local I `obsid'
cap confirm var `I'
if _rc {
di as err "observation identifier `I' not found"
exit 601
}
if "`detail'"!="" {
local detail noisily
}
else local detail
*Change here 11/15/05, commenting out the line below !! PR DELETED XIAO CHEN'S EDIT, NOV 05
frac_cox "`dead'" `dist'
if "`constant'"=="noconstant" {
if "`cmd'"=="fit" | "`cmd'"=="stcox" | "`cmd'"=="cox" {
di as error "noconstant invalid with `cmd'"
exit 198
}
}
*Change here 11/15/05, `dist' could be null.... !! PR DELETED XIAO CHEN'S EDIT, NOV 05
*if "`dist'" =="7" { /* stcox, streg, stpm */
if `dist'==7 {
local y
local yname _t
local xvars `anything'
}
else {
gettoken y xvars : anything
gettoken xvars left: xvars, parse("(")
local yname `y'
}
tempvar touse
quietly {
marksample touse
markout `touse' `varlist' `dead' `cc'
if "`dead'"!="" {
local dead "dead(`dead')"
}
* Deal with weights.
frac_wgt `"`exp'"' `touse' `"`weight'"'
local wgt `r(wgt)'
tempvar J
* Important (compatibility with mitools): ignore rows for which impid=0
egen int `J'=group(`impid') if `touse'==1 & `impid'>0 & !missing(`impid')
sum `J', meanonly
local m=r(max)
if `m'<2 {
di as error "there must be at least 2 imputations"
exit 198
}
local nxvar: word count `xvars'
/*
if `nxvar'<1 {
di as err "there must be at least one covariate"
exit 198
}
*/
local ncc: word count `cc' /* could legitimately be zero */
local nvar=`nxvar'+`ncc' /* number of covariates in model */
count if `touse'==1 & `J'==1
local nobs=r(N)
* Null Cox model (or model with only ccvars): fit on final imputation only, and quit
if "`xvars'"=="" & ("`cmd'"=="cox" | "`cmd'"=="stcox") {
if "`cmd'"=="stcox" & "`cc'"=="" {
local options `options' estimate
}
`detail' `cmd' `y' `cc' if `touse'==1 & `J'==`m' `wgt', `options' `dead' `constant'
noi `cmd'
di as result `nobs' as text " observations."
ereturn scalar m=`m'
ereturn local impid `impid'
ereturn local cmd `cmd'
ereturn local cmd2 micombine
exit
}
* Compute model over m imputations
tempname W Q B T QQ
if "`genxb'"!="" {
tempvar xb xbtmp
gen `xb'=.
}
* Estimate mean LR chisquare statistic (where possible)
tempname chi2 ell ell0 nucom
scalar `chi2'=0
scalar `ell'=0
scalar `ell0'=0
forvalues i=1/`m' {
tempname Q`i'
`detail' `cmd' `y' `xvars' `cc' `left' if `touse'==1 & `J'==`i' `wgt', ///
`options' `dead' `constant'
scalar `nucom'=e(df_r) // complete-data residual degrees of freedom
if `nucom'==. {
scalar `nucom'=100000
}
scalar `ell'=`ell'+e(ll)
scalar `ell0'=`ell0'+e(ll_0)
scalar `chi2'=`chi2'-2*(e(ll_0)-e(ll))
if "`genxb'"!="" {
predict `xbtmp' if `touse'==1 & `J'==`i', xb
replace `xb'=`xbtmp' if `touse'==1 & `J'==`i'
drop `xbtmp'
}
matrix `Q`i''=e(b)
if `i'==1 {
matrix `Q'=e(b)
matrix `W'=e(V)
}
else {
matrix `Q'=`Q'+e(b)
matrix `W'=`W'+e(V)
}
}
if "`genxb'"!="" {
sort `touse' `I' `J'
by `touse' `I': gen `genxb'=sum(`xb')/`m' if `touse'==1
by `touse' `I': replace `genxb'=`genxb'[_N] if _n<_N
lab var `genxb' "Mean Linear Predictor (`m' imputations)"
}
matrix `Q'=`Q'/`m' /* MI param estimates */
matrix `W'=`W'/`m'
scalar `chi2'=`chi2'/`m'
scalar `ell'=`ell'/`m'
scalar `ell0'=`ell0'/`m'
local k=colsof(`Q')
matrix `B'=J(`k',`k',0)
forvalues i=1/`m' {
matrix `QQ'=`Q`i''-`Q'
if `i'==1 {
matrix `B'=`QQ''*`QQ'
}
else matrix `B'=`B'+`QQ''*`QQ'
}
matrix `B'=`B'/(`m'-1)
matrix `T'=`W'+(1+1/`m')*`B' /* estimated VCE matrix */
/*
Relative increase in variance due to missing information (r) for
each variable, and df and lambda, the fraction of missing information.
All measures are unstable for low m. See Schafer (1997) p. 110.
Note that BIF = sqrt(T/W) = sqrt(1 + (B/W)*(1+1/m)) = sqrt(1+r)
is the between-imputation imprecision factor, i.e. the ratio
of the SE derived from T to the SE derived from W,
ignoring between-imputation variation in parameter estimates.
*/
tempname r t lambda nu BIF
matrix `r'=J(1,`k',0)
matrix `lambda'=J(1,`k',0)
matrix `nu'=J(1,`k',0)
matrix `BIF'=J(1,`k',0)
scalar `t'=`m'-1
* Next few lines assign quantities for tests of individual (1 df) components of Q (=beta)
forvalues j=1/`k' {
matrix `r'[1,`j']=(1+1/`m')*`B'[`j',`j']/`W'[`j',`j']
matrix `nu'[1,`j']=cond(`t'>4, 4+(`t'-4)*(1+(1-2/`t')/`r'[1,`j'])^2, `t'*(1+1/`r'[1,`j'])^2)
matrix `lambda'[1,`j']=(`r'[1,`j']+2/(`nu'[1,`j']+3))/(`r'[1,`j']+1)
matrix `BIF'[1,`j']=sqrt(1+`r'[1,`j']) /* = sqrt(`T'[`j',`j']/`W'[`j',`j']) */
}
* Next few lines assign quantities for d.f. from Barnard & Rubin 1999 B'ka 86(4): 948-955.
tempname nutilde num nuobs gamma
matrix `nutilde'=J(1,`k',0)
matrix `num'=J(1,`k',0)
matrix `nuobs'=J(1,`k',0)
matrix `gamma'=J(1,`k',0)
forvalues j=1/`k' {
matrix `gamma'[1,`j']=(1+1/`m')*`B'[`j',`j']/`T'[`j',`j']
matrix `nuobs'[1,`j']=((`nucom'+1)/(`nucom'+3))*`nucom'*(1-`gamma'[1,`j'])
matrix `num'[1,`j']=(`m'-1)*`gamma'[1,`j']^-2
matrix `nutilde'[1,`j']=1/((1/`num'[1,`j']+1/`nuobs'[1,`j']))
}
* use all varnames
local names: colnames(`Q1')
matrix colnames `r'=`names'
matrix colnames `nu'=`names'
matrix colnames `lambda'=`names'
matrix colnames `BIF'=`names'
matrix colnames `gamma'=`names'
matrix colnames `nuobs'=`names'
matrix colnames `num'=`names'
matrix colnames `nutilde'=`names'
* Li, Raghunathan & Rubin (1991) estimates of T and nu1
* for F test of Q=0 on k,nu1 degrees of freedom
tempname r1 t1 BW TLRR
matrix `BW'=`B'*syminv(`W')
scalar `r1'=trace(`BW')*(1+1/`m')/`k'
matrix `TLRR'=`W'*(1+`r1')
scalar `t1'=`k'*(`m'-1)
matrix colnames `Q'=`names'
matrix rownames `T'=`names'
matrix colnames `T'=`names'
matrix rownames `B'=`names'
matrix colnames `B'=`names'
matrix rownames `TLRR'=`names'
matrix colnames `TLRR'=`names'
}
di as text _n "Multiple imputation parameter estimates (`m' imputations)"
if "`lrr'"!="" {
di as text "[Using Li-Raghunathan-Rubin (LRR) estimate of VCE matrix]"
ereturn post `Q' `TLRR', depname(`yname') obs(`nobs') esample(`touse') noclear
ereturn matrix T `T'
}
else {
ereturn post `Q' `T', depname(`yname') obs(`nobs') esample(`touse') noclear
ereturn matrix TLRR `TLRR'
}
if "`br'"=="" {
ereturn display, `eform'
di as result `nobs' as text " observations."
}
ereturn matrix B `B'
ereturn matrix W `W'
ereturn matrix r `r'
ereturn matrix nu `nu'
ereturn matrix lambda `lambda'
ereturn matrix BIF `BIF'
* Quantities for calculating df `nutilde' according to Barnard & Rubin (1999)
ereturn matrix gamma `gamma'
ereturn matrix nuobs `nuobs'
ereturn matrix num `num'
ereturn matrix nutilde `nutilde'
ereturn scalar r1=`r1'
ereturn scalar nu1=cond(`t1'>4, 4+(`t1'-4)*(1+(1-2/`t1')/`r1')^2, 0.5*`t1'*(1+1/`k')*(1+1/`r1')^2)
ereturn scalar m=`m'
ereturn scalar chi2=`chi2'
ereturn scalar ll=`ell'
ereturn scalar ll_0=`ell0'
ereturn local eform `eform'
ereturn local impid `impid'
ereturn local cmd `cmd'
ereturn local cmd2 micombine
if "`br'"!="" {
display_t
di as result `nobs' as text " observations."
}
end
program define display_t
* Display results with t-statistics estimated according to Barnard & Rubin (1999)
tempname V Q nu
matrix `V'=e(V)
matrix `Q'=e(b)
matrix `nu'=e(nutilde)
local yname `e(depvar)'
local xs: colnames `Q'
local k=colsof(`Q')
di as text _n "Intervals and inference based on d.f. from Barnard & Rubin (1999)"
di as txt "{hline 13}{c TT}{hline 64}"
local t0 = abbrev("`yname'",12)
if `"`e(eform)'"'!="" {
local tt "Odds Ratio"
}
else {
local tt " Coef."
}
#delimit ;
di as text
%12s "`t0'" _col(14)"{c |}`tt' Std. Err. t P>|t| [$S_level% Conf. Intvl] MI.df"
_n "{hline 13}{c +}{hline 64}" ;
#delimit cr
tempname df mn se t p invt l u
forvalues i=1/`k' {
local x: word `i' of `xs'
if "`x'"!="_cons" {
local fmt : format `x'
if substr("`fmt'",-1,1)=="f" {
local fmt="%8."+substr("`fmt'",-2,2)
}
else if substr("`fmt'",-2,2)=="fc" {
local fmt="%8."+substr("`fmt'",-3,3)
}
else local fmt "%8.0g"
local fmt`i' `fmt'
}
else local fmt "%8.0g"
scalar `df' =`nu'[1,`i']
scalar `mn' = `Q'[1,`i']
scalar `se' = sqrt(`V'[`i',`i'])
scalar `t' = `mn'/`se'
scalar `p' = 2* ttail(`df', abs(`t'))
scalar `invt' = invttail(`df', (1-$S_level/100)/2)
scalar `l' = `mn' - `invt'*`se'
scalar `u' = `mn' + `invt'*`se'
if `"`e(eform)'"'!="" {
scalar `mn' = exp(`mn')
scalar `se' = `mn'*`se'
scalar `l' = exp(`l')
scalar `u' = exp(`u')
}
if `df'>99999 {
local fmtdf %9.2e
}
else local fmtdf %9.2f
di as text /*
*/ %12s abbrev("`x'",12) _col(14) "{c |}" /*
*/ _col(17) as res `fmt' `mn' /*
*/ _col(27) `fmt' `se' /*
*/ _col(36) %7.2f `t' /*
*/ _col(42) %7.3f `p' /*
*/ _col(52) `fmt' `l' /*
*/ _col(61) `fmt' `u' /*
*/ _col(70) `fmtdf' `df'
}
di as text "{hline 13}{c BT}{hline 64}"
end
program define chkrowid, sclass
local I: char _dta[mi_id]
if "`I'"=="" {
di as error "no row-identifier variable found - data may have incorrect format"
exit 198
}
cap confirm var `I'
local rc=_rc
if `rc' {
di as error "row-identifier variable `I' not found"
exit `rc'
}
sret local I `I'
end