638 lines
17 KiB
Plaintext
638 lines
17 KiB
Plaintext
*! Date : 1 Aug 2005
|
|
*! Version : 1.64
|
|
*! Authors : Adrian Mander/David Clayton
|
|
*! Email : adrian.mander@mrc-hnr.cam.ac.uk
|
|
*! Description : Hotdeck imputation
|
|
|
|
program define hotdeck6
|
|
version 6.0
|
|
syntax [varlist] [if] [in] [using/], [BY(varlist) IMPute(integer 1) STORE GENerate(string) COMmand(string) PARMS(string) REPlace NOISE KEEP(varlist) SEED(string) QUIET INFILES(string) ]
|
|
tokenize "`varlist'"
|
|
local z "`1'"
|
|
|
|
preserve
|
|
if "`if'"~="" {
|
|
qui keep `if'
|
|
}
|
|
|
|
|
|
/* Check the seed option */
|
|
if "`seed'"=="1" { local seed 2}
|
|
if "`seed'"=="" { local seed 1 }
|
|
confirm number `seed'
|
|
|
|
/* To generate a seed from the time */
|
|
if `seed'==1 {
|
|
local time = "$S_TIME"
|
|
local date = "$S_DATE"
|
|
tokenize "`time'", parse(":")
|
|
local seed1 "`1'`3'`5'"
|
|
tokenize "`date'", parse(" ")
|
|
local dat "`1'`2'`3'"
|
|
local dat1 = date("`dat'","dmy")
|
|
local seed1 "`seed1'`dat1'"
|
|
di
|
|
local l_seed "2^31-1"
|
|
local seed1 = mod(`seed1',`l_seed')
|
|
di in green "Seed is `seed1'"
|
|
set seed `seed1'
|
|
|
|
}
|
|
|
|
|
|
|
|
estimates clear
|
|
tempfile olddata
|
|
|
|
/*NOT SURE if I must implement no strings in BY() option
|
|
tokenize "`by'"
|
|
while "`1'"~="" {
|
|
confirm numeric variable `1'
|
|
mac shift 1
|
|
}
|
|
*/
|
|
|
|
tempvar touse
|
|
mark `touse' `if' `in'
|
|
markout `touse' `by', strok
|
|
|
|
di in green "DELETING all matrices...."
|
|
mat drop _all
|
|
|
|
|
|
if "`infiles'"=="" {
|
|
if "`by'"=="" { _misspat `varlist' `if' `in' }
|
|
else { _misspat `varlist' `if' `in', by(`by') }
|
|
local nfill=r(nmiss)
|
|
}
|
|
|
|
qui save "`olddata'"
|
|
|
|
if "`infiles'"=="" {
|
|
global allpat = r(allpat)
|
|
count if `touse'
|
|
local miss = (r(N)-`nfill')/r(N)
|
|
}
|
|
|
|
if "`command'"=="" {
|
|
di in red "WARNING: When the <command> option is not selected "
|
|
di in red "then no analysis is performed on the imputed datasets"
|
|
di
|
|
if "`store'"=="" {
|
|
di "ALSO STORE isnt selected so hotdeck will appear to do nothing"
|
|
exit(198)
|
|
}
|
|
}
|
|
|
|
if `impute'<1 {
|
|
di in red "The number of imputations must be more than 0 not `impute'"
|
|
exit(198)
|
|
}
|
|
|
|
if `impute'==1 & "`infiles'"=="" {
|
|
if "`store'"=="" | "`command'"~="" {
|
|
di in red "If one imputation is made then command option should NOT be used"
|
|
di in red "AND the store option must be specified"
|
|
exit(198)
|
|
}
|
|
}
|
|
|
|
|
|
if "`using'"~="" {
|
|
if "`store'"=="" {
|
|
di in red "To save datasets you must specify the STORE option"
|
|
exit(198)
|
|
}
|
|
}
|
|
if "`keep'"~="" {
|
|
if "`store'"=="" {
|
|
di in red "If you use the KEEP option you must specify the STORE option"
|
|
exit(198)
|
|
}
|
|
}
|
|
|
|
if "`noise'"~="" & "`command'"=="" {
|
|
di in red "When specifying noise you must also specify the command option"
|
|
exit(198)
|
|
}
|
|
if "`command'"~="" {
|
|
if "`parms'"=="" {
|
|
di in red "To obtain any output from the command option you must also specify "
|
|
di in red "the parameters of interest using the parms() option"
|
|
exit(198)
|
|
}
|
|
}
|
|
|
|
/************************************************
|
|
* Loop over the number of imputed data sets
|
|
* required
|
|
************************************************/
|
|
|
|
if "`seed'"~="1" {set seed `seed'}
|
|
|
|
/* This is the if statement that allows the input of imputed datafiles */
|
|
if "`infiles'"~="" {
|
|
local i 1
|
|
tokenize "`infiles'"
|
|
while "`1'"~="" {
|
|
use "`1'",replace
|
|
mac shift 1
|
|
if "`command'"=="" {
|
|
di in red "You must use the command option when using INFILES"
|
|
exit(198)
|
|
}
|
|
if "`noise'"~="" { `command' }
|
|
else { qui `command' } /* Do the analysis */
|
|
_parms, parms(`parms') command(`command') iter(`i') /* Select Parameters of interest*/
|
|
local i =`i'+1
|
|
}
|
|
local impute=`i'-1
|
|
}
|
|
|
|
else {
|
|
local i 1
|
|
while `i'<= `impute' {
|
|
use "`olddata'",clear /* Use original dataset */
|
|
qui keep if `touse'
|
|
|
|
if "`by'"~="" { _hotdeck `varlist', by(`by') }
|
|
else { _hotdeck `varlist' } /* Impute values */
|
|
if "`store'"~="" { /* Save imputed datasets */
|
|
if "`using'"=="" {
|
|
local using "imp"
|
|
}
|
|
if "`keep'"=="" {
|
|
qui keep `varlist' `by'
|
|
qui save `using'`i',replace
|
|
}
|
|
else {
|
|
mkvlist `varlist' `by', vlist(`keep')
|
|
qui keep `r(vlist)'
|
|
qui save `using'`i',replace
|
|
}
|
|
}
|
|
if "`command'"~="" {
|
|
if "`noise'"~="" { `command' }
|
|
else { qui `command' } /* Do the analysis */
|
|
_parms, parms(`parms') command(`command') iter(`i') /* Select Parameters of interest*/
|
|
}
|
|
local i=`i'+1
|
|
}
|
|
}
|
|
|
|
if "`command'"~="" {
|
|
|
|
/********************************************************
|
|
* Loop to calculate the estimates needed
|
|
*
|
|
* First get the dimensions of the parameter matrices
|
|
********************************************************/
|
|
|
|
local dim= rowsof(impV1)
|
|
mat Qbar = J(1,`dim',0)
|
|
mat Ubar = J(`dim',`dim',0)
|
|
|
|
/* calc the averaging factor */
|
|
local inv = 1/`impute'
|
|
|
|
/* calc the average coef and variance qbar and ubar */
|
|
local i 1
|
|
while `i'<= `impute' {
|
|
mat Qbar= `inv'*impb`i'+ Qbar
|
|
mat Ubar= `inv'*impV`i'+ Ubar
|
|
local i=`i'+1
|
|
}
|
|
|
|
/* calc between variances */
|
|
mat B=J(`dim',`dim',0)
|
|
local inv1 = 1/(`impute'-1)
|
|
|
|
local i 1
|
|
while `i'<= `impute' {
|
|
mat B= B + `inv1'*(impb`i' - Qbar)'*(impb`i' - Qbar)
|
|
local i=`i'+1
|
|
}
|
|
|
|
/* Calc total variance */
|
|
mat T = Ubar+(1+1/`impute')*B
|
|
|
|
cap mat tempmt=B*inv(Ubar)
|
|
if _rc==504 {
|
|
di as error "WARNING: Trying to invert variance matrix with zero elements?"
|
|
local ter = rowsof(Ubar)
|
|
mat temp = J(`ter',1,1)
|
|
mat temp2 = Ubar*temp
|
|
local tei 1
|
|
local names: colfullnames(impb1)
|
|
matrix rownames temp2 = `names'
|
|
while `tei'<=`ter' {
|
|
if temp2[`tei',1]==0 {
|
|
local var:word `tei' of "`names'"
|
|
di as txt "Variance for covariate `tei' is 0 !!"
|
|
}
|
|
local tei=`tei'+1
|
|
}
|
|
mat tempmt=B*inv(Ubar)
|
|
}
|
|
|
|
local trace=trace(tempmt)
|
|
local r1 = 1-((1+1/`impute')*`trace'/`dim')
|
|
local names: rowfullnames(impb1)
|
|
matrix rownames Qbar = `names'
|
|
local names: colfullnames(impb1)
|
|
matrix colnames Qbar = `names'
|
|
|
|
local names: rowfullnames(impV1)
|
|
matrix rownames T = `names'
|
|
matrix rownames B = `names'
|
|
matrix rownames Ubar = `names'
|
|
local names: colfullnames(impV1)
|
|
matrix colnames T = `names'
|
|
matrix colnames B = `names'
|
|
matrix colnames Ubar = `names'
|
|
|
|
mat Tsurr= `r1'*T
|
|
mat D = Qbar*inv(Tsurr)*Qbar'
|
|
|
|
local D1 = D[1,1]/`dim'
|
|
local t=`dim'*(`impute'-1)
|
|
local v1= 4+(`t'-4)*(1+(1-2/`t')*1/`r1')^2
|
|
local ftest= fprob(`dim',`v1',`D1')
|
|
|
|
/********************************************************
|
|
* The next will output the main results in Stata style
|
|
* if the normal approximation is good then you could
|
|
* use the matrix post command
|
|
********************************************************/
|
|
|
|
if "`quiet'"=="" {
|
|
if `r1'<0 {
|
|
di in red "WARNING: between se larger than within se in one or more "
|
|
di in red "parameters invalidating the global F test"
|
|
}
|
|
if `t'<4 {
|
|
di in red "WARNING: t less than 4 invalid global test increase "
|
|
di in red "parameters OR imputations"
|
|
}
|
|
}
|
|
di
|
|
di in gr _col(1) "Number of Obs.", _col(45) " = ", _N
|
|
di in gr _col(1) "No. of Imputations", _col(48) "= ", `impute'
|
|
|
|
if "`infiles'"=="" {
|
|
di in gr _col(1) "% Lines of Missing Data", _col(45) " = ", `miss'*100,"%"
|
|
}
|
|
|
|
di in gr _col(1) "F(",%6.3f `v1',",`dim')", _col(45) " = ", %9.4f `D1'
|
|
di in gr _col(1) "Prob > F " , _col(45) " = ", %9.4f `ftest'
|
|
|
|
di in gr _dup(83) "-"
|
|
local names: colfullnames(impb1)
|
|
local name : word 1 of `names'
|
|
local i 1
|
|
di in gr _continue "Variable |",_col(12) "Average",_col(21) "Between", _col(30) "Within", _col(40) "Total", _col(50) "df", _col(59) "t", _col(65) "p-value"
|
|
di
|
|
di in gr _continue" |",_col(12) "Coef.",_col(21) "Imp. SE", _col(30) "Imp. SE", _col(39) " SE", _col(50) "", _col(59) "", _col(65) ""
|
|
di
|
|
di in gr _continue "-------------+---------------------------------------------------------------------"
|
|
|
|
while "`name'"~="" {
|
|
di
|
|
mat qhat=Qbar[1,"`name'"]
|
|
mat b=B["`name'","`name'"]
|
|
mat u=Ubar["`name'","`name'"]
|
|
mat t=T["`name'","`name'"]
|
|
local df = (`impute'-1)*(1+(u[1,1])/((1+1/`impute')*b[1,1]))^2
|
|
local ttest= qhat[1,1]/sqrt(t[1,1])
|
|
di _continue "`name'",_col(9) "|", _col(10) %7.4f qhat[1,1],_col(17) %9.3f sqrt(b[1,1]), _col(25) %9.3f sqrt(u[1,1]), _col(34) %9.3f sqrt(t[1,1]), _col(44) %9.1f `df', _col(53) %9.3f `ttest', _col(62) %9.3f tprob(`df',`ttest')
|
|
local i=`i'+1
|
|
local name : word `i' of `names'
|
|
}
|
|
di
|
|
di in gr _continue "-------------+---------------------------------------------------------------------"
|
|
di
|
|
local name : word 1 of `names'
|
|
local i 1
|
|
di in gr _continue "Variable |",_col(12) "[$S_level% Conf. Interval]"
|
|
di
|
|
di in gr _continue "-------------+---------------------------------------------------------------------"
|
|
while "`name'"~="" {
|
|
di
|
|
mat qhat=Qbar[1,"`name'"]
|
|
mat b=B["`name'","`name'"]
|
|
mat u=Ubar["`name'","`name'"]
|
|
mat t=T["`name'","`name'"]
|
|
local df = (`impute'-1)*(1+(u[1,1])/((1+1/`impute')*b[1,1]))^2
|
|
local ttest= qhat[1,1]/sqrt(t[1,1])
|
|
local prob = 1-((100-$S_level)/2)/100
|
|
local tvalue = invt(`df',`prob')
|
|
local left = qhat[1,1]-`tvalue'*sqrt(t[1,1])
|
|
local right = qhat[1,1]+`tvalue'*sqrt(t[1,1])
|
|
di _continue "`name'",_col(9) "|", %9.4f `left', %9.4f `right'
|
|
local i=`i'+1
|
|
local name : word `i' of `names'
|
|
}
|
|
di ""
|
|
di in gr _dup(83) "-"
|
|
|
|
} /* end of command if statement */
|
|
|
|
restore
|
|
end
|
|
|
|
/****************************************************
|
|
* The approximate Bayesian Bootstrap hotdecking
|
|
****************************************************/
|
|
|
|
program define _hotdeck
|
|
version 6.0
|
|
syntax [varlist] [using], [BY(string)]
|
|
tokenize "`varlist'"
|
|
local z "ipattern"
|
|
|
|
if "`by'"!="" {
|
|
confirm ex var `by'
|
|
}
|
|
|
|
tempvar nobs bstrp b2strp temp temp2
|
|
|
|
qui {
|
|
local nold = _N
|
|
local nnew = _N
|
|
sort `by' `z' `varlist'
|
|
gen long `nobs' = (`z'!=.)
|
|
|
|
if "`by'"=="" {
|
|
replace `nobs' = sum(`nobs')
|
|
replace `nobs' = `nobs'[_N]
|
|
gen long `bstrp' = int(uniform()*`nobs'+1)
|
|
gen long `b2strp' = int(uniform()*`nobs'+1)
|
|
gen long `temp' = `bstrp'[`b2strp']
|
|
replace `bstrp' = `temp'
|
|
replace `bstrp' = _n if _n<=`nobs'
|
|
|
|
tokenize "`varlist'"
|
|
while "`1'"~="" {
|
|
gen `temp2' = `1'[`bstrp']
|
|
replace `1' = `temp2'
|
|
drop `temp2'
|
|
mac shift 1
|
|
}
|
|
}
|
|
else {
|
|
by `by': replace `nobs' = sum(`nobs')
|
|
by `by': replace `nobs' = `nobs'[_N]
|
|
by `by': gen long `bstrp' = int(uniform()*`nobs'+1)
|
|
by `by': gen long `b2strp' = int(uniform()*`nobs'+1)
|
|
|
|
by `by': gen long `temp' = `bstrp'[`b2strp']
|
|
by `by': replace `bstrp' = `temp'
|
|
|
|
by `by': replace `bstrp' = _n if _n<=`nobs'
|
|
|
|
tokenize "`varlist'"
|
|
while "`1'"~="" {
|
|
by `by': gen `temp2' = `1'[`bstrp']
|
|
|
|
by `by': replace `1' = `temp2'
|
|
mac shift 1
|
|
drop `temp2'
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
end
|
|
|
|
/*******************************************************************
|
|
* Get the parameters or a subset of them from the
|
|
* model and the subset
|
|
* the covariance variance matrix as well
|
|
* Note that this section can also handle non-regression commands
|
|
* and macro lists
|
|
*******************************************************************/
|
|
|
|
program define _parms
|
|
syntax [varlist], [PARMS(string) ITER(integer 1) COMMAND(string) GENerate(string) REPlace]
|
|
|
|
local 0 "`parms'"
|
|
while "`parms'"~="" {
|
|
gettoken 0 parms: parms , parse(" ,")
|
|
cap syntax [varlist]
|
|
if _rc~=0 {
|
|
if "`0'"=="_cons" { local vlist "`vlist' `0'" }
|
|
else { local plist "`plist' `0'" }
|
|
}
|
|
else { local vlist "`vlist' `varlist'" }
|
|
}
|
|
|
|
* if results were not part of a regression command
|
|
|
|
if "`e(cmd)'"=="" {
|
|
local names = ""
|
|
if `iter'==1 { di in red "Using Non Regression Parameters and Command" }
|
|
tokenize "`plist' `vlist'"
|
|
|
|
local np=0
|
|
while "`1'"~="" {
|
|
local names1 ="`names1' `1'"
|
|
local names2 ="`names2' `2'"
|
|
if "`2'"=="" { di in red "Must supply variance estimate of `1'"
|
|
exit(302)
|
|
}
|
|
mac shift 2
|
|
local np = `np'+1
|
|
}
|
|
mat impb`iter' = J(1,`np',0)
|
|
mat impV`iter' = J(`np',`np',0)
|
|
|
|
tokenize "`plist' `vlist'"
|
|
local np 1
|
|
|
|
while "`1'"~="" {
|
|
|
|
if "$`1'"=="" & "``1''"=="" {
|
|
di in red "Global = $`1' Local = ``1''"
|
|
di in red "Global/local macro `1' is missing "
|
|
exit(198)
|
|
}
|
|
|
|
if "``1''"~="" { mat impb`iter'[1,`np'] = ``1'' }
|
|
if "$`1'"~="" & "``1''"=="" { mat impb`iter'[1,`np'] = $`1' }
|
|
if "$`2'"=="" & "``2''"=="" {
|
|
di in red "Global = $`2' Local = ``2''"
|
|
di in red "Global/local macro `2' is missing "
|
|
exit(198)
|
|
}
|
|
if "``2''"~="" { mat impV`iter'[`np',`np'] = ``2'' }
|
|
if "$`2'"~="" & "``2''"=="" { mat impV`iter'[`np',`np'] = $`2' }
|
|
|
|
local np=`np'+1
|
|
mac shift 2
|
|
}
|
|
|
|
matrix colnames impb`iter'=`names1'
|
|
matrix colnames impV`iter'=`names1'
|
|
matrix rownames impV`iter'=`names1'
|
|
}
|
|
else {
|
|
matrix myb = get(_b)
|
|
matrix myV = get(VCE)
|
|
tokenize "`vlist' `plist'"
|
|
if "`2'"~="" {
|
|
cap mat impb`iter' = myb[.,"`1'"]
|
|
if _rc==111 {
|
|
di as error "Are you sure `1' is in the model??"
|
|
di as error "Check the matrix of estimates and only include column names in the parameters NOT variable names"
|
|
exit(111)
|
|
}
|
|
mac shift 1
|
|
while "`1'"~="" {
|
|
mat temp=myb[.,"`1'"]
|
|
mat impb`iter'= impb`iter' , temp
|
|
mac shift 1
|
|
}
|
|
|
|
tokenize "`vlist' `plist'", parse(" ")
|
|
mat impVt`iter' = myV[.,"`1'"]
|
|
mac shift 1
|
|
while "`1'"~="" {
|
|
mat temp=myV[.,"`1'"]
|
|
mat impVt`iter'= impVt`iter' , temp
|
|
mac shift 1
|
|
}
|
|
|
|
tokenize "`vlist' `plist'", parse(" ")
|
|
mat impV`iter' = impVt`iter'["`1'",.]
|
|
mac shift 1
|
|
while "`1'"~="" {
|
|
mat temp=impVt`iter'["`1'",.]
|
|
mat impV`iter'= impV`iter' \ temp
|
|
mac shift 1
|
|
}
|
|
|
|
mat drop myb
|
|
mat drop myV
|
|
mat drop impVt`iter'
|
|
mat drop temp
|
|
}
|
|
else {
|
|
cap mat impb`iter' = myb[.,"`1'"]
|
|
if _rc==111 {
|
|
di as error "Are you sure `1' is in the model??"
|
|
exit(111)
|
|
}
|
|
mat impVt`iter'=myV[.,"`1'"]
|
|
mat impV`iter' = impVt`iter'["`1'",.]
|
|
}
|
|
}
|
|
end
|
|
|
|
/*************************************************
|
|
* Look at the missing pattern in the varlist
|
|
*************************************************/
|
|
|
|
program define _misspat,rclass
|
|
syntax varlist [if] [in] , [BY(string) ]
|
|
tokenize "`varlist'"
|
|
tempvar touse2
|
|
|
|
mark `touse2' `if' `in'
|
|
markout `touse2'
|
|
|
|
qui gen str40 pattern=""
|
|
|
|
local allstr ""
|
|
while "`1'"~="" {
|
|
qui replace pattern = cond(`1'==.,pattern+"*",pattern+"-") if `touse2'
|
|
local allstr="-`allstr'"
|
|
mac shift 1
|
|
}
|
|
qui compress pattern
|
|
sort pattern
|
|
lab var pattern "Missing pattern"
|
|
di
|
|
di in green "Missing Patterns"
|
|
di in green "----------------"
|
|
di
|
|
di in green "Table of the Missing data patterns "
|
|
di in green " * signifies missing and - is not missing"
|
|
di
|
|
di "Varlist order: `varlist'"
|
|
|
|
tab pattern if `touse2'
|
|
local n=r(N)
|
|
|
|
qui count if pattern=="`allstr'" & `touse2'
|
|
|
|
if r(N)==`n' {
|
|
di "There is no missing data in the varlist"
|
|
exit(198)
|
|
}
|
|
|
|
return scalar nmiss = `n'-r(N)
|
|
return local allpat = "`allstr'"
|
|
|
|
qui gen ipattern=cond(pattern=="`allstr'",1,.) if `touse2'
|
|
|
|
/*****************************************
|
|
* Calculate stratum missing numbers
|
|
*****************************************/
|
|
|
|
if "`by'"~="" {
|
|
di
|
|
di in green "STRATUM information"
|
|
di in green "-------------------"
|
|
di
|
|
di in green "Listing the number observed (No_obs) and "
|
|
di in green "the number missing (No_miss) in each stratum"
|
|
tempvar cnt mcnt
|
|
qui sort `by'
|
|
qui by `by':gen `cnt'=sum(ipattern)
|
|
qui by `by':gen `mcnt'=sum(ipattern==.)
|
|
qui by `by': replace `cnt'=cond( _n==_N,`cnt',.)
|
|
qui by `by': replace `mcnt'=cond( _n==_N,`mcnt',.)
|
|
rename `cnt' No_obs
|
|
rename `mcnt' No_miss
|
|
l `by' No_obs No_miss if No_obs~=., noobs
|
|
di
|
|
qui count if No_obs==0
|
|
if `r(N)'>0 { di in red "WARNING: `r(N)' strata with NO observed data" }
|
|
qui count if No_obs==1
|
|
if `r(N)'>0 { di in blue "`r(N)' strata with only 1 observed data"}
|
|
qui count if (No_obs>1 & No_obs<6)
|
|
if `r(N)'>0 { di in blue "`r(N)' strata with 2-5 observed data"}
|
|
di
|
|
}
|
|
|
|
end
|
|
|
|
/*************************************************
|
|
* Expand stata syntax
|
|
*************************************************/
|
|
|
|
program define mkvlist, rclass
|
|
syntax varlist, VLIST(string)
|
|
|
|
local o_vlist "`varlist'"
|
|
local keep "`vlist'"
|
|
local 0 "`keep'"
|
|
while "`keep'"~="" {
|
|
gettoken 0 keep: keep , parse(" ,")
|
|
cap syntax [varlist]
|
|
if _rc~=0 {
|
|
if "`0'"=="_cons" {
|
|
local vlist "`vlist' `0'"
|
|
}
|
|
else { local plist "`plist' `0'" }
|
|
}
|
|
else {
|
|
local vlist "`vlist' `varlist'"
|
|
}
|
|
}
|
|
return local vlist "`o_vlist' `vlist'"
|
|
|
|
end
|
|
|
|
|