You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
683 lines
19 KiB
Plaintext
683 lines
19 KiB
Plaintext
*! Date : 3 Sep 2007
|
|
*! Version : 1.72
|
|
*! Authors : Adrian Mander/David Clayton
|
|
*! Email : adrian.mander@mrc-hnr.cam.ac.uk
|
|
*! Description : Hotdeck imputation
|
|
|
|
/*
|
|
25/07/06 version 1.67 - removed some = and investigated the set seed problem
|
|
16/3/07 version 1.68 - spruced up the displays
|
|
13/6/07 version 1.69 - Made sure set seed does what the masses want although it is truly the wrong thing to do
|
|
27/7/07 version 1.70 - The warnings were not strong enough about the strata information.. in fact I think when there is
|
|
no data to impute it still tries to combine results. Also a slight error in the output.
|
|
15/8/07 version 1.71 - Corrected the confidence intervals.. the tail probability was wrong
|
|
ALSO checked the calculation of T B Ubar Qbar by hand!
|
|
3/9/07 version 1.72 - Corrected the % lines missing to % lines complete
|
|
*/
|
|
|
|
program define hotdeck
|
|
version 9.0
|
|
syntax [varlist] [if] [in] [using/], [BY(varlist) IMPute(integer 1) STORE GENerate(string) COMmand(string) PARMS(string asis) REPlace NOISE KEEP(varlist) SEED(string) QUIET INFILES(string) ]
|
|
tokenize "`varlist'"
|
|
local z "`1'"
|
|
|
|
preserve
|
|
if "`if'"~="" qui keep `if'
|
|
|
|
/* Check the seed option */
|
|
if "`seed'"=="1" local seed 2
|
|
if "`seed'"=="" {
|
|
local noseed "noseed"
|
|
local seed 1
|
|
}
|
|
|
|
confirm number `seed'
|
|
/* To generate a seed from the time note need to truncate the seed to be below 2^31-1 */
|
|
if `seed'==1 {
|
|
local time "$S_TIME"
|
|
local date "$S_DATE"
|
|
tokenize "`time'", parse(":")
|
|
local seed1 "`1'`3'`5'"
|
|
tokenize "`date'", parse(" ")
|
|
local dat "`1'`2'`3'"
|
|
local dat1 = date("`dat'","dmy")
|
|
local seed1 "`seed1'`dat1'"
|
|
di
|
|
local l_seed "2^31-1"
|
|
local seed1 = mod(`seed1',`l_seed')
|
|
set seed `seed1'
|
|
local seed "`seed1'" /* Added to sort out the seed */
|
|
}
|
|
di "{txt}Seed is set as {res} `seed'"
|
|
|
|
estimates clear
|
|
tempfile olddata
|
|
|
|
tempvar touse
|
|
mark `touse' `if' `in'
|
|
markout `touse' `by', strok
|
|
|
|
di in green "DELETING all matrices...."
|
|
mat drop _all
|
|
|
|
/* Display the patterns of missingness.. only on observed data not imputed */
|
|
if "`infiles'"=="" {
|
|
if "`by'"=="" _misspat `varlist' `if' `in'
|
|
else _misspat `varlist' `if' `in', by(`by')
|
|
local nfill=r(nmiss)
|
|
}
|
|
|
|
|
|
qui save "`olddata'"
|
|
|
|
/* Count the missing data for displaying later */
|
|
if "`infiles'"=="" {
|
|
global allpat = r(allpat)
|
|
qui count if `touse'
|
|
local miss = (r(N)-`nfill')/r(N)
|
|
}
|
|
|
|
|
|
/* Make sure the users are using the right syntax.. lots of checks here to make sure*/
|
|
if "`command'"=="" {
|
|
di in red "WARNING: When the <command> option is not selected "
|
|
di in red "then no analysis is performed on the imputed datasets"
|
|
di
|
|
if "`store'"=="" {
|
|
di "ALSO STORE isnt selected so hotdeck will appear to do nothing"
|
|
exit(198)
|
|
}
|
|
}
|
|
|
|
if `impute'<1 {
|
|
di in red "The number of imputations must be more than 0 not `impute'"
|
|
exit(198)
|
|
}
|
|
|
|
if `impute'==1 & "`infiles'"=="" {
|
|
if "`store'"=="" | "`command'"~="" {
|
|
di in red "If one imputation is made then command option should NOT be used"
|
|
di in red "AND the store option must be specified"
|
|
exit(198)
|
|
}
|
|
}
|
|
|
|
|
|
if "`using'"~="" {
|
|
if "`store'"=="" {
|
|
di in red "To save datasets you must specify the STORE option"
|
|
exit(198)
|
|
}
|
|
}
|
|
if "`keep'"~="" {
|
|
if "`store'"=="" {
|
|
di in red "If you use the KEEP option you must specify the STORE option"
|
|
exit(198)
|
|
}
|
|
}
|
|
|
|
if "`noise'"~="" & "`command'"=="" {
|
|
di in red "When specifying noise you must also specify the command option"
|
|
exit(198)
|
|
}
|
|
if "`command'"~="" {
|
|
if `"`parms'"'==`""' {
|
|
di in red "To obtain any output from the command option you must also specify "
|
|
di in red "the parameters of interest using the parms() option"
|
|
exit(198)
|
|
}
|
|
}
|
|
|
|
/************************************************
|
|
* Loop over the number of imputed data sets
|
|
* required
|
|
************************************************/
|
|
|
|
if "`seed'"~="1" set seed `seed'
|
|
|
|
/* This is the if statement that allows the input of imputed datafiles */
|
|
|
|
if "`infiles'"~="" {
|
|
local i 1
|
|
tokenize "`infiles'"
|
|
while "`1'"~="" {
|
|
use "`1'",replace
|
|
mac shift 1
|
|
if "`command'"=="" {
|
|
di in red "You must use the command option when using INFILES"
|
|
exit(198)
|
|
}
|
|
if "`noise'"~="" `command'
|
|
else qui `command' /* Do the analysis */
|
|
_parms, parms(`"`parms'"') command(`command') iter(`i') /* Select Parameters of interest*/
|
|
local i=`i'+1
|
|
}
|
|
local impute=`i'-1
|
|
}
|
|
|
|
|
|
/* If there are no INFILES .. then just have to create the imputed datasets and analyse them */
|
|
else {
|
|
forv i =1/`impute' {
|
|
|
|
/* Use original dataset */
|
|
use "`olddata'",replace
|
|
qui keep if `touse'
|
|
|
|
/* Impute values */
|
|
if "`by'"~="" _hotdeck `varlist', by(`by') i((`seed'+`i')) `noseed'
|
|
else _hotdeck `varlist', i((`seed'+`i')) `noseed'
|
|
|
|
/* Save imputed datasets */
|
|
if "`store'"~="" {
|
|
if "`using'"=="" local using "imp"
|
|
if "`keep'"=="" {
|
|
qui keep `varlist' `by'
|
|
qui save `using'`i',replace
|
|
}
|
|
else {
|
|
mkvlist `varlist' `by', vlist(`keep')
|
|
qui keep `r(vlist)'
|
|
qui save `using'`i',replace
|
|
}
|
|
}
|
|
if "`command'"~="" {
|
|
|
|
/* Do the analysis */
|
|
if "`noise'"~="" `command'
|
|
else qui `command'
|
|
|
|
/* Select Parameters of interest*/
|
|
_parms, parms(`"`parms'"') command(`command') iter(`i')
|
|
}
|
|
|
|
}
|
|
}
|
|
|
|
|
|
/********************************************************
|
|
* Loop to calculate the estimates needed
|
|
*
|
|
* First get the dimensions of the parameter matrices
|
|
********************************************************/
|
|
|
|
if "`command'"~="" {
|
|
local dim= rowsof(impV1)
|
|
mat Qbar = J(1,`dim',0)
|
|
mat Ubar = J(`dim',`dim',0)
|
|
|
|
/* calc the averaging factor */
|
|
local inv = 1/`impute'
|
|
|
|
/* calc the average coef and variance qbar and ubar */
|
|
forv i=1/`impute' {
|
|
mat Qbar= `inv'*impb`i'+ Qbar
|
|
mat Ubar= `inv'*impV`i'+ Ubar
|
|
}
|
|
|
|
/* calc between variances */
|
|
mat B=J(`dim',`dim',0)
|
|
local inv1 = 1/(`impute'-1)
|
|
|
|
forv i=1/`impute' {
|
|
mat B= B + `inv1'*(impb`i' - Qbar)'*(impb`i' - Qbar)
|
|
}
|
|
|
|
/* Calc total variance */
|
|
mat T = Ubar+(1+1/`impute')*B
|
|
|
|
cap mat tempmt=B*inv(Ubar)
|
|
if _rc==504 {
|
|
di as error "WARNING: Trying to invert variance matrix with zero elements?"
|
|
local ter = rowsof(Ubar)
|
|
mat temp = J(`ter',1,1)
|
|
mat temp2 = Ubar*temp
|
|
local tei 1
|
|
local names: colfullnames impb1
|
|
matrix rownames temp2 = `names'
|
|
while `tei'<=`ter' {
|
|
if temp2[`tei',1]==0 {
|
|
local var:word `tei' of "`names'"
|
|
di as txt "Variance for covariate `tei' is 0 !!"
|
|
}
|
|
local tei=`tei'+1
|
|
}
|
|
mat tempmt=B*inv(Ubar)
|
|
}
|
|
|
|
local trace=trace(tempmt)
|
|
|
|
/* Everything hunky dorey until now... a strange 1 appears.. */
|
|
local r1 = 1-((1+1/`impute')*`trace'/`dim')
|
|
|
|
/************************************************
|
|
* Just sorting out the matrix names
|
|
************************************************/
|
|
local names: rowfullnames impb1
|
|
matrix rownames Qbar = `names'
|
|
local names: colfullnames impb1
|
|
matrix colnames Qbar = `names'
|
|
local names: rowfullnames impV1
|
|
matrix rownames T = `names'
|
|
matrix rownames B = `names'
|
|
matrix rownames Ubar = `names'
|
|
local names: colfullnames impV1
|
|
matrix colnames T = `names'
|
|
matrix colnames B = `names'
|
|
matrix colnames Ubar = `names'
|
|
|
|
mat Tsurr= `r1'*T
|
|
mat D = Qbar*inv(Tsurr)*Qbar'
|
|
|
|
local D1 = D[1,1]/`dim'
|
|
local t=`dim'*(`impute'-1)
|
|
local v1= 4+(`t'-4)*(1+(1-2/`t')*1/`r1')^2
|
|
local ftest= fprob(`dim',`v1',`D1')
|
|
|
|
/********************************************************
|
|
* The next will output the main results in Stata style
|
|
* if the normal approximation is good then you could
|
|
* use the matrix post command
|
|
********************************************************/
|
|
|
|
if "`quiet'"=="" {
|
|
if `r1'<0 {
|
|
di in red "WARNING: between se larger than within se in one or more "
|
|
di in red "parameters invalidating the global F test"
|
|
}
|
|
if `t'<4 {
|
|
di in red "WARNING: t less than 4 invalid global test "
|
|
di in red "increase parameters OR imputations"
|
|
}
|
|
}
|
|
|
|
di
|
|
di in gr _col(1) "Number of Obs.", _col(45) "= ", as res %5.0f _N
|
|
di in gr _col(1) "No. of Imputations", _col(45) "= ", as res %5.0f `impute'
|
|
|
|
|
|
if "`infiles'"=="" di in gr _col(1) "% Lines of Complete Data", _col(45) "= ", as res %10.4f `miss'*100, as text "%"
|
|
di in gr _col(1) "F(",%6.3f `v1',",`dim')", _col(45) "= ", as res %10.4f `D1'
|
|
di in gr _col(1) "Prob > F " , _col(45) "= ", as res %10.4f `ftest'
|
|
di "{text}{dup 14:{c -}}{c TT}{dup 68:{c -}}"
|
|
|
|
local names: colfullnames impb1
|
|
|
|
|
|
/* Transform the double quoted names to a macrolist */
|
|
|
|
di in gr _continue "Variable" _col(15) "{c |}",_col(17) "Average", _col(28) "Between", _col(38) "Within", _col(48) "Total", _col(58) "df", _col(68) "t", _col(77) "p-value"
|
|
di
|
|
di in gr _continue _col(15) "{c |}", _col(17) "Coef.",_col(28) "Imp. SE", _col(38) "Imp. SE", _col(47) " SE", _col(58) "", _col(68) "", _col(74) ""
|
|
di
|
|
di _continue "{text}{dup 14:{c -}}{c +}{dup 68:{c -}}"
|
|
|
|
foreach name of local names {
|
|
di
|
|
mat qhat=Qbar[1,"`name'"]
|
|
mat b=B["`name'","`name'"]
|
|
mat u=Ubar["`name'","`name'"]
|
|
mat t=T["`name'","`name'"]
|
|
local df = (`impute'-1)*(1+(u[1,1])/((1+1/`impute')*b[1,1]))^2
|
|
local ttest= qhat[1,1]/sqrt(t[1,1])
|
|
di as text _continue "`name'",_col(15) "{c |}", as res _col(10) %7.4f qhat[1,1],_col(17) %9.3f sqrt(b[1,1]), _col(25) %9.3f sqrt(u[1,1]), _col(34) %9.3f sqrt(t[1,1]), _col(44) %9.1f `df', _col(53) %9.3f `ttest', _col(62) %9.3f tprob(`df',`ttest')
|
|
}
|
|
|
|
|
|
di
|
|
di _continue "{text}{dup 14:{c -}}{c +}{dup 68:{c -}}"
|
|
di
|
|
local name : word 1 of `names'
|
|
local i 1
|
|
di in gr _continue "Variable", _col(15) "{c |}", _col(17) "[$S_level% Conf. Interval]"
|
|
di
|
|
di _continue "{text}{dup 14:{c -}}{c +}{dup 68:{c -}}"
|
|
while "`name'"~="" {
|
|
di
|
|
mat qhat=Qbar[1,"`name'"]
|
|
mat b=B["`name'","`name'"]
|
|
mat u=Ubar["`name'","`name'"]
|
|
mat t=T["`name'","`name'"]
|
|
local df = (`impute'-1)*(1+(u[1,1])/((1+1/`impute')*b[1,1]))^2
|
|
local ttest= qhat[1,1]/sqrt(t[1,1])
|
|
|
|
local prob = 1-((100-$S_level)/2)/100
|
|
local tvalue=abs( invttail(`df',`prob') )
|
|
/* The t-distribution function could be very out here.... due to a version 6 bug!
|
|
version 6 : local tvalue = invt(`df',`prob')
|
|
|
|
THIS HAS BEEN REMOVED 15Aug07 as the probability is calculated on adding the two tails AND it should have been
|
|
a single tailed value!!!
|
|
*/
|
|
|
|
|
|
|
|
local left = qhat[1,1]-`tvalue'*sqrt(t[1,1])
|
|
local right = qhat[1,1]+`tvalue'*sqrt(t[1,1])
|
|
di as text _continue "`name'",_col(15) "{c |}", as res %9.4f `left', %9.4f `right'
|
|
local i=`i'+1
|
|
local name : word `i' of `names'
|
|
}
|
|
di ""
|
|
di "{text}{dup 14:{c -}}{c BT}{dup 68:{c -}}"
|
|
|
|
} /* end of command if statement */
|
|
|
|
restore
|
|
end
|
|
|
|
/****************************************************
|
|
* The approximate Bayesian Bootstrap hotdecking
|
|
****************************************************/
|
|
|
|
program define _hotdeck
|
|
version 9.0
|
|
syntax [varlist] [using], [BY(string) Iseed(string) NOSEED]
|
|
|
|
local iseed =`iseed'
|
|
|
|
tokenize "`varlist'"
|
|
local z "ipattern"
|
|
|
|
if "`by'"!="" confirm ex var `by'
|
|
|
|
tempvar nobs bstrp b2strp temp temp2
|
|
|
|
local nold = _N
|
|
local nnew = _N
|
|
|
|
/* This is the place of difference for a set seed command ..*/
|
|
|
|
if "`noseed'"=="" set seed `iseed'
|
|
qui sort `by' `z' `varlist'
|
|
|
|
|
|
|
|
|
|
qui gen long `nobs' = (`z'!=.)
|
|
|
|
if "`by'"=="" {
|
|
qui replace `nobs' = sum(`nobs')
|
|
qui replace `nobs' = `nobs'[_N]
|
|
qui gen long `bstrp' = int(uniform()*`nobs'+1)
|
|
qui gen long `b2strp' = int(uniform()*`nobs'+1)
|
|
qui gen long `temp' = `bstrp'[`b2strp']
|
|
qui replace `bstrp' = `temp'
|
|
qui replace `bstrp' = _n if _n<=`nobs'
|
|
qui tokenize "`varlist'"
|
|
while "`1'"~="" {
|
|
qui gen `temp2' = `1'[`bstrp']
|
|
qui replace `1' = `temp2'
|
|
qui drop `temp2'
|
|
qui mac shift 1
|
|
}
|
|
}
|
|
else {
|
|
qui by `by': replace `nobs' = sum(`nobs')
|
|
qui by `by': replace `nobs' = `nobs'[_N]
|
|
qui by `by': gen long `bstrp' = int(uniform()*`nobs'+1)
|
|
qui by `by': gen long `b2strp' = int(uniform()*`nobs'+1)
|
|
qui by `by': gen long `temp' = `bstrp'[`b2strp']
|
|
qui by `by': replace `bstrp' = `temp'
|
|
qui by `by': replace `bstrp' = _n if _n<=`nobs'
|
|
|
|
qui tokenize "`varlist'"
|
|
while "`1'"~="" {
|
|
qui by `by': gen `temp2' = `1'[`bstrp']
|
|
qui by `by': replace `1' = `temp2'
|
|
qui mac shift 1
|
|
qui drop `temp2'
|
|
}
|
|
|
|
}
|
|
|
|
|
|
end
|
|
|
|
/*******************************************************************
|
|
* Get the parameters or a subset of them from the
|
|
* model and the subset
|
|
* the covariance variance matrix as well
|
|
* Note that this section can also handle non-regression commands
|
|
* and macro lists
|
|
*******************************************************************/
|
|
|
|
program define _parms
|
|
syntax [varlist], [PARMS(string asis) ITER(integer 1) COMMAND(string) GENerate(string) REPlace]
|
|
|
|
|
|
/*
|
|
previously accepted a varlist in the parms string.. too many difficulties with multiple equation models
|
|
so this code below is being dropped
|
|
|
|
* local 0 "`parms'"
|
|
* while "`parms'"~="" {
|
|
* gettoken 0 parms: parms , parse(" ,")
|
|
* cap syntax [varlist]
|
|
* if _rc~=0 {
|
|
* if "`0'"=="_cons" local vlist "`vlist' `0'" <-- just extract _cons
|
|
* else local plist "`plist' `0'"
|
|
* }
|
|
* else local vlist "`vlist' `varlist'"
|
|
* }
|
|
*/
|
|
|
|
|
|
foreach item in `"`parms'"' {
|
|
local vlist `"`vlist' `item'"'
|
|
}
|
|
|
|
/* if results were not part of a regression command */
|
|
|
|
if "`e(cmd)'"=="" {
|
|
local names ""
|
|
if `iter'==1 di in red "Using Non Regression Parameters and Command"
|
|
tokenize "`plist' `vlist'"
|
|
|
|
local np=0
|
|
while "`1'"~="" {
|
|
local names1 ="`names1' `1'"
|
|
local names2 ="`names2' `2'"
|
|
if "`2'"=="" {
|
|
di in red "Must supply variance estimate of `1'"
|
|
exit(302)
|
|
}
|
|
mac shift 2
|
|
local `np++'
|
|
}
|
|
mat impb`iter' = J(1,`np',0)
|
|
mat impV`iter' = J(`np',`np',0)
|
|
|
|
tokenize "`plist' `vlist'"
|
|
local np 1
|
|
|
|
while "`1'"~="" {
|
|
|
|
if "$`1'"=="" & "``1''"=="" {
|
|
di in red "Global = $`1' Local = ``1''"
|
|
di in red "Global/local macro `1' is missing "
|
|
exit(198)
|
|
}
|
|
|
|
if "``1''"~="" mat impb`iter'[1,`np'] = ``1''
|
|
if "$`1'"~="" & "``1''"=="" mat impb`iter'[1,`np'] = $`1'
|
|
if "$`2'"=="" & "``2''"=="" {
|
|
di in red "Global = $`2' Local = ``2''"
|
|
di in red "Global/local macro `2' is missing "
|
|
exit(198)
|
|
}
|
|
if "``2''"~="" mat impV`iter'[`np',`np'] = ``2''
|
|
if "$`2'"~="" & "``2''"=="" mat impV`iter'[`np',`np'] = $`2'
|
|
|
|
local np=`np'+1
|
|
mac shift 2
|
|
}
|
|
|
|
matrix colnames impb`iter'=`names1'
|
|
matrix colnames impV`iter'=`names1'
|
|
matrix rownames impV`iter'=`names1'
|
|
}
|
|
|
|
/* The regression-type output part */
|
|
else {
|
|
matrix myb = e(b)
|
|
matrix myV = e(V)
|
|
|
|
/* This next statement is to handle double quoted strings.. otherwise parms will contain one item in a macro */
|
|
|
|
local teparms :di `parms'
|
|
local first 1
|
|
foreach item of local teparms {
|
|
|
|
if `first'==1 {
|
|
cap mat impb`iter' = myb[.,"`item'"]
|
|
if _rc==111 {
|
|
di as error `" Attempted to extract `item' from e(b) "'
|
|
mat list e(b)
|
|
di as error "Check the matrix of estimates and only include column names in the parameters NOT variable names"
|
|
exit(111)
|
|
}
|
|
mat impVt`iter'= myV[.,"`item'"]
|
|
}
|
|
else {
|
|
mat temp = myb[.,"`item'"]
|
|
mat impb`iter'= impb`iter' , temp
|
|
mat drop temp
|
|
|
|
mat temp=myV[.,"`item'"]
|
|
mat impVt`iter'= impVt`iter' , temp
|
|
mat drop temp
|
|
}
|
|
|
|
local `first++'
|
|
}
|
|
|
|
local first 1
|
|
foreach item of local teparms {
|
|
if `first'==1 mat impV`iter' = impVt`iter'["`item'",.]
|
|
else {
|
|
mat temp=impVt`iter'["`item'",.]
|
|
mat impV`iter'= impV`iter' \ temp
|
|
}
|
|
local `first++'
|
|
}
|
|
|
|
}
|
|
|
|
end
|
|
|
|
/*************************************************
|
|
* Look at the missing pattern in the varlist
|
|
*************************************************/
|
|
|
|
program define _misspat,rclass
|
|
syntax varlist [if] [in] , [BY(string) ]
|
|
tokenize "`varlist'"
|
|
tempvar touse2 tempid
|
|
|
|
qui gen long `tempid'=_n
|
|
|
|
mark `touse2' `if' `in'
|
|
markout `touse2'
|
|
|
|
qui gen str50 pattern=""
|
|
|
|
local allstr ""
|
|
while "`1'"~="" {
|
|
qui replace pattern = cond(`1'==.,pattern+"*",pattern+"-") if `touse2'
|
|
local allstr="-`allstr'"
|
|
mac shift 1
|
|
}
|
|
|
|
qui compress pattern
|
|
sort pattern
|
|
lab var pattern "Missing pattern"
|
|
di
|
|
di in green "Missing Patterns"
|
|
di "{text}{dup 16:{c -}}"
|
|
di
|
|
di in green "Table of the Missing data patterns "
|
|
di in green " * signifies missing and - is not missing"
|
|
di
|
|
di "Varlist order: `varlist'"
|
|
|
|
tab pattern if `touse2'
|
|
local n=r(N)
|
|
|
|
qui count if pattern=="`allstr'" & `touse2'
|
|
|
|
if r(N)==`n' {
|
|
di "There is no missing data in the varlist"
|
|
exit(198)
|
|
}
|
|
|
|
return scalar nmiss = `n'-r(N)
|
|
return local allpat = "`allstr'"
|
|
|
|
qui gen ipattern=cond(pattern=="`allstr'",1,.) if `touse2'
|
|
|
|
/*****************************************
|
|
* Calculate stratum missing numbers
|
|
*****************************************/
|
|
|
|
if "`by'"~="" {
|
|
di
|
|
di "{text}STRATUM information"
|
|
di "{text}{dup 19:{c -}}"
|
|
di
|
|
di "{text} Listing the number observed (No_obs) and "
|
|
di in green "the number missing (No_miss) in each stratum"
|
|
tempvar cnt mcnt
|
|
qui sort `by'
|
|
qui by `by':gen `cnt'=sum(ipattern)
|
|
qui by `by':gen `mcnt'=sum(ipattern==.)
|
|
qui by `by': replace `cnt'=cond( _n==_N,`cnt',.)
|
|
qui by `by': replace `mcnt'=cond( _n==_N,`mcnt',.)
|
|
rename `cnt' No_obs
|
|
rename `mcnt' No_miss
|
|
l `by' No_obs No_miss if No_obs~=., noobs
|
|
di
|
|
qui count if No_obs==0
|
|
if `r(N)'>0 {
|
|
di in red "WARNING: `r(N)' strata with NO complete records"
|
|
di
|
|
di "{error}This implies that within these strata the missing data will NOT be replaced "
|
|
di "and hence will give the wrong answers in the analysis because the analysis"
|
|
di "command will do casewise deletion"
|
|
}
|
|
qui count if No_obs==1
|
|
if `r(N)'>0 di in blue "Note: `r(N)' strata with only 1 complete record"
|
|
qui count if (No_obs>1 & No_obs<6)
|
|
if `r(N)'>0 di in blue "Note: `r(N)' strata with 2-5 complete records"
|
|
di
|
|
}
|
|
|
|
/* I thought that the following bit of command might've sorted out the seed problem :( but I don't think so */
|
|
qui sort `tempid'
|
|
end
|
|
|
|
/*************************************************
|
|
* Expand stata syntax
|
|
*************************************************/
|
|
|
|
program define mkvlist, rclass
|
|
syntax varlist, VLIST(string)
|
|
|
|
local o_vlist "`varlist'"
|
|
local keep "`vlist'"
|
|
local 0 "`keep'"
|
|
while "`keep'"~="" {
|
|
gettoken 0 keep: keep , parse(" ,")
|
|
cap syntax [varlist]
|
|
if _rc~=0 {
|
|
if "`0'"=="_cons" local vlist "`vlist' `0'"
|
|
else local plist "`plist' `0'"
|
|
}
|
|
else local vlist "`vlist' `varlist'"
|
|
}
|
|
return local vlist "`o_vlist' `vlist'"
|
|
|
|
end
|
|
|
|
|