Setup initial file structure

This commit is contained in:
2024-03-05 11:20:30 +01:00
parent 6183a6391b
commit 70e53e7760
708 changed files with 277486 additions and 0 deletions

View File

@ -0,0 +1,32 @@
program define uclist, rclass
*! NJC 1.0.0 29 June 2000
    * uclist: convert every word of a list to upper case.
    *
    * Syntax:   uclist list [, Global(name) Noisily]
    * Returns:  r(list)  the upper-cased list
    * Options:  global() also stores the result in the named global macro
    *           noisily  displays the result
    * Errors:   198 for an empty list or a global name longer than 8 characters
    *           498 for any word longer than 80 characters
    version 6.0

    * Everything in front of the first comma is the list; the rest is options.
    gettoken list 0 : 0, parse(",")
    if "`list'" == "," | "`list'" == "" {
        display in red "nothing in list"
        exit 198
    }

    syntax , [ Global(str) Noisily ]
    if length("`global'") > 8 {
        display in red "global name must be <=8 characters"
        exit 198
    }

    * Walk through the words one at a time, appending the upper-case form.
    tokenize `list'
    while "`1'" != "" {
        if length("`1'") > 80 {
            display in red "cannot handle word length > 80"
            exit 498
        }
        local upword = upper("`1'")
        local result "`result'`upword' "
        macro shift
    }

    if "`noisily'" != "" {
        display "`result'"
    }
    if "`global'" != "" {
        global `global' "`result'"
    }
    return local list `result'
end

View File

@ -0,0 +1,2 @@
.h listutil

View File

@ -0,0 +1,43 @@
program def uniqlist, rclass
*! NJC 1.3.0 7 June 2000
* NJC 1.2.0 31 Jan 2000
* NJC 1.1.0 22 Dec 1999
* NJC 1.0.0 22 Sept 1999
    * uniqlist: remove repeated words from a list, keeping the first
    * occurrence of each word in its original position.
    *
    * Syntax:   uniqlist list [, Noisily Global(name)]
    * Returns:  r(list)  the list with duplicates removed
    * Options:  global() also stores the result in the named global macro
    *           noisily  displays the result
    * Errors:   198 for an empty list or a global name longer than 8 characters
    version 6.0

    * Everything in front of the first comma is the list; the rest is options.
    gettoken list 0 : 0, parse(",")
    if "`list'" == "" | "`list'" == "," {
        di in r "nothing in list"
        exit 198
    }

    syntax [, Noisily Global(str) ]
    if length("`global'") > 8 {
        di in r "global name must be <=8 characters"
        exit 198
    }

    * The first word is always kept; every later word is compared with the
    * words accepted so far.
    tokenize `list'
    local newlist "`1'"
    mac shift
    while "`1'" != "" {
        local nnew : word count `newlist'
        local i = 1
        local putin = 1
        while `i' <= `nnew' {
            local word : word `i' of `newlist'
            if "`word'" == "`1'" {
                local putin = 0
                * jump to the end of the accepted list to leave the inner loop
                local i = `nnew'
            }
            local i = `i' + 1
        }
        if `putin' { local newlist "`newlist' `1'" }
        mac shift
    }

    if "`noisily'" != "" { di "`newlist'" }
    if "`global'" != "" { global `global' "`newlist'" }
    return local list `newlist'
end

View File

@ -0,0 +1,2 @@
.h listutil

View File

@ -0,0 +1,843 @@
*! usesas Version 2.1 dan.blanchette@duke.edu 15Apr2009
*! Center of Entrepreneurship and Innovation Duke University's Fuqua School of Business
* - made it so that the describe option returns scalars as promised in the help file.
* usesas Version 2.1 dan.blanchette@duke.edu 26Feb2009
* - made usesas fail gracefully when there was no SAS dataset in &SYSLAST. when using a SAS program
* instead of a dataset
* usesas Version 2.0 dan.blanchette@duke.edu 24Nov2008
* - added ".tpt" as a known file extension for SAS transport file since NEBER uses that file extension
* usesas Version 2.0 dan_blanchette@unc.edu 25Mar2008
* research computing, unch-ch
* - added describe option that loads the metadata of the using dataset into memory
* and displays a Stata-like -describe- description of the using data.
* - made it so that when used with Stata MP savastata treats data like Stata SE
* - fixed it when usesas uses sas programs and the SAS program's last SAS dataset
* is a permanent one then it deletes that file and doesn't fail with odd errors.
* - made it so that when savastata fails by a known error usesas deletes whatever
* intermediary files were created.
* - added error message that -usesas- cannot be run in Stata batch in Windows
* - removed efforts to keep sortedby vars since descending sorts in SAS mess up Stata
* as well as missing values mess up sort order...left it in for --usesas describe--
* usesas Version 1.4 dan_blanchette@unc.edu 17Apr2007
* - made it so that if a format catalog file was created for a different OS
* would provide a message to user that that was the case and that the
* SAS formats would not be used to create Stata value labels
* - fixed it so that if in console mode you are not suggested to click something.
* usesas Version 1.4 dan_blanchette@unc.edu 24Aug2006
* - corrected how the SAS check name was displayed.
* usesas Version 1.4 dan_blanchette@unc.edu 09Nov2005
* - made usesasdel its own ado-file
* usesas Version 1.4 dan_blanchette@unc.edu 28Sep2005
* - stopped savastata from closing user's log if one was open
* usesas Version 1.3 dan_blanchette@unc.edu 04Aug2005
* - in non-console mode when messy option used, you can now delete all files
* - for Stata 9 new option char2lab that uses my SAS macro char2fmt that converts
* long SAS character variables to numeric with value labels like -encode- does.
* - allow datasets created by proc cport
* NOTE: SAS's CIMPORT procedure will not open a datafile created in later version of SAS
* - use rver() to control sas engine type
* - now passing Stata version to savastata
* - sort order preserved, though data never lost sort order, Stata needs to sort data to know its sort order
* - added float option to allow user to save space with numeric vars that would otherwise
* require being stored in 8-byte double.
* usesas Version 1.2 dan_blanchette@unc.edu 06Jan2005
* -now accepts an xport file that has a different internal dataset name
** usesas Version 1.1 dan_blanchette@unc.edu 11Nov2004
* -fixed it so that if() can contain code with double quotes
* e.g: if(`" var="A" "')
* -fixed it so that when a user submits a SAS program
* only their program is printed in the *_usesas.log file
* and not the whole savastata macro code as before
* -deletes non-work SAS dataset created by user's SAS program
* -fix code related to "if" option
* -reduced usage of subinstr to help allow for directory paths and if conditions
* to be longer than 80 characters
** usesas Version 1.0 dan_blanchette@unc.edu 13Jul2004
* -added mprint and source2 so that user submitted programs
* would appear in the *_usesas.log file
** usesas Version 1.0 dan_blanchette@unc.edu 17Nov2003
** usesas Version 1.0 dan_blanchette@unc.edu 27Oct2003
** the carolina population center, unc-ch
program define usesas, rclass
    * usesas: load a SAS dataset, SAS transport file, SPSS portable file, or the
    * last dataset created by a SAS program into memory.
    *
    * Outline of what this program does:
    *   1. validates options and refuses to overwrite data unless clear is given
    *   2. asks sasexe for the SAS executable and the savastata / char2fmt files
    *   3. splits the using name into directory, file name and extension and
    *      picks a SAS engine from the extension
    *   4. turns if() and in() into SAS subsetting code
    *   5. has usesas_sas write a SAS program, then runs SAS through the shell
    *   6. runs the generated infile do-file to load the data
    *   7. optionally shows a describe-style listing and returns r() results
    *   8. erases the intermediary files unless messy is given
    *
    * Returned results, with the describe option only:
    *   r(varlist) r(sortlist) r(k) r(N)
    *
    * Usage logging through usagelog is attempted only when that command is installed.
    version 8
    syntax using/ [, MEssy FORmats xport clear QUotes char2lab CHeck float ///
        KEep(string) DEscribe listnot if(string) in(string) ]

    /* log usage of usesas */
    capture which usagelog
    if _rc==0 {
        usagelog , start type(savas) message(`"usesas using `using', `messy' `formats' `xport' `clear' `quotes' `char2lab' `check' `float' keep(`keep') `describe' `listnot' if(`if') in(`in') "')
    }

    if "`c(os)'"=="Windows" & "`c(mode)'" == "batch" {
        di as err "{help usesas:usesas} cannot be run in batch mode on Windows"
        /* log usage of usesas */
        capture which usagelog
        if _rc==0 {
            usagelog , type(savas) uerror(8) etime
        }
        exit 499
    }

    if "`listnot'" != "" & "`describe'" == "" {
        di as err "listnot option only allowed when using the describe option"
        /* log usage of usesas */
        capture which usagelog
        if _rc==0 {
            usagelog , type(savas) uerror(8) etime
        }
        exit 499
    }

    di `"{txt}The {help usesas:usesas} {txt}command uses the {browse "http://faculty.fuqua.duke.edu/sas_to_stata/savastata.html":savastata} {txt}SAS macro to load the SAS dataset into memory."'
    di "{txt}Large datasets may take a few minutes."

    * refuse to replace data in memory unless the user asked for it
    if `c(N)'!=0 & "`clear'"=="" {
        di "{error} no, data in memory would be lost"
        di "{error} use the {res}clear {error}option"
        /* log usage of usesas */
        capture which usagelog
        if _rc==0 {
            usagelog , type(savas) uerror(1) etime
        }
        exit 4
    }

    * CAPTURE USER'S LOG
    * ------------------
    * the log file name is remembered so the log can be closed while the
    * generated do-file runs and reopened in append mode afterwards
    quietly log query
    local usrlog `r(filename)'

    * FIGURE OUT WHERE SAS EXECUTABLE IS
    * ----------------------------------
    sasexe usesas
    local wsas `r(wsas)'
    local usas `r(usas)'
    local savastata `r(savastata)'
    local char2fmt `r(char2fmt)'
    local rver `r(rver)' // version of sas that's being run i.e. "v8", "v9" etc

    if index("`using'","'") | index(`"`using'"',`"""') {
        di `"{help usesas} {error}cannot handle directory or file names that contain single or double quotes. "'
        capture which usagelog
        if _rc==0 {
            usagelog , type(savas) uerror(2) etime
        }
        exit 499
    }

    /* if filename is given with directory info too,
    strip to just file name and to dir location */
    if "`c(os)'"=="Windows" {
        local dirsep="\"
        if index("`using'","/") {
            local using : subinstr local using "/" "\" , all
        }
    }
    else {
        local dirsep="`c(dirsep)'"
    }

    if index("`using'","`dirsep'") {
        * peel off leading directory components until only the file name is left
        local filen=substr("`using'",index("`using'","`dirsep'")+1,length("`using'"))
        while index("`filen'","`dirsep'") !=0 {
            local filen=substr("`filen'",index("`filen'","`dirsep'")+1,length("`filen'"))
        }
        local dir=substr("`using'",1,index("`using'","`filen'")-1)
    }
    else if index("`using'","\\\")==1 { /* Universal naming convention */
        local filen=substr("`using'",index("`using'","\\\")+2,length("`using'"))
        while index("`filen'","\") !=0 {
            local filen=substr("`filen'",index("`filen'","\")+1,length("`filen'"))
        }
        local dir=substr("`using'",1,index("`using'","`filen'")-1)
    }
    else { /* no directory given */
        local filen="`using'"
        local dir ="`c(pwd)'`dirsep'"
    }

    /** extract file extension if there is one **/
    * ext is the text from the last period on, filen is the text before the
    * first period, and middle is whatever lies between the two
    if index("`filen'",".") {
        local ext=substr("`filen'",index("`filen'","."),length("`filen'"))
        while index("`ext'",".") > 0 {
            local ext=substr("`ext'",index("`ext'",".")+1,length("`ext'"))
        }
        local ext=".`ext'"
        local middle=substr("`filen'",1,index("`filen'","`ext'")-1) /* middle will not end in a period */
        local filen=substr("`filen'",1,index("`filen'",".")-1)
        local middle=substr("`middle'",length("`filen'")+1,length("`middle'"))
    }

    * classify the input file by its extension
    if lower("`ext'")==".sas7bdat" {
        local type="sas"
    }
    else if lower("`ext'")==".sd7" {
        local type="sas"
        local shortfileext="shortfileext"
    }
    else if lower("`ext'")==".ssd01" {
        local type="sas6"
    }
    else if lower("`ext'")==".ssd02" {
        local type="sas6"
    }
    else if lower("`ext'")==".sd2" {
        local type="sas6"
    }
    else if lower("`ext'")==".sas" {
        local type="sasprogram"
    }
    else if lower("`ext'")==".por" {
        local type="spss"
    }
    else if lower("`ext'")==".xpt" | ///
        lower("`ext'")==".xport" | ///
        lower("`ext'")==".export" | ///
        lower("`ext'")==".expt" | ///
        lower("`ext'")==".exp" | ///
        lower("`ext'")==".trans" | ///
        lower("`ext'")==".tpt" | ///
        lower("`ext'")==".cport" | ///
        lower("`ext'")==".ssp" | ///
        lower("`ext'")==".stx" | ///
        lower("`ext'")==".sasx" | ///
        lower("`ext'")==".v5x" | ///
        lower("`ext'")==".v6x" {
        local type="sasx"
    }
    else if "`xport'"=="xport" { // else no file extension
        local type="sasx"
    }
    else { // guess that the user is wanting to use a .sas7bdat file
        local using1 `"`using'.sas7bdat"'
        local ext ".sas7bdat"
        local type="sas"
        capture confirm file `"`using1'"'
        if _rc != 0 {
            di `"{error}The SAS file: `using1' does not exist."'
            // check that user is not expecting file extention but forgot to use xport option
            capture confirm file `"`using'"'
            if _rc == 0 {
                di `"{error}But the SAS file: `using' does exist."'
                di as text `"Use the xport option as it is likely this file is a transport/xport file."'
            }
            /* log usage of usesas */
            capture which usagelog
            if _rc==0 {
                usagelog , type(savas) uerror(3) etime
            }
            exit 601
        }
        // only here if this file does exist
        local using `"`using'.sas7bdat"'
    }

    capture confirm file `"`using'"'
    if _rc != 0 {
        di `"{error}The SAS file: `using' does not exist."'
        /* log usage of usesas */
        capture which usagelog
        if _rc==0 {
            usagelog , type(savas) uerror(3) etime
        }
        exit 601
    }

    if "`type'"=="" {
        di "{error}Is `using' a SAS transport/xport data file?"
        di "{error}If so then use the {res}xport {error}option."
        /* log usage of usesas */
        capture which usagelog
        if _rc==0 {
            usagelog , type(savas) uerror(4) etime
        }
        exit 499
    }

    * map the file type onto the SAS engine handed to usesas_sas
    if "`type'"=="sas" {
        local engine="`rver'" // whatever version of SAS that's being used
    }
    else if "`type'"=="sas6" {
        local engine="v6"
    }
    else if "`type'"=="sasprogram" {
        local sasprogram="sasprogram"
    }
    else if "`type'"=="sasx" {
        local engine="xport"
    }
    else if "`type'"=="spss" {
        local engine="spss"
    }

    /* set where temp directory is */
    tmpdir
    local tmpdir="`r(tmpdir)'"
    * the job id is the last six characters of the current time without colons;
    * it prefixes every intermediary file name
    local tfn=subinstr("`c(current_time)'",":","",.)
    local sysjobid=substr("`tfn'",length("`tfn'")-5,length("`tfn'"))
    local temp `"`macval(tmpdir)'_`sysjobid'"'
    local raw `"`macval(tmpdir)'_`sysjobid'_usesas"'
    local xpt "`macval(dir)'`filen'`middle'`ext'"

    * MAKE "IF" AND "IN" INTO SAS CODE
    * --------------------------------
    * in() is split at the slash: the text before it becomes firstobs and the
    * text after it becomes obs
    local firstobs = upper(substr("`in'",1,index("`in'","/")-1))
    if "`firstobs'" == "F" | index("`firstobs'","-") {
        di `"{error}Your 'in()' option cannot use f/F or negative values. "'
        exit 100
    }
    local obs = upper(substr("`in'",index("`in'","/")+1,length("`in'")))
    if "`obs'" == "L" {
        di `"{error}Your 'in()' option cannot use l/L. "'
        exit 100
    }

    if `"`if'"'!=`""' {
        local iflen : length local if
        if `iflen'>247 { // 255-6-wiggle room = 247
            // 'if()' option needs to be less than 255 characters for SAS to process, it is limited to max length of string
            di `"{error}Your 'if()' option is longer than max length of 247. "'
            exit 100
        }
        else { // okay to process
            * SAS compares with a single equals sign
            if index(`"`if'"',"==") {
                local if : subinstr local if "==" "=" , all
            }
            if lower(substr(`"`if'"',1,3)) != `"if "' & lower(substr(`"`if'"',1,6)) != `"where "' {
                local if `"where `if'"'
            }
            else if lower(substr(`"`if'"',1,3)) == `"if "' {
                di `"{error}Your 'if()' option starts with "if". The "if" is assumed, do not type it."'
                exit 100
            }
            /* now make sure if has only one 'if' or 'where' in it */
            if index(lower(`"`if'"')," if ") | index(lower(`"`if'"')," inrange(") | /*
                */ index(lower(`"`if'"')," inlist(") | index(lower(`"`if'"')," where ")>1 {
                di `"{error}Invalid SAS 'if' condition."'
                capture which usagelog
                if _rc==0 {
                    usagelog , type(savas) uerror(5) etime
                }
                exit 499
            }
        } // 'if()' is less than 247 chars
    }

    * WRITE SAS PROGRAM TO READ IN DATA
    * ---------------------------------
    * the float option is forwarded here because usesas_sas is what passes it on
    * to the savastata macro call; without it the option had no effect
    usesas_sas , rver(`rver') dirsep("`dirsep'") dir("`dir'") tmpdir("`tmpdir'") filen(`filen') raw("`raw'") engine(`engine') ///
        `shortfileext' `quotes' `check' `formats' sysjobid(`sysjobid') ext(`ext') middle(`middle') xpt("`xpt'") ///
        savastata("`savastata'") if(`"`if'"') firstobs(`firstobs') obs(`obs') keep(`"`keep'"') ///
        `char2lab' char2fmt("`char2fmt'") `sasprogram' `describe' `listnot' `float'

    * RUN SAS
    * -------
    if "`c(os)'"=="Unix" /* or Linux */ {
        shell "`usas'" "`temp'_usesas.sas" -log "`temp'_usesas.log" -print "`temp'_usesas.lst"
    } /* end of if Unix */
    else if "`c(os)'"=="Windows" /* Windows */ {
        ** do not add -icon option since that pop-up window is not a big deal and could tell user important info **
        shell `wsas' "`temp'_usesas.sas" -nologo -log "`temp'_usesas.log" -print "`temp'_usesas.lst"
    } /* end of if Windows */

    * LOOK AT ANY REPORT FROM SAS
    * ---------------------------
    capture confirm file `"`temp'_report.log"'
    if _rc==0 {
        type `"`temp'_report.log"'
        if "`messy'"=="" {
            erase `"`temp'_report.log"'
        }
    }

    * CLEAR DATA OUT OF MEMORY
    * ------------------------
    if "`clear'"!="" {
        drop _all
        label drop _all
    }

    * LOAD STATA DATASET INTO MEMORY
    * ------------------------------
    * the presence of the infile do-file is the sign that SAS succeeded
    capture confirm file `"`tmpdir'_`sysjobid'_infile.do"'
    if _rc == 0 {
        if `"`usrlog'"' != "" {
            quietly log close
        }
        local cwd "`c(pwd)'"
        ** cd to where infile.do is **
        quietly cd "`tmpdir'"
        run `"_`sysjobid'_infile.do"'
        if `"`usrlog'"' != "" {
            quietly log using `"`usrlog'"' , append
        }

        * SET DATASET NAME
        * ----------------
        if index("$S_FN","`dirsep'") == 1 {
            global S_FN : subinstr global S_FN "`dirsep'" ""
        }
        global S_FN `"`macval(dir)'$S_FN"'

        // run savastata_report to see if SAS and Stata agree how many obs and vars there are
        savastata_report

        if "`check'" != "" {
            local gsfn : subinstr global S_FN ".dta" ""
            display as res _n " Compare these results with the results provided by SAS "
            display as res " in the file `gsfn'_SAScheck.lst. " _n
            summarize
            describe
            list in 1/5
            di _n "You have requested to have savastata provide a check file:"
            di `""`gsfn'_SAScheck.lst" "'
        }
        ** cd back to where you were **
        quietly cd "`cwd'"
    } /* if infile.do file exists */
    else {
        di `"{error}{help usesas:usesas} failed."'
        capture confirm file `"`tmpdir'_`sysjobid'_knerror.txt"'
        if _rc ==0 {
            // savastata failed with a known error so just let report.log show the error
            if "`c(os)'" != "Windows" {
                usesasdel `"`tmpdir'"' _`sysjobid'_
            }
            if "`c(os)'" == "Windows" {
                local usesasdeldir : subinstr local tmpdir `":"' `"\\\`= char(58)'"', all
                usesasdel `"`usesasdeldir'"' _`sysjobid'_
            }
        }
        else {
            di `"{error}If no error message above this one, then check out the SAS log file to see why. "'
            di `" {view "`temp'_usesas.log"} "'
            di `"{inp}Erase these temporary files created by {help usesas:usesas} when done with them:"'
            di `"{res}(files located in "`tmpdir'") "'
            ls "`temp'_*"
            if "`c(console)'" != "console" {
                if "`c(os)'" != "Windows" {
                    di `"{res} {stata usesasdel `"`tmpdir'"' _`sysjobid'_:Click here to erase them all.} "'
                }
                if "`c(os)'" == "Windows" {
                    local usesasdeldir : subinstr local tmpdir `":"' `"\\\`= char(58)'"', all
                    di `"{res} {stata usesasdel `"`usesasdeldir'"' _`sysjobid'_:Click here to erase them all.} "'
                }
            }
        }
        if "`sasprogram'"!="" {
            di `""'
            if "`c(console)'" != "console" {
                di `"{inp}Click here to edit your SAS program and try it again. "'
                di `" {stata `"doedit "`xpt'""':`xpt'} "'
            }
            else di `"Edit your SAS program: "`xpt'" and try it again."'
            di `""'
        }
        capture which usagelog
        if _rc==0 {
            usagelog , type(savas) uerror(6) etime
        }
        exit 499
    }

    if "`describe'" == "describe" {
        * with describe the data in memory hold one observation per variable of
        * the using dataset, so _N is the number of variables
        di as res `"Contains data from "`using'" "'
        di as res `" obs: `=string(nobs,"%32.0fc")' "' memlabel
        di as res `" vars: `=string(_N,"%32.0fc")' "'
        if ( _N > 2047 & "$S_StataSE"=="" & "$S_StataMP" == "" ) | ///
            ( _N > 32767 ) {
            di as err "Your version of Stata will not read this entire dataset"
            if ( _N > 2047 & "$S_StataSE"=="" & "$S_StataMP" == "" ) ///
                di as err " as it has more than 2,047 variables."
            else if ( _N > 32767 ) ///
                di as err " as it has more than 32,767 variables."
        } // this message is repeated after all vars are listed

        * widen name to at least str13 so the listing header fits
        local name_len = `= substr("`: type name'",index("`: type name'","r")+1,2)'
        if `name_len' < 13 recast str13 name
        recast str12 type
        char define name[varname] "variable name"
        char define type[varname] "storage type"
        char define label[varname] "variable label"
        order varnum name type format label
        if "`listnot'" == "" {
            list name type format label, nocompress noobs subvarname
        }

        capture confirm file `"`macval(temp)'_usesas.txt"'
        if _rc ==0 {
            file open sortedby using `"`temp'_usesas.txt"' , read text
            file read sortedby sortedby // creates local sortedby
            file close sortedby
            // clear sortedby if no vars in it, it ends up being a double quote
            if `"`sortedby'"' == `"""' local sortedby ""
        }
        di as res `"Sorted by: `sortedby'"'

        if "`listnot'" == "" {
            if ( _N > 2047 & "$S_StataSE"=="" & "$S_StataMP" == "" ) | ///
                ( _N > 32767 ) {
                di as err "Your version of Stata will not read this entire dataset"
                if ( _N > 2047 & "$S_StataSE"=="" & "$S_StataMP" == "" ) ///
                    di as err " as it has more than 2,047 variables."
                else if ( _N > 32767 ) ///
                    di as err " as it has more than 32,767 variables."
            } // this message is made first before all vars are listed
        }

        // these vars do not vary by obs so just drop 'em
        quietly drop memlabel // nobs dropped at end of usesas
        di as res _n _dup(`c(linesize)') "-"
        di as res `" Now the dataset in memory is just the description of "`using'" "'
        di as res `" Use the {stata describe :describe} command to see what you have and use "'
        di as res `" whatever data manipulation you like to create variable lists for "'
        di as res `" your actual invocation of {help usesas :usesas} if you want."'
        if "`c(console)'" != "console" {
            di as res `" Otherwise, {stata clear :Click here to clear out the dataset from memory}. "'
        }
        else {
            di as res `" Otherwise, use the clear command to clear out the dataset from memory. "'
        }
        di as res _dup(`c(linesize)') "-"
    }

    * CLEAN UP TEMP FILES
    * -------------------
    if "`messy'"=="" {
        if "`c(os)'" != "Windows" {
            usesasdel `"`tmpdir'"' _`sysjobid'_
        }
        if "`c(os)'" == "Windows" {
            local usesasdeldir : subinstr local tmpdir `":"' `"\\\`= char(58)'"', all
            usesasdel `"`usesasdeldir'"' _`sysjobid'_
        }
    } /* end of messy=="" */
    else {
        di "{res}You have requested {help usesas:usesas} not to delete the intermediary files created by {help usesas:usesas}:"
        dir "`temp'_*"
        di "{input}Files located here: "
        di `"{input}"`tmpdir'" "'
        if "`c(console)'" != "console" {
            if "`c(os)'" != "Windows" {
                di `"{res} {stata usesasdel `"`tmpdir'"' _`sysjobid'_:Click here to erase them all.} "'
            }
            if "`c(os)'" == "Windows" {
                local usesasdeldir : subinstr local tmpdir `":"' `"\\\`= char(58)'"', all
                di `"{res} {stata usesasdel `"`usesasdeldir'"' _`sysjobid'_:Click here to erase them all.} "'
            }
        }
    } // of if else if messy

    /* log usage of usesas */
    capture which usagelog
    if _rc==0 {
        if `c(N)' == 0 & `c(k)' == 0 {
            usagelog , type(savas) uerror(7) message(no data) etime
        }
        else {
            local obs=`c(N)'
            local vars=`c(k)'
            usagelog , type(savas) uerror(0) message(Input Stata dataset has `obs' obs and `vars' vars) etime
        }
    }

    if "`describe'" == "describe" {
        * build r(varlist) from the name variable, one observation at a time
        local varlist = ""
        local vlen=0
        forvalues n = 1/`= _N' {
            local vlen = `vlen' + length(trim("`= name[`n']'")) + 1
            if `n' == 1 local varlist = trim("`= name[`n']'")
            else local varlist `"`varlist' `= trim("`= name[`n']'")'"'
        }
        if `vlen' > `c(max_macrolen)' {
            di as err "not all the variables are in r(varlist) since there are too many "
        }
        return local varlist "`varlist'"
        return local sortlist "`sortedby'"
        return scalar k = _N
        return scalar N = `= nobs[1]'
        drop nobs
    }
end /* end of usesas */
program define usesas_sas, nclass
* usesas_sas: write the SAS program that SAS will run on behalf of usesas.
*
* The program is written to the file named by raw() with a .sas suffix.
* Depending on the options it contains, in this order:
*   - a fixed header that includes the savastata macro file, and the
*     char2fmt macro file when char2lab is given
*   - either an include of the user's own SAS program followed by a macro
*     that prepares the last dataset it created, or code that reads the
*     using file through the engine named in engine()
*   - a macro named runit that calls the savastata macro unless the badx
*     flag was set by a failed transport import
*
* The replace and listnot options are accepted by syntax but are not
* referenced anywhere in this program.
syntax [, QUotes engine(string) rver(string) dirsep(string) dir(string) tmpdir(string) filen(string) ///
shortfileext xpt(string) replace raw(string) FORmats sysjobid(string) CHeck ext(string) middle(string) ///
savastata(string) if(string) firstobs(string) obs(string) keep(string) sasprogram ///
char2lab char2fmt(string) float describe listnot ]
version 8
quietly {
* open the SAS program file for writing, replacing any existing file
file open sasfile using `"`raw'.sas"', replace text write
* DATA LIST
* ---------
file write sasfile `"* SAS program to read file and output Stata dataset *;"' ///
_n _n `"options nofmterr nocenter linesize=250;"' ///
_n _n `"%let badx =0; ** if proc cimport has trouble with xport file **; "' _n _n ///
_n _n `"%include "`savastata'"; "' _n _n
* the char2fmt macro is only included when char2lab was requested
if "`char2lab'" != "" {
file write sasfile `"%include "`char2fmt'"; "' _n _n
}
if "`sasprogram'"!="" { /* user submitted a SAS program */
* here xpt holds the path of the SAS program given by the user
file write sasfile `"options mprint source2; "' _n _n ///
`" /*************** THE FOLLOWING IS YOUR PROGRAM ***************/ "' _n _n ///
`" %include"`xpt'"; "' _n _n ///
`" /*************** END OF YOUR PROGRAM ***************/ "' _n _n ///
`"options nomprint nosource2; "' _n _n
* start of SAS macro makework: skip everything when the user program failed
* or created no dataset, otherwise split syslast into library and member
file write sasfile `" %let sortedby=; ** leave in for now **; "' _n _n ///
`"%macro makework; "' _n ///
`" %if &syserr.^=0 %then %goto nevrmind; "' _n ///
`" %if &syslast.=_NULL_ %then %goto nevrmind; "' _n ///
`" %let ldset=%length(&syslast.); "' _n ///
`" %let decpos=%index(&syslast.,.); "' _n ///
`" %let llib=%substr(&syslast.,1,&decpos.-1); "' _n ///
`" %let dset=%substr(&syslast.,&decpos.+1,&ldset.-&decpos.); "' _n ///
`" %let dset=%sysfunc(lowcase(%nrbquote(&dset.))); "' _n _n
* read the SORTEDBY attribute of that dataset into the SAS macro variable sortedby
file write sasfile `" data _null_; "' _n ///
`" dsid=open("&syslast.",'i');"' _n `" sortedby=attrc(dsid,'SORTEDBY'); "' _n ///
`" call symput('sortedby',trim(sortedby));"' _n `" rc=close(dsid);"' _n `"run;"' _n _n
* keep only the sort variables listed before the first DESCENDING keyword
file write sasfile `" %if %index(%upcase(&sortedby.),DESCENDING) %then %do; "' _n ///
`" %* this is how Stata treats descending sortedby *; "' _n ///
`" %let sortedby= %substr(&sortedby.,1,%index(%upcase(&sortedby.),DESCENDING)-1); %end;"' _n _n
* a dataset outside WORK is copied into WORK and the original is deleted
file write sasfile `" ** if not in work make it be in work **; "' _n ///
`" %if %index(%upcase(&syslast.),WORK)^=1 %then %do; "' _n ///
`" data work.&dset.; "' _n ///
`" set &syslast.;"' _n `" run; "' _n ///
`" proc datasets library=&llib.;"' _n `" delete &dset.;"' _n `" run; quit;"' _n ///
`"%end; ** end of if syslast is not in WORK **; "' _n _n
if "`keep'"!="" | "`firstobs'"!="" | length(`"`if'"')>5 {
/** apply subsetting to work dataset **/
file write sasfile `" data work.&dset."' _n
if "`keep'"!="" {
file write sasfile `" (keep=`keep' &sortedby.) "'
}
file write sasfile `";;; "' _n ///
`" set &dset."'
if "`firstobs'"!="" {
file write sasfile `"(firstobs=`firstobs' obs=`obs')"' _n
}
file write sasfile `";;; "' _n
if length(`"`if'"')>5 /* b/c "where" has 5 letters */ {
file write sasfile `" `if'; "' _n
}
file write sasfile `"run; "' _n
}
* close the makework macro definition and invoke it
file write sasfile `" %nevrmind: ; "' _n /*
*/ `"%mend; "' _n /*
*/ `"%makework; "'
}
else if "`sasprogram'"=="" { /* write SAS program to feed SAS data set into savastata */
* with formats, point the LIBRARY libref at the data directory
if "`formats'"!="" {
if "`engine'"=="v6" {
file write sasfile `"libname library v6 "`dir'" ; "'_n _n
}
else {
file write sasfile `"libname library `engine' "`dir'" `shortfileext'; "'_n _n
}
}
if "`engine'"=="`rver'" | "`engine'"=="v6" {
file write sasfile `"libname ___in___ `engine' "`dir'" `shortfileext' ; "'_n _n
// preserve sort order
// Transport datasets cannot be opened and they do not save sort info anyway
file write sasfile `"%let sortedby=; "' _n _n `"data _null_;"' _n `" dsid=open('___in___.`filen'','i');"' _n ///
`" sortedby=attrc(dsid,'SORTEDBY'); "' _n `" call symput('sortedby',trim(sortedby));"' _n `" rc=close(dsid);"' _n `"run;"' _n _n ///
`" %macro __sort; %if %index(&sortedby.,DESCENDING) %then %sysfunc(tranwrd(&sortedby.,%str(DESCENDING ),-)); "' ///
`" %mend __sort; %__sort; "'
} // end of normal SAS dataset
else if "`engine'"=="xport" { // test xport file to see if created by cimport
// Transport datasets cannot be opened and they do not save sort info anyway
file write sasfile `"%let sortedby=; "'
* the generated macro reads the first six characters of the file into the
* SAS macro variable header
file write sasfile `"filename ___in___ "`xpt'"; "' _n _n ///
`"%macro ___xt___ ;"' _n `" data _null_; "' _n `" infile ___in___ ; "' _n `" input xt $ 1-6; "' _n ///
`" call symput('header',xt); "' _n `" if _n_ = 1 then stop; "' _n `" run; "' _n
* when header contains HEAD the file is read with the xport engine
file write sasfile `" %if %index(&header.,HEAD) ^= 0 %then %do; "' _n _n ///
`" libname ___in___ xport "`xpt'"; "' _n _n
* the member name found inside the transport file replaces the SAS macro variable filen
file write sasfile `" data _null_;"' _n `" set sashelp.vmember; "' _n ///
`" if upcase(libname)="___IN___" and upcase(memtype)="DATA" then call symput("filen",memname); "' _n
file write sasfile `" run;"' _n _n `" data `filen'; "' _n `" set ___in___.&filen.;"' _n ///
`" run; "' _n `" %end; "' _n
* otherwise proc cimport is tried; on failure an error is written to the
* report log and badx is set to 1 so that savastata is not run
file write sasfile `" %else %do;"' _n `" proc cimport data=`filen' infile=___in___; "' _n ///
`" run; "' _n `" %if &syserr. ^=0 %then %do; "' _n ///
`" proc printto log="`tmpdir'_`sysjobid'_report.log"; options nonotes; "' _n ///
`" data _null_; "' _n ///
`" put "ERROR: SAS could not open `filen' because it was created in a newer version of SAS *"; "' _n ///
`" put " or there is not just a data set named `filen' in the file. *"; "' _n ///
`" run; proc printto; ** end printing to *_report.log "' _n ///
`" %let badx=1; %end; "' _n `"%end; "' _n `"%mend ___xt___;"' _n _n `"%___xt___; ** now run macro ___xt___ ***; "' ///
_n _n
}
if "`engine'"!="spss" {
if "`formats'"!="" {
/* look for datasetname.formatscatalog file */
* rc stays 1 unless a catalog file with the extension matching the engine
* and operating system is found next to the dataset
local rc=1
if ("`engine'"=="`rver'" | "`engine'"=="xport") & "`shortfileext'"=="" {
capture confirm file `"`macval(dir)'`filen'.sas7bcat"'
if _rc ==0 {
local rc=0
}
}
else if ("`engine'"=="`rver'" | "`engine'"=="xport") & "`shortfileext'"!="" {
capture confirm file `"`macval(dir)'`filen'.sc7"'
if _rc ==0 {
local rc=0
}
}
else if "`engine'"=="v6" & "`c(os)'"=="Unix" {
capture confirm file `"`macval(dir)'`filen'.sct01"'
if _rc ==0 {
local rc=0
}
}
else if "`engine'"=="v6" & "`c(os)'"=="Windows" {
capture confirm file `"`macval(dir)'`filen'.sc2"'
if _rc ==0 {
local rc=0
}
}
* when a catalog file exists, the generated macro copies it into WORK as
* formats if SAS can read it, and otherwise writes errors to the report log
if `rc'==0 {
file write sasfile _n `"%macro __fmt__;"' ///
`" %if %sysfunc(cexist(LIBRARY.`filen')) = 1 %then %do;"' _n ///
`" options fmtsearch=(library.`filen' library.formats); "' _n _n ///
`" proc datasets; "' _n ///
`" copy in=library out=work memtype=catalog; "' _n ///
`" select `filen'; "' _n ///
`" change `filen'=formats;"' _n ///
`" run; quit;"' _n ///
`" %end; "' _n ///
`" %else %do; "' _n ///
`" proc printto log="`tmpdir'_`sysjobid'_report.log"; options nonotes; "' _n ///
`" data _null_; "' _n ///
`" put "ERROR: File LIBRARY.`filen'.CATALOG was created for a different operating system. *" ; "' _n ///
`" put "ERROR: -usesas- did not create Stata value labels from SAS formats. *"; "' _n ///
`" run; proc printto; ** end printing to *_report.log "' _n ///
`" %end; "' _n ///
`"%mend __fmt__; "' _n ///
`"%__fmt__; "' _n
} /* if filen.catalog file exists */
} /* end of if "`formats'"!="" */
* data step that copies the input into a WORK dataset, applying keep,
* firstobs/obs and the where condition when they were supplied
file write sasfile _n `"data `filen'"'
if "`keep'"!="" {
file write sasfile `" (keep=`keep' &sortedby.) "'
}
if "`engine'" == "xport" {
file write sasfile `";;;"' _n `" set work.`filen' "' // 08Apr2005 use `filen' in work lib
}
else {
file write sasfile `";;;"' _n `" set ___in___.`filen' "'
}
if "`firstobs'"!="" {
file write sasfile `"(firstobs=`firstobs' obs=`obs')"' _n
}
file write sasfile `";;; "' _n
if length(`"`if'"')>5 /* b/c "where" has 5 letters */ {
file write sasfile `" `if'; "' _n //
}
file write sasfile `"run; "' _n
}
else {
* SPSS portable file: converted with proc convert, then subset the same way
file write sasfile `"filename spss "`xpt'"; "' _n _n /*
*/ `"proc convert spss=spss out=`filen'; "' _n /*
*/ `"run; "'
file write sasfile _n `"data `filen'"'
if "`keep'"!="" {
file write sasfile `" (keep=`keep') "'
}
file write sasfile `";;;"' _n `" set `filen' "'
if "`firstobs'"!="" {
file write sasfile `"(firstobs=`firstobs' obs=`obs')"' _n
}
file write sasfile `";;; "' _n
if length(`"`if'"')>5 /* b/c "where" has 5 letters */ {
file write sasfile `" `if'; "' _n //
}
file write sasfile `"run; "' _n
}
} /* end of if no sas program submitted */
* char2lab is dropped with a warning before the savastata call when Stata is older than 9
if `c(stata_version)' < 9 & "`char2lab'" != "" {
noisily {
di as error `"option char2lab is not allowed prior to Stata 9."'
di as error `"option will be ignored."'
local char2lab ""
}
}
* start of SAS macro runit, whose body only runs when badx is 0
file write sasfile _n _n ///
`"%macro runit;"' _n `" %if &badx.=0 %then %do; "'
if "`describe'" == "describe" {
* describe: replace the dataset with the output of proc contents, one row per variable
file write sasfile _n ///
_n `" proc contents data=`filen' out=`filen'(keep=name varnum type label format "' ///
`" nobs length memlabel) noprint; run; "'
file write sasfile _n ///
_n `" data `filen'(drop=type rename=(stype=type)); "'
// truncate long string vars just to make life simple
if `c(stata_version)' < 9.2 & "$S_StataSE" == "" & "$S_StataMP" == "" {
file write sasfile _n `" length label memlabel $80; "'
}
else {
file write sasfile _n `" length label memlabel $244; "'
}
* the numeric type code from proc contents is recoded to the words numeric and string
file write sasfile ///
_n `" set `filen'; "' ///
_n `" if type =1 then stype="numeric"; "' ///
_n `" if type =2 then stype="string "; "' ///
_n `" label stype = "Variable Type"; "' ///
_n `" run; "'
* the sort variables are written to a text file in the temp directory
file write sasfile ///
_n `" data _null_; "' ///
_n `" file "`tmpdir'_`sysjobid'_usesas.txt"; "' ///
_n `" put "%trim(&sortedby.)"; "' ///
_n `" run;"'
file write sasfile _n ///
_n `" proc sort data=`filen'; by varnum; run; "'
file write sasfile _n ///
_n `" %let sortedby=varnum; "'
} // end of if describe
// need to put c(SE) and c(MP) in quotes since c(MP) doesn't exist in Stata 8
// need to pass a zero or a one to savastata for SE or MP
* the savastata call receives the quotes, char2lab, check and float options plus a literal messy
file write sasfile `" libname ___dir__ "`dir'" ; "' _n ///
`" %let _dir=%nrbquote(%sysfunc(pathname(___dir__))); "' _n ///
`" /* &sortedby. is global because of: call symput creates it */ "' _n ///
`" %savastata("`tmpdir'",`quotes' `char2lab' `check' messy `float', &sortedby., "' ///
`" `sysjobid',nosave,"&_dir.`dirsep'",`= ("`c(SE)'" == "1") + ("`c(MP)'" == "1")', "' ///
`" version=`c(stata_version)'); "' _n ///
`"%end; %* if &badx.=0 *; %mend runit;"' _n `" %runit;"' _n _n
file close sasfile
} /* end of quietly */
end
* nothing after this point in the file is executed
exit

View File

@ -0,0 +1,213 @@
{smcl}
{* version 2.1 26Feb2009}{...}
{* 24Aug2006}{...}
{* 04Aug2005}{...}
{* 05Nov2003}{...}
{hline}
help for {hi:usesas} {right:manual: {hi:[R] none}}
{right:dialog: {hi: none} }
{hline}
{title:Use a SAS dataset}
{p 8 17 2}{cmd:usesas}
{cmd:using} {it:filename}
[{cmd:,}
{cmdab:for:mats}
{cmd:char2lab}
{cmdab:ch:eck}
{cmd:clear}
{cmd:float}
{cmd:xport}
{cmdab:de:scribe}
{cmdab:ke:ep(}{it:variable names}{cmd:)}
{cmd:if(}{it:SAS if statement}{cmd:)}
{cmd:in(}{it:firstobs/lastobs}{cmd:)}
{cmdab:qu:otes}
{cmdab:me:ssy}
]{p_end}
{title:Description}
{p 4 8 2} {cmd:NOTE:} Before the first use of {cmd:usesas} your {cmd:sasexe.ado} file may need to be edited to set
the location of your SAS executable file (sas.exe) and your savastata SAS macro file (savastata.sas). It may be
that {cmd:usesas} will be able to run with the default settings in {cmd:sasexe.ado}.{p_end}
{p 4 4 2} {cmd:usesas} loads a SAS datafile into memory. This usually occurs by supplying {cmd:usesas} a SAS
dataset (*.sas7bdat, *.sd7, *.sd2, *.ssd01, *.xpt, *.cport) or an SPSS portable file (*.por),
but {cmd:usesas} can also load a SAS datafile into memory via a SAS program (*.sas) that creates a
SAS dataset. The last dataset created by the SAS program will be the SAS dataset processed by {cmd:usesas}.{p_end}
{p 4 4 2}{cmd:usesas} assumes the most common SAS datafile extension {cmd:.sas7bdat} if no file extension/suffix is
specified.{p_end}
{p 4 4 2}{cmd:usesas} uses the savastata SAS macro to create the Stata dataset from the SAS
dataset. {cmd:usesas} downloads the savastata SAS macro and stores it where user-written
Stata ado-files are stored that begin with the letter "s". This macro can be used in SAS.
Learn about savastata here:
{browse "http://faculty.fuqua.duke.edu/home/blanc004/data_programming/sas_to_stata/savastata.html": http://faculty.fuqua.duke.edu/home/blanc004/data_programming/sas_to_stata/savastata.html}{p_end}
{p 4 4 2}{cmd:usesas} figures out how much memory the SAS dataset will require to be loaded into Stata
and sets Stata's memory for you if your memory setting is less than is required.{p_end}
{p 4 4 2}{cmd:usesas} indicates that it has finished running by reporting to you how many observations
and variables are in your dataset now in memory. For example:{p_end}
{p 4 8}Stata reports that the dataset has 200 observations and 11 variables.{p_end}
{p 4 8 2}{cmd:NOTE: usesas} calls SAS to run a SAS program. This requires the ability to run SAS on your computer.{p_end}
{title:Options}
{p 4 8 2}{cmd:formats} specifies to create value labels from SAS user-defined formats that are stored
in a SAS formats catalog file that has the same name as the dataset and is in the same directory
as the SAS dataset. For example: MySasData.sas7bcat . If this file doesn't exist, {cmd:usesas} will
look for the file formats.sas7bcat in the same directory as the dataset.{p_end}
{p 4 8 2}{cmd:char2lab} specifies to encode long SAS character variables like the Stata
command {help encode :encode}. Character variables that are too long for a Stata string
variable are maintained in value labels. This is all done with the {cmd:char2fmt} SAS
macro.{p_end}
{p 4 8 2}{cmd:check} specifies to generate basic stats for both datasets for the user to compare the
newly created Stata dataset with the imported SAS dataset to make sure {cmd:usesas} created the files
correctly. This is a comparison that should be done after any datafile is converted to any other
type of datafile by any software. The SAS file is created in the same directory as the input SAS
datafile and is named starting with the name of the datafile followed by "_SAScheck.lst"
(SAS). e.g. "mySASdata_SAScheck.lst"{p_end}
{p 4 8 2}{cmd:clear} specifies to clear the data currently in memory before running {cmd:usesas}.{p_end}
{p 4 8 2}{cmd:float} specifies that numeric variables that would otherwise be stored as numeric type
double be stored with numeric type float. This option should only be used if you are certain you
have no integer variables that have more than 7 digits (like an ID variable).{p_end}
{p 4 8 2}{cmd:xport} specifies that the input dataset is a SAS Transport/Xport dataset. Since there
is no standard file extension for SAS Xport datasets, this option is required. Datasets created
by SAS's PROC CPORT procedure are allowed.{p_end}
{p 4 8 2}{cmd:describe} makes {cmd:usesas} act somewhat like the Stata command
{help describe :describe using}. It does not bring the full dataset into memory. Instead it specifies for
{cmd:usesas} only to load the descriptive information about the using dataset into Stata's memory as a
Stata dataset and print it. So, instead of loading the actual dataset into Stata, {cmd:usesas} loads
the descriptive information (variable names, what type of variables they are, the variable labels and
formats associated with the variables) into Stata as a dataset.  You can {help clear :clear} the
descriptive data out of Stata's memory or use the descriptive data however you like to create variable
lists for your actual invocation of {cmd:usesas}. This may be helpful for situations where the SAS
dataset has more variables than your version of Stata can handle. You can create a variable list
from the variable called "name" to create another invocation of {cmd:usesas} to read in only the
variables you need.{p_end}
{p 8 8 2}If you do not want to have the {cmd:describe} option list the descriptive information of the
imported dataset, you can use the option {cmd:listnot} with {cmd:describe}. The descriptive information
will still be loaded into Stata as a Stata dataset.{p_end}
{p 8 8 2}The descriptive data are sorted in the variable order of the using dataset so a variable list
for {cmd:usesas} could be created like so:{p_end}
{p 8 8 2} {cmd:. display "`= trim(name[1])'--`= name[2047]'" }{p_end}
{p 8 8 2} {cmd:id--income88 }{p_end}
{p 8 8 2} which could then be used like so to keep the first 2,047 variables in the using dataset
(2,047 is the maximum number of variables that Stata Intercooled can handle):{p_end}
{p 8 8 2} {cmd:. usesas using "mySASdata.sas7bdat", clear keep(`= trim(name[1])'--`= name[2047]') }{p_end}
{p 8 8 2} A SAS variable list using two dashes "--" tells SAS to use the variables that exist
positionally between the first variable and the last variable in the using dataset inclusively.
Read more about this under the documentation of the {cmd:keep} option.{p_end}
{p 8 8 2}The {cmd:describe} option makes {cmd:usesas} return the following in {cmd:r()}:{p_end}
{synoptset 20 tabbed}{...}
{p2col 5 20 24 2: Scalars}{p_end}
{synopt:{cmd:r(N)}}number of observations in using dataset{p_end}
{synopt:{cmd:r(k)}}number of variables in using dataset{p_end}
{p2col 5 20 24 2: Macros}{p_end}
{synopt:{cmd:r(varlist)}}variables in using dataset {p_end}
{synopt:{cmd:r(sortlist)}}variables by which using data are sorted {p_end}
{p 8 8 2} The above scalars and macros contain information about the dataset that was described,
not information about the dataset of descriptive information that {cmd:usesas} loaded into Stata
with the {cmd:describe} option.{p_end}
{p 4 8 2}{cmd:keep} allows for a list of variables from the imported dataset to be read in. This list is
used in the SAS code portion of {cmd:usesas} so must be written in the SAS variable list style. SAS does
not allow for variable lists to contain stars (*) or question marks (?). For example:{p_end}
{p 4 8 2}{cmd: keep(var1-var20)} includes only vars that start with "var" and end in a number between 1 and 20.{p_end}
{p 4 8 2}{cmd: keep(var1--var20)} includes only vars in the dataset between var1 and var20. This is like Stata's
{help varlist:varlist} style {cmd: var1-var20}.{p_end}
{p 4 8 2}{cmd:if} allows for a SAS {cmd:if} statement to subset the data before it's read in. Any valid
SAS style {cmd:if} statement will work.{p_end}
{p 4 8 2}{cmd:in} allows for subsetting the data before it's read in. Use only {cmd:#/#} where both numbers are
positive, for example 1/30 for the first 30 observations.{p_end}
{p 4 8 2}{cmd:quotes} specifies that double quotes that exist in string variables are to be replaced
with single quotes. Since the data are written out to an ASCII file and then read into Stata,
there are rare instances when double quotes are not allowed inside string variables.{p_end}
{p 4 8 2}{cmd:messy} specifies that all the intermediary files created by {cmd:usesas} during its operation
are not to be deleted. The {cmd:messy} option prevents {cmd:usesas} from cleaning up after it has
finished. This option is mostly useful for debugging purposes in order to find out where something went
wrong. All intermediary files have a name starting with an underscore "_" followed by the process ID and
are located in Stata's temp directory.{p_end}
{title:Examples}
{p 4 8 2} {cmd:. usesas using "mySASdata.sas7bdat" }{p_end}
{p 4 8 2} {cmd:. usesas using "c:\data\mySASdata.ssd01", check }{p_end}
{p 4 8 2} {cmd:. usesas using "mySASdata.xpt", xport }{p_end}
{p 4 8 2} {cmd:. usesas using "mySASdata.sas7bdat", formats }{p_end}
{p 4 8 2} {cmd:. usesas using "mySASdata.sd2", quotes }{p_end}
{p 4 8 2} {cmd:. usesas using "mySASdata.sas7bdat", messy }{p_end}
{p 4 8 2} {cmd:. usesas using "mySASdata.sas7bdat", keep(id--qvm203a) if(1980<year<2000) in(1/500) }{p_end}
{p 4 8 2} {cmd:. usesas using "mySASdata.sas7bdat", describe }{p_end}
{p 4 8 2} {cmd:. usesas using "mySASdata.sas7bdat", describe listnot }{p_end}
{p 4 8 2} {cmd:// then submit the following actual invocation of usesas: }{p_end}
{p 4 8 2} {cmd:. usesas using "mySASdata.sas7bdat", clear keep(`r(sortlist)' `= trim(name[1])'--`= name[2047]') }{p_end}
{p 4 8 2} NOTE: If you are setting up this program on your computer for the first time, please edit
{cmd:sasexe.ado} to set the location of your SAS executable file (sas.exe). If you do not, {cmd:usesas}
will try to set it for you. The {cmd:sasexe.ado} file is an ASCII text file and should be saved as such
after editing. Stata's {cmd:do-file} editor will do the trick.{p_end}
{title:Setting up usesas}
{p 4 8 2}{stata quietly adoedit sasexe:edit sasexe.ado} (click to edit the {cmd:sasexe.ado} file; remember to save when done){p_end}
{title:Author}
{p 4 4 2}
Dan Blanchette {break}
Center for Entrepreneurship and Innovation {break}
Duke University's Fuqua School of Business {break}
Dan.Blanchette@Duke.edu{p_end}
{title:Also see}
{p 4 13 2}On-line: {help use}, {help fdause}, {help savasas} (if installed){p_end}

View File

@ -0,0 +1,25 @@
*! usesasdel Version 1.1 dan.blanchette@duke.edu 16Mar2009
*! Center of Entrepreneurship and Innovation Duke University's Fuqua School of Business
** usesasdel Version 1.1 dan_blanchette@unc.edu 01Feb2008
** - made the string comparison work for very long strings
** research computing, unc-ch
** usesasdel Version 1.0 dan_blanchette@unc.edu 09Nov2005
** the carolina population center, unc-ch
// can only delete files with no spaces in their names
// but can handle directory names with spaces in their names
program define usesasdel
	* Erase the files in a directory whose names start with a given stem.
	*
	* Positional arguments:
	*   dir           directory to search (first argument)
	*   basefilename  leading part of the file names to match (second argument);
	*                 a trailing wildcard is appended when searching
	*
	* A missing or unreadable directory is tolerated because the file search
	* is run with the nofail option.
	version 8
	args dir basefilename

	* every file in the directory whose name begins with the stem
	local found : dir `"`dir'"' files `"`basefilename'*"' , nofail

	foreach fname in `found' {
		* full path of the candidate file
		local target `"`dir'/`fname'"'
		* skip the erase only when the built path is identical to the bare directory
		if !`: list local(dir) == local(target)' {
			erase `"`target'"'
		}
	}
end
exit

456
Modules/ado/plus/u/uvis.ado Normal file
View File

@ -0,0 +1,456 @@
*! version 1.1.0 PR 30aug2005.
*
* Recent history of uvis
* 1.1.0 03aug2005 Replace -draw- option with -match-. Default becomes draw.
* With prediction matching, randomly sort observations with identical predictions.
* Order variables in chained equations in order of increasing missingness.
* 1.0.4 21jun2005 Add sort, stable to enable reproducibility imputations with given seed
*
program define uvis, rclass sortpreserve
	* Univariate imputation: fill in the missing values of one variable from a
	* regression model on other variables.
	*
	* Syntax (as parsed below):
	*   uvis cmd yvar xvars [if] [in] [weight], gen(newvar)
	*        [noconstant delta(#) boot match replace seed(#) other_options]
	*
	*   cmd          regress (anything starting with reg), rreg, logit, logistic,
	*                mlogit or ologit; anything else exits with error 198
	*   yvar         first variable of the varlist: the variable to impute
	*   xvars        remaining variables: predictors in the imputation model
	*   gen()        name of the variable that receives observed plus imputed values
	*   noconstant   passed to cmd as nocons
	*   delta()      shift added to the predictions for missing observations when
	*                prediction matching is used with a normal or binary model
	*   boot         refit the model on a bootstrap sample of the complete
	*                observations instead of drawing the parameters
	*   match        impute by prediction matching; the default is to draw from
	*                the conditional distribution
	*   replace      allow gen() to name an existing variable, which is dropped
	*   seed()       if nonzero, set the random-number seed to this value
	*   other_options  passed through unchanged to cmd
	*
	* On exit gen() holds yvar where yvar is observed and an imputed value where
	* yvar is missing and all xvars are observed within the if/in sample; it is
	* missing elsewhere.  Nothing is posted to r() by the code in this program.
	version 8
	gettoken cmd 0 : 0
	if substr("`cmd'",1,3)=="reg" {
		local cmd regress
	}
	* Classify the model: continuous, binary or categorical outcome
	local normal=("`cmd'"=="regress")|("`cmd'"=="rreg")
	local binary=("`cmd'"=="logit")|("`cmd'"=="logistic")
	local catcmd=("`cmd'"=="mlogit")|("`cmd'"=="ologit")
	if !`normal' & !`binary' & !`catcmd' {
		di in red "invalid or unrecognised command, `cmd'"
		exit 198
	}
	syntax varlist(min=2 numeric) [if] [in] [aweight fweight pweight iweight] , Gen(string) /*
	*/ [ noCONStant Delta(real 0) BOot MAtch REPLACE SEed(int 0) * ]
	if "`replace'"=="" {
		confirm new var `gen'
	}
	* Report the imputation method being used
	if "`match'"=="match" {
		di as text "[imputing by prediction matching" _cont
	}
	else di as text "[imputing by drawing from conditional distribution" _cont
	if "`boot'"=="" {
		di as text " without bootstrap]"
	}
	else di as text " with bootstrap]"
	if "`constant'"=="noconstant" {
		local options "`options' nocons"
	}
	gettoken y xvars : varlist
	tempvar touse
	quietly {
		marksample touse, novarlist
		markout `touse' `xvars' /* note: does not include `y' */
		if `seed'!=0 {
			set seed `seed'
		}
		* Deal with weights
		* (frac_wgt is defined outside this file; its r(wgt) is used as the weight clause)
		frac_wgt `"`exp'"' `touse' `"`weight'"'
		local wgt `r(wgt)'
		* Code types of missings: 1=non-missing y, 2=missing y, 3=other missing
		tempvar obstype yimp
		gen byte `obstype'=1*(`touse'==1 & !missing(`y')) /*
		*/ +2*(`touse'==1 & missing(`y')) /*
		*/ +3*(`touse'==0)
		count if `obstype'==1
		local nobs=r(N)
		count if `obstype'==2
		local nmis=r(N)
		* The imputed variable takes the storage type of y
		local type: type `y'
		gen `type' `yimp'=.
		* Fit imputation model
		* NOTE(review): this fit has no if-qualifier, so unless r(wgt) from frac_wgt
		* carries one, an if/in restriction is not applied to the estimation sample,
		* whereas the bootstrap refit further down is restricted to obstype 1 - confirm intended
		`cmd' `y' `xvars' `wgt', `options'
		tempname b e V chol bstar
		tempvar xb u
		matrix `b'=e(b)
		* e starts as a copy of e(b) only to get the right dimensions; it is
		* overwritten with standard normal deviates below
		matrix `e'=e(b)
		matrix `V'=e(V)
		local colsofb=colsof(`b')
		* Check for zeroes on the diagonal of V and replace them with 1.
		* Otherwise this makes the matrix non-positive definite.
		* Occurs when e.g. logit drops variables, giving zero variances.
		* !! Is this safe to do?
		if diag0cnt(`V')>0 {
			forvalues j=1/`colsofb' {
				if `V'[`j',`j']==0 {
					matrix `V'[`j',`j']=1
				}
			}
		}
		matrix `chol'=cholesky(`V')
		if `catcmd' {
			tempname cat
			local nclass=e(k_cat) /* number of classes in (ordered) categoric variable */
			matrix `cat'=e(cat) /* row vector giving actual category values */
			local cuts=`nclass'-1
		}
		* Draw beta, and if necessary rmse, for proper imputation
		if `normal' {
			* draw rmse
			local rmse=e(rmse)
			local df=e(df_r)
			local chi2=2*invgammap(`df'/2,uniform())
			local rmsestar=`rmse'*sqrt(`df'/`chi2')
			* rescale the Cholesky factor by the same factor as the drawn rmse
			matrix `chol'=`chol'*sqrt(`df'/`chi2')
		}
		* draw beta
		forvalues i=1/`colsofb' {
			matrix `e'[1,`i']=invnorm(uniform())
		}
		matrix `bstar'=`b'+`e'*`chol''
		if "`boot'"=="" {
			* Based on Ian White's code to implement van Buuren et al (1999).
			* draw y
			gen `u'=uniform()
			if `normal' | `binary' {
				* in normal or binary case, impute by sampling conditional distribution
				* or by prediction matching
				if "`match'"=="match" {
					* prediction matching
					* (observed rows are scored with b, missing rows with the drawn bstar)
					tempvar etaobs etamis
					matrix score `etaobs'=`b' if `obstype'==1
					matrix score `etamis'=`bstar' if `obstype'==2
					* Include non-response location shift, delta.
					if `delta'!=0 {
						replace `etamis'=`etamis'+`delta'
					}
					match_normal `obstype' `nobs' `nmis' `etaobs' `etamis' `yimp' `y'
				}
				else {
					* sampling conditional distribution
					matrix score `xb'=`bstar' if `touse'
					if `normal' {
						replace `yimp'=`xb'+`rmsestar'*invnorm(`u')
					}
					else replace `yimp'=`u'<1/(1+exp(-`xb')) if !missing(`xb')
				}
			}
			else { /* catcmd */
				if "`match'"=="match" { // prediction matching
					* predict class-specific probabilities and convert to logits
					if "`cmd'"=="ologit" {
						* Predict index independent of cutpoints
						* (note use of forcezero option to circumvent missing _cut* vars)
						matrix score `xb'=`b' if `touse', forcezero
						* predict cumulative probabilities for obs data and hence logits of class probs
						forvalues k=1/`nclass' {
							tempvar etaobs`k' etamis`k'
							if `k'==`nclass' {
								* last class: probability is 1 minus the last cumulative probability
								gen `etaobs`nclass''=log((1-`p`cuts'')/`p`cuts'') if `obstype'==1
							}
							else {
								tempvar p`k'
								* the cutpoints are the last `cuts' columns of the coefficient vector
								local cutpt=`b'[1, `k'+`colsofb'-`cuts']
								* 1/(1+exp(-... is probability of being in category 1 or 2 or ... k
								gen `p`k''=1/(1+exp(-(`cutpt'-`xb')))
								if `k'==1 {
									gen `etaobs`k''=log(`p`k''/(1-`p`k'')) if `obstype'==1
								}
								else {
									local k1=`k'-1
									gen `etaobs`k''=log((`p`k''-`p`k1'')/(1-(`p`k''-`p`k1''))) /*
									*/ if `obstype'==1
								}
							}
						}
						drop `xb'
						matrix score `xb'=`bstar' if `touse', forcezero
						* predict cumulative probabilities for missing data and hence logits of class probs
						forvalues k=1/`nclass' {
							if `k'==`nclass' {
								gen `etamis`nclass''=log((1-`p`cuts'')/`p`cuts'') if `obstype'==2
							}
							else {
								local cutpt=`bstar'[1, `k'+`colsofb'-`cuts']
								replace `p`k''=1/(1+exp(-(`cutpt'-`xb')))
								if `k'==1 {
									gen `etamis`k''=log(`p`k''/(1-`p`k'')) if `obstype'==2
								}
								else {
									local k1=`k'-1
									gen `etamis`k''=log((`p`k''-`p`k1'')/(1-(`p`k''-`p`k1''))) /*
									*/ if `obstype'==2
								}
							}
						}
					}
					else { /* mlogit */
						* predict cumulative probabilities for obs data and hence logits of class probs
						* care needed dealing with different possible base categories
						tempvar sumexp
						local basecat=e(basecat) /* actual basecategory chosen by Stata */
						gen `sumexp'=0 if `touse'
						forvalues k=1/`nclass' {
							tempvar etaobs`k' etamis`k' xb`k'
							local thiscat=`cat'[1,`k']
							if `thiscat'==`basecat' {
								* the base category has linear predictor zero
								gen `xb`k''=0 if `touse'
							}
							else matrix score `xb`k''=`b' if `touse', equation(`thiscat')
							replace `sumexp'=`sumexp' + exp(`xb`k'')
						}
						forvalues k=1/`nclass' {
							* formula for logit of class prob derived from Pk in Stata mlogit entry
							gen `etaobs`k''=`xb`k''-log(`sumexp'-exp(`xb`k'')) if `obstype'==1
						}
						* same for missing obs
						replace `sumexp'=0
						forvalues k=1/`nclass' {
							cap drop `xb`k''
							local thiscat=`cat'[1,`k']
							if `thiscat'==`basecat' {
								gen `xb`k''=0 if `touse'
							}
							else matrix score `xb`k''=`bstar' if `touse', equation(`thiscat')
							replace `sumexp'=`sumexp' + exp(`xb`k'')
						}
						forvalues k=1/`nclass' {
							* formula for logit of class prob derived from Pk in Stata mlogit entry
							gen `etamis`k''=`xb`k''-log(`sumexp'-exp(`xb`k'')) if `obstype'==2
						}
					}
					* match
					* (after this sort rows 1..nobs are the complete observations and
					* rows nobs+1..nobs+nmis are the ones to impute)
					sort `obstype', stable
					tempvar order distance
					gen `distance'=.
					gen long `order'=_n
					* For each missing obs j, find index of obs whose etaobs is closest to prediction [j].
					forvalues i=1/`nmis' {
						local j=`i'+`nobs'
						* calc summed absolute distances between etamis* and etaobs*
						replace `distance'=0 in 1/`nobs'
						forvalues k=1/`nclass' {
							replace `distance'=`distance'+abs(`etamis`k''[`j']-`etaobs`k'') in 1/`nobs'
						}
						* Find index of smallest distance between etamis* and etaobs*
						sort `distance'
						local index=`order'[1]
						* restore correct order
						sort `order'
						replace `yimp'=`y'[`index'] in `j'
					}
				}
				else { // draw
					* sampling conditional distribution
					* start everyone in the first category, then move up while u exceeds
					* the cumulative probability
					replace `yimp'=`cat'[1,1]
					if "`cmd'"=="ologit" {
						* Predict index independent of cutpoints
						* (note use of forcezero option to circumvent missing _cut* vars)
						matrix score `xb'=`bstar' if `touse', forcezero
						forvalues k=1/`cuts' {
							* 1/(1+exp(-... is probability of being in category 1 or 2 or ... k
							local cutpt=`bstar'[1, `k'+`colsofb'-`cuts']
							replace `yimp'=`cat'[1,`k'+1] if `u'>1/(1+exp(-(`cutpt'-`xb')))
						}
						* Rows without a linear predictor must not keep the default first
						* category assigned above (mirrors the mlogit branch below)
						replace `yimp'=. if missing(`xb')
					}
					else { /* mlogit */
						* care needed dealing with different possible base categories
						tempvar cusump sumexp
						local basecat=e(basecat) /* actual basecategory chosen by Stata */
						gen `sumexp'=0 if `touse'
						forvalues i=1/`nclass' {
							tempvar xb`i'
							local thiscat=`cat'[1,`i']
							if `thiscat'==`basecat' {
								gen `xb`i''=0 if `touse'
							}
							else matrix score `xb`i''=`bstar' if `touse', equation(`thiscat')
							replace `sumexp'=`sumexp' + exp(`xb`i'')
						}
						* cusump accumulates the class probabilities in category order
						gen `cusump'=exp(`xb1')/`sumexp'
						forvalues i=2/`nclass' {
							replace `yimp'=`cat'[1,`i'] if `u'>`cusump'
							replace `cusump'=`cusump'+exp(`xb`i'')/`sumexp'
							replace `yimp'=. if missing(`xb`i'')
						}
					}
				}
			}
		}
		else {
			* Bootstrap method
			if "`match'"=="match" { /* match */
				* predictions for the observed rows come from the model fitted above,
				* before it is replaced by the bootstrap fit
				if `catcmd' {
					* predict class-specific probabilities and convert to logits
					forvalues k=1/`nclass' {
						local outk=`cat'[1,`k']
						tempvar etaobs`k' etamis`k'
						predict `etaobs`k'' if `obstype'==1, outcome(`outk') /* probability */
						replace `etaobs`k''=log(`etaobs`k''/(1-`etaobs`k'')) /* logit */
					}
				}
				else { /* normal and binary cases */
					tempvar etaobs etamis
					predict `etaobs' if `obstype'==1, xb
				}
			}
			* Bootstrap observed data
			tempvar wt
			gen double `wt'=.
			bsample if `obstype'==1, weight(`wt')
			if "`wgt'"!="" {
				* exp as stored by syntax already starts with the equals sign
				replace `wt' `exp'*`wt'
				local w [`weight'=`wt']
			}
			else local w [fweight=`wt']
			`cmd' `y' `xvars' `w', `options'
			if `catcmd' {
				if e(k_cat)<`nclass' {
					di as error "cannot predict outcome for all classes in bootstrap sample;"
					di as error "probably one or more classes has a low frequency in the original data:"
					di as error "try amalgamating small classes of `y' and rerunning"
					exit 303
				}
			}
			if "`match'"=="match" {
				if `catcmd' {
					* predict class-specific probabilities and convert to logits
					forvalues k=1/`nclass' {
						local outk=`cat'[1,`k']
						predict `etamis`k'' if `obstype'==2, outcome(`outk') /* probability */
						replace `etamis`k''=log(`etamis`k''/(1-`etamis`k'')) /* logit */
					}
					* match
					sort `obstype', stable
					tempvar order distance
					gen `distance'=.
					gen long `order'=_n
					* For each missing obs j, find index of obs whose etaobs is closest to prediction [j].
					forvalues i=1/`nmis' {
						local j=`i'+`nobs'
						* calc summed absolute distances between etamis* and etaobs*
						replace `distance'=0 in 1/`nobs'
						forvalues k=1/`nclass' {
							replace `distance'=`distance'+abs(`etamis`k''[`j']-`etaobs`k'') in 1/`nobs'
						}
						* Find index of smallest distance between etamis* and etaobs*
						sort `distance'
						local index=`order'[1]
						* restore correct order
						sort `order'
						replace `yimp'=`y'[`index'] in `j'
					}
				}
				else { /* normal and binary */
					predict `etamis' if `obstype'==2, xb
					* Include non-response location shift, delta.
					if `delta'!=0 {
						replace `etamis'=`etamis'+`delta'
					}
					match_normal `obstype' `nobs' `nmis' `etaobs' `etamis' `yimp' `y'
				}
			}
			else { // draw
				* the bootstrap fit supplies the coefficients used for the draw
				matrix `bstar'=e(b)
				gen `u'=uniform()
				if `normal' | `binary' {
					matrix score `xb'=`bstar' if `touse'
					if `normal' {
						replace `yimp'=`xb'+e(rmse)*invnorm(`u')
					}
					else replace `yimp'=`u'<1/(1+exp(-`xb')) if !missing(`xb')
				}
				else { /* catcmd */
					replace `yimp'=`cat'[1,1]
					if "`cmd'"=="ologit" {
						matrix score `xb'=`bstar' if `touse', forcezero
						forvalues k=1/`cuts' {
							* 1/(1+exp(-... is probability of being in category 1 or 2 or ... k
							local cutpt=`bstar'[1, `k'+`colsofb'-`cuts']
							replace `yimp'=`cat'[1,`k'+1] if `u'>1/(1+exp(-(`cutpt'-`xb')))
						}
						* Rows without a linear predictor must not keep the default first
						* category assigned above (mirrors the mlogit branch below)
						replace `yimp'=. if missing(`xb')
					}
					else { /* mlogit */
						* care needed dealing with different possible base categories
						tempvar cusump sumexp
						local basecat=e(basecat) /* actual basecategory chosen by Stata */
						gen `sumexp'=0 if `touse'
						forvalues i=1/`nclass' {
							tempvar xb`i'
							local thiscat=`cat'[1,`i']
							if `thiscat'==`basecat' {
								gen `xb`i''=0 if `touse'
							}
							else matrix score `xb`i''=`bstar' if `touse', equation(`thiscat')
							replace `sumexp'=`sumexp' + exp(`xb`i'')
						}
						gen `cusump'=exp(`xb1')/`sumexp'
						forvalues i=2/`nclass' {
							replace `yimp'=`cat'[1,`i'] if `u'>`cusump'
							replace `cusump'=`cusump'+exp(`xb`i'')/`sumexp'
							replace `yimp'=. if missing(`xb`i'')
						}
					}
				}
			}
		}
		* Publish the result under the requested name; observed values always win
		cap drop `gen'
		rename `yimp' `gen'
		*replace `gen'=`y' if `obstype'==1
		replace `gen'=`y' if !missing(`y')
		lab var `gen' "imputed from `y'"
	}
	di _n in ye `nmis' in gr " missing observations on `y' imputed from " /*
	*/ in ye `nobs' in gr " complete observations."
end
program define match_normal
	* Prediction matching, normal or binary case.
	*
	* Positional arguments:
	*   obstype  variable coded 1 for donor rows and 2 for rows to impute
	*   nobs     number of donor rows (obstype 1)
	*   nmis     number of rows to impute (obstype 2)
	*   etaobs   prediction for the donor rows
	*   etamis   prediction for the rows to impute
	*   yimp     variable that receives the imputed values
	*   y        variable holding the donors' observed values
	*
	* The data are left sorted by obstype and etaobs.  Random numbers are
	* consumed only when the nearest donor is tied with other donors.
	args obstype nobs nmis etaobs etamis yimp y
	quietly {
		* For each missing obs j, find index of observation
		* whose etaobs is closest to etamis[j].
		tempvar sumgt
		tempname etamisi
		gen long `sumgt'=.
		* Sort etaobs within obstype
		* (donors then occupy rows 1..nobs in ascending order of etaobs, and the
		* rows to impute follow at nobs+1..nobs+nmis)
		sort `obstype' `etaobs', stable
		forvalues i=1/`nmis' {
			local j=`i'+`nobs'
			scalar `etamisi'=`etamis'[`j']
			* running count of donors whose prediction lies below the target; its
			* maximum is the row of the last donor below the target
			replace `sumgt'=sum((`etamisi'>`etaobs')) in 1/`nobs'
			sum `sumgt', meanonly
			local j1=r(max)
			if `j1'==0 {
				* target is at or below every donor: take the first
				local index 1
				local direction 1
			}
			else if `j1'==`nobs' {
				* target is above every donor: take the last
				local index `nobs'
				local direction -1
			}
			else {
				* target lies between donors j1 and j2: take the nearer one
				local j2=`j1'+1
				if (`etamisi'-`etaobs'[`j1'])<(`etaobs'[`j2']-`etamisi') {
					local index `j1'
					local direction -1
				}
				else {
					local index `j2'
					local direction 1
				}
			}
			* In case of tied etaobs values, add random offset to index in the appropriate direction
			* (no permanent scalar is stored for the tie count: the former count1,
			* count2, ... scalars were never read and leaked into the user's session)
			count if `obstype'==1 & reldif(`etaobs', `etaobs'[`index'])<1e-7 // counts as equality
			if r(N)>1 {
				local index=`index'+`direction'*int(uniform()*r(N))
			}
			replace `yimp'=`y'[`index'] in `j'
		}
	}
end

View File

@ -0,0 +1 @@
.h ice