Computed theoretical power for N=100 and N=200 scenarios
This commit is contained in:
32
Modules/ado/plus/u/uclist.ado
Normal file
32
Modules/ado/plus/u/uclist.ado
Normal file
@ -0,0 +1,32 @@
|
||||
program def uclist, rclass
*! NJC 1.0.0 29 June 2000
	* uclist: convert every word of a list to upper case.
	*
	* Syntax:   uclist list [, Global(name) Noisily]
	* Returns:  r(list)  the upper-cased words
	* Options:  global() also stores the result in the named global macro
	*           noisily  displays the result
	* Errors:   198 if the list is empty or the global name exceeds 8 characters
	*           498 if any single word is longer than 80 characters
	version 6.0

	* Everything before the first comma is the list; options stay in 0
	gettoken list 0 : 0, parse(",")
	if "`list'" == "," | "`list'" == "" {
		di in r "nothing in list"
		exit 198
	}
	syntax , [ Global(str) Noisily ]

	if length("`global'") > 8 {
		di in r "global name must be <=8 characters"
		exit 198
	}

	* Split the list into positional macros 1, 2, ...
	tokenize `list'

	* Walk the positional macros by index rather than shifting them
	local pos = 1
	while "``pos''" != "" {
		* string expressions are evaluated below, hence the length guard
		if length("``pos''") > 80 {
			di in r "cannot handle word length > 80"
			exit 498
		}
		local ucword = upper("``pos''")
		local result "`result'`ucword' "
		local pos = `pos' + 1
	}

	if "`noisily'" != "" {
		di "`result'"
	}
	if "`global'" != "" {
		global `global' "`result'"
	}
	return local list `result'
end
|
||||
|
2
Modules/ado/plus/u/uclist.hlp
Normal file
2
Modules/ado/plus/u/uclist.hlp
Normal file
@ -0,0 +1,2 @@
|
||||
.h listutil
|
||||
|
43
Modules/ado/plus/u/uniqlist.ado
Normal file
43
Modules/ado/plus/u/uniqlist.ado
Normal file
@ -0,0 +1,43 @@
|
||||
program def uniqlist, rclass
*! NJC 1.3.0 7 June 2000
* NJC 1.2.0 31 Jan 2000
* NJC 1.1.0 22 Dec 1999
* NJC 1.0.0 22 Sept 1999
	* uniqlist: remove duplicate words from a list, keeping the first
	* occurrence of each word in its original position.
	*
	* Syntax:   uniqlist list [, Noisily Global(name)]
	* Returns:  r(list)  the distinct words, in order of first appearance
	* Options:  global() also stores the result in the named global macro
	*           noisily  displays the result
	* Errors:   198 if the list is empty or the global name exceeds 8 characters
	version 6.0

	* Everything before the first comma is the list; options stay in 0
	gettoken list 0 : 0, parse(",")
	if "`list'" == "" | "`list'" == "," {
		di in r "nothing in list"
		exit 198
	}
	syntax [, Noisily Global(str) ]

	if length("`global'") > 8 {
		di in r "global name must be <=8 characters"
		exit 198
	}

	* The first word is always kept; the rest are checked one at a time
	tokenize `list'
	local newlist "`1'"
	mac shift

	while "`1'" != "" {
		local nnew : word count `newlist'
		local i = 1
		local putin = 1
		* linear scan of the words accepted so far
		while `i' <= `nnew' {
			local word : word `i' of `newlist'
			if "`word'" == "`1'" {
				local putin = 0
				* force the scan to stop: i becomes nnew + 1 below
				local i = `nnew'
			}
			local i = `i' + 1
		}
		if `putin' { local newlist "`newlist' `1'" }
		mac shift
	}

	if "`noisily'" != "" { di "`newlist'" }
	if "`global'" != "" { global `global' "`newlist'" }
	return local list `newlist'
end
|
||||
|
2
Modules/ado/plus/u/uniqlist.hlp
Normal file
2
Modules/ado/plus/u/uniqlist.hlp
Normal file
@ -0,0 +1,2 @@
|
||||
.h listutil
|
||||
|
843
Modules/ado/plus/u/usesas.ado
Normal file
843
Modules/ado/plus/u/usesas.ado
Normal file
@ -0,0 +1,843 @@
|
||||
*! usesas Version 2.1 dan.blanchette@duke.edu 15Apr2009
|
||||
*! Center of Entrepreneurship and Innovation Duke University's Fuqua School of Business
|
||||
* - made it so that the describe option returns scalars as promised in the help file.
|
||||
* usesas Version 2.1 dan.blanchette@duke.edu 26Feb2009
|
||||
* - made usesas fail gracefully when there was no SAS dataset in &SYSLAST. when using a SAS program
|
||||
* instead of a dataset
|
||||
* usesas Version 2.0 dan.blanchette@duke.edu 24Nov2008
|
||||
* - added ".tpt" as a known file extension for SAS transport file since NBER uses that file extension
|
||||
* usesas Version 2.0 dan_blanchette@unc.edu 25Mar2008
|
||||
* research computing, unc-ch
|
||||
* - added describe option that loads the metadata of the using dataset into memory
|
||||
* and displays a Stata-like -describe- description of the using data.
|
||||
* - made it so that when used with Stata MP savastata treats data like Stata SE
|
||||
* - fixed it when usesas uses sas programs and the SAS program's last SAS dataset
|
||||
* is a permanent one then it deletes that file and doesn't fail with odd errors.
|
||||
* - made it so that when savastata fails by a known error usesas deletes whatever
|
||||
* intermediary files were created.
|
||||
* - added error message that -usesas- cannot be run in Stata batch in Windows
|
||||
* - removed efforts to keep sortedby vars since descending sorts in SAS mess up Stata
|
||||
* as well as missing values mess up sort order...left it in for --usesas describe--
|
||||
* usesas Version 1.4 dan_blanchette@unc.edu 17Apr2007
|
||||
* - made it so that if a format catalog file was created for a different OS
|
||||
* would provide a message to user that that was the case and that the
|
||||
* SAS formats would not be used to create Stata value labels
|
||||
* - fixed it so that if in console mode you are not suggested to click something.
|
||||
* usesas Version 1.4 dan_blanchette@unc.edu 24Aug2006
|
||||
* - corrected how the SAS check name was displayed.
|
||||
* usesas Version 1.4 dan_blanchette@unc.edu 09Nov2005
|
||||
* - made usesasdel its own ado-file
|
||||
* usesas Version 1.4 dan_blanchette@unc.edu 28Sep2005
|
||||
* - stopped savastata from closing user's log if one was open
|
||||
* usesas Version 1.3 dan_blanchette@unc.edu 04Aug2005
|
||||
* - in non-console mode when messy option used, you can now delete all files
|
||||
* - for Stata 9 new option char2lab that uses my SAS macro char2fmt that converts
|
||||
* long SAS character variables to numeric with value labels like -encode- does.
|
||||
* - allow datasets created by proc cport
|
||||
* NOTE: SAS's CIMPORT procedure will not open a datafile created in later version of SAS
|
||||
* - use rver() to control sas engine type
|
||||
* - now passing Stata version to savastata
|
||||
* - sort order preserved, though data never lost sort order, Stata needs to sort data to know its sort order
|
||||
* - added float option to allow user to save space with numeric vars that would otherwise
|
||||
* require being stored in 8-byte double.
|
||||
* usesas Version 1.2 dan_blanchette@unc.edu 06Jan2005
|
||||
* -now accepts an xport file that has a different internal dataset name
|
||||
** usesas Version 1.1 dan_blanchette@unc.edu 11Nov2004
|
||||
* -fixed it so that if() can contain code with double quotes
|
||||
* e.g: if(`" var="A" "')
|
||||
* -fixed it so that when a user submits a SAS program
|
||||
* only their program is printed in the *_usesas.log file
|
||||
* and not the whole savastata macro code as before
|
||||
* -deletes non-work SAS dataset created by user's SAS program
|
||||
* -fix code related to "if" option
|
||||
* -reduced usage of subinstr to help allow for directory paths and if conditions
|
||||
* to be longer than 80 characters
|
||||
** usesas Version 1.0 dan_blanchette@unc.edu 13Jul2004
|
||||
* -added mprint and source2 so that user submitted programs
|
||||
* would appear in the *_usesas.log file
|
||||
** usesas Version 1.0 dan_blanchette@unc.edu 17Nov2003
|
||||
** usesas Version 1.0 dan_blanchette@unc.edu 27Oct2003
|
||||
** the carolina population center, unc-ch
|
||||
|
||||
|
||||
program define usesas, rclass
  * usesas: load a SAS dataset (or the result of a SAS program) into Stata.
  *
  * Outline of what this program does:
  *   1. validates options and refuses to overwrite data unless clear is given
  *   2. calls sasexe to locate the SAS executable and the savastata macro
  *   3. splits the using path into directory, file stem, middle and extension
  *      and chooses a SAS engine from the extension
  *   4. calls usesas_sas to write a temporary SAS program, then shells out to SAS
  *   5. runs the generated _<jobid>_infile.do to read the data into memory
  *   6. with the describe option, lists the metadata and returns
  *      r(varlist), r(sortlist), r(k) and r(N)
  *   7. erases the temporary files unless messy is specified
  *
  * If the usagelog command is installed, each exit path is logged through it.
  version 8
  syntax using/ [, MEssy FORmats xport clear QUotes char2lab CHeck float ///
    KEep(string) DEscribe listnot if(string) in(string) ]

  /* log usage of usesas */
  capture which usagelog
  if _rc==0 {
    usagelog , start type(savas) message(`"usesas using `using', `messy' `formats' `xport' `clear' `quotes' `char2lab' `check' keep(`keep') `describe' `listnot' if(`if') in(`in') "')
  }

  if "`c(os)'"=="Windows" & "`c(mode)'" == "batch" {
    di as err "{help usesas:usesas} cannot be run in batch mode on Windows"
    /* log usage of usesas */
    capture which usagelog
    if _rc==0 {
      usagelog , type(savas) uerror(8) etime
    }
    exit 499
  }

  if "`listnot'" != "" & "`describe'" == "" {
    di as err "listnot option only allowed when using the describe option"
    /* log usage of usesas */
    capture which usagelog
    if _rc==0 {
      usagelog , type(savas) uerror(8) etime
    }
    exit 499
  }

  di `"{txt}The {help usesas:usesas} {txt}command uses the {browse "http://faculty.fuqua.duke.edu/sas_to_stata/savastata.html":savastata} {txt}SAS macro to load the SAS dataset into memory."'
  di "{txt}Large datasets may take a few minutes."

  if `c(N)'!=0 & "`clear'"=="" {
    di "{error} no, data in memory would be lost"
    di "{error} use the {res}clear {error}option"
    /* log usage of usesas */
    capture which usagelog
    if _rc==0 {
      usagelog , type(savas) uerror(1) etime
    }
    exit 4
  }

  * CAPTURE USER'S LOG
  * ------------------
  quietly log query
  local usrlog `r(filename)'

  * FIGURE OUT WHERE SAS EXECUTABLE IS
  * ----------------------------------
  sasexe usesas

  local wsas `r(wsas)'
  local usas `r(usas)'
  local savastata `r(savastata)'
  local char2fmt `r(char2fmt)'
  local rver `r(rver)' // version of sas that's being run i.e. "v8", "v9" etc


  if index("`using'","'") | index(`"`using'"',`"""') {
    di `"{help usesas} {error}cannot handle directory or file names that contain single or double quotes. "'
    capture which usagelog
    if _rc==0 {
      usagelog , type(savas) uerror(2) etime
    }
    exit 499
  }
  /* if filename is given with directory info too,
     strip to just file name and to dir location */
  if "`c(os)'"=="Windows" {
    local dirsep="\"
    if index("`using'","/") {
      local using : subinstr local using "/" "\" , all
    }
  }
  else {
    local dirsep="`c(dirsep)'"
  }
  if index("`using'","`dirsep'") {
    local filen=substr("`using'",index("`using'","`dirsep'")+1,length("`using'"))
    while index("`filen'","`dirsep'") !=0 {
      local filen=substr("`filen'",index("`filen'","`dirsep'")+1,length("`filen'"))
    }
    // dir is everything before the trailing file name; take it by length so a
    // file name that also occurs earlier in the path cannot truncate dir
    local dir=substr("`using'",1,length("`using'")-length("`filen'"))
  }
  else if index("`using'","\\\")==1 { /* Universal naming convention */
    local filen=substr("`using'",index("`using'","\\\")+2,length("`using'"))
    while index("`filen'","\") !=0 {
      local filen=substr("`filen'",index("`filen'","\")+1,length("`filen'"))
    }
    local dir=substr("`using'",1,length("`using'")-length("`filen'"))
  }
  else { /* no directory given */
    local filen="`using'"
    local dir ="`c(pwd)'`dirsep'"
  }


  /** extract file extension if there is one **/
  if index("`filen'",".") {
    local ext=substr("`filen'",index("`filen'","."),length("`filen'"))
    while index("`ext'",".") > 0 {
      local ext=substr("`ext'",index("`ext'",".")+1,length("`ext'"))
    }
    local ext=".`ext'"
    // strip the final extension by length so that a name such as x.sas.sas
    // keeps its middle part
    local middle=substr("`filen'",1,length("`filen'")-length("`ext'")) /* middle will not end in a period */
    local filen=substr("`filen'",1,index("`filen'",".")-1)
    local middle=substr("`middle'",length("`filen'")+1,length("`middle'"))
  }

  // map the file extension to a file type
  if lower("`ext'")==".sas7bdat" {
    local type="sas"
  }
  else if lower("`ext'")==".sd7" {
    local type="sas"
    local shortfileext="shortfileext"
  }
  else if lower("`ext'")==".ssd01" {
    local type="sas6"
  }
  else if lower("`ext'")==".ssd02" {
    local type="sas6"
  }
  else if lower("`ext'")==".sd2" {
    local type="sas6"
  }
  else if lower("`ext'")==".sas" {
    local type="sasprogram"
  }
  else if lower("`ext'")==".por" {
    local type="spss"
  }
  else if lower("`ext'")==".xpt" | ///
    lower("`ext'")==".xport" | ///
    lower("`ext'")==".export" | ///
    lower("`ext'")==".expt" | ///
    lower("`ext'")==".exp" | ///
    lower("`ext'")==".trans" | ///
    lower("`ext'")==".tpt" | ///
    lower("`ext'")==".cport" | ///
    lower("`ext'")==".ssp" | ///
    lower("`ext'")==".stx" | ///
    lower("`ext'")==".sasx" | ///
    lower("`ext'")==".v5x" | ///
    lower("`ext'")==".v6x" {
    local type="sasx"
  }
  else if "`xport'"=="xport" { // else no file extension
    local type="sasx"
  }
  else { // guess that the user is wanting to use a .sas7bdat file
    local using1 `"`using'.sas7bdat"'
    local ext ".sas7bdat"
    local type="sas"
    capture confirm file `"`using1'"'
    if _rc != 0 {
      di `"{error}The SAS file: `using1' does not exist."'
      // check that user is not expecting file extension but forgot to use xport option
      capture confirm file `"`using'"'
      if _rc == 0 {
        di `"{error}But the SAS file: `using' does exist."'
        di as text `"Use the xport option as it is likely this file is a transport/xport file."'
      }
      /* log usage of usesas */
      capture which usagelog
      if _rc==0 {
        usagelog , type(savas) uerror(3) etime
      }
      exit 601
    }
    // only here if this file does exist
    local using `"`using'.sas7bdat"'
  }

  capture confirm file `"`using'"'
  if _rc != 0 {
    di `"{error}The SAS file: `using' does not exist."'
    /* log usage of usesas */
    capture which usagelog
    if _rc==0 {
      usagelog , type(savas) uerror(3) etime
    }
    exit 601
  }

  if "`type'"=="" {
    di "{error}Is `using' a SAS transport/xport data file?"
    di "{error}If so then use the {res}xport {error}option."
    /* log usage of usesas */
    capture which usagelog
    if _rc==0 {
      usagelog , type(savas) uerror(4) etime
    }

    exit 499
  }


  // choose the SAS engine (or flag a user-supplied SAS program)
  if "`type'"=="sas" {
    local engine="`rver'" // whatever version of SAS that's being used
  }
  else if "`type'"=="sas6" {
    local engine="v6"
  }
  else if "`type'"=="sasprogram" {
    local sasprogram="sasprogram"
  }
  else if "`type'"=="sasx" {
    local engine="xport"
  }
  else if "`type'"=="spss" {
    local engine="spss"
  }

  /* set where temp directory is */
  tmpdir
  local tmpdir="`r(tmpdir)'"


  // temp file names are prefixed with the last six characters of the current
  // time with the colons removed
  local tfn=subinstr("`c(current_time)'",":","",.)
  local sysjobid=substr("`tfn'",length("`tfn'")-5,length("`tfn'"))
  local temp `"`macval(tmpdir)'_`sysjobid'"'
  local raw `"`macval(tmpdir)'_`sysjobid'_usesas"'
  local xpt "`macval(dir)'`filen'`middle'`ext'"


  * MAKE "IF" AND "IN" INTO SAS CODE
  * --------------------------------
  local firstobs = upper(substr("`in'",1,index("`in'","/")-1))
  if "`firstobs'" == "F" | index("`firstobs'","-") {
    di `"{error}Your 'in()' option cannot use f/F or negative values. "'
    exit 100
  }
  local obs = upper(substr("`in'",index("`in'","/")+1,length("`in'")))
  if "`obs'" == "L" {
    di `"{error}Your 'in()' option cannot use l/L. "'
    exit 100
  }

  if `"`if'"'!=`""' {
    local iflen : length local if
    if `iflen'>247 { // 255-6-wiggle room = 247
      // 'if()' option needs to be less than 255 characters for SAS to process, it is limited to max length of string
      di `"{error}Your 'if()' option is longer than max length of 247. "'
      exit 100
    }
    else { // okay to process
      if index(`"`if'"',"==") {
        local if : subinstr local if "==" "=" , all
      }
      if lower(substr(`"`if'"',1,3)) != `"if "' & lower(substr(`"`if'"',1,6)) != `"where "' {
        local if `"where `if'"'
      }
      else if lower(substr(`"`if'"',1,3)) == `"if "' {
        di `"{error}Your 'if()' option starts with "if". The "if" is assumed, do not type it."'
        exit 100
      }
      /* now make sure if has only one 'if' or 'where' in it */
      if index(lower(`"`if'"')," if ") | index(lower(`"`if'"')," inrange(") | /*
        */ index(lower(`"`if'"')," inlist(") | index(lower(`"`if'"')," where ")>1 {
        di `"{error}Invalid SAS 'if' condition."'
        capture which usagelog
        if _rc==0 {
          usagelog , type(savas) uerror(5) etime
        }
        exit 499
      }
    } // 'if()' is less than 247 chars
  }



  * WRITE SAS PROGRAM TO READ IN DATA
  * ---------------------------------
  // float is forwarded as well: usesas_sas declares it, and it was previously
  // parsed here but never passed on
  usesas_sas , rver(`rver') dirsep("`dirsep'") dir("`dir'") tmpdir("`tmpdir'") filen(`filen') raw("`raw'") engine(`engine') ///
    `shortfileext' `quotes' `check' `formats' sysjobid(`sysjobid') ext(`ext') middle(`middle') xpt("`xpt'") ///
    savastata("`savastata'") if(`"`if'"') firstobs(`firstobs') obs(`obs') keep(`"`keep'"') ///
    `char2lab' char2fmt("`char2fmt'") `float' `sasprogram' `describe' `listnot'


  * RUN SAS
  * -------
  if "`c(os)'"=="Unix" /* or Linux */ {
    shell "`usas'" "`temp'_usesas.sas" -log "`temp'_usesas.log" -print "`temp'_usesas.lst"
  } /* end of if Unix */
  else if "`c(os)'"=="Windows" /* Windows */ {
    ** do not add -icon option since that pop-up window is not a big deal and could tell user important info **
    shell `wsas' "`temp'_usesas.sas" -nologo -log "`temp'_usesas.log" -print "`temp'_usesas.lst"
  } /* end of if Windows */

  * LOOK AT ANY REPORT FROM SAS
  * ---------------------------
  capture confirm file `"`temp'_report.log"'
  if _rc==0 {
    type `"`temp'_report.log"'
    if "`messy'"=="" {
      erase `"`temp'_report.log"'
    }
  }

  * CLEAR DATA OUT OF MEMORY
  * ------------------------
  if "`clear'"!="" {
    drop _all
    label drop _all
  }


  * LOAD STATA DATASET INTO MEMORY
  * ------------------------------
  capture confirm file `"`tmpdir'_`sysjobid'_infile.do"'
  if _rc == 0 {
    if `"`usrlog'"' != "" {
      quietly log close
    }
    local cwd "`c(pwd)'"
    ** cd to where infile.do is **
    quietly cd "`tmpdir'"
    // capture the run so that a failure cannot leave the user's log closed
    // and the working directory pointing at the temp directory
    capture noisily run `"_`sysjobid'_infile.do"'
    local runrc = _rc
    if `"`usrlog'"' != "" {
      quietly log using `"`usrlog'"' , append
    }
    if `runrc' != 0 {
      quietly cd "`cwd'"
      exit `runrc'
    }

    * SET DATASET NAME
    * ----------------
    if index("$S_FN","`dirsep'") == 1 {
      global S_FN : subinstr global S_FN "`dirsep'" ""
    }
    global S_FN `"`macval(dir)'$S_FN"'

    // run savastata_report to see if SAS and Stata agree how many obs and vars there are
    savastata_report

    if "`check'" != "" {
      local gsfn : subinstr global S_FN ".dta" ""
      display as res _n " Compare these results with the results provided by SAS "
      display as res " in the file `gsfn'_SAScheck.lst. " _n
      summarize
      describe
      list in 1/5

      di _n "You have requested to have savastata provide a check file:"
      di `""`gsfn'_SAScheck.lst" "'
    }


    ** cd back to where you were **
    quietly cd "`cwd'"
  } /* if infile.do file exists */
  else {
    di `"{error}{help usesas:usesas} failed."'
    capture confirm file `"`tmpdir'_`sysjobid'_knerror.txt"'
    if _rc ==0 {
      // savastata failed with a known error so just let report.log show the error
      if "`c(os)'" != "Windows" {
        usesasdel `"`tmpdir'"' _`sysjobid'_
      }
      if "`c(os)'" == "Windows" {
        local usesasdeldir : subinstr local tmpdir `":"' `"\\\`= char(58)'"', all
        usesasdel `"`usesasdeldir'"' _`sysjobid'_
      }
    }
    else {
      di `"{error}If no error message above this one, then check out the SAS log file to see why. "'
      di `" {view "`temp'_usesas.log"} "'
      di `"{inp}Erase these temporary files created by {help usesas:usesas} when done with them:"'
      di `"{res}(files located in "`tmpdir'") "'
      ls "`temp'_*"
      if "`c(console)'" != "console" {
        if "`c(os)'" != "Windows" {
          di `"{res} {stata usesasdel `"`tmpdir'"' _`sysjobid'_:Click here to erase them all.} "'
        }
        if "`c(os)'" == "Windows" {
          local usesasdeldir : subinstr local tmpdir `":"' `"\\\`= char(58)'"', all
          di `"{res} {stata usesasdel `"`usesasdeldir'"' _`sysjobid'_:Click here to erase them all.} "'
        }
      }
    }

    if "`sasprogram'"!="" {
      di `""'
      if "`c(console)'" != "console" {
        di `"{inp}Click here to edit your SAS program and try it again. "'
        di `" {stata `"doedit "`xpt'""':`xpt'} "'
      }
      else di `"Edit your SAS program: "`xpt'" and try it again."'
      di `""'
    }
    capture which usagelog
    if _rc==0 {
      usagelog , type(savas) uerror(6) etime
    }
    exit 499
  }

  // with describe, the data in memory holds one observation per variable of
  // the using dataset, so _N below is the number of variables
  if "`describe'" == "describe" {
    di as res `"Contains data from "`using'" "'
    di as res `" obs: `=string(nobs,"%32.0fc")' "' memlabel
    di as res `" vars: `=string(_N,"%32.0fc")' "'
    if ( _N > 2047 & "$S_StataSE"=="" & "$S_StataMP" == "" ) | ///
      ( _N > 32767 ) {
      di as err "Your version of Stata will not read this entire dataset"
      if ( _N > 2047 & "$S_StataSE"=="" & "$S_StataMP" == "" ) ///
        di as err " as it has more than 2,047 variables."
      else if ( _N > 32767 ) ///
        di as err " as it has more than 32,767 variables."
    } // this message is repeated after all vars are listed
    local name_len = `= substr("`: type name'",index("`: type name'","r")+1,2)'
    if `name_len' < 13 recast str13 name
    recast str12 type
    char define name[varname] "variable name"
    char define type[varname] "storage type"
    char define label[varname] "variable label"
    order varnum name type format label
    if "`listnot'" == "" {
      list name type format label, nocompress noobs subvarname
    }
    capture confirm file `"`macval(temp)'_usesas.txt"'
    if _rc ==0 {
      file open sortedby using `"`temp'_usesas.txt"' , read text
      file read sortedby sortedby // creates local sortedby
      file close sortedby
      // clear sortedby if no vars in it, it ends up being a double quote
      if `"`sortedby'"' == `"""' local sortedby ""
    }
    di as res `"Sorted by: `sortedby'"'
    if "`listnot'" == "" {
      if ( _N > 2047 & "$S_StataSE"=="" & "$S_StataMP" == "" ) | ///
        ( _N > 32767 ) {
        di as err "Your version of Stata will not read this entire dataset"
        if ( _N > 2047 & "$S_StataSE"=="" & "$S_StataMP" == "" ) ///
          di as err " as it has more than 2,047 variables."
        else if ( _N > 32767 ) ///
          di as err " as it has more than 32,767 variables."
      } // this message is made first before all vars are listed
    }
    // these vars do not vary by obs so just drop 'em
    quietly drop memlabel // nobs dropped at end of usesas
    di as res _n _dup(`c(linesize)') "-"
    di as res `" Now the dataset in memory is just the description of "`using'" "'
    di as res `" Use the {stata describe :describe} command to see what you have and use "'
    di as res `" whatever data manipulation you like to create variable lists for "'
    di as res `" your actual invocation of {help usesas :usesas} if you want."'
    if "`c(console)'" != "console" {
      di as res `" Otherwise, {stata clear :Click here to clear out the dataset from memory}. "'
    }
    else {
      di as res `" Otherwise, use the clear command to clear out the dataset from memory. "'
    }
    di as res _dup(`c(linesize)') "-"
  }


  * CLEAN UP TEMP FILES
  * -------------------
  if "`messy'"=="" {
    if "`c(os)'" != "Windows" {
      usesasdel `"`tmpdir'"' _`sysjobid'_
    }
    if "`c(os)'" == "Windows" {
      local usesasdeldir : subinstr local tmpdir `":"' `"\\\`= char(58)'"', all
      usesasdel `"`usesasdeldir'"' _`sysjobid'_
    }
  } /* end of messy=="" */
  else {
    di "{res}You have requested {help usesas:usesas} not to delete the intermediary files created by {help usesas:usesas}:"
    dir "`temp'_*"
    di "{input}Files located here: "
    di `"{input}"`tmpdir'" "'

    if "`c(console)'" != "console" {
      if "`c(os)'" != "Windows" {
        di `"{res} {stata usesasdel `"`tmpdir'"' _`sysjobid'_:Click here to erase them all.} "'
      }
      if "`c(os)'" == "Windows" {
        local usesasdeldir : subinstr local tmpdir `":"' `"\\\`= char(58)'"', all
        di `"{res} {stata usesasdel `"`usesasdeldir'"' _`sysjobid'_:Click here to erase them all.} "'
      }
    }
  } // of if else if messy



  /* log usage of usesas */
  capture which usagelog
  if _rc==0 {
    if `c(N)' == 0 & `c(k)' == 0 {
      usagelog , type(savas) uerror(7) message(no data) etime
    }
    else {
      local obs=`c(N)'
      local vars=`c(k)'
      usagelog , type(savas) uerror(0) message(Input Stata dataset has `obs' obs and `vars' vars) etime
    }
  }
  // return the described metadata: r(varlist), r(sortlist), r(k), r(N)
  if "`describe'" == "describe" {
    local varlist = ""
    local vlen=0
    forvalues n = 1/`= _N' {
      local vlen = `vlen' + length(trim("`= name[`n']'")) + 1
      if `n' == 1 local varlist = trim("`= name[`n']'")
      else local varlist `"`varlist' `= trim("`= name[`n']'")'"'
    }
    if `vlen' > `c(max_macrolen)' {
      di as err "not all the variables are in r(varlist) since there are too many "
    }
    return local varlist "`varlist'"
    return local sortlist "`sortedby'"
    return scalar k = _N
    return scalar N = `= nobs[1]'
    drop nobs
  }


end /* end of usesas */
|
||||
|
||||
|
||||
program define usesas_sas, nclass
|
||||
syntax [, QUotes engine(string) rver(string) dirsep(string) dir(string) tmpdir(string) filen(string) ///
|
||||
shortfileext xpt(string) replace raw(string) FORmats sysjobid(string) CHeck ext(string) middle(string) ///
|
||||
savastata(string) if(string) firstobs(string) obs(string) keep(string) sasprogram ///
|
||||
char2lab char2fmt(string) float describe listnot ]
|
||||
version 8
|
||||
|
||||
|
||||
quietly {
|
||||
file open sasfile using `"`raw'.sas"', replace text write
|
||||
|
||||
* DATA LIST
|
||||
* ---------
|
||||
|
||||
file write sasfile `"* SAS program to read file and output Stata dataset *;"' ///
|
||||
_n _n `"options nofmterr nocenter linesize=250;"' ///
|
||||
_n _n `"%let badx =0; ** if proc cimport has trouble with xport file **; "' _n _n ///
|
||||
_n _n `"%include "`savastata'"; "' _n _n
|
||||
|
||||
if "`char2lab'" != "" {
|
||||
file write sasfile `"%include "`char2fmt'"; "' _n _n
|
||||
}
|
||||
|
||||
if "`sasprogram'"!="" { /* user submitted a SAS program */
|
||||
file write sasfile `"options mprint source2; "' _n _n ///
|
||||
`" /*************** THE FOLLOWING IS YOUR PROGRAM ***************/ "' _n _n ///
|
||||
`" %include"`xpt'"; "' _n _n ///
|
||||
`" /*************** END OF YOUR PROGRAM ***************/ "' _n _n ///
|
||||
`"options nomprint nosource2; "' _n _n
|
||||
file write sasfile `" %let sortedby=; ** leave in for now **; "' _n _n ///
|
||||
`"%macro makework; "' _n ///
|
||||
`" %if &syserr.^=0 %then %goto nevrmind; "' _n ///
|
||||
`" %if &syslast.=_NULL_ %then %goto nevrmind; "' _n ///
|
||||
`" %let ldset=%length(&syslast.); "' _n ///
|
||||
`" %let decpos=%index(&syslast.,.); "' _n ///
|
||||
`" %let llib=%substr(&syslast.,1,&decpos.-1); "' _n ///
|
||||
`" %let dset=%substr(&syslast.,&decpos.+1,&ldset.-&decpos.); "' _n ///
|
||||
`" %let dset=%sysfunc(lowcase(%nrbquote(&dset.))); "' _n _n
|
||||
file write sasfile `" data _null_; "' _n ///
|
||||
`" dsid=open("&syslast.",'i');"' _n `" sortedby=attrc(dsid,'SORTEDBY'); "' _n ///
|
||||
`" call symput('sortedby',trim(sortedby));"' _n `" rc=close(dsid);"' _n `"run;"' _n _n
|
||||
file write sasfile `" %if %index(%upcase(&sortedby.),DESCENDING) %then %do; "' _n ///
|
||||
`" %* this is how Stata treats descending sortedby *; "' _n ///
|
||||
`" %let sortedby= %substr(&sortedby.,1,%index(%upcase(&sortedby.),DESCENDING)-1); %end;"' _n _n
|
||||
file write sasfile `" ** if not in work make it be in work **; "' _n ///
|
||||
`" %if %index(%upcase(&syslast.),WORK)^=1 %then %do; "' _n ///
|
||||
`" data work.&dset.; "' _n ///
|
||||
`" set &syslast.;"' _n `" run; "' _n ///
|
||||
`" proc datasets library=&llib.;"' _n `" delete &dset.;"' _n `" run; quit;"' _n ///
|
||||
`"%end; ** end of if syslast is not in WORK **; "' _n _n
|
||||
if "`keep'"!="" | "`firstobs'"!="" | length(`"`if'"')>5 {
|
||||
/** apply subsetting to work dataset **/
|
||||
file write sasfile `" data work.&dset."' _n
|
||||
if "`keep'"!="" {
|
||||
file write sasfile `" (keep=`keep' &sortedby.) "'
|
||||
}
|
||||
file write sasfile `";;; "' _n ///
|
||||
`" set &dset."'
|
||||
if "`firstobs'"!="" {
|
||||
file write sasfile `"(firstobs=`firstobs' obs=`obs')"' _n
|
||||
}
|
||||
file write sasfile `";;; "' _n
|
||||
if length(`"`if'"')>5 /* b/c "where" has 5 letters */ {
|
||||
file write sasfile `" `if'; "' _n
|
||||
}
|
||||
file write sasfile `"run; "' _n
|
||||
}
|
||||
file write sasfile `" %nevrmind: ; "' _n /*
|
||||
*/ `"%mend; "' _n /*
|
||||
*/ `"%makework; "'
|
||||
}
|
||||
else if "`sasprogram'"=="" { /* write SAS program to feed SAS data set into savastata */
|
||||
if "`formats'"!="" {
|
||||
if "`engine'"=="v6" {
|
||||
file write sasfile `"libname library v6 "`dir'" ; "'_n _n
|
||||
}
|
||||
else {
|
||||
file write sasfile `"libname library `engine' "`dir'" `shortfileext'; "'_n _n
|
||||
}
|
||||
}
|
||||
if "`engine'"=="`rver'" | "`engine'"=="v6" {
|
||||
file write sasfile `"libname ___in___ `engine' "`dir'" `shortfileext' ; "'_n _n
|
||||
// preserve sort order
|
||||
// Transport datasets cannot be opened and they do not save sort info anyway
|
||||
file write sasfile `"%let sortedby=; "' _n _n `"data _null_;"' _n `" dsid=open('___in___.`filen'','i');"' _n ///
|
||||
`" sortedby=attrc(dsid,'SORTEDBY'); "' _n `" call symput('sortedby',trim(sortedby));"' _n `" rc=close(dsid);"' _n `"run;"' _n _n ///
|
||||
`" %macro __sort; %if %index(&sortedby.,DESCENDING) %then %sysfunc(tranwrd(&sortedby.,%str(DESCENDING ),-)); "' ///
|
||||
`" %mend __sort; %__sort; "'
|
||||
} // end of normal SAS dataset
|
||||
else if "`engine'"=="xport" { // test xport file to see if created by cimport
|
||||
// Transport datasets cannot be opened and they do not save sort info anyway
|
||||
file write sasfile `"%let sortedby=; "'
|
||||
file write sasfile `"filename ___in___ "`xpt'"; "' _n _n ///
|
||||
`"%macro ___xt___ ;"' _n `" data _null_; "' _n `" infile ___in___ ; "' _n `" input xt $ 1-6; "' _n ///
|
||||
`" call symput('header',xt); "' _n `" if _n_ = 1 then stop; "' _n `" run; "' _n
|
||||
file write sasfile `" %if %index(&header.,HEAD) ^= 0 %then %do; "' _n _n ///
|
||||
`" libname ___in___ xport "`xpt'"; "' _n _n
|
||||
file write sasfile `" data _null_;"' _n `" set sashelp.vmember; "' _n ///
|
||||
`" if upcase(libname)="___IN___" and upcase(memtype)="DATA" then call symput("filen",memname); "' _n
|
||||
file write sasfile `" run;"' _n _n `" data `filen'; "' _n `" set ___in___.&filen.;"' _n ///
|
||||
`" run; "' _n `" %end; "' _n
|
||||
file write sasfile `" %else %do;"' _n `" proc cimport data=`filen' infile=___in___; "' _n ///
|
||||
`" run; "' _n `" %if &syserr. ^=0 %then %do; "' _n ///
|
||||
`" proc printto log="`tmpdir'_`sysjobid'_report.log"; options nonotes; "' _n ///
|
||||
`" data _null_; "' _n ///
|
||||
`" put "ERROR: SAS could not open `filen' because it was created in a newer version of SAS *"; "' _n ///
|
||||
`" put " or there is not just a data set named `filen' in the file. *"; "' _n ///
|
||||
`" run; proc printto; ** end printing to *_report.log "' _n ///
|
||||
`" %let badx=1; %end; "' _n `"%end; "' _n `"%mend ___xt___;"' _n _n `"%___xt___; ** now run macro ___xt___ ***; "' ///
|
||||
_n _n
|
||||
|
||||
}
|
||||
|
||||
if "`engine'"!="spss" {
|
||||
if "`formats'"!="" {
|
||||
/* look for datasetname.formatscatalog file */
|
||||
local rc=1
|
||||
if ("`engine'"=="`rver'" | "`engine'"=="xport") & "`shortfileext'"=="" {
|
||||
capture confirm file `"`macval(dir)'`filen'.sas7bcat"'
|
||||
if _rc ==0 {
|
||||
local rc=0
|
||||
}
|
||||
}
|
||||
else if ("`engine'"=="`rver'" | "`engine'"=="xport") & "`shortfileext'"!="" {
|
||||
capture confirm file `"`macval(dir)'`filen'.sc7"'
|
||||
if _rc ==0 {
|
||||
local rc=0
|
||||
}
|
||||
}
|
||||
else if "`engine'"=="v6" & "`c(os)'"=="Unix" {
|
||||
capture confirm file `"`macval(dir)'`filen'.sct01"'
|
||||
if _rc ==0 {
|
||||
local rc=0
|
||||
}
|
||||
}
|
||||
else if "`engine'"=="v6" & "`c(os)'"=="Windows" {
|
||||
capture confirm file `"`macval(dir)'`filen'.sc2"'
|
||||
if _rc ==0 {
|
||||
local rc=0
|
||||
}
|
||||
}
|
||||
if `rc'==0 {
|
||||
file write sasfile _n `"%macro __fmt__;"' ///
|
||||
`" %if %sysfunc(cexist(LIBRARY.`filen')) = 1 %then %do;"' _n ///
|
||||
`" options fmtsearch=(library.`filen' library.formats); "' _n _n ///
|
||||
`" proc datasets; "' _n ///
|
||||
`" copy in=library out=work memtype=catalog; "' _n ///
|
||||
`" select `filen'; "' _n ///
|
||||
`" change `filen'=formats;"' _n ///
|
||||
`" run; quit;"' _n ///
|
||||
`" %end; "' _n ///
|
||||
`" %else %do; "' _n ///
|
||||
`" proc printto log="`tmpdir'_`sysjobid'_report.log"; options nonotes; "' _n ///
|
||||
`" data _null_; "' _n ///
|
||||
`" put "ERROR: File LIBRARY.`filen'.CATALOG was created for a different operating system. *" ; "' _n ///
|
||||
`" put "ERROR: -usesas- did not create Stata value labels from SAS formats. *"; "' _n ///
|
||||
`" run; proc printto; ** end printing to *_report.log "' _n ///
|
||||
`" %end; "' _n ///
|
||||
`"%mend __fmt__; "' _n ///
|
||||
`"%__fmt__; "' _n
|
||||
|
||||
} /* if filen.catalog file exists */
|
||||
} /* end of if "`formats'"!="" */
|
||||
|
||||
file write sasfile _n `"data `filen'"'
|
||||
if "`keep'"!="" {
|
||||
file write sasfile `" (keep=`keep' &sortedby.) "'
|
||||
}
|
||||
if "`engine'" == "xport" {
|
||||
file write sasfile `";;;"' _n `" set work.`filen' "' // 08Apr2005 use `filen' in work lib
|
||||
|
||||
}
|
||||
else {
|
||||
file write sasfile `";;;"' _n `" set ___in___.`filen' "'
|
||||
}
|
||||
|
||||
if "`firstobs'"!="" {
|
||||
file write sasfile `"(firstobs=`firstobs' obs=`obs')"' _n
|
||||
}
|
||||
file write sasfile `";;; "' _n
|
||||
if length(`"`if'"')>5 /* b/c "where" has 5 letters */ {
|
||||
file write sasfile `" `if'; "' _n //
|
||||
}
|
||||
file write sasfile `"run; "' _n
|
||||
}
|
||||
else {
|
||||
file write sasfile `"filename spss "`xpt'"; "' _n _n /*
|
||||
*/ `"proc convert spss=spss out=`filen'; "' _n /*
|
||||
*/ `"run; "'
|
||||
file write sasfile _n `"data `filen'"'
|
||||
if "`keep'"!="" {
|
||||
file write sasfile `" (keep=`keep') "'
|
||||
}
|
||||
file write sasfile `";;;"' _n `" set `filen' "'
|
||||
if "`firstobs'"!="" {
|
||||
file write sasfile `"(firstobs=`firstobs' obs=`obs')"' _n
|
||||
}
|
||||
file write sasfile `";;; "' _n
|
||||
if length(`"`if'"')>5 /* b/c "where" has 5 letters */ {
|
||||
file write sasfile `" `if'; "' _n //
|
||||
}
|
||||
file write sasfile `"run; "' _n
|
||||
}
|
||||
} /* end of if no sas program submitted */
|
||||
|
||||
if `c(stata_version)' < 9 & "`char2lab'" != "" {
|
||||
noisily {
|
||||
di as error `"option char2lab is not allowed prior to Stata 9."'
|
||||
di as error `"option will be ignored."'
|
||||
local char2lab ""
|
||||
}
|
||||
}
|
||||
file write sasfile _n _n ///
|
||||
`"%macro runit;"' _n `" %if &badx.=0 %then %do; "'
|
||||
if "`describe'" == "describe" {
|
||||
file write sasfile _n ///
|
||||
_n `" proc contents data=`filen' out=`filen'(keep=name varnum type label format "' ///
|
||||
`" nobs length memlabel) noprint; run; "'
|
||||
file write sasfile _n ///
|
||||
_n `" data `filen'(drop=type rename=(stype=type)); "'
|
||||
// truncate long string vars just to make life simple
|
||||
if `c(stata_version)' < 9.2 & "$S_StataSE" == "" & "$S_StataMP" == "" {
|
||||
file write sasfile _n `" length label memlabel $80; "'
|
||||
}
|
||||
else {
|
||||
file write sasfile _n `" length label memlabel $244; "'
|
||||
}
|
||||
file write sasfile ///
|
||||
_n `" set `filen'; "' ///
|
||||
_n `" if type =1 then stype="numeric"; "' ///
|
||||
_n `" if type =2 then stype="string "; "' ///
|
||||
_n `" label stype = "Variable Type"; "' ///
|
||||
_n `" run; "'
|
||||
file write sasfile ///
|
||||
_n `" data _null_; "' ///
|
||||
_n `" file "`tmpdir'_`sysjobid'_usesas.txt"; "' ///
|
||||
_n `" put "%trim(&sortedby.)"; "' ///
|
||||
_n `" run;"'
|
||||
|
||||
file write sasfile _n ///
|
||||
_n `" proc sort data=`filen'; by varnum; run; "'
|
||||
file write sasfile _n ///
|
||||
_n `" %let sortedby=varnum; "'
|
||||
|
||||
} // end of if describe
|
||||
// need to put c(SE) and c(MP) in quotes since c(MP) doesn't exist in Stata 8
|
||||
// need to pass a zero or a one to savastata for SE or MP
|
||||
file write sasfile `" libname ___dir__ "`dir'" ; "' _n ///
|
||||
`" %let _dir=%nrbquote(%sysfunc(pathname(___dir__))); "' _n ///
|
||||
`" /* &sortedby. is global because of: call symput creates it */ "' _n ///
|
||||
`" %savastata("`tmpdir'",`quotes' `char2lab' `check' messy `float', &sortedby., "' ///
|
||||
`" `sysjobid',nosave,"&_dir.`dirsep'",`= ("`c(SE)'" == "1") + ("`c(MP)'" == "1")', "' ///
|
||||
`" version=`c(stata_version)'); "' _n ///
|
||||
`"%end; %* if &badx.=0 *; %mend runit;"' _n `" %runit;"' _n _n
|
||||
|
||||
file close sasfile
|
||||
|
||||
} /* end of quietly */
|
||||
end
|
||||
|
||||
exit
|
||||
|
213
Modules/ado/plus/u/usesas.hlp
Normal file
213
Modules/ado/plus/u/usesas.hlp
Normal file
@ -0,0 +1,213 @@
|
||||
{smcl}
|
||||
{* version 2.1 26Feb2009}{...}
|
||||
{* 24Aug2006}{...}
|
||||
{* 04Aug2005}{...}
|
||||
{* 05Nov2003}{...}
|
||||
{hline}
|
||||
help for {hi:usesas} {right:manual: {hi:[R] none}}
|
||||
{right:dialog: {hi: none} }
|
||||
{hline}
|
||||
|
||||
|
||||
{title:Use a SAS dataset}
|
||||
|
||||
{p 8 17 2}{cmd:usesas}
|
||||
{cmd:using} {it:filename}
|
||||
[{cmd:,}
|
||||
{cmdab:for:mats}
|
||||
{cmd:char2lab}
|
||||
{cmdab:ch:eck}
|
||||
{cmd:clear}
|
||||
{cmd:float}
|
||||
{cmd:xport}
|
||||
{cmdab:de:scribe}
|
||||
{cmdab:ke:ep(}{it:variable names}{cmd:)}
|
||||
{cmd:if(}{it:SAS if statement}{cmd:)}
|
||||
{cmd:in(}{it:firstobs/lastobs}{cmd:)}
|
||||
{cmdab:qu:otes}
|
||||
{cmdab:me:ssy}
|
||||
]{p_end}
|
||||
|
||||
|
||||
{title:Description}
|
||||
|
||||
{p 4 8 2} {cmd:NOTE:} Before the first use of {cmd:usesas} your {cmd:sasexe.ado} file may need to be edited to set
|
||||
the location of your SAS executable file (sas.exe) and your savastata SAS macro file (savastata.sas). It may be
|
||||
that {cmd:usesas} will be able to run with the default settings in {cmd:sasexe.ado}.{p_end}
|
||||
|
||||
{p 4 4 2} {cmd:usesas} loads a SAS datafile into memory. This usually occurs by supplying {cmd:usesas} a SAS
|
||||
dataset (*.sas7bdat, *.sd7, *.sd2, *.ssd01, *.xpt, *.cport) or an SPSS portable file (*.por),
|
||||
but {cmd:usesas} can also load a SAS datafile into memory via a SAS program (*.sas) that creates a
|
||||
SAS dataset. The last dataset created by the SAS program will be the SAS dataset processed by {cmd:usesas}.{p_end}
|
||||
|
||||
{p 4 4 2}{cmd:usesas} assumes the most common SAS datafile extension {cmd:.sas7bdat} if no file extension/suffix is
|
||||
specified.{p_end}
|
||||
|
||||
{p 4 4 2}{cmd:usesas} uses the savastata SAS macro to create the Stata dataset from the SAS
|
||||
dataset. {cmd:usesas} downloads the savastata SAS macro and stores it where user-written
|
||||
Stata ado-files are stored that begin with the letter "s". This macro can be used in SAS.
|
||||
Learn about savastata here:
|
||||
{browse "http://faculty.fuqua.duke.edu/home/blanc004/data_programming/sas_to_stata/savastata.html": http://faculty.fuqua.duke.edu/home/blanc004/data_programming/sas_to_stata/savastata.html}{p_end}
|
||||
|
||||
{p 4 4 2}{cmd:usesas} figures out how much memory the SAS dataset will require to be loaded into Stata
|
||||
and sets Stata's memory for you if your memory setting is less than is required.{p_end}
|
||||
|
||||
{p 4 4 2}{cmd:usesas} indicates that it has finished running by reporting to you how many observations
|
||||
and variables are in your dataset now in memory. For example:{p_end}
|
||||
|
||||
{p 4 8}Stata reports that the dataset has 200 observations and 11 variables.{p_end}
|
||||
|
||||
{p 4 8 2}{cmd:NOTE: usesas} calls SAS to run a SAS program. This requires the ability to run SAS on your computer.{p_end}
|
||||
|
||||
|
||||
{title:Options}
|
||||
|
||||
{p 4 8 2}{cmd:formats} specifies to create value labels from SAS user-defined formats that are stored
|
||||
in a SAS formats catalog file that has the same name as the dataset and is in the same directory
|
||||
as the SAS dataset. For example: MySasData.sas7bcat . If this file doesn't exist, {cmd:usesas} will
|
||||
look for the file formats.sas7bcat in the same directory as the dataset.{p_end}
|
||||
|
||||
{p 4 8 2}{cmd:char2lab} specifies to encode long SAS character variables like the Stata
|
||||
command {help encode :encode}. Character variables that are too long for a Stata string
|
||||
variable are maintained in value labels. This is all done with the {cmd:char2fmt} SAS
|
||||
macro.{p_end}
|
||||
|
||||
{p 4 8 2}{cmd:check} specifies to generate basic stats for both datasets for the user to compare the
|
||||
newly created Stata dataset with the imported SAS dataset to make sure {cmd:usesas} created the files
|
||||
correctly. This is a comparison that should be done after any datafile is converted to any other
|
||||
type of datafile by any software. The SAS file is created in the same directory as the input SAS
|
||||
datafile and is named starting with the name of the datafile followed by "_SAScheck.lst"
|
||||
(SAS). e.g. "mySASdata_SAScheck.lst"{p_end}
|
||||
|
||||
{p 4 8 2}{cmd:clear} specifies to clear the data currently in memory before running {cmd:usesas}.{p_end}
|
||||
|
||||
{p 4 8 2}{cmd:float} specifies that numeric variables that would otherwise be stored as numeric type
|
||||
double be stored with numeric type float. This option should only be used if you are certain you
|
||||
have no integer variables that have more than 7 digits (like an ID variable).{p_end}
|
||||
|
||||
{p 4 8 2}{cmd:xport} specifies that the input dataset is a SAS Transport/Xport dataset. Since there
|
||||
is no standard file extension for SAS Xport datasets, this option is required. Datasets created
|
||||
by SAS's PROC CPORT procedure are allowed.{p_end}
|
||||
|
||||
{p 4 8 2}{cmd:describe} makes {cmd:usesas} act somewhat like the Stata command
|
||||
{help describe :describe using}. It does not bring the full dataset into memory. Instead it specifies for
|
||||
{cmd:usesas} only to load the descriptive information about the using dataset into Stata's memory as a
|
||||
Stata dataset and print it. So, instead of loading the actual dataset into Stata, {cmd:usesas} loads
|
||||
the descriptive information (variable names, what type of variables they are, the variable labels and
|
||||
formats associated to the variables) into Stata as a dataset. You can {help clear :clear} the
|
||||
descriptive data out of Stata's memory or use the descriptive data however you like to create variable
|
||||
lists for your actual invocation of {cmd:usesas}. This may be helpful for situations where the SAS
|
||||
dataset has more variables than your version of Stata can handle. You can create a variable list
|
||||
from the variable called "name" to create another invocation of {cmd:usesas} to read in only the
|
||||
variables you need.{p_end}
|
||||
|
||||
{p 8 8 2}If you do not want to have the {cmd:describe} option list the descriptive information of the
|
||||
imported dataset, you can use the option {cmd:listnot} with {cmd:describe}. The descriptive information
|
||||
will still be loaded into Stata as a Stata dataset.{p_end}
|
||||
|
||||
{p 8 8 2}The descriptive data are sorted in the variable order of the using dataset so a variable list
|
||||
for {cmd:usesas} could be created like so:{p_end}
|
||||
|
||||
{p 8 8 2} {cmd:. display "`= trim(name[1])'--`= name[2047]'" }{p_end}
|
||||
|
||||
{p 8 8 2} {cmd:id--income88 }{p_end}
|
||||
|
||||
|
||||
{p 8 8 2} which could then be used like so to keep the first 2,047 variables in the using dataset
|
||||
(2,047 is the maximum number of variables that Stata Intercooled can handle):{p_end}
|
||||
|
||||
{p 8 8 2} {cmd:. usesas using "mySASdata.sas7bdat", clear keep(`= trim(name[1])'--`= name[2047]') }{p_end}
|
||||
|
||||
{p 8 8 2} SAS variable lists using two dashes "--" tell SAS to use the variables that exist
|
||||
positionally between the first variable and the last variable in the using dataset inclusively.
|
||||
Read more about this under the documentation of the {cmd:keep} option.{p_end}
|
||||
|
||||
{p 8 8 2}The {cmd:describe} option makes {cmd:usesas} return the following in {cmd:r()}:{p_end}
|
||||
|
||||
{synoptset 20 tabbed}{...}
|
||||
{p2col 5 20 24 2: Scalars}{p_end}
|
||||
{synopt:{cmd:r(N)}}number of observations in using dataset{p_end}
|
||||
{synopt:{cmd:r(k)}}number of variables in using dataset{p_end}
|
||||
|
||||
{p2col 5 20 24 2: Macros}{p_end}
|
||||
{synopt:{cmd:r(varlist)}}variables in using dataset {p_end}
|
||||
{synopt:{cmd:r(sortlist)}}variables by which using data are sorted {p_end}
|
||||
|
||||
{p 8 8 2} The above scalars and macros contain information about the dataset that was described,
|
||||
not information about the dataset of descriptive information that {cmd:usesas} loaded into Stata
|
||||
with the {cmd:describe} option.{p_end}
|
||||
|
||||
{p 4 8 2}{cmd:keep} allows for a list of variables from the imported dataset to be read in. This list is
|
||||
used in the SAS code portion of {cmd:usesas} so must be written in the SAS variable list style. SAS does
|
||||
not allow for variable lists to contain stars (*) or question marks (?). For example:{p_end}
|
||||
|
||||
{p 4 8 2}{cmd: keep(var1-var20)} includes only vars that start with "var" and end in a number between 1 and 20.{p_end}
|
||||
|
||||
{p 4 8 2}{cmd: keep(var1--var20)} includes only vars in the dataset between var1 and var20. This is like Stata's
|
||||
{help varlist:varlist} style {cmd: var1-var20}.{p_end}
|
||||
|
||||
{p 4 8 2}{cmd:if} allows for a SAS {cmd:if} statement to subset the data before it's read in. Any valid
|
||||
SAS style {cmd:if} statement will work.{p_end}
|
||||
|
||||
{p 4 8 2}{cmd:in} allows for subsetting the data before it's read in. Use only {cmd:#/#} where both numbers are
|
||||
positive, for example 1/30 for the first 30 observations.{p_end}
|
||||
|
||||
{p 4 8 2}{cmd:quotes} specifies that double quotes that exist in string variables are to be replaced
|
||||
with single quotes. Since the data are written out to an ASCII file and then read into Stata,
|
||||
there are rare instances when double quotes are not allowed inside string variables.{p_end}
|
||||
|
||||
{p 4 8 2}{cmd:messy} specifies that all the intermediary files created by {cmd:usesas} during its operation
|
||||
are not to be deleted. The {cmd:messy} option prevents {cmd:usesas} from cleaning up after it has
|
||||
finished. This option is mostly useful for debugging purposes in order to find out where something went
|
||||
wrong. All intermediary files have a name starting with an underscore "_" followed by the process ID and
|
||||
are located in Stata's temp directory.{p_end}
|
||||
|
||||
{title:Examples}
|
||||
|
||||
{p 4 8 2} {cmd:. usesas using "mySASdata.sas7bdat" }{p_end}
|
||||
|
||||
{p 4 8 2} {cmd:. usesas using "c:\data\mySASdata.ssd01", check }{p_end}
|
||||
|
||||
{p 4 8 2} {cmd:. usesas using "mySASdata.xpt", xport }{p_end}
|
||||
|
||||
{p 4 8 2} {cmd:. usesas using "mySASdata.sas7bdat", formats }{p_end}
|
||||
|
||||
{p 4 8 2} {cmd:. usesas using "mySASdata.sd2", quotes }{p_end}
|
||||
|
||||
{p 4 8 2} {cmd:. usesas using "mySASdata.sas7bdat", messy }{p_end}
|
||||
|
||||
{p 4 8 2} {cmd:. usesas using "mySASdata.sas7bdat", keep(id--qvm203a) if(1980<year<2000) in(1/500) }{p_end}
|
||||
|
||||
{p 4 8 2} {cmd:. usesas using "mySASdata.sas7bdat", describe }{p_end}
|
||||
|
||||
{p 4 8 2} {cmd:. usesas using "mySASdata.sas7bdat", describe listnot }{p_end}
|
||||
|
||||
{p 4 8 2} {cmd:// then submit the following actual invocation of usesas: }{p_end}
|
||||
|
||||
{p 4 8 2} {cmd:. usesas using "mySASdata.sas7bdat", clear keep(`r(sortlist)' `= trim(name[1])'--`= name[2047]') }{p_end}
|
||||
|
||||
|
||||
{p 4 8 2} NOTE: If you are setting up this program on your computer for the first time, please edit
|
||||
{cmd:sasexe.ado} to set the location of your SAS executable file (sas.exe). If you do not, {cmd:usesas}
|
||||
will try to set it for you. The {cmd:sasexe.ado} file is an ASCII text file and should be saved as such
|
||||
after editing. Stata's {cmd:do-file} editor will do the trick.{p_end}
|
||||
|
||||
{title:Setting up usesas}
|
||||
|
||||
{p 4 8 2}{stata quietly adoedit sasexe:edit sasexe.ado} (click, to edit the {cmd:sasexe.ado} file, remember to save when done.){p_end}
|
||||
|
||||
|
||||
{title:Author}
|
||||
|
||||
{p 4 4 2}
|
||||
Dan Blanchette {break}
|
||||
Center for Entrepreneurship and Innovation {break}
|
||||
Duke University's Fuqua School of Business {break}
|
||||
Dan.Blanchette@Duke.edu{p_end}
|
||||
|
||||
|
||||
{title:Also see}
|
||||
|
||||
{p 4 13 2}On-line: {help use}, {help fdause}, {help savasas} (if installed){p_end}
|
||||
|
||||
|
25
Modules/ado/plus/u/usesasdel.ado
Normal file
25
Modules/ado/plus/u/usesasdel.ado
Normal file
@ -0,0 +1,25 @@
|
||||
*! usesasdel Version 1.1 dan.blanchette@duke.edu 16Mar2009
|
||||
*! Center for Entrepreneurship and Innovation Duke University's Fuqua School of Business
|
||||
** usesasdel Version 1.1 dan_blanchette@unc.edu 01Feb2008
|
||||
** - made the string comparison work for very long strings
|
||||
** research computing, unc-ch
|
||||
** usesasdel Version 1.0 dan_blanchette@unc.edu 09Nov2005
|
||||
** the carolina population center, unc-ch
|
||||
|
||||
// can only delete files with no spaces in their names
|
||||
// but can handle directory names with spaces in their names
|
||||
program define usesasdel
  version 8
  // Erase the files in directory `dir' whose names begin with `basefilename'.
  //   dir           first positional argument: directory to look in
  //   basefilename  second positional argument: leading part of the file names
  args dir basefilename

  // names of the files in `dir' matching `basefilename'* (nofail is passed
  // to the dir extended function)
  local matches : dir `"`dir'"' files `"`basefilename'*"' , nofail

  foreach fname in `matches' {
    // full path of the candidate file
    local target `"`dir'/`fname'"'
    // macro-list comparison: 1 when the full path equals the directory itself
    local same : list local(dir) == local(target)
    if !`same' {
      erase `"`target'"'
    }
  }
end
|
||||
|
||||
exit
|
||||
|
||||
|
456
Modules/ado/plus/u/uvis.ado
Normal file
456
Modules/ado/plus/u/uvis.ado
Normal file
@ -0,0 +1,456 @@
|
||||
*! version 1.1.0 PR 30aug2005.
|
||||
*
|
||||
* Recent history of uvis
|
||||
* 1.1.0 03aug2005 Replace -draw- option with -match-. Default becomes draw.
|
||||
* With prediction matching, randomly sort observations with identical predictions.
|
||||
* Order variables in chained equations in order of increasing missingness.
|
||||
* 1.0.4 21jun2005 Add sort, stable to enable reproducible imputations with a given seed
|
||||
*
|
||||
program define uvis, rclass sortpreserve
|
||||
version 8
|
||||
gettoken cmd 0 : 0
|
||||
if substr("`cmd'",1,3)=="reg" {
|
||||
local cmd regress
|
||||
}
|
||||
|
||||
local normal=("`cmd'"=="regress")|("`cmd'"=="rreg")
|
||||
local binary=("`cmd'"=="logit")|("`cmd'"=="logistic")
|
||||
local catcmd=("`cmd'"=="mlogit")|("`cmd'"=="ologit")
|
||||
|
||||
if !`normal' & !`binary' & !`catcmd' {
|
||||
di in red "invalid or unrecognised command, `cmd'"
|
||||
exit 198
|
||||
}
|
||||
|
||||
syntax varlist(min=2 numeric) [if] [in] [aweight fweight pweight iweight] , Gen(string) /*
|
||||
*/ [ noCONStant Delta(real 0) BOot MAtch REPLACE SEed(int 0) * ]
|
||||
|
||||
if "`replace'"=="" {
|
||||
confirm new var `gen'
|
||||
}
|
||||
|
||||
if "`match'"=="match" {
|
||||
di as text "[imputing by prediction matching" _cont
|
||||
}
|
||||
else di as text "[imputing by drawing from conditional distribution" _cont
|
||||
if "`boot'"=="" {
|
||||
di as text " without bootstrap]"
|
||||
}
|
||||
else di as text " with bootstrap]"
|
||||
|
||||
if "`constant'"=="noconstant" {
|
||||
local options "`options' nocons"
|
||||
}
|
||||
gettoken y xvars : varlist
|
||||
tempvar touse
|
||||
quietly {
|
||||
marksample touse, novarlist
|
||||
markout `touse' `xvars' /* note: does not include `y' */
|
||||
|
||||
if `seed'!=0 {
|
||||
set seed `seed'
|
||||
}
|
||||
|
||||
* Deal with weights
|
||||
frac_wgt `"`exp'"' `touse' `"`weight'"'
|
||||
local wgt `r(wgt)'
|
||||
|
||||
* Code types of missings: 1=non-missing y, 2=missing y, 3=other missing
|
||||
tempvar obstype yimp
|
||||
gen byte `obstype'=1*(`touse'==1 & !missing(`y')) /*
|
||||
*/ +2*(`touse'==1 & missing(`y')) /*
|
||||
*/ +3*(`touse'==0)
|
||||
|
||||
count if `obstype'==1
|
||||
local nobs=r(N)
|
||||
count if `obstype'==2
|
||||
local nmis=r(N)
|
||||
|
||||
local type: type `y'
|
||||
gen `type' `yimp'=.
|
||||
|
||||
* Fit imputation model
|
||||
`cmd' `y' `xvars' `wgt', `options'
|
||||
tempname b e V chol bstar
|
||||
tempvar xb u
|
||||
matrix `b'=e(b)
|
||||
matrix `e'=e(b)
|
||||
matrix `V'=e(V)
|
||||
local colsofb=colsof(`b')
|
||||
* Check for zeroes on the diagonal of V and replace them with 1.
|
||||
* Otherwise this makes the matrix non-positive definite.
|
||||
* Occurs when e.g. logit drops variables, giving zero variances.
|
||||
* !! Is this safe to do?
|
||||
if diag0cnt(`V')>0 {
|
||||
forvalues j=1/`colsofb' {
|
||||
if `V'[`j',`j']==0 {
|
||||
matrix `V'[`j',`j']=1
|
||||
}
|
||||
}
|
||||
}
|
||||
matrix `chol'=cholesky(`V')
|
||||
if `catcmd' {
|
||||
tempname cat
|
||||
local nclass=e(k_cat) /* number of classes in (ordered) categoric variable */
|
||||
matrix `cat'=e(cat) /* row vector giving actual category values */
|
||||
local cuts=`nclass'-1
|
||||
}
|
||||
* Draw beta, and if necessary rmse, for proper imputation
|
||||
if `normal' {
|
||||
* draw rmse
|
||||
local rmse=e(rmse)
|
||||
local df=e(df_r)
|
||||
local chi2=2*invgammap(`df'/2,uniform())
|
||||
local rmsestar=`rmse'*sqrt(`df'/`chi2')
|
||||
matrix `chol'=`chol'*sqrt(`df'/`chi2')
|
||||
}
|
||||
* draw beta
|
||||
forvalues i=1/`colsofb' {
|
||||
matrix `e'[1,`i']=invnorm(uniform())
|
||||
}
|
||||
matrix `bstar'=`b'+`e'*`chol''
|
||||
|
||||
if "`boot'"=="" {
|
||||
* Based on Ian White's code to implement van Buuren et al (1999).
|
||||
* draw y
|
||||
gen `u'=uniform()
|
||||
if `normal' | `binary' {
|
||||
* in normal or binary case, impute by sampling conditional distribution
|
||||
* or by prediction matching
|
||||
if "`match'"=="match" {
|
||||
* prediction matching
|
||||
tempvar etaobs etamis
|
||||
matrix score `etaobs'=`b' if `obstype'==1
|
||||
matrix score `etamis'=`bstar' if `obstype'==2
|
||||
* Include non-response location shift, delta.
|
||||
if `delta'!=0 {
|
||||
replace `etamis'=`etamis'+`delta'
|
||||
}
|
||||
match_normal `obstype' `nobs' `nmis' `etaobs' `etamis' `yimp' `y'
|
||||
}
|
||||
else {
|
||||
* sampling conditional distribution
|
||||
matrix score `xb'=`bstar' if `touse'
|
||||
if `normal' {
|
||||
replace `yimp'=`xb'+`rmsestar'*invnorm(`u')
|
||||
}
|
||||
else replace `yimp'=`u'<1/(1+exp(-`xb')) if !missing(`xb')
|
||||
}
|
||||
}
|
||||
else { /* catcmd */
|
||||
if "`match'"=="match" { // prediction matching
|
||||
* predict class-specific probabilities and convert to logits
|
||||
if "`cmd'"=="ologit" {
|
||||
* Predict index independent of cutpoints
|
||||
* (note use of forcezero option to circumvent missing _cut* vars)
|
||||
matrix score `xb'=`b' if `touse', forcezero
|
||||
* predict cumulative probabilities for obs data and hence logits of class probs
|
||||
forvalues k=1/`nclass' {
|
||||
tempvar etaobs`k' etamis`k'
|
||||
if `k'==`nclass' {
|
||||
gen `etaobs`nclass''=log((1-`p`cuts'')/`p`cuts'') if `obstype'==1
|
||||
}
|
||||
else {
|
||||
tempvar p`k'
|
||||
local cutpt=`b'[1, `k'+`colsofb'-`cuts']
|
||||
* 1/(1+exp(-... is probability of being in category 1 or 2 or ... k
|
||||
gen `p`k''=1/(1+exp(-(`cutpt'-`xb')))
|
||||
if `k'==1 {
|
||||
gen `etaobs`k''=log(`p`k''/(1-`p`k'')) if `obstype'==1
|
||||
}
|
||||
else {
|
||||
local k1=`k'-1
|
||||
gen `etaobs`k''=log((`p`k''-`p`k1'')/(1-(`p`k''-`p`k1''))) /*
|
||||
*/ if `obstype'==1
|
||||
}
|
||||
}
|
||||
}
|
||||
drop `xb'
|
||||
matrix score `xb'=`bstar' if `touse', forcezero
|
||||
* predict cumulative probabilities for missing data and hence logits of class probs
|
||||
forvalues k=1/`nclass' {
|
||||
if `k'==`nclass' {
|
||||
gen `etamis`nclass''=log((1-`p`cuts'')/`p`cuts'') if `obstype'==2
|
||||
}
|
||||
else {
|
||||
local cutpt=`bstar'[1, `k'+`colsofb'-`cuts']
|
||||
replace `p`k''=1/(1+exp(-(`cutpt'-`xb')))
|
||||
if `k'==1 {
|
||||
gen `etamis`k''=log(`p`k''/(1-`p`k'')) if `obstype'==2
|
||||
}
|
||||
else {
|
||||
local k1=`k'-1
|
||||
gen `etamis`k''=log((`p`k''-`p`k1'')/(1-(`p`k''-`p`k1''))) /*
|
||||
*/ if `obstype'==2
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else { /* mlogit */
|
||||
* predict cumulative probabilities for obs data and hence logits of class probs
|
||||
* care needed dealing with different possible base categories
|
||||
tempvar sumexp
|
||||
local basecat=e(basecat) /* actual basecategory chosen by Stata */
|
||||
gen `sumexp'=0 if `touse'
|
||||
forvalues k=1/`nclass' {
|
||||
tempvar etaobs`k' etamis`k' xb`k'
|
||||
local thiscat=`cat'[1,`k']
|
||||
if `thiscat'==`basecat' {
|
||||
gen `xb`k''=0 if `touse'
|
||||
}
|
||||
else matrix score `xb`k''=`b' if `touse', equation(`thiscat')
|
||||
replace `sumexp'=`sumexp' + exp(`xb`k'')
|
||||
}
|
||||
forvalues k=1/`nclass' {
|
||||
* formula for logit of class prob derived from Pk in Stata mlogit entry
|
||||
gen `etaobs`k''=`xb`k''-log(`sumexp'-exp(`xb`k'')) if `obstype'==1
|
||||
}
|
||||
* same for missing obs
|
||||
replace `sumexp'=0
|
||||
forvalues k=1/`nclass' {
|
||||
cap drop `xb`k''
|
||||
local thiscat=`cat'[1,`k']
|
||||
if `thiscat'==`basecat' {
|
||||
gen `xb`k''=0 if `touse'
|
||||
}
|
||||
else matrix score `xb`k''=`bstar' if `touse', equation(`thiscat')
|
||||
replace `sumexp'=`sumexp' + exp(`xb`k'')
|
||||
}
|
||||
forvalues k=1/`nclass' {
|
||||
* formula for logit of class prob derived from Pk in Stata mlogit entry
|
||||
gen `etamis`k''=`xb`k''-log(`sumexp'-exp(`xb`k'')) if `obstype'==2
|
||||
}
|
||||
}
|
||||
* match
|
||||
sort `obstype', stable
|
||||
tempvar order distance
|
||||
gen `distance'=.
|
||||
gen long `order'=_n
|
||||
* For each missing obs j, find index of obs whose etaobs is closest to prediction [j].
|
||||
forvalues i=1/`nmis' {
|
||||
local j=`i'+`nobs'
|
||||
* calc summed absolute distances between etamis* and etaobs*
|
||||
replace `distance'=0 in 1/`nobs'
|
||||
forvalues k=1/`nclass' {
|
||||
replace `distance'=`distance'+abs(`etamis`k''[`j']-`etaobs`k'') in 1/`nobs'
|
||||
}
|
||||
* Find index of smallest distance between etamis* and etaobs*
|
||||
sort `distance'
|
||||
local index=`order'[1]
|
||||
* restore correct order
|
||||
sort `order'
|
||||
replace `yimp'=`y'[`index'] in `j'
|
||||
}
|
||||
}
|
||||
else { // draw
|
||||
* sampling conditional distribution
|
||||
replace `yimp'=`cat'[1,1]
|
||||
if "`cmd'"=="ologit" {
|
||||
* Predict index independent of cutpoints
|
||||
* (note use of forcezero option to circumvent missing _cut* vars)
|
||||
matrix score `xb'=`bstar' if `touse', forcezero
|
||||
forvalues k=1/`cuts' {
|
||||
* 1/(1+exp(-... is probability of being in category 1 or 2 or ... k
|
||||
local cutpt=`bstar'[1, `k'+`colsofb'-`cuts']
|
||||
replace `yimp'=`cat'[1,`k'+1] if `u'>1/(1+exp(-(`cutpt'-`xb')))
|
||||
}
|
||||
}
|
||||
else { /* mlogit */
|
||||
* care needed dealing with different possible base categories
|
||||
tempvar cusump sumexp
|
||||
local basecat=e(basecat) /* actual basecategory chosen by Stata */
|
||||
gen `sumexp'=0 if `touse'
|
||||
forvalues i=1/`nclass' {
|
||||
tempvar xb`i'
|
||||
local thiscat=`cat'[1,`i']
|
||||
if `thiscat'==`basecat' {
|
||||
gen `xb`i''=0 if `touse'
|
||||
}
|
||||
else matrix score `xb`i''=`bstar' if `touse', equation(`thiscat')
|
||||
replace `sumexp'=`sumexp' + exp(`xb`i'')
|
||||
}
|
||||
gen `cusump'=exp(`xb1')/`sumexp'
|
||||
forvalues i=2/`nclass' {
|
||||
replace `yimp'=`cat'[1,`i'] if `u'>`cusump'
|
||||
replace `cusump'=`cusump'+exp(`xb`i'')/`sumexp'
|
||||
replace `yimp'=. if missing(`xb`i'')
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
* Bootstrap method
|
||||
if "`match'"=="match" { /* match */
|
||||
if `catcmd' {
|
||||
* predict class-specific probabilities and convert to logits
|
||||
forvalues k=1/`nclass' {
|
||||
local outk=`cat'[1,`k']
|
||||
tempvar etaobs`k' etamis`k'
|
||||
predict `etaobs`k'' if `obstype'==1, outcome(`outk') /* probability */
|
||||
replace `etaobs`k''=log(`etaobs`k''/(1-`etaobs`k'')) /* logit */
|
||||
}
|
||||
}
|
||||
else { /* normal and binary cases */
|
||||
tempvar etaobs etamis
|
||||
predict `etaobs' if `obstype'==1, xb
|
||||
}
|
||||
}
|
||||
* Bootstrap observed data
|
||||
tempvar wt
|
||||
gen double `wt'=.
|
||||
bsample if `obstype'==1, weight(`wt')
|
||||
if "`wgt'"!="" {
|
||||
replace `wt' `exp'*`wt'
|
||||
local w [`weight'=`wt']
|
||||
}
|
||||
else local w [fweight=`wt']
|
||||
`cmd' `y' `xvars' `w', `options'
|
||||
|
||||
if `catcmd' {
|
||||
if e(k_cat)<`nclass' {
|
||||
di as error "cannot predict outcome for all classes in bootstrap sample;"
|
||||
di as error "probably one or more classes has a low frequency in the original data:"
|
||||
di as error "try amalgamating small classes of `y' and rerunning"
|
||||
exit 303
|
||||
}
|
||||
}
|
||||
if "`match'"=="match" {
|
||||
if `catcmd' {
|
||||
* predict class-specific probabilities and convert to logits
|
||||
forvalues k=1/`nclass' {
|
||||
local outk=`cat'[1,`k']
|
||||
predict `etamis`k'' if `obstype'==2, outcome(`outk') /* probability */
|
||||
replace `etamis`k''=log(`etamis`k''/(1-`etamis`k'')) /* logit */
|
||||
}
|
||||
* match
|
||||
sort `obstype', stable
|
||||
tempvar order distance
|
||||
gen `distance'=.
|
||||
gen long `order'=_n
|
||||
* For each missing obs j, find index of obs whose etaobs is closest to prediction [j].
|
||||
forvalues i=1/`nmis' {
|
||||
local j=`i'+`nobs'
|
||||
* calc summed absolute distances between etamis* and etaobs*
|
||||
replace `distance'=0 in 1/`nobs'
|
||||
forvalues k=1/`nclass' {
|
||||
replace `distance'=`distance'+abs(`etamis`k''[`j']-`etaobs`k'') in 1/`nobs'
|
||||
}
|
||||
* Find index of smallest distance between etamis* and etaobs*
|
||||
sort `distance'
|
||||
local index=`order'[1]
|
||||
* restore correct order
|
||||
sort `order'
|
||||
replace `yimp'=`y'[`index'] in `j'
|
||||
}
|
||||
}
|
||||
else { /* normal and binary */
|
||||
predict `etamis' if `obstype'==2, xb
|
||||
|
||||
* Include non-response location shift, delta.
|
||||
if `delta'!=0 {
|
||||
replace `etamis'=`etamis'+`delta'
|
||||
}
|
||||
match_normal `obstype' `nobs' `nmis' `etaobs' `etamis' `yimp' `y'
|
||||
}
|
||||
}
|
||||
else { // draw
|
||||
matrix `bstar'=e(b)
|
||||
gen `u'=uniform()
|
||||
if `normal' | `binary' {
|
||||
matrix score `xb'=`bstar' if `touse'
|
||||
if `normal' {
|
||||
replace `yimp'=`xb'+e(rmse)*invnorm(`u')
|
||||
}
|
||||
else replace `yimp'=`u'<1/(1+exp(-`xb')) if !missing(`xb')
|
||||
}
|
||||
else { /* catcmd */
|
||||
replace `yimp'=`cat'[1,1]
|
||||
if "`cmd'"=="ologit" {
|
||||
matrix score `xb'=`bstar' if `touse', forcezero
|
||||
forvalues k=1/`cuts' {
|
||||
* 1/(1+exp(-... is probability of being in category 1 or 2 or ... k
|
||||
local cutpt=`bstar'[1, `k'+`colsofb'-`cuts']
|
||||
replace `yimp'=`cat'[1,`k'+1] if `u'>1/(1+exp(-(`cutpt'-`xb')))
|
||||
}
|
||||
}
|
||||
else { /* mlogit */
|
||||
* care needed dealing with different possible base categories
|
||||
tempvar cusump sumexp
|
||||
local basecat=e(basecat) /* actual basecategory chosen by Stata */
|
||||
gen `sumexp'=0 if `touse'
|
||||
forvalues i=1/`nclass' {
|
||||
tempvar xb`i'
|
||||
local thiscat=`cat'[1,`i']
|
||||
if `thiscat'==`basecat' {
|
||||
gen `xb`i''=0 if `touse'
|
||||
}
|
||||
else matrix score `xb`i''=`bstar' if `touse', equation(`thiscat')
|
||||
replace `sumexp'=`sumexp' + exp(`xb`i'')
|
||||
}
|
||||
gen `cusump'=exp(`xb1')/`sumexp'
|
||||
forvalues i=2/`nclass' {
|
||||
replace `yimp'=`cat'[1,`i'] if `u'>`cusump'
|
||||
replace `cusump'=`cusump'+exp(`xb`i'')/`sumexp'
|
||||
replace `yimp'=. if missing(`xb`i'')
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
cap drop `gen'
|
||||
rename `yimp' `gen'
|
||||
*replace `gen'=`y' if `obstype'==1
|
||||
replace `gen'=`y' if !missing(`y')
|
||||
lab var `gen' "imputed from `y'"
|
||||
}
|
||||
di _n in ye `nmis' in gr " missing observations on `y' imputed from " /*
|
||||
*/ in ye `nobs' in gr " complete observations."
|
||||
end
|
||||
|
||||
program define match_normal
* Prediction matching, normal or binary case.
*
* For every row to be imputed, locate the donor row whose prediction
* (etaobs) is closest to that row's own prediction (etamis) and copy
* the donor's value of y into yimp.
*
* Positional arguments:
*   obstype  variable the data are sorted on first; rows with obstype==1
*            are the ones counted when looking for tied predictions
*   nobs     number of donor rows; they must occupy rows 1..nobs once the
*            data are sorted by obstype and etaobs
*            (NOTE(review): relies on donors sorting first - confirm caller)
*   nmis     number of rows to impute, read from rows nobs+1..nobs+nmis
*   etaobs   variable holding the predictions for the donor rows
*   etamis   variable holding the predictions for the rows to impute
*   yimp     variable that receives the imputed values
*   y        variable the donor values are copied from
*
* Side effects: the data are re-sorted by obstype and etaobs (stable sort),
* and one uniform() draw is consumed for each row whose nearest donor has
* tied predictions.
	args obstype nobs nmis etaobs etamis yimp y
	quietly {
		* For each missing obs j, find index of observation
		* whose etaobs is closest to etamis[j].
		tempvar sumgt
		tempname etamisi
		gen long `sumgt'=.
		* Sort etaobs within obstype
		sort `obstype' `etaobs', stable
		forvalues i=1/`nmis' {
			local j=`i'+`nobs'
			scalar `etamisi'=`etamis'[`j']
			* Running count of donors with etaobs below the target; its maximum
			* is the row number of the last donor lying below the target.
			replace `sumgt'=sum((`etamisi'>`etaobs')) in 1/`nobs'
			sum `sumgt', meanonly
			local j1=r(max)
			if `j1'==0 {
				* target lies at or below every donor: take the first donor
				local index 1
				local direction 1
			}
			else if `j1'==`nobs' {
				* target lies above every donor: take the last donor
				local index `nobs'
				local direction -1
			}
			else {
				* target lies between donors j1 and j2: take the nearer one
				local j2=`j1'+1
				if (`etamisi'-`etaobs'[`j1'])<(`etaobs'[`j2']-`etamisi') {
					local index `j1'
					local direction -1
				}
				else {
					local index `j2'
					local direction 1
				}
			}
			* In case of tied etaobs values, add random offset to index in the appropriate direction
			count if `obstype'==1 & reldif(`etaobs', `etaobs'[`index'])<1e-7 // counts as equality
			* The tie count is used straight from r(N); it is no longer copied
			* into a permanent global scalar for every imputed row.
			if r(N)>1 {
				local index=`index'+`direction'*int(uniform()*r(N))
				* The tie test is approximate, so a run of near-equal donors can
				* straddle the target and carry the offset past either end of the
				* donor rows; keep the index inside 1..nobs so that a donor value
				* is always copied.
				local index=max(1, min(`nobs', `index'))
			}
			replace `yimp'=`y'[`index'] in `j'
		}
	}
end
|
1
Modules/ado/plus/u/uvis.hlp
Normal file
1
Modules/ado/plus/u/uvis.hlp
Normal file
@ -0,0 +1 @@
|
||||
.h ice
|
Reference in New Issue
Block a user