You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

886 lines
28 KiB
Plaintext

7 months ago
capture program drop vardesc
*! version 0.5.1 jsl n option for names
* version 0.5.0 jsl string variables
* version 0.4.9b jsl trap some errors
* still buggy try vd-kins
* version 0.4.9 jsl sq bug fix
* version 0.4.8 jsl 01Jul2006 maxlen
* version 0.4.7 jsl 29Jun2006 with sq ad op overrides
* version 0.4.6b jsl 29Jun2006 with squeezing
* DO: add truncate for truncate at some column length
// prints names, labels and descriptive statistics in a table
//
// Syntax: vardesc [varlist] [if] [in] [, options]; may be prefixed with by.
// With no varlist, every variable in the dataset is described.
// order() names the columns to print; style() selects a preset column order.
// c*() and d*() set the width and number of decimals of a single column;
// oc*() pin a column width so that squeeze/optimize do not resize it.
// columns() and decimal() use 0 to mean "not specified".
program define vardesc, byable(recall)
version 8
syntax [varlist(default=none)] [if] [in] ///
[, ///
SQueeze /// squeeze out extra space
SQMin(integer 6) /// minimum column size if compression
OPtimize /// optimize space used without any truncation
OPTMin(integer 5) /// minimum column size if optimized
OCLABel(integer 0) /// column widths to override squeeze and optimize
OCNAme(integer 0) ///
OCVAlues(integer 0) ///
OCPCTile(integer 0) ///
OCMAx(integer 0) ///
OCMEan(integer 0) ///
OCMOde(integer 0) ///
OCMIn(integer 0) ///
OCNobs(integer 0) ///
OCNMISS(integer 0) ///
OCSd(integer 0) ///
OCVar(integer 0) ///
SPacer(integer 1) /// spacing added to minimum with squeeze and optimize
Range(string) /// range of values
Style(string) /// define style of output
FORCEN /// force printing of N's
Values(string) /// values to show % of cases with that value
OTHervalues /// show total of other values
aorder /// alphabetize output
First(string) /// first variable to list
Basic /// just the basic table
Columns(integer 0) /// change all column widths
Decimal(integer 0) /// change all # of decimals
CLABel(integer 25) ///
CNAme(integer 12) ///
CVAlues(integer 6) DVAlues(integer 1) /// columns and decimal digits
CPCTile(integer 10) DPCTile(integer 2) ///
CMAx(integer 9) DMAx(integer 2) ///
CMEan(integer 9) DMEan(integer 2) ///
CMOde(integer 9) DMOde(integer 2) ///
CMIn(integer 9) DMIn(integer 2) ///
CNobs(integer 6) DNobs(integer 0) ///
CNMISS(integer 7) DNMISS(integer 0) ///
CSd(integer 9) DSd(integer 2) ///
CVar(integer 9) DVar(integer 2) ///
LEFTLabel /// left justify var label
LEFTName /// left justify var name
MINLabel /// minimize column size of variable labels
MINName /// minimize column size of variable names
NBasic /// basic table with numbered rows
NOHeader /// suppress header
NOMiss /// drop all missing cases
NUMber /// number the list of variables
Order(string) /// order for displaying items
RIGHTLabel /// right justify var labels
RIGHTName /// right justify var names
VERBose /// add extra output
MAXLength(integer 0) /// truncated label if exceeds this length
M80 /// maxlength 80
NCOL80 /// print columns
Names /// only list names
]
* m80 is shorthand for maxlength(80)
if "`m80'"!="" {
    local maxlength = 80
}
* ncol80 prints a column ruler above the table
if "`ncol80'"!="" {
    display "0 1 2 3 4 5 6 7 8"
    display "12345678901234567890123456789012345678901234567890123456789012345678901234567890"
}
// names accepted in order() once synonyms have been resolved
local validnms "mean min max sd var name label nobs nmiss mode"
local validpct "pct1 pct5 pct01 pct05 pct10 pct25 pct50 pct75 pct90 pct95 pct99"
local validall "`validnms' `validpct' values"
// column order used when order() is not given -- edit to change the default
local orderdefault "name nobs mean sd min max label"
// experiment with defaults
/*
local optimize "optimize"
if "`squeeze'"=="squeeze" {
local optimize ""
}
if "`fixed'"=="fixed" {
local fixed "fixed"
}
*/
// bookkeeping locals
* counters and indicators that start at zero:
*   drop_ifin    - observations excluded by if/in
*   drop_total   - observations excluded for any reason
*   drop_miss    - observations excluded for missing values
*   ISmissing    - any missing values found
*   ISpct        - percentiles requested
*   ISmode       - mode requested (only computed on request; can be slow)
*   ISno_n_equal - suppress the N= line at the end of the list
foreach flag in drop_ifin drop_total drop_miss ISmissing ISpct ISmode ISno_n_equal {
    local `flag' = 0
}
* indicators that start at one
local ISsamenobs = 1    // every variable has the same N until shown otherwise
local ISnomissing = 1   // no missing values until shown otherwise
* percentages at given values are needed whenever values() was specified
local ISvalues = ("`values'"!="")
// variables to be analyzed
// When no varlist is given, expand _all into the full list of variable names.
if "`varlist'" == "" {
unab varlist : _all // if none, use all
* remove variable create with byable
* NOTE(review): no local named _byindex is defined anywhere in the visible
* code, so this condition looks like it is always false and the removal
* never runs -- confirm what was intended here.
* NOTE(review): subinstr() matches substrings, not whole words; if this
* branch is ever reached it could also alter longer variable names.
if "`_byindex'"!="" {
local varlist = subinstr("`varlist'","`_byindex'","",.)
}
}
// decode range of values
// range(a b) is shorthand for values(lo lo+1 ... hi), where lo and hi are
// the smaller and larger of the two numbers. It replaces any values() list.
if "`range'"!="" {
    * exactly two endpoints are required: one leaves the upper bound
    * undefined, three or more are ambiguous; stop instead of continuing
    local nrange : word count `range'
    if `nrange'!=2 {
        di in red "range option requires exactly two values"
        exit 198
    }
    gettoken rfirst rsecond : range
    local r1 = `rfirst'
    local r2 = `rsecond'
    * endpoints may be given in either order
    local rangemin = min(`r1',`r2')
    local rangemax = max(`r1',`r2')
    * expand to the list of values in steps of one
    local rangelist ""
    forvalues v = `rangemin'(1)`rangemax' {
        local rangelist `rangelist' `v'
    }
    local ISvalues = 1 // need to % at values?
    local values `rangelist'
}
// set up for printing of labels and names
* blanks printed in front of the variable label: one blank per unit of
* spacer(). Built in a loop so that every spacer value gets the matching
* number of blanks; spacer(0) or a negative value yields no offset.
local labeloffset ""
forvalues i = 1/`spacer' {
    local labeloffset "`labeloffset' "
}
* justification of label: "-" inside a %-#s format left-justifies,
* an empty sign right-justifies. Left is the default and leftlabel wins
* when both leftlabel and rightlabel are given.
local lblsign "-"
if "`rightlabel'"=="rightlabel" {
    local lblsign ""
}
if "`leftlabel'"=="leftlabel" {
    local lblsign "-"
}
* alignment of name: same rules as for the label
local namesign "-"
if "`rightname'"=="rightname" {
    local namesign ""
}
if "`leftname'"=="leftname" {
    local namesign "-"
}
// decimal() and columns() override the per-statistic settings in one go;
// 0 means the option was not given, so the individual values are kept
foreach stat in max mean min sd var pctile values {
    if `decimal'!=0 {
        local d`stat' = `decimal'
    }
    if `columns'!=0 {
        local c`stat' = `columns'
    }
}
// arrange order of variables
* alphabetize the list
if "`aorder'"=="aorder" {
    local varlist : list sort varlist
}
* put the first() variable(s) in front of the list
if "`first'"!="" {
    * expand abbreviations so the names match the entries of varlist
    unab first : `first'
    * remove whole names only: a substring replace would turn "agesq"
    * into "sq" when first(age) is requested
    local varlist : list varlist - first
    local varlist `first' `varlist'
}
// decode order in which items are printed
local pctlist "" // requested percentiles are collected here later
* fall back to the default column order when order() is empty
if "`order'"=="" {
    local order `orderdefault'
}
// style() presets: each accepts a few abbreviations, stores the full style
// name and replaces the column order; the names option acts like
// style(names) when no other style matched first
if inlist("`style'","basic","b") {
    local style basic
    local order name nobs mean sd min max label
}
else if inlist("`style'","check","c") {
    local style check
    local order name mean med mode min p1 p99 max nmiss
}
else if inlist("`style'","missing","miss","m") {
    local style missing
    local order name nmiss nobs mean min max label
}
else if inlist("`style'","names","n","nam","nm","name") | "`names'"=="names" {
    local style names
    local order name label
    * widen the label column unless the user changed it from the default
    if `clabel'==25 {
        local clabel = 45
    }
}
else if inlist("`style'","outliers","out","o") {
    local style outliers
    local order name min p1 p5 p10 p90 p95 p99 max
}
else if inlist("`style'","range","r") {
    local style range
    local order name nobs values label
}
// synonyms: rewrite alternative spellings in order() to the canonical
// statistic names. The two lists are parallel (word i of aliases maps to
// word i of targets) and are applied in the order listed.
local aliases "median med minimum maximum variance mn stddev val value lab lbl nm nam obs n"
local targets "pct50 pct50 min max var mean sd values values label label name name nobs nobs"
local nalias : word count `aliases'
forvalues k = 1/`nalias' {
    local from : word `k' of `aliases'
    local to : word `k' of `targets'
    local order = subinword("`order'","`from'","`to'",.)
}
* p## is accepted as a short form of pct##
foreach p of numlist 1 5 10 25 50 75 90 95 99 {
    local order = subinword("`order'","p`p'","pct`p'",.)
}
// check order and make sure statistics are valid
// Every word of order() must be one of the names in validall. All invalid
// words are reported before stopping, and the program then exits with
// return code 198 so callers can tell that the request was rejected
// (a bare exit would report success).
local isexit = 0
foreach o of local order {
    local isok : list o in validall
    if !`isok' {
        di in red "invalid name in order(): `o'"
        local isexit = 1
    }
}
if `isexit'==1 {
    exit 198
}
* outorder is the list of columns actually printed; it starts as order()
local outorder "`order'"
* number of items requested
local norder : word count `order'
* set to 1 when "values" is one of the requested items
local print_value = 0
* walk through the items, setting indicators and collecting percentiles
local i = 0
foreach o of local order {
    local ++i
    * a label printed as the first column gets no leading blanks
    if `i'==1 & "`o'"=="label" {
        local labeloffset ""
    }
    if "`o'"=="mode" {
        local ISmode = 1
    }
    if "`o'"=="values" {
        local print_value = 1
    }
    * the item has to appear in the list of valid names
    if strpos("`validall'","`o'")==0 {
        di in red "invalid name in order(): `o'"
        exit
    }
    * pct## items: record the numeric percentile, e.g. pct05 -> 5
    if substr("`o'",1,3)=="pct" {
        local ISpct = 1
        local pctnum = real(substr("`o'",4,5))
        local pctlist `pctlist' `pctnum'
    }
}
* values() or range() given without "values" in order(): append the column
if `print_value'==0 & `ISvalues'==1 {
    local outorder `order' values
}
// if minlabel, determine smallest size that will fit label
if "`minlabel'"=="minlabel" {
    local maxlab = 0
    * find the longest variable label
    foreach v of local varlist {
        local `v'label : variable label `v'
        local ll = length("``v'label'")
        if `ll'>`maxlab' {
            local maxlab = `ll'
        }
    }
    * label column is the longest label plus spacer()
    local clabel = `maxlab' + `spacer'
}
// if minname, determine smallest size that will fit names
if "`minname'"=="minname" {
    local maxname = 0
    * find the longest variable name
    foreach v of local varlist {
        local ll = length("`v'")
        if `ll'>`maxname' {
            local maxname = `ll'
        }
    }
    * name column is the longest name plus spacer()
    local cname = `maxname' + `spacer'
}
* basic and nbasic need at least 9 columns for the name; the test is <9 so
* that a width of exactly 8 is raised as well
if "`nbasic'"=="nbasic" | "`basic'"=="basic" {
    if `cname'<9 {
        local cname = 9
    }
}
// define sample and check missing values - 0.4.0 - 28Jun2006 - byable
tempvar touse
* start from the if and in restrictions only
mark `touse' `if' `in'
qui count if `touse'==0
local drop_ifin = r(N) // excluded by if and in
* nomiss additionally excludes observations with a missing value on any of
* the variables; every variable then shares the same N and none has missing
if "`nomiss'"=="nomiss" {
    markout `touse' `varlist'
    local ISsamenobs = 1
    local ISmissing = 0
}
* recount now that missing cases may have been excluded
qui count if `touse'==0
local drop_total = r(N) // excluded for any reason
local drop_miss = `drop_total' - `drop_ifin'
* verbose reports how many observations were excluded and why
if "`verbose'"=="verbose" {
    di
    di "Dropped for if & in conditions: " _col(50) `drop_ifin'
    di "Dropped for missing data: " _col(50) `drop_miss'
    di "Dropped for if, in or missing:" _col(50) `drop_total'
}
// compute statistics
// For every variable the results are stored in locals named
// <varname><stat> (mean, sd, var, min, max, label, name, nobs, nmiss, mode,
// pct##, pval<value>, pvalother) that the printing code reads back later.
local vnum = 0
foreach v of local varlist {
    local ++vnum
    * note once whether the variable is a string; used for the mode, the
    * missing count and the values() percentages below
    capture confirm string variable `v'
    local isstr = (_rc==0)
    * percentiles are only returned by summarize with the detail option
    if `ISpct'==1 {
        qui sum `v' if `touse', detail
        foreach p of local pctlist {
            local `v'pct`p' = r(p`p')
        }
    }
    else {
        qui sum `v' if `touse'
    }
    * statistics based on nonmissing
    local `v'mean = r(mean)
    local `v'sd = r(sd)
    local `v'var = r(Var)
    local `v'min = r(min)
    local `v'max = r(max)
    local `v'label : variable label `v'
    local `v'name "`v'"
    local `v'nobs = r(N)
    local nnow = r(N)
    * mode - missing if multiple modes, if not requested, or for strings
    local `v'mode = .
    if `ISmode'==1 & !`isstr' {
        tempvar vmode
        qui egen `vmode' = mode(`v') if `touse'
        * egen fills in the mode only for observations in the sample, so
        * take it from the sample; observation 1 may be excluded by if/in
        qui sum `vmode' if `touse', meanonly
        local `v'mode = r(min)
        * free the temporary variable instead of keeping one per variable
        drop `vmode'
    }
    * compute number missing: "" for strings, >=. for numeric variables
    if `isstr' {
        qui count if `v'=="" & `touse'
    }
    else {
        qui count if `v'>=. & `touse'
    }
    local `v'nmiss = r(N)
    local nmissprior = r(N)
    if ``v'nmiss'!=0 {
        local ISmissing = 1 // missing data
    }
    * determine if n varies across variables due to missing values;
    * the first variable has nothing to be compared with
    if `vnum'>1 {
        if `nprior'!=`nnow' {
            local ISsamenobs = 0
        }
    }
    local nprior = `nnow'
    * compute % with given values
    if `ISvalues'==1 {
        local n_notother = 0 // # of cases at any of the listed values
        * non missing N
        local n = ``v'nobs'
        foreach val of local values {
            * a string variable cannot be compared with a number; count
            * it as no matches instead of stopping with a type mismatch
            if `isstr' {
                local nval = 0
            }
            else {
                qui count if `v'==`val' & `touse'
                local nval = r(N)
            }
            local n_notother = `n_notother' + `nval'
            local `v'pval`val' = 100 * (`nval'/`n')
        }
        local `v'pvalother = 100 * ((`n'-`n_notother')/`n')
    }
} // loop through variables for computations
// decide on whether to print nobs and nmiss
* N (and number missing) of the last variable, for the line under the table
local n_for_all = `nprior'
if `ISmissing'==1 {
    local nmiss_for_all = `nmissprior'
}
* when every variable has the same N the nobs and nmiss columns are dropped,
* unless forcen was given or the missing style is in use
if "`style'"!="missing" & `ISsamenobs'==1 & "`forcen'"!="forcen" {
    foreach stat in nobs nmiss {
        local outorder = subinword("`outorder'","`stat'","",.)
    }
}
* remember whether the two columns are still going to be printed
local ISnobs_in_order = (strpos("`outorder'","nobs")>0)
local ISnmiss_in_order = (strpos("`outorder'","nmiss")>0)
// squeeze or optimize
// Measures the width every printed item actually needs (cis<item>) and
// then resizes the column widths (c<item>):
//   squeeze   - only shrinks a column, never below sqmin() plus spacer()
//   optimize  - sets every column to the needed width (at least optmin())
//               plus spacer()
//   maxlength - shortens the label column when the line would be too long
// oc<item>() overrides the computed width. All percentiles share the
// pctile settings (cpctile, dpctile, ocpctile, cispctile).
if "`squeeze'"=="squeeze" | "`optimize'"=="optimize" ///
    | `maxlength'>0 {
    * set counters for columns needed to 0
    foreach o of local outorder {
        if substr("`o'",1,3)=="pct" {
            local o "pctile"
        }
        local cis`o' = 0
    }
    * check needed lengths for each statistic for each variable
    foreach v of local varlist {
        foreach o of local outorder {
            * classify the item before the chain starts: a command placed
            * between a closing brace and "else" breaks the if/else chain
            local o3 = substr("`o'",1,3)
            * name
            if "`o'"=="name" {
                local l = length("`v'")
                local cisname = max(`cisname',`l')
            }
            * variable label: offset, label text and two extra blanks
            else if "`o'"=="label" {
                local oout "``v'`o''"
                local l = length("`labeloffset'`oout'") + 2
                local cislabel = max(`cislabel',`l')
            }
            * number missing
            else if "`o'"=="nmiss" {
                local ofmt "%`c`o''.`d`o''f"
                local stat = ``v'nmiss'
                local l = length(string(`stat',"`ofmt'"))
                local cis`o' = max(`cis`o'',`l')
            }
            * values: widest of the percentages that will be printed
            else if "`o'"=="values" {
                local ofmt "%`cvalues'.`dvalues'f"
                foreach val of local values {
                    local stat = ``v'pval`val''
                    local l = length(string(`stat',"`ofmt'"))
                    local cis`o' = max(`cis`o'',`l')
                }
                if "`othervalues'"=="othervalues" {
                    local stat = ``v'pvalother'
                    local l = length(string(`stat',"`ofmt'"))
                    local cis`o' = max(`cis`o'',`l')
                }
            }
            * percentiles: tracked under the shared pctile counter, which
            * is the one that was initialised above
            else if "`o3'"=="pct" {
                local pctnum = substr("`o'",4,5) // if pct## retrieve ##
                local pctnum = `pctnum' // strip off leading 0
                local ofmt "%`cpctile'.`dpctile'f"
                local stat = ``v'pct`pctnum''
                local l = length(string(`stat',"`ofmt'"))
                local cispctile = max(`cispctile',`l')
            }
            * other statistics
            else {
                local ofmt "%`c`o''.`d`o''f"
                local stat = ``v'`o''
                local l = length(string(`stat',"`ofmt'"))
                local cis`o' = max(`cis`o'',`l')
            }
        } // loop through items to print
    } // loop through variables
    * if squeeze, change column sizes
    if "`squeeze'"=="squeeze" {
        foreach o of local outorder {
            * percentiles use the pctile settings, as in the optimize loop
            if substr("`o'",1,3)=="pct" {
                local o "pctile"
            }
            * if override value, don't squeeze
            if `oc`o''!=0 {
                local c`o' = `oc`o''
            }
            * else shrink the column when it is wider than needed
            else {
                if `cis`o''<`sqmin' {
                    local cis`o' = `sqmin'
                }
                if `c`o''>`cis`o'' {
                    local c`o' = `cis`o'' + `spacer'
                }
            }
        } // loop through outorder
    }
    * if optimize, use the needed width for every column
    if "`optimize'"=="optimize" {
        foreach o of local outorder {
            if substr("`o'",1,3)=="pct" {
                local o "pctile"
            }
            * if override value
            if `oc`o''!=0 {
                local c`o' = `oc`o''
            }
            * else use optimize value
            else {
                if `cis`o''<`optmin' {
                    local cis`o' = `optmin'
                }
                local c`o' = `cis`o'' + `spacer'
            }
        } // outorder loop
    }
    * get current total length of output
    local tlen = 2
    foreach o of local outorder {
        if substr("`o'",1,3)=="pct" {
            local o "pctile"
        }
        local tlen = `tlen' + `c`o''
    } // outorder loop
    * if exceeds maxlength, reduce label length; keep at least one column
    * so the resulting string format stays valid
    if `maxlength'!=0 {
        if `tlen'>`maxlength' {
            local dif = `tlen' - `maxlength'
            local clabel = max(1,`clabel' - `dif')
        }
    }
} // squeeze or optimize
// print column headings
// One heading per item of outorder, written on a single line with the same
// widths as the table columns. Narrow columns get the shorter heading.
if "`noheader'"!="noheader" {
    display
    local no = 0
    foreach o of local outorder {
        local ++no
        * percentile items are named pct##; pull out the number
        local o3 = substr("`o'",1,3)
        local isopct = 0
        if "`o3'"=="pct" {
            local isopct = 1
            local pctnum = real(substr("`o'",4,5))
        }
        * with the number option, pad the start of the line once
        if "`number'"=="number" & `no'==1 {
            di _cont %2.0f " "
        }
        * headings with their own format: name, label, values, percentiles
        local head ""
        if "`o'"=="name" {
            local nmhead "Var"
            if `c`o''>8 {
                local nmhead "Variable"
            }
            di _cont %`namesign'`c`o''s "`nmhead'"
        }
        else if "`o'"=="label" {
            di _cont %`lblsign'`c`o''s "`labeloffset'Label"
        }
        else if "`o'"=="values" {
            * one heading per requested value, plus the catch-all column
            foreach val of local values {
                di _cont %`c`o''s "%`val's"
            }
            if "`othervalues'"=="othervalues" {
                di _cont %`c`o''s "%Other"
            }
        }
        else if `isopct'==1 {
            local pcthead "`pctnum'%"
            if `pctnum'==50 {
                local pcthead "Median"
            }
            di _cont %`cpctile's "`pcthead'"
        }
        * remaining items share one format; only the heading text differs
        else if "`o'"=="nobs" {
            local ISno_n_equal = 1
            local head "Obs"
        }
        else if "`o'"=="nmiss" {
            local head "#Miss"
            if `c`o''>7 {
                local head "Missing"
            }
        }
        else if "`o'"=="mean" {
            local head "Mean"
        }
        else if "`o'"=="mode" {
            local head "Mode"
        }
        else if "`o'"=="sd" {
            local head "StdDev"
            if `csd'<=6 {
                local head "SD"
            }
        }
        else if "`o'"=="var" {
            local head "Var"
            if `c`o''>7 {
                local head "Variance"
            }
        }
        else if "`o'"=="min" {
            local head "Min"
            if `c`o''>7 {
                local head "Minimum"
            }
        }
        else if "`o'"=="max" {
            local head "Max"
            if `c`o''>7 {
                local head "Maximum"
            }
        }
        if "`head'"!="" {
            di _cont %`c`o''s "`head'"
        }
    } // loop through order
} // if no header
* finish the heading line (a blank line when the header is suppressed)
display
// loop through variables and print table
// One output line per variable; the items are printed in outorder with the
// column widths and decimals held in c<item> and d<item>.
local vnum = 0
foreach v of local varlist {
    local ++vnum
    * with the number option each row starts with its sequence number
    if "`number'"=="number" {
        di _cont %2.0f `vnum' ". "
    }
    * loop through items to print
    foreach o of local outorder {
        * classify the item before the chain starts: a command placed
        * between a closing brace and "else" breaks the if/else chain
        local o3 = substr("`o'",1,3)
        * name
        if "`o'"=="name" {
            local ofmt "%`namesign'`c`o''s"
            di _cont `ofmt' "``v'`o''"
        }
        * variable label, truncated to the label column width
        else if "`o'"=="label" {
            local ofmt "%`lblsign'`c`o''s"
            local oout "``v'`o''"
            local oout = substr("`oout'",1,`clabel')
            di _cont `ofmt' "`labeloffset'`oout'"
        }
        * number missing
        else if "`o'"=="nmiss" {
            local stat = ``v'nmiss'
            local ofmt "%`c`o''.`d`o''f"
            di _cont `ofmt' `stat'
        }
        * values: one column per requested value, then the remainder
        else if "`o'"=="values" {
            local ofmt "%`cvalues'.`dvalues'f"
            foreach val of local values {
                local stat = ``v'pval`val''
                di _cont `ofmt' `stat'
            }
            if "`othervalues'"=="othervalues" {
                local stat = ``v'pvalother'
                di _cont `ofmt' `stat'
            }
        }
        * percentiles
        else if "`o3'"=="pct" {
            local pctnum = substr("`o'",4,5) // if pct## retrieve ##
            local pctnum = `pctnum' // strip off leading 0
            local ofmt "%`cpctile'.`dpctile'f"
            local stat = ``v'pct`pctnum''
            di _cont `ofmt' `stat'
        }
        * other statistics
        else {
            local ofmt "%`c`o''.`d`o''f"
            local stat = ``v'`o''
            di _cont `ofmt' `stat'
        }
    } // loop through items to print
    display
} // loop through variables
* footer: when all variables share the same N, report it once under the
* table, but only if the Obs column is not being printed
if `ISsamenobs'==1 & `ISnobs_in_order'==0 {
    di _new "N = `n_for_all'"
}
* likewise report the number missing once when any variable has missing
* values and the missing column is not being printed
if `ISsamenobs'==1 & `ISnmiss_in_order'==0 & `ISmissing'==1 {
    di "N missing = `nmiss_for_all'"
}
end