Setup initial file structure

This commit is contained in:
2024-03-05 11:20:30 +01:00
parent 6183a6391b
commit 70e53e7760
708 changed files with 277486 additions and 0 deletions

View File

@ -0,0 +1,885 @@
capture program drop vardesc
*! version 0.5.1 jsl n option for names
* version 0.5.0 jsl string variables
* version 0.4.9b jsl trap some errors
* still buggy try vd-kins
* version 0.4.9 jsl sq bug fix
* version 0.4.8 jsl 01Jul2006 maxlen
* version 0.4.7 jsl 29Jun2006 with sq ad op overrides
* version 0.4.6b jsl 29Jun2006 with squeezing
* DO: add truncate for truncate at some column length
// prints names, labels and descriptive statistics in a table
program define vardesc, byable(recall)
    // vardesc: display a table with one row per variable showing the items
    // requested in order() or implied by style(): name, label, N, number
    // missing, mean, sd, variance, min, max, mode, percentiles and the
    // percent of cases at the values given in values() or range().
    // All results are written to the Results window; nothing is returned.
    // Exits with r(198) when range() or order() is invalid.
    version 8
    syntax [varlist(default=none)] [if] [in] ///
        [, ///
        SQueeze /// squeeze out extra space
        SQMin(integer 6) /// minimum column size if compression
        OPtimize /// optimize space used without any truncation
        OPTMin(integer 5) /// minimum column size if optimized
        OCLABel(integer 0) /// column widths to override squeeze and optimize
        OCNAme(integer 0) ///
        OCVAlues(integer 0) ///
        OCPCTile(integer 0) ///
        OCMAx(integer 0) ///
        OCMEan(integer 0) ///
        OCMOde(integer 0) ///
        OCMIn(integer 0) ///
        OCNobs(integer 0) ///
        OCNMISS(integer 0) ///
        OCSd(integer 0) ///
        OCVar(integer 0) ///
        SPacer(integer 1) /// spacing added to minimum with squeeze and optimize
        Range(string) /// range of values
        Style(string) /// define style of output
        FORCEN /// force printing of N's
        Values(string) /// values to show % of cases with that value
        OTHervalues /// show total of other values
        aorder /// alphabetize output
        First(string) /// first variable to list
        Basic /// just the basic table
        Columns(integer 0) /// change all column widths
        Decimal(integer 0) /// change all # of decimals
        CLABel(integer 25) ///
        CNAme(integer 12) ///
        CVAlues(integer 6) DVAlues(integer 1) /// columns and decimal digits
        CPCTile(integer 10) DPCTile(integer 2) ///
        CMAx(integer 9) DMAx(integer 2) ///
        CMEan(integer 9) DMEan(integer 2) ///
        CMOde(integer 9) DMOde(integer 2) ///
        CMIn(integer 9) DMIn(integer 2) ///
        CNobs(integer 6) DNobs(integer 0) ///
        CNMISS(integer 7) DNMISS(integer 0) ///
        CSd(integer 9) DSd(integer 2) ///
        CVar(integer 9) DVar(integer 2) ///
        LEFTLabel /// left justify var label
        LEFTName /// left justify var name
        MINLabel /// minimize column size of variable labels
        MINName /// minimize column size of variable names
        NBasic /// basic table with numbered rows
        NOHeader /// suppress header
        NOMiss /// drop all missing cases
        NUMber /// number the list of variables
        Order(string) /// order for displaying items
        RIGHTLabel /// right justify var labels
        RIGHTName /// right justify var names
        VERBose /// add extra output
        MAXLength(integer 0) /// truncated label if exceeds this length
        M80 /// maxlength 80
        NCOL80 /// print columns
        Names /// only list names
        ]

    if "`m80'"=="m80" {
        local maxlength = 80
    }
    if "`ncol80'"=="ncol80" {
        // column ruler: the tens digits are positioned with _col() so they
        // sit above every tenth character of the units line
        di "0" _col(10) "1" _col(20) "2" _col(30) "3" _col(40) "4" ///
            _col(50) "5" _col(60) "6" _col(70) "7" _col(80) "8"
        di "12345678901234567890123456789012345678901234567890123456789012345678901234567890"
    }

    // default ordering of statistics -- change for different default
    local orderdefault "name nobs mean sd min max label"
    // valid names for statistics being printed; see synonyms below
    local validnms mean min max sd var name label nobs nmiss mode
    local validpct pct1 pct5 pct01 pct05 pct10 pct25 ///
        pct50 pct75 pct90 pct95 pct99
    local validall `validnms' `validpct' values

    // locals used to keep track of things
    local drop_ifin = 0     // dropped by if and in conditions
    local drop_total = 0    // dropped for any reason
    local drop_miss = 0     // dropped due to missing values
    local ISsamenobs = 1    // same N for all variables? assume yes to start
    local ISmissing = 0     // is there missing? assume no to start
    local ISpct = 0         // need to compute percentiles?
    local ISmode = 0        // only compute mode if requested since it can be slow
    local ISvalues = 0      // need % at values?
    if "`values'"!="" {
        local ISvalues = 1
    }

    // variables to be analyzed
    if "`varlist'" == "" {
        unab varlist : _all // if none, use all
        * remove variable created with byable
        if "`_byindex'"!="" {
            local varlist = subinstr("`varlist'","`_byindex'","",.)
        }
    }

    // decode range of values: exactly two endpoints, in either order
    if "`range'"!="" {
        local nrange : word count `range'
        if `nrange'<2 {
            di in red "range option requires two values"
            exit 198
        }
        if `nrange'>2 {
            di in red "range option can have only two values"
            exit 198
        }
        local w1 : word 1 of `range'
        local w2 : word 2 of `range'
        local r1 = `w1'
        local r2 = `w2'
        if `r1'<`r2' {
            local rangemin = `r1'
            local rangemax = `r2'
        }
        else {
            local rangemin = `r2'
            local rangemax = `r1'
        }
        local rangelist ""
        forvalues v = `rangemin'(1)`rangemax' {
            local rangelist `rangelist' `v'
        }
        * range() replaces whatever was given in values()
        local ISvalues = 1
        local values `rangelist'
    }

    // set up for printing of labels and names
    * blanks before variable label: one per unit of spacer for spacer 1-9;
    * any other spacer value leaves the offset empty
    local labeloffset ""
    if `spacer'>=1 & `spacer'<=9 {
        forvalues s = 1/`spacer' {
            local labeloffset "`labeloffset' "
        }
    }
    * justification of label: left unless rightlabel; leftlabel wins if both
    local lblsign "-"
    if "`rightlabel'"=="rightlabel" {
        local lblsign ""
    }
    if "`leftlabel'"=="leftlabel" {
        local lblsign "-"
    }
    * alignment of name: left unless rightname; leftname wins if both
    local namesign "-"
    if "`rightname'"=="rightname" {
        local namesign ""
    }
    if "`leftname'"=="leftname" {
        local namesign "-"
    }

    // global changes to column and decimal settings
    if `decimal'!=0 {
        foreach n in max mean min sd var pctile values {
            local d`n' = `decimal'
        }
    }
    if `columns'!=0 {
        foreach n in max mean min sd var pctile values {
            local c`n' = `columns'
        }
    }

    // arrange order of variables
    * alphabetize the list
    if "`aorder'"=="aorder" {
        local varlistunsorted `varlist'
        local varlist : list sort varlistunsorted
    }
    * put the first variable in front of the list; match whole words only so
    * that other names containing `first' as a substring are left intact
    if "`first'"!="" {
        local varlist = subinword("`varlist'","`first'","",.)
        local varlist `first' `varlist'
    }

    // decode order in which items are printed
    local pctlist "" // list to hold requested percentiles
    * default order of statistics if order not specified
    if "`order'"=="" {
        local order `orderdefault'
    }

    // define styles; a recognized style replaces order()
    if "`style'"=="basic" | "`style'"=="b" {
        local style basic
        local order name nobs mean sd min max label
    }
    else if "`style'"=="check" | "`style'"=="c" {
        local style check
        local order name mean med mode min p1 p99 max nmiss
    }
    else if "`style'"=="missing" | "`style'"=="miss" | "`style'"=="m" {
        local style missing
        local order name nmiss nobs mean min max label
    }
    else if "`style'"=="names" | "`style'"=="n" | ///
        "`style'"=="nam" | "`style'"=="nm" | "`style'"=="name" | ///
        "`names'"=="names" {
        local style names
        local order name label
        * widen the label column unless the user changed it from its default
        if `clabel'==25 {
            local clabel = 45
        }
    }
    else if "`style'"=="outliers" | "`style'"=="out" | "`style'"=="o" {
        local style outliers
        local order name min p1 p5 p10 p90 p95 p99 max
    }
    else if "`style'"=="range" | "`style'"=="r" {
        local style range
        local order name nobs values label
    }

    // synonyms
    local order = subinword("`order'","median" ,"pct50" ,.)
    local order = subinword("`order'","med" ,"pct50" ,.)
    local order = subinword("`order'","minimum" ,"min" ,.)
    local order = subinword("`order'","maximum" ,"max" ,.)
    local order = subinword("`order'","variance","var" ,.)
    local order = subinword("`order'","mn" ,"mean" ,.)
    local order = subinword("`order'","stddev" ,"sd" ,.)
    local order = subinword("`order'","val" ,"values",.)
    local order = subinword("`order'","value" ,"values",.)
    local order = subinword("`order'","lab" ,"label" ,.)
    local order = subinword("`order'","lbl" ,"label" ,.)
    local order = subinword("`order'","nm" ,"name" ,.)
    local order = subinword("`order'","nam" ,"name" ,.)
    local order = subinword("`order'","obs" ,"nobs" ,.)
    local order = subinword("`order'","n" ,"nobs" ,.)
    foreach p in 1 5 10 25 50 75 90 95 99 {
        local order = subinword("`order'","p`p'","pct`p'",.)
    }

    // check order and make sure statistics are valid; report every bad
    // item before stopping
    local isexit = 0
    foreach o in `order' {
        local isbad = 1
        foreach n in `validall' {
            if "`o'"=="`n'" {
                local isbad = 0
            }
        }
        if `isbad' {
            di in red "invalid name in order(): `o'"
            local isexit = 1
        }
    }
    if `isexit'==1 {
        exit 198
    }

    * list of statistics in order to be output
    local outorder "`order'"
    * is values among the items?
    local print_value = 0
    * scan the items for those that need extra work
    local i = 0
    foreach o in `order' {
        local ++i
        * no lead spacing if label is listed first
        if `i'==1 & "`o'"=="label" {
            local labeloffset ""
        }
        if "`o'"=="mode" {
            local ISmode = 1
        }
        if "`o'"=="values" {
            local print_value = 1
        }
        * if pct##, decode ## and add it to the list of percentiles
        if substr("`o'",1,3)=="pct" {
            local ISpct = 1
            local pctnum = substr("`o'",4,5)    // retrieve ## from pct##
            local pctnum = `pctnum'             // strip off leading 0
            local pctlist `pctlist' `pctnum'
        }
    } // loop over output order
    * add values at end if values() but values not in order() list
    if `print_value'==0 & `ISvalues'==1 {
        local outorder `order' values
    }

    // if minlabel, determine smallest size that will fit label
    if "`minlabel'"=="minlabel" {
        local maxlab = 0
        foreach v in `varlist' {
            local `v'label : variable label `v'
            local ll = length("``v'label'")
            if `ll'>`maxlab' {
                local maxlab = `ll'
            }
        }
        * set new column size for labels as spacer larger than minimum
        local clabel = `maxlab' + `spacer'
    }
    // if minname, determine smallest size that will fit names
    if "`minname'"=="minname" {
        local maxname = 0
        foreach v in `varlist' {
            local ll = length("`v'")
            if `ll'>`maxname' {
                local maxname = `ll'
            }
        }
        * set new column size for names
        local cname = `maxname' + `spacer'
    }
    * need at least 9 for name
    if "`nbasic'"=="nbasic" | "`basic'"=="basic" {
        if `cname'<9 local cname = 9
    }

    // define sample and check missing values - 0.4.0 - 28Jun2006 - byable
    tempvar touse
    * initially, just use if and in
    mark `touse' `if' `in'
    qui count if `touse'==0
    local drop_ifin = r(N) // total dropped based on if and in
    * if nomiss, drop cases missing on any variable in the list - 0.4.0
    if "`nomiss'"=="nomiss" {
        markout `touse' `varlist'
    }
    * counted after missing might be dropped
    qui count if `touse'==0
    local drop_total = r(N)
    local drop_miss = `drop_total' - `drop_ifin'
    if "`verbose'"=="verbose" {
        di
        di "Dropped for if & in conditions: " _col(50) `drop_ifin'
        di "Dropped for missing data: " _col(50) `drop_miss'
        di "Dropped for if, in or missing:" _col(50) `drop_total'
    }

    // compute statistics; results are stored in locals named <var><item>
    local vnum = 0
    foreach v in `varlist' {
        local ++vnum
        * detail is only requested when percentiles are needed
        if `ISpct'==1 {
            qui sum `v' if `touse', detail
            foreach p in `pctlist' {
                local `v'pct`p' = r(p`p')
            }
        }
        else {
            qui sum `v' if `touse'
        }
        * statistics based on nonmissing
        local `v'mean = r(mean)
        local `v'sd = r(sd)
        local `v'var = r(Var)
        local `v'min = r(min)
        local `v'max = r(max)
        local `v'label : variable label `v'
        local `v'name "`v'"
        local `v'nobs = r(N)
        local nnow = r(N)
        * mode - missing if multiple modes
        local `v'mode = .
        if `ISmode'==1 {
            tempvar vmode
            qui egen `vmode' = mode(`v') if `touse'
            * the egen result is only filled in for the sample, so read it
            * from the sample rather than from observation 1
            qui sum `vmode' if `touse', meanonly
            local `v'mode = r(max)
        }
        * compute number missing
        capture confirm string variable `v'
        if !_rc { // string variables
            qui count if `v'=="" & `touse'
        }
        else { // numeric variables
            qui count if `v'>=. & `touse'
        }
        local `v'nmiss = r(N)
        local nmissprior = r(N)
        if ``v'nmiss'!=0 {
            local ISmissing = 1
        }
        * determine if n varies across variables due to missing values
        if `vnum'>1 {
            if `nprior'!=`nnow' {
                local ISsamenobs = 0
            }
        }
        local nprior = `nnow'
        * compute % with given values
        if `ISvalues'==1 {
            local n_notother = 0 // # of cases at the listed values
            local n = ``v'nobs'  // nonmissing N
            foreach val in `values' {
                qui count if `v'==`val' & `touse'
                local nval = r(N)
                local n_notother = `n_notother' + `nval'
                local `v'pval`val' = 100 * (`nval'/`n')
            }
            local `v'pvalother = 100 * ((`n'-`n_notother')/`n')
        }
    } // loop through variables for computations

    // decide on whether to print nobs and nmiss
    local n_for_all = `nprior'
    * value to possibly print at end of table
    if `ISmissing'==1 {
        local nmiss_for_all = `nmissprior'
    }
    * if missing style, always keep nobs and nmiss columns
    if "`style'"!="missing" {
        * remove nobs and nmiss when N is the same for every variable
        if `ISsamenobs'==1 & "`forcen'"!="forcen" {
            local outorder = subinword("`outorder'","nobs","",.)
            local outorder = subinword("`outorder'","nmiss","",.)
        }
    }
    local ISnobs_in_order = strpos("`outorder'","nobs")>0
    local ISnmiss_in_order = strpos("`outorder'","nmiss")>0

    // squeeze or optimize
    if "`squeeze'"=="squeeze" | "`optimize'"=="optimize" ///
        | `maxlength'>0 {
        * set counters for needed column widths to 0; every pct## item
        * shares the single pctile counter
        foreach o in `outorder' {
            local key "`o'"
            if substr("`o'",1,3)=="pct" {
                local key "pctile"
            }
            local cis`key' = 0
        }
        * check needed lengths for each statistic for each variable
        foreach v in `varlist' {
            foreach o in `outorder' {
                local o3 = substr("`o'",1,3)
                * name
                if "`o'"=="name" {
                    local l = length("`v'")
                    local cisname = max(`cisname',`l')
                }
                * variable label
                else if "`o'"=="label" {
                    local oout "``v'`o''"
                    local l = length("`labeloffset'`oout'") + 2 // for two spaces
                    local cislabel = max(`cislabel',`l')
                }
                * number missing
                else if "`o'"=="nmiss" {
                    local ofmt "%`c`o''.`d`o''f"
                    local stat = ``v'nmiss'
                    local l = length(string(`stat',"`ofmt'"))
                    local cis`o' = max(`cis`o'',`l')
                }
                * values
                else if "`o'"=="values" {
                    local ofmt "%`cvalues'.`dvalues'f"
                    foreach val in `values' {
                        local stat = ``v'pval`val''
                        local l = length(string(`stat',"`ofmt'"))
                        local cis`o' = max(`cis`o'',`l')
                    }
                    if "`othervalues'"=="othervalues" {
                        local stat = ``v'pvalother'
                        local l = length(string(`stat',"`ofmt'"))
                        local cis`o' = max(`cis`o'',`l')
                    }
                }
                * percentiles
                else if "`o3'"=="pct" {
                    local pctnum = substr("`o'",4,5)    // retrieve ## from pct##
                    local pctnum = `pctnum'             // strip off leading 0
                    local ofmt "%`cpctile'.`dpctile'f"
                    local stat = ``v'pct`pctnum''
                    local l = length(string(`stat',"`ofmt'"))
                    local cispctile = max(`cispctile',`l')
                }
                * other statistics
                else {
                    local ofmt "%`c`o''.`d`o''f"
                    local stat = ``v'`o''
                    local l = length(string(`stat',"`ofmt'"))
                    local cis`o' = max(`cis`o'',`l')
                }
            } // loop through items to print
        } // loop through variables

        * if squeeze, narrow columns that are wider than needed
        if "`squeeze'"=="squeeze" {
            foreach o in `outorder' {
                local key "`o'"
                if substr("`o'",1,3)=="pct" {
                    local key "pctile"
                }
                * if override value, don't squeeze
                if `oc`key''!=0 {
                    local c`key' = `oc`key''
                }
                * else use squeezed value, never below sqmin
                else {
                    if `cis`key''<`sqmin' {
                        local cis`key' = `sqmin'
                    }
                    if `c`key''>`cis`key'' {
                        local c`key' = `cis`key'' + `spacer'
                    }
                }
            } // loop through outorder
        }
        * if optimize, set every column to the needed width plus spacer
        if "`optimize'"=="optimize" {
            foreach o in `outorder' {
                local key "`o'"
                if substr("`o'",1,3)=="pct" {
                    local key "pctile"
                }
                * if override value
                if `oc`key''!=0 {
                    local c`key' = `oc`key''
                }
                * else use optimized value, never below optmin
                else {
                    if `cis`key''<`optmin' {
                        local cis`key' = `optmin'
                    }
                    local c`key' = `cis`key'' + `spacer'
                }
            } // outorder loop
        }
        * get current total length of output
        local tlen = 2
        foreach o in `outorder' {
            local key "`o'"
            if substr("`o'",1,3)=="pct" {
                local key "pctile"
            }
            local tlen = `tlen' + `c`key''
        } // outorder loop
        * if exceeds maxlength, reduce label length
        if `maxlength' != 0 {
            if `tlen' > `maxlength' {
                local dif = `tlen' - `maxlength'
                local clabel = `clabel' - `dif'
                * a zero or negative width is not a usable string format
                if `clabel'<1 {
                    local clabel = 1
                }
            }
        }
    } // squeeze or optimize

    // print column headings
    if "`noheader'"!="noheader" {
        display
        local no = 0
        foreach o in `outorder' {
            local ++no
            * if pct, retrieve ## from pct##
            local o3 = substr("`o'",1,3)
            if "`o3'"=="pct" {
                local pctnum = substr("`o'",4,5)    // retrieve ## from pct##
                local pctnum = `pctnum'             // strip off leading 0
            }
            * if number option, indent the heading by the width of the
            * row-number prefix (two digits, period, blank)
            if "`number'"=="number" & `no'==1 {
                di _cont _skip(4)
            }
            * heading for name
            if "`o'"== "name" {
                if `c`o''>8 {
                    di _cont %`namesign'`c`o''s "Variable"
                }
                else {
                    di _cont %`namesign'`c`o''s "Var"
                }
            }
            * heading for nobs
            else if "`o'"== "nobs" {
                di _cont %`c`o''s "Obs"
            }
            * heading for nmiss
            else if "`o'"== "nmiss" {
                if `c`o''>7 {
                    di _cont %`c`o''s "Missing"
                }
                else {
                    di _cont %`c`o''s "#Miss"
                }
            }
            * heading for mean
            else if "`o'"== "mean" {
                di _cont %`c`o''s "Mean"
            }
            * heading for mode
            else if "`o'"== "mode" {
                di _cont %`c`o''s "Mode"
            }
            * heading for sd
            else if "`o'"== "sd" {
                if `csd'<=6 {
                    di _cont %`c`o''s "SD"
                }
                else {
                    di _cont %`c`o''s "StdDev"
                }
            }
            * heading for var
            else if "`o'"== "var" {
                if `c`o''>7 {
                    di _cont %`c`o''s "Variance"
                }
                else {
                    di _cont %`c`o''s "Var"
                }
            }
            * heading for minimum
            else if "`o'"== "min" {
                if `c`o''>7 {
                    di _cont %`c`o''s "Minimum"
                }
                else {
                    di _cont %`c`o''s "Min"
                }
            }
            * heading for max
            else if "`o'"== "max" {
                if `c`o''>7 {
                    di _cont %`c`o''s "Maximum"
                }
                else {
                    di _cont %`c`o''s "Max"
                }
            }
            * heading for variable label
            else if "`o'"== "label" {
                di _cont %`lblsign'`c`o''s "`labeloffset'Label"
            }
            * headings if % at given value
            else if "`o'"=="values" {
                foreach val in `values' {
                    di _cont %`c`o''s "%`val's"
                }
                if "`othervalues'"=="othervalues" {
                    di _cont %`c`o''s "%Other"
                }
            }
            * heading for percentiles
            else if "`o3'"=="pct" {
                if `pctnum'==50 {
                    di _cont %`cpctile's "Median"
                }
                else {
                    di _cont %`cpctile's "`pctnum'%"
                }
            } // pctile
        } // loop through order
    } // if no header
    display

    // loop through variables and print table
    local vnum = 0
    foreach v in `varlist' {
        local ++vnum
        if "`number'"=="number" {
            di _cont %2.0f `vnum' ". "
        }
        * loop through items to print
        foreach o in `outorder' {
            local o3 = substr("`o'",1,3)
            * name
            if "`o'"=="name" {
                local ofmt "%`namesign'`c`o''s"
                di _cont `ofmt' "``v'`o''"
            }
            * variable label
            else if "`o'"=="label" {
                local ofmt "%`lblsign'`c`o''s"
                local oout "``v'`o''"
                * truncate based on clabel
                local oout = substr("`oout'",1,`clabel')
                di _cont `ofmt' "`labeloffset'`oout'"
            }
            * number missing
            else if "`o'"=="nmiss" {
                local stat = ``v'nmiss'
                local ofmt "%`c`o''.`d`o''f"
                di _cont `ofmt' `stat'
            }
            * values
            else if "`o'"=="values" {
                local ofmt "%`cvalues'.`dvalues'f"
                foreach val in `values' {
                    local stat = ``v'pval`val''
                    di _cont `ofmt' `stat'
                }
                if "`othervalues'"=="othervalues" {
                    local stat = ``v'pvalother'
                    di _cont `ofmt' `stat'
                }
            }
            * percentiles
            else if "`o3'"=="pct" {
                local pctnum = substr("`o'",4,5)    // retrieve ## from pct##
                local pctnum = `pctnum'             // strip off leading 0
                local ofmt "%`cpctile'.`dpctile'f"
                local stat = ``v'pct`pctnum''
                di _cont `ofmt' `stat'
            }
            * other statistics
            else {
                local ofmt "%`c`o''.`d`o''f"
                local stat = ``v'`o''
                di _cont `ofmt' `stat'
            }
        } // loop through items to print
        display
    } // loop through variables

    * only print N= if all variables have the same n
    if `ISsamenobs'==1 {
        * if nobs still in order, don't print it
        if `ISnobs_in_order'==0 {
            di _new "N = `n_for_all'"
        }
        if `ISnmiss_in_order'==0 {
            if `ISmissing'==1 {
                di "N missing = `nmiss_for_all'"
            }
        }
    }
end

View File

@ -0,0 +1,164 @@
{smcl}
{* 24Aug2006}{...}
{cmd:help vardesc}
{hline}
{title:Title}
{p2colset 5 18 20 2}{...}
{p2col :{hi:vardesc} {hline 2}}Create a table with variable descriptions.
{p2colreset}{...}
{title:Syntax}
{phang}
Construct a table with variable names, labels and descriptive statistics.
{p 8 14 2}
{cmd:by} {it:by_variables}{cmd::} {cmd:vardesc}
[{varlist}]
{ifin}
[{cmd:,} {it:options}]
{synoptset 17 tabbed}{...}
{synopthdr}
{synoptline}
{syntab:Choice of Statistics}
{synopt :{opt o:rder(items)}}selection and order of items to print. Items are:{p_end}
{p 23 23 2}{opt nam:e}: name of variable (synonym: nm).{p_end}
{p 23 23 2}{opt lab:el}: variable label (synonym: lbl).{p_end}
{p 23 23 2}{opt mean}: mean (synonym: mn).{p_end}
{p 23 23 2}{opt med:ian}: median (synonym: pct50).{p_end}
{p 23 23 2}{opt var:iance}: variance.{p_end}
{p 23 23 2}{opt stddev}: standard deviation (synonym: sd).{p_end}
{p 23 23 2}{opt min:imum}: minimum.{p_end}
{p 23 23 2}{opt max:imum}: maximum.{p_end}
{p 23 23 2}{opt mode}: mode; missing if multiple modes.{p_end}
{p 23 23 2}{opt n:obs}: number of nonmissing cases for variable.{p_end}
{p 23 23 2}{opt nmiss}: number of missing cases for variable.{p_end}
{p 23 23 2}{opt p:ct}{it:#}: percentile for #=1 5 10 25 50 75 90 95 99.{p_end}
{p 23 23 2}{opt val:ues}: percent of cases with values specified with {it:values()}.{p_end}
{synopt :{opt v:alues(string)}}values at which % of cases with that value are listed.{p_end}
{synopt :{opt oth:ervalues}}include % of cases with values not listed in {it:values()}.{p_end}
{synopt :{opt r:ange(string)}}range of values for computing % of cases with those values.{p_end}
{syntab:Styles of Output}
{synopt :{opt s:tyle(type)}}select style for output. Styles are:{p_end}
{p 23 23 2}{opt basic}: standard output.{p_end}
{p 23 23 2}{opt check}: check the data during cleaning.{p_end}
{p 23 23 2}{opt missing}: look for missing data.{p_end}
{p 23 23 2}{opt names}: just names and labels; this option is equivalent to
the option {opt n:ame} used without the {opt style( )} option (e.g., vardesc, names).{p_end}
{p 23 23 2}{opt outliers}: percentiles to check for outliers.{p_end}
{p 23 23 2}{opt range}: name, N, values and labels.{p_end}
{syntab:Compressing Output and Removing Spaces}
{synopt :{opt sq:ueeze}}removes extra space; a column is narrowed only down to the width its contents need, so nothing is truncated.{p_end}
{synopt :{opt sqm:in(#)}}is the minimum size of column after squeezing; default 6.{p_end}
{synopt :{opt op:timize}}prints the specified statistics in a minimum of space, with each
column being no smaller than the {cmd:optmin} size.{p_end}
{synopt :{opt optm:in(#)}}is the minimum size of column after optimizing; default 5.{p_end}
{synopt :{opt sp:acer(#)}}spacing added between statistics when using {cmd:squeeze}
and {cmd:optimize}, default 1.{p_end}
{synopt :{opt maxl:ength(#)}}is the maximum line size allowed before length of
label is reduced.{p_end}
{synopt :{opt oclab:el(#)}}override columns used for variable label.{p_end}
{synopt :{opt ocma:x(#)}}override columns used for maximum.{p_end}
{synopt :{opt ocmode(#)}}override columns used for mode.{p_end}
{synopt :{opt ocme:an(#)}}override columns used for mean.{p_end}
{synopt :{opt ocmi:n(#)}}override columns used for minimum.{p_end}
{synopt :{opt ocn:obs(#)}}override columns used for # of observations.{p_end}
{synopt :{opt ocna:me(#)}}override columns used for variable name.{p_end}
{synopt :{opt ocpct:ile(#)}}override columns used for percentiles.{p_end}
{synopt :{opt ocs:d(#)}}override columns used for standard deviation.{p_end}
{synopt :{opt ocv:ar(#)}}override columns used for variance.{p_end}
{synopt :{opt ocval:ues(#)}}override columns used for percent with given value.{p_end}
{syntab:Main}
{synopt :{opt aorder}}alphabetizes order of variables.{p_end}
{synopt :{opt f:irst(var_name)}}prints this as the first variable.{p_end}
{synopt :{opt forcen}}force printing of nobs for each variable even if same value.{p_end}
{synopt :{opt noh:eader}}suppresses printing of column headings.{p_end}
{synopt :{opt nom:iss}}uses listwise deletion to remove missing data for variables in {it:varlist}.{p_end}
{synopt :{opt num:ber}}numbers each variable.{p_end}
{synopt :{opt verb:ose}}prints extra information.{p_end}
{syntab:Alignment of variable name and label}
{synopt :{opt leftl:abel}}left justify printing of the variable label.{p_end}
{synopt :{opt leftn:ame}}left justify printing of the variable name.{p_end}
{synopt :{opt rightl:abel}}right justify printing of the variable label.{p_end}
{synopt :{opt rightn:ame}}right justify printing of the variable name.{p_end}
{syntab:Column size and decimal digits}
{synopt :{opt c:olumn(#)}}sets default column width used by mean, minimum, maximum, standard deviation and variance.{p_end}
{synopt :{opt clab:el(#)}}sets # columns used for variable label; default 25.{p_end}
{synopt :{opt cma:x(#)}}sets # columns used for maximum; default 9.{p_end}
{synopt :{opt cmode(#)}}sets # columns used for mode; default 9.{p_end}
{synopt :{opt cme:an(#)}}sets # columns used for mean; default 9.{p_end}
{synopt :{opt cmi:n(#)}}sets # columns used for minimum; default 9.{p_end}
{synopt :{opt cn:obs(#)}}sets # columns used for # of observations; default 6.{p_end}
{synopt :{opt cna:me(#)}}sets # columns used for variable name; default 12.{p_end}
{synopt :{opt cpct:ile(#)}}sets # columns used for percentiles; default 10.{p_end}
{synopt :{opt cs:d(#)}}sets # columns used for standard deviation; default 9.{p_end}
{synopt :{opt cv:ar(#)}}sets # columns used for variance; default 9.{p_end}
{synopt :{opt cval:ues(#)}}sets # columns used for percent with given value; default 6.{p_end}
{synopt :{opt d:ecimal(#)}}sets default # of decimal digits used by mean, minimum, maximum, standard deviation and variance.{p_end}
{synopt :{opt dma:x(#)}}sets # decimals used for maximum; default 2.{p_end}
{synopt :{opt dmode(#)}}sets # decimals used for mode; default 2.{p_end}
{synopt :{opt dme:an(#)}}sets # decimals used for mean; default 2.{p_end}
{synopt :{opt dmi:n(#)}}sets # decimals used for minimum; default 2.{p_end}
{synopt :{opt dn:obs(#)}}sets # decimals used for # of observations; default 0.{p_end}
{synopt :{opt dpct:ile(#)}}sets # decimals used for percentiles; default 2.{p_end}
{synopt :{opt ds:d(#)}}sets # decimals used for standard deviation; default 2.{p_end}
{synopt :{opt dval:ues(#)}}sets # of decimals for percent with given value; default 1.{p_end}
{synopt :{opt dv:ar(#)}}sets # decimals used for variance; default 2.{p_end}
{synopt :{opt minl:abel}}minimize column size for label.{p_end}
{synopt :{opt minn:ame}}minimize column size for name.{p_end}
{synoptline}
{p 4 6 2}
{title:Description}
{pstd}
{opt vardesc} creates a table to describe a set of specified variables. You can change
the order in which items are listed, column widths for each item, and the number
of decimal digits used.
{title:Options}
{dlgtab:Main}
{phang}
{opt xxx} By default :
{p 8 16 2}
aaa{p_end}
{p 8 16 2}
bbb{p_end}
{p 8 16 2}
xxx{p_end}
{title:Acknowledgements}
{title:Notes:}
{p 8 16 2}If {it:values} is not in {it:order()} but values are listed with {it:values()},
values are listed as the last statistics in the table.{p_end}
{p 8 16 2}If all variables have the same number of valid observations, a column with the
sample size is not listed even if you specified {it:nobs} as an option for {it:order};
to force the printing of the sample size, use option {it:forcen}.{p_end}
{title:Examples:}
{phang}{cmd:. vardesc}
{title:Author}
{p 5 5}
J. Scott Long{break}
Indiana University{break}
Departments of Sociology & Statistics{break}
jslong{@}indiana.edu{break}
{browse "http://www.indiana.edu/~jslong/"}{p_end}

View File

@ -0,0 +1,89 @@
program def varflist
*! NJC 1.0.0 3 Feb 2000
    * varflist: put the words of a list into the first observations of a new
    * variable, one word per observation.
    *
    * Syntax: varflist list , Generate(newvar) [ Type(str) Global SCalar STring ]
    *   list        words to store; must precede the comma
    *   generate()  name of the variable to create (must not exist)
    *   type()      storage type for the new variable
    *   global      treat each word as the name of a global macro
    *   scalar      treat each word as the name of a scalar
    *   string      create a string variable (implied by a str type)
    *
    * Errors: 198 empty list or both global and scalar; 498 a word longer
    * than 80 characters or more words than observations; 109 string
    * combined with a non-string type().
    version 6
    gettoken list 0 : 0, parse(",")
    if "`list'" == "" | "`list'" == "," {
        di in r "nothing in list"
        exit 198
    }
    syntax , Generate(str) [ Type(str) Global SCalar STring ]
    confirm new variable `generate'
    local nopts = ("`global'" != "") + ("`scalar'" != "")
    if `nopts' == 2 {
        di in r "must choose between global and scalar"
        exit 198
    }
    local nwords : word count `list'
    * split the list into `1', `2', ... before any word is inspected, so
    * that the length check below looks at the words of the list itself
    tokenize `list'
    local i = 1
    while `i' <= `nwords' {
        local len = length("``i''")
        if `len' > 80 {
            di in r "cannot handle word length > 80"
            exit 498
        }
        local i = `i' + 1
    }
    if `nwords' > _N {
        local n = _N
        di in r "too many words: `nwords' words, `n' obs"
        exit 498
    }
    * reconcile string and type(): string defaults to str1, and a str type
    * implies string
    if "`string'" != "" {
        if "`type'" == "" { local type "str1" }
        else if substr("`type'",1,3) != "str" {
            di in r "string and type(`type') inconsistent"
            exit 109
        }
    }
    else if substr("`type'",1,3) == "str" {
        local string "string"
    }
    * build the values in a temporary variable first
    tempvar g
    if "`string'" != "" {
        qui gen `type' `g' = ""
        local i = 1
        qui while `i' <= `nwords' {
            if "`global'`scalar'" == "" {
                replace `g' = "``i''" in `i'
            }
            else if "`global'" != "" {
                replace `g' = "$``i''" in `i'
            }
            else if "`scalar'" != "" {
                local sval = scalar(``i'')
                replace `g' = "`sval'" in `i'
            }
            local i = `i' + 1
        }
    }
    else {
        qui gen `type' `g' = .
        local i = 1
        qui while `i' <= `nwords' {
            if "`global'`scalar'" == "" {
                replace `g' = ``i'' in `i'
            }
            else if "`global'" != "" {
                replace `g' = $``i'' in `i'
            }
            else if "`scalar'" != "" {
                local sval = scalar(``i'')
                replace `g' = `sval' in `i'
            }
            local i = `i' + 1
        }
    }
    * only generate new variable if all assignments OK
    local type : type `g'
    gen `type' `generate' = `g'
end

View File

@ -0,0 +1,2 @@
.h listutil

View File

@ -0,0 +1,49 @@
*! NJC 1.1.0 7 June 2000
* NJC 1.0.0 25 Jan 2000
program define vectlist, rclass
    * vectlist: turn a row or column vector into a blank-separated list.
    *
    * Syntax: vectlist matname [ , Noisily Global(str) ]
    *   noisily   display the list
    *   global()  also store the list in the named global macro
    * The list is returned in r(list).
    * Errors: 111 matrix not found; 498 matrix is not a vector;
    * 198 global name longer than 8 characters.
    version 6.0
    gettoken A 0 : 0, parse(" ,")

    * colsof() fails when the matrix does not exist
    capture local nc = colsof(matrix(`A'))
    if _rc {
        di in r "matrix `A' not found"
        exit 111
    }
    local nr = rowsof(matrix(`A'))
    if `nc' > 1 & `nr' > 1 {
        di in r "`A' not a vector"
        exit 498
    }

    syntax [ , Noisily Global(str) ]
    if length("`global'") > 8 {
        di in r "global name must be <=8 characters"
        exit 198
    }

    * a one-row matrix is read across its columns; anything else is read
    * down its first column
    if `nr' == 1 {
        local nel `nc'
    }
    else {
        local nel `nr'
    }
    local k = 1
    while `k' <= `nel' {
        if `nr' == 1 {
            local val = `A'[1, `k']
        }
        else {
            local val = `A'[`k', 1]
        }
        local newlist "`newlist'`val' "
        local k = `k' + 1
    }

    if "`noisily'" != "" {
        di "`newlist'"
    }
    if "`global'" != "" {
        global `global' "`newlist'"
    }
    return local list "`newlist'"
end

View File

@ -0,0 +1,2 @@
.h listutil

View File

@ -0,0 +1,614 @@
*! version 1.2.4 TJS 31aug98 STB-46 gr33
program define violin
    version 5.0
    * violin: violin plots (box plot statistics combined with a mirrored
    * kernel density trace), one plot per variable in varlist or, with by(),
    * one plot per level of the by variable.
    *
    * Options parsed below: n() number of density points, width() kernel
    * halfwidth (0 means computed from the data), truncat() range limit for
    * the density trace, one kernel keyword, by(), gap(), round(), saving();
    * any other option is collected and passed on to viogph.
    *
    * Results are left in the globals S_1 ... S_12 (set at the end).
    * Helper programs called: kd (density trace) and viogph (drawing).
    * NOTE(review): ifexp is called when an if clause is given but is not
    * defined in this file; presumably supplied elsewhere.
    local varlist "req ex min(1)"
    local if "opt"
    local in "opt"
    local weight "fweight aweight"
#delimit ;
local options "N(integer 50) Width(real 0.0) TRUncat(str)
BIweight COSine EPan GAUss RECtangle PARzen TRIangle
BY(str) Gap(integer 0) ROund(real 0.0) SAving(str)
B2title(str) *" ;
#delimit cr
    parse "`*'"
    parse "`varlist'", parse(" ")
    * trap bad options
    if index("`options'","tr") > 0 {
        di in re "option tr ambiguous, " _c
        error 199
    }
    if "`b2title'" != "" {
        di in bl "b2title not allowed; option ignored."
    }
    * -> Kernel Density code stolen from kdensity.ado
    * every kernel keyword, cosine included, counts toward the one-kernel limit
    local kflag = ( ("`epan'" != "") + ("`biweigh'" != "") + /*
        */ ("`triangl'" != "") + ("`gauss'" != "") + /*
        */ ("`rectang'" != "") + ("`parzen'" != "") + /*
        */ ("`cosine'" != "") )
    if `kflag' > 1 {
        di in red "specify only one kernel"
        exit 198
    }
    if "`biweigh'" != "" { local kernel = "Biweight" }
    else if "`cosine'" != "" { local kernel = "Cosine" }
    else if "`triangl'" != "" { local kernel = "Triangle" }
    else if "`gauss'" != "" { local kernel = "Gaussian" }
    else if "`rectang'" != "" { local kernel = "Rectangular" }
    else if "`parzen'" != "" { local kernel = "Parzen" }
    else { local kernel = "Epanechnikov" }
    * mark the estimation sample; observations missing on any variable in
    * varlist are dropped from it
    tempvar use
    quietly {
        mark `use' [`weight'`exp'] `if' `in'
        markout `use' `varlist'
        count if `use'
    }
    if _result(1) == 0 { error 2000 }
    if "`by'" != "" {
        confirm var `by'
        unabbrev `by'
        local by $S_1
    }
    if "`if'" != "" {ifexp "`if'"}
    preserve /* Note: data preserved here */
    keep `by' `varlist' `use'
    * rows 1..n hold one side of the trace, rows n+1..2n+1 the mirrored side
    local n1 = `n' + 1
    local n2 = `n' * 2 + 1
    if `n2' > _N { qui set obs `n2' }
    * Generate BY groups
    tempvar kk byg bylabel
    sort `use' `by'
    qui by `use' `by': gen byte `byg' = _n == 1 if `use'
    if "`by'" != "" { qui gen `kk' = _n if `byg' == 1 }
    qui replace `byg' = sum(`byg')
    if "`by'" != "" {
        local byn = `byg'[_N]
        sort `kk'
        if `use'[`byn'] == . { local byn = `byn' - 1}
    }
    else { local byn = 1 }
    * Generate `by' labels -- if required
    if "`by'" != "" {
        capture decode `by', gen(`bylabel')
        if _rc != 0 {
            local type : type `by'
            qui gen `type' `bylabel' = `by'
        }
    }
    * t2flg and b1flg are globals shared with viogph
    tempname t2flg b1flg
    global t2flg = 0
    global b1flg = 0
    * Do calculations
    * get # of vars
    local i 1
    while "``i''" != "" {
        local i = `i' + 1
    }
    local nvars = `i' - 1
    if `nvars' > 1 & "`by'" != "" {
        di in red "by() cannot be used with /*
            */ multi-variable varlist"
        * invalid option combination: leave with a nonzero return code
        exit 198
    }
    * Note: `k' loops over multiple individual variables
    * `j' loops over the levels of a -by- variable
    local k 1
    while "``k''" != "" {
        local ix "``k''"
        local ixl: variable label ``k''
        if "`ixl'" == "" | `nvars' > 1 { local ixl "``k''" }
        local j = 1
        while `j' <= `byn' { /* start of loop for each `by' group */
            if "`by'" != "" {
                sort `kk'
                local byl : di "`by': " `bylabel'[`j']
            }
            * boxplot stats
            * quartiles are read from the S_7, S_8 and S_4 globals after centile
            qui centile `ix' if `use' & `byg' == `j', c(25 50 75)
            local q25 = $S_7
            local q50 = $S_8
            local q75 = $S_4
            * compute additional boxplot info
            * upper and lower adjacent values: most extreme data values
            * within 1.5 interquartile ranges of the quartiles
            tempvar xi
            local uav = `q75' + 1.5 * (`q75' - `q25')
            qui egen `xi' = max(`ix') /*
                */ if `ix' <= `uav' & `use' & `byg' == `j'
            local uav = `xi'
            drop `xi'
            local lav = `q25' - 1.5 * (`q75' - `q25')
            qui egen `xi' = min(`ix') /*
                */ if `ix' >= `lav' & `use' & `byg' == `j'
            local lav = `xi'
            drop `xi'
            * first group only: statistics over the whole sample
            if `j' == 1 {
                quietly summ `ix' [`weight'`exp'] if `use', detail
                local ismin = _result(5)
                local ismax = _result(6)
                if "`by'" != "" {
                    local isn = _result(1)
                    local ismn = _result(5)
                    local ismx = _result(6)
                    local ism = _result(10)
                    local is25 = _result(9)
                    local is75 = _result(11)
                    local iss = 0
                    local isw = 0
                }
            }
            * with by(): common vertical scale limits across all groups
            if `j' == 1 & "`by'" != "" {
                if `width' <= 0 {
                    tempname wwidth
                    local ismin = `ism'
                    local ismax = `ism'
                    local jj 1
                    while `jj' <= `byn' {
                        quietly summ `ix' [`weight'`exp'] /*
                            */ if `use' & `byg' == `jj', detail
                        scalar `wwidth' = 0.9 * min(sqrt(_result(4)), /*
                            */ (_result(11) - _result(9)) / 1.349) /*
                            */ / (_result(1)^.2)
                        local ismin = min(`ismin', _result(5) - `wwidth')
                        local ismax = max(`ismax', _result(6) + `wwidth')
                        local jj = `jj' + 1
                    }
                }
                else {
                    local ismin = `ismn' - `width'
                    local ismax = `ismx' + `width'
                }
            }
            * statistics for the current group; kd reads _result() values
            * before running any summarize of its own, so no command that
            * resets them should be inserted between here and the kd call
            quietly summ `ix' [`weight'`exp'] if `use' & `byg' == `j', detail
            local ixmin = _result(5)
            local ixmax = _result(6)
            local ixn = _result(1)
            if `gap' == 0 { local gp = 1 + max( /*
                */ length(string(round(`ixmin', `round'))), /*
                */ length(string(round(_result(10),`round'))), /*
                */ length(string(round(`ixmax', `round')))) }
            else { local gp = `gap' }
            tempname wwidth
            scalar `wwidth' = `width'
            if `wwidth' <= 0.0 {
                scalar `wwidth' = 0.9 * min(sqrt(_result(4)),(_result(11) /*
                    */ - _result(9)) / 1.349) / (_result(1)^.2)
            }
            local ww = `wwidth'
            tempvar d m
            qui gen double `d' = .
            qui gen double `m' = .
            kd `ix' `d' `m' `use' `byg' [`weight'`exp'], n(`n') /*
                */ ww(`ww') j(`j') `biweight' `cosine' `epan' /*
                */ `gauss' `rectangle' `parzen' `triangle'
            label var `d' "density"
            label var `m' "`ixl'"
            * truncat option
            if "`truncat'" != "" {
                if "`truncat'" == "*" {
                    local tn = `ixmin'
                    local tx = `ixmax'
                }
                else {
                    * grid limits are kept in their own locals (tgmn, tgmx)
                    * so that the combined-group ismn and ismx reported
                    * under by() are not overwritten
                    quietly summ `m' [`weight'`exp'] if `use', detail
                    local tgmn = _result(5)
                    local tgmx = _result(6)
                    * turn every comma in the specification into a blank
                    local nc 1
                    while `nc' > 0 {
                        local nc = index("`truncat'",",")
                        if `nc' > 0 { local truncat = substr("`truncat'",1, /*
                            */ `nc' - 1) + " " + substr("`truncat'",`nc' + 1,.) }
                    }
                    local tn: word 1 of `truncat'
                    local tx: word 2 of `truncat'
                    local tn = real("`tn'")
                    local tx = real("`tx'")
                    if `tn' > `tgmn' { local tn = min(`tn',`ixmin') }
                    if `tx' < `tgmx' { local tx = max(`tx',`ixmax') }
                }
                qui replace `m' = . if `m' < `tn' | `m' > `tx'
            }
            * rescale the density, then mirror it into rows n+1..2n+1
            qui summ `d' in 1/`n'
            local scale = 1 / (`n' * _result(3))
            qui replace `d' = `d' * `scale' in 1/`n'
            local n21 = `n' * 2 + 1
            qui replace `d' = -`d'[`n21' - _n] in `n1'/`n2'
            qui replace `m' = `m'[`n21' - _n] in `n1'/`n2'
            qui replace `d' = `d'[1] in `n2'
            qui replace `m' = `m'[1] in `n2'
            * after truncation row 1 may be missing, so take the closing
            * point from the first remaining point on the positive side;
            * the macro tested is truncat, the name the option actually sets
            if "`truncat'" != "" {
                tempvar tm1 tm2
                qui gen `tm2' = _n
                qui gen `tm1' = sign(`d')
                gsort -`tm1' `m'
                local tm = `m'[1]
                local td = `d'[1]
                sort `tm2'
                qui replace `d' = `td' in `n2'
                qui replace `m' = `tm' in `n2'
            }
            if "`by'" != "" {
                local iss = `iss' + `scale'
                local isw = `isw' + `wwidth'
            }
            * saving option
            if `j' * `k' == 1 & "`saving'" != "" {
                local c = index("`saving'",",")
                local cs " "
                if index("`saving'",", ") != 0 { local cs "" }
                if `c' != 0 { local saving = substr("`saving'",1,`c' - 1) /*
                    */ + "`cs'" + substr("`saving'",`c' + 1, .) }
                local savfile : word 1 of `saving'
                local replace : word 2 of `saving'
                if "`replace'" == "replace" { capture erase "`savfile'.gph" }
                capture confirm new file "`savfile'.gph"
                if _rc == 0 { local saving ", saving(`savfile')" }
                else {
                    local rc = _rc
                    di in re "file `savfile'.gph exists."
                    di in bl "use another filename or add 'replace' option."
                    exit `rc'
                }
            }
            if "`byl'" != "" { local bylbyl byl("`byl'") }
            tempname ixlixl
            global ixlixl "`ixl'"
            * do plot
            if `j' * `k' == 1 { gph open `saving' }
            viogph `d' `m', j(`j') k(`k') byn(`byn') ixmin(`ixmin') /*
                */ ixmax(`ixmax') q50(`q50') ismin(`ismin') ismax(`ismax') /*
                */ nvars(`nvars') gp(`gp') `bylbyl' uav(`uav') lav(`lav') /*
                */ q75(`q75') q25(`q25') rou(`round') `options'
            if `j' >= `byn' & `k' >= `nvars' { gph close }
            * display stats
            if `byn' == 1 {
                di _n in gr "Statistics for ``k'':"
                di in gr " LAV: " in ye `lav' _c
                di in gr " Q25: " in ye `q25' _c
                di in gr " Q75: " in ye `q75' _c
                di in gr " UAV: " in ye `uav'
                di in gr " Min: " in ye `ixmin' _c
                di in gr " Median: " in ye `q50' _c
                di in gr " Max: " in ye `ixmax' _c
                di in gr " n: " in ye `ixn'
                di _n in gr "For ``k'', density computed using:"
                di in gr " Kernel: " in ye "`kernel'" _c
                di in gr " N: " in ye `n' _c
                di in gr " Scale: " in ye %6.2f `scale' _c
                di in gr " Width: " in ye %6.2f `wwidth'
            }
            local j = `j' + 1
        } /* end of loop for each `by' group */
        if "`by'" != "" {
            di _n in gr "Statistics (all groups combined): "
            di in gr " Min: " in ye `ismn' _c
            di in gr " Q25: " in ye `is25' _c
            di in gr " Median: " in ye `ism' _c
            di in gr " Q75: " in ye `is75' _c
            di in gr " Max: " in ye `ismx' _c
            di in gr " n: " in ye `isn'
            di _n in gr "Densities computed using:"
            di in gr " Kernel: " in ye "`kernel'" _c
            di in gr " N: " in ye `n' _c
            di in gr " Ave. Scale: " in ye %6.2f `iss' / `byn' _c
            di in gr " Ave. Width: " in ye %6.2f `isw' / `byn'
            * values saved below are those of the combined group
            local scale = `iss' / `byn'
            local wwidth = `isw' / `byn'
            local ixmin = `ismn'
            local lav = .
            local q25 = `is25'
            local q50 = `ism'
            * third quartile of the combined group (is75, not is25)
            local q75 = `is75'
            local uav = .
            local ixmax = `ismx'
            local ixn = `isn'
        }
        local k = `k' + 1
    }
    * save globals
    global S_1 "`kernel'"
    global S_2 = `n'
    global S_3 = `wwidth'
    global S_4 = `scale'
    global S_5 = `ixmin'
    global S_6 = `lav'
    global S_7 = `q25'
    global S_8 = `q50'
    global S_9 = `q75'
    global S_10 = `uav'
    global S_11 = `ixmax'
    global S_12 = `ixn'
    macro drop ixlixl b1flg t2flg
end
program define viogph
    version 5.0
    * viogph: draw one violin (density outline, adjacent-value line,
    * quartile box and median bar) into the currently open gph graph.
    * Called with two variables, density first and grid second, plus the
    * statistics and layout values passed as options.
    * The globals t2flg, b1flg and ixlixl are read on entry; t2flg and
    * b1flg are written back on exit.
    local varlist "req ex min(2) max(2)"
#delimit ;
local options "J(int 1) K(int 1) BYN(int 1) TItle(str) B1title(str)
IXMIN(real 0.0) IXMAX(real 0.0) Q50(real 0.0) ISMIN(real 0.0)
ISMAX(real 0.0) NVARS(int 0) GP(int 3) BYL(str) UAV(real 0.0)
LAV(real 0.0) Q75(real 0.0) Q25(real 0.0) Pen(str) Symbol(str)
Connect(str) T1title(str) T2title(str) YLAbel(str) YSCale(str)
ROUnd(real 0.0) *" ;
#delimit cr
    parse "`*'"
    parse "`varlist'", parse(" ")
    local dens "`1'"
    local grid "`2'"
    local t2on = $t2flg
    local b1on = $b1flg
    local ixl = "$ixlixl"

    * defaults for the density outline
    if "`pen'" == "" {
        local pen "2"
    }
    if "`symbol'" == "" {
        local symbol "i"
    }
    if "`connect'" == "" {
        local connect "l"
    }

    * top title: only the first plot carries text; with several by-groups
    * it shows the variable label instead of the requested title
    if `j' == 1 & `k' == 1 {
        if `byn' > 1 {
            local t1title t1t("`ixl'")
        }
        else if "`t1title'" == "." {
            local t1title t1t(" ")
        }
        else if "`t1title'" == "" {
            local t1title t1t(Violin Plot)
        }
        else {
            local t1title t1t("`t1title'")
        }
    }
    if `j' > 1 | `k' > 1 {
        local t1title t1t(" ")
    }

    * second top title: remember on the first plot that one was requested
    if `j' == 1 & `k' == 1 {
        if "`t2title'" != "" {
            local t2title t2t("`t2title'")
            local t2on 1
        }
    }
    else if `t2on' == 1 {
        local t2title t2t(" ")
    }

    * bottom title: title() falls back to b1title(); it is drawn by hand
    * further down, so graph itself only gets a blank b1 title
    if "`title'" == "" {
        local title "`b1title'"
    }
    if "`title'" != "" {
        local b1title b1t(" ")
        local b1on 1
    }

    * y-axis labels default to minimum, median and maximum (rounded)
    if "`ylabel'" != "" {
        local yl yla(`ylabel')
    }
    else {
        local yl = "yla(" /*
            */ + string(round(`ixmin',`round')) + "," /*
            */ + string(round(`q50',`round')) + "," /*
            */ + string(round(`ixmax',`round')) + ")"
    }
    if index("`options'","yla") > 0 {
        local yl ""
    }
    if "`yscale'" != "" {
        local ys ysc(`yscale')
    }
    else {
        local ys ysc(`ismin',`ismax')
    }

    * do plot
    * draw density traces
    * horizontal slot for this plot: at most .33 of a 32000-unit width
    local frac = min(.33, 1 / (`byn' * `nvars') )
    local pw = 32000 * `frac'
    local p1 = (`j' * `k' - 1) * `pw'
    local p2 = `p1' + `pw'
#delimit ;
graph `grid' `dens', bbox(0,`p1',23063,`p2',923,444,0) s(`symbol')
c(`connect') pe(`pen') `t1title' `yl' gap(`gp') `ys' `t2title'
`b1title' b2t(" ") `options' ;
#delimit cr
    * scale and location values left behind by graph, used below to
    * convert data values into gph coordinates
    tempname ysca yloc xloc
    scalar `ysca' = _result(5)
    scalar `yloc' = _result(6)
    scalar `xloc' = _result(8)

    * draw label
    local lr1 = 20700
    local lr2 = 21700
    local lc = 21500
    if `b1on' == 1 {
        local lr1 = `lr1' - 1000
        local lr2 = `lr2' - 1000
        local lc = `lc' - 1000
    }
    gph clear `lr1' `p1' `lr2' `p2'
    gph pen 1
    local xlo = `xloc'
    if `byn' == 1 {
        gph text `lc' `xlo' 0 0 `ixl'
    }
    else {
        gph text `lc' `xlo' 0 0 `byl'
    }

    * do title
    if "`title'" != "" {
        gph font 1300 650
        gph text 22100 10 0 -1 `title'
        gph font 923 444
    }

    * draw adjacent values line
    local ar1 = `uav' * `ysca' + `yloc'
    local ar2 = `lav' * `ysca' + `yloc'
    local ac1 = `xloc'
    local ac2 = `ac1'
    gph pen `pen'
    gph line `ar1' `ac1' `ar2' `ac2'

    * draw quartile box (shaded)
    local br1 = `q75' * `ysca' + `yloc'
    local br2 = `q25' * `ysca' + `yloc'
    local bc1 = -250 + `xloc'
    local bc2 = 250 + `xloc'
    gph box `br1' `bc1' `br2' `bc2' 1

    * draw median
    local mr1 = `q50' * `ysca' + `yloc' + 100
    local mr2 = `q50' * `ysca' + `yloc' - 100
    local mc1 = -500 + `xloc'
    local mc2 = 500 + `xloc'
    gph box `mr1' `mc1' `mr2' `mc2' 0

    * hand the title flags back for the next plot
    global t2flg = `t2on'
    global b1flg = `b1on'
end
program define kd
version 5.0
* kd: kernel density trace for one variable within one group.
* Arguments, in order: data variable, density variable (filled here),
* grid variable (filled here), sample marker, group index variable.
* Options: n() number of grid points, ww() kernel halfwidth, j() group
* number to estimate for, and one kernel keyword (the final else branch
* is used when none of the other kernel keywords is given).
* -> Kernel Density code stolen from kdensity.ado
local varlist "req ex min(1) max(5)"
local weight "fweight aweight"
#delimit ;
local options "N(integer 50) WW(real 0.0) J(int 1)
BIweight COSine EPan GAUss RECtangle PARzen TRIangle" ;
#delimit cr
parse "`*'"
parse "`varlist'", parse(" ")
local ix "`1'"
local d "`2'"
local m "`3'"
local use "`4'"
local byg "`5'"
* y holds the kernel weight of each observation, z its scaled distance
* from the current grid point
tempvar y z
qui gen double `y' = .
qui gen double `z' = .
tempname delta wid wwidth
scalar `wwidth' = `ww'
* NOTE(review): _result(1), _result(5) and _result(6) are read here before
* any summarize has run in this program, so they must come from a command
* run before the call; presumably the detailed summarize of the current
* group (n, min, max) -- confirm against the caller
* delta is the spacing of n equally spaced grid points covering the range
* from _result(5) - ww to _result(6) + ww
scalar `delta' = (_result(6) - _result(5) + 2 * `wwidth') /*
*/ / (`n' - 1)
scalar `wid' = _result(1) * `wwidth'
qui replace `m' = _result(5) - `wwidth' + (_n - 1) /*
*/ * `delta' in 1/`n'
* each branch below loops over the n grid points: compute z for the
* observations of group j, evaluate the kernel into y, then store
* sum(y) / wid as the density at grid point i
local i 1
* biweight kernel
if "`biweigh'" != "" {
local con1 = .9375
while `i' <= `n' {
qui replace `z' = (`ix' - `m'[`i']) / (`wwidth') /*
*/ if `use' & `byg' == `j'
qui replace `y' = `con1' * (1 - (`z')^2)^2 /*
*/ if abs(round(`z',1e-8)) < 1
qui summ `y' [`weight'`exp'] if `y' != .
qui replace `d' = (_result(3) * _result(1)) / `wid' in `i'
qui replace `y' = .
local i = `i' + 1
}
qui replace `d' = 0 if `d' == . in 1/`n'
}
* cosine kernel
else if "`cosine'" != "" {
while `i' <= `n' {
qui replace `z' = (`ix' - `m'[`i']) / (`wwidth') /*
*/ if `use' & `byg' == `j'
qui replace `y' = 1 + cos(2 * _pi * `z') /*
*/ if abs(round(`z',1e-8)) < 0.5
qui summ `y' [`weight'`exp'] if `y' != .
qui replace `d' = (_result(3) * _result(1)) / `wid' in `i'
qui replace `y' = .
local i = `i' + 1
}
qui replace `d' = 0 if `d' == . in 1/`n'
}
* triangle kernel
else if "`triangl'" != "" {
while `i' <= `n' {
qui replace `z' = (`ix' - `m'[`i']) / (`wwidth') /*
*/ if `use' & `byg' == `j'
qui replace `y' = 1 - abs(`z') if abs(round(`z',1e-8)) < 1
qui summ `y' [`weight'`exp'] if `y' != .
qui replace `d' = (_result(3) * _result(1)) / `wid' in `i'
qui replace `y' = .
local i = `i' + 1
}
qui replace `d' = 0 if `d' == . in 1/`n'
}
* Parzen kernel: one formula up to a scaled distance of .5, another
* between .5 and 1
else if "`parzen'" != "" {
local con1 = 4 / 3
local con2 = 2 * `con1'
while `i' <= `n' {
qui replace `z' = (`ix' - `m'[`i']) / (`wwidth') /*
*/ if `use' & `byg' == `j'
qui replace `y' = `con1' - 8 * (`z')^2 + 8 * abs(`z')^3 /*
*/ if abs(round(`z',1e-8)) <= .5
qui replace `y' = `con2' * (1 - abs(`z'))^3 /*
*/ if abs(round(`z',1e-8)) > .5 /*
*/ & abs(round(`z',1e-8)) < 1
qui summ `y' [`weight'`exp'] if `y' != .
qui replace `d' = (_result(3) * _result(1)) / `wid' in `i'
qui replace `y' = .
local i = `i' + 1
}
qui replace `d' = 0 if `d' == . in 1/`n'
}
* Gaussian kernel: y is recomputed for every observation on each pass,
* so it is not reset to missing inside the loop
else if "`gauss'" != "" {
local con1 = sqrt(2 * _pi)
while `i' <= `n' {
qui replace `z' = (`ix' - `m'[`i']) / (`wwidth') /*
*/ if `use' & `byg' == `j'
qui replace `y' = exp(-0.5 * ((`z')^2)) / `con1'
qui summ `y' [`weight'`exp']
qui replace `d' = (_result(3) * _result(1)) / `wid' in `i'
local i = `i' + 1
}
qui replace `d' = 0 if `d' == . in 1/`n'
}
* rectangular kernel
else if "`rectang'" != "" {
while `i' <= `n' {
qui replace `z' = (`ix' - `m'[`i']) / (`wwidth') /*
*/ if `use' & `byg' == `j'
qui replace `y' = 0.5 if abs(round(`z',1e-8)) < 1
qui summ `y' [`weight'`exp'] if `y' != .
qui replace `d' = (_result(3) * _result(1)) / `wid' in `i'
qui replace `y' = .
local i = `i' + 1
}
qui replace `d' = 0 if `d' == . in 1/`n'
}
* default branch, reached when no other kernel keyword matched
* (the calling program labels this default kernel Epanechnikov)
else {
local con1 = 3 / (4 * sqrt(5))
local con2 = sqrt(5)
while `i' <= `n' {
qui replace `z' = (`ix' - `m'[`i']) / (`wwidth') /*
*/ if `use' & `byg' == `j'
qui replace `y' = `con1' * (1 - ((`z')^2 / 5)) /*
*/ if abs(round(`z',1e-8)) <= `con2'
qui summ `y' [`weight'`exp'] if `y' != .
qui replace `d' = (_result(3) * _result(1)) / `wid' in `i'
qui replace `y' = .
local i = `i' + 1
}
qui replace `d' = 0 if `d' == . in 1/`n'
}
end

View File

@ -0,0 +1,165 @@
.-
help for ^violin^ (STB-46: gr33)
.-
Violin plots
------------
^violin^ varlist [weight] [^if^ exp] [^in^ range]
[^,^ {^bi^weight|^cos^ine|^ep^an|^gau^ss|^par^zen|^rec^tangle|^tri^angle}
^n(^#^) w^idth^(^#^) by(^byvar^) tru^ncat^(^#,#|*^) ro^und^(^#^)^
graph_options ]
^fweights^ and ^aweights^ are allowed; see ^help^ @weights@.
Description
-----------
^violin^ produces violin plots, a graphical box plot--kernel density synergism.
The violin plot combines the basic summary statistics of a box plot with the
visual information provided by a local density estimator. The goal is to
reveal the distributional structure in a variable. Much like a traditional
box plot, the violin plot displays the median as a short horizontal line, the
first-to-third interquartile range as a narrow shaded box, and the lower-to-
upper adjacent value range as a vertical line, but it does not plot outside
values. Instead, it "boxes" the data with mirrored density curves and labels
the y-axis at the minimum, median and maximum observed data values.
^violin^ also lists basic descriptive statistics about the data (i.e., the
lower and upper adjacent values, the 25th and 75th centiles, the minimum,
median and maximum of the data, and the sample size) and it provides
information about the density estimation (i.e., the kernel method used, the
number of points of estimation, and the resulting scale and width factors).
When ^by()^ is specified, descriptive statistics are displayed for the combined
group only. When multiple variables are included in varlist, statistics are
displayed for the last variable only.
^violin^ discards observations on a casewise basis as a function of 1) missing
data and 2) the ^if^ (or ^in^) specification (i.e., it ignores the entire
observation). This behavior may lead to unexpected results when multiple
variables are in the varlist.
Note: ^violin^ calls ^centile^ to compute the needed centiles but ^centile^ does
not respond to a ^[weight]^ specification. This conflicts with the
^kdensity^ code which responds to that specification. The implications of
this conflict have not been explored, but ^violin^ currently allows the
^[weight]^ specification to be passed through to ^kdensity^.
Note: ^violin^ uses a low-level ^gph^ command which is not supported in Stata's
release 2 ^.gph^ format. As a result neither ^Stage^ nor the ^gphdot^ or
^gphpen^ DOS-based graphics output programs can process a saved violin-plot
graphics file. This limitation does not affect screen display or output
using the ^Print Graph^ option of Stata's ^File^ menu.
Options
-------
^biweight^, ^cosine^, ..., ^triangle^ specify the kernel. By default, ^epan^, the
Epanechnikov kernel, is used.
^n(^#^)^ specifies the number of points at which density estimates will be
evaluated. The default is 50.
^width(^#^)^ specifies the halfwidth of the kernel, the width of the density
window around each point. If ^width()^ is not specified, then the "optimal"
width is used; see ^[R] kdensity^. For multimodal and highly skewed
densities, the "optimal" width is usually too wide and oversmooths the
density.
^by(^byvar^)^ produces separate plots for the groups of observations defined by
byvar and displays them in a single graph having common vertical scale.
^by()^ cannot be specified when there is more than one variable in the
varlist.
^truncat(^#^,^#|^*)^ limits the range of the density trace, either to a range
specified as ^(^#^,^#^)^, or to the observed data limits, specified as ^(*)^.
Regardless of the actual ^(^#^,^#^)^ specification, the maximum range truncation
honored is the observed data limits. The precise truncation points will
be the most extreme points within the specified range where the density is
calculated (the points of density calculation depend on ^n()^, ^width()^
and the observed data).
^round(^#^)^ rounds the y-axis numeric labels to the value specified. As a result,
the labels and their corresponding tic marks may not be placed at the true
minimum, median, or maximum values, rather they will be at the rounded
values. ^round()^ has no effect if ^ylabel^ is specified without arguments,
but is operative if ^ylabel^ is not specified or is specified with arguments.
The ^round()^ option follows the rules of Stata's ^round(^x^,^y^)^ function, with
# being the y argument and each label value being the x argument;
see ^[U] 20.3.5 Special functions^.
graph_options are any of the options allowed by ^graph, twoway^ except ^b2title()^
(which is ignored); see ^help^ @graph@. Some options are preset and, although
changeable, usually should not be modified. These include ^symbol(i)^ and
^connect(l)^ for specifying the plotting symbol and point connection method
for the density curve. In addition, ^ylabel()^ is preset to label only the
minimum, median and maximum points. ^t1title(Violin Plot)^ is preset but can
be changed--except when ^by()^ is specified; in this instance ^t1title^ is used
for the variable name or label. When changeable, use of ^t1title(.)^ will
result in a blank title. Other preset options, such as ^pen(2)^ for the
plot pen color, are intended to be freely changed to suit user preference.
A few options, such as the left and right titles, are set (or default to)
blank. If specified, they appear beside each plot in a multi-variable
graph. Lastly, the ^saving()^ option differs slightly from ^graph^'s in
that the filename extension is always ^.gph^ and must not be specified.
Saved values
------------
S_1 name of kernel used for density trace
S_2 number of points of density estimation
S_3 band width for density estimation
S_4 scale factor of density plot
S_5 minimum
S_6 lower adjacent value
S_7 first quartile
S_8 median
S_9 third quartile
S_10 upper adjacent value
S_11 maximum
S_12 n
When ^by()^ is specified: S_3 and S_4 contain the averages of the band width and
scale factors used in the subgroup density estimations; S_5, S_7, S_8, S_9,
S_11 and S_12 are statistics for the combined group; and S_6 and S_10 are set
missing.
When multiple variables are specified, the saved values contain results for
the last variable in the varlist.
Examples
--------
. ^violin length, t1(Auto data) l1(length of car)^
. ^violin length weight, n(100) w(20)^
. ^violin weight, by(foreign) parzen^
Author
------
Thomas J. Steichen
RJRT
steicht@@rjrt.com
Reference
---------
Hintze, J. L. and R. D. Nelson (1998). "Violin plots: a box plot-density trace
synergism." The American Statistician, 52(2):181-4.
Also see
--------
STB: gr33 (STB-46)
Manual: ^[R] kdensity^, ^[R] graph box^, ^[R] centile^
^[U] 20.3.5 Special functions^
On-line: help for @kdensity@, @graph@, @centile@, @functions@