Initial commit

This commit is contained in:
2024-06-04 11:19:21 +02:00
commit a808b2a204
710 changed files with 277632 additions and 0 deletions

View File

@ -0,0 +1,16 @@
pro def polych_ll
version 8.1
args lf rho
#delimit ;
qui replace __POLYpi =
binorm(__POLY1hi,__POLY2hi,`rho') -
binorm(__POLY1lo,__POLY2hi,`rho') -
binorm(__POLY1hi,__POLY2lo,`rho') +
binorm(__POLY1lo,__POLY2lo,`rho')
;
#delimit cr
qui replace `lf' = ln( __POLYpi )
end

View File

@ -0,0 +1,612 @@
*! Polychoric correlations -- v.1.4.3, by Stas Kolenikov
program define polychoric, rclass
version 8.2
#delimit ;
syntax varlist(min=2 numeric) [if] [in] [aw fw pw /],
[pca pw VERBose NOLOG SCore(str) dots IMissing NSCore(int 0) noINIT *]
;
#delimit cr
* PCA to perform PCA
* pw for pairwise correlations
* verbose to output the correlation type, rho, s.e., and goodness of fit if applicable
* NOLOG
* score to generate scores from PCA
* dots to entertain the user with % signs
* imissing to believe that the missing of an ordinal variables should be imputed zero
if "`imissing'" == "" {
local imissing not
}
if "`score'"=="" & `nscore'>0 {
di as err "cannot specify nscore without score"
exit 198
}
* else local nscore=5
* this is a bit weird: what if the user specifies -score-
* without the name of the new variable? Then it gets into `options'
if index("`options'","score") {
di as err "must specify new variable prefix with -score-"
exit 198
}
if `:word count `varlist'' == 2{
local verbose verbose
}
tempvar w1
if "`weight'" ~= "" {
qui g double `w1' = `exp'
local www [`weight'=`w1']
}
else {
qui g double `w1' = 1
local www [pw=`w1']
local exp `w1'
}
* that way, we always have weights
if "`score'"~="" {
confirm new var `score'1
}
if "`pw'"==""{
marksample touse
}
else {
marksample touse, novar
}
tokenize `varlist'
local nvar: word count `varlist'
tempname corrmat
mat `corrmat' = J(`nvar',`nvar',.)
forvalues i=1/`nvar' {
mat `corrmat'[`i',`i']=1
}
mat rown `corrmat' = `varlist'
mat coln `corrmat' = `varlist'
* compress `varlist'
local i=1
local ndots = `nvar'*(`nvar'-1)/2
local idots 0
while "``i''"~="" {
local j = `i'+1
while "``j''"~="" {
polych1 ``i'' ``j'' `www' if `touse' , `options' `init'
if "`dots'"~="" {
if int(`idots'/`ndots'*10)-int(`++idots'/`ndots'*10)~=0 {
di as text int(10*`idots'/`ndots') "0%" _c
}
else {
di as text "." _c
}
}
mat `corrmat'[`i',`j']=r(rho)
mat `corrmat'[`j',`i']=r(rho)
if "`verbose'"=="verbose" {
di _n ///
as text "Variables : " as res "``i'' ``j''" _n ///
as text "Type : " as res r(type) _n ///
as text "Rho = " as res r(rho) _n ///
as text "S.e. = " as res r(se_rho)
if "`r(type)'" == "polychoric" {
di as text "Goodness of fit tests:" _n ///
as text "Pearson G2 = " as res r(G2) ///
as text ", Prob( >chi2(" as res r(dfG2) as text")) = " as res r(pG2) _n ///
as text "LR X2 = " as res r(X2) ///
as text ", Prob( >chi2(" as res r(dfX2) as text")) = " as res r(pX2)
}
}
local `j++'
}
local i=`i'+1
tokenize `varlist'
}
return add
if "`verbose'" == "" {
di as text _n "Polychoric correlation matrix"
mat li `corrmat', noheader
}
if "`pca'"~="" {
return clear
polypca `corrmat' `touse' `www' `imissing' `nscore' `score'
return add
/*
if "`score'"~="" {
cap noi sum `score'*
}
*/
}
return matrix R `corrmat'
end
prog def polypca, rclass
* perform PCA with the estimated matrix
args corrmat touse www imissing nscore score
* the correlation matrix
* parse the weights
tokenize `www' , parse(" [=]")
* should become `1' == [, `2' == weight type, `3' == "=", `4' == exp, `5' == ]
local exp `4'
tempname X V value
if `nscore'==0 local nscore .
mat symeigen `X' `V' = `corrmat'
* `V' are the eigenvalues, `X' are the eigenvectors
local nvar = colsof(`corrmat')
local p = min(`nvar',`nscore')
di _n as text "Principal component analysis" _n(2) " k {c |} Eigenvalues {c |} Proportion explained {c |} Cum. explained"
di as text "{dup 4:{c -}}{c +}{dup 15:{c -}}{c +}{dup 24:{c -}}{c +}{dup 18:{c -}}"
local sum=0
forvalues i=1/`nvar' {
return scalar lambda`i' = `V'[1,`i']
local sum = `sum'+return(lambda`i')
#delimit ;
di as res " "`i' as text " {c |} "
as res %9.6f `V'[1,`i'] _col(21) as text "{c |} "
as res %9.6f `V'[1,`i']/`nvar' _col(46) as text "{c |} "
as res %8.6f `sum'/`nvar'
;
#delimit cr
}
if "`score'"~= "" {
* set trace on
local varlist : rownames `X'
tempvar tt ii
** mat li `X'
di as text _n _col(15) "{bf: Scoring coefficients}" _n(2) ///
" Variable {c |} Coeff. 1 {c |} Coeff. 2 {c |} Coeff. 3 " ///
_n "{dup 54:{c -}}" _c
qui foreach x of varlist `varlist' {
noi di _n as res " `x'" _col(16) _c
* is it continuous or discrete?
cap drop `tt'
cap drop __tt`x'
* cap confirm byte var `x'
* need to properly determine if continuous or discrete
cap inspect `x'
if r(N_unique)>9 {
* continuous
* egen __tt`x' = std(`x') if `touse'
sum `x' [iw=`exp']
g double __tt`x' = (`x'-r(mean))/r(sd) if `touse'
forvalues i=1/3 {
noi di as text " {c |} " as res %9.6f /* sqrt(`V'[1,`i'])* */`X'[rownumb(`X',"`x'"),`i'] " " _c
}
}
else {
/*
cap tab `x' if `touse'
if r(r) > 10 | _rc == 134 {
* quasi-continuous
* egen __tt`x' = std(`x') if `touse'
sum `x' [iw=`exp']
g double __tt`x' = (`x'-r(mean))/r(sd) if `touse'
forvalues i=1/3 {
noi di as text " {c |} " as res %9.6f `X'[rownumb(`X',"`x'"),`i'] " " _c
}
}
else {
*/
* discrete; make it a centered categorized normal
** noi di as text " : ordinal" _c
local ncat = r(N_unique)
sum `exp', mean
local N = r(sum)
sort `touse' `x'
cap drop `ii'
egen byte `ii' = group(`x') if `touse'
local p0 = 0.1/`N'
local t0 = invnorm(`p0')
** noi di `t0'
forvalues k=1/`ncat' {
sum `exp' if `ii'<=`k' , mean
local p`k' = (r(sum)-0.5)/`N'
local t`k' = invnorm(`p`k'')
** noi di `t`k''
}
local p`ncat' = (`N'-0.1)/`N'
local t`ncat' = invnorm(`p`ncat'')
** noi di `t`ncat''
gen double __tt`x' = 0 if `touse'
forvalues k=1/`ncat' {
local k1 = `k'-1
scalar `value' = ( exp(-.5*`t`k1''*`t`k1'') - exp(-.5*`t`k''*`t`k'') ) ///
/(sqrt(2*_pi)*(norm(`t`k'')-norm(`t`k1'') ) )
replace __tt`x' = `value' if `touse' & `ii'==`k'
* need to determine what was the original category
sum `x' if `ii'==`k'
noi di _n _col(14) as res %-2.0f r(mean) _c
forvalues i=1/3 {
noi di as text " {c |} " as res %9.6f /* sqrt(`V'[1,`i'])* */ `X'[rownumb(`X',"`x'"),`i']*`value' " " _c
}
}
if "`imissing'" == "imissing" {
replace _tt`x' = 0 if `touse' & mi(`x')
}
** }
}
}
di
nobreak {
qui forvalues i=1/`p' {
* we'll score `p' components prefixed by `score'
gen double `score'`i'=0 if `touse'
foreach x of varlist `varlist' {
replace `score'`i' = `score'`i' + /* sqrt(`V'[1,`i'])* */ `X'[rownumb(`X',"`x'"),`i']*__tt`x' if `touse'
}
}
}
}
return matrix eigenvalues `V'
return matrix eigenvectors `X'
polyquit
end
prog def polych1, rclass
syntax varlist(numeric min=2 max=2) if [aw fw pw /] [, noINIT * ]
*** !!! <20><><EFBFBD><EFBFBD><EFBFBD>-<2D><> <20><><EFBFBD><EFBFBD><EFBFBD> <20> score nscore
local www [`weight'=`exp']
marksample touse
local x1 `1'
local x2 `2'
forvalues i = 1/2 {
/* prior to 1.3.2
cap confirm byte var `x`i''
if _rc==7 {
local call `call'c
continue
}
cap tab `x`i''
* if more than 10 categories -- treat as continuous
if r(r)>9 | _rc==134 {
*/
cap inspect `x`i''
if r(N_unique)>9 {
local call `call'c
}
else {
local call `call'd
}
}
if "`call'"=="cc" {
* both are continuous
** qui corr `x1' `x2' `www' if `touse'
tempname A
qui mat accum `A' = `x1' `x2' `www' if `touse' , nocons dev
return scalar sum_w = r(N)
qui count if !mi(`x1') & !mi(`x2') & !mi(`exp')
return scalar N=r(N)
mat `A' = corr(`A')
return scalar rho = `A'[1,2]
return local type Pearson
return scalar se_rho = sqrt( (1-return(rho)*return(rho))/(return(N)-2) )
exit
}
if "`call'"=="dc" {
* the first variable has to be continuous, and the second, discrete
local call cd `x2' `x1' `www' if `touse'
return local type polyserial
}
if "`call'"=="cd" {
* the first variable has to be continuous, and the second, discrete
local call cd `x1' `x2' `www' if `touse'
return local type polyserial
}
if "`call'"=="dd" {
* the first variable has to be continuous, and the second, discrete
local call dd `x2' `x1' `www' if `touse'
return local type polychoric
}
cap noi corr`call' , `init' `options'
***********
* set trace off
if _rc==1 {
polyquit
exit 1
}
return add
end
prog def corrdd, rclass sort
* the module to compute the polychoric correlation
****************
* set tracedepth 2
* set trace on
syntax varlist(numeric min=2 max=2) if [aw fw pw /], [ * noINIT ITERate(int 50) SEArch(str) ]
cap confirm integer number `search'
if !_rc {
local searchstr search(quietly) repeat(`search')
}
else {
if "`search'" == "" {
local searchstr search(off)
}
else {
local searchstr search(`search')
}
}
marksample touse
* compute the thresholds
tempvar x1 x2
cap drop __POLY*
eret clear
ret clear
qui forvalues k = 1/2 {
sort `touse' ``k''
egen `x`k'' = group(``k'') if `touse'
tab `x`k''
local r`k' = r(r)
sum `exp' if `touse', meanonly
local N = r(sum)
return scalar sum_w = `N'
return scalar N = r(N)
gen __POLY`k'hi = .
gen __POLY`k'lo = .
forvalues h = 1/`r`k'' {
* create the variables: upper threshold - lower threshold
local h1 = `h'-1
sum `exp' if `x`k''<=`h' & `touse', meanonly
replace __POLY`k'hi = cond(`h'==`r`k'',10,invnorm( (r(sum)-.5)/`N' ) ) if `x`k'' == `h' & `touse'
sum `exp' if `x`k'' <= `h1' & `touse', meanonly
replace __POLY`k'lo = cond(`h'==1,-10,invnorm( (r(sum)-.5)/`N' ) ) if `x`k'' == `h' & `touse'
}
}
qui corr `1' `2' if `touse'
local mcorr = r(rho)
local mcorr = sign(`mcorr')*min(0.9, (1+abs(`mcorr'))/2)
if "`init'" == "noinit" {
local initstr init(_cons = `mcorr')
}
else {
local initstr
}
* shouldn't -collapse- come somewhere here so that we don't have
* to compute a complicated bivariate distribution for too many observations?
*
* needed further: __POLY([1|2][hi|lo]&pi); sum of weights; `touse'
* no, for some reason, it did not work: the s.e.s are wrong
qui gen double __POLYpi = .
preserve
qui keep if `touse'
cap ml model lf polych_ll (rho: `touse' =) if `touse' /// [fw=`counts']
[`weight' = `exp'] ///
, maximize `options' search(off) bounds(rho: -1 1) init(_cons = 0) iter(0) nolog
if _rc==1 {
polyquit
exit 1
}
local ll0c = e(ll)
cap noi ml model lf polych_ll (rho: `touse' =) if `touse' /// [fw=`counts']
[`weight' = `exp'] ///
, maximize `options' `searchstr' bounds(rho: -1 1) ///
`initstr' iter(`iterate') nolog
local rc=_rc
if `rc'==1 {
polyquit
exit 1
}
else if `rc' {
cap noi ml model lf polych_ll (rho: `touse' =) if `touse' /// [fw=`counts']
[`weight' = `exp'] ///
, maximize `options' search(quietly) repeat(10) bounds(rho: -1 1) ///
iter(`iterate') nolog
}
return scalar rho = _b[_cons]
return scalar se_rho = _se[_cons]
collapse (sum) `exp' (mean) `touse' (mean) __POLY* if `touse', by(`x1' `x2')
* tests
* null hypothesis: no structure
local df0 = `r1'*`r2' - `r1' - `r2'
tempvar ll pp
tempvar counts
qui g `ll' = sum( `exp'*ln(`exp'/`N') )
local ll0 = `ll'[_N]
* Likelihood ratio
return scalar G2 = 2*(`ll0'-e(ll))
return scalar dfG2 = `df0'
return scalar pG2 = chi2tail(`df0',return(G2))
* Pearson chi-square
qui g double `pp' = sum( ((`exp'/`N'-__POLYpi)^2)/__POLYpi )
return scalar X2 = `pp'[_N]*`N'
return scalar dfX2 = `df0'
return scalar pX2 = chi2tail(`df0',return(X2))
restore
* no correlation
return scalar LR0 = -2*(`ll0c'-e(ll))
return scalar pLR0 = chi2tail(1,return(LR0))
polyquit
end
prog def corrcd, rclass
* the module to compute the polyserial correlation
syntax varlist(numeric min=2 max=2) if [aw fw pw /], [ * ITERate(int 50) noINIT SEArch(str) ]
* the first variable is continuous, the second is discrete
cap confirm integer number `search'
if !_rc {
local searchstr search(quietly) repeat(`search')
}
else {
if "`search'" == "" {
local searchstr search(off)
}
else {
local searchstr search(`search')
}
}
marksample touse
* thresholds for the discrete part
tempvar x1 x2
cap drop __POLY*
ret clear
qui{
* egen `x1' = std(`1') if `touse'
sum `1' [iw=`exp']
g double `x1' = (`1'-r(mean))/r(sd) if `touse'
sort `touse' `2' `1'
egen `x2' = group(`2') if `touse'
tab `x2' if `touse'
local r2 = r(r)
sum `exp' if `touse', mean
local N = r(sum)
return scalar N = r(N)
return scalar sum_w = `N'
gen __POLY2hi = .
gen __POLY2lo = .
forvalues h = 1/`r2' {
* create the variables: upper threshold - lower threshold
local h1 = `h'-1
sum `exp' if `x2'<=`h' & `touse'
replace __POLY2hi = cond(`h'==`r2',10,invnorm( (r(sum)-.5)/`N' ) ) if `x2' == `h' & `touse'
sum `exp' if `x2' <= `h1' & `touse'
replace __POLY2lo = cond(`h'==1,-10,invnorm( (r(sum)-.5)/`N' ) ) if `x2' == `h' & `touse'
}
spearman `1' `2' if `touse'
local mcorr = r(rho)
}
if "`init'" == "noinit" {
local initstr init(_cons = `mcorr')
}
else {
local initstr init(_cons = 0)
}
eret clear
cap ml model lf polyser_ll (rho: `x1' =) if `touse' [`weight'=`exp'] , ///
maximize `options' search(off) bounds(rho: -1 1) init(_cons = 0) iter(0) nolog
if _rc==1 {
polyquit
exit 1
}
local ll0c = e(ll)
cap noi ml model lf polyser_ll (rho: `x1' =) if `touse' [`weight'=`exp'] , ///
maximize `options' `searchstr' bounds(rho: -1 1) `initstr' nolog iter(`iterate')
local rc=_rc
if `rc'==1 {
polyquit
exit 1
}
if `rc' {
cap noi ml model lf polyser_ll (rho: `x1' =) if `touse' [`weight'=`exp'] , ///
maximize `options' `bounds(rho: -1 1) nolog iter(`iterate') ///
search(quietly) repeat(10)
}
* return the correlation coefficient
return scalar rho = _b[_cons]
return scalar se_rho = _se[_cons]
* no correlation
return scalar LR0 = -2*(`ll0c'-e(ll))
return scalar pLR0 = chi2tail(1,return(LR0))
end
pro def polyquit
cap drop __POLY*
end
exit
History:
v.1.1 -- Aug 2003
The basic development of everything
v.1.2 -- November 2003
-- weights accomodated
-- imissing option
v.1.3 -- February 2004
-- polychoricpca as a separate command
-- nscore option added -- changed the order of arguments in polychoric.polypca
v.1.3.2 -- -inspect- is used to count the number of categories in place
of -tab-; no need to -compress-
v.1.3.3 -- iterate, search, and other stuff to failsafe convergence
v.1.3.4 -- init string changed, score option clarified
v.1.4 -- weights dealt with properly
v.1.4.1 -- April 27, 2004
-- bug with PCA fixed (categorical variables not recognized properly)
v.1.4.2 -- output the original category numbers
v.1.4.3 -- the default matrix is filled with missing values rather than zeroes

View File

@ -0,0 +1,228 @@
{smcl}
{.-}
help for {cmd:polychoric} and {cmd:polychoricpca} {right:author: {browse "http://www.komkon.org/~tacik/stata/":Stas Kolenikov}}
{.-}
{title:Polychoric and polyserial correlations}
{p 8 27}
{cmd:polychoric}
{it:varlist}
[{it:weight}]
[{cmd:if} {it:exp}] [{cmd:in} {it:range}]
[{cmd:,}
{cmd:pw}
{cmdab:verb:ose}
{cmd:nolog}
{cmd:dots}
]
{p 8 27}
{cmd:polychoricpca}
{it:varlist}
[{it:weight}]
[{cmd:if} {it:exp}] [{cmd:in} {it:range}]
[{cmd:,}
{cmdab:sc:ore}{cmd:(}{it:prefix}{cmd:)}
{cmdab:nsc:ore}{cmd:(}{it:#}{cmd:)}
]
{title:Description}
{p}{cmd:polychoric} estimates polychoric and polyserial correlations,
and {cmd:polychoricpca} performs the principal component analysis on
the resulting correlation matrix. The current version (1.4) of the
routine requires Stata 8.2.
{p}The polychoric correlation of two ordinal variables is derived as follows.
Suppose each of the ordinal variables was obtained by categorizing a normally
distributed underlying variable, and those two unobserved variables follow
a bivariate normal distribution. Then the (maximum likelihood) estimate
of that correlation is the polychoric correlation. If each of the ordinal
variables has only two categories, then the correlation between the two
variables is referred to as tetrachoric.
{p}A closely related concept is that of a polyserial correlation. It is defined
in a similar manner when one variable is continuous (assumed normal) and
an ordinal variable. If there are only two categories of the latter, then
the correlation is referred to as biserial.
{p}If the number of the categories of one of the variables is greater than
10, {cmd:polychoric} treats it is continuous, so the correlation of two
variables that have 10 categories each would be simply the usual
Pearson moment correlation found through {help correlate}.
{p}Make sure you read {bf:Remarks} about the known problems
in the end of this help file! If you are coming from development/health
economics research literature, you would also benefit from having
a look at our paper on polychoric PCA.
{title:Options of {cmd:polychoric}}
{p 0 4}{cmd:dots} entertains the user by displaing dots for each
estimated correlation.
{p 0 4}{cmd:nolog} suppresses the log from the maximum likelihood estimation.
{p 0 4}{cmd:pw} fills the entries of the correlation matrix with the
pairwise correlation. If this option is not specified, then, similarly
to {help correlate}, it uses the same subsample for all of the
correlations.
{p 0 4}{cmd:verbose} for each estimated correlation displays the
names of the variables, the type of the estimated correlation
(polychoric, polyserial, or Pearson moment correlation).
{cmd:polychoric} will default to this option if there are only
two input variables. If there are more than two variables,
{cmd:polychoric} will not show anything, so you would need
to address the returned values (see below).
{title:Options of {cmd:polychoricpca}}
{p 0 4}{cmd:score} is the prefix for the variables to be generated
to contain the principal component scores.
{p 0 4}{cmd:nscore} specifies the number of score variables to be generated.
{cmd:polychoricpca} will show the output from the first three eigenvalues,
at most.
{title:Returned values}
{cmd:polychoric} sets the following set of {help return} values.
{p 0 4}{cmd:r(R)} (matrix) is the estimated correlation matrix{p_end}
{p 0 4}{cmd:r(type)} (local) is the type of estimated correlation, one of
{it:polychoric}, {it:polyserial}, or {it:Pearson}{p_end}
{p 0 4}{cmd:r(rho)} is the estimated correlation{p_end}
{p 0 4}{cmd:r(se_rho)} is the estimated standard error of the correlation{p_end}
{p 0 4}{cmd:r(N)} is the number of observations used{p_end}
{p 0 4}{cmd:r(LR0)} and {cmd:r(pLR0)} are the results of the likelihood ratio
test of no correlation
{p}In addition, if both variables are ordinal, the specification tests
on normality are performed that compare the empirical proportions of
the cells with the theoretical ones implied by normality, together
with estimated polychoric correlation. The tests are not available
for a 2x2 case as the tests have zero degrees of freedom.
The returned results are:
{p 0 4}{cmd:r(X2)}, {cmd:r(dfX2)} and {cmd:r(pX2)} are the observed
test statistic, degrees of freedom, and the corresponding p-value of Pearson chi-square test: ;{p_end}
{p 0 4}{cmd:r(G2)}, {cmd:r(dfG2)} and {cmd:r(pG2)} are the observed
test statistic, degrees of freedom, and the corresponding p-value of the
likelihood ratio test.{p_end}
{p}If there are more than two input variables, then the returned values
correspond to the last estimated pair, in the manner similar to
{help correlate}.
{p}{cmd:polychoricpca} returns the matrices of eigenvectors, eigenvalues,
and the correlation matrix, as well as a few largest eigenvalues corresponding
to the number of scores requested.
{title:Example}
{.-}
{com}. use c:\stata8\auto
{txt}(1978 Automobile Data)
{com}. polychoric rep78 foreign
{txt}Variables : {res}rep78 foreign
{txt}Type : {res}polychoric
{txt}Rho = {res}.80668059
{txt}S.e. = {res}.07631279
{txt}Goodness of fit tests:
Pearson G2 = {res}.43127115{txt}, Prob( >chi2({res}3{txt})) = {res}.93370948
{txt}LR X2 = {res}.38908216{txt}, Prob( >chi2({res}3{txt})) = {res}.94248852
{txt}
{com}. return list
{txt}scalars:
r(pLR0) = {res}5.12057153705e-08
{txt}r(LR0) = {res}29.67059428252011
{txt}r(pX2) = {res}.9424885157334509
{txt}r(dfX2) = {res}3
{txt}r(X2) = {res}.3890821586898692
{txt}r(pG2) = {res}.9337094786275901
{txt}r(dfG2) = {res}3
{txt}r(G2) = {res}.4312711544473018
{txt}r(se_rho) = {res}.0763127851819864
{txt}r(rho) = {res}.8066805935187174
{txt}r(N) = {res}69
{txt}r(sumw) = {res}69
{txt}macros:
r(type) : "{res}polychoric{txt}"
matrices:
r(R) : {res} 2 x 2
{txt}
{com}. polychoric foreign mpg
{txt}Variables : {res}foreign mpg
{txt}Type : {res}polyserial
{txt}Rho = {res}.48603372
{txt}S.e. = {res}.11286311
{txt}
{com}. polychoricpca foreign mpg rep78
{txt} k {c |} Eigenvalues {c |} Proportion explained {c |} Cum. explained
{dup 4:{c -}}{c +}{dup 15:{c -}}{c +}{dup 24:{c -}}{c +}{dup 18:{c -}}
{res} 1{txt} {c |} {res} 2.206757{col 21}{txt}{c |} {res} 0.735586{col 46}{txt}{c |} {res}0.735586
2{txt} {c |} {res} 0.615445{col 21}{txt}{c |} {res} 0.205148{col 46}{txt}{c |} {res}0.940734
3{txt} {c |} {res} 0.177798{col 21}{txt}{c |} {res} 0.059266{col 46}{txt}{c |} {res}1.000000
{txt}
{com}. return list
{txt}scalars:
r(lambda3) = {res}.1777976956026297
{txt}r(lambda2) = {res}.6154453299437229
{txt}r(lambda1) = {res}2.206756974453646
{txt}matrices:
r(R) : {res} 3 x 3
{txt}r(eigenvectors) : {res} 3 x 3
{txt}r(eigenvalues) : {res} 1 x 3
{txt}
{com}. matrix list r(R)
{txt}symmetric r(R)[3,3]
foreign mpg rep78
foreign {res} 1
{txt} mpg {res}.55443556 1
{txt} rep78 {res}.80668065 .42655387 1
{txt}
{.-}
{title:Remarks}
{p}{cmd:polychoric} is a bit sloppy with options. It assumes
the user might want to specify some {help maximize:maximization options}
for the {help ml} command, so anything it does not recognize as its
own option is getting transferred to the {cmd:ml}. That may cause
an error in the latter.
{p}The standard error for the Pearson moment correlation does not
account for weights properly. That will be fixed later if anybody
needs that standard error.
{title:Reference}
{p 0 4}{bind:}Kolenikov, S., and Angeles, G. (2004). The Use of Discrete Data
in Principal Component Analysis With Applications to Socio-Economic Indices.
CPC/MEASURE Working paper No. WP-04-85.
{browse "https://www.cpc.unc.edu/measure/publications/pdf/wp-04-85.pdf":Full text in PDF format}
{p_end}
{title:Also see}
{p 0 21}{bind:}Online: help for {help correlate}, {help tetrac} (if installed)
{p_end}
{p 0 21}{bind:} Internet: {browse "http://www.google.com/search?q=polychoric%20correlation":Google search}{p_end}
{title:Contact}
Stas Kolenikov, skolenik@unc.edu

Binary file not shown.

View File

@ -0,0 +1,23 @@
*! Principal component analysis based on polychoric correlations
*! Author: Stas Kolenikov, skolenik@unc.edu. Version 1.0
program define polychoricpca, rclass
syntax varlist(numeric min=2) [aw pw fw /], [SCore(passthru) NSCore(passthru) nolog *]
if "`score'"!="" & "`nscore'"=="" {
di as err "how many score variables?"
exit 198
}
if "`exp'"=="" {
tempvar ww
qui g byte `ww'=1
local exp `ww'
local weight pw
}
polychoric `varlist' [`weight'=`exp'] , pca nolog `score' `nscore' `options'
return add
end

View File

@ -0,0 +1 @@
.h polychoric

Binary file not shown.

View File

@ -0,0 +1,17 @@
program define polyser_ll
version 8.1
args lf rho
#delimit ;
qui replace `lf' =
ln(
(
norm( (__POLY2hi - `rho'*$ML_y1)/sqrt(1-`rho'*`rho') ) -
norm( (__POLY2lo - `rho'*$ML_y1)/sqrt(1-`rho'*`rho') )
) * normden($ML_y1)
)
;
#delimit cr
end

View File

@ -0,0 +1,64 @@
program def poslist, rclass
*! was indlist
*! NJC 1.2.0 6 June 2000
* NJC 1.1.0 22 Dec 1999
* NJC 1.0.0 14 Oct 1999
version 6.0
gettoken lists 0 : 0, parse(",")
if "`lists'" == "" | "`lists'" == "," { /* no \ */
di in r "incorrect syntax: no separator"
exit 198
}
tokenize "`lists'", parse("\")
if "`4'" != "" {
di in r "incorrect syntax: too much stuff"
exit 198
}
if "`1'" == "\" { /* list1 empty */
if "`2'" == "\" {
di in r "incorrect syntax: one \ only"
exit 198
}
local list2 "`2'" /* might be empty */
}
else if "`2'" == "\" {
local list1 "`1'"
local list2 "`3'" /* might be empty */
}
else {
di in r "incorrect syntax: what to compare?"
exit 198
}
syntax [ , Global(str) Noisily ]
if length("`global'") > 8 {
di in r "global name must be <=8 characters"
exit 198
}
local n1 : word count `list1'
local n2 : word count `list2'
tokenize `list2'
local j = 1
while `j' <= `n2' {
local index 0
local i = 1
while `index' == 0 & `i' <= `n1' {
local word : word `i' of `list1'
if "`word'" == "``j''" { /* found it */
local index = `i'
}
local i = `i' + 1
}
local newlist "`newlist' `index'"
local j = `j' + 1
}
if "`noisily'" != "" { di "`newlist'" }
if "`global'" != "" { global `global' "`newlist'" }
return local list `newlist'
end

View File

@ -0,0 +1,2 @@
.h listutil

View File

@ -0,0 +1,37 @@
program def postlist, rclass
*! NJC 1.3.0 6 June 2000
* NJC 1.2.0 22 Dec 1999
* NJC 1.0.0 12 Nov 1999
version 6.0
gettoken list 0 : 0, parse(",")
if "`list'" == "" | "`list'" == "," {
di in r "nothing in list"
exit 198
}
syntax , Post(str) [ Global(str) Sep Noisily ]
if length("`global'") > 8 {
di in r "global name must be <=8 characters"
exit 198
}
tokenize `list'
local n : word count `list'
if "`sep'" != "" {
local last = `n'
local n = `n' - 1
}
local i = 1
while `i' <= `n' {
local newlist "`newlist'``i''`post' "
local i = `i' + 1
}
if "`sep'" != "" { local newlist "`newlist'``last''" }
if "`noisily'" != "" { di "`newlist'" }
if "`global'" != "" { global `global' "`newlist'" }
return local list `newlist'
end

View File

@ -0,0 +1,2 @@
.h listutil

View File

@ -0,0 +1,237 @@
*! version 1.7.2 2Nov2005 ztp ztnb
* version 1.7.1 13Apr2005
* version 1.7.0 27Mar2004 slogit
* version 1.6.4 27Apr2001 change to work with forvalues
* version 1.6.3 17Mar2001
capture program drop praccum
program define praccum
version 6
tempname newmat
*=> classify each valid type of model
if "`e(cmd)'"=="cloglog" { local io = "typical binary" }
if "`e(cmd)'"=="cnreg" { local io = "typical regress" }
if "`e(cmd)'"=="fit" { local io = "typical regress" }
if "`e(cmd)'"=="gologit" { local io = "typical mlogit" }
if "`e(cmd)'"=="intreg" { local io = "typical regress" }
if "`e(cmd)'"=="logistic" { local io = "typical none" }
if "`e(cmd)'"=="logit" { local io = "typical binary" }
if "`e(cmd)'"=="mlogit" { local io = "typical mlogit" }
if "`e(cmd)'"=="nbreg" { local io = "typical count" }
if "`e(cmd)'"=="ologit" { local io = "typical ordered" }
if "`e(cmd)'"=="oprobit" { local io = "typical ordered" }
if "`e(cmd)'"=="poisson" { local io = "typical count" }
if "`e(cmd)'"=="probit" { local io = "typical binary" }
if "`e(cmd)'"=="regress" { local io = "typical regress" }
if "`e(cmd)'"=="slogit" { local io = "typical ordered" }
if "`e(cmd)'"=="tobit" { local io = "typical regress" }
if "`e(cmd)'"=="zinb" { local io = "twoeq count" }
if "`e(cmd)'"=="zip" { local io = "twoeq count" }
if "`e(cmd)'"=="ztp" { local io = "typical count" }
if "`e(cmd)'"=="ztnb" { local io = "typical count" }
if "`io'"=="" {
di
di in y "praccum" in r /*
*/ " does not work for the last type of model estimated."
exit
}
local input : word 1 of `io' /* input routine to _pepred */
local output : word 2 of `io' /* output routine */
*=> decode specified input
syntax [, Saving(string) Using(string) GENerate(string) XIS(string)]
*truncate generate root if more than five characters
if "`generate'" ~= "" {
local gen = substr("`generate'",1,29)
cap version 7
if _rc != 0 { local gen = substr("`gen'",1,5) }
version 6.0
}
if "`output'" == "ordered" | "`output'" == "mlogit" {
tempname values
mat `values' = r(values)
local outcms = rowsof(r(probs))
local count = 1
while `count' <= `outcms' {
local k`count' = `values'[`count',1]
if `k`count'' < -9 | `k`count'' > 99 | int(`k`count'')!=`k`count'' {
di in red "category values must be integers between -9 and 99"
exit 198
}
if `k`count'' < 0 {
local k`count' = abs(`k`count'')
local k`count' = "_`k`count''"
}
local count = `count' + 1
}
}
if "`xis'"!="" {
tempname results
if "`output'" == "regress" {
matrix `results' = ( `xis' , r(xb) )
}
if "`output'" == "binary" {
* grab output from binary model
matrix `results' = ( `xis' , r(p0) , r(p1) )
}
if "`output'" == "ordered" | "`output'" == "mlogit" {
tempname probs newprob
mat `probs' = r(probs)
local outcms = rowsof(r(probs))
matrix `results' = `xis'
local count = 1
while `count' <= `outcms' {
matrix `newprob' = `probs'[`count', 1]
matrix `results' = `results' , `newprob'
local count = `count' + 1
}
}
if "`output'" == "count" {
tempname probs newprob values
mat `values' = r(values)
mat `probs' = r(probs)
local outcms = rowsof(r(probs))
local rmu = r(mu)
matrix `results' = `xis', `rmu'
local count = 1
while `count' <= `outcms' {
matrix `newprob' = `probs'[`count', 1]
matrix `results' = `results' , `newprob'
local count = `count' + 1
}
}
if "`input'" == "twoeq" & "`output'"=="count" {
tempname az
matrix `az' = r(always0)
matrix `results' = `results' , `az'
}
*=> saving is the initial run
if "`saving'" ~= "" { mat `saving' = `results' }
if "`using'" ~= "" {
cap mat list `using'
if _rc ~= 0 {
mat `using' = `results'
* OLD SYNTAX: error if `using' matrix does not already exist
* di in r "matrix `using' does not exist"
* exit 111
}
else { mat `using' = (`using') \ (`results') }
}
}
*=> generates creates the new variables
if "`gen'" ~= "" {
if "`output'" == "regress" {
local columns = "`gen'x `gen'xb"
}
if "`output'" == "binary" {
local columns = "`gen'x `gen'p0 `gen'p1"
}
if "`output'" == "ordered" | "`output'" == "mlogit" {
local columns "`gen'x"
local outcms = rowsof(r(probs))
local count = 1
while `count' <= `outcms' {
local columns "`columns' `gen'p`k`count''"
local count = `count' + 1
}
}
if "`output'" == "count" {
local columns "`gen'x `gen'mu"
local outcms = rowsof(r(probs))
local count = 0
while `count' <= (`outcms'-1) {
local columns "`columns' `gen'p`count'"
local count = `count' + 1
}
if "`input'"=="twoeq" {
local columns "`columns' `gen'inf"
}
}
* create new variables
matrix colnames `using' = `columns'
svmat `using', names(col)
*=> label variables
label variable `gen'x "value of x"
if "`output'" == "regress" {
label variable `gen'xb "value of xb"
}
if "`output'" == "binary" {
label variable `gen'p0 "Pr(0)"
label variable `gen'p1 "Pr(1)"
}
if "`output'" == "ordered" {
tempname values
mat `values' = r(values)
local outcms = rowsof(r(probs))
local count = 1
while `count' <= `outcms' {
local count2 = `count'
label variable `gen'p`k`count'' "Pr(`k`count'')"
local count = `count' + 1
}
}
if "`output'" == "mlogit" {
tempname values
mat `values' = r(values)
local outcms = rowsof(r(probs))
local count = 1
while `count' <= `outcms' {
local value = `values'[`count', 1]
local count2 = `count'
label variable `gen'p`k`count'' "Pr(`k`count'')"
local count = `count' + 1
}
}
if "`output'" == "count" {
tempname values
mat `values' = r(values)
local outcms = rowsof(r(probs))
local count = 1
while `count' <= `outcms' {
local value = `values'[`count', 1]
label variable `gen'p`value' "Pr(`value')"
local count = `count' + 1
}
if "`input'"=="twoeq" {
label variable `gen'inf "Pr(always0)"
}
}
*=> for ordered and count variables, generate cumulative counts
if "`output'" == "ordered" {
local outcms = rowsof(r(probs))
local count = 1
while `count' <= `outcms' {
qui egen `gen's`k`count'' = rsum(`gen'p`k1'-`gen'p`k`count'') if `gen'p`k1'~=.
local cumul = "`cumul'`gen's`k`count'' "
label variable `gen's`k`count'' "Pr(<=`k`count'')"
local count = `count' + 1
}
}
if "`output'" == "count" {
local outcms = rowsof(r(probs))
local count = 0
while `count' <= (`outcms'-1) {
qui egen `gen's`count' = rsum(`gen'p0-`gen'p`count') if `gen'p0~=.
local cumul = "`cumul'`gen's`count' "
label variable `gen's`count' "Pr(<=`count')"
local count = `count' + 1
}
}
*=> display new variables
di _n in g "New variables created by" in w " praccum" in y ":"
sum `columns' `cumul'
} /* generate */
end

View File

@ -0,0 +1,162 @@
.-
help for ^praccum^ - 1.6.4 - 2Nov2005
.-
Accumulate results from ^prvalue^
-------------------------------
^praccum^, [^xis(^value^)^ ^u^sing^(^matrixnm^)^ ^s^aving^(^matrixnm^)^ ^gen^erate^(^rootname^)^]
where either saving() or using() are required.
Description
-----------
^praccum^ accumulates predictions from a series of calls to ^prvalue^ and
optionally saves these accumluated values to variables. These variables can
then be plotted. This command allows you to plot predicted values in
situations that cannot be handled by ^prgen^ (e.g., nonlinearities).
The command works with cloglog, cnreg, intreg, logit, mlogit, mprobit, nbreg,
ologit, oprobit, poisson, probit, regress, slogit, tobit, zinb, zip, ztnb,
and ztp.
Options
-------
^xis(^value^)^ specifies the value of the x-variable associated with the predicted
values that are being accumulated. If ^xis^ is not specified, new values
are not accumulated.
^using(^matrixnm^)^ specifies the name of matrix to which accumulated results
should be added. ^matrixnm^ will be created if it does not exist.
^saving(^matrixnm^)^: is only used to save the initial results and differs from
differs from ^using()^ in that it will overwrite ^matrixnm^ if it exists.
^generate(^rootname^)^: root name of variables to be created from the matrix
specified by ^using^. This is only used when you are done accumulating
results and are ready to generate the variables.
Examples of included squared terms
----------------------------------
Consider the logit:
^. use binlfp,clear^
^. gen age2 = age*age^
^. logit lfp k5 k618 age age2 wc hc lwg inc^
If you want to plot the predictions against age, you cannot use ^prgen^ since
when age changes, age2 must also change. The command:
^. prvalue , x(age=20 age2=400) rest(mean)^
computes predicted values for age=20 and age2=20*20=400. The command:
^. praccum , saving(mage) xis(20)^
creates a matrix named mage that contains three columns. The first column will
have a 20 for the value of age; the second the probability of a 0 given the
values of the independent variables used in ^prvalue^, and the third column
will have the probability of a 1. We now change the value of age and add this
to the matrix mage:
^. prvalue , x(age=25 age2=625) rest(mean)^
^. praccum , using(mage) xis(25)^
Here we are just adding a row to mage. This process repeats for other values:
^. prvalue , x(age=30 age2=900) rest(mean)^
^. praccum , using(mage) xis(30)^
^. prvalue , x(age=35 age2=1225) rest(mean)^
^. praccum , using(mage) xis(35)^
^. prvalue , x(age=40 age2=1600) rest(mean)^
^. praccum , using(mage) xis(40)^
^. prvalue , x(age=45 age2=2025) rest(mean)^
^. praccum , using(mage) xis(45)^
^. prvalue , x(age=50 age2=2500) rest(mean)^
^. praccum , using(mage) xis(50)^
^. prvalue , x(age=55 age2=3025) rest(mean)^
^. praccum , using(mage) xis(55)^
^. prvalue , x(age=60 age2=3600) rest(mean)^
^. praccum , using(mage) xis(60) gen(agsq)^
Produces the output:
^New variables created by praccum:^
^Variable | Obs Mean Std. Dev. Min Max^
^---------+-----------------------------------------------------^
^ agsqx | 9 40 13.69306 20 60^
^ agsqp0 | 9 .4282142 .1752595 .2676314 .7479599^
^ agsqp1 | 9 .5717858 .1752595 .2520402 .7323686 ^
Which can be plotted:
^. graph agsqp1 agsqx,c(s)^
Example using ^forvalues^
-------------------------
The ^forvalues^ command makes using ^praccum^ much simpler. The
following yields the same output as the example above:
^. capture matrix drop mage^
^. forvalues count = 20(5)60 {^
^. local countsq = `count'*`count'^
^. prvalue, x(age `count' age2 `countsq') rest(mean) brief^
^. praccum, using(mage) xis(`count')^
^. }^
^. praccum, using(mage) gen(agsq)^
Example using global macros
---------------------------
^forvalues^ is not available for Stata 6. Here, the task can still be
simplified by using global macros. The advantage of this approach is
that you can let Stata do the multiplying:
^. global age = 20^
^. global age2 = $age*$age^
^. prvalue , x(age=$age age2=$age2) rest(mean)^
^. praccum , saving(mage) xis($age)^
^. global age = 25^
^. global age2 = $age*$age^
^. prvalue , x(age=$age age2=$age2) rest(mean)^
^. praccum , using(mage) xis($age)^
^. global age = 30^
^. global age2 = $age*$age^
^. prvalue , x(age=$age age2=$age2) rest(mean)^
^. praccum , using(mage) xis($age)^
^. global age = 35^
^. global age2 = $age*$age^
^. prvalue , x(age=$age age2=$age2) rest(mean)^
^. praccum , using(mage) xis($age)^
^. global age = 40^
^. global age2 = $age*$age^
^. prvalue , x(age=$age age2=$age2) rest(mean)^
^. praccum , using(mage) xis($age)^
^. global age = 45^
^. global age2 = $age*$age^
^. prvalue , x(age=$age age2=$age2) rest(mean)^
^. praccum , using(mage) xis($age)^
^. global age = 50^
^. global age2 = $age*$age^
^. prvalue , x(age=$age age2=$age2) rest(mean)^
^. praccum , using(mage) xis($age)^
^. global age = 55^
^. global age2 = $age*$age^
^. prvalue , x(age=$age age2=$age2) rest(mean)^
^. praccum , using(mage) xis($age)^
^. global age = 60^
^. global age2 = $age*$age^
^. prvalue , x(age=$age age2=$age2) rest(mean)^
^. praccum , using(mage) xis($age) gen(agsq)^
^. graph agsqp1 agsqx,c(s)^
.-
Authors: J. Scott Long - jslong@@indiana.edu
Jeremy Freese - jfreese@@ssc.wisc.edu
www.indiana.edu/~jslsoc/

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,130 @@
.-
help for ^prchange^ - 2Nov2005
.-
Discrete and marginal change for regression models for categorical and
count variables
------------------------------------------------------------------------------
^prchange^ [varlist] [^if^ exp] [^in^ range] [, ^x(^variables_and_values^)^
^r^est^(^stat^)^ ^o^utcome^(^#^)^ ^f^romto ^b^rief ^noba^se ^nola^bel ^h^elp
^all^ ^unc^entered ^d^elta^(^#^)^ ^con^ditional]
where
^variables_and_values^ is an alternating list of variables and either
numeric values or mean, median, min, max, upper, lower, previous
^stat^ is either mean, median, min, max, upper, lower, previous, grmean
(group mean), grmedian, grmin, grmax
Description
-----------
^prchange^ computes discrete and marginal change for regression models for
categorical and count variables. Marginal change is the partial derivative
of the predicted probability or predicted rate with respect to the
independent variables. Discrete change is the difference in the predicted
value as one independent variable changes values while all others are held
constant at specified values.
By default, the discrete and marginal change is calculated holding all other
variables at their mean. Values for specific independent variables can be
set using the x() option after ^prchange^. For example, to compute predicted
values when educ is 10 and age is 30, type ^prchange, x(educ=10 age=30)^.
Values for the unspecified independent variables can be set using the rest()
option, e.g., ^prchange, x(educ=10 age=30) rest(mean)^. The ^if^ and ^in^
conditions specify conditions for computation of means, min, etc., that are
used with rest().
The discrete change is computed when a variable changes from its minimum to
its maximum (Min->Max), from 0 to 1 (0->1), from its specified value minus
.5 units to its specified value plus .5 (-+1/2), and from its specified
value minus .5 standard deviations to its value plus .5 standard deviations
(-+sd/2).
The command works with cloglog, cnreg, intreg, logit, mlogit, mprobit, nbreg,
ologit, oprobit, poisson, probit, regress, slogit, tobit, zinb, zip, ztnb,
and ztp.
Options
-------
^x()^ explicitly sets the values of specified independent variables.
^rest()^ sets the independent variables not specified in x() to their ^mean^
(default), ^minimum^, ^maximum^, ^median^ when calculating predicted values.
^grmean^ sets these independent variables to the mean conditional on the
variables and values specified in x(). If ^prvalue^ has already been run
after the last estimate, ^previous^ will set unspecified variables to their
prior values. For models other than mlogit, ^upper^ and ^lower^ can be used
to set independent variables to their minimum or maximum depending on
which will yield the upper or lower extreme predicted value.
^outcome()^ specifies that changes will be printed only for the outcome
indicated. For ^ologit^, ^oprobit^, ^mlogit^ and ^slogit^, the default is
to provide results for all outcomes. For the count models, the default is
to present results with respect to the predicted rate; specifying an
outcome number will provide changes in the probability of that outcome.
^delta()^ specifies the amount of the discrete change in the independent
variable. The default is a 1 unit change.
^fromto^ includes the pre- and post- probabilities in addition to the discrete
change
^uncentered^ specifies that the uncentered discrete change rather than the
centered discrete change is to be computed.
^conditional^ compute changes in conditional predictions rather than
unconditional predictions for the ztp and ztnb models.
^all^ specifies that any calculations of means, medians, etc., should use
the entire sample instead of the sample used to estimate the model.
^help^ presents guide to interpreting headings of output.
^nolabel^ uses values rather than value labels in output.
^nobase^ suppresses inclusion of the base values of x in the output.
^brief^ prints only limited output.
Examples
--------
To compute discrete and marginal change for ordered probit at base values
equal to the mean:
. ^oprobit warm yr89 male white age ed prst^
. ^prchange^
To compute discrete and marginal change for a specific set of base values:
. ^prchange, x(male=1 white=1 age=30)^
. ^prchange, x(male=1 white=1 age=30) rest(median)^
. ^prchange, x(male=1 white=max age=min) rest(median)^
To compute the discrete change for a set of base values while holding other
variables constant to a group-specific median:
. ^prchange if white == 1 & male == 0, x(white=1 male=0) rest(median)^
or
. ^prchange, x(white=1 male=0) rest(grmedian)^
To list only the discrete change for specific variables:
. ^prchange age, x(male=0 white=0) rest(median)^
.-
Authors: J. Scott Long and Jeremy Freese
www.indiana.edu/~jslsoc/spost.htm
spostsup@@indiana.edu

View File

@ -0,0 +1,213 @@
*! version 1.6.8 13Apr2005
* version 1.6.7 18Feb2005 ztp and ztnb
* version 1.6.6 8/20/03
* version 1.6.5 1/10/01
capture program drop prcounts
program define prcounts
version 6.0
tempname b alpha ai gai mu
syntax newvarname [if] [in] [, Max(integer 9) Plot]
*=> classify each valid type of model
local cmd = e(cmd)
*zt 18Feb2005
if "`cmd'"=="nbreg" | "`cmd'"=="poisson" | /*
*/ "`cmd'"=="zinb" | "`cmd'"=="zip" | /*
*/ "`cmd'"=="ztp" | "`cmd'"=="ztnb" {
}
else {
di
di in r "prcounts does not work for the last type of model estimated."
exit
}
if `max' > 99 | `max' < 0 {
di in r "max() must be in range from 0 to 99"
exit 198
}
local stem "`varlist'"
local stem = substr("`stem'", 1, 28)
cap version 7
if _rc!=0 { local stem = substr("`stem'", 1, 4) }
version 6.0
local modelis "from `cmd'"
*-> GENERATE PREDICTED RATE
quietly predict `stem'rate `if' `in', n
label variable `stem'rate "Predicted rate `modelis'"
gen `mu' = `stem'rate
*18Feb2005
* compute conditional mu
if "`cmd'"=="ztp" | "`cmd'"=="ztnb" {
quietly predict `stem'Crate `if' `in', cm
label variable `stem'Crate "Predicted conditional rate `modelis'"
}
*-> GENERATE PREDICTED FREQUENCY OF ALWAYS ZERO
if "`cmd'"=="zip" | "`cmd'"=="zinb" {
quietly predict `stem'all0 `if' `in', p
label variable `stem'all0 "Pr(always 0) `modelis'"
* predict n, n computes: exp(x*beta)*(1-all0).
* The last term needs to be removed.
quietly replace `mu' = `mu'/(1-`stem'all0) `if' `in'
}
*-> TAKE CARE OF ALPHA
*18Feb2005
if "`cmd'"=="nbreg" | "`cmd'"=="ztnb" {
sca `alpha' = e(alpha)
}
if "`cmd'"=="zinb" {
mat `b' = e(b)
local temp = colsof(`b')
sca `alpha' = `b'[1, `temp']
sca `alpha' = exp(`alpha')
}
*18Feb2005
if "`cmd'"=="nbreg" | "`cmd'"=="zinb" | "`cmd'"=="ztnb" {
sca `ai' = 1/`alpha'
sca `gai' = exp(lngamma(`ai'))
if `gai'==. {
di in r "problem with alpha prevents" /*
*/ " estimation of predicted probabilities."
exit 198
}
}
*-> GENERATE PREDICTED PROBABILITIES
local i 0
while `i' <= `max' {
local newvar "`stem'pr`i'"
*18Feb2005
if "`cmd'"=="poisson" | "`cmd'"=="ztp" {
quietly gen `newvar' = /*
*/ ((exp(-`mu'))*(`mu'^`i'))/(round(exp(lnfact(`i'))), 1) /*
*/ `if' `in'
}
*18Feb2005
if "`cmd'"=="nbreg" | "`cmd'"=="ztnb" {
quietly gen `newvar' = /*
*/ (exp(lngamma(`i'+`ai')) / /*
*/ (round(exp(lnfact(`i')),1) * exp(lngamma(`ai')))) /*
*/ * ((`ai'/(`ai'+`mu'))^`ai') * ((`mu'/(`ai'+`mu'))^`i') /*
*/ `if' `in'
}
if "`cmd'"=="zip" {
quietly gen `newvar' = (1-`stem'all0)*((exp(-`mu')) /*
*/ * (`mu'^`i'))/(round(exp(lnfact(`i'))), 1) `if' `in'
if `i'==0 {
quietly replace `newvar' = `newvar' + `stem'all0 `if' `in'
}
}
if "`cmd'"=="zinb" {
quietly gen `newvar' = (1-`stem'all0)*(exp(lngamma(`i'+`ai')) /*
*/ / ( round(exp(lnfact(`i')),1) * exp(lngamma(`ai')) ) ) /*
*/ * ((`ai'/(`ai'+`mu'))^`ai') * ((`mu'/(`ai'+`mu'))^`i') /*
*/ `if' `in'
if `i'==0 {
quietly replace `newvar' = `newvar' + `stem'all0 `if' `in'
}
}
label variable `newvar' "Pr(y=`i') `modelis'"
local i = `i' + 1
}
*spost9 zt needs to compute conditional probabilities 19feb2005
*-> GENERATE CONDITIONAL PREDICTED PROBABILITIES
if "`cmd'"=="ztp" | "`cmd'"=="ztnb" {
local i 1
while `i' <= `max' {
local newvar "`stem'Cpr`i'"
quietly gen `newvar' = `stem'pr`i'/(1-`stem'pr0)
label variable `newvar' "Pr(y=`i'|y>0) `modelis'"
local i = `i' + 1
}
} /// zt
*-> GENERATE CUMULATIVE PROBABILITIES
quietly gen `stem'cu0=`stem'pr0
label variable `stem'cu0 "Pr(y=0) `modelis'"
local i 1
while `i' <= `max' {
quietly egen `stem'cu`i' = rsum(`stem'pr0-`stem'pr`i') if `mu'~=.
label variable `stem'cu`i' "Pr(y<=`i') `modelis'"
*spost9 zt cumulative probabilities 18Feb2005
if "`cmd'"=="ztp" | "`cmd'"=="ztnb" {
quietly egen `stem'Ccu`i' = rsum(`stem'Cpr1-`stem'Cpr`i') if `mu'~=.
label variable `stem'Ccu`i' "Pr(y<=`i'|y<0) `modelis'"
}
local i = `i' + 1
}
*-> GENERATE GREATER THAN VARIABLE
quietly gen `stem'prgt = 1-`stem'cu`max' if `mu'~=.
label variable `stem'prgt "Pr(y>`max') `modelis'"
*18Feb2005
if "`cmd'"=="ztp" | "`cmd'"=="ztnb" {
quietly gen `stem'Cprgt = 1-`stem'Ccu`max' if `mu'~=.
label variable `stem'Cprgt "Pr(y>`max'|y>0) `modelis'"
}
*-> IF PLOT OPTION SPECIFIED
if "`plot'"=="plot" {
quietly gen `stem'val = .
label variable `stem'val "Count"
quietly gen `stem'obeq = .
label variable `stem'obeq "Observed Pr(y=k) `modelis'"
quietly gen `stem'preq = .
label variable `stem'preq "Predicted Pr(y=k) `modelis'"
quietly gen `stem'oble = .
label variable `stem'oble "Observed Pr(y<=k) `modelis'"
quietly gen `stem'prle = .
label variable `stem'prle "Predicted Pr(y<=k) `modelis'"
*18Feb2005
if "`cmd'"=="ztp" | "`cmd'"=="ztnb" {
quietly gen `stem'Cpreq = .
label variable `stem'Cpreq "Predicted Pr(y=k|y>0) `modelis'"
quietly gen `stem'Cprle = .
label variable `stem'Cprle "Predicted Pr(y<=k|y>0) `modelis'"
}
** bug fix -- makes sure observed probabilities are
** computed on estimation sample
if "`if'" == "" {
local if "if e(sample)==1"
}
else {
local if "`if' & e(sample)==1"
}
local i 0
while `i' <= `max' {
quietly {
local obs = `i' + 1
replace `stem'val = `i' in `obs'
tempvar count1 count2
* 1 if outcomes equal to i
gen `count1' = (`e(depvar)'==`i') `if' `in'
sum `count1' `if' `in'
replace `stem'obeq = r(mean) in `obs'
* 1 if outcome lt i
gen `count2' = (`e(depvar)'<=`i') `if' `in'
sum `count2' `if' `in'
replace `stem'oble = r(mean) in `obs'
* compute average predicted prob
sum `stem'pr`i' `if' `in'
replace `stem'preq = r(mean) in `obs'
* compute average cumulative predicted prob
sum `stem'cu`i' `if' `in'
replace `stem'prle = r(mean) in `obs'
*zt only compute if count > 0 18Feb2005*ZT
if ("`cmd'"=="ztp" | "`cmd'"=="ztnb") & `i'>0 {
sum `stem'Cpr`i' `if' `in'
replace `stem'Cpreq = r(mean) in `obs'
sum `stem'Ccu`i' `if' `in'
replace `stem'Cprle = r(mean) in `obs'
}
}
local i = `i' + 1
}
}
end

View File

@ -0,0 +1,78 @@
.-
help for ^prcounts^ - 11Mar2005
.-
Compute the predicted rate and probabilities for count models
-------------------------------------------------------------
^prcounts^ varname [^if^ exp] [^in^ range] [^, m^ax^(^max^)^ ^p^lot]
Description
-----------
^prcounts^ computes the predicted rate and probabilities of counts from 0
through the specified maximum count based on the last estimates from the
count models ^poisson^, ^nbreg^, ^zip^, ^zinb^. ^prcounts^ optionally generates
variables for the graphical comparison of observed and expected counts
for one model or for competing models.
For each observation ^prcounts^ computes the variables:
namerate = predicted count or rate
namepr^k^ = probability of a count of ^k^ for ^k^=0 to max
namecu^k^ = cumulative probabiliy of a count <=^k^ for ^k^=0 to max
For zip or zinb models, prcounts also generates:
namell0 = probability of being in the 'always zero' group
For ztp or ztnb models, prcounts also generates:
nameCrate = predicted count or rate conditional on y>0
nameCpr^k^ = probability of a count of ^k^ for ^k^=0 to max conditional
on y>0
nameCcu^k^ = cumulative probabiliy of a count <=^k^ for ^k^=0 to max
conditional on y>0
Options
-------
^max(^value^)^ The maximum count for which variables are generated.
value must be an integer between 0 and 99; the default is 9.
^plot^ causes Stata to generate extra variables that are useful for plotting.
The created variables will have values for the first max+1 rows of the
data set.
nameval = value of y
nameobeq = observed probability of counts = y
nameoble = cumulative observed probability of counts <= y
namepreq = predicted probability of counts = y
nameprle = predicted cumulative probability of counts <= y
For these variables, each observation provides information about the
probabilities of a given count for the entire sample. In other words, what
an observation consists of for these variables is different than for the rest
of the variables in one's dataset.
Note: the summary statistics computed by ^plot^ are automatically based on
the estimation sample, plus any further restrictions implied by the use of
^if^ or ^in^.
Examples
--------
. ^zinb art fem mar kid5 phd ment, inf(fem mar kid5 phd ment)^
. ^prcounts cnt^
. ^summarize cnt*^
To graphically compare predicted and observed counts for a model:
.^ poisson art fem mar kid5 phd ment^
.^ prcounts pois, max(8) plot^
.^ graph poispreq poisobeq poisval, c(ll)^
.-
Authors: J. Scott Long and Jeremy Freese
www.indiana.edu/~jsl650/spost.htm
spostsup@@indiana.edu

183
Modules/ado/plus/p/prdc.ado Normal file
View File

@ -0,0 +1,183 @@
*! version 0.0.4 jsl 13Apr2005 - beta version
*! To do: there are no IF, IN or ALL options
*! To do: Add if ystar or xb
capture program drop prdc
program define prdc, rclass
version 8
*=> classify each valid type of model
if "`e(cmd)'"=="cloglog" {
local io = "typical binary"
}
if "`e(cmd)'"=="gologit" {
local io = "typical ordered"
}
if "`e(cmd)'"=="logistic" {
local io = "typical binary"
}
if "`e(cmd)'"=="logit" {
local io = "typical binary"
}
if "`e(cmd)'"=="mlogit" {
local io = "typical nomord"
}
if "`e(cmd)'"=="nbreg" {
local io = "typical count"
}
if "`e(cmd)'"=="ologit" {
local io = "typical nomord"
}
if "`e(cmd)'"=="oprobit" {
local io = "typical nomord"
}
if "`e(cmd)'"=="poisson" {
local io = "typical count"
}
if "`e(cmd)'"=="probit" {
local io = "typical binary"
}
if "`e(cmd)'"=="zinb" {
local io = "twoeq count"
}
if "`e(cmd)'"=="zip" {
local io = "twoeq count"
}
if "`io'"=="" {
di
di in y "prdc" in r " does not work for last model estimated."
exit
}
*=> decode input
syntax [varlist(default=none)] ///
[, x(string) Rest(passthru) LEvel(passthru) ///
MAXcnt(passthru) noLAbel noBAse Brief Save Diff ///
YStar ept DELta ///
BOOTstrap REPs(passthru) SIze(passthru) DOts match ///
NORMal PERCENTile BIAScorrected ///
From(real 0) To(real 0) Change(real 0) ///
BINary Unit SD UNCentered ]
local qui "quietly"
local changetype " Uncentered"
if "`uncentered'"=="" {
local changetype " Centered"
}
if "`varlist'" =="" {
di in red "you must specify a variable list."
exit
}
* unit by default
if `from'==0 & `to'==0 & `change'==0 ///
& "`binary'"=="" & "`unit'"=="" & "`sd'"=="" {
local unit "unit"
}
tempname mn sdval sdval2 fromto stval endval
foreach v in `varlist' {
matrix `fromto' = J(10,2,-99) // hold start and end
local rownm ""
local rows = 0
qui sum `v' `if' `in'
scalar `mn' = r(mean)
scalar `sdval' = r(sd)
local sdnm "sd"
scalar `sdval2' = `sdval'/2
if "`binary'"=="binary" {
local rows = `rows' + 1
local nm`rows' ""
mat `fromto'[`rows',1] = 0
mat `fromto'[`rows',2] = 1
}
if "`unit'"=="unit" {
local rows = `rows' + 1
local nm`rows' "[ -+1/2 ]"
scalar `stval' = `mn' - 1/2
if "`uncentered'"!="" {
local nm`rows' "[ +1 ]"
scalar `stval' = `mn'
}
scalar `endval' = `stval' + 1
mat `fromto'[`rows',1] = `stval'
mat `fromto'[`rows',2] = `endval'
}
if "`sd'"=="sd" {
local rows = `rows' + 1
local nm`rows' "[ -+sd/2 ]"
scalar `stval' = `mn' - `sdval2'
if "`uncentered'"!="" {
local nm`rows' "[ +sd ]"
scalar `stval' = `mn'
}
scalar `endval' = `stval' + `sdval'
mat `fromto'[`rows',1] = `stval'
mat `fromto'[`rows',2] = `endval'
}
if `change'!=0 {
local rows = `rows' + 1
local nm`rows' "[ -+(delta=`change'/2) ]"
scalar `stval' = `mn' - (`change'/2)
if "`uncentered'"!="" {
local nm`rows' "[ +(delta=`delta') ]"
scalar `stval' = `mn'
}
scalar `endval' = `stval' + `change'
mat `fromto'[`rows',1] = `stval'
mat `fromto'[`rows',2] = `endval'
}
if `from'==0 & `to'==0 {
}
else {
local rows = `rows' + 1
mat `fromto'[`rows',1] = `from'
mat `fromto'[`rows',2] = `to'
local nm`rows' ""
}
di _n in g "Discrete change for variable: " in y "`v' "
tempname val dif diflb difub difmisc difout base stis endis
foreach r of numlist 1/`rows' {
scalar `stis' = `fromto'[`r',1]
scalar `endis' = `fromto'[`r',2]
local xstis = `stis'
local xendis = `endis'
`qui' prvalue2, x(`x' `v'=`xstis') ///
`rest' `level' save
if `r'==1 {
* get column names
mat `val' = pepred[1,1...]
local colnm ""
local colnum = colsof(`val')
foreach k of numlist 1/`colnum' {
local a = `val'[1,`k']
local colnm "`colnm' `a'"
}
}
`qui' prvalue2, x(`x' `v'=`xendis') ///
`rest' `level' diff
mat `dif' = pepred[6,1...]
mat `diflb' = pelower[6,1...]
mat `difub' = peupper[6,1...]
mat `difmisc' = pepred[7,1...]
matrix `difout' = `dif' \ `diflb' \ `difub'
mat colnames `difout' = `colnm'
mat rownames `difout' = " Change" " LowerBound" " UpperBound"
di _new in g "`changetype' change from " in y %6.4f `stis' ///
in g " to " in y %6.4f `endis' ///
in g " `nm`r''"
mat list `difout', noheader
} // rows
} // list of variables
* levels of variables
mat `base' = r(x)
mat list `base', noheader
end

110
Modules/ado/plus/p/prdc.hlp Normal file
View File

@ -0,0 +1,110 @@
.-
help for ^prdc^ - 25Oct2005
.-
?? if, in, all and ystar options need to be verified.
Compute descrete change of a specified amount for list of variables.
--------------------------------------------------------------------
^prdc^ [varlist] [^if^ exp] [^in^ range] [, ^x(^variables_and_values^)^
^r^est^(^stat^)^ ^ys^tar ^b^rief ^all ^max^cnt ^(^#^)^ ^noba^se
^nola^bel ^f^rom^(^#^)^ ^to(^#^)^ ^c^hange^(^#^)^ ^bin^ary
^u^nit ^sd^ ^unc^entered <you can also use options for confidence
intervals used with ^prvalue^.]
where
^varlist^ contains the names of variables for which discrete change will
be computed.
^variables_and_values^ is an alternating list of variables and either
numeric values or mean, median, min, max, upper, lower, previous
^stat^ is either mean, median, min, max, upper, lower, previous, grmean
(group mean), grmedian, grmin, grmax
Description
-----------
^prdc^ computes discrete changes with confidence intervals for regression
models for categorical and count variables. Discrete change is the
difference in the predicted value as one independent variable changes
values while all others are held constant at specified values. ^prdc^ is
simply a convenient way to execute a save-dif pair of ^prvalue^ commands.
By default, the discrete change is computed for a unit increase centered
around the mean.
Values for specific independent variables can be set using the x() option
after ^prdc^. Values for the unspecified independent variables can be
set using the rest() option. The ^if^ and ^in^ conditions specify conditions
for computation of means, min, etc., that are used with rest().
The discrete change is computed by the amounts (more than one can be specified)
specified with the options. ^prdc^ works with all regression programs that are
compatable with prvalue.
Options specifying levels of variables
--------------------------------------
^x()^ explicitly sets the values of specified independent variables.
^rest()^ sets the independent variables not specified in x() to their ^mean^
(default), ^minimum^, ^maximum^, ^median^ when calculating predicted values.
^grmean^ sets these independent variables to the mean conditional on the
variables and values specified in x(). If ^prvalue^ has already been run
after the last estimate, ^previous^ will set unspecified variables to their
prior values. For models other than mlogit, ^upper^ and ^lower^ can be used
to set independent variables to their minimum or maximum depending on
which will yield the upper or lower extreme predicted value.
Options specifying the amout of change
--------------------------------------
^from()^ specifies the start value of the change.
^to()^ specifies the start value of the change.
^binary^ specifies a change from 0 to 1.
^unit^ specifies a unit change at the values specified with x() and rest().
^sd^ specifies a standard deviation change at the values specified with x()
and rest().
^change()^ specifies the amount of the discrete change at the values
specified with x() and rest().
^uncentered^ specifies that the uncentered discrete change rather than the
centered discrete change is to be computed for unit, sd and change.
Other Options
-------------
^maxcnt()^ is the maximum count value for which the probability is computed
in count models. Default is 9.
^ystar^ requests change in predicted y*, rather than probabilities.
^all^ specifies that any calculations of means, medians, etc., should use
the entire sample instead of the sample used to estimate the model.
^nolabel^ uses values rather than value labels in output.
^nobase^ suppresses inclusion of the base values of x in the output.
^brief^ prints only limited output.
^help^ presents guide to interpreting headings of output.
Examples
--------
.-
Authors: J. Scott Long and Jeremy Freese
www.indiana.edu/~jslsoc/spost.htm
spostsup@@indiana.edu

View File

@ -0,0 +1,28 @@
program def prelist, rclass
*! NJC 1.2.0 6 June 2000
* NJC 1.1.0 22 Dec 1999
* NJC 1.0.0 12 Nov 1999
version 6.0
gettoken list 0 : 0, parse(",")
if "`list'" == "" | "`list'" == "," {
di in r "nothing in list"
exit 198
}
syntax , Pre(str) [ Global(str) Noisily ]
if length("`global'") > 8 {
di in r "global name must be <=8 characters"
exit 198
}
tokenize `list'
while "`1'" != "" {
local newlist "`newlist'`pre'`1' "
mac shift
}
if "`noisily'" != "" { di "`newlist'" }
if "`global'" != "" { global `global' "`newlist'" }
return local list `newlist'
end

View File

@ -0,0 +1,2 @@
.h listutil

View File

@ -0,0 +1,525 @@
*! version 2.5.0 2009-10-28 jsl
* - stata 11 update for returns from -mlogit-
// generate predictions and ci's to plot
capture program drop prgen
program define prgen, rclass
version 8
tempname temp inc xval addbase tobase tobase2
* check if prgen works with last model
_pecmdcheck prgen
local io = r(io)
if "`io'"=="." {
exit
}
local input : word 1 of `io' /* input routine to _pepred */
local output : word 2 of `io' /* output routine */
// decode options
syntax [varlist(numeric min=1 max=1)] [if] [in] ///
, Generate(string) [x(passthru) Rest(passthru) ///
Level(passthru) MAXcnt(passthru) all ///
Brief noBAse Ncases(integer 11) ///
From(real -867.5309) To(real 867.5309) ///
/// new options
MARginal NOIsily CI gap(real 0.0) ///
/// new options passed to prchange2 follow
noLAbel noBAse Brief ///
YStar ept DELta BOOTstrap REPs(passthru) SIze(passthru) ///
DOts match NORMal PERCENTile BIAScorrected ///
CONditional ]
* zt 19Feb2005
local iszt = 0
if ("`e(cmd)'"=="ztp" | "`e(cmd)'"=="ztnb") {
local iszt = 1
local cond ""
local condnm "Unconditional"
local condnmlc "unconditional "
if "`conditional'"=="conditional" {
local cond "C"
local condnm "Conditional"
local condnmlc "conditional "
}
}
* marginals not available for these models
if "`e(cmd)'"=="gologit" | "`input'"=="twoeq" ///
| "`output'" == "tobit" ///
| "`output'" == "regress" ///
| `iszt'==0 {
if "`marginal'"=="marginal" {
di _n in red "Note: Marginals not available for current model."
}
local marginal ""
}
* zt no ci's available 19Feb2005
if "`ci'"=="ci" & `iszt'==1 {
local ci ""
di _n in red "Note: ci's not available for current model."
}
* options to pass to prvalue
local pr2input "`level' `maxcnt' `nolabel' `nobase' `brief'"
local pr2input "`pr2input' `ystar' `ept' `delta' `bootstrap'"
local pr2input "`pr2input' `reps' `size' `dots' `match'"
local pr2input "`pr2input' `normal' `percentile' `biascorrected'"
* print results from prvalue
local quietly "quietly "
if "`noisily'"=="noisily" {
local quietly ""
di "Results from prvalue called by prgen"
di
}
// get information needed to create plot values
local max_i = r(maxcount)
_perhs
local nrhs = `r(nrhs)'
local rhsnms "`r(rhsnms)'"
if "`input'"=="twoeq" {
local nrhs2 = `r(nrhs2)'
local rhsnms2 "`r(rhsnms2)'"
}
* get info from pecats if depvar not continuous
if "`output'" != "regress" & "`output'" != "tobit" {
_pecats
local ncats = r(numcats)
local catnms8 `r(catnms8)'
local catvals `r(catvals)'
local catnms `r(catnms)'
}
*get root() for generating new variables
local root = substr("`generate'",1,29)
if _rc != 0 {
local root = substr("`generate'",1,5)
}
* convert input into base values
_pebase `if' `in', `x' `rest' `choices' `all'
mat `tobase' = r(pebase)
if "`input'"=="twoeq" {
mat `tobase2' = r(pebase2)
}
* create if to take e(sample) and if conditions into account
_peife `if', `all'
local if "`r(if)'"
* set from and to to min and max of chngvar if unspecified
qui sum `varlist' `if'
if `from' == -867.5309 {
local from = r(min)
}
if `to' == 867.5309 {
local to = r(max)
}
// set up and test range to be plotted
* check from and to
if `from'>=`to' {
di in r "from() must be < to()"
exit
}
* turn gap into increments
if `gap' != 0 {
if `gap'<=0 {
di in r "gap must be a positive value."
}
tempname range ngaps
sca `range' = `to' - `from'
sca `ngaps' = `range'/`gap'
if int(`ngaps')==`ngaps' {
local ncases = `ngaps' + 1
}
else {
di in r "gap does not divide evenly into from to interval."
exit 198
}
}
* verify valid number of plot points
if `ncases' < 3 {
di in r "ncases() must be greater than 3"
exit 198
}
if `ncases' > _N {
set obs `ncases'
}
sca `inc' = (`to'-`from')/(`ncases'-1)
// find variables among those in model
* locate specified variable among rhs variables
local found "no"
local varnum -1
*look in main equation, if not there: varnum == -1
local i2 = 1
local i2_to : word count `rhsnms'
while `i2' <= `i2_to' {
local varchk : word `i2' of `rhsnms'
unab varchk : `varchk', max(1)
if "`varlist'"=="`varchk'" {
local found "yes"
local varnum = `i2'
local i2 = `i2_to'
}
local i2 = `i2' + 1
}
* if zip,zinb look in inflate equation, if not there: varnum2 == -1
if "`input'"=="twoeq" {
local i3 = 1
local i3_to : word count `rhsnms2'
local varnum2 -1
while `i3' <= `i3_to' {
local varchk : word `i3' of `rhsnms2'
unab varchk : `varchk', max(1)
if "`varlist'"=="`varchk'" {
local found "yes"
local varnum2 = `i3'
local i3 = `i3_to'
}
local i3 = `i3' + 1
}
}
if "`found'"=="no" {
di in r "`var' not rhs variable"
exit 198
}
// insert from value into base values for initial call of prvalue
mat PE_in = `tobase'
* from to variable begins at from
if `varnum' != -1 {
mat PE_in[1, `varnum']=`from'
}
* for zip and zinb
if "`input'"=="twoeq" {
mat PE_in2 = `tobase2'
if `varnum2' != -1 {
mat PE_in2[1, `varnum2']=`from'
}
}
// make x() string and compute predictions
_pexstring
local xis "`r(xis)'"
`quietly' prvalue , x(`xis') `pr2input'
// get marginal
if "`marginal'"=="marginal" {
tempname marg tempmarg
_pemarg
mat `tempmarg' = r(marginal)
if "`output'"=="nominal" | "`output'"=="ordered" {
mat `tempmarg' = `tempmarg''
}
mat `marg' = `tempmarg'[1...,`varnum']'
}
// create matrices to be converted to variables
tempname x_pred pr_pred mu_pred all0_pred xb_pred catvals nextbase
tempname pr_upper mu_upper all0_upper xb_upper
tempname pr_lower mu_lower all0_lower xb_lower
mat def `catvals' = pepred[1,1...]
mat def `x_pred' = `from'
local predset "pred"
if "`ci'" == "ci" {
local predset "`predset' upper lower"
}
foreach nm in `predset' {
* zt 19Feb2005
mat def `pr_`nm'' = pe`cond'`nm'[2,1...]
mat def `mu_`nm'' = pe`cond'`nm'[3,2]
mat def `xb_`nm'' = pe`cond'`nm'[3,1]
mat def `all0_`nm'' = pe`cond'`nm'[3,4]
}
// loop from from value to to value
local i = 2
local i_to = `ncases'
while `i' <= `i_to' {
sca `xval' = `from' + (`inc'*(`i'-1))
* change from variable value
mat `nextbase' = `tobase'
if `varnum' != -1 {
mat `nextbase'[1, `varnum']=`xval'
}
mat PE_in = `nextbase'
* create x() string
_pexstring
local xis "`r(xis)'"
* 0.2.2 050203 `quietly' prvalue , x(`xis') `rest' `pr2input'
`quietly' prvalue , x(`xis') `pr2input'
* get marginal effect
if "`marginal'"=="marginal" {
_pemarg
mat `tempmarg' = r(marginal)
* some marginal matrices need to be transposed first
if "`output'"=="nominal" | "`output'"=="ordered" {
mat `tempmarg' = `tempmarg''
}
mat def `marg' = `marg' \ `tempmarg'[1...,`varnum']'
}
* stack new values in matrices
mat def `x_pred' = `x_pred' \ `xval'
foreach nm in `predset' {
mat def `pr_`nm'' = `pr_`nm'' \ pe`cond'`nm'[2,1...]
mat def `mu_`nm'' = `mu_`nm'' \ pe`cond'`nm'[3,2]
mat def `xb_`nm'' =`xb_`nm'' \ pe`cond'`nm'[3,1]
if "`input'"=="twoeq" {
mat def `all0_`nm'' =`all0_`nm'' \ pe`nm'[3,4]
}
}
local i = `i' + 1
}
// create plot variables
* x variable
svmat `x_pred', n(`root'x)
rename `root'x1 `root'x
* 23May2005
local tmplabel : variable label `varlist'
label variable `root'x "`tmplabel'"
* label variable `root'x "Changing value of `varlist'"
* marginal effects
if "`marginal'"=="marginal" {
local margnm : word `varnum' of `rhsnms'
svmat `marg', n(`root'me_`margnm')
}
// binary, ordered, nominal or count
if "`output'"=="binary" ///
| "`output'"=="nominal" ///
| "`output'"=="ordered" ///
| "`output'"=="count" {
* predictions and bounds
svmat `pr_pred', n(temp)
if "`ci'"=="ci" {
svmat `pr_lower', n(templb)
svmat `pr_upper', n(tempub)
}
* process each outcome probability
local ncats = peinfo[1,2]
foreach i of numlist 1/`ncats' {
* get # assigned to first category
local value = `catvals'[1,`i']
local k`i' = `value'
* if value are too large or small
if `value' < -9 | `value' > 99 | int(`value')!=`value' {
di in red "category values must be integers between -9 and 99"
exit 198
}
* if negative create name using _
if `value' < 0 {
local k`i' = abs(`k`i'')
local k`i' = "_`k`i''"
}
* rename and label probability
rename temp`i' `root'p`k`i''
local lbl: word `i' of `catnms8'
* get information to label variables
if "`lbl'"=="`value'" {
local fvalue "" // same, so only use 1
}
else {
local fvalue "=Pr(`value')"
}
if "`lbl'"=="" {
local lbl "`value'"
local fvalue ""
}
if "`nolabel'"!="nolabel" {
* zt 19Feb2005
label variable `root'p`k`i'' ///
"`condnmlc'pr(`lbl')`fvalue'"
}
else {
* zt 19Feb2005
label variable `root'p`k`i'' ///
"`condnmlc'pr(`value')"
}
* process upper and lower bounds
if "`ci'"=="ci" {
rename tempub`i' `root'p`k`i''ub
if "label'"!="nolabel" {
label variable `root'p`k`i''ub "UB pr(`lbl')`fvalue'"
}
else {
label variable `root'p`k`i'' "UB pr(`value')"
}
rename templb`i' `root'p`k`i''lb
if "label'"!="nolabel" {
label variable `root'p`k`i''lb "LB pr(`lbl')`fvalue'"
}
else {
label variable `root'p`k`i'' "LB pr(`value')"
}
}
* create variables summing prob of being <= k
if `ncats'>2 {
if `i' == 1 {
qui gen `root's`k1' = `root'p`k1'
}
else {
local i_min1 = `i' - 1
qui gen `root's`k`i'' = `root'p`k`i'' + `root's`k`i_min1''
}
label variable `root's`k`i'' "pr(y<=`value')"
}
* marginals
if "`marginal'"=="marginal" & ///
"`output'"!="count" { // no marg pr# for count
rename `root'me_`margnm'`i' `root'Dp`k`i''D`margnm'
label variable `root'Dp`k`i''D`margnm' ///
"Marginal dp`k'`i'/d`margnm'"
}
} // end of loop through categories
} // binary, ordered, nominal or count
// REGRESS/TOBIT MODELS
if "`output'"=="regress" | "`output'"=="tobit" {
svmat `xb_pred', n(`root'xb)
rename `root'xb1 `root'xb
if "`ci'" == "ci" {
svmat `xb_lower', n(`root'xblb)
rename `root'xblb1 `root'xblb
svmat `xb_upper', n(`root'xbub)
rename `root'xbub1 `root'xbub
}
if "`output'"=="regress" {
label variable `root'xb "y-hat"
if "`ci'" == "ci" {
label variable `root'xblb "UB y-hat"
label variable `root'xbub "LB y-hat"
}
}
if "`output'"=="tobit" {
label variable `root'xb "y*-hat"
if "`ci'" == "ci" {
label variable `root'xblb "UB y*-hat"
label variable `root'xbub "LB y*-hat"
}
}
} // regress/tobit
// for count models, process mu and prall0
if "`output'"=="count" {
* marginals
if "`marginal'"=="marginal" {
mat list `marg'
drop `root'me_`margnm'1
rename `root'me_`margnm'2 `root'DmuD`margnm'
label var `root'DmuD`margnm' "Marginal dmu/d`margnm'"
}
* mu
svmat `mu_pred', n(`root'mu)
rename `root'mu1 `root'mu
* zt 19Feb2005
label variable `root'mu "predicted `condnmlc'rate mu"
* upper and lower bounds
if "`ci'"=="ci" {
svmat `mu_lower', n(`root'mulb)
rename `root'mulb1 `root'mulb
label variable `root'mulb "LB predicted rate mu"
svmat `mu_upper', n(`root'muub)
rename `root'muub1 `root'muub
label variable `root'muub "UB predicted rate mu"
}
if "`input'"=="twoeq" {
svmat `all0_pred', n(`root'all0)
rename `root'all01 `root'all0
label variable `root'all0 "pr(Always-0)"
if "`ci'"=="ci" {
svmat `all0_lower', n(`root'all0lb)
rename `root'all0lb1 `root'all0lb
label variable `root'all0lb "LB pr(Always-0)"
svmat `all0_upper', n(`root'all0ub)
rename `root'all0ub1 `root'all0ub
label variable `root'all0ub "UB pr(Always-0)"
}
}
} // mu and prob always 0
// COMMON OUTPUT
if "`brief'"=="" & "`base'"!="nobase" {
di _n in y "`e(cmd)'" in g ": Predicted values as " /*
*/ in y "`varlist'" in g /*
*/ " varies from " in y "`from'" in g " to " /*
*/ in y "`to'" in g "."
*print base values
if "`input'"=="twoeq" {
di _n in g "base x values for count equation: "
}
mat rownames `tobase' = "x="
mat _PEtemp = `tobase'
_peabbv _PEtemp
mat list _PEtemp, noheader
if "`input'"=="twoeq" {
di _n in g "base z values for binary equation: "
mat rownames `tobase2' = "z="
mat _PEtemp = `tobase2'
_peabbv _PEtemp
mat list _PEtemp, noheader
}
}
end
exit
* version 2.0.2 23May2005 : use label of varlist variable not changing...
* version 2.0.3 20Jun2005 : _pexstring bug fix
* version 2.0.4 23Jun2005 : _pexstring bug fix (2)

View File

@ -0,0 +1,204 @@
{smcl}
{* 06Feb2010}{...}
{hline}
help for {hi:prgen}{right:06Feb2010}
{hline}
{title:Generate predicted values and confidence intervals for regression models}
{p 4 4 2}
To compute the predicted values with all variables but varname
held at values specified by x() and rest(). The program extends
{cmd:prgen} by allowing you to generate variables containing upper
and lower bounds for confidence intervals and marginal effects.
{p 8 15 2}{cmd:prgen} varname,
{cmdab:g:enerate(}{it:newvar}{cmd:)}
[{cmdab:f:rom(}{it:#}{cmd:)}
{cmdab:t:o(}{it:#}{cmd:)}]
[{cmd:x(}{it:variables_and_values}{cmd:)}
{cmdab:r:est(}{it:stat}{cmd:)}
{cmd:all}]
[{cmdab:b:rief}
{cmdab:max:cnt(}{it:#}{cmd:)}
{cmdab:noba:se}
{cmdab:nola:bel}
{cmdab:n:cases(}{it:#}{cmd:)}
{cmd:gap(}{it:#}{cmd:)}
{cmdab:noi:sily}
{cmdab:mar:ginal}
{cmdab:con:ditional}]
[{cmd:ci}
{it:prvalue_options}]
{p 4 4 2}
where {it:variables_and_values} is an alternating list of variables
and either numeric values or mean, median, min, max, upper, lower,
previous.
{p 4 4 2}
{it:stat} is either mean, median, min, max, upper, lower, previous,
grmean (group mean), grmedian, grmin, grmax.
{p 4 4 2}
See {help prvalue} for options that can be specified for computing
confidence intervals.
{title: Description}
{p 4 4 2}
{cmd:prgen} computes predicted values and confidence intervals
for regression with continuous, categorical, and count outcomes
in a way that is useful for making plots. Predicted values are computed
for the case in which one independent variable varies over a specified
range while the others are held constant. You can request variables
containing upper and lower bounds for these variables. You can also
create a variable containing the marginal change in the outcome with
respect to the specified variable, holding other variabels constant. New
variables are added to the existing dataset that contain these
predicted values that can be plotted.
{p 4 4 2}
Note: The new variables will contain data for the first k observations
in the dataset, where k is 11 if not specified with the {cmd: ncases()}
option or if not determined by the {cmd:gap} option.
{title: Options}
{p 4 8 2}
{cmd:from()} and {cmd:to()} specify the values over which varname
should vary when calculating predicted values. The defaults are the
observed minimum and maximum values.
{p 4 8 2}
{cmd:generate()} is up to five letters to name the created variables.
By changing the name you can run -prgen- repeatedly to compute predictions
with variables held at various values. It is best to chose a name that is
different from the beginning letters of variables in your data set.
This is required.
{p 4 8 2}
{cmd:ci} indicates that you want to generate confidence intervals
corresponding to the predictions made by {cmd:prgen}.
{p 4 8 2}
{cmd:marginal} indicates that you want to generate a variable
containing the marginal change in the outcome relative to varname,
holding all other variables constant.
{p 4 8 2}
{cmd:conditional} indicates that you want to generate conditional
predictions rather than unconditional predictions for {cmd:ztp} and
{cmd:ztnb} models.
{p 4 8 2}
{cmd:ncases} is the number of predicted values computed as varname
varies from the start value to the end value. If {cmd:Ncases} is not
specified, 11 points are generated.
{p 4 8 2}
{cmd:gap} is an alternative to {cmd:ncases}. You specify the gap or
size between tic marks and {cmd:prgen} determines if the specified
value divides evenly into the from-to range. If it does, {cmd:prgen}
determines the appropriate value for {cmd:ncases}.
{p 4 8 2}
{cmd:x()} sets the values of independent variables for calculating
predicted values. The list must alternate variable names and values.
The values may be either numeric values or can be mean, median, min, max,
previous, upper, or lower. The latter cannot be used if rest()
specifies a group summary statistic (e.g., grmean).
{p 4 8 2}
{cmd:rest()} sets the independent variables not specified in x() to
their {cmd:mean} (default), {cmd:minimum}, {cmd:maximum},
{cmd:median} when calculating predicted values.{cmd:grmean} sets these
independent variables to the mean conditional on the variables and
values specified in x(); {cmd:grmedian}, {cmd:grmax}, and {cmd:grmin}
can also be used. If {cmd:prvalue} has already been run after the last
estimate, {cmd:previous} will set unspecified variables to their prior values.
For models other than mlogit, {cmd:upper}and {cmd:lower} can be used to set
independent variables to their minimum or maximum depending on which will yield
the upper or lower extreme predicted value.
{p 4 8 2}
{cmd:maxcnt()} sets the maximum count for which variables are generated
for count models. The value must be an integer between 0 and 30;
the default is 9.
{p 4 8 2}
{cmd:all} specifies that any calculations of means, medians, etc.,
should use the entire sample instead of the sample used to estimate
the model.
{p 4 8 2}
{cmd:brief} and {cmd:nobase} suppress the base values of x in the output.
{p 4 8 2}
{cmd:nolabel} uses values rather than value labels in output.
{p 4 8 2}
{cmd:noisily} indicates that you want to see the output from
{cmd:prvalue}that was was used to generate the predicted values.
{p 4 8 2}
{it:prvalue_options} control the calculation of confidence intervals;
see {help prvalue} for details about these options.
{title:Models and Predictions - * is the prefix}
all models:
*x: value of x
logit & probit:
Predicted probability of each outcome: *p0, *p1
ologit, oprobit
Predicted probabilities: *p#1,*p#2,... where #1,#2,... are values of
the outcome variable.
Cumulative probabilities: *s#1,*s#2,... where #1,#2,... are values
of the outcome variable. *s#k is the probability of all
categories up to or equal to #k.
mlogit:
Predicted probabilities: *p#1,*p#2,... where #1,#2,... are values of
the outcome variable.
poisson & nbreg:
Predicted rate: *mu;
Predicted probabilities: *p0, *p1... where 0, 1, are counts
Cumulative probabilities: *s0, *s1... where 0, 1 are counts.
The cumulative probability of a given count is probability of
observing count less than or equal to that count.
regress, tobit, cnreg, intreg
Predicted xb: *xb
{title: Examples}
{p 4 4 2}
To compute predicted values and confidencen intervals from an ordered probit
where warm has four categories SD, D, A and SA:
{p 4 8 2}{cmd:.oprobit warm yr89 male white age ed prst}
{p 4 8 2}{cmd:.prgen age, f(20) t(80) gen(mn) ci delta}
{p 4 8 2}{cmd:.prgen age, x(male=0) f(20) t(80) gen(fem)}
{p 4 8 2}{cmd:.prgen age, x(male=1) f(20) t(80) gen(mal)}
{p 4 8 2}
To plot the predicted probabilites for average males:
{p 4 8 2}{cmd:.twoway connected malp1 malp2 malp3 malp4 malx}
{hline}
{p 2 4 2}Authors: J. Scott Long, Jeremy Freese & Jun Xu{p_end}
{p 11 4 2}{browse www.indiana.edu/~jslsoc/spost.htm}{p_end}
{p 11 4 2}spostsup@indiana.edu{p_end}

View File

@ -0,0 +1,36 @@
program def prodlist, rclass
*! NJC 1.0.0 9 April 2001
version 6.0
gettoken list 0 : 0, parse(",")
if "`list'" == "" | "`list'" == "," {
di in r "no list specified"
exit 198
}
numlist "`list'"
local list `r(numlist)'
local nw : word count `list'
syntax [ , Global(str) Noisily ]
if length("`global'") > 8 {
di in r "global name must be <=8 characters"
exit 198
}
tempname prod
scalar `prod' = 1
tokenize `list'
local i = 1
while `i' <= `nw' {
scalar `prod' = (`prod') * (``i'')
local i = `i' + 1
}
if "`noisily'" != "" { di `prod' }
if "`global'" != "" { global `global' = `prod' }
return scalar prod = `prod'
end

View File

@ -0,0 +1,2 @@
.h listutil

View File

@ -0,0 +1,460 @@
*! version 2.5.0 2009-10-28 jsl
* - stata 11 update for returns from -mlogit-
capture program drop prtab
program define prtab, rclass
version 6
tempname tobase tobase2 addbase replval temp
tempvar added cell
** #1 classify each valid type of model
* zt 18Feb2005
local iszt = 0
if ("`e(cmd)'"=="ztp" | "`e(cmd)'"=="ztnb") {
local iszt = 1
}
* zt 19Feb2005
if "`e(cmd)'"=="ztp" { local io = "typical count" }
if "`e(cmd)'"=="ztnb" { local io = "typical count" }
if "`e(cmd)'"=="cloglog" { local io = "typical binary" }
if "`e(cmd)'"=="cnreg" { local io = "typical tobit" }
if "`e(cmd)'"=="fit" { local io = "typical regress" }
if "`e(cmd)'"=="gologit" { local io = "typical mlogit" }
if "`e(cmd)'"=="intreg" { local io = "typical tobit" }
if "`e(cmd)'"=="logistic" { local io = "typical binary" }
if "`e(cmd)'"=="logit" { local io = "typical binary" }
if "`e(cmd)'"=="mlogit" { local io = "typical mlogit" }
if "`e(cmd)'"=="nbreg" { local io = "typical count" }
if "`e(cmd)'"=="ologit" { local io = "typical ordered" }
if "`e(cmd)'"=="oprobit" { local io = "typical ordered" }
if "`e(cmd)'"=="poisson" { local io = "typical count" }
if "`e(cmd)'"=="probit" { local io = "typical binary" }
if "`e(cmd)'"=="regress" { local io = "typical regress" }
if "`e(cmd)'"=="slogit" { local io = "typical ordered" }
if "`e(cmd)'"=="tobit" { local io = "typical tobit" }
if "`e(cmd)'"=="zinb" { local io = "twoeq count" }
if "`e(cmd)'"=="zip" { local io = "twoeq count" }
if "`io'"=="" {
di
di in r "prtab does not work for the last type of model estimated."
exit
}
local input : word 1 of `io' /* input routine to _pepred */
local output : word 2 of `io' /* output routine */
** #2 get info about variables
_perhs
local nrhs = `r(nrhs)'
local rhsnms "`r(rhsnms)'"
if "`input'"=="twoeq" {
local nrhs2 = `r(nrhs2)'
local rhsnms2 "`r(rhsnms2)'"
}
if "`output'" != "regress" & "`output'" != "tobit" {
_pecats
local ncats = r(numcats)
local catnms8 `r(catnms8)'
local catvals `r(catvals)'
local catnms `r(catnms)'
}
** #3 decode specified input
syntax varlist(min=1 max=3 numeric) [if] [in] /*
*/ [, x(passthru) Rest(passthru) Outcome(string) /*
*/ by(varlist numeric max=1) /*
*/ noBAse Brief NOLabel NOVARlbl all /*
*/ CONditional ]
* zt 19Feb2005
if `iszt'==1 & "`conditional'"=="conditional" ///
& "`outcome'"=="0" {
di _n in r "conditional probabilities for outcome 0 are undefined."
exit
}
*convert input into tobase
_pebase `if' `in', `x' `rest' `choices' `all'
mat `tobase' = r(pebase)
if "`input'"=="twoeq" { mat `tobase2' = r(pebase2) }
*fix if to take e(sample) and if conditions into account
_peife `if', `all'
local if "`r(if)'"
* handle outcome option for ordered mlogit and ordered models
if "`outcome'"!="" {
if "`output'"=="ordered" | "`output'"=="mlogit" {
local found "no"
local i = 1
while `i' <= `ncats' {
local valchk : word `i' of `catvals'
local nmchk : word `i' of `catnms'
if ("`outcome'"=="`valchk'") | ("`outcome'"=="`nmchk'") {
local found "yes"
local outcmv = "`valchk'"
if "`valchk'"!="`nmchk'" { local outcmnm "(`nmchk')" }
local outcome = `i'
local i = `ncats'
}
local i = `i' + 1
} /* while `i' <= `ncats' */
if "`found'"=="no" {
di in r "`outcome' not category of `e(depvar)'"
exit 198
}
} /* "`output'"=="ordered" | "`output'"=="mlogit" { */
else if "`output'"=="count" {
confirm integer number `outcome'
if `outcome' < 0 { exit 198 }
local outcmv "`outcome'"
}
else {
di in r "outcome() not allowed for prtab after `e(cmd)'"
exit 198
}
} /* if "`outcome'"!="" */
*if by option has been specified, put this variable into the end
*of `varlist'
local varnoby = "`varlist'" /* needed for tabdisp at end */
if "`by'"!="" { local varlist "`varlist' `by'" }
** #4 build PE_in matrix
local nvars : word count `varlist'
*cycle through varlist (as many as four variables)
local i = 1
while `i' <= 4 {
*if varlist shorter than i, set ncats# to 1 and varnum# to -1
if `nvars' < `i' {
local ncats`i' = 1
local varnum`i' = -1
if "`input'"=="twoeq" { local varnm2`i' = -1 }
}
else {
local var`i' : word `i' of `varlist'
_pecats `var`i'' `if'
local ncats`i' = r(numcats)
local nms`i' "`r(catnms)'"
local nms8`i' "`r(catnms8)'"
local vals`i' "`r(catvals)'"
* find variable position in base matrix
* assign varnum and varnm2 accordingly
* varnum == -1 if not in main equation
* varnum2 == -1 if zip/zinb but variable not in inflate equation
local found "no"
local varnum`i' -1
local i2 = 1
local i2_to : word count `rhsnms'
while `i2' <= `i2_to' {
local varchk : word `i2' of `rhsnms'
unab varchk: `varchk', max(1)
if "`var`i''"=="`varchk'" {
local found "yes"
local varnum`i' = `i2'
local i2 = `i2_to'
}
local i2 = `i2' + 1
}
*if zip/zinb model
if "`input'"=="twoeq" {
local varnm2`i' -1
local i3 = 1
local i3_to : word count `rhsnms2'
while `i3' <= `i3_to' {
local varchk : word `i3' of `rhsnms2'
unab varchk: `varchk', max(1)
if "`var`i''"=="`varchk'" {
local found "yes"
local varnm2`i' = `i3'
local i3 = `i3_to'
}
local i3 = `i3' + 1
}
}
if "`found'"=="no" {
di in r "`var`i'' not rhs variable"
exit 198
}
} /* else */
local i = `i' + 1
} /* while `i' <= `nvars' */
capture matrix drop PE_in
capture matrix drop PE_in2
*build PE_in matrix
local i1 = 1
while `i1' <= `ncats1' {
local i2 = 1
while `i2' <= `ncats2' {
local i3 = 1
while `i3' <= `ncats3' {
local i4 = 1
while `i4' <= `ncats4' {
*make new row of x's for main equation
mat `addbase' = `tobase'
if `varnum1' ~= -1 {
local addval : word `i1' of `vals1'
mat `addbase'[1, `varnum1'] = `addval'
}
if `varnum2' ~= -1 {
local addval : word `i2' of `vals2'
mat `addbase'[1, `varnum2'] = `addval'
}
if `varnum3' ~= -1 {
local addval : word `i3' of `vals3'
mat `addbase'[1, `varnum3'] = `addval'
}
if `varnum4' ~= -1 {
local addval : word `i4' of `vals4'
mat `addbase'[1, `varnum4'] = `addval'
}
*add row of x's to PE_in
mat PE_in = nullmat(PE_in) \ `addbase'
*second equation (binary eq for count models)
if "`input'"=="twoeq" {
mat `addbase' = `tobase2'
if `varnm21' ~= -1 {
local addval : word `i1' of `vals1'
mat `addbase'[1, `varnm21'] = `addval'
}
if `varnm22' ~= -1 {
local addval : word `i2' of `vals2'
mat `addbase'[1, `varnm22'] = `addval'
}
if `varnm23' ~= -1 {
local addval : word `i3' of `vals3'
mat `addbase'[1, `varnm23'] = `addval'
}
if `varnm24' ~= -1 {
local addval : word `i4' of `vals4'
mat `addbase'[1, `varnm24'] = `addval'
}
*add row of x's to PE_in
mat PE_in2 = nullmat(PE_in2) \ `addbase'
} /* if "`input'"=="twoeq" */
local i4 = `i4' + 1
} /* while `i4' <= `ncats4' */
local i3 = `i3' + 1
} /* while `i3' <= `ncats3' */
local i2 = `i2' + 1
} /* while `i2' <= `ncats2' */
local i1 = `i1' + 1
} /* while `i1' <= `ncats1' */
if "`output'"=="count" {
if "`outcome'"!="" { local maxcnt "maxcnt(`outcome')" }
else { local maxcnt "maxcnt(0)" }
}
_pepred, `maxcnt'
*note: the way the command is now: it requires -two- preserves
*one here and one in _pepred. This could be improved...
preserve
qui gen `added' = 0
local newobs = rowsof(PE_in)
local oldn = _N
local newn = `oldn'+`newobs'
qui set obs `newn'
qui replace `added' = 1 if `added' == .
local i = 1
while `i' <= `nrhs' {
local varname : word `i' of `rhsnms'
local i2 = 1
while `i2' <= `newobs' {
local to_rep = `oldn' + `i2'
sca `replval' = PE_in[`i2',`i']
qui replace `varname' = `replval' in `to_rep'
local i2 = `i2' + 1
}
local i = `i' + 1
}
if "`nrhs2'"!="" {
local i = 1
while `i' <= `nrhs2' {
local varname : word `i' of `rhsnms2'
local i2 = 1
while `i2' <= `newobs' {
local to_rep = `oldn' + `i2'
sca `replval' = PE_in2[`i2',`i']
qui replace `varname' = `replval' in `to_rep'
local i2 = `i2' + 1
}
local i = `i' + 1
}
}
di _n in y "`e(cmd)'" in g ": Predicted " _c
if "`output'"=="binary" {
local r_toget "p1"
local fmt "%6.4f"
if "`brief'"=="" { di in g "probabilities of positive outcome" _c }
}
if "`output'"=="regress" {
local r_toget "xb"
local fmt "%8.0g"
if "`brief'"=="" { di in g "values" _c }
}
if "`output'"=="tobit" {
local r_toget "xb"
local fmt "%8.0g"
if "`brief'"=="" { di in g "values of y*" _c }
}
if "`output'"=="count" & (`iszt'!=1) {
if "`outcome'"=="" {
local r_toget "mu"
local fmt "%8.4f"
if "`brief'"=="" { di in g "rates" _c }
}
if "`outcome'"!="" {
local r_toget "p`outcome'"
local fmt "%6.4f"
if "`brief'"=="" {
di in g "probabilities of count = `outcome'" _c
}
}
}
* zero truncated models 19Feb2005
if `iszt'==1 {
if "`conditional'"=="" {
local type "unconditional"
}
else {
local type "conditional"
}
if "`outcome'"=="" {
if "`conditional'"=="" {
local r_toget "mu"
}
else {
local r_toget "Cmu"
}
local fmt "%8.4f"
if "`brief'"=="" { di in g "`type' rates" _c }
}
if "`outcome'"!="" {
local r_toget "p`outcome'"
if "`conditional'"=="" {
local r_toget "p`outcome'"
}
else {
local r_toget "Cp`outcome'"
}
local fmt "%6.4f"
if "`brief'"=="" {
di in g "`type' probabilities of count = `outcome'" _c
}
}
}
if "`output'"=="ordered" | "`output'"=="mlogit" {
local fmt "%6.4f"
if "`outcome'"=="" {
local do_all "yes"
local do_allc = 1
if "`brief'"=="" { di in g "probabilities" _c }
}
else {
if "`brief'"=="" {
di in g "probabilities of outcome `outcmv' `outcmnm'" _c
}
local r_toget "p`outcome'"
}
}
di in g " for " in y "`e(depvar)'"
** #5 TRANSLATE EVERYTHING INTO TEMPORARY VARIABLES
tempvar tmpvar1 tmpvar2 tmpvar3 tmpvar4
local count = 1
while `count' <= `nvars' {
qui gen `tmpvar`count'' = `var`count'' if `added' == 1
label variable `tmpvar`count'' "`var`count''"
if "`nolabel'"!="nolabel" {
local lblnam1 : value label `var`count''
if "`lblnam1'"!="" { label values `tmpvar`count'' `lblnam1' }
}
if "`novarlbl'"!="novarlbl" {
local lblnam2 : variable label `var`count''
if "`lblnam2'"!="" { label variable `tmpvar`count'' "`lblnam2'" }
}
local count = `count' + 1
}
*make lists of temporary variables for tabdisp
local count = 1
local countto : word count `varnoby'
while `count' <= `countto' {
local tmpxxx = "`tmpvar`count''"
local tmpnoby = "`tmpnoby'`tmpxxx' "
local count = `count' + 1
}
if "`by'" != "" {
local tmpby = "`tmpvar`nvars''"
}
** #6 DISPLAY OUTPUT
*doneyet needed if tables needed for multiple categories (e.g. oprobit)
local doneyet "no"
while "`doneyet'"=="no" {
if "`do_all'"=="yes" {
local r_toget = "p`do_allc'"
local outcmv : word `do_allc' of `catvals'
local outcmnm : word `do_allc' of `catnms'
if "`outcmnm'"!="`outcmv'" {
di _n in gr "Predicted probability of outcome " /*
*/ "`outcmv' (`outcmnm')"
}
else { di _n in gr "Predicted probability of outcome `outcmv'" }
}
qui gen `cell' = .
mat `temp' = r(`r_toget')
local i = 1
while `i' <= `newobs' {
local to_rep = `oldn' + `i'
sca `replval' = `temp'[`i', 1]
qui replace `cell' = `replval' in `to_rep'
local i = `i' + 1
}
if "`by'" != "" { local by4 "by(`tmpby')" }
label variable `cell' "Prediction"
tabdisp `tmpnoby' if `added'==1, c(`cell') `by4' f(`fmt')
if "`do_all'"=="yes" {
local do_allc = `do_allc' + 1
if `do_allc' > `ncats' { local doneyet "yes" }
else { drop `cell' }
}
else { local doneyet "yes" }
} /* while "`doneyet'" = "no" */
*print base values if desired
if "`brief'"=="" & "`base'"!="nobase" {
if "`input'"=="twoeq" {
di _n in g "base x values for count equation: "
}
mat rownames `tobase' = "x="
mat _PEtemp = `tobase'
_peabbv _PEtemp
mat list _PEtemp, noheader
if "`input'"=="twoeq" {
di _n in g "base z values for binary equation: "
mat rownames `tobase2' = "z="
mat _PEtemp = `tobase2'
_peabbv _PEtemp
mat list _PEtemp, noheader
}
}
end
exit
* version 1.6.0 1/11/01
* version 1.6.1 19Feb2005 zt
* version 1.6.2 27Mar2005 slogit
* version 1.6.3 13Apr2005

View File

@ -0,0 +1,95 @@
.-
help for ^prtab^ - 2Nov2005
.-
Table of predicted values/probabilities/rates for regression models
-------------------------------------------------------------------
^prtab^ rowvar [colvar [supercolvar]] [^if^ exp] [^in^ range],
[^by(^superrowvar^)^ ^x(^variables_and_values^)^ ^r^est^(^stat^)^
^o^utcome^(^string^)^ ^b^rief ^noba^se ^nol^abel ^novar^lbl
^con^ditional ^a^ll]
where
^rowvar^, ^colvar^, ^supercolvar^, and ^superrowvar^ are independent variables
in the previous estimation.
^variables_and_values^ is an alternating list of variables and either
numeric values or mean, median, min, max, upper, lower, previous
^stat^ is either mean, median, min, max, upper, lower, previous, grmean
(group mean), grmedian, grmin, grmax
Description
-----------
After estimating a regression model, ^prtab^ presents a one- to four-way table
of the predicted values (probabilities, rate) for different combinations of
values of independent variable.
The command works with cloglog, cnreg, intreg, logit, mlogit, mprobit, nbreg,
ologit, oprobit, poisson, probit, regress, slogit, tobit, zinb, zip, ztnb,
and ztp.
Options
-------
^by(superrowvar)^ specifies numeric variable to be treated as superrow. Only
one variable is allowed.
^x()^ sets the values of independent variables for calculating predicted
values. The list must alternate variable names and values. The values
may be either numeric values or can be mean, median, min, max, previous,
upper, or lower. The latter cannot be used if rest() specifies a group
summary statistic (e.g., grmean).
^rest()^ sets the independent variables not specified in x() to their ^mean^
(default), ^minimum^, ^maximum^, ^median^ when calculating predicted values.
^grmean^ sets these independent variables to the mean conditional on the
variables and values specified in x(); ^grmedian^, ^grmax^, and ^grmin^ can
also be used. If ^prvalue^ has already been run after the last estimate,
^previous^ will set unspecified variables to their prior values. For
models other than mlogit, ^upper^ and ^lower^ can be used to set independent
variables to their minimum or maximum depending on which will yield the
upper or lower extreme predicted value.
^outcome()^ presents results for the specified outcome. For ordered models or
mlogit, the default is to provide results for all outcomes (each one in a
separate table); for count models, the default is to present changes in
the predicted rate.
^conditional^ for -ztp- and -ztnb-, table with contain conditional predictions
rather than the default unconditional predictions.
^all^ specifies that any calculations of means, medians, etc., should use
the entire sample instead of the sample used to estimate the model.
^brief^ and ^nobase^ suppress the base values of x in the output.
^novallbl^ causes the numeric codes to be displayed rather than value labels.
^nolabel^ causes the variable name to be displayed rather than the variable
label.
Examples
--------
^. probit faculty female fellow phd mcit3 mnas^
^. prtab female fellow mnas^
^. ologit jobclass female fellow pub1 phd^
^. prtab female fellow, x(phd=min)^
^. poisson pub1 female fellow phd enrol^
^. prtab female fellow^
^. prtab female fellow, outcome(0)^
.-
Authors: J. Scott Long and Jeremy Freese
www.indiana.edu/~jslsoc/spost.htm
spostsup@@indiana.edu

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,238 @@
{smcl}
{* 17Feb2007}{...}
{hline}
help for {hi:prvalue}{right:17Feb2007}
{hline}
{title:Predicted values with confidence intervals for regression models}
{p 8 15 2}{cmd:prvalue} [{cmd:if} exp] [{cmd:in} range] [{cmd:,}
{cmd:x(}{it:variables_and_values}{cmd:)}
{cmdab:r:est(}{it:stat}{cmd:)}
{cmd:all}
{cmdab:l:evel(}{it:#}{cmd:)}
{cmdab:s:ave}
{cmdab:d:iff}
{cmdab:lab:el(}{it:string}{cmd:)}
{cmdab:b:rief}
{cmdab:max:cnt(}{it:#}{cmd:)}
{cmdab:noba:se}
{cmdab:nola:bel}
{cmdab:ys:tar}
{cmd:ept}
{cmdab:del:ta}
{cmdab:boot:strap}
{cmdab:rep:s(}{it:#}{cmd:)}
{cmdab:si:ze(}{it:#}{cmd:)}
{cmdab:bias:corrected}|{cmdab:percent:ile}|{cmdab:norm:al}
{cmd:match}
{cmdab:do:ts}
{cmdab:sa:ving(}{it:filename, save_options}{cmd:)}]
{p 4 4 2}
where {it:variables_and_values} is an alternating list of variables
and either numeric values or mean, median, min, max, upper, lower,
previous.
{p 4 4 2}
{it:stat} is either mean, median, min, max, upper, lower,
previous, grmean(group mean), mrmedian, grmin, grmax.
{title:Description}
{p 4 4 2}
After estimating a regression model, {cmd:prvalue} computes the
predicted values at specific values of the independent variables.
Depending on the model and the options chosen, predicted values can
be estimated values of y, y*, probabilities for each outcome, or expected rate.
By default, the predictions are calculated holding all other variables
at their mean. Values for specific independent variables can be set
using the x() option after {cmd:prvalue}. For example, to compute
predicted values when educ is 10 and age is 30, type {cmd:prvalue, x(educ=10 age=30)}.
Values for the unspecified independent variables can be set using the rest()
option, e.g., {cmd:prvalue, x(educ=10 age=30) rest(mean)}. Changes in
predictions as values of the independent variables change can be
computed using the {cmd:save} and {cmd:diff} options. The {cmd:if}
and {cmd:in} conditions specify conditions for computation of means,
min, etc., that are used with rest(). The command works with {help cloglog},
{help cnreg}, {help fit}, {help gologit}, {help intreg}, {help logistic},
{help logit}, {help mlogit}, {help nbreg}, {help ologit}, {help oprobit},
{help poisson}, {help probit}, {help regress}, {help rologit}, {help slogit}, {help tobit}, {help zinb},
{help ztnb}, and {help ztp}. Standard maximum likelihood based confidence intervals
are computed for cnreg, fit, intreg, regress, and tobit. All other models for which
confidence intervals are available use delta method as default except for {help zinb} and {help zip}.
Confidence intervals for {help zinb} and {help zip} can only use bootstrap method.
{title:Options}
{p 4 8 2}
{cmd:save} saves current values of indepenent variables and predictions
for computing changes using the diff option.
{p 4 8 2}
{cmd:diff} computes difference between current predictions and those
that were saved.
{p 4 8 2}
{cmd:label()} adds a label for the prvalue associated with a given
{cmd:save} or {cmd:diff}. Labels are shown when using {cmd:prvalue, diff}.
{p 4 8 2}
{cmd:level()} sets the {help level} of the confidence interval for predicted
values or probabilities for the commands for which these are provided. The
default is 95.
{p 4 8 2}
{cmd:maxcnt()} is the maximum count value for which the probability
is computed in count models. Default is 9.
{p 4 8 2}
{cmd:x()} sets the values of independent variables for calculating
predicted values. The list must alternate variable names and values.
The values may be either numeric values or can be mean, median, min,
max, previous, upper, or lower. The latter cannot be used if rest()
specifies a group summary statistic (e.g., grmean).
{p 4 8 2}
{cmd:rest()} sets the independent variables not specified in x()
to their {cmd:mean} (default), {cmd:minimum}, {cmd:maximum}, {cmd:median}
when calculating predicted values. {cmd:grmean} sets these independent
variables to the mean conditional on the variables and values specified
in x(); {cmd:grmedian},{cmd:grmax}, and {cmd:grmin} can also be used.
If {cmd:prvalue} has already been run after the last estimate,
{cmd:previous} will set unspecified variables to their prior values.
For models other than mlogit, {cmd:upper} and {cmd:lower} can be used
to set independent variables to their minimum or maximum depending
on which will yield the upper or lower extreme predicted value.
{p 4 8 2}
{cmd:all} specifies that any calculations of means, medians, etc.,
should use the entire sample instead of the sample used to estimate
the model.
{p 4 8 2}
{cmd:nolabel} uses values rather than value labels in output.
{p 4 8 2}
{cmd:nobase} suppresses inclusion of the base values of x in the output.
{p 4 8 2}
{cmd:brief} prints only limited output.
{p 4 8 2}
{cmd:ystar} prints the predicted values and maximum likelihood based
confidence intervals of ystar for binary, ordinal, ols regression,
or tobit models.
{p 4 8 2}
{cmd:ept} computes confidence intervals for predicted probabilities
for cloglog, logit, and probit by endpoint transformation.
This method cannot be used for changes in predictions.
{p 4 8 2}
{cmd:delta} calculates confidence intervals by the delta method
using analytical derivatives. This method works with cloglog, logistic,
logit, probit, ologit, oprobit, gologit, poisson, and nbreg.
{p 4 8 2}
{cmd:bootstrap} computes confidence intervals using the bootstrap
method. This method takes roughly 1,000 times longer to compute than
other methods. This method works with cloglog, logistic, logit, mlogit,
probit, ologit, oprobit, gologit, poisson, nbreg, zip, and zinb.
{p 4 8 2}
{cmd:dots} is used with bootstrap to write a . at the beginning of
each replication and periodically prints the percent of total
replications that have been completed. If computations appears to
be stuck (i.e., new dots do not appear), it is likely that the
estimation is not converging for the current bootstrap sample.
This is to be most common with zip, zinb and gologit.
When this happens, you can click on the break symbol to stop
computations for the current sample or wait until the maximum
number of iterations have been computed (by default, the maximum
number of iterations is 16,000). When a model does not converge
for a given bootstrap sample, that sample is dropped.
{p 4 8 2}
{cmd:match} specifies that the bootstrap will resample within
each category of the dependent variable in proportion to the
distribution of the outcome categories in the estimation sample.
If match is not specified, the proportions in each category of
the bootstrap sample are determined entirely by the random draw
and it is possible to end up with samples in which no cases are
found in some of the categories. This option does not apply to
regression or count models (cnreg, intreg, nbreg, poisson, regress,
tobit, zinb, and zip). Usually, bootstrapped confidence intervals
using match option tend to be smaller than those without.
{p 4 8 2}
{cmd:percentile} computes the bootstrapped confidence interval using
the percentile method. This is the default method for bootstrap.
{p 4 8 2}
{cmd:biascorrected} computes the bootstrapped confidence interval
using the bias-corrected method.
{p 4 8 2}
{cmd:normal} computes the bootstrapped confidence interval using
the normal approximation method.
{p 4 8 2}
{cmd:saving()} creates a Stata data file (.dta file) containing
the bootstrap distribution for predictions (predicted probabilities
and expected rates) and discrete changes in discrete choice models
that {cmd:prvalue} applies to.
{title:Examples}
{p 4 4 2}
To compute the predicted values and confidence intervals using delta method
for an ordered logit in which all independent variables are held at the mean.
{p 4 8 2}{cmd:.oprobit warm yr89 male white age ed prst}
{p 4 8 2}{cmd:.prvalue, delta}
{p 4 4 2}To compute predicted values and confidence intervals using
bootstrap method where all independent variables are held at their
minimum
{p 4 8 2}{cmd:.prvalue, rest(minimum) boot}
{p 4 4 2}
To compute values for white females, holding all other variables
at their median with default delta method for confidence intervals.
{p 4 8 2}{cmd:.prvalue, x(white=1 male=0) rest(median)}
{p 4 4 2}
To compute values for white females, holding all other variables at
the median for white females with default delta method for confidence
intervals.:
{p 4 8 2}{cmd:.prvalue, x(white=1 male=0) rest(grmedian)}
{p 4 4 2}
To compute values at the minimum of education, holding all other
variables to the mean with default delta method for confidence intervals.:
{p 4 8 2}{cmd:.prvalue, x(ed=min)}
{p 4 4 2}
To compare the predicted values and compute confidence intervals of discrete
changes for males and females using delta method:
{p 4 8 2}{cmd:.prvalue, x(male=0) save delta}
{p 4 8 2}
:::
{p 4 8 2}{cmd:.prvalue, x(male=1) dif delta}
{hline}
{p 2 4 2}Authors: J. Scott Long & Jun Xu{p_end}
{p 11 4 2}{browse www.indiana.edu/~jslsoc/spost.htm}{p_end}
{p 11 4 2}spostsup@indiana.edu{p_end}

View File

@ -0,0 +1,64 @@
*! version 2.5.0 2009-10-28 jsl
// list versions of spost ado files being used
capture program drop prwhich
program define prwhich, rclass
version 8
di
di in w ///
"== spost versions as of $S_TIME ========================================="
di
which _get_mlogit_bv
which _get_mlogit_bvecv
which _peabbv
which _pebase
which _pecats
which _peciboot
which _pecidelta
which _peciml
which _pecmdcheck
which _pecollect
which _pedum
which _peife
which _pemarg
which _penocon
which _pepred
which _perhs
which _pesum
which _petrap
which _peunvec
which _pexstring
which asprvalue
which brant
which case2alt
which countfit
which fitstat
which leastlikely
which listcoef
which misschk
which mlogplot
which mlogtest
which mlogview
which mvtab1
which nmlab
which praccum
which prchange
which prcounts
which prdc
which prgen
which prtab
which prvalue
which prwhich
which spex
which spostupdate
which vardesc
which xpost
di
di in w ///
"========================================= spost versions as of $S_TIME =="
end
exit
* version 1.0.3 13Apr2005