You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
270 lines
8.7 KiB
Plaintext
270 lines
8.7 KiB
Plaintext
8 months ago
|
*! version 2.0 : Oct1995 : Joseph Hilbe
|
||
|
* Dichotomous Discriminate Anaylsis
|
||
|
* Based on version 1, written by Hilbe 12-7-91, published: STB-5 (Jan1992)
|
||
|
|
||
|
program define discrim2
|
||
|
version 5.0
|
||
|
local varlist "req ex"
|
||
|
local options "Detail Graph Anova Predict Keep"
|
||
|
local in "opt"
|
||
|
local if "opt"
|
||
|
parse "`*'"
|
||
|
parse "`varlist'",parse(" ")
|
||
|
qui summ `1'
|
||
|
|
||
|
if (_result(6)<0 | _result(6)>1) {
|
||
|
noi di in red "Error: Group variable must be 0/1"
|
||
|
exit
|
||
|
}
|
||
|
if "`keep'"=="" {
|
||
|
preserve
|
||
|
}
|
||
|
qui {
|
||
|
dropmiss `varlist'
|
||
|
tempvar prob0 prob1 dummy D nvar obs obs1 obs0 c0 c1 prop D cntall
|
||
|
gen `cntall'=_n
|
||
|
local group `1'
|
||
|
local y "`1'"
|
||
|
mac shift
|
||
|
qui count
|
||
|
gen `obs' = _result(1)
|
||
|
count if `group'==0
|
||
|
gen `obs0'= _result(1)
|
||
|
count if `group'==1
|
||
|
gen `obs1'= _result(1)
|
||
|
tempvar c0
|
||
|
gen `c0' =`obs1'/`obs'
|
||
|
gen `c1' = -(`obs0'/`obs')
|
||
|
gen `prob0' = `obs1'/`obs'
|
||
|
gen `dummy' = `c0' if `group'==0
|
||
|
replace `dummy' = `c1' if `group'==1
|
||
|
reg `dummy' `*', noheader
|
||
|
global nvar = _result(3)
|
||
|
cap drop `dummy' `c0' `c1'
|
||
|
global rsquare = _result(7)
|
||
|
global mahala=($rsquare/(1-$rsquare))*((`obs'*(`obs'-2))/(`obs0'*`obs1'))
|
||
|
gen `prop' = $rsquare/$mahala
|
||
|
gen `D' = sqrt($mahala)
|
||
|
|
||
|
* Calc discriminant scores
|
||
|
local i = 1
|
||
|
while "``i''"!= "" {
|
||
|
tempvar cof`i' dis`i'
|
||
|
gen `cof`i'' = _b[``i'']
|
||
|
gen `dis`i'' = `cof`i''/`prop'
|
||
|
local i = `i'+1
|
||
|
}
|
||
|
|
||
|
* Calc main values */
|
||
|
local j = 1
|
||
|
local i = 1
|
||
|
tempvar konst usdfk dscorep dscore dsFp dfun
|
||
|
gen `usdfk'=0
|
||
|
gen `konst' = 0
|
||
|
gen `dscorep'=0
|
||
|
gen `dscore'=0
|
||
|
gen `dsFp'=0
|
||
|
gen `dfun'=0
|
||
|
while "``i''"!="" {
|
||
|
tempvar mean`j' mn0`j' mn1`j' mndff`j' usdf`j'
|
||
|
summ ``i''
|
||
|
gen `mean`j'' = _result(3)
|
||
|
summ ``i'' if `group'==0
|
||
|
gen `mn0`j'' = _result(3)
|
||
|
summ ``i'' if `group'==1
|
||
|
gen `mn1`j'' = _result(3)
|
||
|
gen `mndff`j'' = `mn0`j'' + `mn1`j''
|
||
|
replace `konst' = `dis`i''*`mndff`j''+`konst'
|
||
|
gen `usdf`i''=`dis`i''/-`D'
|
||
|
replace `usdfk'=`usdf`j'' * `mean`j'' + `usdfk'
|
||
|
replace `dscorep'=`usdf`i''*``i''
|
||
|
replace `dscore'=`dscore'+`dscorep'
|
||
|
replace `dsFp'=`dis`i''*``i''
|
||
|
replace `dfun'=`dfun'+`dsFp'
|
||
|
local i = `i'+1
|
||
|
local j = `j'+1
|
||
|
}
|
||
|
replace `usdfk'=`usdfk'* -1
|
||
|
replace `dscore'=`dscore'+`usdfk'
|
||
|
replace `konst'=`konst'*-.5
|
||
|
replace `dfun' = `dfun'+`konst'
|
||
|
local tsquare = $mahala*((`obs0'*`obs1')/`obs')
|
||
|
local F = `tsquare'*(`obs'-$nvar)/(($nvar-1)*(`obs'-2))
|
||
|
global cnt0 = -`D'*(`obs1'/`obs')
|
||
|
global cnt1 = `D'*(`obs0'/`obs')
|
||
|
noi di _n
|
||
|
noi di in gr _col(20) "Dichotomous Discriminant Analysis"
|
||
|
noi di " "
|
||
|
#delimit ;
|
||
|
noi di in gr "Observations = " in ye `obs'
|
||
|
in gr _col(50) "Obs Group 0 = " in ye %9.0g `obs0';
|
||
|
noi di in gr "Indep variables = " in ye $nvar
|
||
|
in gr _col(50) "Obs Group 1 = " in ye %9.0g `obs1';
|
||
|
noi di " ";
|
||
|
noi di in gr "Centroid 0 = " in ye %9.4f $cnt0
|
||
|
in gr _col(50) "R-square = " in ye %9.4f $rsquare;
|
||
|
noi di in gr "Centroid 1 = " in ye %9.4f $cnt1
|
||
|
in gr _col(50) "Mahalanobis = " in ye %9.4f $mahala;
|
||
|
noi di in gr "Grand Cntd = " in ye %9.4f
|
||
|
((`obs1'*$cnt0)+(`obs0'*$cnt1))/`obs';
|
||
|
noi di " ";
|
||
|
#delimit cr
|
||
|
oneway `dscore' `group'
|
||
|
global eigen = _result(2)/_result(4)
|
||
|
global ccor = sqrt(_result(2)/(_result(2)+_result(4)))
|
||
|
global lambda = _result(4)/(_result(2)+_result(4))
|
||
|
global chisq = log($lambda)* -((`obs'-($nvar+2)/2)-1)
|
||
|
#delimit ;
|
||
|
noi di in gr "Eigenvalue = " in ye %9.4f $eigen
|
||
|
in gr _col(50) "Wilk's Lambda = " in ye %7.4f $lambda;
|
||
|
noi di in gr "Canon. Corr. = " in ye %9.4f $ccor
|
||
|
in gr _col(50) "Chi-square = " in ye %7.4f $chisq;
|
||
|
noi di in gr "Eta Squared = " in ye %9.4f $ccor^2
|
||
|
in gr _col(50) "Sign Chi2 = " in ye %7.4f chiprob($nvar,$chisq);
|
||
|
#delimit cr
|
||
|
|
||
|
* DISPLAY COEFFICIENTS
|
||
|
noi di _n(1)
|
||
|
noi di in gr _col(26) "Discrim Function" _col(46) "Unstandardized"
|
||
|
noi di in gr _col(11) /*
|
||
|
*/ "Variable" _col(28) "Coefficients" _col(48) "Coefficients"
|
||
|
noi di in gr _col(11) _dup(49) "-"
|
||
|
local i=1
|
||
|
while "``i''"!="" {
|
||
|
noi di in gr _col(11) "``i''" in ye _col(28) %9.4f `dis`i'' /*
|
||
|
*/ _col(51) %9.4f `usdf`i''
|
||
|
local i = `i'+1
|
||
|
}
|
||
|
noi di in gr _col(11) "constant" in ye _col(28) %9.4f `konst' /*
|
||
|
*/ _col(51) %9.4f `usdfk'
|
||
|
|
||
|
tempvar lprob grpred cell
|
||
|
gen `lprob' = 1/(1+exp(`dfun'))
|
||
|
gen `grpred' = 1 if `lprob'>=0.5
|
||
|
replace `grpred' = 0 if `lprob'<0.5
|
||
|
gen `cell' = 1
|
||
|
summ `cell' if `group'==0 & `lprob'<0.5
|
||
|
local aa=_result(2)
|
||
|
summ `cell' if `group'==0 & `lprob'>=0.5
|
||
|
local bb=_result(2)
|
||
|
summ `cell' if `group'==1 & `lprob'<0.5
|
||
|
local cc=_result(2)
|
||
|
summ `cell' if `group'==1 & `lprob'>=0.5
|
||
|
local dd=_result(2)
|
||
|
local tot = `aa'+`bb'+`cc'+`dd'
|
||
|
|
||
|
* CONFUSION MATRIX AND RELATED STATS OPTION
|
||
|
if "`predict'" !="" {
|
||
|
noi di _n " "
|
||
|
noi di in gr _col(25) "----- Predicted -----"
|
||
|
noi di in gr _col(13) "Actual | Group 0 Group 1 | Total"
|
||
|
noi di in gr _col(13) "---------+" _dup(26) "-" "+--------"
|
||
|
noi di in gr _col(13) "Group 0 |" in ye _col(26) %6.0g `aa' /*
|
||
|
*/ _col(40) %6.0g `bb' in gr _col(49) "|" in ye _col(52) %6.0g `aa'+`bb'
|
||
|
noi di in gr _col(13) "Group 1 |" in ye _col(26) %6.0g `cc' /*
|
||
|
*/ _col(40) %6.0g `dd' in gr _col(49) "|" in ye _col(52) %6.0g `cc'+`dd'
|
||
|
noi di in gr _col(13) "---------+" _dup(26) "-" "+--------"
|
||
|
noi di in gr _col(13) "Total |" in ye _col(26) %6.0g `aa'+`cc' /*
|
||
|
*/ _col(40) %6.0g `bb'+`dd' in gr _col(49) "|" in ye _col(52) /*
|
||
|
*/ %6.0g `tot'
|
||
|
noi di in gr _col(13) "---------+" _dup(26) "-" "+--------"
|
||
|
noi di " "
|
||
|
noi di in gr _col(21) "Correctly predicted = " in ye /*
|
||
|
*/ %6.2f ((`aa'+`dd')/`tot')*100 " %"
|
||
|
noi di in gr _col(21) "Model sensitivity = " in ye /*
|
||
|
*/ %6.2f (`aa'/(`aa'+`bb'))*100 " %"
|
||
|
noi di in gr _col(21) "Model specificity = " in ye /*
|
||
|
*/ %6.2f (`dd'/(`cc'+`dd'))*100 " %"
|
||
|
noi di in gr _col(21) "False positive = " in ye /*
|
||
|
*/ %6.2f (`cc'/(`aa'+`cc'))*100 " %"
|
||
|
noi di in gr _col(21) "False negative = " in ye /*
|
||
|
*/ %6.2f (`bb'/(`bb'+`dd'))*100 " %"
|
||
|
}
|
||
|
|
||
|
* ANOVA OPTION
|
||
|
if "`anova'"!="" {
|
||
|
noi di _n(1)
|
||
|
noi di in gr _col(18) "Discriminant Scores v Group Variable"
|
||
|
noi oneway `dscore' `group'
|
||
|
}
|
||
|
|
||
|
* GRAPH OPTION
|
||
|
if "`graph'"!="" {
|
||
|
tempvar lnp lnm
|
||
|
gen `lnp'=`lprob' if (`lprob'>=0.5 & `group'==1) | /*
|
||
|
*/ (`lprob'<0.5 & `group'==0)
|
||
|
gen `lnm'=`lprob' if `lnp'==.
|
||
|
lab var `lnp' "Classified"
|
||
|
lab var `lnm' "Misclassified"
|
||
|
lab var `dfun' "Discriminant Index"
|
||
|
noi gr `lnp' `lnm' `dfun', s(.p) xlab ylab(.1,.3,.4,.5,.6,.7,.9) /*
|
||
|
*/ border yline(.5) ti(" Probability of Classification")
|
||
|
cap drop `lnp' `lnm'
|
||
|
}
|
||
|
|
||
|
* INDIVIDUAL CLASSIFICATION STATISTICS OPTION: SAVES WITH SAVE OPTION
|
||
|
if "`detail'"!="" {
|
||
|
cap drop PRED DscScore DscIndex LnProb1 Group DIFF
|
||
|
sort `cntall'
|
||
|
noi di _n(2)
|
||
|
gen PRED=`grpred'
|
||
|
gen DscScore = `dscore'
|
||
|
gen DscIndex = `dfun'
|
||
|
gen LnProb1 = `lprob'
|
||
|
gen Group = `group'
|
||
|
compress Group PRED
|
||
|
gen str2 DIFF = " *" if `group'!=PRED
|
||
|
format DIFF %2s
|
||
|
format DscIndex DscScore LnProb1 %9.4f
|
||
|
noi di in gr " PRED = Predicted Group DIFF = Misclassification"
|
||
|
noi di in gr " LnProb1 = Probability Gr 1 DscScore = Discriminant Score"
|
||
|
noi di in gr " DscIndex = Discriminant Index"
|
||
|
noi di in gr _dup(63) "-"
|
||
|
di " "
|
||
|
noi l `y' PRED DIFF LnProb1 DscIndex DscScore, nol
|
||
|
}
|
||
|
|
||
|
global S_E_var $nvar
|
||
|
global S_E_obs `obs'
|
||
|
global S_E_ob0 `obs0'
|
||
|
global S_E_ob1 `obs1'
|
||
|
global S_E_cn0 $cnt0
|
||
|
global S_E_cn1 $cnt1
|
||
|
global S_E_cng ((`obs1'*$cnt0)+(`obs0'*$cnt1))/`obs'
|
||
|
global S_E_r2 $rsquare
|
||
|
global S_E_mah $mahala
|
||
|
global S_E_eig $eigen
|
||
|
global S_E_lam $lamda
|
||
|
global S_E_cc $ccor
|
||
|
global S_E_chi $chisq
|
||
|
global S_E_e2 $ccor^2
|
||
|
global S_E_cmd "discrim"
|
||
|
|
||
|
}
|
||
|
|
||
|
if "`keep'"!="" {
|
||
|
noi di _n in bl " Caution: data changed in memory"
|
||
|
}
|
||
|
if "`keep'"=="" {
|
||
|
restore
|
||
|
}
|
||
|
|
||
|
end
|
||
|
|
||
|
|
||
|
|
||
|
* DROP MISSING VALUES
|
||
|
capture program drop dropmiss
|
||
|
program define dropmiss
|
||
|
local varlist "req ex"
|
||
|
parse "`*'"
|
||
|
parse "`varlist'", parse(" ")
|
||
|
local i= 1
|
||
|
while "``i''"!="" {
|
||
|
drop if ``i''==.
|
||
|
local i=`i'+1
|
||
|
}
|
||
|
end
|
||
|
|
||
|
|