Setup initial file structure

main
Corentin Choisy 11 months ago
parent 6183a6391b
commit 70e53e7760

@ -0,0 +1,100 @@
capture program drop _graph
* _graph : graphical summary of a multidimensional questionnaire
*
*   varlist       items, ordered dimension by dimension
*   partition()   number of items in each dimension (in varlist order)
*   scorename()   names of the score variables (default Dim1, Dim2, ...);
*                 these variables must already exist in the dataset
*   calcmethod()  stand | sum | mean : only used here to choose the binning
*                 of the histograms
*
* Graphs produced: one histogram per score (combined in "Histograms_scores"),
* a biplot of the scores ("Biplot_dimensions") and a biplot of the items drawn
* by hand with one colour per dimension ("Biplot_items").
program _graph
    syntax varlist, PARTition(numlist integer >0) [SCOrename(string)] calcmethod(string)
    qui set autotabgraphs on
    local P:word count `partition'

    * default score names
    if "`scorename'" == "" {
        forvalues i = 1/`P' {
            local scorename `scorename' Dim`i'
        }
    }

    * var1, var2, ... : item names in varlist order
    local i = 1
    foreach x in `varlist' {
        local var`i' `x'
        local ++i
    }

    *capture calcscore `varlist', partition(`partition') scorename(`scorename') calcmethod(`calcmethod')

    * Only ONE binning option is passed to -histogram- (the previous code always
    * sent both width() and bin(), one of them empty). Any other calcmethod
    * falls back on the default binning of -histogram-.
    local binopt
    if "`calcmethod'" == "stand" local binopt width(10)
    if "`calcmethod'" == "sum" local binopt bin(10)
    if "`calcmethod'" == "mean" local binopt width(0.5)

    * graphics are switched back on even when a histogram fails
    set graphics off
    capture noisily {
        foreach s in `scorename' {
            qui hist `s', name("`s'",replace) percent fcolor(emidblue) lcolor(none) `binopt'
        }
    }
    local rc = _rc
    set graphics on
    if `rc' exit `rc'

    gr combine `scorename', name("Histograms_scores",replace)
    qui biplot `scorename', name("Biplot_dimensions",replace) norow std title("") xtitle("") ytitle("")

    * coordinates of the items: r(V) of a biplot run without graph, copied into
    * two temporary variables (assumes one row per item and two columns)
    qui biplot `varlist', name("temp",replace) norow std nograph
    tempname V
    mat `V' = r(V)
    tempvar a1 a2
    mat colnames `V' = `a1' `a2'
    svmat `V', names(col)

    * axis ranges; the x axis is widened to leave room for the legend text
    qui summarize `a1'
    local mina1 = r(min)
    local maxa1 = r(max)+1.4
    local maxa1x = `maxa1'+0.3
    qui summarize `a2'
    local mina2 = r(min)
    local maxa2 = r(max)

    * one colour per dimension; the list is reused cyclically when there are
    * more dimensions than colours
    local colors red blue black green ebblue mint erose orange maroon magenta mint gray teal navy olive sienna
    local ncolors : word count `colors'

    local i = 1
    local y = 1
    local bas = `maxa2'+0.2
    local droite = max(`maxa1',0.2)
    local call
    foreach x in `partition' {
        * items `y' to `s' belong to dimension `i'
        local s = `y'+`x'-1
        local k = mod(`i'-1,`ncolors')+1
        local colour : word `k' of `colors'
        local dimname : word `i' of `scorename'
        forvalues j=`y'/`s' {
            local a = `a1'[`j']
            local b = `a2'[`j']
            local call `call' || pcarrowi 0 0 `b' `a' "`var`j''", mlabcolor(`colour') color(`colour') head
        }
        * legend entry: name of the dimension, written in its colour
        local bas = `bas'-0.2
        local call `call' text(`bas' `droite' "`dimname'", size(3) color(`colour'))
        local ++i
        local y = `s'+1
    }
    qui twoway `call' name("Biplot_items",replace) legend(off) xscale(range(`mina1' `maxa1x')) yscale(range(`mina2' `maxa2')) xtitle("") ytitle("")
end
*_graph x1-x40, partition(5 5 5 5 5 5 5 5) scorename(HAaaaaa PSE W BCC AC AE LI MOC)
*_graph ioc1-ioc37, partition(4 4 7 3 3 4 7 5) scorename(HA PSE W BCC AC AE LI MOC) calcmethod(stand)

@ -0,0 +1,48 @@
*! version 1 27may2007
*! Jean-Benoit Hardouin
*
************************************************************************************************************
* Stata program : anaoption
*
* Historic
* Version 1 (2007-05-27): Jean-Benoit Hardouin
*
* Jean-benoit Hardouin, phD, Assistant Professor
* Team of Biostatistics, Clinical Research and Subjective Measures in Health Sciences
* University of Nantes - Faculty of Pharmaceutical Sciences
* France
* jean-benoit.hardouin@anaqol.org
*
* News about this program :http://www.anaqol.org
* FreeIRT Project website : http://www.freeirt.org
*
* Copyright 2007 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
************************************************************************************************************
* anaoption : parses a set of analysis options and hands them back in r().
*   details        flag, returned as the macro r(details)
*   minvi(#)       returned as r(minvi)     (default .03)
*   siglevel(#)    returned as r(siglevel)  (default 0.05)
*   minsize(#)     returned as r(minsize)   (default 0)
* The program must be declared rclass (it was spelled "rclas") because it
* uses -return-.
program define anaoption ,rclass
    version 7.0
    syntax [, DETails minvi(real .03) siglevel(real 0.05) minsize(real 0)]
    return scalar minvi=`minvi'
    return scalar siglevel=`siglevel'
    return scalar minsize=`minsize'
    return local details `details'
end

@ -0,0 +1,36 @@
* raschres : item residuals under a Rasch model, followed by a PCA of them.
*
*   varlist      the items
*   resname()    stub of the residual variables that are created
*                (default "res": res1, res2, ...)
*
* The program reads r(beta) and r(theta), so it has to be run immediately
* after a command that leaves these two matrices in r().
* NOTE(review): presumably r(beta) holds the item difficulties and r(theta)
* the latent trait estimate attached to each score - confirm with the
* estimation command used upstream.
program define raschres,rclass
    syntax varlist [, resname(string)]
    tempname diff theta
    matrix `diff'=r(beta)
    matrix `theta'=r(theta)
    * default stub: stored as text (the former "local resname=res" tried to
    * evaluate an expression called res)
    if "`resname'"=="" {
        local resname res
    }
    local nbitems: word count `varlist'
    tempvar score lt
    genscore `varlist',score(`score')
    gen `lt'=.
    * NOTE(review): the loop starts at 0, so column 0 of r(theta) is requested
    * for the null score - check how the columns of r(theta) are indexed.
    forvalues i=0/`nbitems' {
        replace `lt'=`theta'[1,`i'] if `score'==`i'
    }
    local reslist
    forvalues i=1/`nbitems'{
        tempvar p`i'
        * logistic probability given the trait value and the item parameter
        * (the closing parenthesis was missing)
        gen `p`i''=exp(`lt'-`diff'[1,`i'])/(1+exp(`lt'-`diff'[1,`i']))
        * NOTE(review): the numerator is 1-p whatever the observed response;
        * a standardized residual would normally use (response - p). Confirm.
        gen `resname'`i'=(1-`p`i'')/sqrt(`p`i''*(1-`p`i''))
        local reslist `reslist' `resname'`i'
    }
    * PCA restricted to the residuals created above (a `resname'* wildcard
    * would also catch any other variable starting with the stub)
    pca `reslist'
end

@ -0,0 +1,171 @@
*! version 3 11june2014
************************************************************************************************************
* Backrasch : Backward procedure under a Rasch model
*
* Historic
* Version 1 (2004-02-13) : Jean-Benoit Hardouin
* Version 2 (2005-05-23) : Jean-Benoit Hardouin
* Version 3 (2014-06-11) : Jean-Benoit Hardouin /*id for raschtest*/
*
* Needed modules :
* raschtestv7 version 7.2.1 (http://freeirt.free.fr)
* gammasym version 2.1 (http://freeirt.free.fr)
* gausshermite version 1 (http://freeirt.free.fr)
* geekel2d version 4.1 (http://freeirt.free.fr)
* ghquadm (findit ghquadm)
* gllamm version 2.3.10 (ssc describe gllamm)
* gllapred version 2.3.2 (ssc describe gllapred)
* elapse (ssc describe elapse)
*
* Jean-benoit Hardouin, Regional Health Observatory of Orléans - France
* jean-benoit.hardouin@orscentre.org
*
* News about this program : http://anaqol.free.fr
* FreeIRT Project : http://freeirt.free.fr
*
* Copyright 2004-2005, 2014 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
************************************************************************************************************
* backrasch : backward selection of items under a Rasch model.
*
*   varlist        at least three numeric items
*   p(#)           an item is removed when its fit p-value is below # (default 0.05)
*   method()       passed to raschtestv7 as m() (default cml)
*   test()         passed to raschtestv7 as t() (default R)
*   nbscales(#)    maximal number of sub-scales to build (default 1)
*   nodetail       do not display the steps of the algorithm
*   noautogroup    do not pass the autogroup option to raschtestv7
*
* Returns r(selection): a 1 x nbitems matrix giving, for each item, the number
* of the sub-scale it belongs to (0 = not selected).
program define backrasch , rclass
    version 8.0
    syntax varlist(min=3 numeric) , [p(real 0.05) Method(string) Test(string) NBSCales(integer 1) nodetail noAUTOGroup]
    local nbitems : word count `varlist'
    tokenize `varlist'
    preserve
    tempfile saveraschtest
    qui save `saveraschtest'

    * defaults
    local autogroup2
    if "`autogroup'"=="" {
        local autogroup2 autogroup
    }
    if "`method'"=="" {
        local method cml
    }
    if "`test'"=="" {
        local test R
    }

    * select[1,i] : 0 = item still available, -1 = removed from the scale under
    * construction, k>0 = item retained in sub-scale k
    tempname select
    matrix `select'=J(1,`nbitems',0)
    local dim=1
    local less3items=0
    while `dim'<=`nbscales'&`less3items'!=1 {
        di
        di in green _col(25) "subscale : " in yellow `dim'
        di in green _col(25) "{hline 12}"
        local nobaditem=0
        while `nobaditem'!=1 {
            * items still available for the current sub-scale
            local varlistscale
            local nbitemsscale=0
            forvalues i=1/`nbitems' {
                if `select'[1,`i']==0 {
                    local nbitemsscale=`nbitemsscale'+1
                    local ssitem`nbitemsscale'=`i'
                    local varlistscale `varlistscale' ``i''
                }
            }
            if `nbitemsscale'<3 {
                if "`detail'"=="" {
                    di in green "The " in yellow "`dim'th " in green "sub-scale can not be created, because there is less than three items remaining"
                }
                * raise the flag itself (the former "local `less3items'=1"
                * created a macro named 0 and left the flag untouched)
                local less3items=1
                local dim=`dim'-1
                continue, break
            }
            else {
                tempvar tmp
                capture gen `tmp'=_n
                qui raschtestv7 `varlistscale',m(`method') t(`test') `autogroup2' id(`tmp')
                tempname itemFit
                matrix `itemFit'=r(itemFit)
                * look for the item with the smallest p-value below the threshold
                * (p-values are read in column 3 of r(itemFit))
                local minp=`p'
                local deleteitem
                local nobaditem=1
                forvalues i=1/`nbitemsscale' {
                    if `itemFit'[`i',3]<`minp' {
                        local minp=`itemFit'[`i',3]
                        local deleteitem=`i'
                        local rowdeleteitem=`ssitem`i''
                        local nobaditem=0
                    }
                }
                if `nobaditem'==1 {
                    if "`detail'"=="" {
                        di in green "No more item to remove of the scale " in yellow "`dim'"
                    }
                    continue, break
                }
                else {
                    if "`detail'"=="" {
                        di in green "The item " in yellow "``rowdeleteitem'' " in green "is removed of the scale " in yellow "`dim'" in green " (p=" in yellow %6.4f `minp' in green ")"
                    }
                    matrix `select'[1,`rowdeleteitem']=-1
                }
            }
        }
        if `nbitemsscale'>=3 {
            * remaining items form sub-scale `dim'; removed items become
            * available again for the next sub-scale
            forvalues i=1/`nbitems' {
                if `select'[1,`i']==0 {
                    matrix `select'[1,`i']=`dim'
                }
                if `select'[1,`i']==-1 {
                    matrix `select'[1,`i']=0
                }
            }
            local scale`dim'
            forvalues i=1/`nbitems' {
                if `select'[1,`i']==`dim' {
                    local scale`dim' "`scale`dim'' ``i''"
                }
            }
            if "`scale`dim''"!="" {
                di
                di in green _col(4) "Number of selected items : " in yellow "`nbitemsscale'"
                tempvar tmp2
                capture gen `tmp2'=_n
                raschtestv7 `scale`dim'',m(`method') t(`test') `autogroup2' id(`tmp2')
                di
                di _dup(70) "-"
            }
            local dim=`dim'+1
        }
        if `nbitemsscale'<3{
            * no sub-scale could be built: release the removed items and stop
            forvalues i=1/`nbitems' {
                if `select'[1,`i']==-1 {
                    matrix `select'[1,`i']=0
                }
            }
            continue, break
        }
    }
    matrix colnames `select'=`varlist'
    matrix rownames `select'=scale
    return matrix selection `select'
end

@ -0,0 +1,54 @@
{smcl}
{* 23may2005}{...}
{hline}
help for {hi:backrasch}
{hline}
{title:Backward procedure on a Rasch model}
{p 8 14 2}{cmd:backrasch} {it:varlist} {cmd:,} [{cmdab:m:ethod}({it:keyword})
{cmdab:t:est}({it:keyword}) {cmdab:nodetail}
{cmdab:p}({it:#.###}) {cmdab:nbsc:ales}({it:#})
{cmdab:noautog:roup}]
{title:Description}
{p 4 8 2}{cmd:backrasch} performs a backward procedure on a Rasch model: the
items are removed one by one if they have a bad fit to the Rasch model. The
fit of the items is evaluated by a first-order statistic (test R1c, R1m or Q1).
It is possible to build several sub-scales of items: the second sub-scale is
built with the items unselected in the first sub-scale, the third one
with the items unselected in the first two sub-scales, and so on... By default,
the parameters of the Rasch model are estimated by conditional maximum
likelihood (CML), but it is possible to estimate them by marginal maximum
likelihood (MML) or generalized estimating equations (GEE).
{title:Options}
{p 4 8 2}{cmd:method}({it:cml/mml/gee}) defines the method of estimation of the difficulty parameters among conditional maximum likelihood (cml - by default), marginal maximum likelihood (mml) or generalized estimating equations (gee).
{p 4 8 2}{cmd:test}({it:R/Q}) defines the first order statistics to use between R-type test (R1c or R1m - by default) or the Q1 test of Van den Wollenberg.
{p 4 8 2}{cmd:nodetail} does not display the description of the algorithm.
{p 4 8 2}{cmd:p}({it:#.###}) defines the significance level below which an item is considered to have a significantly bad fit (0.05 by default).
{p 4 8 2}{cmd:nbscales}({it:#}) defines the maximal number of sub-scales to build. By default, the program builds only one sub-scale.
{p 4 8 2}{cmd:noautogroup} forces the program to compute the first order fit statistics with the groups defined by the value of the score. By default, the scores are grouped to obtain groups of 30 individuals or more.
{title:Examples}
{p 4 8 2}{cmd:. backrasch item1 item2 item3 item4} /*estimation by CML, test R1c, only one scale is built*/
{p 4 8 2}{cmd:. backrasch item1 item2 item3 item4 , p(0.2) method(mml) nodetail} /*estimation by MML, test R1m, only one scale*/
{p 4 8 2}{cmd:. backrasch item1 item2 item3 item4 , p(0.1) nbsc(5) noautog} /*CML estimation, R1c tests, 5 scales will be built*/
{title:Author}
{p 4 8 2}Jean-Benoit Hardouin, Regional Health Observatory (ORS) - 1, rue Porte
Madeleine - BP 2439 - 45032 Orleans Cedex 1 - France. You can contact the author at
{browse "mailto:jean-benoit.hardouin@orscentre.org":jean-benoit.hardouin@orscentre.org}
and visit the websites {browse "http://anaqol.free.fr":AnaQol} and
{browse "http://freeirt.free.fr":FreeIRT}

@ -0,0 +1,74 @@
*! version 1.0.1 30sep2005
*! The first version of this module was written by Ken Higbee (StataCorp)
*! Improvements by Jean-Benoit Hardouin
// biplotvlab : biplot on which the variables are identified by their variable
// label (or by their name when they have no label).
//   varlist      at least two numeric variables
//   labdes()     options applied to the label text (default size(vsmall))
//   stretch(#)   multiplier applied to the label positions and passed to biplot
//   *            any other option is passed to biplot
program biplotvlab
    version 9
    syntax varlist(numeric min=2) [if] [in] [, LABdes(string) stretch(int 1) *]
    // run biplot quietly (and nograph) so we can get r(V)
    qui biplot `varlist' `if' `in' , `options' nograph
    tempname V
    mat `V' = r(V)
    local nbvar:word count `varlist'
    tokenize `varlist'
    // build the -text()- option
    local topt "text("
    // extreme coordinates over the rows of V (column 1 = x, column 2 = y)
    local miny=`V'[1,2]
    local maxy=`V'[1,2]
    local minx=`V'[1,1]
    local maxx=`V'[1,1]
    forvalues i=1/`nbvar' {
        local miny=min(`V'[`i',2],`miny')
        local maxy=max(`V'[`i',2],`maxy')
        local minx=min(`V'[`i',1],`minx')
        local maxx=max(`V'[`i',1],`maxx')
    }
    // offset between the end of an arrow and its label: 1/20 of the largest
    // absolute coordinate when all coordinates share the same sign, 1/20 of
    // the range otherwise
    if `maxx'*`minx'>0 {
        local coefx=max(abs(`maxx'),abs(`minx'))
        local coefx=`coefx'/20
    }
    else {
        local coefx=abs(`maxx'-`minx')/20
    }
    // the vertical offset is decided on the y extremes (the x extremes were
    // tested here by mistake)
    if `maxy'*`miny'>0 {
        local coefy=max(abs(`maxy'),abs(`miny'))
        local coefy=`coefy'/20
    }
    else {
        local coefy=abs(`maxy'-`miny')/20
    }
    forvalues i=1/`nbvar' {
        // y value: the label is pushed away from the origin
        if `V'[`i',2]>0 {
            local topt `"`topt' `= (`V'[`i',2]+`coefy')*`stretch''"'
        }
        else {
            local topt `"`topt' `= (`V'[`i',2]-`coefy')*`stretch''"'
        }
        // x value
        if `V'[`i',1]<0 {
            local topt `"`topt' `= (`V'[`i',1]-`coefx')*`stretch''"'
        }
        else {
            local topt `"`topt' `= (`V'[`i',1]+`coefx')*`stretch''"'
        }
        // variable label
        local lab: var label ``i''
        if "`lab'"=="" {
            local lab ``i''
        }
        local topt `"`topt' `"`lab'"' "'
    }
    if "`labdes'"=="" {
        local labdes size(vsmall)
    }
    local topt `"`topt',`labdes')"'
    // call with -colopts(nolabel)- and -text()- just built
    biplot `varlist' `if' `in', `options' colopts(nolabel) `topt' stretch(`stretch')
end

@ -0,0 +1,38 @@
{smcl}
{* 30sept2005}{...}
{hline}
help for {hi:biplotvlab}{right:Jean-benoit Hardouin}
{hline}
{title:Biplot with variable labels}
{p 8 14 2}{cmd:biplotvlab} {it:varlist} [{cmd:,} {cmdab:lab:des}({help textbox_options}) {help biplot:biplot_options}]
{title:Description}
{p 4 8 2}{cmd:biplotvlab} allows displaying on a biplot graph the labels of the
variables instead of the names of the variables. If no label is defined for one
or several variables, the names of these variables are displayed instead.
{cmd:biplotvlab} replaces the official {cmd:biplot} command.
{title:Options}
{p 4 8 2}{cmd:labdes} defines the options to apply to the labels (color, size...).
{title:Examples}
{p 4 8 2}{cmd:. biplotvlab turn trunk mpg,norow}
{p 4 8 2}{cmd:. biplotvlab turn trunk mpg,labdes(size(vsmall) color(blue))}
{p 4 8 2}{cmd:. biplotvlab turn trunk mpg, stretch(12)}
{title:Authors}
{p 4 8 2}Ken Higbee (StataCorp) wrote the first version of this module.
{p 4 8 2}Improvements by Jean-Benoit Hardouin, Regional Health Observatory (ORS) - 1, rue Porte
Madeleine - BP 2439 - 45032 Orleans Cedex 1 - France. You can contact the author at
{browse "mailto:jean-benoit.hardouin@orscentre.org":jean-benoit.hardouin@orscentre.org}
and visit the websites {browse "http://anaqol.free.fr":AnaQol} and
{browse "http://freeirt.free.fr":FreeIRT}

@ -0,0 +1 @@
Subproject commit bdcabcbde5043d31555d7d58758441d90f92f235

@ -0,0 +1,155 @@
capture program drop calcscore
* calcscore : computes one score per dimension of a questionnaire.
*
*   varlist        items, ordered dimension by dimension
*   partition()    number of items in each dimension (must sum to the number
*                  of variables in varlist)
*   calcmethod()   mean (default), sum or stand
*                    mean  : mean of the non-missing items
*                    sum   : mean of the non-missing items multiplied by the
*                            number of non-missing items
*                    stand : the "sum" score rescaled between 0 and 100 using
*                            its observed minimum and maximum
*   scorename()    names of the new score variables (default Dim1, Dim2, ...)
*
* A score is left missing when fewer than half of the items of the dimension
* are answered.
* Errors: 198 when the partition does not match varlist or when calcmethod is
* invalid, 119 when scorename is inconsistent or names an existing variable.
program calcscore,rclass
    syntax varlist, PARTition(numlist integer >0) [CALCmethod(string) SCOrename(string)]

    * the partition has to account for every variable
    local total = 0
    foreach size in `partition' {
        local total = `total' + `size'
    }
    local nbvars : word count `varlist'
    if `total' != `nbvars' {
        di in red "The sum of the numbers in the partition option is different from the number of variables precised in varlist"
        * a non-zero return code is needed so that callers see the failure
        exit 198
    }

    * one new name per dimension
    if "`scorename'" != "" {
        local P:word count `partition'
        local S:word count `scorename'
        if `P'!=`S' {
            di in red "The number of score names given is different from the number of dimensions in the partition option"
            exit 119
        }
        foreach sco in `scorename' {
            capture confirm variable `sco'
            if !_rc {
                di in red "`sco' is a variable of the dataset. Choose another name"
                exit 119
            }
        }
    }

    * the method is checked once, before anything is created
    if "`calcmethod'" == "" local calcmethod = "mean"
    if "`calcmethod'" != "mean" & "`calcmethod'" != "sum" & "`calcmethod'" != "stand" {
        di in red "option calcmethod incorrectly specified (choose among mean, sum and stand)"
        error 198
    }

    tokenize `varlist'
    local i = 1
    local y = 1
    foreach x in `partition' {
        * items `y' to `s' make up dimension `i'
        local s = `y'+`x'-1
        local liste
        forvalues w = `y'/`s' {
            local liste `liste' ``w''
        }
        tempvar nonmiss
        qui egen `nonmiss' = rownonmiss(`liste')

        if "`scorename'" != "" local sc : word `i' of `scorename'
        else local sc Dim`i'

        * mean of the answered items, when at least half of them are answered
        qui egen `sc' = rowmean(`liste') if `nonmiss' >= `x'/2
        if "`calcmethod'" != "mean" {
            qui replace `sc' = `sc'*`nonmiss'
        }
        if "`calcmethod'" == "stand" {
            tempvar min max
            qui egen `min' = min(`sc')
            qui egen `max' = max(`sc')
            qui replace `sc' = (`sc'-`min')/(`max'-`min')*100
        }
        local ++i
        local y = `s'+1
    }
end
*calcscore ioc1-ioc37, partition(4 4 7 3 3 4 7 5) scorename(HA PSE W BCC AC AE LI MOC) calcmethod(stand)
*calcscore x1-x40, partition(5 5 5 5 5 5 5 5) calcmethod(stand)
*calcscore sf36_3q_intenses sf36_3q_moderees sf36_3q_soulever sf36_3q_etages sf36_3q_etage sf36_3q_pencher sf36_3q_15km sf36_3q_500m sf36_3q_100m sf36_3q_douche sf36_4q_limite_temps_travail sf36_4q_moins_choses sf36_4q_type_travail sf36_4q_effort sf36_7q_intensite_douleurs sf36_8q_douleurs_physiques sf36_1q sf36_11q_malade sf36_11q_porte_bien sf36_11q_degrade sf36_11q_excellente_sante sf36_9q_enthousiaste sf36_9q_energie sf36_9q_epuise sf36_9q_fatigue sf36_6q_vie_sociale sf36_10q_etat_mental sf36_5q_limite_temps_travail sf36_5q_moins_choses sf36_5q_accomplies_soigneusement sf36_9q_nerveux sf36_9q_triste sf36_9q_calme sf36_9q_maussade sf36_9q_heureux, partition(10 4 2 5 4 2 3 5) scorename(PF RP BP GH VT SF RE MH) calcmethod(mean)

@ -0,0 +1,395 @@
*program drop calcul
* calcul : fills the 140 x 6 matrix -deces- with the results of repeated calls
* to -tcm- for the cause code(s) given in s10(), then lists it.
*
* Layout of -deces- as written below:
*   rows   1-18   rows 2-19, column 4 of r(donnees): 1989 (col 1-2 = sexe 1-2)
*                 and 1982 (col 3-4 = sexe 1-2)
*   rows  20-33   r(TCM) by year 1982-1986, 1989, 1992-1999 (col = sexe)
*   rows  35-54   sexe 1, one column per codegeo 18 28 36 37 41 45:
*                 rows 2-19, column 2 of r(donnees), then r(TCM), then r(pvalue)
*   rows  56-75   same for sexe 2
*   rows  77-99   r(TCM) for the 23 codegeo values 2411 ... 2464 (col = sexe)
*   rows 101...   annee x deptdom counts for sexe 1, rows 121... for sexe 2
* The matrices -deces-, -essai-, -nbH- and -nbF- are left in memory and the
* dataset in memory is replaced.
program define calcul
    syntax, s10(numlist)
    matrix define deces=J(140,6,0)

    * 1989 then 1982: detailed figures plus the rate
    local firstcol 1
    foreach yr in 1989 1982 {
        local lo=`yr'-1
        local hi=`yr'+1
        if `yr'==1989 local ratrow 25
        else local ratrow 20
        forvalues sx=1/2 {
            local datacol=`firstcol'+`sx'-1
            tcm, s10(`s10') anneepop(`yr') annees(`lo'/`hi') sexe(`sx')
            matrix essai=r(donnees)
            local rate=r(TCM)
            matrix deces[1,`datacol']=essai[2..19,4]
            matrix deces[`ratrow',`sx']=`rate'
        }
        local firstcol=`firstcol'+2
    }

    * other years: the rate only (rows 21-24 for 1983-1986, 26-33 for 1992-1999)
    foreach yr of numlist 1983/1986 1992/1999 {
        local lo=`yr'-1
        local hi=`yr'+1
        if `yr'<=1986 local ratrow=`yr'-1962
        else local ratrow=`yr'-1966
        forvalues sx=1/2 {
            tcm, s10(`s10') anneepop(`yr') annees(`lo'/`hi') sexe(`sx')
            matrix essai=r(donnees)
            local rate=r(TCM)
            matrix deces[`ratrow',`sx']=`rate'
            * the 1999 rates are reused below as comparison values
            if `yr'==1999 local regrate`sx' `rate'
        }
    }

    * codegeo 18 ... 45, compared with the 1999 rate of the same sexe
    local col 0
    foreach geo in 18 28 36 37 41 45 {
        local ++col
        forvalues sx=1/2 {
            local top=35+21*(`sx'-1)
            local tcmrow=`top'+18
            local prow=`top'+19
            tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(`sx') codegeo(`geo') tcmcomp(`regrate`sx'')
            matrix essai=r(donnees)
            matrix deces[`top',`col']=essai[2..19,2]
            matrix deces[`tcmrow',`col']=r(TCM)
            matrix deces[`prow',`col']=r(pvalue)
        }
    }

    * codegeo 2411 ... 2464: the rate only, rows 77 to 99
    local ratrow 76
    foreach geo in 2411 2412 2413 2414 2421 2422 2423 2424 2431 2432 2433 2434 2441 2442 2443 2444 2451 2452 2453 2461 2462 2463 2464 {
        local ++ratrow
        forvalues sx=1/2 {
            tcm, s10(`s10') anneepop(1999) annees(1998/2000) sexe(`sx') codegeo(`geo')
            local rate=r(TCM)
            matrix deces[`ratrow',`sx']=`rate'
        }
    }

    * raw counts by annee and deptdom
    use "C:\ado\personal\files\dccentre8000reduit.dta", clear
    tab annee deptdom if S10==`s10' & sexe==1 , matcell(nbH)
    matrix deces[101,1]=nbH
    tab annee deptdom if S10==`s10' & sexe==2 , matcell(nbF)
    matrix deces[121,1]=nbF
    matrix list deces
end

@ -0,0 +1,363 @@
capture program drop cfa
* cfa: confirmatory factor analysis of items grouped into dimensions.
*
* Syntax:
*   cfa varlist, partition(numlist) [scorename(string) cfamethod(string) cfastand]
*
*   varlist    items, listed dimension after dimension
*   partition  number of items in each dimension (positive integers); the
*              numbers must sum to the number of items in varlist
*   scorename  one name per dimension (default: Dim1 Dim2 ...)
*   cfamethod  estimation method passed to -sem-: ml (default), mlmv or adf
*   cfastand   passes the stand option to -sem-
*
* Output: a table with one row per item (factor loading, standard error,
* intercept, error variance and, when cfastand is not specified, the variance
* of the dimension on the first row of each dimension), followed by
* goodness-of-fit indices computed from -estat gof, stats(all)-.
*
* Errors: return code 198 when the options are inconsistent.
* Side effects: the matrices r, b, a, se, a2, a3 and var are left in memory.
program cfa,rclass
    syntax varlist, PARTition(numlist integer >0) [SCOrename(string) CFAMethod(string) CFAStand]

    * ---- Consistency checks on the options ----
    local C = 0
    foreach size in `partition' {
        local C = `C' + `size'
    }
    local nbvars : word count `varlist'
    if `C' != `nbvars' {
        di in red "The sum of the numbers in the partition option is different from the number of variables precised in varlist"
        * a bare -exit- would leave with return code 0 and hide the failure
        exit 198
    }
    local P:word count `partition'
    if "`scorename'" !="" {
        local S:word count `scorename'
        if `P'!=`S' {
            di in red "The number of score names given is different from the number of dimensions in the partition option"
            exit 198
        }
    }

    * var1, var2, ... hold the item names in varlist order
    local i = 1
    foreach x in `varlist' {
        local var`i' = "`x'"
        local ++i
    }

    * Default names of the dimensions: Dim1 ... DimP
    if "`scorename'"=="" {
        forvalues i = 1/`P' {
            local scorename `scorename' Dim`i'
        }
    }

    * Upper-case names are used for the latent variables in the -sem- call
    * (presumably because -sem- expects latent names to be capitalized - see the sem documentation)
    local upscorename = upper("`scorename'")

    * ---- Build the measurement paths: (DIM1 -> items) (DIM2 -> items) ... ----
    local paths
    local i = 0
    local y = 1
    tokenize `upscorename'
    foreach x in `partition' {
        local ++i
        * s = index of the last item of dimension i, y = index of its first item
        if `i' == 1 local s = `x'
        else local s = `s' +`x'
        local liste
        forvalues w = `y'/`s' {
            local liste `liste' `var`w''
        }
        local paths `paths' (``i'' -> `liste')
        local y = `s'+1
    }

    * ---- Estimation method ----
    if "`cfamethod'" == "" local cfamethod = "ml"
    if "`cfamethod'" != "ml" & "`cfamethod'" != "mlmv" & "`cfamethod'" != "adf" {
        di "`cfamethod'"
        di in red "option cfamethod incorrectly specified (choose among ml, mlmv and adf)"
        error 198
    }
    if "`cfastand'" != "" local cfastand = "stand"

    di as result "{hline}"
    di "{bf:Confirmatory factor analysis}"
    di as result "{hline}"
    di
    qui sem `paths', method(`cfamethod') `cfastand'

    * ---- Extraction of the estimates from r(table) ----
    * The indexing below assumes that r(table) lists, for each item in turn, the
    * loading then the intercept (2*nbvars columns), followed by the nbvars error
    * variances and then the P variances of the dimensions - TODO confirm against
    * the column names of r(table) for the model at hand.

    /* factor loadings: odd positions of the coefficient row */
    matrix r = r(table)
    matrix r = r[1,1...]
    matrix r = r'
    local n = `nbvars'*2
    matrix a = r[1,1]
    forvalues i=3(2)`n' {
        matrix b = r[`i',1]
        matrix a = a\b
    }

    /* standard errors of the loadings: odd positions of the second row */
    matrix r = r(table)
    matrix r = r[2,1...]
    matrix r = r'
    local n = `nbvars'*2
    matrix se = r[1,1]
    forvalues i=3(2)`n' {
        matrix b = r[`i',1]
        matrix se = se\b
    }

    /* intercepts: even positions of the coefficient row */
    matrix r = r(table)
    matrix r = r[1,1...]
    matrix r = r'
    local n = `nbvars'*2
    matrix a2 = r[2,1]
    forvalues i=4(2)`n' {
        matrix b = r[`i',1]
        matrix a2 = a2\b
    }

    /* variances of the errors: the nbvars coefficients after the first 2*nbvars */
    local m = `n'+1
    matrix r = r(table)
    matrix r = r[1,`m'...]
    matrix r = r'
    matrix a3 = r[1,1]
    forvalues i=2/`nbvars' {
        matrix b = r[`i',1]
        matrix a3 = a3\b
    }

    /* variances of the dimensions: the P coefficients after the first 3*nbvars */
    matrix r = r(table)
    local n = `nbvars'*3+1
    matrix r = r[1,`n'...]
    matrix r = r'
    matrix var = r[1,1]
    forvalues i=2/`P' {
        matrix b = r[`i',1]
        matrix var = var\b
    }

    * ---- Names abbreviated to 10 characters for the table ----
    local i = 1
    foreach v in `varlist' {
        local var`i' = abbrev("`v'",10)
        local ++i
    }
    local i = 1
    foreach nm in `scorename' {
        local s`i' = abbrev("`nm'",10)
        local sc `sc' `s`i''
        local ++i
    }

    * dec = column of the "Dimension" header, dec2 = column of the first numeric column
    local max = 10
    local dec = `max'+5
    local max2 = 10
    local dec2 = `dec'+`max2'+5

    local nobs = e(N)
    di "{text:Number of used individuals: `nobs'}"
    di

    * ---- Header of the table (two lines) ----
    di _col(`=`dec2'+17+4') "{bf:Estimation:}"
    di as result "{bf:Item}" _c
    di _col(`dec') "{bf:Dimension}" _c
    di _col(`dec2') "{bf:Factor}" _c
    di _col(`=`dec2'+14') "{bf:Standard}" _c
    di _col(`=`dec2'+28') "{bf:Intercept}" _c
    if "`cfastand'" == "" {
        di _col(`=`dec2'+42') "{bf:Variance of}" _c
        di _col(`=`dec2'+56') "{bf:Variance of}"
        di _col(`dec2') "{bf:loading}" _c
        di _col(`=`dec2'+14') "{bf:error}" _c
        di _col(`=`dec2'+42') "{bf:error}" _c
        di _col(`=`dec2'+56') "{bf:dimension}"
        local h = `dec2'+66
    }
    else {
        * no column for the variance of the dimension with the stand option
        di _col(`=`dec2'+42') "{bf:Variance of}"
        di _col(`dec2') "{bf:loading}" _c
        di _col(`=`dec2'+14') "{bf:error}" _c
        di _col(`=`dec2'+42') "{bf:errors}"
        local h = `dec2'+52
    }
    di "{hline `h'}"

    * ---- Body of the table: one row per item, one blank line between dimensions ----
    * `1', `2', ... now hold the abbreviated names of the dimensions
    tokenize `sc'
    local i = 1
    local y = 1
    foreach x in `partition' {
        if `i' == 1 local s = `x'
        else local s = `s' +`x'
        forvalues z = `y'/`s' {
            di "{bf:`var`z''}" _c
            di _col(`dec') "{bf:``i''}" _c
            local t = a[`z',1]
            local t : di %7.2f `t'
            di _col(`dec2') "{text:`t'}" _c
            local t = se[`z',1]
            local t : di %8.2f `t'
            di _col(`=`dec2'+14') "{text:`t'}" _c
            local t = a2[`z',1]
            local t : di %9.2f `t'
            di _col(`=`dec2'+28') "{text:`t'}" _c
            local t = a3[`z',1]
            local t : di %11.2f `t'
            if "`cfastand'" == "" & `z' == `y' {
                * first item of the dimension: also print the variance of the dimension
                di _col(`=`dec2'+42') "{text:`t'}" _c
                local t = var[`i',1]
                local t : di %11.2f `t'
                di _col(`=`dec2'+56') "{text:`t'}"
            }
            else di _col(`=`dec2'+42') "{text:`t'}"
        }
        di
        local ++i
        local y = `s'+1
    }

    * ---- Goodness-of-fit indices ----
    qui estat gof, stats(all)
    local chi2 = r(chi2_ms)
    local p = r(p_ms)
    local ddl = r(df_ms)
    local ratio = `chi2'/`ddl'
    local rmsea = r(rmsea)
    local lb = r(lb90_rmsea)
    local ub = r(ub90_rmsea)
    local nfi = 1-(r(chi2_ms)/r(chi2_bs))
    local rni = 1-(r(chi2_ms)-r(df_ms))/(r(chi2_bs)-r(df_bs))
    local cfi = r(cfi)
    local ifi = (r(chi2_bs)-r(chi2_ms))/(r(chi2_bs)-r(df_ms))
    local mci = exp(-0.5*((r(chi2_ms)-r(df_ms))/(e(N)-1)))
    local srmr = r(srmr)
    di
    di "{bf:Goodness of fit}"
    di
    di as result _col(4) "chi2" _c
    di as result _col(20) "ddl" _c
    di as result _col(28) "chi2/ddl" _c
    di as result _col(42) "RMSEA [90% CI]" _c
    di as result _col(64) "SRMR" _c
    di as result _col(74) "NFI" _c
    di as result _col(84) "RNI" _c
    di as result _col(94) "CFI" _c
    di as result _col(104) "IFI" _c
    di as result _col(114) "MCI"
    local t : di %7.2f `chi2'
    di "{text:`t'}" _c
    local t : di %3.0f `ddl'
    di _col(20) "{text:`t'}" _c
    local t : di %7.1f `ratio'
    di _col(29) "{text:`t'}" _c
    local t : di %5.3f `rmsea'
    local l : di %5.3f `lb'
    * upper bound of the confidence interval: the macro is ub (not up)
    local u : di %5.3f `ub'
    di _col(40) "{text:`t' [`l' ; `u']}" _c
    local t : di %5.3f `srmr'
    di _col(63) "{text:`t'}" _c
    local t : di %5.3f `nfi'
    di _col(72) "{text:`t'}" _c
    local t : di %5.3f `rni'
    di _col(82) "{text:`t'}" _c
    local t : di %5.3f `cfi'
    di _col(92) "{text:`t'}" _c
    local t : di %5.3f `ifi'
    di _col(102) "{text:`t'}" _c
    local t : di %5.3f `mci'
    di _col(112) "{text:`t'}"
    local p : di %5.3f `p'
    di "{text:(p-value = `p')}"
    di as result
end
*cfa ioc1-ioc37, partition(4 4 7 3 3 4 7 5) scorename(HAaaaaaaaaaaaaaaaaaaaaaaaaaz PSE W BCC AC AE LI MOC) cfamethod(ml) //cfastand
*cfa x1-x40, partition(5 5 5 5 5 5 5 5) cfastand

@ -0,0 +1,168 @@
*!Version 1.1
*!Data management utility: check for existence of variables in a dataset.
*!Authors: Amadou Bassirou DIALLO (World Bank) and Jean-Benoit Hardouin (Regional Health Observatory of Orléans)
program checkfor2 , rclass
    * checkfor2: sorts the names given in -anything- into
    *   - unavailable variables (not in the dataset, as reported by -isvar-),
    *   - available variables (rate of missing values <= tolerance),
    *   - variables with too many missing values (or entirely empty).
    *
    * Options:
    *   nolist        do not display the final lists
    *   tolerance(#)  maximal rate of missing values (in percent), default 0
    *   table         display the results as a table instead of as text
    *   nosum         do not display the summary statistics
    *   genmiss(new)  create a variable counting, for each observation, the
    *                 missing values among the available variables
    *   missing(str)  additional value (number or string) treated as missing
    *
    * Returns: r(available), r(unavailable), r(manymissings).
    * Requires the command -isvar- (not defined in this file).
    version 8
    syntax anything [if] [in] [, noList Tolerance(real 0) TAble noSUm GENMiss(namelist min=1 max=1) MISsing(string)]
    marksample touse
    tempname rat
    local av
    local manymissings
    local avnum
    * tot = number of observations selected by if/in
    quietly count if `touse'
    local tot = r(N)
    qui isvar `anything'
    local badlist `r(badlist)'
    local varlist `r(varlist)'
    * misnum = 1 when missing() contains a number, which can then be compared
    * with numeric variables; 0 otherwise (option omitted or not a number)
    capture confirm number `missing'
    local misnum = (_rc == 0)
    di _n
    if "`table'"!="" {
        if "`badlist'"!="" {
            di _col(4) in green "{hline 39}"
            di _col(4)in green "Unavailable variables: "
            foreach b of local badlist {
                di _col(4) in ye "`b'"
            }
            di _col(4) in green "{hline 39}"
            di
        }
        di _col(4) in green "{hline 39}"
        display _col(4) in gr "Existing" _col(15) in gr "Rate of"
        display _col(4) in gr "Variable" _col(14) "missings" _col(26) "Type" _col(34) "Available"
        di _col(4) in green "{hline 39}"
    }
    foreach v of local varlist {
        local ty: type `v'
        * tty is "str" for every string storage type
        local tty = substr("`ty'", 1, 3)
        * the assertion holds only when every selected observation is missing
        capture assert missing(`v') if `touse'
        if !_rc {
            if "`table'"=="" {
                display in ye "`v'" in gr " is empty in the database." in ye " `v'" in gr " is not added to the available list."
            }
            else {
                display _col(4) in gr "`=abbrev("`v'",8)'" _col(15) in ye "100.00%" _col(26) "`ty'"
            }
            local manymissings `manymissings' `v'
        }
        else {
            if "`table'"=="" {
                display in ye "`v'" in gr " exists and is not empty."
            }
            * The definition of a missing value depends on the storage type
            if "`tty'" == "str" {
                qui count if (`v' == ""|`v'=="`missing'") & `touse'
            }
            else {
                * numeric variables are kept for the summary statistics
                local avnum `avnum' `v'
                if `misnum' {
                    quietly count if (`v' >= .|`v'==`missing') & `touse'
                }
                else {
                    quietly count if `v' >= . & `touse'
                }
            }
            local num = r(N)
            scalar `rat' = (`num'/`tot')*100
            if "`table'"=="" {
                display in ye "`v'" in gr " has " in ye r(N) in gr " missings."
                display in gr "Ratio number of missings of" in ye " `v'" in gr " to total number of observations: " in ye %6.2f `rat' "%"
            }
            if `rat' <= `tolerance' {
                local av `av' `v'
                if "`table'"=="" {
                    display in ye "`v'" in gr " is added to the available list."
                }
                else {
                    display _col(4) in gr "`=abbrev("`v'",8)'" in ye _col(15) %6.2f `rat' "%" _col(26) "`ty'" _col(34) "X"
                }
            }
            else {
                local manymissings `manymissings' `v'
                if "`table'"=="" {
                    display in ye "`v'" in gr " has too many missings, compared to the tolerance level."
                    display in ye "`v'" in gr " is not added to the available list."
                }
                else {
                    display _col(4) in gr "`=abbrev("`v'",8)'" _col(15) in ye %6.2f `rat' "%" _col(26) "`ty'"
                }
            }
        }
        if "`table'"=="" {
            di
        }
    }
    if "`table'"!="" {
        di _col(4) in green "{hline 39}"
    }
    return local available `av'
    return local unavailable `badlist'
    return local manymissings `manymissings'
    * Summary statistics of the non-empty numeric variables
    if "`avnum'" ~= ""&"`sum'"=="" {
        display _newline
        display in ye _col(14) "Unweighted summary statistics for available variables:" _n
        if !`misnum' {
            summarize `avnum' if `touse'
        }
        else {
            * one variable at a time, to leave out its own user-defined missing code
            foreach v of local avnum {
                summarize `v' if `touse'&`v'!=`missing'
            }
        }
    }
    if "`list'"== "" {
        display _newline
        display in ye _d(97) "_"
        display _newline
        if "`badlist'"~="" {
            display in gr "Unavailable variables: " in ye _col(45) "`badlist'" _n
        }
        if "`av'"~="" {
            display in gr "Available variables: " in ye _col(45) "`av'" _n
        }
        if "`manymissings'"~="" {
            display in gr "Available variables but with too missings: " in ye _col(45) "`manymissings'" _n
        }
        display in ye _d(97) "_"
    }
    if "`genmiss'" !="" {
        capture confirm variable `genmiss'
        if _rc!=0 {
            qui gen `genmiss' = 0
            * Same definition of a missing value as for the rates above:
            * "" or the missing() string for string variables (the former test
            * against "." never matched an empty string), system/extended
            * missing or the numeric missing() code for numeric variables.
            foreach v of local av {
                local ty: type `v'
                if substr("`ty'", 1, 3) == "str" {
                    qui replace `genmiss'=`genmiss'+1 if `v'==""|`v'=="`missing'"
                }
                else if `misnum' {
                    qui replace `genmiss'=`genmiss'+1 if `v'>=.|`v'==`missing'
                }
                else {
                    qui replace `genmiss'=`genmiss'+1 if `v'>=.
                }
            }
        }
        else {
            di in green "The variable" in ye " `genmiss' " in green "already exists."
        }
    }
end

@ -0,0 +1,88 @@
{smcl}
{hline}
help for {cmd:checkfor2} {right:Amadou B. DIALLO}
{right:Jean-Benoit HARDOUIN}
{hline}
{title:Allows checking whether a variable exists or not in a dataset.}
{p 4 8 2}{cmd:checkfor2} {it:anything} [{cmd:,}
{cmdab:t:olerance}({it:#}) {cmdab:ta:ble} {cmdab:nol:ist} {cmdab:nosu:m}
{cmdab:genm:iss}({it:newvarname}) {cmdab:mis:sing}({it:string})]
{title:Description}
{p 4 4 2}{cmd:checkfor2} is a data management routine to check for existence of variables
within a (usually big) data set.
{p 4 4 2}{cmd:checkfor2} searches through the data to check whether each variable exists.
The variables are classified as unavailable variables, available variables with
a small amount of missing values and available variables with too many missing values.
{p 4 4 2}{cmd:isvar} must be installed ({stata ssc install isvar:ssc install isvar}).
{title:Options}
{p 4 4 2}{it:anything} is composed of variable names or lists of variables,
{p 4 4 2}{cmd:tolerance} is the tolerance level (in percentage) to consider a variable as available, with default 0,
{p 4 4 2}{cmd:nolist} avoids displaying availability status at the end of the process,
{p 4 4 2}{cmd:nosum} avoids displaying summary statistics of available variables,
{p 4 4 2}{cmd:table} displays the results in a table (instead of as text),
{p 4 4 2}{cmd:genmiss} creates a new variable containing the number of missing values among the available variables,
{p 4 4 2}{cmd:missing} defines a specific value or string considered as a missing value.
{title:Saved results}
{p 4 4 2} {cmd:r(unavailable)} names of unavailable variables.{p_end}
{p 4 4 2} {cmd:r(available)} names of available variables with a small amount of missing values.{p_end}
{p 4 4 2} {cmd:r(manymissings)} names of variables present but with too many missing values.{p_end}
{title:Examples}
{p 4 4 2}{cmd:. use mydata, clear }{p_end}
{p 4 4 2}{cmd:. checkfor2 x y z , mis(99) genmiss(countmiss) }{p_end}
{p 4 4 2}{cmd:. su `r(available)' }{p_end}
{p 4 4 2}{cmd:. tab countmiss }{p_end}
{p 4 4 2}{cmd:. u bigdataset in 1/100, clear // Big data set}{p_end}
{p 4 4 2}{cmd:. checkfor2 v1 v2 v3 xx yy , nosum tol(5) tab}{p_end}
{p 4 4 2}{cmd:. use `r(available)' using bigdataset, clear }{p_end}
{title:Remarks}
{p 4 4 2}{cmd:checkfor2} and its primary version ({cmd:checkfor}) have been primarily written for comparable surveys such as the Demographic and
Health Surveys (DHS) or the Multiple Indicator Cluster Surveys (MICS), but they could easily be applied
to any other survey.
{title:Authors}
{p 4 4 2}Amadou Bassirou DIALLO.
Poverty and Health Division, PREM, The World Bank.{p_end}
{p 4 4 2}Email: {browse "mailto:adiallo5@worldbank.org":adiallo5@worldbank.org}
{p 4 4 2}Jean-Benoit HARDOUIN.
Regional Health Observatory of Orléans, France.{p_end}
{p 4 4 2}Email: {browse "mailto:jean-benoit.hardouin@orscentre.org":jean-benoit.hardouin@orscentre.org}
{title:Acknowledgements}
{p 4 4 2}We would like to thank Christophe Rockmore and also Nick Cox
and Kit Baum for their comments.
{title:Also see}
{p 4 13 2}Online: help for {help checkfor}, {help isvar}, {help nmissing}, {help npresent}, {help missing} and {help dropmiss} if installed.{p_end}

@ -0,0 +1,133 @@
*!Version 1.1
*!Data management utility: check for existence of variables in a dataset.
*!Authors: Amadou Bassirou DIALLO (World Bank) and Jean-Benoit Hardouin (Regional Health Observatory of Orléans)
program checkvars, rclass
    * checkvars: sorts the names given in -anything- into
    *   - unavailable variables (not in the dataset, as reported by -isvar-),
    *   - available variables (rate of missing values <= tolerance),
    *   - variables with too many missing values (or entirely empty).
    *
    * Options:
    *   nolist        do not display the final lists
    *   tolerance(#)  maximal rate of missing values (in percent), default 0
    *   table         display the results as a table instead of as text
    *   nosum         do not display the summary statistics
    *   genmiss(new)  create a variable counting, for each observation, the
    *                 missing values among the available variables
    *
    * Returns: r(available), r(unavailable), r(manymissings).
    * Requires the command -isvar- (not defined in this file).
    version 8
    syntax anything [if] [in] [, noList Tolerance(real 0) TAble noSUm GENMiss(namelist min=1 max=1)]
    marksample touse
    tempname rat
    local av
    local manymissings
    * tot = number of observations selected by if/in
    quietly count if `touse'
    local tot = r(N)
    qui isvar `anything'
    local badlist `r(badlist)'
    local varlist `r(varlist)'
    if "`table'"!="" {
        if "`badlist'"!="" {
            di _col(4)in green "Unavailable variables: " in ye "`badlist'"
            di
        }
        di _col(4) in green "{hline 29}"
        display _col(4) in gr "Existing" _col(15) in gr "Rate of"
        display _col(4) in gr "Variable" _col(14) "missings" _col(24) "Available"
        di _col(4) in green "{hline 29}"
    }
    foreach v of local varlist {
        * the assertion holds only when every selected observation is missing
        capture assert missing(`v') if `touse'
        if !_rc {
            if "`table'"=="" {
                display in ye "`v'" in gr " is empty in the database." in ye " `v'" in gr ///
                    " is not added to the available list"
            }
            else {
                display _col(4) in gr "`v'" _col(14) "100.00%"
            }
            * an empty variable exists but is unusable: report it with the
            * variables having too many missing values (as checkfor2 does)
            local manymissings `manymissings' `v'
        }
        else {
            if "`table'"=="" {
                display in ye "`v'" in gr " exists and is not empty."
            }
            * missing() accepts numeric and string variables alike, whereas a
            * comparison with . stops with a type mismatch on string variables
            quietly count if missing(`v') & `touse'
            local num = r(N)
            scalar `rat' = (`num'/`tot')*100
            if "`table'"=="" {
                display in ye "`v'" in gr " has " in ye r(N) in gr " missings."
                display in gr "Ratio number of missings of" in ye " `v'" in gr ///
                    " to total number of observations: " in ye %6.2f `rat' "%"
            }
            if `rat' <= `tolerance' {
                local av `av' `v'
                if "`table'"=="" {
                    display in ye "`v'" in gr " is added to the available list."
                }
                else {
                    display _col(4) in gr "`v'" in ye _col(15) %6.2f `rat' "%" _col(32) "X"
                }
            }
            else {
                local manymissings `manymissings' `v'
                if "`table'"=="" {
                    display in ye "`v'" in gr " has too many missings, compared to the tolerance level."
                    display in ye "`v'" in gr " is not added to the available list."
                }
                else {
                    display _col(4) in gr "`v'" _col(15) in ye %6.2f `rat' "%"
                }
            }
        }
        if "`table'"=="" {
            di
        }
    }
    if "`table'"!="" {
        di _col(4) in green "{hline 29}"
    }
    return local available `av'
    return local unavailable `badlist'
    return local manymissings `manymissings'
    if "`av'" ~= ""&"`sum'"=="" {
        display _newline
        display in ye _col(14) "Unweighted summary statistics for available variables:" _n
        summarize `av' if `touse'
    }
    if "`list'"== "" {
        display _newline
        display in ye _d(97) "_"
        display _newline
        if "`badlist'"~="" {
            display in gr "Unavailable variables: " in ye _col(45) "`badlist'" _n
        }
        if "`av'"~="" {
            display in gr "Available variables: " in ye _col(45) "`av'" _n
        }
        if "`manymissings'"~="" {
            display in gr "Available variables but with too missings: " in ye _col(45) "`manymissings'" _n
        }
        display in ye _d(97) "_"
    }
    if "`genmiss'" !="" {
        capture confirm variable `genmiss'
        if _rc!=0 {
            * one point per available variable that is missing in the observation
            qui gen `genmiss'=0
            foreach v of local av {
                qui replace `genmiss'=`genmiss'+1 if missing(`v')
            }
        }
        else {
            di in green "The variable" in ye " `genmiss' " in green "already exists."
        }
    }
end

@ -0,0 +1,90 @@
{smcl}
{hline}
help for {cmd:checkvars} {right:Amadou B. DIALLO}
{right:Jean-Benoit HARDOUIN}
{hline}
{title:Allows checking whether a variable exists or not in a dataset.}
{p 4 8 2}{cmd:checkvars} {it:anything} [{cmd:,}
{cmdab:t:olerance}({it:#}) {cmdab:ta:ble} {cmdab:nol:ist} {cmdab:nosu:m}
{cmdab:genm:iss}({it:newvarname})]
{title:Description}
{p 4 4 2}{cmd:checkvars} is a routine to check for existence of variables
within a (usually big) data set.
{p 4 4 2}{cmd:checkvars} searches through the data to check whether each variable exists.
The variables are classified as unavailable variables, available variables with
a small amount of missing values and available variables with too many missing values.
{p 4 4 2}{cmd:isvar} must be installed ({stata ssc install isvar:ssc install isvar}).
{title:Options}
{p 4 4 2}{it:anything} is composed of variable names or lists of variables,
{p 4 4 2}{cmd:tolerance} is the tolerance level (in percentage) to consider a variable as available, with default 0,
{p 4 4 2}{cmd:nolist} avoids displaying availability status at the end of the process,
{p 4 4 2}{cmd:nosum} avoids displaying summary statistics of available variables,
{p 4 4 2}{cmd:table} displays the results in a table (instead of as text),
{p 4 4 2}{cmd:genmiss} creates a new variable containing the number of missing values among the available variables.
{title:Saved results}
{p 4 4 2} {cmd:r(unavailable)} names of unavailable variables.{p_end}
{p 4 4 2} {cmd:r(available)} names of available variables with a small amount of missing values.{p_end}
{p 4 4 2} {cmd:r(manymissings)} names of variables present but with too many missing values.{p_end}
{title:Examples}
{p 4 4 2}{cmd:. use mydata, clear }{p_end}
{p 4 4 2}{cmd:. checkvars x y z ,genmiss(countmiss) }{p_end}
{p 4 4 2}{cmd:. su `r(available)' }{p_end}
{p 4 4 2}{cmd:. tab countmiss }{p_end}
{p 4 4 2}{cmd:. u bigdataset in 1/100, clear // Big data set}{p_end}
{p 4 4 2}{cmd:. checkvars v1 v2 v3 xx yy , nosum tol(5) tab}{p_end}
{p 4 4 2}{cmd:. use `r(available)' using bigdataset, clear }{p_end}
{title:Remarks}
{p 4 4 2}{cmd:checkvars} and its primary versions ({cmd:checkfor} and {cmd:checkfor2}) have been primarily written for comparable surveys such as the Demographic and
Health Surveys (DHS) or the Multiple Indicator Cluster Surveys (MICS), but they could easily be applied
to any other survey.
{title:Authors}
{p 4 4 2}Amadou Bassirou DIALLO.
Poverty and Health Specialist. AFTPM, The World Bank.{p_end}
{p 4 4 2}Email: {browse "mailto:adiallo5@worldbank.org":adiallo5@worldbank.org}
{p 4 4 2}Jean-Benoit HARDOUIN.
Regional Health Observatory of Orléans, France.{p_end}
{p 4 4 2}Email: {browse "mailto:jean-benoit.hardouin@orscentre.org":jean-benoit.hardouin@orscentre.org}
{title:Acknowledgements}
{p 4 4 2}We would like to thank Christophe Rockmore and also Nick Cox
and Kit Baum for their comments.
{title:Also see}
{p 4 13 2}Online: help for {help checkfor}, {help checkfor2}, {help isvar}, {help nmissing}, {help npresent}, {help missing} and {help dropmiss} if installed.{p_end}

@ -0,0 +1,957 @@
*! Version 2.17 10July2019
*! Jean-Benoit Hardouin
************************************************************************************************************
* Stata program : clv
* Clustering of variables around latent variables
* Version 2.17 : July 10th, 2019 /*filesave and dirsave options*/
*
* Historic
* Version 1 (2005-06-11): Jean-Benoit Hardouin
* Version 1.1 (2005-07-07): Jean-Benoit Hardouin /*small bug in the consolidation process with cluster of only one variable*/
* Version 1.2 (2005-07-08): Jean-Benoit Hardouin /*Bug in the consolidation procedure when there is negative correlation*/
* Version 2 (2005-09-03): Jean-Benoit Hardouin /*Horizontal dendrograms (with Stata 9)*/
* Version 2.1 (2005-09-08): Jean-Benoit Hardouin /*More flexibility to abbreviate the names of the variables (with Stata 9)*/
* Version 2.1.1 (2005-09-08): Jean-Benoit Hardouin /*Integration of some requests of Ronan Conroy*/
* Version 2.1.2 (2005-09-08): Jean-Benoit Hardouin /*Possibility to give a title and an X/Y caption*/
* Version 2.2 (2005-09-11): Jean-Benoit Hardouin /*Kernel option*/
* Version 2.3 (2005-09-12): Jean-Benoit Hardouin /*Polychoric option*/
* Version 2.4 (2005-09-13): Jean-Benoit Hardouin /*v2 option*/
* Version 2.5 (2005-09-21): Jean-Benoit Hardouin /*corrections*/
* Version 2.6 (2005-10-02): Jean-Benoit Hardouin /*centroid method, biplot*/
* Version 2.7 (2005-10-06): Jean-Benoit Hardouin /*return, multiple graphs, polychoric+consolidation*/
* Version 2.8 (2005-10-06): Jean-Benoit Hardouin /*fweights*/
* Version 2.9 (2006-01-26): Jean-Benoit Hardouin /*save the latent variables*/
* Version 2.10 (2006-07-10): Jean-Benoit Hardouin /*2nd order relative variation of the T criterion*/
* Version 2.11 (2006-10-09): Jean-Benoit Hardouin /*Size of the text in the dendrogram*/
* Version 2.12 (2006-12-01): Jean-Benoit Hardouin /*savedendro option*/
* Version 2.13 (2010-05-12): Jean-Benoit Hardouin /*corrections of bugs in KERNEL option and with METHOD(centroid)*/
* Version 2.14 (2010-05-20): Jean-Benoit Hardouin /*DIM and STD options for biplots*/
* Version 2.15 (2014-04-14): Jean-Benoit Hardouin /*save and use options*/
* Version 2.16 (2014-04-30): Jean-Benoit Hardouin, Bastien Perrot /*HTML option*/
* Version 2.17 (2019-07-10): Jean-Benoit Hardouin /*filesave and dirsave options*/
*
* Jean-Benoit Hardouin, University of Nantes - Faculty of Pharmaceutical Sciences
* INSERM UMR 1246-SPHERE "Methods in Patient Centered Outcomes and Health Research", Nantes University, University of Tours
* jean-benoit.hardouin@univ-nantes.fr
*
* News about this program : http://anaqol.sphere-nantes.fr
*
* Copyright 2005-2006, 2010, 2014, 2019 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
************************************************************************************************************
program define clv,rclass
version 10
syntax [varlist(default=none)] [if] [in] [fweight] [, CUTnumber(int 40) bar CONSolidation(int 0) noDENdro SAVEDendro(string) noSTANDardized deltaT HORizontal SHOWcount ABBrev(int 14) TITle(string) CAPtion(string) KERnel(numlist) METHod(string) noBIPlot ADDvar genlv(string) replace TEXTSize(string) std dim(string) save(string) use(string) FILESave DIRSave(string)]
preserve
tempfile clvfile
tempvar id
gen `id'=_n
qui save `clvfile',replace
local matsize=c(matsize)
local none=0
if "`varlist'"==""&"`use'"=="" {
capture confirm matrix r(vp)
if _rc==0 {
capture confirm matrix r(matclus)
if _rc ==0 {
local none=1
}
}
if `none'==0 {
di in red "You cannot use the {hi:clv} command without {hi:varlist} if you have not already run {hi:clv}"
error 198
exit
}
}
if "`filesave'"!="" {
if "`dirsave'"=="" {
local dirsave `c(pwd)'
}
local fsb saving(`dirsave'//bar,replace)
local fsd saving(`dirsave'//dendrogram,replace)
local fsbi saving(`dirsave'//biplot,replace)
}
tempname matclus vp indexes
/*********TESTS**********/
if "`use'"!="" {
local error=0
capture matrix `vp'=`use'_vp
if _rc!=0 {
local error=_rc
}
capture matrix `matclus'=`use'_matclus
if _rc!=0 {
local error=_rc
}
local varlist $`use'_varlist
local method $`use'_method
local kernel $`use'_kernel
if "`varlist'"==""|"`method'"=="" {
local error=1
}
if `error'!=0 {
di in red "You cannot use the {hi:use} option without a preliminary use of the {hi:save} option"
error 198
exit
}
}
if `none'==1 {
matrix `vp'=r(vp)
matrix `matclus'=r(matclus)
local varlist `r(varlist)'
tokenize `varlist'
local nbitems=rowsof(`matclus')
if "`method'"!="" {
di in green "The {hi:method} option can not be modified without specification of the varlist. {hi:method} is omitted."
}
local method `r(method)'
local kernel `r(kernel)'
}
if "`method'"=="" {
local method classical
}
if ("`method'"=="polychoric"|"`method'"=="polychoricv2")&"`standardized'"!="" {
di in green "Initial variables are used with the {hi:polychoric} methods"
di in green "But the procedure is based on the matrix of the polychoric correlations"
di
}
if "`method'"!="classical"&"`method'"!="v2"&"`method'"!="centroid"&"`method'"!="polychoric"&"`method'"!="polychoricv2" {
di in red "The {hi:method} `method' is unknown"
error 198
exit
}
tokenize `varlist'
local nbitems : word count `varlist'
marksample touse
qui keep if `touse'
local mat=max(`matsize',`=`nbitems'*2')
qui set matsize `mat'
if `nbitems'<3&`none'!=1 {
di in red "You need at least 3 variables"
error 198
exit
}
/*******DEFINES THE LABELS AND STANDARDIZED THE VARIABLES (IF NECESSARY)*******/
forvalues i=1/`nbitems'{
local label`i':variable label ``i''
if "`label`i''"=="" {
local label`i' ``i''
}
if "`method'"!="polychoric"&"`method'"!="polychoricv2" {
qui su ``i'' [`weight'`exp']
local mean=r(mean)
if "`standardized'"=="" {
local sd=r(sd)
}
else {
local sd=1
}
qui replace ``i''=(``i''-`mean')/`sd'
}
}
tempfile clvfiletmp
qui save `clvfiletmp',replace
qui su `1' [`weight'`exp']
local nbind=r(sum_w)
local cons=`consolidation'
/*COMPUTES THE TOTAL VARIANCE*/
if "`method'"!="polychoric"&"`method'"!="polychoricv2" {
local totvar=0
forvalues i=1/`nbitems' {
qui su ``i'' [`weight'`exp']
local totvar=`totvar'+`r(Var)'
}
}
else {
local totvar `nbitems'
}
local nbkerk=0
local nbkerg=0
/***** DEFINES THE KERNEL IF NECESSARY ********/
if "`kernel'"!="" {
local nbkerg:word count `kernel'
local fin0=0
forvalues i=1/`nbkerg' {
local nbi`i':word `i' of `kernel'
local nbkerk=`nbkerk'+`nbi`i''
local deb`i'=`fin`=`i'-1''+1
local fin`i'=`deb`i''+`nbi`i''-1
local list`i'
forvalues j=`deb`i''/`fin`i'' {
local list`i' `list`i'' ``j''
}
}
tempname kerclus
matrix `kerclus'=J(`=`nbkerk'-`nbkerg'',3,0)
local ligne=1
forvalues g=1/`nbkerg' {
matrix `kerclus'[`ligne',1]=`nbitems'+`ligne'
matrix `kerclus'[`ligne',2]=`deb`g''
matrix `kerclus'[`ligne',3]=`deb`g''+1
local clus`g'=`nbitems'+`ligne'
local ligne=`ligne'+1
if `nbi`g''>2 {
forvalues i=2/`=`nbi`g''-1' {
matrix `kerclus'[`ligne',1]=`nbitems'+`ligne'
matrix `kerclus'[`ligne',2]=`deb`g''+`i'
matrix `kerclus'[`ligne',3]=`nbitems'+`ligne'-1
local clus`g'=`nbitems'+`ligne'
local ligne=`ligne'+1
}
}
local eigen2=0
}
}
if `nbitems'<`nbkerk' {
di in red "You cannot define more variables in the {hi:kernel} option than items in the {hi:varlist}"
error 198
exit
}
/*******DISPLAY THE FIRST RESULTS *******/
di
di in green "{hline 32}"
di in green "TOTAL VARIANCE: " in ye %16.5f `totvar'
di in green "NUMBER OF INDIVIDUALS: " in ye %9.0f `nbind'
di in green "METHOD:" in ye _col(`=33-length("`method'")') "`=upper("`method'")'"
di in green "{hline 32}"
di
if "`kernel'"!="" {
forvalues i=1/`nbkerg' {
di in green "The kernel numbered " in ye `clus`i'' in green " is composed of `nbi`i'' variables: " in ye "`list`i''"
di
}
}
else {
local nbkerk=0
local nbkerg=0
}
/******** CLASSIFICATION PROCEDURE*******/
tempname Ev
if `none'!=1 {
matrix `matclus'=J(`nbitems',`nbitems',0)
matrix `vp'=J(`=2*`nbitems'-1',12,0)
matrix `indexes'=J(`nbitems',8,0)
forvalues i=1/`nbitems' {
matrix `matclus'[`i',1]=`i'
if "`method'"!="polychoric"&"`method'"!="polychoric" {
qui su ``i'' [`weight'`exp']
matrix `vp'[`i',10]=r(Var)
}
else {
matrix `vp'[`i',10]=1
}
matrix `vp'[`i',1]=`i'
matrix `vp'[`i',2]=`nbitems'
matrix `vp'[`i',8]=`totvar'
matrix `vp'[`i',9]=100
}
matrix `vp'[`nbitems',5]=`nbitems'
if "`method'"=="centroid" {
local crit G
di in green "{hline 101}"
di in green _col(93) "2nd order"
di in green _col(7) "Number of" _col(69) "`crit'" _col(71) "Explained" _col(82) "Relative" _col(94) "Relative"
di in green "Step" _col(8) "clusters" _col(20) "Child 1" _col(33) "Child 2" _col(46) "Parent" _col(53) "`crit' value" _col(61) "variation" _col(72) "Variance" _col(81) "Variation" _col(93) "Variation"
di in green "{hline 101}"
}
else {
local crit T
di in green "{hline 111}"
if "`method'"=="v2"|"`method'"=="polychoricv2" {
di in green _col(84) "Maximal" _col(103) "2nd order"
}
else {
di in green _col(84) "Current" _col(103) "2nd order"
}
di in green _col(7) "Number of" _col(69) "`crit'" _col(71) "Explained" _col(85) "Second" _col(93) "Relative" _col(104) "Relative"
di in green "Step" _col(8) "clusters" _col(20) "Child 1" _col(33) "Child 2" _col(46) "Parent" _col(53) "`crit' value" _col(61) "variation" _col(72) "Variance" _col(81) "Eigenvalue" _col(92) "Variation" _col(103) "Variation"
di in green "{hline 111}"
}
tempname threshold
matrix `threshold'=J(`nbitems',3,0)
forvalues i=1/`=`nbitems'-1' {
local clus=`nbitems'+`i'
local minegenval=999999
local minegenval2=999999
forvalues k=1/`=`clus'-1' {
local list`k'
local numlist`k'
forvalues j=1/`clus' {
if (`matclus'[`j',`i']==`k') {
local list`k' `list`k'' ``j''
local numlist`k' `numlist`k'' `j'
}
}
}
if `clus'>`nbitems'+`nbkerk'-`nbkerg' {
if "`method'"=="centroid" {
tempname centrj centrk diffjk
}
forvalues j=1/`clus' {
local nblistj:word count `list`j''
forvalues k=`=`j'+1'/`clus' {
local nblistk:word count `list`k''
if `nblistj'!=0&`nblistk'!=0 {
if "`method'"=="centroid" {
qui genscore `list`j'',score(`centrj') mean
qui su `centrj' [`weight'`exp']
local Varj=r(Var)
qui genscore `list`k'',score(`centrk') mean
qui su `centrk' [`weight'`exp']
local Vark=r(Var)
qui gen `diffjk'=`centrk'-`centrj'
qui su `diffjk' [`weight'`exp']
local Varjk=r(Var)
drop `centrj' `centrk' `diffjk'
local ev=(`nblistj'*`nblistk')/(`nblistj'+`nblistk')*`Varjk'
if `ev'<`minegenval' {
local minegenval=`ev'
local minj `j'
local mink `k'
local eigen=0
local eigen2=0
}
}
else {
if "`method'"=="classical"|"`method'"=="v2" {
qui pca `list`j'' `list`k'' [`weight'`exp'] ,cov
matrix `Ev'=e(Ev)
}
else if "`method'"=="polychoric"|"`method'"=="polychoricv2" {
qui polychoricpca `list`j'' `list`k'' [`weight'`exp']
matrix `Ev'=r(eigenvalues)
}
local lambda1=`Ev'[1,1]
local lambda2=`Ev'[1,2]
local ev=`vp'[`j',10]+`vp'[`k',10]-`lambda1'
local ev2=max(`vp'[`j',11],`vp'[`k',11],`lambda2')
if ("`method'"=="v2"|"`method'"=="polychoricv2")&`ev'<`minegenval' {
local minegenval=`ev'
local eigen=`lambda1'
local minj `j'
local mink `k'
local eigen2=`lambda2'
}
else if ("`method'"=="classical"|"`method'"=="polychoric")&`ev2'<`minegenval2' {
local minegenval=`ev'
local minegenval2=`ev2'
local eigen=`lambda1'
local minj `j'
local mink `k'
local eigen2=`ev2'
}
}
}
}
}
}
else {
local ligne=`clus'-`nbitems'
local j=`kerclus'[`ligne',2]
local k=`kerclus'[`ligne',3]
if "`method'"!="centroid" {
if "`method'"=="classical"|"`method'"=="v2" {
qui pca `list`j'' `list`k'' [`weight'`exp'],cov
matrix `Ev'=e(Ev)
}
else if "`method'"=="polychoric"|"`method'"=="polychoricv2"{
qui polychoricpca `list`j'' `list`k'' [`weight'`exp']
matrix `Ev'=r(eigenvalues)
}
local lambda1=`Ev'[1,1]
local lambda2=`Ev'[1,2]
local ev=`vp'[`j',10]+`vp'[`k',10]-`lambda1'
local minegenval=`ev'
local eigen=`lambda1'
local minj `j'
local mink `k'
local eigen2=`lambda2'
}
else if "`method'"=="centroid" {
local nblistj:word count `list`j''
local nblistk:word count `list`k''
tempname v1 v2 v12
qui genscore `list`j'',score(`v1') mean
qui genscore `list`k'',score(`v2') mean
qui gen `v12'=`v1'-`v2'
qui su `v12' [`weight'`exp']
local varj=r(Var)
local minegenval=(`nblistj'*`nblistk')/(`nblistj'+`nblistk')*`varj'
local minj `j'
local mink `k'
}
}
if `minj'<=`nbitems' {
local nomj=abbrev("``minj''",14)
}
else {
local nomj `minj'
}
if `mink'<=`nbitems' {
local nomk=abbrev("``mink''",14)
}
else {
local nomk `mink'
}
forvalues j=1/`nbitems' {
matrix `matclus'[`j',`=`i'+1']=`matclus'[`j',`i']
}
matrix `vp'[`clus',1]=`nbitems'+`i' /*PARENT*/
matrix `vp'[`clus',2]=`=`nbitems'-`i'' /*NUMBER OF CLUSTERS*/
matrix `vp'[`clus',3]=`minj' /*CHILD 1*/
matrix `vp'[`clus',4]=`mink' /*CHILD 2*/
matrix `vp'[`clus',6]=`minegenval' /*VARIATION OF THE T or G CRITERION*/
matrix `vp'[`clus',5]=`vp'[`=`clus'-1',5]-`vp'[`clus',6] /*T or G CRITERION*/
matrix `vp'[`clus',7]=(`vp'[`clus',6]-`vp'[`=`clus'-1',6])/`vp'[`=`clus'-1',6] /*RELATIVE VARIATION OF THE T OR G CRITERION*/
matrix `vp'[`clus',8]=`vp'[`=`clus'-1',8]-`minegenval' /*EXPLAINED VARIANCE*/
matrix `vp'[`clus',9]=`vp'[`clus',8]/`totvar'*100 /*% OF EXPLAINED VARIANCE*/
if "`method'"!="centroid" {
matrix `vp'[`clus',10]=`eigen' /*FIRST EIGEN VALUE OF THE NEW CLUSTER*/
matrix `vp'[`clus',11]=`eigen2' /*SECOND EIGEN VALUE OF THE NEW CLUSTER*/
}
if `vp'[`=`clus'-1',7]!=0 {
matrix `vp'[`clus',12]=(`vp'[`clus',7]-`vp'[`=`clus'-1',7])/abs(`vp'[`=`clus'-1',7]) /*2ND ORDER RELATIVE VARIATION OF THE T or G CRITERION*/
}
matrix `indexes'[`i',1]=`i' /*PARENT*/
matrix `indexes'[`i',2]=`nbitems'-`i' /*NUMBER OF CLUSTERS*/
matrix `indexes'[`i',3]=`minegenval' /*VARIATION OF THE T or G CRITERION*/
matrix `indexes'[`i',4]=`vp'[`clus',7] /*RELATIVE VARIATION OF THE T OR G CRITERION*/
matrix `indexes'[`i',5]=max(`eigen2',`indexes'[`=`i'-1',5]) /*MAXIMUM SECOND EIGENVALUE*/
matrix `indexes'[`i',7]=`vp'[`clus',12] /*2nd order RELATIVE VARIATION OF THE T OR G CRITERION*/
foreach j of numlist `numlist`minj'' `numlist`mink'' {
matrix `matclus'[`j',`=`i'+1']=`clus'
}
local varlistgen
local nbvarlistgen
forvalues j=1/`=`nbitems'+`i'' {
local varlist`j'
forvalues k=1/`nbitems' {
if `matclus'[`k',`=`i'+1']==`j' {
local varlist`j' `varlist`j'' ``k''
}
}
local nbvarlist`j': word count `varlist`j''
local varlistgen `varlistgen' `varlist`j''
local nbvarlistgen `nbvarlistgen' `nbvarlist`j''
}
local newlist
foreach m in `nbvarlistgen' {
if `m'!=0 {
local newlist `newlist' `m'
}
}
if "`kernel'"!=""&`i'==`=`nbkerk'-`nbkerg'+1' {
local T=`vp'[`=`clus'-1',8]
di _col(0) in ye "init" _col(12) %4.0f `=`nbitems'-`nbkerk'+`nbkerg'' _col(52) %8.4f `T' _col(62) %8.4f `=`totvar'-`T'' _col(72) %7.3f `=`T'/`totvar'*100' "%"
}
if `clus'>`nbitems'+`nbkerk'-`nbkerg' {
matrix `threshold'[`=`nbitems'-`i'+1',3]=`minegenval'
if `clus'==`nbitems'+`nbkerk'-`nbkerg'+1 {
local relv
local percent
local relv2
}
else {
local relv=`indexes'[`i',4]*100
local percent %
if `indexes'[`i',7]!=. {
local relv2=`indexes'[`i',7]*100
}
else {
local relv2=0
}
matrix `threshold'[`=`nbitems'-`i'+1',1]=`relv'
matrix `threshold'[`=`nbitems'-`i'+1',2]=`relv2'
}
if "`method'"=="centroid" {
di _col(0) in ye %4.0f `=`i'-`nbkerk'+`nbkerg'' _col(12) %4.0f `=`nbitems'-`i'' _col(20) "`nomj'" _col(33) "`nomk'" _col(45) %7.0f `=`i'+`nbitems'' _col(52) %8.4f `vp'[`clus',8] _col(62) %8.4f `minegenval' _col(72) %7.3f `vp'[`clus',9] "%" _col(83) _col(84) %5.2f `relv' "`percent'" _col(93) %8.2f `relv2' "`percent'"
}
else {
di _col(0) in ye %4.0f `=`i'-`nbkerk'+`nbkerg'' _col(12) %4.0f `=`nbitems'-`i'' _col(20) "`nomj'" _col(33) "`nomk'" _col(45) %7.0f `=`i'+`nbitems'' _col(52) %8.4f `vp'[`clus',8] _col(62) %8.4f `minegenval' _col(72) %7.3f `vp'[`clus',9] "%" _col(83) %8.4f `vp'[`clus',11] _col(94) %6.2f `relv' "`percent'" _col(103) %8.2f `relv2' "`percent'"
}
}
}
matrix `indexes'[`nbitems',3]=`vp'[`=2*`nbitems'-1',5] /*RELATIVE VARIATION OF THE T OR G CRITERION*/
matrix `indexes'[`nbitems',7]=`indexes'[`nbitems',3]/`indexes'[`=`nbitems'-1',3] /*RELATIVE VARIATION OF THE T OR G CRITERION*/
local i=2*`nbitems'-1
matrix `threshold'[1,1]=`vp'[`i',5]/`vp'[`i',6]*100-100
matrix `threshold'[1,2]=(`threshold'[1,1]-`threshold'[2,1])/abs(`threshold'[2,1])*100
matrix `threshold'[1,3]=`vp'[`i',5]
if "`method'"=="centroid" {
di in ye _col(62) %8.4f `threshold'[1,3] _col(83) %6.2f `threshold'[1,1] "`percent'" _col(93) %8.2f `threshold'[1,2] "`percent'"
}
else {
di in ye _col(62) %8.4f `threshold'[1,3] _col(94) %6.2f `threshold'[1,1] "`percent'" _col(103) %8.2f `threshold'[1,2] "`percent'"
}
local best=0
local maxbest=0
local best2=0
local maxbest2=0
local demipart=int(`nbitems'/2)+1
forvalues i=1/`demipart' {
if `threshold'[`i',3]>`maxbest2' {
if `threshold'[`i',3]>`maxbest' {
local maxbest2=`maxbest'
local best2=`best'
local maxbest=`threshold'[`i',3]
local best=`i'
}
else {
local maxbest2=`threshold'[`i',3]
local best2=`i'
}
}
}
di in green "{hline 111}"
di
di in green "{hline 60}"
di in green "PROPOSED BEST PARTITIONS (AMONG THE `demipart' SMALLER PARTITIONS)"
di in green "{hline 60}"
di
di in yellow _col(4) "Based on the variation of the T criterion: " _col(60) in gr "Partitions in " in ye `best' " or " `best2' in gr " clusters"
return local bestvariation `best' `best2'
local bestt=0
local bestt2=0
local var=0
local var2=0
forvalues i=1/`nbitems' {
if `threshold'[`i',1]>`var2'&`i'<`demipart' {
if `threshold'[`i',1]>`var' {
local bestt2=`bestt'
local var2=`var'
local var=`threshold'[`i',1]
local bestt=`i'
}
else {
local var2=`threshold'[`i',1]
local bestt2=`i'
}
}
}
di in yellow _col(4) "Based on the research of a threshold: " _col(60) in gr "Partitions in " in ye `bestt' " or " `bestt2' in gr " clusters"
forvalues i=`=`nbitems'+1'/`=`nbitems'+`nbkerk'-`nbkerg'' {
matrix `vp'[`i',6]=`totvar'-`T'
matrix `vp'[`i',8]=`T'
matrix `vp'[`i',9]=`T'/`nbitems'*100
}
return local bestthresold `bestt' `bestt2'
forvalues i=1/`nbitems' {
if `threshold'[`i',2]>`var2'&`i'<`demipart' {
if `threshold'[`i',2]>`var' {
local bestt2=`bestt'
local var2=`var'
local var=`threshold'[`i',2]
local bestt=`i'
}
else {
local var2=`threshold'[`i',2]
local bestt2=`i'
}
}
}
di in yellow _col(4) "Based on the research of a threshold (second order): " _col(60) in gr "Partitions in " in ye `bestt' " or " `bestt2' in gr " clusters"
return local bestthresold2 `bestt' `bestt2'
}
/******BAR CHART *******/
if "`bar'"!="" {
drop _all
qui set obs `nbitems'
qui svmat `indexes' ,names(v)
qui gen id=`nbitems'-_n
qui replace v7=. in 1
qui drop if id>`nbitems'-`nbkerk'+`nbkerg'-1
label variable id "Number of clusters"
label variable v3 "T variation"
qui su v3 if id!=0
local maxv3=ceil(r(max)*5)/5
local minv3=floor(r(min)*5)/5
label variable v4 "Relative T variation"
label variable v7 "Relative T variation order 2"
qui graph twoway (bar v3 id, name(bar,replace) `fsb' vert yaxis(1))(line v4 id,yaxis(2))/*(line v6 id,yaxis(3))(line v5 id,yaxis(4))*/(line v7 id,yaxis(5)) if id!=0,ylabel(`minv3'(0.2)`maxv3') xlabel(1(1)`=`nbitems'-`nbkerk'+`nbkerg'-1')
}
/****** DENDROGRAM********/
drop _all
qui set obs `nbitems'
qui svmat `matclus' ,names(v)
local listorder
forvalues i=`nbitems'(-1)1 {
local listorder `listorder' v`i'
}
qui gen id=_n
qui sort `listorder'
capture cluster delete clv,zap
qui cluster complete v* ,name(clv)
qui replace clv_id=_n
qui replace clv_ord=id
qui replace clv_hgt=.
qui gen fait=0
qui gen clus=0
forvalues i=2/`nbitems' {
local ligne=`nbitems'+`i'-1
if (`vp'[`ligne',3]<=`nbitems') {
local first=`vp'[`ligne',3]
gsort +fait -v`i' +clv_id
}
else {
local first=`vp'[`ligne',4]
gsort +fait -v`i' +clv_id
}
if "`deltaT'"!="" {
qui replace clv_hgt=`vp'[`ligne',6] in 1
}
else {
qui replace clv_hgt=100-`vp'[`ligne',9] in 1
}
qui replace fait=1 in 1
qui replace clus=`vp'[`ligne',1] in 1
}
if "`dendro'"=="" {
qui gen label=""
forvalues i=1/`nbitems' {
qui replace label=abbrev("`label`i''",`abbrev') if clv_id==`i'
}
sort clv_id
if `nbitems'>`cutnumber' {
local var "Groups of variables"
local cut cutnumber(`cutnumber') /*labcutn*/
}
else {
local var "Variables"
local cut label(label)
}
qui su clv_hgt
local tmp=r(max)
local max=floor(`tmp')+.5
if `tmp'>`max' {
local max=`max'+.5
}
local maxvar=`max'+5
if "`title'"=="" {
local title "Clustering around Latent Variables (CLV)"
}
if "`caption'"!="" {
local var "`caption'"
}
if "`deltaT'"!="" {
local titleL "Variation of the T criterion"
local yl "0(.5)`max'"
}
else {
local titleL "% Unexplained Variance"
local yl "0(25)`maxvar'"
}
if "`textsize'"=="" {
local textsize: word `=min(int(`nbitems'/15)+1,5)' of medium medsmall small vsmall tiny
}
if "`horizontal'"!="" {
*matrix list clv
qui cluster dendro clv, name (dendrogram,replace) `fsd' hor ytitle("`var'") `showcount' xtitle("`titleL'") title("`title'",span) xlabel(`yl') ylabel(,angle(0) labsize(`textsize')) `cut'
}
else {
qui cluster dendro clv, name(dendrogram,replace) `fsd' xtitle("`var'") `showcount' ytitle("`titleL'") title("`title'",span) ylabel(`yl') xlabel(,labsize(`textsize')) `cut'
}
if "`savedendro'"!="" {
qui graph save dendrogram `savedendro'
}
}
/***** END DENDROGRAM*****/
/**** TEST ********/
if `cons'>`nbitems'-`nbkerk'+`nbkerg' {
di in ye "The {hi:consolidation} is not possible for a number of clusters superior to the initial number of clusters"
local cons=0
}
/***** CONSOLIDATION PROCEDURE ********/
if `cons'!=0 {
sort v`=`nbitems'-`cons'+1'
gen cut`cons'=1
local g=1
forvalues i=2/`nbitems' {
if v`=`nbitems'-`cons'+1'[`i']!=v`=`nbitems'-`cons'+1'[`=`i'-1'] {
local g=`g'+1
}
qui replace cut`cons'=`g' in `i'
}
sort id
tempname group
mkmat cut`cons',matrix(`group')
use `clvfiletmp',replace
local n=1
local env=1
while (`env'==1) {
forvalues g=1/`cons' {
local list`g'
forvalues i=1/`nbitems' {
if `group'[`i',1]==`g' {
local list`g' `list`g'' ``i''
}
}
}
di
if `n'==1 {
di in green "{hline 30}"
di in green "PARTITION BEFORE CONSOLIDATION"
di in green "{hline 30}"
}
di
local col=13
local max=0
local critT=0
forvalues g=1/`cons' {
di _col(`col') in green "CLUSTER " %2.0f `g' _c
local col=`col'+12
local tmp`g':word count `list`g''
if `tmp`g''>`max' {
local max `tmp`g''
}
tempvar f1`g'
if "`method'"=="centroid" {
qui genscore `list`g'',score(`f1`g'') mean
qui su `f1`g'' [`weight'`exp']
local var=r(Var)
local critT=`critT'+`tmp`g''*`var'
qui pca `list`g'' [`weight'`exp'] ,cov
local trace=e(trace)
local explained`g'=`tmp`g''*`var'/`trace'
}
else {
if `tmp`g''>1 {
if "`method'"=="classical"|"`method'"=="v2" {
qui pca `list`g'' [`weight'`exp'] ,cov
matrix `Ev'=e(Ev)
local trace=e(trace)
qui predict `f1`g''
}
else if "`method'"=="polychoric"|"`method'"=="polychoric" {
qui polychoricpca `list`g'' [`weight'`exp'] ,score(`f1`g'') nscore(1)
matrix `Ev'=r(eigenvalues)
local trace=0
forvalues m=1/`tmp`g''{
local trace =`trace'+`r(lambda`m')'
}
rename `f1`g''1 `f1`g''
}
local lambda1=`Ev'[1,1]
local explained`g'=`lambda1'/`trace'
local critT=`critT'+`lambda1'
}
else {
local explained`g'=1
qui gen `f1`g''=`list`g''
if "`standardized'"=="" {
local critT=`critT'+1
}
else {
qui su [`weight'`exp']
local critT=`critT'+`r(Var)'
}
}
}
}
di
di _col(1) in green "ITEMS :" _c
forvalues i=1/`max' {
local col=15
forvalues g=1/`cons' {
local tmpv:word `i' of `list`g''
local tmpv=abbrev("`tmpv'",8)
di _col(`col') in ye %8s "`tmpv'" _c
local col= `col'+12
}
di
}
local col=16
di _col(1) in green "Expl. Var:" _c
forvalues g=1/`cons' {
di _col(`col') in ye %6.2f `=`explained`g''*100' in green "%" _c
local col= `col'+12
}
di
di
di in green "Variance Explained : " in ye %6.3f `=`critT'/`totvar'*100' in green "%"
di in green "T criterion : " in ye %6.4f `critT'
di
di in green "{hline 21}"
di in green "CONSOLIDATION: STEP `n'"
di in green "{hline 21}"
local n=`n'+1
local env=0
if "`method'"=="polychoric"|"`method'"=="polychoricv2" {
local command polychoric
}
else {
local command corr
}
forvalues i=1/`nbitems' {
local env`i'=0
local gr=`group'[`i',1]
qui `command' ``i'' `f1`gr'' [`weight'`exp']
local corr`i'=r(rho)
local corrs`i'=r(rho)
forvalues g=1/`cons' {
qui `command' ``i'' `f1`g'' [`weight'`exp']
local tmpcorr=r(rho)
if `g'!=`gr'&(((`corr`i'')<(`tmpcorr')&"`method'"=="centroid")|((`corr`i'')^2<(`tmpcorr')^2& "`method'"!="centroid")) {
local env=1
local env`i'=1
matrix `group'[`i',1]=`g'
local corr`i'=`tmpcorr'
}
}
if `env`i''==1 {
local g=`group'[`i',1]
di in green "The variable " in ye "``i'' " in green "is assigned to the `g'th group" _c
if "`method'"!="centroid" {
di in green " (corr^2=" %6.4f in ye (`corr`i'')^2 in green " vs " in ye %6.4f (`corrs`i'')^2 in green ")"
}
else {
di in green " (corr=" %6.4f in ye (`corr`i'') in green " vs " in ye %6.4f (`corrs`i'') in green ")"
}
}
}
if `env'==0 {
local latent
forvalues g=1/`cons' {
label variable `f1`g'' "Latent variable `g'"
if "`genlv'"!="" {
if "`replace'"!=""{
capture drop `genlv'`g'
}
gen `genlv'`g'=`f1`g''
}
local latent `latent' `f1`g''
return local cluster`g' `list`g''
}
matrix `group'=`group''
matrix colnames `group'=`varlist'
return matrix affect=`group'
di in ye "Stability of the partition is achieved"
if `cons'<=7 {
di
di in green "{hline 42}"
di in green "CORRELATION MATRIX OF THE LATENT VARIABLES"
di in green "{hline 42}"
di
di in green "{hline `=(`cons')*13+15'}"
forvalues g=1/`cons' {
di _col(`=13*(`g'-1)+23') in green "Latent" _c
}
di
forvalues g=1/`cons' {
di _col(`=13*(`g'-1)+19') in green "variable `g'" _c
}
di
di in green "{hline `=(`cons')*13+15'}"
forvalues g=1/`cons' {
di in green "Latent variable `g'" _c
forvalues h=1/`g' {
local loc=13*`h'+10
qui corr `f1`g'' `f1`h'' [`weight'`exp']
local rho=r(rho)
di _col(`loc') in ye %6.4f `rho' _c
}
di
}
di in green "{hline `=(`cons')*13+15'}"
di
}
if `nbind'<=800&"`biplot'"==""&"`weight'"=="" {
local max=max(`matsize',`nbind')
qui set matsize `max'
if "`addvar'"!="" {
local add `varlist'
}
if "`dim'"=="" {
local dim 1 2
}
qui qui biplotvlab `latent' `add', name(biplot,replace) `fsbi' norow colopts(name(latent variables)) alpha(0) title(Biplot of the latent variables) labdes(size(vsmall) color(blue)) stretch(1) `std' dim(`dim')
}
else if `nbind'>800&"`biplot'"==""&"`weight'"==""{
di in green "There is more than 800 individuals, so the {hi:biplot} option is disabled"
}
else if "`weight'"!=""&&"`biplot'"==""{
di in green "The {hi:biplot} option is disabled because you use weights"
}
}
forvalues g=1/`cons' {
drop `f1`g''
}
}
}
/***** END OF THE CONSOLIDATION PROCEDURE********/
qui set matsize `matsize'
if "`genlv'"!="" {
qui keep `id' `genlv'1-`genlv'`cons'
tempfile lvfile
qui sort `id'
qui save `lvfile',replace
}
use `clvfile',replace
if "`genlv'"!="" {
qui sort `id'
qui merge `id' using `lvfile'
}
qui drop `id'
capture drop _merge
capture cluster delete clv,zap
matrix colnames `vp'="Parent" "Number of clusters" "Child 1" "Child 2" "T" "DeltaT" "deltaT" "Explained Variance" "Explained Variance (%)" "First eigenvalue" "Second Eigenvalue" "2nd order deltaT"
if "`save'"!="" {
qui matrix `save'_vp=`vp'
qui matrix `save'_matclus=`matclus'
qui global `save'_varlist `varlist'
qui global `save'_method `method'
qui global `save'_kernel `kernel'
}
return matrix vp=`vp'
return matrix matclus=`matclus'
return local varlist `varlist'
return local method `method'
return local kernel `kernel'
restore,not
end

@ -0,0 +1,122 @@
{smcl}
{* 29 juillet 2019}{* version 2.17}{...}
{hline}
help for {hi:clv}{right:Jean-Benoit Hardouin}
{hline}
{title:Clustering around latent variables }
{p 8 14 2}{cmd:clv} [{it:varlist}] [{cmd:if} {it:exp}] [{cmd:in} {it:range}] [{cmd:weight}]
[{cmd:,} {cmdab:nostand:ardized} {cmdab:ker:nel}({it:numlist}) {cmdab:meth:od}({it:keyword}) {cmdab:cons:olidation}({it:#}) {cmd:genlv}(string) {cmdab:rep:lace}
{cmdab:noden:dro} {cmdab:saved:endro}({it:filename}[,replace]) {cmdab:cut:number}({it:#}) {cmdab:show:count} {cmdab:texts:ize}({it:string}) {cmdab:deltaT}
{cmdab:hor:izontal} {cmdab:abb:rev}({it:#}) {cmdab:tit:le}({it:string}) {cmdab:cap:tion}({it:string})
{cmdab:bar} {cmdab:nobip:lot} {cmdab:add:var} {cmd:std} {cmd:dim}({it:string}) {cmdab:files:ave} {cmdab:dirs:ave}({it:string})]
{title:Description}
{p 4 8 2}{cmd:clv} clusters variables around latent components. The variables are clustered by
seeking to minimize at each step the decrease of the T criterion, computed as the sum of the
first eigenvalues of the matrices of data of all the clusters. A hierarchical cluster analysis
based on this criterion is performed. An iterative consolidation procedure can be subsequently run which
allows each variable to be assigned to the latent component it is the most correlated with.
{title:Options}
{p 0 8 2}{cmd:Options concerning the method CLV}
{p 4 8 2}{cmd:nostandardized} uses centered variables instead of standardized variables.
{p 4 8 2}{cmd:kernel} defines one or several kernels of variables (variables which are clustered together in an initial step). The first number #k1 indicates that the first #k1 variables are clustered together, the second number #k2 indicates that the following #k2 variables are clustered together...
{p 4 8 2}{cmd:method} indicates the method to cluster the variables among {it:classical} (by default) for the method described by Vigneau and Qannari,
{it:polychoric} for a use of the matrix of polychoric coefficients of correlation (instead of Pearson coefficients of correlation), {it:v2} for a modified
algorithm which seeks to minimize the maximum second eigenvalue among the clusters of 2 variables and more, {it:polychoricv2} which corresponds to the {it:v2}
option with the matrix of polychoric coefficients of correlation, and {it:centroid} which is defined by Vigneau and Qannari as an adaptation of CLV when
the sign of the correlation coefficients between the variables is important.
{p 4 8 2}{cmd:consolidation} performs a consolidation procedure with the obtained partition into the specified number of clusters (by default, no consolidation procedure is performed).
{p 4 8 2}{cmd:genlv} saves the latent variables in new variables with the defined string as prefix (followed by a number). This option must be used in conjunction with the {cmd:consolidation} option.
{p 4 8 2}{cmd:replace} allows replacing the created variables with the {cmd:genlv} option if they already exist.
{p 0 8 2}{cmd:Options concerning the drawing of the dendrogram}
{p 4 8 2}{cmd:nodendro} suppresses the display of the dendrogram.
{p 4 8 2}{cmd:savedendro} saves the dendrogram in the file defined by this option. If this file already exists, it can be overwritten by adding {cmd:replace} after the file name, as in {cmd:savedendro}({it:filename},replace).
{p 4 8 2}{cmd:cutnumber} defines the maximal number of clusters displayed in the dendrogram (40 by default).
{p 4 8 2}{cmd:showcount} displays the number of variables in each cluster (useful with the {cmd:cutnumber} option).
{p 4 8 2}{cmd:textsize} defines the size of the labels of the variables on the dendrogram (see {help textsizestyle}).
{p 4 8 2}{cmd:deltaT} uses the variation of the T criterion as height variable for the dendrogram.
{p 4 8 2}{cmd:horizontal} displays a horizontal (instead of a vertical) dendrogram.
{p 4 8 2}{cmd:abbrev} defines the length of the variables labels on the dendrogram (15 characters by default).
{p 4 8 2}{cmd:title} defines the title of the dendrogram.
{p 4 8 2}{cmd:caption} defines the caption of the axis of the dendrogram which indicates the names of the variables.
{p 0 8 2}{cmd:Options concerning the other graphs}
{p 4 8 2}{cmd:bar} displays a chart of the decrease in the T criterion at each step.
{p 4 8 2}{cmd:nobiplot} suppresses the biplot of the latent variables that is otherwise displayed with the {cmd:consolidation} option.
{p 4 8 2}{cmd:addvar} adds the items (the variables of the {it:varlist}) to the biplot of the latent variables.
{p 4 8 2}{cmd:std} allows standardizing the latent variables for the graphical representation on the biplot.
{p 4 8 2}{cmd:dim}({it:string}) allows choosing the axes represented on the biplot.
{p 4 8 2}{cmd:filesave} allows saving the graphs in gph files on the default directory or on the directory defined by the {cmd:dirsave} option.
{p 4 8 2}{cmd:dirsave}({it:string}) allows determining the directory to save the graphs (useful with the {cmd:filesave} option).
{p 4 8 2} If no {it:varlist} is indicated, the procedure uses the varlist from the last {cmd:clv} procedure, but does not perform the hierarchical cluster analysis.
{title:Notes}
{p 4 8 2} The clustering around latent variables (CLV) method is defined by its authors (Vigneau and Qannari, 2003) only for continuous variables. Results with binary or ordinal variables must be interpreted with caution.
{p 4 8 2} Only {cmd:fweights} are allowed. The biplots are disabled if weights are used.
{p 4 8 2} In this procedure, all the individuals with at least one missing value are omitted.
{p 4 8 2} With the {it:polychoric} and {it:polychoricv2} methods, the {cmd:nostandardized} option is disabled.
{p 4 8 2} This module uses the following modules downloadable on SSC: {stata ssc describe polychoric}, {stata ssc describe biplotvlab} and {stata ssc describe genscore}
{title:Example}
{p 4 8 2}{cmd:. clv var1-var15} /*performs the HCA procedure*/
{p 4 8 2}{cmd:. clv var1-var15, cons(6) bar nodendro meth(centroid)} /* performs the HCA procedure based on the centroid method followed by a consolidation procedure with 6 clusters*/
{p 4 8 2}{cmd:. clv, cons(3) addvar} /*performs only the consolidation procedure with 3 clusters, based on the preceding HCA procedure*/
{title:Acknowledgements}
{p 4 8 2} The author thanks Ronan Conroy for all his suggestions for improvement.
{title:Reference}
{p 4 8 2} Vigneau E. and Qannari E. M. Clustering of variables around latent components. Communications in Statistics - Simulation and Computation. 32(4): 1131-1150, 2003.
{title:Author}
{p 4 8 2}Jean-Benoit Hardouin, PhD, assistant professor{p_end}
{p 4 8 2}INSERM UMR 1246-SPHERE "MethodS in Patients-centered outcomes and HEalth ResEarch"{p_end}
{p 4 8 2}Nantes University - University of Tours{p_end}
{p 4 8 2}Institute for Research in Health 2 (IRS2), Nantes, France{p_end}
{p 4 8 2}Email:
{browse "mailto:jean-benoit.hardouin@univ-nantes.fr":jean-benoit.hardouin@univ-nantes.fr}{p_end}
{p 4 8 2}Website {browse "http://www.anaqol.org":AnaQol}

@ -0,0 +1,907 @@
*! Version 2.12 1December2006
*! Jean-Benoit Hardouin
************************************************************************************************************
* Stata program : clv
* Clustering of variables around latent variables
* Version 2.12 : December 1st, 2006 /*savedendro option*/
*
* History
* Version 1 (2005-06-11): Jean-Benoit Hardouin
* Version 1.1 (2005-07-07): Jean-Benoit Hardouin /*small bug in the consolidation process with cluster of only one variable*/
* Version 1.2 (2005-07-08): Jean-Benoit Hardouin /*Bug in the consolidation procedure when there is negative correlation*/
* Version 2 (2005-09-03): Jean-Benoit Hardouin /*Horizontal dendrograms (with Stata 9)*/
* Version 2.1 (2005-09-08): Jean-Benoit Hardouin /*More flexibility to abbreviate the names of the variables (with Stata 9)*/
* Version 2.1.1 (2005-09-08): Jean-Benoit Hardouin /*Integration of some requests of Ronan Conroy*/
* Version 2.1.2 (2005-09-08): Jean-Benoit Hardouin /*Possibility to give a title and an X/Y caption*/
* Version 2.2 (2005-09-11): Jean-Benoit Hardouin /*Kernel option*/
* Version 2.3 (2005-09-12): Jean-Benoit Hardouin /*Polychoric option*/
* Version 2.4 (2005-09-13): Jean-Benoit Hardouin /*v2 option*/
* Version 2.5 (2005-09-21): Jean-Benoit Hardouin /*corrections*/
* Version 2.6 (2005-10-02): Jean-Benoit Hardouin /*centroid method, biplot*/
* Version 2.7 (2005-10-06): Jean-Benoit Hardouin /*return, multiple graphs, polychoric+consolidation*/
* Version 2.8 (2005-10-06): Jean-Benoit Hardouin /*fweights*/
* Version 2.9 (2006-01-26): Jean-Benoit Hardouin /*save the latent variables*/
* Version 2.10 (2006-07-10): Jean-Benoit Hardouin /*2nd order relative variation of the T criterion*/
* Version 2.11 (2006-10-09): Jean-Benoit Hardouin /*Size of the text in the dendrogram*/
* Version 2.12 (2006-12-01): Jean-Benoit Hardouin /*savedendro option*/
*
* Jean-Benoit Hardouin, University of Nantes - Faculty of Pharmaceutical Sciences
* Department of Biostatistics - France
* jean-benoit.hardouin@univ-nantes.fr
*
* News about this program : http://anaqol.free.fr
* FreeIRT Project : http://freeirt.free.fr
*
* Copyright 2005-2006 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
************************************************************************************************************
program define clv,rclass
version 9.0
syntax [varlist(default=none)] [if] [in] [fweight] [, CUTnumber(int 40) bar CONSolidation(int 0) noDENdro SAVEDendro(string) noSTANDardized deltaT HORizontal SHOWcount ABBrev(int 14) TITle(string) CAPtion(string) KERnel(numlist) METHod(string) noBIPlot ADDvar genlv(string) replace TEXTSize(string)]
preserve
tempfile clvfile
tempvar id
gen `id'=_n
qui save `clvfile',replace
local matsize=c(matsize)
local none=0
if "`varlist'"=="" {
capture confirm matrix r(vp)
if _rc==0 {
capture confirm matrix r(matclus)
if _rc ==0 {
local none=1
}
}
if `none'==0 {
di in red "You cannot use the {hi:clv} command without {hi:varlist} if you have not already run {hi:clv}"
error 198
exit
}
}
tempname matclus vp indexes
/*********TESTS**********/
if `none'==1 {
matrix `vp'=r(vp)
matrix `matclus'=r(matclus)
local varlist `r(varlist)'
tokenize `varlist'
local nbitems=rowsof(`matclus')
if "`method'"!="" {
di in green "The {hi:method} option can not be modified without specification of the varlist. {hi:method} is omitted."
}
local method `r(method)'
local kernel `r(kernel)'
}
if "`method'"=="" {
local method classical
}
if ("`method'"=="polychoric"|"`method'"=="polychoricv2")&"`standardized'"!="" {
di in green "Initial variables are used with the {hi:polychoric} methods"
di in green "But the procedure is based on the matrix of the polychoric correlations"
di
}
if "`method'"!="classical"&"`method'"!="v2"&"`method'"!="centroid"&"`method'"!="polychoric"&"`method'"!="polychoricv2" {
di in red "The {hi:method} `method' is unknown"
error 198
exit
}
tokenize `varlist'
local nbitems : word count `varlist'
marksample touse
qui keep if `touse'
local mat=max(`matsize',`=`nbitems'*2')
qui set matsize `mat'
if `nbitems'<3&`none'!=1 {
di in red "You need at least 3 variables"
error 198
exit
}
/*******DEFINES THE LABELS AND STANDARDIZED THE VARIABLES (IF NECESSARY)*******/
forvalues i=1/`nbitems'{
local label`i':variable label ``i''
if "`label`i''"=="" {
local label`i' ``i''
}
if "`method'"!="polychoric"&"`method'"!="polychoricv2" {
qui su ``i'' [`weight'`exp']
local mean=r(mean)
if "`standardized'"=="" {
local sd=r(sd)
}
else {
local sd=1
}
qui replace ``i''=(``i''-`mean')/`sd'
}
}
tempfile clvfiletmp
qui save `clvfiletmp',replace
qui su `1' [`weight'`exp']
local nbind=r(sum_w)
local cons=`consolidation'
/*COMPUTES THE TOTAL VARIANCE*/
if "`method'"!="polychoric"&"`method'"!="polychoricv2" {
local totvar=0
forvalues i=1/`nbitems' {
qui su ``i'' [`weight'`exp']
local totvar=`totvar'+`r(Var)'
}
}
else {
local totvar `nbitems'
}
local nbkerk=0
local nbkerg=0
/***** DEFINES THE KERNEL IF NECESSARY ********/
if "`kernel'"!="" {
local nbkerg:word count `kernel'
local fin0=0
forvalues i=1/`nbkerg' {
local nbi`i':word `i' of `kernel'
local nbkerk=`nbkerk'+`nbi`i''
local deb`i'=`fin`=`i'-1''+1
local fin`i'=`deb`i''+`nbi`i''-1
local list`i'
forvalues j=`deb`i''/`fin`i'' {
local list`i' `list`i'' ``j''
}
}
tempname kerclus
matrix `kerclus'=J(`=`nbkerk'-`nbkerg'',3,0)
local ligne=1
forvalues g=1/`nbkerg' {
matrix `kerclus'[`ligne',1]=`nbitems'+`ligne'
matrix `kerclus'[`ligne',2]=`deb`g''
matrix `kerclus'[`ligne',3]=`deb`g''+1
local clus`g'=`nbitems'+`ligne'
local ligne=`ligne'+1
if `nbi`g''>2 {
forvalues i=2/`=`nbi`g''-1' {
matrix `kerclus'[`ligne',1]=`nbitems'+`ligne'
matrix `kerclus'[`ligne',2]=`deb`g''+`i'
matrix `kerclus'[`ligne',3]=`nbitems'+`ligne'-1
local clus`g'=`nbitems'+`ligne'
local ligne=`ligne'+1
}
}
}
}
if `nbitems'<`nbkerk' {
di in red "You cannot define more variables in the {hi:kernel} option than items in the {hi:varlist}"
error 198
exit
}
/*******DISPLAY THE FIRST RESULTS *******/
di
di in green "{hline 32}"
di in green "TOTAL VARIANCE: " in ye %16.5f `totvar'
di in green "NUMBER OF INDIVIDUALS: " in ye %9.0f `nbind'
di in green "METHOD:" in ye _col(`=33-length("`method'")') "`=upper("`method'")'"
di in green "{hline 32}"
di
if "`kernel'"!="" {
forvalues i=1/`nbkerg' {
di in green "The kernel numbered " in ye `clus`i'' in green " is composed of `nbi`i'' variables: " in ye "`list`i''"
di
}
}
else {
local nbkerk=0
local nbkerg=0
}
/******** CLASSIFICATION PROCEDURE*******/
tempname Ev
if `none'!=1 {
matrix `matclus'=J(`nbitems',`nbitems',0)
matrix `vp'=J(`=2*`nbitems'-1',12,0)
matrix `indexes'=J(`nbitems',8,0)
forvalues i=1/`nbitems' {
matrix `matclus'[`i',1]=`i'
if "`method'"!="polychoric"&"`method'"!="polychoric" {
qui su ``i'' [`weight'`exp']
matrix `vp'[`i',10]=r(Var)
}
else {
matrix `vp'[`i',10]=1
}
matrix `vp'[`i',1]=`i'
matrix `vp'[`i',2]=`nbitems'
matrix `vp'[`i',8]=`totvar'
matrix `vp'[`i',9]=100
}
matrix `vp'[`nbitems',5]=`nbitems'
if "`method'"=="centroid" {
local crit G
di in green "{hline 101}"
di in green _col(93) "2nd order"
di in green _col(7) "Number of" _col(69) "`crit'" _col(71) "Explained" _col(82) "Relative" _col(94) "Relative"
di in green "Step" _col(8) "clusters" _col(20) "Child 1" _col(33) "Child 2" _col(46) "Parent" _col(53) "`crit' value" _col(61) "variation" _col(72) "Variance" _col(81) "Variation" _col(93) "Variation"
di in green "{hline 101}"
}
else {
local crit T
di in green "{hline 111}"
if "`method'"=="v2"|"`method'"=="polychoricv2" {
di in green _col(84) "Maximal" _col(103) "2nd order"
}
else {
di in green _col(84) "Current" _col(103) "2nd order"
}
di in green _col(7) "Number of" _col(69) "`crit'" _col(71) "Explained" _col(85) "Second" _col(93) "Relative" _col(104) "Relative"
di in green "Step" _col(8) "clusters" _col(20) "Child 1" _col(33) "Child 2" _col(46) "Parent" _col(53) "`crit' value" _col(61) "variation" _col(72) "Variance" _col(81) "Eigenvalue" _col(92) "Variation" _col(103) "Variation"
di in green "{hline 111}"
}
tempname threshold
matrix `threshold'=J(`nbitems',3,0)
forvalues i=1/`=`nbitems'-1' {
local clus=`nbitems'+`i'
local minegenval=999999
local minegenval2=999999
forvalues k=1/`=`clus'-1' {
local list`k'
local numlist`k'
forvalues j=1/`clus' {
if (`matclus'[`j',`i']==`k') {
local list`k' `list`k'' ``j''
local numlist`k' `numlist`k'' `j'
}
}
}
if `clus'>`nbitems'+`nbkerk'-`nbkerg' {
if "`method'"=="centroid" {
tempname centrj centrk diffjk
}
forvalues j=1/`clus' {
local nblistj:word count `list`j''
forvalues k=`=`j'+1'/`clus' {
local nblistk:word count `list`k''
if `nblistj'!=0&`nblistk'!=0 {
if "`method'"=="centroid" {
qui genscore `list`j'',score(`centrj') mean
qui su `centrj' [`weight'`exp']
local Varj=r(Var)
qui genscore `list`k'',score(`centrk') mean
qui su `centrk' [`weight'`exp']
local Vark=r(Var)
qui gen `diffjk'=`centrk'-`centrj'
qui su `diffjk' [`weight'`exp']
local Varjk=r(Var)
drop `centrj' `centrk' `diffjk'
local ev=(`nblistj'*`nblistk')/(`nblistj'+`nblistk')*`Varjk'
if `ev'<`minegenval' {
local minegenval=`ev'
local minj `j'
local mink `k'
local eigen=0
local eigen2=0
}
}
else {
if "`method'"=="classical"|"`method'"=="v2" {
qui pca `list`j'' `list`k'' [`weight'`exp'] ,cov
matrix `Ev'=e(Ev)
}
else if "`method'"=="polychoric"|"`method'"=="polychoricv2" {
qui polychoricpca `list`j'' `list`k'' [`weight'`exp']
matrix `Ev'=r(eigenvalues)
}
local lambda1=`Ev'[1,1]
local lambda2=`Ev'[1,2]
local ev=`vp'[`j',10]+`vp'[`k',10]-`lambda1'
local ev2=max(`vp'[`j',11],`vp'[`k',11],`lambda2')
if ("`method'"=="v2"|"`method'"=="polychoricv2")&`ev'<`minegenval' {
local minegenval=`ev'
local eigen=`lambda1'
local minj `j'
local mink `k'
local eigen2=`lambda2'
}
else if ("`method'"=="classical"|"`method'"=="polychoric")&`ev2'<`minegenval2' {
local minegenval=`ev'
local minegenval2=`ev2'
local eigen=`lambda1'
local minj `j'
local mink `k'
local eigen2=`ev2'
}
}
}
}
}
}
else {
local ligne=`clus'-`nbitems'
local j=`kerclus'[`ligne',2]
local k=`kerclus'[`ligne',3]
if "`method'"!="centroid" {
if "`method'"=="classical"|"`method'"=="v2" {
qui pca `list`j'' `list`k'' [`weight'`exp'],cov
matrix `Ev'=e(Ev)
}
else if "`method'"=="polychoric"|"`method'"=="polychoricv2"{
qui polychoricpca `list`j'' `list`k'' [`weight'`exp']
matrix `Ev'=r(eigenvalues)
}
local lambda1=`Ev'[1,1]
local lambda2=`Ev'[1,2]
local ev=`vp'[`j',10]+`vp'[`k',10]-`lambda1'
local minegenval=`ev'
local eigen=`lambda1'
local minj `j'
local mink `k'
local eigen2=`lambda2'
}
else if "`method'"=="centroid" {
local nblistj:word count `list`j''
local nblistk:word count `list`k''
tempname v1 v2 v12
qui genscore `list`j'',score(`v1') mean
qui genscore `list`k'',score(`v2') mean
qui gen `v12'=`v1'-`v2'
qui su `v12' [`weight'`exp']
local varj=r(Var)
local minegenval=(`nblistj'*`nblistk')/(`nblistj'+`nblistk')*`varj'
local minj `j'
local mink `k'
}
}
if `minj'<=`nbitems' {
local nomj=abbrev("``minj''",14)
}
else {
local nomj `minj'
}
if `mink'<=`nbitems' {
local nomk=abbrev("``mink''",14)
}
else {
local nomk `mink'
}
forvalues j=1/`nbitems' {
matrix `matclus'[`j',`=`i'+1']=`matclus'[`j',`i']
}
matrix `vp'[`clus',1]=`nbitems'+`i' /*PARENT*/
matrix `vp'[`clus',2]=`=`nbitems'-`i'' /*NUMBER OF CLUSTERS*/
matrix `vp'[`clus',3]=`minj' /*CHILD 1*/
matrix `vp'[`clus',4]=`mink' /*CHILD 2*/
matrix `vp'[`clus',6]=`minegenval' /*VARIATION OF THE T or G CRITERION*/
matrix `vp'[`clus',5]=`vp'[`=`clus'-1',5]-`vp'[`clus',6] /*T or G CRITERION*/
matrix `vp'[`clus',7]=(`vp'[`clus',6]-`vp'[`=`clus'-1',6])/`vp'[`=`clus'-1',6] /*RELATIVE VARIATION OF THE T OR G CRITERION*/
matrix `vp'[`clus',8]=`vp'[`=`clus'-1',8]-`minegenval' /*EXPLAINED VARIANCE*/
matrix `vp'[`clus',9]=`vp'[`clus',8]/`totvar'*100 /*% OF EXPLAINED VARIANCE*/
if "`method'"!="centroid" {
matrix `vp'[`clus',10]=`eigen' /*FIRST EIGEN VALUE OF THE NEW CLUSTER*/
matrix `vp'[`clus',11]=`eigen2' /*SECOND EIGEN VALUE OF THE NEW CLUSTER*/
}
if `vp'[`=`clus'-1',7]!=0 {
matrix `vp'[`clus',12]=(`vp'[`clus',7]-`vp'[`=`clus'-1',7])/abs(`vp'[`=`clus'-1',7]) /*2ND ORDER RELATIVE VARIATION OF THE T or G CRITERION*/
}
matrix `indexes'[`i',1]=`i' /*PARENT*/
matrix `indexes'[`i',2]=`nbitems'-`i' /*NUMBER OF CLUSTERS*/
matrix `indexes'[`i',3]=`minegenval' /*VARIATION OF THE T or G CRITERION*/
matrix `indexes'[`i',4]=`vp'[`clus',7] /*RELATIVE VARIATION OF THE T OR G CRITERION*/
matrix `indexes'[`i',5]=max(`eigen2',`indexes'[`=`i'-1',5]) /*MAXIMUM SECOND EIGENVALUE*/
matrix `indexes'[`i',7]=`vp'[`clus',12] /*2nd order RELATIVE VARIATION OF THE T OR G CRITERION*/
foreach j of numlist `numlist`minj'' `numlist`mink'' {
matrix `matclus'[`j',`=`i'+1']=`clus'
}
local varlistgen
local nbvarlistgen
forvalues j=1/`=`nbitems'+`i'' {
local varlist`j'
forvalues k=1/`nbitems' {
if `matclus'[`k',`=`i'+1']==`j' {
local varlist`j' `varlist`j'' ``k''
}
}
local nbvarlist`j': word count `varlist`j''
local varlistgen `varlistgen' `varlist`j''
local nbvarlistgen `nbvarlistgen' `nbvarlist`j''
}
local newlist
foreach m in `nbvarlistgen' {
if `m'!=0 {
local newlist `newlist' `m'
}
}
if "`kernel'"!=""&`i'==`=`nbkerk'-`nbkerg'+1' {
local T=`vp'[`=`clus'-1',8]
di _col(0) in ye "init" _col(12) %4.0f `=`nbitems'-`nbkerk'+`nbkerg'' _col(52) %8.4f `T' _col(62) %8.4f `=`totvar'-`T'' _col(72) %7.3f `=`T'/`totvar'*100' "%"
}
if `clus'>`nbitems'+`nbkerk'-`nbkerg' {
matrix `threshold'[`=`nbitems'-`i'+1',3]=`minegenval'
if `clus'==`nbitems'+`nbkerk'-`nbkerg'+1 {
local relv
local percent
local relv2
}
else {
local relv=`indexes'[`i',4]*100
local percent %
if `indexes'[`i',7]!=. {
local relv2=`indexes'[`i',7]*100
}
else {
local relv2=0
}
matrix `threshold'[`=`nbitems'-`i'+1',1]=`relv'
matrix `threshold'[`=`nbitems'-`i'+1',2]=`relv2'
}
if "`method'"=="centroid" {
di _col(0) in ye %4.0f `=`i'-`nbkerk'+`nbkerg'' _col(12) %4.0f `=`nbitems'-`i'' _col(20) "`nomj'" _col(33) "`nomk'" _col(45) %7.0f `=`i'+`nbitems'' _col(52) %8.4f `vp'[`clus',8] _col(62) %8.4f `minegenval' _col(72) %7.3f `vp'[`clus',9] "%" _col(83) _col(84) %5.2f `relv' "`percent'" _col(93) %8.2f `relv2' "`percent'"
}
else {
di _col(0) in ye %4.0f `=`i'-`nbkerk'+`nbkerg'' _col(12) %4.0f `=`nbitems'-`i'' _col(20) "`nomj'" _col(33) "`nomk'" _col(45) %7.0f `=`i'+`nbitems'' _col(52) %8.4f `vp'[`clus',8] _col(62) %8.4f `minegenval' _col(72) %7.3f `vp'[`clus',9] "%" _col(83) %8.4f `vp'[`clus',11] _col(94) %6.2f `relv' "`percent'" _col(103) %8.2f `relv2' "`percent'"
}
}
}
matrix `indexes'[`nbitems',3]=`vp'[`=2*`nbitems'-1',5] /*RELATIVE VARIATION OF THE T OR G CRITERION*/
matrix `indexes'[`nbitems',7]=`indexes'[`nbitems',3]/`indexes'[`=`nbitems'-1',3] /*RELATIVE VARIATION OF THE T OR G CRITERION*/
local i=2*`nbitems'-1
matrix `threshold'[1,1]=`vp'[`i',5]/`vp'[`i',6]*100-100
matrix `threshold'[1,2]=(`threshold'[1,1]-`threshold'[2,1])/abs(`threshold'[2,1])*100
matrix `threshold'[1,3]=`vp'[`i',5]
if "`method'"=="centroid" {
di in ye _col(62) %8.4f `threshold'[1,3] _col(83) %6.2f `threshold'[1,1] "`percent'" _col(93) %8.2f `threshold'[1,2] "`percent'"
}
else {
di in ye _col(62) %8.4f `threshold'[1,3] _col(94) %6.2f `threshold'[1,1] "`percent'" _col(103) %8.2f `threshold'[1,2] "`percent'"
}
local best=0
local maxbest=0
local best2=0
local maxbest2=0
local demipart=int(`nbitems'/2)+1
forvalues i=1/`demipart' {
if `threshold'[`i',3]>`maxbest2' {
if `threshold'[`i',3]>`maxbest' {
local maxbest2=`maxbest'
local best2=`best'
local maxbest=`threshold'[`i',3]
local best=`i'
}
else {
local maxbest2=`threshold'[`i',3]
local best2=`i'
}
}
}
di in green "{hline 111}"
di
di in green "{hline 60}"
di in green "PROPOSED BEST PARTITIONS (AMONG THE `demipart' SMALLER PARTITIONS)"
di in green "{hline 60}"
di
di in yellow _col(4) "Based on the variation of the T criterion: " _col(60) in gr "Partitions in " in ye `best' " or " `best2' in gr " clusters"
return local bestvariation `best' `best2'
local bestt=0
local bestt2=0
local var=0
local var2=0
forvalues i=1/`nbitems' {
if `threshold'[`i',1]>`var2'&`i'<`demipart' {
if `threshold'[`i',1]>`var' {
local bestt2=`bestt'
local var2=`var'
local var=`threshold'[`i',1]
local bestt=`i'
}
else {
local var2=`threshold'[`i',1]
local bestt2=`i'
}
}
}
di in yellow _col(4) "Based on the research of a threshold: " _col(60) in gr "Partitions in " in ye `bestt' " or " `bestt2' in gr " clusters"
forvalues i=`=`nbitems'+1'/`=`nbitems'+`nbkerk'-`nbkerg'' {
matrix `vp'[`i',6]=`totvar'-`T'
matrix `vp'[`i',8]=`T'
matrix `vp'[`i',9]=`T'/`nbitems'*100
}
return local bestthresold `bestt' `bestt2'
forvalues i=1/`nbitems' {
if `threshold'[`i',2]>`var2'&`i'<`demipart' {
if `threshold'[`i',2]>`var' {
local bestt2=`bestt'
local var2=`var'
local var=`threshold'[`i',2]
local bestt=`i'
}
else {
local var2=`threshold'[`i',2]
local bestt2=`i'
}
}
}
di in yellow _col(4) "Based on the research of a threshold (second order): " _col(60) in gr "Partitions in " in ye `bestt' " or " `bestt2' in gr " clusters"
return local bestthresold2 `bestt' `bestt2'
}
/******BAR CHART *******/
if "`bar'"!="" {
drop _all
qui set obs `nbitems'
qui svmat `indexes' ,names(v)
qui gen id=`nbitems'-_n
qui replace v7=. in 1
qui drop if id>`nbitems'-`nbkerk'+`nbkerg'-1
label variable id "Number of clusters"
label variable v3 "T variation"
qui su v3 if id!=0
local maxv3=ceil(r(max)*5)/5
local minv3=floor(r(min)*5)/5
label variable v4 "Relative T variation"
label variable v7 "Relative T variation order 2"
graph twoway (bar v3 id, name(bar,replace) vert yaxis(1))(line v4 id,yaxis(2))/*(line v6 id,yaxis(3))(line v5 id,yaxis(4))*/(line v7 id,yaxis(5)) if id!=0,ylabel(`minv3'(0.2)`maxv3') xlabel(1(1)`=`nbitems'-`nbkerk'+`nbkerg'-1')
}
/****** DENDROGRAM********/
drop _all
qui set obs `nbitems'
qui svmat `matclus' ,names(v)
local listorder
forvalues i=`nbitems'(-1)1 {
local listorder `listorder' v`i'
}
qui gen id=_n
qui sort `listorder'
capture cluster delete clv,zap
qui cluster complete v* ,name(clv)
qui replace clv_id=_n
qui replace clv_ord=id
qui replace clv_hgt=.
qui gen fait=0
qui gen clus=0
forvalues i=2/`nbitems' {
local ligne=`nbitems'+`i'-1
if (`vp'[`ligne',3]<=`nbitems') {
local first=`vp'[`ligne',3]
gsort +fait -v`i' +clv_id
}
else {
local first=`vp'[`ligne',4]
gsort +fait -v`i' +clv_id
}
if "`deltaT'"!="" {
qui replace clv_hgt=`vp'[`ligne',6] in 1
}
else {
qui replace clv_hgt=100-`vp'[`ligne',9] in 1
}
qui replace fait=1 in 1
qui replace clus=`vp'[`ligne',1] in 1
}
if "`dendro'"=="" {
qui gen label=""
forvalues i=1/`nbitems' {
qui replace label=abbrev("`label`i''",`abbrev') if clv_id==`i'
}
sort clv_id
if `nbitems'>`cutnumber' {
local var "Groups of variables"
local cut cutnumber(`cutnumber') /*labcutn*/
}
else {
local var "Variables"
local cut label(label)
}
qui su clv_hgt
local tmp=r(max)
local max=floor(`tmp')+.5
if `tmp'>`max' {
local max=`max'+.5
}
local maxvar=`max'+5
if "`title'"=="" {
local title "Clustering around Latent Variables (CLV)"
}
if "`caption'"!="" {
local var "`caption'"
}
if "`deltaT'"!="" {
local titleL "Variation of the T criterion"
local yl "0(.5)`max'"
}
else {
local titleL "% Unexplained Variance"
local yl "0(25)`maxvar'"
}
if "`textsize'"=="" {
local textsize: word `=min(int(`nbitems'/15)+1,5)' of medium medsmall small vsmall tiny
}
if "`horizontal'"!="" {
cluster dendro clv, name (dendrogram,replace) hor ytitle("`var'") `showcount' xtitle("`titleL'") title("`title'",span) xlabel(`yl') ylabel(,angle(0) labsize(`textsize')) `cut'
}
else {
cluster dendro clv, name(dendrogram,replace) xtitle("`var'") `showcount' ytitle("`titleL'") title("`title'",span) ylabel(`yl') xlabel(,labsize(`textsize')) `cut'
}
if "`savedendro'"!="" {
graph save dendrogram `savedendro'
}
}
/***** END DENDROGRAM*****/
/**** TEST ********/
if `cons'>`nbitems'-`nbkerk'+`nbkerg' {
di in ye "The {hi:consolidation} is not possible for a number of clusters superior to the initial number of clusters"
local cons=0
}
/***** CONSOLIDATION PROCEDURE ********/
if `cons'!=0 {
sort v`=`nbitems'-`cons'+1'
gen cut`cons'=1
local g=1
forvalues i=2/`nbitems' {
if v`=`nbitems'-`cons'+1'[`i']!=v`=`nbitems'-`cons'+1'[`=`i'-1'] {
local g=`g'+1
}
qui replace cut`cons'=`g' in `i'
}
sort id
tempname group
mkmat cut`cons',matrix(`group')
use `clvfiletmp',replace
local n=1
local env=1
while (`env'==1) {
forvalues g=1/`cons' {
local list`g'
forvalues i=1/`nbitems' {
if `group'[`i',1]==`g' {
local list`g' `list`g'' ``i''
}
}
}
di
if `n'==1 {
di in green "{hline 30}"
di in green "PARTITION BEFORE CONSOLIDATION"
di in green "{hline 30}"
}
di
local col=13
local max=0
local critT=0
forvalues g=1/`cons' {
di _col(`col') in green "CLUSTER " %2.0f `g' _c
local col=`col'+12
local tmp`g':word count `list`g''
if `tmp`g''>`max' {
local max `tmp`g''
}
tempvar f1`g'
if "`method'"=="centroid" {
qui genscore `list`g'',score(`f1`g'') mean
qui su `f1`g'' [`weight'`exp']
local var=r(Var)
local critT=`critT'+`tmp`g''*`var'
}
else {
if `tmp`g''>1 {
if "`method'"=="classical"|"`method'"=="v2" {
qui pca `list`g'' [`weight'`exp'] ,cov
matrix `Ev'=e(Ev)
local trace=e(trace)
qui predict `f1`g''
}
else if "`method'"=="polychoric"|"`method'"=="polychoric" {
qui polychoricpca `list`g'' [`weight'`exp'] ,score(`f1`g'') nscore(1)
matrix `Ev'=r(eigenvalues)
local trace=0
forvalues m=1/`tmp`g''{
local trace =`trace'+`r(lambda`m')'
}
rename `f1`g''1 `f1`g''
}
local lambda1=`Ev'[1,1]
local explained`g'=`lambda1'/`trace'
local critT=`critT'+`lambda1'
}
else {
local explained`g'=1
qui gen `f1`g''=`list`g''
if "`standardized'"=="" {
local critT=`critT'+1
}
else {
qui su [`weight'`exp']
local critT=`critT'+`r(Var)'
}
}
}
}
di
di _col(1) in green "ITEMS :" _c
forvalues i=1/`max' {
local col=15
forvalues g=1/`cons' {
local tmpv:word `i' of `list`g''
local tmpv=abbrev("`tmpv'",8)
di _col(`col') in ye %8s "`tmpv'" _c
local col= `col'+12
}
di
}
local col=16
di _col(1) in green "Expl. Var:" _c
forvalues g=1/`cons' {
di _col(`col') in ye %6.2f `=`explained`g''*100' in green "%" _c
local col= `col'+12
}
di
di
di in green "Variance Explained : " in ye %6.3f `=`critT'/`totvar'*100' in green "%"
di in green "T criterion : " in ye %6.4f `critT'
di
di in green "{hline 21}"
di in green "CONSOLIDATION: STEP `n'"
di in green "{hline 21}"
local n=`n'+1
local env=0
if "`method'"=="polychoric"|"`method'"=="polychoricv2" {
local command polychoric
}
else {
local command corr
}
forvalues i=1/`nbitems' {
local env`i'=0
local gr=`group'[`i',1]
qui `command' ``i'' `f1`gr'' [`weight'`exp']
local corr`i'=r(rho)
local corrs`i'=r(rho)
forvalues g=1/`cons' {
qui `command' ``i'' `f1`g'' [`weight'`exp']
local tmpcorr=r(rho)
if `g'!=`gr'&(((`corr`i'')<(`tmpcorr')&"`method'"=="centroid")|((`corr`i'')^2<(`tmpcorr')^2& "`method'"!="centroid")) {
local env=1
local env`i'=1
matrix `group'[`i',1]=`g'
local corr`i'=`tmpcorr'
}
}
if `env`i''==1 {
local g=`group'[`i',1]
di in green "The variable " in ye "``i'' " in green "is assigned to the `g'th group" _c
if "`method'"!="centroid" {
di in green " (corr^2=" %6.4f in ye (`corr`i'')^2 in green " vs " in ye %6.4f (`corrs`i'')^2 in green ")"
}
else {
di in green " (corr=" %6.4f in ye (`corr`i'') in green " vs " in ye %6.4f (`corrs`i'') in green ")"
}
}
}
if `env'==0 {
local latent
forvalues g=1/`cons' {
label variable `f1`g'' "Latent variable `g'"
if "`genlv'"!="" {
if "`replace'"!=""{
capture drop `genlv'`g'
}
gen `genlv'`g'=`f1`g''
}
local latent `latent' `f1`g''
return local cluster`g' `list`g''
}
matrix `group'=`group''
matrix colnames `group'=`varlist'
return matrix affect=`group'
di in ye "Stability of the partition is achieved"
if `cons'<=7 {
di
di in green "{hline 42}"
di in green "CORRELATION MATRIX OF THE LATENT VARIABLES"
di in green "{hline 42}"
di
di in green "{hline `=(`cons')*13+15'}"
forvalues g=1/`cons' {
di _col(`=13*(`g'-1)+23') in green "Latent" _c
}
di
forvalues g=1/`cons' {
di _col(`=13*(`g'-1)+19') in green "variable `g'" _c
}
di
di in green "{hline `=(`cons')*13+15'}"
forvalues g=1/`cons' {
di in green "Latent variable `g'" _c
forvalues h=1/`g' {
local loc=13*`h'+10
qui corr `f1`g'' `f1`h'' [`weight'`exp']
local rho=r(rho)
di _col(`loc') in ye %6.4f `rho' _c
}
di
}
di in green "{hline `=(`cons')*13+15'}"
di
}
if `nbind'<=800&"`biplot'"==""&"`weight'"=="" {
local max=max(`matsize',`nbind')
set matsize `max'
if "`addvar'"!="" {
local add `varlist'
}
qui biplotvlab `latent' `add', name(biplot,replace) norow colopts(name(latent variables)) alpha(0) title(Biplot of the latent variables) labdes(size(vsmall) color(blue)) stretch(1)
}
else if `nbind'>800&"`biplot'"==""&"`weight'"==""{
di in green "There is more than 800 individuals, so the {hi:biplot} option is disabled"
}
else if "`weight'"!=""&&"`biplot'"==""{
di in green "The {hi:biplot} option is disabled because you use weights"
}
}
forvalues g=1/`cons' {
drop `f1`g''
}
}
}
/***** END OF THE CONSOLIDATION PROCEDURE********/
set matsize `matsize'
if "`genlv'"!="" {
qui keep `id' `genlv'1-`genlv'`cons'
tempfile lvfile
qui sort `id'
qui save `lvfile',replace
}
use `clvfile',replace
if "`genlv'"!="" {
qui sort `id'
qui merge `id' using `lvfile'
}
qui drop `id'
capture drop _merge
capture cluster delete clv,zap
matrix colnames `vp'="Parent" "Number of clusters" "Child 1" "Child 2" "T" "DeltaT" "deltaT" "Explained Variance" "Explained Variance (%)" "First eigenvalue" "Second Eigenvalue" "2nd order deltaT"
return matrix vp=`vp'
return matrix matclus=`matclus'
return local varlist `varlist'
return local method `method'
return local kernel `kernel'
restore,not
end

@ -0,0 +1,916 @@
*! Version 2.14 20May2010
*! Jean-Benoit Hardouin
************************************************************************************************************
* Stata program : clv
* Clustering of variables around latent variables
* Version 2.14 : May 20th, 2010 /*dim and std options for biplots*/
*
* History
* Version 1 (2005-06-11): Jean-Benoit Hardouin
* Version 1.1 (2005-07-07): Jean-Benoit Hardouin /*small bug in the consolidation process with clusters of only one variable*/
* Version 1.2 (2005-07-08): Jean-Benoit Hardouin /*Bug in the consolidation procedure when there is negative correlation*/
* Version 2 (2005-09-03): Jean-Benoit Hardouin /*Horizontal dendrograms (with Stata 9)*/
* Version 2.1 (2005-09-08): Jean-Benoit Hardouin /*More flexibility to abbreviate the names of the variables (with Stata 9)*/
* Version 2.1.1 (2005-09-08): Jean-Benoit Hardouin /*Integration of some requests of Ronan Conroy*/
* Version 2.1.2 (2005-09-08): Jean-Benoit Hardouin /*Possibility to give a title and an X/Y caption*/
* Version 2.2 (2005-09-11): Jean-Benoit Hardouin /*Kernel option*/
* Version 2.3 (2005-09-12): Jean-Benoit Hardouin /*Polychoric option*/
* Version 2.4 (2005-09-13): Jean-Benoit Hardouin /*v2 option*/
* Version 2.5 (2005-09-21): Jean-Benoit Hardouin /*corrections*/
* Version 2.6 (2005-10-02): Jean-Benoit Hardouin /*centroid method, biplot*/
* Version 2.7 (2005-10-06): Jean-Benoit Hardouin /*return, multiple graphs, polychoric+consolidation*/
* Version 2.8 (2005-10-06): Jean-Benoit Hardouin /*fweights*/
* Version 2.9 (2006-01-26): Jean-Benoit Hardouin /*save the latent variables*/
* Version 2.10 (2006-07-10): Jean-Benoit Hardouin /*2nd order relative variation of the T criterion*/
* Version 2.11 (2006-10-09): Jean-Benoit Hardouin /*Size of the text in the dendrogram*/
* Version 2.12 (2006-12-01): Jean-Benoit Hardouin /*savedendro option*/
* Version 2.13 (2010-05-12): Jean-Benoit Hardouin /*corrections of bugs in KERNEL option and with METHOD(centroid)*/
* Version 2.14 (2010-05-20): Jean-Benoit Hardouin /*DIM and STD options for biplots*/
*
* Jean-Benoit Hardouin, University of Nantes - Faculty of Pharmaceutical Sciences
* Department of Biostatistics - France
* jean-benoit.hardouin@univ-nantes.fr
*
* News about this program : http://anaqol.free.fr
* FreeIRT Project : http://freeirt.free.fr
*
* Copyright 2005-2006, 2010 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
************************************************************************************************************
program define clv,rclass
version 9.0
syntax [varlist(default=none)] [if] [in] [fweight] [, CUTnumber(int 40) bar CONSolidation(int 0) noDENdro SAVEDendro(string) noSTANDardized deltaT HORizontal SHOWcount ABBrev(int 14) TITle(string) CAPtion(string) KERnel(numlist) METHod(string) noBIPlot ADDvar genlv(string) replace TEXTSize(string) std dim(string)]
preserve
tempfile clvfile
tempvar id
gen `id'=_n
qui save `clvfile',replace
local matsize=c(matsize)
local none=0
if "`varlist'"=="" {
capture confirm matrix r(vp)
if _rc==0 {
capture confirm matrix r(matclus)
if _rc ==0 {
local none=1
}
}
if `none'==0 {
di in red "You cannot use the {hi:clv} command without {hi:varlist} if you have not already run {hi:clv}"
error 198
exit
}
}
tempname matclus vp indexes
/*********TESTS**********/
if `none'==1 {
matrix `vp'=r(vp)
matrix `matclus'=r(matclus)
local varlist `r(varlist)'
tokenize `varlist'
local nbitems=rowsof(`matclus')
if "`method'"!="" {
di in green "The {hi:method} option can not be modified without specification of the varlist. {hi:method} is omitted."
}
local method `r(method)'
local kernel `r(kernel)'
}
if "`method'"=="" {
local method classical
}
if ("`method'"=="polychoric"|"`method'"=="polychoricv2")&"`standardized'"!="" {
di in green "Initial variables are used with the {hi:polychoric} methods"
di in green "But the procedure is based on the matrix of the polychoric correlations"
di
}
if "`method'"!="classical"&"`method'"!="v2"&"`method'"!="centroid"&"`method'"!="polychoric"&"`method'"!="polychoricv2" {
di in red "The {hi:method} `method' is unknown"
error 198
exit
}
tokenize `varlist'
local nbitems : word count `varlist'
marksample touse
qui keep if `touse'
local mat=max(`matsize',`=`nbitems'*2')
qui set matsize `mat'
if `nbitems'<3&`none'!=1 {
di in red "You need at least 3 variables"
error 198
exit
}
/*******DEFINES THE LABELS AND STANDARDIZED THE VARIABLES (IF NECESSARY)*******/
forvalues i=1/`nbitems'{
local label`i':variable label ``i''
if "`label`i''"=="" {
local label`i' ``i''
}
if "`method'"!="polychoric"&"`method'"!="polychoricv2" {
qui su ``i'' [`weight'`exp']
local mean=r(mean)
if "`standardized'"=="" {
local sd=r(sd)
}
else {
local sd=1
}
qui replace ``i''=(``i''-`mean')/`sd'
}
}
tempfile clvfiletmp
qui save `clvfiletmp',replace
qui su `1' [`weight'`exp']
local nbind=r(sum_w)
local cons=`consolidation'
/*COMPUTES THE TOTAL VARIANCE*/
if "`method'"!="polychoric"&"`method'"!="polychoricv2" {
local totvar=0
forvalues i=1/`nbitems' {
qui su ``i'' [`weight'`exp']
local totvar=`totvar'+`r(Var)'
}
}
else {
local totvar `nbitems'
}
local nbkerk=0
local nbkerg=0
/***** DEFINES THE KERNEL IF NECESSARY ********/
if "`kernel'"!="" {
local nbkerg:word count `kernel'
local fin0=0
forvalues i=1/`nbkerg' {
local nbi`i':word `i' of `kernel'
local nbkerk=`nbkerk'+`nbi`i''
local deb`i'=`fin`=`i'-1''+1
local fin`i'=`deb`i''+`nbi`i''-1
local list`i'
forvalues j=`deb`i''/`fin`i'' {
local list`i' `list`i'' ``j''
}
}
tempname kerclus
matrix `kerclus'=J(`=`nbkerk'-`nbkerg'',3,0)
local ligne=1
forvalues g=1/`nbkerg' {
matrix `kerclus'[`ligne',1]=`nbitems'+`ligne'
matrix `kerclus'[`ligne',2]=`deb`g''
matrix `kerclus'[`ligne',3]=`deb`g''+1
local clus`g'=`nbitems'+`ligne'
local ligne=`ligne'+1
if `nbi`g''>2 {
forvalues i=2/`=`nbi`g''-1' {
matrix `kerclus'[`ligne',1]=`nbitems'+`ligne'
matrix `kerclus'[`ligne',2]=`deb`g''+`i'
matrix `kerclus'[`ligne',3]=`nbitems'+`ligne'-1
local clus`g'=`nbitems'+`ligne'
local ligne=`ligne'+1
}
}
local eigen2=0
}
}
if `nbitems'<`nbkerk' {
di in red "You cannot define more variables in the {hi:kernel} option than items in the {hi:varlist}"
error 198
exit
}
/*******DISPLAY THE FIRST RESULTS *******/
di
di in green "{hline 32}"
di in green "TOTAL VARIANCE: " in ye %16.5f `totvar'
di in green "NUMBER OF INDIVIDUALS: " in ye %9.0f `nbind'
di in green "METHOD:" in ye _col(`=33-length("`method'")') "`=upper("`method'")'"
di in green "{hline 32}"
di
if "`kernel'"!="" {
forvalues i=1/`nbkerg' {
di in green "The kernel numbered " in ye `clus`i'' in green " is composed of `nbi`i'' variables: " in ye "`list`i''"
di
}
}
else {
local nbkerk=0
local nbkerg=0
}
/******** CLASSIFICATION PROCEDURE*******/
tempname Ev
if `none'!=1 {
matrix `matclus'=J(`nbitems',`nbitems',0)
matrix `vp'=J(`=2*`nbitems'-1',12,0)
matrix `indexes'=J(`nbitems',8,0)
forvalues i=1/`nbitems' {
matrix `matclus'[`i',1]=`i'
if "`method'"!="polychoric"&"`method'"!="polychoric" {
qui su ``i'' [`weight'`exp']
matrix `vp'[`i',10]=r(Var)
}
else {
matrix `vp'[`i',10]=1
}
matrix `vp'[`i',1]=`i'
matrix `vp'[`i',2]=`nbitems'
matrix `vp'[`i',8]=`totvar'
matrix `vp'[`i',9]=100
}
matrix `vp'[`nbitems',5]=`nbitems'
if "`method'"=="centroid" {
local crit G
di in green "{hline 101}"
di in green _col(93) "2nd order"
di in green _col(7) "Number of" _col(69) "`crit'" _col(71) "Explained" _col(82) "Relative" _col(94) "Relative"
di in green "Step" _col(8) "clusters" _col(20) "Child 1" _col(33) "Child 2" _col(46) "Parent" _col(53) "`crit' value" _col(61) "variation" _col(72) "Variance" _col(81) "Variation" _col(93) "Variation"
di in green "{hline 101}"
}
else {
local crit T
di in green "{hline 111}"
if "`method'"=="v2"|"`method'"=="polychoricv2" {
di in green _col(84) "Maximal" _col(103) "2nd order"
}
else {
di in green _col(84) "Current" _col(103) "2nd order"
}
di in green _col(7) "Number of" _col(69) "`crit'" _col(71) "Explained" _col(85) "Second" _col(93) "Relative" _col(104) "Relative"
di in green "Step" _col(8) "clusters" _col(20) "Child 1" _col(33) "Child 2" _col(46) "Parent" _col(53) "`crit' value" _col(61) "variation" _col(72) "Variance" _col(81) "Eigenvalue" _col(92) "Variation" _col(103) "Variation"
di in green "{hline 111}"
}
tempname threshold
matrix `threshold'=J(`nbitems',3,0)
forvalues i=1/`=`nbitems'-1' {
local clus=`nbitems'+`i'
local minegenval=999999
local minegenval2=999999
forvalues k=1/`=`clus'-1' {
local list`k'
local numlist`k'
forvalues j=1/`clus' {
if (`matclus'[`j',`i']==`k') {
local list`k' `list`k'' ``j''
local numlist`k' `numlist`k'' `j'
}
}
}
if `clus'>`nbitems'+`nbkerk'-`nbkerg' {
if "`method'"=="centroid" {
tempname centrj centrk diffjk
}
forvalues j=1/`clus' {
local nblistj:word count `list`j''
forvalues k=`=`j'+1'/`clus' {
local nblistk:word count `list`k''
if `nblistj'!=0&`nblistk'!=0 {
if "`method'"=="centroid" {
qui genscore `list`j'',score(`centrj') mean
qui su `centrj' [`weight'`exp']
local Varj=r(Var)
qui genscore `list`k'',score(`centrk') mean
qui su `centrk' [`weight'`exp']
local Vark=r(Var)
qui gen `diffjk'=`centrk'-`centrj'
qui su `diffjk' [`weight'`exp']
local Varjk=r(Var)
drop `centrj' `centrk' `diffjk'
local ev=(`nblistj'*`nblistk')/(`nblistj'+`nblistk')*`Varjk'
if `ev'<`minegenval' {
local minegenval=`ev'
local minj `j'
local mink `k'
local eigen=0
local eigen2=0
}
}
else {
if "`method'"=="classical"|"`method'"=="v2" {
qui pca `list`j'' `list`k'' [`weight'`exp'] ,cov
matrix `Ev'=e(Ev)
}
else if "`method'"=="polychoric"|"`method'"=="polychoricv2" {
qui polychoricpca `list`j'' `list`k'' [`weight'`exp']
matrix `Ev'=r(eigenvalues)
}
local lambda1=`Ev'[1,1]
local lambda2=`Ev'[1,2]
local ev=`vp'[`j',10]+`vp'[`k',10]-`lambda1'
local ev2=max(`vp'[`j',11],`vp'[`k',11],`lambda2')
if ("`method'"=="v2"|"`method'"=="polychoricv2")&`ev'<`minegenval' {
local minegenval=`ev'
local eigen=`lambda1'
local minj `j'
local mink `k'
local eigen2=`lambda2'
}
else if ("`method'"=="classical"|"`method'"=="polychoric")&`ev2'<`minegenval2' {
local minegenval=`ev'
local minegenval2=`ev2'
local eigen=`lambda1'
local minj `j'
local mink `k'
local eigen2=`ev2'
}
}
}
}
}
}
else {
local ligne=`clus'-`nbitems'
local j=`kerclus'[`ligne',2]
local k=`kerclus'[`ligne',3]
if "`method'"!="centroid" {
if "`method'"=="classical"|"`method'"=="v2" {
qui pca `list`j'' `list`k'' [`weight'`exp'],cov
matrix `Ev'=e(Ev)
}
else if "`method'"=="polychoric"|"`method'"=="polychoricv2"{
qui polychoricpca `list`j'' `list`k'' [`weight'`exp']
matrix `Ev'=r(eigenvalues)
}
local lambda1=`Ev'[1,1]
local lambda2=`Ev'[1,2]
local ev=`vp'[`j',10]+`vp'[`k',10]-`lambda1'
local minegenval=`ev'
local eigen=`lambda1'
local minj `j'
local mink `k'
local eigen2=`lambda2'
}
else if "`method'"=="centroid" {
local nblistj:word count `list`j''
local nblistk:word count `list`k''
tempname v1 v2 v12
qui genscore `list`j'',score(`v1') mean
qui genscore `list`k'',score(`v2') mean
qui gen `v12'=`v1'-`v2'
qui su `v12' [`weight'`exp']
local varj=r(Var)
local minegenval=(`nblistj'*`nblistk')/(`nblistj'+`nblistk')*`varj'
local minj `j'
local mink `k'
}
}
if `minj'<=`nbitems' {
local nomj=abbrev("``minj''",14)
}
else {
local nomj `minj'
}
if `mink'<=`nbitems' {
local nomk=abbrev("``mink''",14)
}
else {
local nomk `mink'
}
forvalues j=1/`nbitems' {
matrix `matclus'[`j',`=`i'+1']=`matclus'[`j',`i']
}
matrix `vp'[`clus',1]=`nbitems'+`i' /*PARENT*/
matrix `vp'[`clus',2]=`=`nbitems'-`i'' /*NUMBER OF CLUSTERS*/
matrix `vp'[`clus',3]=`minj' /*CHILD 1*/
matrix `vp'[`clus',4]=`mink' /*CHILD 2*/
matrix `vp'[`clus',6]=`minegenval' /*VARIATION OF THE T or G CRITERION*/
matrix `vp'[`clus',5]=`vp'[`=`clus'-1',5]-`vp'[`clus',6] /*T or G CRITERION*/
matrix `vp'[`clus',7]=(`vp'[`clus',6]-`vp'[`=`clus'-1',6])/`vp'[`=`clus'-1',6] /*RELATIVE VARIATION OF THE T OR G CRITERION*/
matrix `vp'[`clus',8]=`vp'[`=`clus'-1',8]-`minegenval' /*EXPLAINED VARIANCE*/
matrix `vp'[`clus',9]=`vp'[`clus',8]/`totvar'*100 /*% OF EXPLAINED VARIANCE*/
if "`method'"!="centroid" {
matrix `vp'[`clus',10]=`eigen' /*FIRST EIGEN VALUE OF THE NEW CLUSTER*/
matrix `vp'[`clus',11]=`eigen2' /*SECOND EIGEN VALUE OF THE NEW CLUSTER*/
}
if `vp'[`=`clus'-1',7]!=0 {
matrix `vp'[`clus',12]=(`vp'[`clus',7]-`vp'[`=`clus'-1',7])/abs(`vp'[`=`clus'-1',7]) /*2ND ORDER RELATIVE VARIATION OF THE T or G CRITERION*/
}
matrix `indexes'[`i',1]=`i' /*PARENT*/
matrix `indexes'[`i',2]=`nbitems'-`i' /*NUMBER OF CLUSTERS*/
matrix `indexes'[`i',3]=`minegenval' /*VARIATION OF THE T or G CRITERION*/
matrix `indexes'[`i',4]=`vp'[`clus',7] /*RELATIVE VARIATION OF THE T OR G CRITERION*/
matrix `indexes'[`i',5]=max(`eigen2',`indexes'[`=`i'-1',5]) /*MAXIMUM SECOND EIGENVALUE*/
matrix `indexes'[`i',7]=`vp'[`clus',12] /*2nd order RELATIVE VARIATION OF THE T OR G CRITERION*/
foreach j of numlist `numlist`minj'' `numlist`mink'' {
matrix `matclus'[`j',`=`i'+1']=`clus'
}
local varlistgen
local nbvarlistgen
forvalues j=1/`=`nbitems'+`i'' {
local varlist`j'
forvalues k=1/`nbitems' {
if `matclus'[`k',`=`i'+1']==`j' {
local varlist`j' `varlist`j'' ``k''
}
}
local nbvarlist`j': word count `varlist`j''
local varlistgen `varlistgen' `varlist`j''
local nbvarlistgen `nbvarlistgen' `nbvarlist`j''
}
local newlist
foreach m in `nbvarlistgen' {
if `m'!=0 {
local newlist `newlist' `m'
}
}
if "`kernel'"!=""&`i'==`=`nbkerk'-`nbkerg'+1' {
local T=`vp'[`=`clus'-1',8]
di _col(0) in ye "init" _col(12) %4.0f `=`nbitems'-`nbkerk'+`nbkerg'' _col(52) %8.4f `T' _col(62) %8.4f `=`totvar'-`T'' _col(72) %7.3f `=`T'/`totvar'*100' "%"
}
if `clus'>`nbitems'+`nbkerk'-`nbkerg' {
matrix `threshold'[`=`nbitems'-`i'+1',3]=`minegenval'
if `clus'==`nbitems'+`nbkerk'-`nbkerg'+1 {
local relv
local percent
local relv2
}
else {
local relv=`indexes'[`i',4]*100
local percent %
if `indexes'[`i',7]!=. {
local relv2=`indexes'[`i',7]*100
}
else {
local relv2=0
}
matrix `threshold'[`=`nbitems'-`i'+1',1]=`relv'
matrix `threshold'[`=`nbitems'-`i'+1',2]=`relv2'
}
if "`method'"=="centroid" {
di _col(0) in ye %4.0f `=`i'-`nbkerk'+`nbkerg'' _col(12) %4.0f `=`nbitems'-`i'' _col(20) "`nomj'" _col(33) "`nomk'" _col(45) %7.0f `=`i'+`nbitems'' _col(52) %8.4f `vp'[`clus',8] _col(62) %8.4f `minegenval' _col(72) %7.3f `vp'[`clus',9] "%" _col(83) _col(84) %5.2f `relv' "`percent'" _col(93) %8.2f `relv2' "`percent'"
}
else {
di _col(0) in ye %4.0f `=`i'-`nbkerk'+`nbkerg'' _col(12) %4.0f `=`nbitems'-`i'' _col(20) "`nomj'" _col(33) "`nomk'" _col(45) %7.0f `=`i'+`nbitems'' _col(52) %8.4f `vp'[`clus',8] _col(62) %8.4f `minegenval' _col(72) %7.3f `vp'[`clus',9] "%" _col(83) %8.4f `vp'[`clus',11] _col(94) %6.2f `relv' "`percent'" _col(103) %8.2f `relv2' "`percent'"
}
}
}
matrix `indexes'[`nbitems',3]=`vp'[`=2*`nbitems'-1',5] /*RELATIVE VARIATION OF THE T OR G CRITERION*/
matrix `indexes'[`nbitems',7]=`indexes'[`nbitems',3]/`indexes'[`=`nbitems'-1',3] /*RELATIVE VARIATION OF THE T OR G CRITERION*/
local i=2*`nbitems'-1
matrix `threshold'[1,1]=`vp'[`i',5]/`vp'[`i',6]*100-100
matrix `threshold'[1,2]=(`threshold'[1,1]-`threshold'[2,1])/abs(`threshold'[2,1])*100
matrix `threshold'[1,3]=`vp'[`i',5]
if "`method'"=="centroid" {
di in ye _col(62) %8.4f `threshold'[1,3] _col(83) %6.2f `threshold'[1,1] "`percent'" _col(93) %8.2f `threshold'[1,2] "`percent'"
}
else {
di in ye _col(62) %8.4f `threshold'[1,3] _col(94) %6.2f `threshold'[1,1] "`percent'" _col(103) %8.2f `threshold'[1,2] "`percent'"
}
local best=0
local maxbest=0
local best2=0
local maxbest2=0
local demipart=int(`nbitems'/2)+1
forvalues i=1/`demipart' {
if `threshold'[`i',3]>`maxbest2' {
if `threshold'[`i',3]>`maxbest' {
local maxbest2=`maxbest'
local best2=`best'
local maxbest=`threshold'[`i',3]
local best=`i'
}
else {
local maxbest2=`threshold'[`i',3]
local best2=`i'
}
}
}
di in green "{hline 111}"
di
di in green "{hline 60}"
di in green "PROPOSED BEST PARTITIONS (AMONG THE `demipart' SMALLER PARTITIONS)"
di in green "{hline 60}"
di
di in yellow _col(4) "Based on the variation of the T criterion: " _col(60) in gr "Partitions in " in ye `best' " or " `best2' in gr " clusters"
return local bestvariation `best' `best2'
local bestt=0
local bestt2=0
local var=0
local var2=0
forvalues i=1/`nbitems' {
if `threshold'[`i',1]>`var2'&`i'<`demipart' {
if `threshold'[`i',1]>`var' {
local bestt2=`bestt'
local var2=`var'
local var=`threshold'[`i',1]
local bestt=`i'
}
else {
local var2=`threshold'[`i',1]
local bestt2=`i'
}
}
}
di in yellow _col(4) "Based on the research of a threshold: " _col(60) in gr "Partitions in " in ye `bestt' " or " `bestt2' in gr " clusters"
forvalues i=`=`nbitems'+1'/`=`nbitems'+`nbkerk'-`nbkerg'' {
matrix `vp'[`i',6]=`totvar'-`T'
matrix `vp'[`i',8]=`T'
matrix `vp'[`i',9]=`T'/`nbitems'*100
}
return local bestthresold `bestt' `bestt2'
forvalues i=1/`nbitems' {
if `threshold'[`i',2]>`var2'&`i'<`demipart' {
if `threshold'[`i',2]>`var' {
local bestt2=`bestt'
local var2=`var'
local var=`threshold'[`i',2]
local bestt=`i'
}
else {
local var2=`threshold'[`i',2]
local bestt2=`i'
}
}
}
di in yellow _col(4) "Based on the research of a threshold (second order): " _col(60) in gr "Partitions in " in ye `bestt' " or " `bestt2' in gr " clusters"
return local bestthresold2 `bestt' `bestt2'
}
/******BAR CHART *******/
if "`bar'"!="" {
drop _all
qui set obs `nbitems'
qui svmat `indexes' ,names(v)
qui gen id=`nbitems'-_n
qui replace v7=. in 1
qui drop if id>`nbitems'-`nbkerk'+`nbkerg'-1
label variable id "Number of clusters"
label variable v3 "T variation"
qui su v3 if id!=0
local maxv3=ceil(r(max)*5)/5
local minv3=floor(r(min)*5)/5
label variable v4 "Relative T variation"
label variable v7 "Relative T variation order 2"
graph twoway (bar v3 id, name(bar,replace) vert yaxis(1))(line v4 id,yaxis(2))/*(line v6 id,yaxis(3))(line v5 id,yaxis(4))*/(line v7 id,yaxis(5)) if id!=0,ylabel(`minv3'(0.2)`maxv3') xlabel(1(1)`=`nbitems'-`nbkerk'+`nbkerg'-1')
}
/****** DENDROGRAM********/
drop _all
qui set obs `nbitems'
qui svmat `matclus' ,names(v)
local listorder
forvalues i=`nbitems'(-1)1 {
local listorder `listorder' v`i'
}
qui gen id=_n
qui sort `listorder'
capture cluster delete clv,zap
qui cluster complete v* ,name(clv)
qui replace clv_id=_n
qui replace clv_ord=id
qui replace clv_hgt=.
qui gen fait=0
qui gen clus=0
forvalues i=2/`nbitems' {
local ligne=`nbitems'+`i'-1
if (`vp'[`ligne',3]<=`nbitems') {
local first=`vp'[`ligne',3]
gsort +fait -v`i' +clv_id
}
else {
local first=`vp'[`ligne',4]
gsort +fait -v`i' +clv_id
}
if "`deltaT'"!="" {
qui replace clv_hgt=`vp'[`ligne',6] in 1
}
else {
qui replace clv_hgt=100-`vp'[`ligne',9] in 1
}
qui replace fait=1 in 1
qui replace clus=`vp'[`ligne',1] in 1
}
if "`dendro'"=="" {
qui gen label=""
forvalues i=1/`nbitems' {
qui replace label=abbrev("`label`i''",`abbrev') if clv_id==`i'
}
sort clv_id
if `nbitems'>`cutnumber' {
local var "Groups of variables"
local cut cutnumber(`cutnumber') /*labcutn*/
}
else {
local var "Variables"
local cut label(label)
}
qui su clv_hgt
local tmp=r(max)
local max=floor(`tmp')+.5
if `tmp'>`max' {
local max=`max'+.5
}
local maxvar=`max'+5
if "`title'"=="" {
local title "Clustering around Latent Variables (CLV)"
}
if "`caption'"!="" {
local var "`caption'"
}
if "`deltaT'"!="" {
local titleL "Variation of the T criterion"
local yl "0(.5)`max'"
}
else {
local titleL "% Unexplained Variance"
local yl "0(25)`maxvar'"
}
if "`textsize'"=="" {
local textsize: word `=min(int(`nbitems'/15)+1,5)' of medium medsmall small vsmall tiny
}
if "`horizontal'"!="" {
cluster dendro clv, name (dendrogram,replace) hor ytitle("`var'") `showcount' xtitle("`titleL'") title("`title'",span) xlabel(`yl') ylabel(,angle(0) labsize(`textsize')) `cut'
}
else {
cluster dendro clv, name(dendrogram,replace) xtitle("`var'") `showcount' ytitle("`titleL'") title("`title'",span) ylabel(`yl') xlabel(,labsize(`textsize')) `cut'
}
if "`savedendro'"!="" {
graph save dendrogram `savedendro'
}
}
/***** END DENDROGRAM*****/
/**** TEST ********/
if `cons'>`nbitems'-`nbkerk'+`nbkerg' {
di in ye "The {hi:consolidation} is not possible for a number of clusters superior to the initial number of clusters"
local cons=0
}
/***** CONSOLIDATION PROCEDURE ********/
if `cons'!=0 {
sort v`=`nbitems'-`cons'+1'
gen cut`cons'=1
local g=1
forvalues i=2/`nbitems' {
if v`=`nbitems'-`cons'+1'[`i']!=v`=`nbitems'-`cons'+1'[`=`i'-1'] {
local g=`g'+1
}
qui replace cut`cons'=`g' in `i'
}
sort id
tempname group
mkmat cut`cons',matrix(`group')
use `clvfiletmp',replace
local n=1
local env=1
while (`env'==1) {
forvalues g=1/`cons' {
local list`g'
forvalues i=1/`nbitems' {
if `group'[`i',1]==`g' {
local list`g' `list`g'' ``i''
}
}
}
di
if `n'==1 {
di in green "{hline 30}"
di in green "PARTITION BEFORE CONSOLIDATION"
di in green "{hline 30}"
}
di
local col=13
local max=0
local critT=0
forvalues g=1/`cons' {
di _col(`col') in green "CLUSTER " %2.0f `g' _c
local col=`col'+12
local tmp`g':word count `list`g''
if `tmp`g''>`max' {
local max `tmp`g''
}
tempvar f1`g'
if "`method'"=="centroid" {
qui genscore `list`g'',score(`f1`g'') mean
qui su `f1`g'' [`weight'`exp']
local var=r(Var)
local critT=`critT'+`tmp`g''*`var'
qui pca `list`g'' [`weight'`exp'] ,cov
local trace=e(trace)
local explained`g'=`tmp`g''*`var'/`trace'
}
else {
if `tmp`g''>1 {
if "`method'"=="classical"|"`method'"=="v2" {
qui pca `list`g'' [`weight'`exp'] ,cov
matrix `Ev'=e(Ev)
local trace=e(trace)
qui predict `f1`g''
}
else if "`method'"=="polychoric"|"`method'"=="polychoric" {
qui polychoricpca `list`g'' [`weight'`exp'] ,score(`f1`g'') nscore(1)
matrix `Ev'=r(eigenvalues)
local trace=0
forvalues m=1/`tmp`g''{
local trace =`trace'+`r(lambda`m')'
}
rename `f1`g''1 `f1`g''
}
local lambda1=`Ev'[1,1]
local explained`g'=`lambda1'/`trace'
local critT=`critT'+`lambda1'
}
else {
local explained`g'=1
qui gen `f1`g''=`list`g''
if "`standardized'"=="" {
local critT=`critT'+1
}
else {
qui su [`weight'`exp']
local critT=`critT'+`r(Var)'
}
}
}
}
di
di _col(1) in green "ITEMS :" _c
forvalues i=1/`max' {
local col=15
forvalues g=1/`cons' {
local tmpv:word `i' of `list`g''
local tmpv=abbrev("`tmpv'",8)
di _col(`col') in ye %8s "`tmpv'" _c
local col= `col'+12
}
di
}
local col=16
di _col(1) in green "Expl. Var:" _c
forvalues g=1/`cons' {
di _col(`col') in ye %6.2f `=`explained`g''*100' in green "%" _c
local col= `col'+12
}
di
di
di in green "Variance Explained : " in ye %6.3f `=`critT'/`totvar'*100' in green "%"
di in green "T criterion : " in ye %6.4f `critT'
di
di in green "{hline 21}"
di in green "CONSOLIDATION: STEP `n'"
di in green "{hline 21}"
local n=`n'+1
local env=0
if "`method'"=="polychoric"|"`method'"=="polychoricv2" {
local command polychoric
}
else {
local command corr
}
forvalues i=1/`nbitems' {
local env`i'=0
local gr=`group'[`i',1]
qui `command' ``i'' `f1`gr'' [`weight'`exp']
local corr`i'=r(rho)
local corrs`i'=r(rho)
forvalues g=1/`cons' {
qui `command' ``i'' `f1`g'' [`weight'`exp']
local tmpcorr=r(rho)
if `g'!=`gr'&(((`corr`i'')<(`tmpcorr')&"`method'"=="centroid")|((`corr`i'')^2<(`tmpcorr')^2& "`method'"!="centroid")) {
local env=1
local env`i'=1
matrix `group'[`i',1]=`g'
local corr`i'=`tmpcorr'
}
}
if `env`i''==1 {
local g=`group'[`i',1]
di in green "The variable " in ye "``i'' " in green "is assigned to the `g'th group" _c
if "`method'"!="centroid" {
di in green " (corr^2=" %6.4f in ye (`corr`i'')^2 in green " vs " in ye %6.4f (`corrs`i'')^2 in green ")"
}
else {
di in green " (corr=" %6.4f in ye (`corr`i'') in green " vs " in ye %6.4f (`corrs`i'') in green ")"
}
}
}
if `env'==0 {
local latent
forvalues g=1/`cons' {
label variable `f1`g'' "Latent variable `g'"
if "`genlv'"!="" {
if "`replace'"!=""{
capture drop `genlv'`g'
}
gen `genlv'`g'=`f1`g''
}
local latent `latent' `f1`g''
return local cluster`g' `list`g''
}
matrix `group'=`group''
matrix colnames `group'=`varlist'
return matrix affect=`group'
di in ye "Stability of the partition is achieved"
if `cons'<=7 {
di
di in green "{hline 42}"
di in green "CORRELATION MATRIX OF THE LATENT VARIABLES"
di in green "{hline 42}"
di
di in green "{hline `=(`cons')*13+15'}"
forvalues g=1/`cons' {
di _col(`=13*(`g'-1)+23') in green "Latent" _c
}
di
forvalues g=1/`cons' {
di _col(`=13*(`g'-1)+19') in green "variable `g'" _c
}
di
di in green "{hline `=(`cons')*13+15'}"
forvalues g=1/`cons' {
di in green "Latent variable `g'" _c
forvalues h=1/`g' {
local loc=13*`h'+10
qui corr `f1`g'' `f1`h'' [`weight'`exp']
local rho=r(rho)
di _col(`loc') in ye %6.4f `rho' _c
}
di
}
di in green "{hline `=(`cons')*13+15'}"
di
}
if `nbind'<=800&"`biplot'"==""&"`weight'"=="" {
local max=max(`matsize',`nbind')
qui set matsize `max'
if "`addvar'"!="" {
local add `varlist'
}
if "`dim'"=="" {
local dim 1 2
}
qui biplotvlab `latent' `add', name(biplot,replace) norow colopts(name(latent variables)) alpha(0) title(Biplot of the latent variables) labdes(size(vsmall) color(blue)) stretch(1) `std' dim(`dim')
}
else if `nbind'>800&"`biplot'"==""&"`weight'"==""{
di in green "There is more than 800 individuals, so the {hi:biplot} option is disabled"
}
else if "`weight'"!=""&&"`biplot'"==""{
di in green "The {hi:biplot} option is disabled because you use weights"
}
}
forvalues g=1/`cons' {
drop `f1`g''
}
}
}
/***** END OF THE CONSOLIDATION PROCEDURE********/
qui set matsize `matsize'
if "`genlv'"!="" {
qui keep `id' `genlv'1-`genlv'`cons'
tempfile lvfile
qui sort `id'
qui save `lvfile',replace
}
use `clvfile',replace
if "`genlv'"!="" {
qui sort `id'
qui merge `id' using `lvfile'
}
qui drop `id'
capture drop _merge
capture cluster delete clv,zap
matrix colnames `vp'="Parent" "Number of clusters" "Child 1" "Child 2" "T" "DeltaT" "deltaT" "Explained Variance" "Explained Variance (%)" "First eigenvalue" "Second Eigenvalue" "2nd order deltaT"
return matrix vp=`vp'
return matrix matclus=`matclus'
return local varlist `varlist'
return local method `method'
return local kernel `kernel'
restore,not
end

@ -0,0 +1,946 @@
*! Version 2.15 14April2014
*! Jean-Benoit Hardouin
************************************************************************************************************
* Stata program : clv
* Clustering of variables around latent variables
* Version 2.15 : April 14th, 2014 /*save and use options*/
*
* History
* Version 1 (2005-06-11): Jean-Benoit Hardouin
* Version 1.1 (2005-07-07): Jean-Benoit Hardouin /*small bug in the consolidation process with cluster of only one variable*/
* Version 1.2 (2005-07-08): Jean-Benoit Hardouin /*Bug in the consolidation procedure when there is negative correlation*/
* Version 2 (2005-09-03): Jean-Benoit Hardouin /*Horizontal dendrograms (with Stata 9)*/
* Version 2.1 (2005-09-08): Jean-Benoit Hardouin /*More flexibility to abbreviate the names of the variables (with Stata 9)*/
* Version 2.1.1 (2005-09-08): Jean-Benoit Hardouin /*Integration of some requests of Ronan Conroy*/
* Version 2.1.2 (2005-09-08): Jean-Benoit Hardouin /*Possibility to give a title and an X/Y caption*/
* Version 2.2 (2005-09-11): Jean-Benoit Hardouin /*Kernel option*/
* Version 2.3 (2005-09-12): Jean-Benoit Hardouin /*Polychoric option*/
* Version 2.4 (2005-09-13): Jean-Benoit Hardouin /*v2 option*/
* Version 2.5 (2005-09-21): Jean-Benoit Hardouin /*corrections*/
* Version 2.6 (2005-10-02): Jean-Benoit Hardouin /*centroid method, biplot*/
* Version 2.7 (2005-10-06): Jean-Benoit Hardouin /*return, multiple graphs, polychoric+consolidation*/
* Version 2.8 (2005-10-06): Jean-Benoit Hardouin /*fweights*/
* Version 2.9 (2006-01-26): Jean-Benoit Hardouin /*save the latent variables*/
* Version 2.10 (2006-07-10): Jean-Benoit Hardouin /*2nd order relative variation of the T criterion*/
* Version 2.11 (2006-10-09): Jean-Benoit Hardouin /*Size of the text in the dendrogram*/
* Version 2.12 (2006-12-01): Jean-Benoit Hardouin /*savedendro option*/
* Version 2.13 (2010-05-12): Jean-Benoit Hardouin /*corrections of bugs in KERNEL option and with METHOD(centroid)*/
* Version 2.14 (2010-05-20): Jean-Benoit Hardouin /*DIM and STD options for biplots*/
* Version 2.15 (2014-04-14): Jean-Benoit Hardouin /*save and use options*/
*
* Jean-Benoit Hardouin, University of Nantes - Faculty of Pharmaceutical Sciences
* Department of Biostatistics - France
* jean-benoit.hardouin@univ-nantes.fr
*
* News about this program : http://anaqol.sphere-nantes.fr
*
* Copyright 2005-2006, 2010, 2014 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
************************************************************************************************************
program define clv215,rclass
version 9.0
syntax [varlist(default=none)] [if] [in] [fweight] [, CUTnumber(int 40) bar CONSolidation(int 0) noDENdro SAVEDendro(string) noSTANDardized deltaT HORizontal SHOWcount ABBrev(int 14) TITle(string) CAPtion(string) KERnel(numlist) METHod(string) noBIPlot ADDvar genlv(string) replace TEXTSize(string) std dim(string) save(string) use(string)]
preserve
tempfile clvfile
tempvar id
gen `id'=_n
qui save `clvfile',replace
local matsize=c(matsize)
local none=0
if "`varlist'"==""&"`use'"=="" {
capture confirm matrix r(vp)
if _rc==0 {
capture confirm matrix r(matclus)
if _rc ==0 {
local none=1
}
}
if `none'==0 {
di in red "You cannot use the {hi:clv} command without {hi:varlist} if you have not already run {hi:clv}"
error 198
exit
}
}
tempname matclus vp indexes
/*********TESTS**********/
if "`use'"!="" {
local error=0
capture matrix `vp'=`use'_vp
if _rc!=0 {
local error=_rc
}
capture matrix `matclus'=`use'_matclus
if _rc!=0 {
local error=_rc
}
local varlist $`use'_varlist
local method $`use'_method
local kernel $`use'_kernel
if "`varlist'"==""|"`method'"=="" {
local error=1
}
if `error'!=0 {
di in red "You cannot use the {hi:use} option without a preliminary use of the {hi:save} option"
error 198
exit
}
}
if `none'==1 {
matrix `vp'=r(vp)
matrix `matclus'=r(matclus)
local varlist `r(varlist)'
tokenize `varlist'
local nbitems=rowsof(`matclus')
if "`method'"!="" {
di in green "The {hi:method} option can not be modified without specification of the varlist. {hi:method} is omitted."
}
local method `r(method)'
local kernel `r(kernel)'
}
if "`method'"=="" {
local method classical
}
if ("`method'"=="polychoric"|"`method'"=="polychoricv2")&"`standardized'"!="" {
di in green "Initial variables are used with the {hi:polychoric} methods"
di in green "But the procedure is based on the matrix of the polychoric correlations"
di
}
if "`method'"!="classical"&"`method'"!="v2"&"`method'"!="centroid"&"`method'"!="polychoric"&"`method'"!="polychoricv2" {
di in red "The {hi:method} `method' is unknown"
error 198
exit
}
tokenize `varlist'
local nbitems : word count `varlist'
marksample touse
qui keep if `touse'
local mat=max(`matsize',`=`nbitems'*2')
qui set matsize `mat'
if `nbitems'<3&`none'!=1 {
di in red "You need at least 3 variables"
error 198
exit
}
/*******DEFINES THE LABELS AND STANDARDIZED THE VARIABLES (IF NECESSARY)*******/
forvalues i=1/`nbitems'{
local label`i':variable label ``i''
if "`label`i''"=="" {
local label`i' ``i''
}
if "`method'"!="polychoric"&"`method'"!="polychoricv2" {
qui su ``i'' [`weight'`exp']
local mean=r(mean)
if "`standardized'"=="" {
local sd=r(sd)
}
else {
local sd=1
}
qui replace ``i''=(``i''-`mean')/`sd'
}
}
tempfile clvfiletmp
qui save `clvfiletmp',replace
qui su `1' [`weight'`exp']
local nbind=r(sum_w)
local cons=`consolidation'
/*COMPUTES THE TOTAL VARIANCE*/
if "`method'"!="polychoric"&"`method'"!="polychoricv2" {
local totvar=0
forvalues i=1/`nbitems' {
qui su ``i'' [`weight'`exp']
local totvar=`totvar'+`r(Var)'
}
}
else {
local totvar `nbitems'
}
local nbkerk=0
local nbkerg=0
/***** DEFINES THE KERNEL IF NECESSARY ********/
if "`kernel'"!="" {
local nbkerg:word count `kernel'
local fin0=0
forvalues i=1/`nbkerg' {
local nbi`i':word `i' of `kernel'
local nbkerk=`nbkerk'+`nbi`i''
local deb`i'=`fin`=`i'-1''+1
local fin`i'=`deb`i''+`nbi`i''-1
local list`i'
forvalues j=`deb`i''/`fin`i'' {
local list`i' `list`i'' ``j''
}
}
tempname kerclus
matrix `kerclus'=J(`=`nbkerk'-`nbkerg'',3,0)
local ligne=1
forvalues g=1/`nbkerg' {
matrix `kerclus'[`ligne',1]=`nbitems'+`ligne'
matrix `kerclus'[`ligne',2]=`deb`g''
matrix `kerclus'[`ligne',3]=`deb`g''+1
local clus`g'=`nbitems'+`ligne'
local ligne=`ligne'+1
if `nbi`g''>2 {
forvalues i=2/`=`nbi`g''-1' {
matrix `kerclus'[`ligne',1]=`nbitems'+`ligne'
matrix `kerclus'[`ligne',2]=`deb`g''+`i'
matrix `kerclus'[`ligne',3]=`nbitems'+`ligne'-1
local clus`g'=`nbitems'+`ligne'
local ligne=`ligne'+1
}
}
local eigen2=0
}
}
if `nbitems'<`nbkerk' {
di in red "You cannot define more variables in the {hi:kernel} option than items in the {hi:varlist}"
error 198
exit
}
/*******DISPLAY THE FIRST RESULTS *******/
di
di in green "{hline 32}"
di in green "TOTAL VARIANCE: " in ye %16.5f `totvar'
di in green "NUMBER OF INDIVIDUALS: " in ye %9.0f `nbind'
di in green "METHOD:" in ye _col(`=33-length("`method'")') "`=upper("`method'")'"
di in green "{hline 32}"
di
if "`kernel'"!="" {
forvalues i=1/`nbkerg' {
di in green "The kernel numbered " in ye `clus`i'' in green " is composed of `nbi`i'' variables: " in ye "`list`i''"
di
}
}
else {
local nbkerk=0
local nbkerg=0
}
/******** CLASSIFICATION PROCEDURE*******/
tempname Ev
if `none'!=1 {
matrix `matclus'=J(`nbitems',`nbitems',0)
matrix `vp'=J(`=2*`nbitems'-1',12,0)
matrix `indexes'=J(`nbitems',8,0)
forvalues i=1/`nbitems' {
matrix `matclus'[`i',1]=`i'
if "`method'"!="polychoric"&"`method'"!="polychoric" {
qui su ``i'' [`weight'`exp']
matrix `vp'[`i',10]=r(Var)
}
else {
matrix `vp'[`i',10]=1
}
matrix `vp'[`i',1]=`i'
matrix `vp'[`i',2]=`nbitems'
matrix `vp'[`i',8]=`totvar'
matrix `vp'[`i',9]=100
}
matrix `vp'[`nbitems',5]=`nbitems'
if "`method'"=="centroid" {
local crit G
di in green "{hline 101}"
di in green _col(93) "2nd order"
di in green _col(7) "Number of" _col(69) "`crit'" _col(71) "Explained" _col(82) "Relative" _col(94) "Relative"
di in green "Step" _col(8) "clusters" _col(20) "Child 1" _col(33) "Child 2" _col(46) "Parent" _col(53) "`crit' value" _col(61) "variation" _col(72) "Variance" _col(81) "Variation" _col(93) "Variation"
di in green "{hline 101}"
}
else {
local crit T
di in green "{hline 111}"
if "`method'"=="v2"|"`method'"=="polychoricv2" {
di in green _col(84) "Maximal" _col(103) "2nd order"
}
else {
di in green _col(84) "Current" _col(103) "2nd order"
}
di in green _col(7) "Number of" _col(69) "`crit'" _col(71) "Explained" _col(85) "Second" _col(93) "Relative" _col(104) "Relative"
di in green "Step" _col(8) "clusters" _col(20) "Child 1" _col(33) "Child 2" _col(46) "Parent" _col(53) "`crit' value" _col(61) "variation" _col(72) "Variance" _col(81) "Eigenvalue" _col(92) "Variation" _col(103) "Variation"
di in green "{hline 111}"
}
tempname threshold
matrix `threshold'=J(`nbitems',3,0)
forvalues i=1/`=`nbitems'-1' {
local clus=`nbitems'+`i'
local minegenval=999999
local minegenval2=999999
forvalues k=1/`=`clus'-1' {
local list`k'
local numlist`k'
forvalues j=1/`clus' {
if (`matclus'[`j',`i']==`k') {
local list`k' `list`k'' ``j''
local numlist`k' `numlist`k'' `j'
}
}
}
if `clus'>`nbitems'+`nbkerk'-`nbkerg' {
if "`method'"=="centroid" {
tempname centrj centrk diffjk
}
forvalues j=1/`clus' {
local nblistj:word count `list`j''
forvalues k=`=`j'+1'/`clus' {
local nblistk:word count `list`k''
if `nblistj'!=0&`nblistk'!=0 {
if "`method'"=="centroid" {
qui genscore `list`j'',score(`centrj') mean
qui su `centrj' [`weight'`exp']
local Varj=r(Var)
qui genscore `list`k'',score(`centrk') mean
qui su `centrk' [`weight'`exp']
local Vark=r(Var)
qui gen `diffjk'=`centrk'-`centrj'
qui su `diffjk' [`weight'`exp']
local Varjk=r(Var)
drop `centrj' `centrk' `diffjk'
local ev=(`nblistj'*`nblistk')/(`nblistj'+`nblistk')*`Varjk'
if `ev'<`minegenval' {
local minegenval=`ev'
local minj `j'
local mink `k'
local eigen=0
local eigen2=0
}
}
else {
if "`method'"=="classical"|"`method'"=="v2" {
qui pca `list`j'' `list`k'' [`weight'`exp'] ,cov
matrix `Ev'=e(Ev)
}
else if "`method'"=="polychoric"|"`method'"=="polychoricv2" {
qui polychoricpca `list`j'' `list`k'' [`weight'`exp']
matrix `Ev'=r(eigenvalues)
}
local lambda1=`Ev'[1,1]
local lambda2=`Ev'[1,2]
local ev=`vp'[`j',10]+`vp'[`k',10]-`lambda1'
local ev2=max(`vp'[`j',11],`vp'[`k',11],`lambda2')
if ("`method'"=="v2"|"`method'"=="polychoricv2")&`ev'<`minegenval' {
local minegenval=`ev'
local eigen=`lambda1'
local minj `j'
local mink `k'
local eigen2=`lambda2'
}
else if ("`method'"=="classical"|"`method'"=="polychoric")&`ev2'<`minegenval2' {
local minegenval=`ev'
local minegenval2=`ev2'
local eigen=`lambda1'
local minj `j'
local mink `k'
local eigen2=`ev2'
}
}
}
}
}
}
else {
local ligne=`clus'-`nbitems'
local j=`kerclus'[`ligne',2]
local k=`kerclus'[`ligne',3]
if "`method'"!="centroid" {
if "`method'"=="classical"|"`method'"=="v2" {
qui pca `list`j'' `list`k'' [`weight'`exp'],cov
matrix `Ev'=e(Ev)
}
else if "`method'"=="polychoric"|"`method'"=="polychoricv2"{
qui polychoricpca `list`j'' `list`k'' [`weight'`exp']
matrix `Ev'=r(eigenvalues)
}
local lambda1=`Ev'[1,1]
local lambda2=`Ev'[1,2]
local ev=`vp'[`j',10]+`vp'[`k',10]-`lambda1'
local minegenval=`ev'
local eigen=`lambda1'
local minj `j'
local mink `k'
local eigen2=`lambda2'
}
else if "`method'"=="centroid" {
local nblistj:word count `list`j''
local nblistk:word count `list`k''
tempname v1 v2 v12
qui genscore `list`j'',score(`v1') mean
qui genscore `list`k'',score(`v2') mean
qui gen `v12'=`v1'-`v2'
qui su `v12' [`weight'`exp']
local varj=r(Var)
local minegenval=(`nblistj'*`nblistk')/(`nblistj'+`nblistk')*`varj'
local minj `j'
local mink `k'
}
}
if `minj'<=`nbitems' {
local nomj=abbrev("``minj''",14)
}
else {
local nomj `minj'
}
if `mink'<=`nbitems' {
local nomk=abbrev("``mink''",14)
}
else {
local nomk `mink'
}
forvalues j=1/`nbitems' {
matrix `matclus'[`j',`=`i'+1']=`matclus'[`j',`i']
}
matrix `vp'[`clus',1]=`nbitems'+`i' /*PARENT*/
matrix `vp'[`clus',2]=`=`nbitems'-`i'' /*NUMBER OF CLUSTERS*/
matrix `vp'[`clus',3]=`minj' /*CHILD 1*/
matrix `vp'[`clus',4]=`mink' /*CHILD 2*/
matrix `vp'[`clus',6]=`minegenval' /*VARIATION OF THE T or G CRITERION*/
matrix `vp'[`clus',5]=`vp'[`=`clus'-1',5]-`vp'[`clus',6] /*T or G CRITERION*/
matrix `vp'[`clus',7]=(`vp'[`clus',6]-`vp'[`=`clus'-1',6])/`vp'[`=`clus'-1',6] /*RELATIVE VARIATION OF THE T OR G CRITERION*/
matrix `vp'[`clus',8]=`vp'[`=`clus'-1',8]-`minegenval' /*EXPLAINED VARIANCE*/
matrix `vp'[`clus',9]=`vp'[`clus',8]/`totvar'*100 /*% OF EXPLAINED VARIANCE*/
if "`method'"!="centroid" {
matrix `vp'[`clus',10]=`eigen' /*FIRST EIGEN VALUE OF THE NEW CLUSTER*/
matrix `vp'[`clus',11]=`eigen2' /*SECOND EIGEN VALUE OF THE NEW CLUSTER*/
}
if `vp'[`=`clus'-1',7]!=0 {
matrix `vp'[`clus',12]=(`vp'[`clus',7]-`vp'[`=`clus'-1',7])/abs(`vp'[`=`clus'-1',7]) /*2ND ORDER RELATIVE VARIATION OF THE T or G CRITERION*/
}
matrix `indexes'[`i',1]=`i' /*PARENT*/
matrix `indexes'[`i',2]=`nbitems'-`i' /*NUMBER OF CLUSTERS*/
matrix `indexes'[`i',3]=`minegenval' /*VARIATION OF THE T or G CRITERION*/
matrix `indexes'[`i',4]=`vp'[`clus',7] /*RELATIVE VARIATION OF THE T OR G CRITERION*/
matrix `indexes'[`i',5]=max(`eigen2',`indexes'[`=`i'-1',5]) /*MAXIMUM SECOND EIGENVALUE*/
matrix `indexes'[`i',7]=`vp'[`clus',12] /*2nd order RELATIVE VARIATION OF THE T OR G CRITERION*/
foreach j of numlist `numlist`minj'' `numlist`mink'' {
matrix `matclus'[`j',`=`i'+1']=`clus'
}
local varlistgen
local nbvarlistgen
forvalues j=1/`=`nbitems'+`i'' {
local varlist`j'
forvalues k=1/`nbitems' {
if `matclus'[`k',`=`i'+1']==`j' {
local varlist`j' `varlist`j'' ``k''
}
}
local nbvarlist`j': word count `varlist`j''
local varlistgen `varlistgen' `varlist`j''
local nbvarlistgen `nbvarlistgen' `nbvarlist`j''
}
local newlist
foreach m in `nbvarlistgen' {
if `m'!=0 {
local newlist `newlist' `m'
}
}
if "`kernel'"!=""&`i'==`=`nbkerk'-`nbkerg'+1' {
local T=`vp'[`=`clus'-1',8]
di _col(0) in ye "init" _col(12) %4.0f `=`nbitems'-`nbkerk'+`nbkerg'' _col(52) %8.4f `T' _col(62) %8.4f `=`totvar'-`T'' _col(72) %7.3f `=`T'/`totvar'*100' "%"
}
if `clus'>`nbitems'+`nbkerk'-`nbkerg' {
matrix `threshold'[`=`nbitems'-`i'+1',3]=`minegenval'
if `clus'==`nbitems'+`nbkerk'-`nbkerg'+1 {
local relv
local percent
local relv2
}
else {
local relv=`indexes'[`i',4]*100
local percent %
if `indexes'[`i',7]!=. {
local relv2=`indexes'[`i',7]*100
}
else {
local relv2=0
}
matrix `threshold'[`=`nbitems'-`i'+1',1]=`relv'
matrix `threshold'[`=`nbitems'-`i'+1',2]=`relv2'
}
if "`method'"=="centroid" {
di _col(0) in ye %4.0f `=`i'-`nbkerk'+`nbkerg'' _col(12) %4.0f `=`nbitems'-`i'' _col(20) "`nomj'" _col(33) "`nomk'" _col(45) %7.0f `=`i'+`nbitems'' _col(52) %8.4f `vp'[`clus',8] _col(62) %8.4f `minegenval' _col(72) %7.3f `vp'[`clus',9] "%" _col(83) _col(84) %5.2f `relv' "`percent'" _col(93) %8.2f `relv2' "`percent'"
}
else {
di _col(0) in ye %4.0f `=`i'-`nbkerk'+`nbkerg'' _col(12) %4.0f `=`nbitems'-`i'' _col(20) "`nomj'" _col(33) "`nomk'" _col(45) %7.0f `=`i'+`nbitems'' _col(52) %8.4f `vp'[`clus',8] _col(62) %8.4f `minegenval' _col(72) %7.3f `vp'[`clus',9] "%" _col(83) %8.4f `vp'[`clus',11] _col(94) %6.2f `relv' "`percent'" _col(103) %8.2f `relv2' "`percent'"
}
}
}
matrix `indexes'[`nbitems',3]=`vp'[`=2*`nbitems'-1',5] /*RELATIVE VARIATION OF THE T OR G CRITERION*/
matrix `indexes'[`nbitems',7]=`indexes'[`nbitems',3]/`indexes'[`=`nbitems'-1',3] /*RELATIVE VARIATION OF THE T OR G CRITERION*/
local i=2*`nbitems'-1
matrix `threshold'[1,1]=`vp'[`i',5]/`vp'[`i',6]*100-100
matrix `threshold'[1,2]=(`threshold'[1,1]-`threshold'[2,1])/abs(`threshold'[2,1])*100
matrix `threshold'[1,3]=`vp'[`i',5]
if "`method'"=="centroid" {
di in ye _col(62) %8.4f `threshold'[1,3] _col(83) %6.2f `threshold'[1,1] "`percent'" _col(93) %8.2f `threshold'[1,2] "`percent'"
}
else {
di in ye _col(62) %8.4f `threshold'[1,3] _col(94) %6.2f `threshold'[1,1] "`percent'" _col(103) %8.2f `threshold'[1,2] "`percent'"
}
local best=0
local maxbest=0
local best2=0
local maxbest2=0
local demipart=int(`nbitems'/2)+1
forvalues i=1/`demipart' {
if `threshold'[`i',3]>`maxbest2' {
if `threshold'[`i',3]>`maxbest' {
local maxbest2=`maxbest'
local best2=`best'
local maxbest=`threshold'[`i',3]
local best=`i'
}
else {
local maxbest2=`threshold'[`i',3]
local best2=`i'
}
}
}
di in green "{hline 111}"
di
di in green "{hline 60}"
di in green "PROPOSED BEST PARTITIONS (AMONG THE `demipart' SMALLER PARTITIONS)"
di in green "{hline 60}"
di
di in yellow _col(4) "Based on the variation of the T criterion: " _col(60) in gr "Partitions in " in ye `best' " or " `best2' in gr " clusters"
return local bestvariation `best' `best2'
local bestt=0
local bestt2=0
local var=0
local var2=0
forvalues i=1/`nbitems' {
if `threshold'[`i',1]>`var2'&`i'<`demipart' {
if `threshold'[`i',1]>`var' {
local bestt2=`bestt'
local var2=`var'
local var=`threshold'[`i',1]
local bestt=`i'
}
else {
local var2=`threshold'[`i',1]
local bestt2=`i'
}
}
}
di in yellow _col(4) "Based on the research of a threshold: " _col(60) in gr "Partitions in " in ye `bestt' " or " `bestt2' in gr " clusters"
forvalues i=`=`nbitems'+1'/`=`nbitems'+`nbkerk'-`nbkerg'' {
matrix `vp'[`i',6]=`totvar'-`T'
matrix `vp'[`i',8]=`T'
matrix `vp'[`i',9]=`T'/`nbitems'*100
}
return local bestthresold `bestt' `bestt2'
forvalues i=1/`nbitems' {
if `threshold'[`i',2]>`var2'&`i'<`demipart' {
if `threshold'[`i',2]>`var' {
local bestt2=`bestt'
local var2=`var'
local var=`threshold'[`i',2]
local bestt=`i'
}
else {
local var2=`threshold'[`i',2]
local bestt2=`i'
}
}
}
di in yellow _col(4) "Based on the research of a threshold (second order): " _col(60) in gr "Partitions in " in ye `bestt' " or " `bestt2' in gr " clusters"
return local bestthresold2 `bestt' `bestt2'
}
/******BAR CHART *******/
if "`bar'"!="" {
drop _all
qui set obs `nbitems'
qui svmat `indexes' ,names(v)
qui gen id=`nbitems'-_n
qui replace v7=. in 1
qui drop if id>`nbitems'-`nbkerk'+`nbkerg'-1
label variable id "Number of clusters"
label variable v3 "T variation"
qui su v3 if id!=0
local maxv3=ceil(r(max)*5)/5
local minv3=floor(r(min)*5)/5
label variable v4 "Relative T variation"
label variable v7 "Relative T variation order 2"
graph twoway (bar v3 id, name(bar,replace) vert yaxis(1))(line v4 id,yaxis(2))/*(line v6 id,yaxis(3))(line v5 id,yaxis(4))*/(line v7 id,yaxis(5)) if id!=0,ylabel(`minv3'(0.2)`maxv3') xlabel(1(1)`=`nbitems'-`nbkerk'+`nbkerg'-1')
}
/****** DENDROGRAM********/
drop _all
qui set obs `nbitems'
qui svmat `matclus' ,names(v)
local listorder
forvalues i=`nbitems'(-1)1 {
local listorder `listorder' v`i'
}
qui gen id=_n
qui sort `listorder'
capture cluster delete clv,zap
qui cluster complete v* ,name(clv)
qui replace clv_id=_n
qui replace clv_ord=id
qui replace clv_hgt=.
qui gen fait=0
qui gen clus=0
forvalues i=2/`nbitems' {
local ligne=`nbitems'+`i'-1
if (`vp'[`ligne',3]<=`nbitems') {
local first=`vp'[`ligne',3]
gsort +fait -v`i' +clv_id
}
else {
local first=`vp'[`ligne',4]
gsort +fait -v`i' +clv_id
}
if "`deltaT'"!="" {
qui replace clv_hgt=`vp'[`ligne',6] in 1
}
else {
qui replace clv_hgt=100-`vp'[`ligne',9] in 1
}
qui replace fait=1 in 1
qui replace clus=`vp'[`ligne',1] in 1
}
if "`dendro'"=="" {
qui gen label=""
forvalues i=1/`nbitems' {
qui replace label=abbrev("`label`i''",`abbrev') if clv_id==`i'
}
sort clv_id
if `nbitems'>`cutnumber' {
local var "Groups of variables"
local cut cutnumber(`cutnumber') /*labcutn*/
}
else {
local var "Variables"
local cut label(label)
}
qui su clv_hgt
local tmp=r(max)
local max=floor(`tmp')+.5
if `tmp'>`max' {
local max=`max'+.5
}
local maxvar=`max'+5
if "`title'"=="" {
local title "Clustering around Latent Variables (CLV)"
}
if "`caption'"!="" {
local var "`caption'"
}
if "`deltaT'"!="" {
local titleL "Variation of the T criterion"
local yl "0(.5)`max'"
}
else {
local titleL "% Unexplained Variance"
local yl "0(25)`maxvar'"
}
if "`textsize'"=="" {
local textsize: word `=min(int(`nbitems'/15)+1,5)' of medium medsmall small vsmall tiny
}
if "`horizontal'"!="" {
cluster dendro clv, name (dendrogram,replace) hor ytitle("`var'") `showcount' xtitle("`titleL'") title("`title'",span) xlabel(`yl') ylabel(,angle(0) labsize(`textsize')) `cut'
}
else {
cluster dendro clv, name(dendrogram,replace) xtitle("`var'") `showcount' ytitle("`titleL'") title("`title'",span) ylabel(`yl') xlabel(,labsize(`textsize')) `cut'
}
if "`savedendro'"!="" {
graph save dendrogram `savedendro'
}
}
/***** END DENDROGRAM*****/
/**** TEST ********/
if `cons'>`nbitems'-`nbkerk'+`nbkerg' {
di in ye "The {hi:consolidation} is not possible for a number of clusters superior to the initial number of clusters"
local cons=0
}
/***** CONSOLIDATION PROCEDURE ********/
if `cons'!=0 {
sort v`=`nbitems'-`cons'+1'
gen cut`cons'=1
local g=1
forvalues i=2/`nbitems' {
if v`=`nbitems'-`cons'+1'[`i']!=v`=`nbitems'-`cons'+1'[`=`i'-1'] {
local g=`g'+1
}
qui replace cut`cons'=`g' in `i'
}
sort id
tempname group
mkmat cut`cons',matrix(`group')
use `clvfiletmp',replace
local n=1
local env=1
while (`env'==1) {
forvalues g=1/`cons' {
local list`g'
forvalues i=1/`nbitems' {
if `group'[`i',1]==`g' {
local list`g' `list`g'' ``i''
}
}
}
di
if `n'==1 {
di in green "{hline 30}"
di in green "PARTITION BEFORE CONSOLIDATION"
di in green "{hline 30}"
}
di
local col=13
local max=0
local critT=0
forvalues g=1/`cons' {
di _col(`col') in green "CLUSTER " %2.0f `g' _c
local col=`col'+12
local tmp`g':word count `list`g''
if `tmp`g''>`max' {
local max `tmp`g''
}
tempvar f1`g'
if "`method'"=="centroid" {
qui genscore `list`g'',score(`f1`g'') mean
qui su `f1`g'' [`weight'`exp']
local var=r(Var)
local critT=`critT'+`tmp`g''*`var'
qui pca `list`g'' [`weight'`exp'] ,cov
local trace=e(trace)
local explained`g'=`tmp`g''*`var'/`trace'
}
else {
if `tmp`g''>1 {
if "`method'"=="classical"|"`method'"=="v2" {
qui pca `list`g'' [`weight'`exp'] ,cov
matrix `Ev'=e(Ev)
local trace=e(trace)
qui predict `f1`g''
}
else if "`method'"=="polychoric"|"`method'"=="polychoric" {
qui polychoricpca `list`g'' [`weight'`exp'] ,score(`f1`g'') nscore(1)
matrix `Ev'=r(eigenvalues)
local trace=0
forvalues m=1/`tmp`g''{
local trace =`trace'+`r(lambda`m')'
}
rename `f1`g''1 `f1`g''
}
local lambda1=`Ev'[1,1]
local explained`g'=`lambda1'/`trace'
local critT=`critT'+`lambda1'
}
else {
local explained`g'=1
qui gen `f1`g''=`list`g''
if "`standardized'"=="" {
local critT=`critT'+1
}
else {
qui su [`weight'`exp']
local critT=`critT'+`r(Var)'
}
}
}
}
di
di _col(1) in green "ITEMS :" _c
forvalues i=1/`max' {
local col=15
forvalues g=1/`cons' {
local tmpv:word `i' of `list`g''
local tmpv=abbrev("`tmpv'",8)
di _col(`col') in ye %8s "`tmpv'" _c
local col= `col'+12
}
di
}
local col=16
di _col(1) in green "Expl. Var:" _c
forvalues g=1/`cons' {
di _col(`col') in ye %6.2f `=`explained`g''*100' in green "%" _c
local col= `col'+12
}
di
di
di in green "Variance Explained : " in ye %6.3f `=`critT'/`totvar'*100' in green "%"
di in green "T criterion : " in ye %6.4f `critT'
di
di in green "{hline 21}"
di in green "CONSOLIDATION: STEP `n'"
di in green "{hline 21}"
local n=`n'+1
local env=0
if "`method'"=="polychoric"|"`method'"=="polychoricv2" {
local command polychoric
}
else {
local command corr
}
forvalues i=1/`nbitems' {
local env`i'=0
local gr=`group'[`i',1]
qui `command' ``i'' `f1`gr'' [`weight'`exp']
local corr`i'=r(rho)
local corrs`i'=r(rho)
forvalues g=1/`cons' {
qui `command' ``i'' `f1`g'' [`weight'`exp']
local tmpcorr=r(rho)
if `g'!=`gr'&(((`corr`i'')<(`tmpcorr')&"`method'"=="centroid")|((`corr`i'')^2<(`tmpcorr')^2& "`method'"!="centroid")) {
local env=1
local env`i'=1
matrix `group'[`i',1]=`g'
local corr`i'=`tmpcorr'
}
}
if `env`i''==1 {
local g=`group'[`i',1]
di in green "The variable " in ye "``i'' " in green "is assigned to the `g'th group" _c
if "`method'"!="centroid" {
di in green " (corr^2=" %6.4f in ye (`corr`i'')^2 in green " vs " in ye %6.4f (`corrs`i'')^2 in green ")"
}
else {
di in green " (corr=" %6.4f in ye (`corr`i'') in green " vs " in ye %6.4f (`corrs`i'') in green ")"
}
}
}
if `env'==0 {
local latent
forvalues g=1/`cons' {
label variable `f1`g'' "Latent variable `g'"
if "`genlv'"!="" {
if "`replace'"!=""{
capture drop `genlv'`g'
}
gen `genlv'`g'=`f1`g''
}
local latent `latent' `f1`g''
return local cluster`g' `list`g''
}
matrix `group'=`group''
matrix colnames `group'=`varlist'
return matrix affect=`group'
di in ye "Stability of the partition is achieved"
if `cons'<=7 {
di
di in green "{hline 42}"
di in green "CORRELATION MATRIX OF THE LATENT VARIABLES"
di in green "{hline 42}"
di
di in green "{hline `=(`cons')*13+15'}"
forvalues g=1/`cons' {
di _col(`=13*(`g'-1)+23') in green "Latent" _c
}
di
forvalues g=1/`cons' {
di _col(`=13*(`g'-1)+19') in green "variable `g'" _c
}
di
di in green "{hline `=(`cons')*13+15'}"
forvalues g=1/`cons' {
di in green "Latent variable `g'" _c
forvalues h=1/`g' {
local loc=13*`h'+10
qui corr `f1`g'' `f1`h'' [`weight'`exp']
local rho=r(rho)
di _col(`loc') in ye %6.4f `rho' _c
}
di
}
di in green "{hline `=(`cons')*13+15'}"
di
}
if `nbind'<=800&"`biplot'"==""&"`weight'"=="" {
local max=max(`matsize',`nbind')
qui set matsize `max'
if "`addvar'"!="" {
local add `varlist'
}
if "`dim'"=="" {
local dim 1 2
}
qui biplotvlab `latent' `add', name(biplot,replace) norow colopts(name(latent variables)) alpha(0) title(Biplot of the latent variables) labdes(size(vsmall) color(blue)) stretch(1) `std' dim(`dim')
}
else if `nbind'>800&"`biplot'"==""&"`weight'"==""{
di in green "There is more than 800 individuals, so the {hi:biplot} option is disabled"
}
else if "`weight'"!=""&&"`biplot'"==""{
di in green "The {hi:biplot} option is disabled because you use weights"
}
}
forvalues g=1/`cons' {
drop `f1`g''
}
}
}
/***** END OF THE CONSOLIDATION PROCEDURE********/
qui set matsize `matsize'
if "`genlv'"!="" {
qui keep `id' `genlv'1-`genlv'`cons'
tempfile lvfile
qui sort `id'
qui save `lvfile',replace
}
use `clvfile',replace
if "`genlv'"!="" {
qui sort `id'
qui merge `id' using `lvfile'
}
qui drop `id'
capture drop _merge
capture cluster delete clv,zap
matrix colnames `vp'="Parent" "Number of clusters" "Child 1" "Child 2" "T" "DeltaT" "deltaT" "Explained Variance" "Explained Variance (%)" "First eigenvalue" "Second Eigenvalue" "2nd order deltaT"
if "`save'"!="" {
qui matrix `save'_vp=`vp'
qui matrix `save'_matclus=`matclus'
qui global `save'_varlist `varlist'
qui global `save'_method `method'
qui global `save'_kernel `kernel'
}
return matrix vp=`vp'
return matrix matclus=`matclus'
return local varlist `varlist'
return local method `method'
return local kernel `kernel'
restore,not
end

File diff suppressed because it is too large Load Diff

@ -0,0 +1,806 @@
*! Version 2.7 6October2005
*! Jean-Benoit Hardouin
************************************************************************************************************
* Stata program : clv
* Clustering of variables around latent variables
* Version 2.7 : October 6, 2005 /*return, multiple graphs, polychoric+consolidation*/
*
* History
* Version 1 (2005-06-11): Jean-Benoit Hardouin
* Version 1.1 (2005-07-07): Jean-Benoit Hardouin /*small bug in the consolidation process with cluster of only one variable*/
* Version 1.2 (2005-07-08): Jean-Benoit Hardouin /*Bug in the consolidation procedure when there is negative correlation*/
* Version 2 (2005-09-03): Jean-Benoit Hardouin /*Horizontal dendrograms (with Stata 9)*/
* Version 2.1 (2005-09-08): Jean-Benoit Hardouin /*More flexibility to abbreviate the names of the variables (with Stata 9)*/
* Version 2.1.1 (2005-09-08): Jean-Benoit Hardouin /*Integration of some requests of Ronan Conroy*/
* Version 2.1.2 (2005-09-08): Jean-Benoit Hardouin /*Possibility to give a title and an X/Y caption*/
* Version 2.2 (2005-09-11): Jean-Benoit Hardouin /*Kernel option*/
* Version 2.3 (2005-09-12): Jean-Benoit Hardouin /*Polychoric option*/
* Version 2.4 (2005-09-13): Jean-Benoit Hardouin /*v2 option*/
* Version 2.5 (2005-09-21): Jean-Benoit Hardouin /*corrections*/
* Version 2.6 (2005-10-02): Jean-Benoit Hardouin /*centroid method, biplot*/
*
* Jean-benoit Hardouin, Regional Health Observatory of Orléans - France
* jean-benoit.hardouin@orscentre.org
*
* News about this program : http://anaqol.free.fr
* FreeIRT Project : http://freeirt.free.fr
*
* Copyright 2005 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
************************************************************************************************************
program define clv,rclass
version 9.0
syntax [varlist(default=none)] [if] [in] [, CUTnumber(int 30) bar CONSolidation(int 0) noDENdro noSTANDardized deltaT HORizontal SHOWcount ABBrev(int 14) TITle(string) CAPtion(string) KERnel(numlist) METHod(string) noBIPlot ADDvar]
preserve
tempfile clvfile
qui save `clvfile',replace
local matsize=c(matsize)
local none=0
if "`varlist'"=="" {
    * Replay mode: without a varlist, both matrices returned by a previous
    * clv run must still be available in r()
    capture confirm matrix r(vp)
    local rcvp=_rc
    capture confirm matrix r(matclus)
    local rcmat=_rc
    if `rcvp'==0&`rcmat'==0 {
        local none=1
    }
    if `none'==0 {
        di in red "You cannot use the {hi:clv} command without {hi:varlist} if you have not already run {hi:clv}"
        error 198
        exit
    }
}
* Temporary matrix names used throughout the program:
* matclus records, step by step, the cluster number of every item
* vp stores one row of statistics per item and per merged cluster
tempname matclus vp
* Replay mode (no varlist given): reload the results that the previous run left in r()
if `none'==1 {
matrix `vp'=r(vp)
matrix `matclus'=r(matclus)
local varlist `r(varlist)'
* Positional macros 1, 2, ... now hold the names of the items
tokenize `varlist'
local nbitems=rowsof(`matclus')
* A method typed by the user is ignored here: the stored method is always used
if "`method'"!="" {
di in green "The {hi:method} option can not be modified without specification of the varlist. {hi:method} is omitted."
}
local method `r(method)'
local kernel `r(kernel)'
* Disabled check kept from an earlier version that refused consolidation with polychoric
/*
if "`method'"=="polychoric" {
di in red "The {hi:consolidation} is not possible with the {hi:polychoric} option"
error 198
exit
}
*/
}
* When no method is requested, fall back on the classical one
if "`method'"=="" local method classical
* Polychoric methods combined with nostandardized: tell the user what is analysed
if inlist("`method'","polychoric","polychoricv2")&"`standardized'"!="" {
    di in green "Initial variables are used with the {hi:polychoric} methods"
    di in green "But the procedure is based on the matrix of the polychoric correlations"
    di
}
* Reject anything that is not one of the five supported methods
if !inlist("`method'","classical","v2","centroid","polychoric","polychoricv2") {
    di in red "The {hi:method} `method' is unknown"
    error 198
    exit
}
tokenize `varlist'
local nbitems : word count `varlist'
marksample touse
qui keep if `touse'
local mat=max(`matsize',`=`nbitems'*2')
qui set matsize `mat'
if `nbitems'<3&`none'!=1 {
di in red "You need at least 3 variables"
error 198
exit
}
forvalues i=1/`nbitems'{
    * Text shown for item i on the dendrogram: its variable label,
    * or its name when the variable carries no label
    local label`i':variable label ``i''
    if "`label`i''"=="" local label`i' ``i''
    * Polychoric methods keep the items untouched; every other method centres
    * them and divides by the standard deviation unless nostandardized was given
    if !inlist("`method'","polychoric","polychoricv2") {
        qui su ``i''
        local mean=r(mean)
        local sd=1
        if "`standardized'"=="" local sd=r(sd)
        qui replace ``i''=(``i''-`mean')/`sd'
    }
}
tempfile clvfiletmp
qui save `clvfiletmp',replace
qui count
local nbind=r(N)
local cons=`consolidation'
if "`method'"!="polychoric"&"`method'"!="polychoricv2" {
local totvar=0
forvalues i=1/`nbitems' {
qui su ``i''
local totvar=`totvar'+`r(Var)'
}
}
else {
local totvar `nbitems'
}
* Kernel bookkeeping
* nbkerk: total number of items placed in kernels, nbkerg: number of kernels
local nbkerk=0
local nbkerg=0
if "`kernel'"!="" {
local nbkerg:word count `kernel'
* fin0 seeds the recursion below so that the first kernel starts at item 1
local fin0=0
forvalues i=1/`nbkerg' {
* nbi: size of kernel i, deb and fin: positions of its first and last items in the varlist
local nbi`i':word `i' of `kernel'
local nbkerk=`nbkerk'+`nbi`i''
local deb`i'=`fin`=`i'-1''+1
local fin`i'=`deb`i''+`nbi`i''-1
* list i collects the names of the items of kernel i
local list`i'
forvalues j=`deb`i''/`fin`i'' {
local list`i' `list`i'' ``j''
}
}
* kerclus has one row per forced merge: column 1 is the number of the new cluster,
* columns 2 and 3 are the two children that are joined
tempname kerclus
matrix `kerclus'=J(`=`nbkerk'-`nbkerg'',3,0)
local ligne=1
forvalues g=1/`nbkerg' {
* First merge of kernel g joins its first two items
* NOTE(review): this presumes every kernel holds at least two items - confirm
matrix `kerclus'[`ligne',1]=`nbitems'+`ligne'
matrix `kerclus'[`ligne',2]=`deb`g''
matrix `kerclus'[`ligne',3]=`deb`g''+1
* clus g tracks the number of the cluster that currently represents kernel g
local clus`g'=`nbitems'+`ligne'
local ligne=`ligne'+1
if `nbi`g''>2 {
* Each remaining item of the kernel is joined to the cluster built on the previous row
forvalues i=2/`=`nbi`g''-1' {
matrix `kerclus'[`ligne',1]=`nbitems'+`ligne'
matrix `kerclus'[`ligne',2]=`deb`g''+`i'
matrix `kerclus'[`ligne',3]=`nbitems'+`ligne'-1
local clus`g'=`nbitems'+`ligne'
local ligne=`ligne'+1
}
}
}
}
* The kernels cannot contain more items than the varlist provides
if `nbitems'<`nbkerk' {
di in red "You cannot define more variables in the {hi:kernel} option than items in the {hi:varlist}"
error 198
exit
}
di
di in green "{hline 30}"
di in green "TOTAL VARIANCE: " in ye %14.3f `totvar'
di in green "NUMBER OF INDIVIDUALS: " in ye %7.0f `nbind'
di in green "METHOD:" in ye _col(`=31-length("`method'")') "`=upper("`method'")'"
di in green "{hline 30}"
di
if "`kernel'"!="" {
forvalues i=1/`nbkerg' {
di in green "The kernel numbered " in ye `clus`i'' in green " is composed of `nbi`i'' variables: " in ye "`list`i''"
di
}
}
else {
local nbkerk=0
local nbkerg=0
}
tempname Ev
if `none'!=1 {
matrix `matclus'=J(`nbitems',`nbitems',0)
matrix `vp'=J(`=2*`nbitems'-1',10,0)
* Initial partition: every item is its own cluster
forvalues i=1/`nbitems' {
    * Column 1 of matclus: item i belongs to cluster i
    matrix `matclus'[`i',1]=`i'
    * Column 1 of vp: first eigenvalue of the singleton cluster.
    * It is the variance of the item for the covariance based methods and 1
    * for BOTH polychoric methods, which work on a correlation matrix.
    * (The second test used to repeat polychoric, so polychoricv2 wrongly
    * received the raw variance of the item.)
    if "`method'"!="polychoric"&"`method'"!="polychoricv2" {
        qui su ``i''
        matrix `vp'[`i',1]=r(Var)
    }
    else {
        matrix `vp'[`i',1]=1
    }
    * Column 8: cluster number, column 9: explained variance, column 10: same in percent
    matrix `vp'[`i',8]=`i'
    matrix `vp'[`i',9]=`totvar'
    matrix `vp'[`i',10]=100
}
matrix `vp'[`nbitems',3]=`nbitems'
if "`method'"=="centroid" {
local crit G
di in green "{hline 89}"
di in green _col(7) "Number of" _col(69) "`crit'" _col(71) "Explained" _col(82) "Relative"
di in green "Step" _col(8) "clusters" _col(20) "Child 1" _col(33) "Child 2" _col(46) "Parent" _col(53) "`crit' value" _col(61) "variation" _col(72) "Variance" _col(81) "Variation"
di in green "{hline 89}"
}
else {
local crit T
di in green "{hline 100}"
if "`method'"=="v2"|"`method'"=="polychoricv2" {
di in green _col(84) "Maximal"
}
else {
di in green _col(84) "Current"
}
di in green _col(7) "Number of" _col(69) "`crit'" _col(71) "Explained" _col(85) "Second" _col(93) "Relative"
di in green "Step" _col(8) "clusters" _col(20) "Child 1" _col(33) "Child 2" _col(46) "Parent" _col(53) "`crit' value" _col(61) "variation" _col(72) "Variance" _col(81) "Eigenvalue" _col(92) "Variation"
di in green "{hline 100}"
}
tempname threshold
matrix `threshold'=J(`nbitems',3,0)
forvalues i=1/`=`nbitems'-1' {
local clus=`nbitems'+`i'
local minegenval=999999
local minegenval2=999999
forvalues k=1/`=`clus'-1' {
local list`k'
local numlist`k'
forvalues j=1/`clus' {
if (`matclus'[`j',`i']==`k') {
local list`k' `list`k'' ``j''
local numlist`k' `numlist`k'' `j'
}
}
}
if `clus'>`nbitems'+`nbkerk'-`nbkerg' {
if "`method'"=="centroid" {
tempname centrj centrk diffjk
}
forvalues j=1/`clus' {
local nblistj:word count `list`j''
forvalues k=`=`j'+1'/`clus' {
local nblistk:word count `list`k''
if `nblistj'!=0&`nblistk'!=0 {
if "`method'"=="centroid" {
qui genscore `list`j'',score(`centrj') mean
qui su `centrj'
local Varj=r(Var)
qui genscore `list`k'',score(`centrk') mean
qui su `centrk'
local Vark=r(Var)
qui gen `diffjk'=`centrk'-`centrj'
qui su `diffjk'
local Varjk=r(Var)
drop `centrj' `centrk' `diffjk'
local ev=(`nblistj'*`nblistk')/(`nblistj'+`nblistk')*`Varjk'
if `ev'<`minegenval' {
local minegenval=`ev'
local minj `j'
local mink `k'
local eigen=0
local eigen2=0
}
}
else {
if "`method'"=="classical"|"`method'"=="v2" {
qui pca `list`j'' `list`k'',cov
matrix `Ev'=e(Ev)
}
else if "`method'"=="polychoric"|"`method'"=="polychoricv2" {
qui polychoricpca `list`j'' `list`k''
matrix `Ev'=r(eigenvalues)
}
local lambda1=`Ev'[1,1]
local lambda2=`Ev'[1,2]
local ev=`vp'[`j',1]+`vp'[`k',1]-`lambda1'
/*
local t1=`vp'[`j',1]
local t2=`vp'[`k',1]
di "`ev'=`t1'+`t2'-`lambda1'"
*/
local ev2=max(`vp'[`j',5],`vp'[`k',5],`lambda2')
if ("`method'"=="v2"|"`method'"=="polychoricv2")&`ev'<`minegenval' {
local minegenval=`ev'
local eigen=`lambda1'
local minj `j'
local mink `k'
local eigen2=`lambda2'
}
else if ("`method'"=="classical"|"`method'"=="polychoric")&`ev2'<`minegenval2' {
local minegenval=`ev'
local minegenval2=`ev2'
local eigen=`lambda1'
local minj `j'
local mink `k'
local eigen2=`ev2'
}
}
}
}
}
}
else {
local ligne=`clus'-`nbitems'
local j=`kerclus'[`ligne',2]
local k=`kerclus'[`ligne',3]
if "`method'"!="centroid" {
if "`method'"=="classical"|"`method'"=="v2" {
qui pca `list`j'' `list`k'',cov
matrix `Ev'=e(Ev)
}
else if "`method'"=="polychoric"|"`method'"=="polychoricv2"{
qui polychoricpca `list`j'' `list`k''
matrix `Ev'=r(eigenvalues)
}
local lambda1=`Ev'[1,1]
local lambda2=`Ev'[1,2]
local ev=`vp'[`j',1]+`vp'[`k',1]-`lambda1'
local minegenval=`ev'
local eigen=`lambda1'
local minj `j'
local mink `k'
local eigen2=`lambda2'
}
else if "`method'"=="centroid" {
local nblistj:word count `list`j''
local nblistk:word count `list`k''
tempname v1 v2 v12
qui genscore `list`j'',score(`v1') mean
qui genscore `list`k'',score(`v2') mean
qui gen `v12'=`v1'-`v2'
qui su `v12'
local varj=r(Var)
local minegenval=(`nblistj'*`nblistk')/(`nblistj'+`nblistk')*`varj'
local minj `j'
local mink `k'
}
}
if `minj'<=`nbitems' {
local nomj=abbrev("``minj''",14)
}
else {
local nomj `minj'
}
*set trace off
if `mink'<=`nbitems' {
local nomk=abbrev("``mink''",14)
}
else {
local nomk `mink'
}
forvalues j=1/`nbitems' {
matrix `matclus'[`j',`=`i'+1']=`matclus'[`j',`i']
}
if "`method'"!="centroid" {
matrix `vp'[`clus',1]=`eigen' /*FIRST EIGEN VALUE OF THE NEW CLUSTER*/
matrix `vp'[`clus',2]=`minegenval' /*VARIATION OF THE T CRITERION*/
matrix `vp'[`clus',3]=`vp'[`=`clus'-1',3]-`vp'[`clus',2] /*T CRITERION*/
matrix `vp'[`clus',4]=`vp'[`clus',2]/`vp'[`=`clus'-1',3] /*RELATIVE VARIATION OF THE T CRITERION*/
matrix `vp'[`clus',5]=`eigen2' /*SECOND EIGEN VALUE OF THE NEW CLUSTER*/
matrix `vp'[`clus',6]=`minj' /*CHILD 1*/
matrix `vp'[`clus',7]=`mink' /*CHILD 2*/
matrix `vp'[`clus',8]=`nbitems'+`i' /*NUMBER OF THE NEW CLUSTER*/
matrix `vp'[`clus',9]=`vp'[`=`clus'-1',9]-`minegenval' /*EXPLAINED VARIANCE*/
matrix `vp'[`clus',10]=`vp'[`clus',9]/`totvar'*100 /*% OF EXPLAINED VARIANCE*/
}
else {
matrix `vp'[`clus',1]=0 /*FIRST EIGEN VALUE OF THE NEW CLUSTER*/
matrix `vp'[`clus',2]=`minegenval' /*VARIATION OF THE G CRITERION*/
matrix `vp'[`clus',3]=`vp'[`=`clus'-1',3]-`vp'[`clus',2] /*G CRITERION*/
matrix `vp'[`clus',4]=`vp'[`clus',2]/`vp'[`=`clus'-1',3] /*RELATIVE VARIATION OF THE T CRITERION*/
matrix `vp'[`clus',5]=0 /*SECOND EIGEN VALUE OF THE NEW CLUSTER*/
matrix `vp'[`clus',6]=`minj' /*CHILD 1*/
matrix `vp'[`clus',7]=`mink' /*CHILD 2*/
matrix `vp'[`clus',8]=`nbitems'+`i' /*NUMBER OF THE NEW CLUSTER*/
matrix `vp'[`clus',9]=`vp'[`=`clus'-1',9]-`minegenval' /*EXPLAINED VARIANCE*/
matrix `vp'[`clus',10]=`vp'[`clus',9]/`totvar'*100 /*% OF EXPLAINED VARIANCE*/
}
foreach j of numlist `numlist`minj'' `numlist`mink'' {
matrix `matclus'[`j',`=`i'+1']=`clus'
}
if "`kernel'"!=""&`i'==`=`nbkerk'-`nbkerg'+1' {
local T=`vp'[`=`clus'-1',9]
di _col(0) in ye "init" _col(12) %4.0f `=`nbitems'-`nbkerk'+`nbkerg'' _col(52) %8.4f `T' _col(62) %8.4f `=`totvar'-`T'' _col(72) %7.3f `=`T'/`totvar'*100' "%"
}
if `clus'>`nbitems'+`nbkerk'-`nbkerg' {
if `clus'==`nbitems'+`nbkerk'-`nbkerg'+1 {
local relv
local percent
}
else {
local relv=(`minegenval'-`vp'[`=`clus'-1',2])/`vp'[`=`clus'-1',3]*100
local percent %
matrix `threshold'[`=`nbitems'-`i'+1',1]=`relv'
matrix `threshold'[`=`nbitems'-`i'+1',3]=`minegenval'
if `i'>1 {
matrix `threshold'[`=`nbitems'-`i'+1',2]=`relv'-`threshold'[`=`nbitems'-`i'+2',1]
}
}
if "`method'"=="centroid" {
di _col(0) in ye %4.0f `=`i'-`nbkerk'+`nbkerg'' _col(12) %4.0f `=`nbitems'-`i'' _col(20) "`nomj'" _col(33) "`nomk'" _col(45) %7.0f `=`i'+`nbitems'' _col(52) %8.4f `vp'[`clus',9] _col(62) %8.4f `minegenval' _col(72) %7.3f `vp'[`clus',10] "%" _col(83) _col(84) %5.2f `relv' "`percent'"
}
else {
di _col(0) in ye %4.0f `=`i'-`nbkerk'+`nbkerg'' _col(12) %4.0f `=`nbitems'-`i'' _col(20) "`nomj'" _col(33) "`nomk'" _col(45) %7.0f `=`i'+`nbitems'' _col(52) %8.4f `vp'[`clus',9] _col(62) %8.4f `minegenval' _col(72) %7.3f `vp'[`clus',10] "%" _col(83) %8.4f `vp'[`clus',5] _col(95) %5.2f `relv' "`percent'"
}
}
}
local i=2*`nbitems'-1
local relv=(`vp'[`i',3]-`vp'[`i',2])/`vp'[`i',3]*100
if "`method'"=="centroid" {
di in ye _col(84) %5.2f `relv' "`percent'"
}
else {
di in ye _col(95) %5.2f `relv' "`percent'"
}
matrix `threshold'[1,1]=`relv'
matrix `threshold'[1,2]=`relv'-`threshold'[2,1]
matrix `threshold'[1,3]=`vp'[`i',3]
*matrix list `threshold'
local best=0
local maxbest=0
local best2=0
local maxbest2=0
forvalues i=1/`nbitems' {
if `threshold'[`i',3]>`maxbest2' {
if `threshold'[`i',3]>`maxbest' {
local maxbest2=`maxbest'
local best2=`best'
local maxbest=`threshold'[`i',3]
local best=`i'
}
else {
local maxbest2=`threshold'[`i',3]
local best2=`i'
}
}
}
di in green "{hline 100}"
di in green "Proposed best partitions: "
di in green "Based on the variation of the T criterion"
di in green _col(10) "1. Partitions in " in ye `best' in green " clusters"
di in green _col(10) "2. Partitions in " in ye `best2' in green " clusters"
return local bestvariation `best' `best2'
local bestt=0
local bestt2=0
local var=0
local var2=0
forvalues i=1/`nbitems' {
if `threshold'[`i',2]>`var2'&`i'<`nbitems'-1 {
if `threshold'[`i',2]>`var' {
local bestt2=`bestt'
local var2=`var'
local var=`threshold'[`i',2]
local bestt=`i'
}
else {
local var2=`threshold'[`i',2]
local bestt2=`i'
}
}
}
di in green "Based on a research of a threshold"
di in green _col(10) "1. Partitions in " in ye `bestt' in green " clusters"
di in green _col(10) "2. Partitions in " in ye `bestt2' in green " clusters"
* Rows of vp created by the forced kernel merges all receive the values of the
* init step, T being the explained variance once the kernels are formed.
* The percentage is taken over the total variance, as everywhere else in vp
* and in the init line of the output; dividing by the number of items was only
* right when the items are standardized.
forvalues i=`=`nbitems'+1'/`=`nbitems'+`nbkerk'-`nbkerg'' {
    matrix `vp'[`i',2]=`totvar'-`T'
    matrix `vp'[`i',9]=`T'
    matrix `vp'[`i',10]=`T'/`totvar'*100
}
return local bestthresold `bestt' `bestt2'
}
if "`bar'"!="" {
drop _all
qui set obs `nbitems'
qui svmat `vp' ,names(v)
qui drop in 1/`nbitems'
qui gen id=`nbitems'-_n
qui drop if id>`nbitems'-`nbkerk'+`nbkerg'-1
label variable id "Number of clusters"
label variable v2 "T variation"
graph twoway bar v2 id, name(bar,replace) vert ,ylabel(0(0.5)2) xlabel(1(1)`=`nbitems'-`nbkerk'+`nbkerg'-1')
}
drop _all
qui set obs `nbitems'
qui svmat `matclus' ,names(v)
local listorder
forvalues i=`nbitems'(-1)1 {
local listorder `listorder' v`i'
}
qui gen id=_n
qui sort `listorder'
capture cluster delete clv,zap
qui cluster complete v* ,name(clv)
qui replace clv_id=_n
qui replace clv_ord=id
qui replace clv_hgt=.
qui gen fait=0
qui gen clus=0
* Fill in the heights of the dendrogram, one merge per pass
forvalues i=2/`nbitems' {
    * Row of vp describing the merge performed at step i-1
    local ligne=`nbitems'+`i'-1
    * Put first the observations not yet used (fait equal to 0), then sort by
    * decreasing cluster number in column v i, then by clv_id.
    * The former if/else ran this same sort in both branches and only set a
    * local named first that was never read, so it is collapsed here.
    gsort +fait -v`i' +clv_id
    * Height of the node: variation of the criterion, or percentage of unexplained variance
    if "`deltaT'"!="" {
        qui replace clv_hgt=`vp'[`ligne',2] in 1
    }
    else {
        qui replace clv_hgt=100-`vp'[`ligne',10] in 1
    }
    * Mark the observation as used and remember the cluster it now stands for
    qui replace fait=1 in 1
    qui replace clus=`vp'[`ligne',8] in 1
}
qui gen label=""
forvalues i=1/`nbitems' {
qui replace label=abbrev("`label`i''",`abbrev') if clv_id==`i'
}
sort clv_id
if `nbitems'>`cutnumber' {
local var "Groups of variables"
local cut cutnumber(`cutnumber') /*labcutn*/
}
else {
local var "Variables"
local cut label(label)
}
qui su clv_hgt
local tmp=r(max)
local max=floor(`tmp')+.5
if `tmp'>`max' {
local max=`max'+.5
}
local maxvar=`max'+5
if "`dendro'"=="" {
if "`title'"=="" {
local title "Clustering around Latent Variables (CLV)"
}
if "`caption'"!="" {
local var "`caption'"
}
if "`deltaT'"!="" {
local titleL "Variation of the T criterion"
local yl "0(.5)`max'"
}
else {
local titleL "% Unexplained Variance"
local yl "0(25)`maxvar'"
}
if "`horizontal'"!="" {
cluster dendro clv, name (dendrogram,replace) hor ytitle("`var'") `showcount' xtitle("`titleL'") title("`title'",span) xlabel(`yl') ylabel(,angle(0)) `cut'
}
else {
cluster dendro clv, name(dendrogram,replace) xtitle("`var'") `showcount' ytitle("`titleL'") title("`title'",span) ylabel(`yl') `cut'
}
}
if `cons'>`nbitems'-`nbkerk'+`nbkerg' {
di in ye "The {hi:consolidation} is not possible for a number of clusters superior to the initial number of clusters"
local cons=0
}
/*
if `cons'!=0&("`method'"=="polychoric"|"`method'"=="polychoricv2") {
di in ye "The {hi:consolidation} is not possible with the {hi:polychoric} methods"
local cons=0
}
*/
if `cons'!=0 {
sort v`=`nbitems'-`cons'+1'
gen cut`cons'=1
local g=1
forvalues i=2/`nbitems' {
if v`=`nbitems'-`cons'+1'[`i']!=v`=`nbitems'-`cons'+1'[`=`i'-1'] {
local g=`g'+1
}
qui replace cut`cons'=`g' in `i'
}
sort id
tempname group
mkmat cut`cons',matrix(`group')
*cluster generate cut = groups(2/9) , name(clv)
use `clvfiletmp',replace
local n=1
local env=1
while (`env'==1) {
forvalues g=1/`cons' {
local list`g'
forvalues i=1/`nbitems' {
if `group'[`i',1]==`g' {
local list`g' `list`g'' ``i''
}
}
}
di
if `n'==1 {
di in green "{hline 30}"
di in green "PARTITION BEFORE CONSOLIDATION"
di in green "{hline 30}"
}
di
local col=1
local max=0
*set trace on
local critT=0
forvalues g=1/`cons' {
di _col(`col') in green "GROUP " %2.0f `g' _c
local col=`col'+10
local tmp`g':word count `list`g''
if `tmp`g''>`max' {
local max `tmp`g''
}
tempvar f1`g'
* Latent variable of cluster g and its contribution to the criterion critT
if "`method'"=="centroid" {
    * Centroid method: the latent variable is the mean score of the items
    qui genscore `list`g'',score(`f1`g'') mean
    qui su `f1`g''
    local var=r(Var)
    local critT=`critT'+`tmp`g''*`var'
}
else {
    if `tmp`g''>1 {
        if "`method'"=="classical"|"`method'"=="v2" {
            * First principal component of the covariance matrix of the cluster
            qui pca `list`g'',cov
            matrix `Ev'=e(Ev)
            qui predict `f1`g''
        }
        else if "`method'"=="polychoric"|"`method'"=="polychoricv2" {
            * Both polychoric methods must come here. The second test used to
            * repeat polychoric, so polychoricv2 computed neither the score
            * nor the eigenvalues and the consolidation failed afterwards.
            qui polychoricpca `list`g'',score(`f1`g'') nscore(1)
            matrix `Ev'=r(eigenvalues)
            * polychoricpca appends the component number to the score name
            rename `f1`g''1 `f1`g''
        }
        local lambda1=`Ev'[1,1]
        local critT=`critT'+`lambda1'
    }
    else {
        * Cluster reduced to one item: the item is its own latent variable
        qui gen `f1`g''=`list`g''
        if "`standardized'"=="" {
            local critT=`critT'+1
        }
        else {
            * Summarize the item explicitly. A bare summarize only gave the
            * right r(Var) because the copy above was the last variable.
            qui su `list`g''
            local critT=`critT'+`r(Var)'
        }
    }
}
}
di
forvalues i=1/`max' {
local col=1
forvalues g=1/`cons' {
local tmpv:word `i' of `list`g''
local tmpv=abbrev("`tmpv'",8)
di _col(`col') in ye %8s "`tmpv'" _c
local col= `col'+10
}
di
}
di
di in green "Variance Explained : " in ye %6.3f `=`critT'/`totvar'*100' in green "%"
di in green "T criterion : " in ye %6.4f `critT'
di
di in green "{hline 21}"
di in green "CONSOLIDATION: STEP `n'"
di in green "{hline 21}"
local n=`n'+1
local env=0
if "`method'"=="polychoric"|"`method'"=="polychoricv2" {
local command polychoric
}
else {
local command corr
}
forvalues i=1/`nbitems' {
local env`i'=0
local gr=`group'[`i',1]
qui `command' ``i'' `f1`gr''
local corr`i'=r(rho)
local corrs`i'=r(rho)
forvalues g=1/`cons' {
qui `command' ``i'' `f1`g''
local tmpcorr=r(rho)
if ((`corr`i'')<(`tmpcorr')&"`method'"=="centroid")|((`corr`i'')^2<(`tmpcorr')^2& "`method'"!="centroid") {
local env=1
local env`i'=1
matrix `group'[`i',1]=`g'
local corr`i'=`tmpcorr'
}
}
if `env`i''==1 {
local g=`group'[`i',1]
di in green "The variable " in ye "``i'' " in green "is assigned to the `g'th group" _c
if "`method'"!="centroid" {
di in green " (corr^2=" %6.4f in ye (`corr`i'')^2 in green " vs " in ye %6.4f (`corrs`i'')^2 in green ")"
}
else {
di in green " (corr=" %6.4f in ye (`corr`i'') in green " vs " in ye %6.4f (`corrs`i'') in green ")"
}
}
}
if `env'==0 {
local latent
forvalues g=1/`cons' {
label variable `f1`g'' "Latent variable `g'"
local latent `latent' `f1`g''
return local cluster`g' `list`g''
}
matrix `group'=`group''
matrix colnames `group'=`varlist'
return matrix affect=`group'
di in ye "Stability of the partition is achieved"
if `cons'<=7 {
di
di in green "{hline 42}"
di in green "CORRELATION MATRIX OF THE LATENT VARIABLES"
di in green "{hline 42}"
di
di in green "{hline `=(`cons')*13+15'}"
forvalues g=1/`cons' {
di _col(`=13*(`g'-1)+23') in green "Latent" _c
}
di
forvalues g=1/`cons' {
di _col(`=13*(`g'-1)+19') in green "variable `g'" _c
}
di
di in green "{hline `=(`cons')*13+15'}"
forvalues g=1/`cons' {
di in green "Latent variable `g'" _c
forvalues h=1/`cons' {
local loc=13*`h'+10
qui corr `f1`g'' `f1`h''
local rho=r(rho)
di _col(`loc') in ye %6.4f `rho' _c
}
di
}
di in green "{hline `=(`cons')*13+15'}"
di
}
* Biplot of the latent variables, drawn unless nobiplot was given and only
* for at most 800 individuals
if `nbind'<=800&"`biplot'"=="" {
    * biplotvlab needs a matsize at least equal to the number of individuals
    local max=max(`matsize',`nbind')
    set matsize `max'
    * With addvar the original items are drawn along with the latent variables
    if "`addvar'"!="" {
        local add `varlist'
    }
    qui biplotvlab `latent' `add', name(biplot,replace) norow colopts(name(latent variables)) alpha(0) title(Biplot of the latent variables) labdes(size(vsmall) color(blue)) stretch(1)
}
else if `nbind'>800&"`biplot'"=="" {
    * Only warn when the biplot was actually wanted. The plain else used to
    * print this message as well when the user had asked for nobiplot.
    di in green "There is more than 800 individuals, so the {hi:biplot} is disabled"
}
}
forvalues g=1/`cons' {
drop `f1`g''
}
}
}
set matsize `matsize'
use `clvfile',replace
capture cluster delete clv,zap
return matrix vp=`vp'
return matrix matclus=`matclus'
return local varlist `varlist'
return local method `method'
return local kernel `kernel'
end

@ -0,0 +1,798 @@
*! Version 2.9 9December2005
*! Jean-Benoit Hardouin
************************************************************************************************************
* Stata program : clv
* Clustering of variables around latent variables
* Version 2.9 : December 9, 2005 /*save the latent variables*/
*
* History
* Version 1 (2005-06-11): Jean-Benoit Hardouin
* Version 1.1 (2005-07-07): Jean-Benoit Hardouin /*small bug in the consolidation process with cluster of only one variable*/
* Version 1.2 (2005-07-08): Jean-Benoit Hardouin /*Bug in the consolidation procedure when there is negative correlation*/
* Version 2 (2005-09-03): Jean-Benoit Hardouin /*Horizontal dendrograms (with Stata 9)*/
* Version 2.1 (2005-09-08): Jean-Benoit Hardouin /*More flexibility to abbreviate the names of the variables (with Stata 9)*/
* Version 2.1.1 (2005-09-08): Jean-Benoit Hardouin /*Integration of some requests of Ronan Conroy*/
* Version 2.1.2 (2005-09-08): Jean-Benoit Hardouin /*Possibility to give a title and an X/Y caption*/
* Version 2.2 (2005-09-11): Jean-Benoit Hardouin /*Kernel option*/
* Version 2.3 (2005-09-12): Jean-Benoit Hardouin /*Polychoric option*/
* Version 2.4 (2005-09-13): Jean-Benoit Hardouin /*v2 option*/
* Version 2.5 (2005-09-21): Jean-Benoit Hardouin /*corrections*/
* Version 2.6 (2005-10-02): Jean-Benoit Hardouin /*centroid method, biplot*/
* Version 2.7 (2005-10-06): Jean-Benoit Hardouin /*return, multiple graphs, polychoric+consolidation*/
* Version 2.8 (2005-10-06): Jean-Benoit Hardouin /*fweights*/
*
* Jean-Benoit Hardouin, Regional Health Observatory of Orléans - France
* jean-benoit.hardouin@orscentre.org
*
* News about this program : http://anaqol.free.fr
* FreeIRT Project : http://freeirt.free.fr
*
* Copyright 2005 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
************************************************************************************************************
program define clv,rclass
    * clv: hierarchical clustering of variables around latent variables.
    *
    * With a varlist: the variables are centred (and divided by their sd unless
    * nostandardized is given; polychoric methods leave them untouched), then
    * clusters are merged pairwise during nbitems-1 steps. Results are displayed,
    * optionally graphed (bar, dendrogram, biplot) and, with consolidation(#),
    * the partition in # clusters is consolidated by reassigning variables.
    * Without a varlist: replays from r(vp), r(matclus), r(varlist), r(method)
    * and r(kernel) left by a previous call.
    *
    * Returned: r(vp), r(matclus), r(varlist), r(method), r(kernel),
    *           r(bestvariation), r(bestthresold), and with consolidation
    *           r(cluster#) and r(affect).
    * External commands used on some paths: genscore (centroid),
    * polychoric/polychoricpca (polychoric methods), biplotvlab (biplot).
    version 9.0
    syntax [varlist(default=none)] [if] [in] [fweight] [, CUTnumber(int 30) bar CONSolidation(int 0) noDENdro noSTANDardized deltaT HORizontal SHOWcount ABBrev(int 14) TITle(string) CAPtion(string) KERnel(numlist) METHod(string) noBIPlot ADDvar genlv(string)]
    preserve
    tempfile clvfile
    qui save `clvfile',replace
    local matsize=c(matsize)
    * none==1 means "replay mode": no varlist, results of a previous run are reused
    local none=0
    if "`varlist'"=="" {
        capture confirm matrix r(vp)
        if _rc==0 {
            capture confirm matrix r(matclus)
            if _rc ==0 {
                local none=1
            }
        }
        if `none'==0 {
            di in red "You cannot use the {hi:clv} command without {hi:varlist} if you have not already run {hi:clv}"
            error 198
            exit
        }
    }
    tempname matclus vp
    if `none'==1 {
        matrix `vp'=r(vp)
        matrix `matclus'=r(matclus)
        local varlist `r(varlist)'
        tokenize `varlist'
        local nbitems=rowsof(`matclus')
        if "`method'"!="" {
            di in green "The {hi:method} option can not be modified without specification of the varlist. {hi:method} is omitted."
        }
        local method `r(method)'
        local kernel `r(kernel)'
    }
    if "`method'"=="" {
        local method classical
    }
    if ("`method'"=="polychoric"|"`method'"=="polychoricv2")&"`standardized'"!="" {
        di in green "Initial variables are used with the {hi:polychoric} methods"
        di in green "But the procedure is based on the matrix of the polychoric correlations"
        di
    }
    if "`method'"!="classical"&"`method'"!="v2"&"`method'"!="centroid"&"`method'"!="polychoric"&"`method'"!="polychoricv2" {
        di in red "The {hi:method} `method' is unknown"
        error 198
        exit
    }
    tokenize `varlist'
    local nbitems : word count `varlist'
    marksample touse
    qui keep if `touse'
    local mat=max(`matsize',`=`nbitems'*2')
    qui set matsize `mat'
    if `nbitems'<3&`none'!=1 {
        di in red "You need at least 3 variables"
        error 198
        exit
    }
    * Labels used on the dendrogram; centring/scaling of the working copy of the data
    forvalues i=1/`nbitems'{
        local label`i':variable label ``i''
        if "`label`i''"=="" {
            local label`i' ``i''
        }
        if "`method'"!="polychoric"&"`method'"!="polychoricv2" {
            qui su ``i'' [`weight'`exp']
            local mean=r(mean)
            if "`standardized'"=="" {
                local sd=r(sd)
            }
            else {
                local sd=1
            }
            qui replace ``i''=(``i''-`mean')/`sd'
        }
    }
    * Transformed data are kept aside: they are reloaded for the consolidation
    tempfile clvfiletmp
    qui save `clvfiletmp',replace
    qui su `1' [`weight'`exp']
    local nbind=r(sum_w)
    local cons=`consolidation'
    if "`method'"!="polychoric"&"`method'"!="polychoricv2" {
        local totvar=0
        forvalues i=1/`nbitems' {
            qui su ``i'' [`weight'`exp']
            local totvar=`totvar'+`r(Var)'
        }
    }
    else {
        local totvar `nbitems'
    }
    * Kernels: nbkerg = number of kernels, nbkerk = number of variables in kernels.
    * Kernel g is made of the variables numbered deb_g..fin_g of the varlist and
    * `kerclus' lists the forced merges (new cluster, child, child).
    local nbkerk=0
    local nbkerg=0
    if "`kernel'"!="" {
        local nbkerg:word count `kernel'
        local fin0=0
        forvalues i=1/`nbkerg' {
            local nbi`i':word `i' of `kernel'
            local nbkerk=`nbkerk'+`nbi`i''
            local deb`i'=`fin`=`i'-1''+1
            local fin`i'=`deb`i''+`nbi`i''-1
            local list`i'
            forvalues j=`deb`i''/`fin`i'' {
                local list`i' `list`i'' ``j''
            }
        }
        tempname kerclus
        matrix `kerclus'=J(`=`nbkerk'-`nbkerg'',3,0)
        local ligne=1
        forvalues g=1/`nbkerg' {
            matrix `kerclus'[`ligne',1]=`nbitems'+`ligne'
            matrix `kerclus'[`ligne',2]=`deb`g''
            matrix `kerclus'[`ligne',3]=`deb`g''+1
            local clus`g'=`nbitems'+`ligne'
            local ligne=`ligne'+1
            if `nbi`g''>2 {
                forvalues i=2/`=`nbi`g''-1' {
                    matrix `kerclus'[`ligne',1]=`nbitems'+`ligne'
                    matrix `kerclus'[`ligne',2]=`deb`g''+`i'
                    matrix `kerclus'[`ligne',3]=`nbitems'+`ligne'-1
                    local clus`g'=`nbitems'+`ligne'
                    local ligne=`ligne'+1
                }
            }
        }
    }
    if `nbitems'<`nbkerk' {
        di in red "You cannot define more variables in the {hi:kernel} option than items in the {hi:varlist}"
        error 198
        exit
    }
    di
    di in green "{hline 30}"
    di in green "TOTAL VARIANCE: " in ye %14.3f `totvar'
    di in green "NUMBER OF INDIVIDUALS: " in ye %7.0f `nbind'
    di in green "METHOD:" in ye _col(`=31-length("`method'")') "`=upper("`method'")'"
    di in green "{hline 30}"
    di
    if "`kernel'"!="" {
        forvalues i=1/`nbkerg' {
            di in green "The kernel numbered " in ye `clus`i'' in green " is composed of `nbi`i'' variables: " in ye "`list`i''"
            di
        }
    }
    else {
        local nbkerk=0
        local nbkerg=0
    }
    tempname Ev
    if `none'!=1 {
        * matclus[j,s]: cluster containing variable j at step s.
        * vp: one row per cluster (rows 1..nbitems are the single variables);
        * the meaning of its columns is given where they are filled below.
        matrix `matclus'=J(`nbitems',`nbitems',0)
        matrix `vp'=J(`=2*`nbitems'-1',10,0)
        forvalues i=1/`nbitems' {
            matrix `matclus'[`i',1]=`i'
            * FIX: the second test repeated "polychoric", so polychoricv2 stored the
            * raw variance of the (unstandardized) variable instead of 1
            if "`method'"!="polychoric"&"`method'"!="polychoricv2" {
                qui su ``i'' [`weight'`exp']
                matrix `vp'[`i',1]=r(Var)
            }
            else {
                matrix `vp'[`i',1]=1
            }
            matrix `vp'[`i',8]=`i'
            matrix `vp'[`i',9]=`totvar'
            matrix `vp'[`i',10]=100
        }
        matrix `vp'[`nbitems',3]=`nbitems'
        if "`method'"=="centroid" {
            local crit G
            di in green "{hline 89}"
            di in green _col(7) "Number of" _col(69) "`crit'" _col(71) "Explained" _col(82) "Relative"
            di in green "Step" _col(8) "clusters" _col(20) "Child 1" _col(33) "Child 2" _col(46) "Parent" _col(53) "`crit' value" _col(61) "variation" _col(72) "Variance" _col(81) "Variation"
            di in green "{hline 89}"
        }
        else {
            local crit T
            di in green "{hline 100}"
            if "`method'"=="v2"|"`method'"=="polychoricv2" {
                di in green _col(84) "Maximal"
            }
            else {
                di in green _col(84) "Current"
            }
            di in green _col(7) "Number of" _col(69) "`crit'" _col(71) "Explained" _col(85) "Second" _col(93) "Relative"
            di in green "Step" _col(8) "clusters" _col(20) "Child 1" _col(33) "Child 2" _col(46) "Parent" _col(53) "`crit' value" _col(61) "variation" _col(72) "Variance" _col(81) "Eigenvalue" _col(92) "Variation"
            di in green "{hline 100}"
        }
        tempname threshold
        matrix `threshold'=J(`nbitems',3,0)
        * One merge per step; the new cluster created at step i is numbered nbitems+i
        forvalues i=1/`=`nbitems'-1' {
            local clus=`nbitems'+`i'
            local minegenval=999999
            local minegenval2=999999
            * Current composition of every cluster (names and positions of its variables)
            forvalues k=1/`=`clus'-1' {
                local list`k'
                local numlist`k'
                forvalues j=1/`clus' {
                    if (`matclus'[`j',`i']==`k') {
                        local list`k' `list`k'' ``j''
                        local numlist`k' `numlist`k'' `j'
                    }
                }
            }
            if `clus'>`nbitems'+`nbkerk'-`nbkerg' {
                * Free step: every pair of non-empty clusters is tried and the best one kept
                if "`method'"=="centroid" {
                    tempname centrj centrk diffjk
                }
                forvalues j=1/`clus' {
                    local nblistj:word count `list`j''
                    forvalues k=`=`j'+1'/`clus' {
                        local nblistk:word count `list`k''
                        if `nblistj'!=0&`nblistk'!=0 {
                            if "`method'"=="centroid" {
                                qui genscore `list`j'',score(`centrj') mean
                                qui su `centrj' [`weight'`exp']
                                local Varj=r(Var)
                                qui genscore `list`k'',score(`centrk') mean
                                qui su `centrk' [`weight'`exp']
                                local Vark=r(Var)
                                qui gen `diffjk'=`centrk'-`centrj'
                                qui su `diffjk' [`weight'`exp']
                                local Varjk=r(Var)
                                drop `centrj' `centrk' `diffjk'
                                local ev=(`nblistj'*`nblistk')/(`nblistj'+`nblistk')*`Varjk'
                                if `ev'<`minegenval' {
                                    local minegenval=`ev'
                                    local minj `j'
                                    local mink `k'
                                    local eigen=0
                                    local eigen2=0
                                }
                            }
                            else {
                                if "`method'"=="classical"|"`method'"=="v2" {
                                    qui pca `list`j'' `list`k'' [`weight'`exp'] ,cov
                                    matrix `Ev'=e(Ev)
                                }
                                else if "`method'"=="polychoric"|"`method'"=="polychoricv2" {
                                    qui polychoricpca `list`j'' `list`k'' [`weight'`exp']
                                    matrix `Ev'=r(eigenvalues)
                                }
                                local lambda1=`Ev'[1,1]
                                local lambda2=`Ev'[1,2]
                                * ev: loss on the criterion if j and k are merged
                                local ev=`vp'[`j',1]+`vp'[`k',1]-`lambda1'
                                local ev2=max(`vp'[`j',5],`vp'[`k',5],`lambda2')
                                * v2 methods minimise the loss, the others the second eigenvalue
                                if ("`method'"=="v2"|"`method'"=="polychoricv2")&`ev'<`minegenval' {
                                    local eigen2=`lambda2'
                                    local minegenval=`ev'
                                    local eigen=`lambda1'
                                    local minj `j'
                                    local mink `k'
                                }
                                else if ("`method'"=="classical"|"`method'"=="polychoric")&`ev2'<`minegenval2' {
                                    local minegenval2=`ev2'
                                    local eigen2=`ev2'
                                    local minegenval=`ev'
                                    local eigen=`lambda1'
                                    local minj `j'
                                    local mink `k'
                                }
                            }
                        }
                    }
                }
            }
            else {
                * Kernel step: the merge is imposed by `kerclus'
                local ligne=`clus'-`nbitems'
                local j=`kerclus'[`ligne',2]
                local k=`kerclus'[`ligne',3]
                if "`method'"!="centroid" {
                    if "`method'"=="classical"|"`method'"=="v2" {
                        qui pca `list`j'' `list`k'' [`weight'`exp'],cov
                        matrix `Ev'=e(Ev)
                    }
                    else if "`method'"=="polychoric"|"`method'"=="polychoricv2"{
                        qui polychoricpca `list`j'' `list`k'' [`weight'`exp']
                        matrix `Ev'=r(eigenvalues)
                    }
                    local lambda1=`Ev'[1,1]
                    local lambda2=`Ev'[1,2]
                    local ev=`vp'[`j',1]+`vp'[`k',1]-`lambda1'
                    local minegenval=`ev'
                    local eigen=`lambda1'
                    local minj `j'
                    local mink `k'
                    local eigen2=`lambda2'
                }
                else if "`method'"=="centroid" {
                    local nblistj:word count `list`j''
                    local nblistk:word count `list`k''
                    tempname v1 v2 v12
                    qui genscore `list`j'',score(`v1') mean
                    qui genscore `list`k'',score(`v2') mean
                    qui gen `v12'=`v1'-`v2'
                    qui su `v12' [`weight'`exp']
                    local varj=r(Var)
                    local minegenval=(`nblistj'*`nblistk')/(`nblistj'+`nblistk')*`varj'
                    local minj `j'
                    local mink `k'
                }
            }
            * Names shown for the two children: variable name, or cluster number
            if `minj'<=`nbitems' {
                local nomj=abbrev("``minj''",14)
            }
            else {
                local nomj `minj'
            }
            if `mink'<=`nbitems' {
                local nomk=abbrev("``mink''",14)
            }
            else {
                local nomk `mink'
            }
            forvalues j=1/`nbitems' {
                matrix `matclus'[`j',`=`i'+1']=`matclus'[`j',`i']
            }
            if "`method'"!="centroid" {
                matrix `vp'[`clus',1]=`eigen' /*FIRST EIGEN VALUE OF THE NEW CLUSTER*/
                matrix `vp'[`clus',2]=`minegenval' /*VARIATION OF THE T CRITERION*/
                matrix `vp'[`clus',3]=`vp'[`=`clus'-1',3]-`vp'[`clus',2] /*T CRITERION*/
                matrix `vp'[`clus',4]=`vp'[`clus',2]/`vp'[`=`clus'-1',3] /*RELATIVE VARIATION OF THE T CRITERION*/
                matrix `vp'[`clus',5]=`eigen2' /*SECOND EIGEN VALUE OF THE NEW CLUSTER*/
                matrix `vp'[`clus',6]=`minj' /*CHILD 1*/
                matrix `vp'[`clus',7]=`mink' /*CHILD 2*/
                matrix `vp'[`clus',8]=`nbitems'+`i' /*NUMBER OF THE NEW CLUSTER*/
                matrix `vp'[`clus',9]=`vp'[`=`clus'-1',9]-`minegenval' /*EXPLAINED VARIANCE*/
                matrix `vp'[`clus',10]=`vp'[`clus',9]/`totvar'*100 /*% OF EXPLAINED VARIANCE*/
            }
            else {
                matrix `vp'[`clus',1]=0 /*FIRST EIGEN VALUE OF THE NEW CLUSTER*/
                matrix `vp'[`clus',2]=`minegenval' /*VARIATION OF THE G CRITERION*/
                matrix `vp'[`clus',3]=`vp'[`=`clus'-1',3]-`vp'[`clus',2] /*G CRITERION*/
                matrix `vp'[`clus',4]=`vp'[`clus',2]/`vp'[`=`clus'-1',3] /*RELATIVE VARIATION OF THE G CRITERION*/
                matrix `vp'[`clus',5]=0 /*SECOND EIGEN VALUE OF THE NEW CLUSTER*/
                matrix `vp'[`clus',6]=`minj' /*CHILD 1*/
                matrix `vp'[`clus',7]=`mink' /*CHILD 2*/
                matrix `vp'[`clus',8]=`nbitems'+`i' /*NUMBER OF THE NEW CLUSTER*/
                matrix `vp'[`clus',9]=`vp'[`=`clus'-1',9]-`minegenval' /*EXPLAINED VARIANCE*/
                matrix `vp'[`clus',10]=`vp'[`clus',9]/`totvar'*100 /*% OF EXPLAINED VARIANCE*/
            }
            foreach j of numlist `numlist`minj'' `numlist`mink'' {
                matrix `matclus'[`j',`=`i'+1']=`clus'
            }
            * First free step after the kernels: show the starting point ("init" line)
            if "`kernel'"!=""&`i'==`=`nbkerk'-`nbkerg'+1' {
                local T=`vp'[`=`clus'-1',9]
                di _col(0) in ye "init" _col(12) %4.0f `=`nbitems'-`nbkerk'+`nbkerg'' _col(52) %8.4f `T' _col(62) %8.4f `=`totvar'-`T'' _col(72) %7.3f `=`T'/`totvar'*100' "%"
            }
            if `clus'>`nbitems'+`nbkerk'-`nbkerg' {
                if `clus'==`nbitems'+`nbkerk'-`nbkerg'+1 {
                    local relv
                    local percent
                }
                else {
                    local relv=(`minegenval'-`vp'[`=`clus'-1',2])/`vp'[`=`clus'-1',3]*100
                    local percent %
                    matrix `threshold'[`=`nbitems'-`i'+1',1]=`relv'
                    matrix `threshold'[`=`nbitems'-`i'+1',3]=`minegenval'
                    if `i'>1 {
                        matrix `threshold'[`=`nbitems'-`i'+1',2]=`relv'-`threshold'[`=`nbitems'-`i'+2',1]
                    }
                }
                if "`method'"=="centroid" {
                    di _col(0) in ye %4.0f `=`i'-`nbkerk'+`nbkerg'' _col(12) %4.0f `=`nbitems'-`i'' _col(20) "`nomj'" _col(33) "`nomk'" _col(45) %7.0f `=`i'+`nbitems'' _col(52) %8.4f `vp'[`clus',9] _col(62) %8.4f `minegenval' _col(72) %7.3f `vp'[`clus',10] "%" _col(83) _col(84) %5.2f `relv' "`percent'"
                }
                else {
                    di _col(0) in ye %4.0f `=`i'-`nbkerk'+`nbkerg'' _col(12) %4.0f `=`nbitems'-`i'' _col(20) "`nomj'" _col(33) "`nomk'" _col(45) %7.0f `=`i'+`nbitems'' _col(52) %8.4f `vp'[`clus',9] _col(62) %8.4f `minegenval' _col(72) %7.3f `vp'[`clus',10] "%" _col(83) %8.4f `vp'[`clus',5] _col(95) %5.2f `relv' "`percent'"
                }
            }
        }
        local i=2*`nbitems'-1
        local relv=(`vp'[`i',3]-`vp'[`i',2])/`vp'[`i',3]*100
        if "`method'"=="centroid" {
            di in ye _col(84) %5.2f `relv' "`percent'"
        }
        else {
            di in ye _col(95) %5.2f `relv' "`percent'"
        }
        matrix `threshold'[1,1]=`relv'
        matrix `threshold'[1,2]=`relv'-`threshold'[2,1]
        matrix `threshold'[1,3]=`vp'[`i',3]
        * Two largest values of column 3 of `threshold' (row index = number of clusters)
        local best=0
        local maxbest=0
        local best2=0
        local maxbest2=0
        forvalues i=1/`nbitems' {
            if `threshold'[`i',3]>`maxbest2' {
                if `threshold'[`i',3]>`maxbest' {
                    local maxbest2=`maxbest'
                    local best2=`best'
                    local maxbest=`threshold'[`i',3]
                    local best=`i'
                }
                else {
                    local maxbest2=`threshold'[`i',3]
                    local best2=`i'
                }
            }
        }
        di in green "{hline 100}"
        di
        di in green "{hline 24}"
        di in green "PROPOSED BEST PARTITIONS"
        di in green "{hline 24}"
        di
        di in yellow _col(4) "Based on the variation of the T criterion"
        di in green _col(10) "1. Partitions in " in ye `best' in green " clusters"
        di in green _col(10) "2. Partitions in " in ye `best2' in green " clusters"
        return local bestvariation `best' `best2'
        * Same search on column 2 of `threshold', restricted to rows below nbitems-1
        local bestt=0
        local bestt2=0
        local var=0
        local var2=0
        forvalues i=1/`nbitems' {
            if `threshold'[`i',2]>`var2'&`i'<`nbitems'-1 {
                if `threshold'[`i',2]>`var' {
                    local bestt2=`bestt'
                    local var2=`var'
                    local var=`threshold'[`i',2]
                    local bestt=`i'
                }
                else {
                    local var2=`threshold'[`i',2]
                    local bestt2=`i'
                }
            }
        }
        di in yellow _col(4) "Based on the research of a threshold"
        di in green _col(10) "1. Partitions in " in ye `bestt' in green " clusters"
        di in green _col(10) "2. Partitions in " in ye `bestt2' in green " clusters"
        * Rows of the kernel merges all describe the initial (post-kernel) state.
        * The range is empty when no kernel is given, so `T' is only read when defined.
        forvalues i=`=`nbitems'+1'/`=`nbitems'+`nbkerk'-`nbkerg'' {
            matrix `vp'[`i',2]=`totvar'-`T'
            matrix `vp'[`i',9]=`T'
            * FIX: percentage relative to the total variance, as in every other row
            * (was `T'/`nbitems', which is wrong with nostandardized)
            matrix `vp'[`i',10]=`T'/`totvar'*100
        }
        return local bestthresold `bestt' `bestt2'
    }
    if "`bar'"!="" {
        drop _all
        qui set obs `nbitems'
        qui svmat `vp' ,names(v)
        qui drop in 1/`nbitems'
        qui gen id=`nbitems'-_n
        qui drop if id>`nbitems'-`nbkerk'+`nbkerg'-1
        label variable id "Number of clusters"
        label variable v2 "T variation"
        graph twoway bar v2 id, name(bar,replace) vert ,ylabel(0(0.5)2) xlabel(1(1)`=`nbitems'-`nbkerk'+`nbkerg'-1')
    }
    * A cluster analysis named clv is created on the membership matrix and its
    * clv_id / clv_ord / clv_hgt variables are overwritten so that -cluster dendro-
    * draws the CLV hierarchy.
    drop _all
    qui set obs `nbitems'
    qui svmat `matclus' ,names(v)
    local listorder
    forvalues i=`nbitems'(-1)1 {
        local listorder `listorder' v`i'
    }
    qui gen id=_n
    qui sort `listorder'
    capture cluster delete clv,zap
    qui cluster complete v* ,name(clv)
    qui replace clv_id=_n
    qui replace clv_ord=id
    qui replace clv_hgt=.
    qui gen fait=0
    qui gen clus=0
    forvalues i=2/`nbitems' {
        local ligne=`nbitems'+`i'-1
        if (`vp'[`ligne',6]<=`nbitems') {
            local first=`vp'[`ligne',6]
            gsort +fait -v`i' +clv_id
        }
        else {
            local first=`vp'[`ligne',7]
            gsort +fait -v`i' +clv_id
        }
        if "`deltaT'"!="" {
            qui replace clv_hgt=`vp'[`ligne',2] in 1
        }
        else {
            qui replace clv_hgt=100-`vp'[`ligne',10] in 1
        }
        qui replace fait=1 in 1
        qui replace clus=`vp'[`ligne',8] in 1
    }
    qui gen label=""
    forvalues i=1/`nbitems' {
        qui replace label=abbrev("`label`i''",`abbrev') if clv_id==`i'
    }
    sort clv_id
    if `nbitems'>`cutnumber' {
        local var "Groups of variables"
        local cut cutnumber(`cutnumber') /*labcutn*/
    }
    else {
        local var "Variables"
        local cut label(label)
    }
    * Upper bound of the height axis, rounded up to the next half unit
    qui su clv_hgt
    local tmp=r(max)
    local max=floor(`tmp')+.5
    if `tmp'>`max' {
        local max=`max'+.5
    }
    local maxvar=`max'+5
    if "`dendro'"=="" {
        if "`title'"=="" {
            local title "Clustering around Latent Variables (CLV)"
        }
        if "`caption'"!="" {
            local var "`caption'"
        }
        if "`deltaT'"!="" {
            local titleL "Variation of the T criterion"
            local yl "0(.5)`max'"
        }
        else {
            local titleL "% Unexplained Variance"
            local yl "0(25)`maxvar'"
        }
        if "`horizontal'"!="" {
            cluster dendro clv, name (dendrogram,replace) hor ytitle("`var'") `showcount' xtitle("`titleL'") title("`title'",span) xlabel(`yl') ylabel(,angle(0)) `cut'
        }
        else {
            cluster dendro clv, name(dendrogram,replace) xtitle("`var'") `showcount' ytitle("`titleL'") title("`title'",span) ylabel(`yl') `cut'
        }
    }
    if `cons'>`nbitems'-`nbkerk'+`nbkerg' {
        di in ye "The {hi:consolidation} is not possible for a number of clusters superior to the initial number of clusters"
        local cons=0
    }
    if `cons'!=0 {
        * Initial partition in `cons' groups, read from the hierarchy and stored in
        * `group' (one row per variable, in varlist order)
        sort v`=`nbitems'-`cons'+1'
        gen cut`cons'=1
        local g=1
        forvalues i=2/`nbitems' {
            if v`=`nbitems'-`cons'+1'[`i']!=v`=`nbitems'-`cons'+1'[`=`i'-1'] {
                local g=`g'+1
            }
            qui replace cut`cons'=`g' in `i'
        }
        sort id
        tempname group
        mkmat cut`cons',matrix(`group')
        use `clvfiletmp',replace
        local n=1
        * env==1 while at least one variable changed group during the last pass
        local env=1
        while (`env'==1) {
            forvalues g=1/`cons' {
                local list`g'
                forvalues i=1/`nbitems' {
                    if `group'[`i',1]==`g' {
                        local list`g' `list`g'' ``i''
                    }
                }
            }
            di
            if `n'==1 {
                di in green "{hline 30}"
                di in green "PARTITION BEFORE CONSOLIDATION"
                di in green "{hline 30}"
            }
            di
            local col=1
            local max=0
            local critT=0
            * Latent variable f1_g and contribution to the criterion for each group
            forvalues g=1/`cons' {
                di _col(`col') in green "GROUP " %2.0f `g' _c
                local col=`col'+10
                local tmp`g':word count `list`g''
                if `tmp`g''>`max' {
                    local max `tmp`g''
                }
                tempvar f1`g'
                if "`method'"=="centroid" {
                    qui genscore `list`g'',score(`f1`g'') mean
                    qui su `f1`g'' [`weight'`exp']
                    local var=r(Var)
                    local critT=`critT'+`tmp`g''*`var'
                }
                else {
                    if `tmp`g''>1 {
                        if "`method'"=="classical"|"`method'"=="v2" {
                            qui pca `list`g'' [`weight'`exp'] ,cov
                            matrix `Ev'=e(Ev)
                            qui predict `f1`g''
                        }
                        * FIX: the second test repeated "polychoric", so with
                        * polychoricv2 no latent variable was built and `Ev' was stale
                        else if "`method'"=="polychoric"|"`method'"=="polychoricv2" {
                            qui polychoricpca `list`g'' [`weight'`exp'] ,score(`f1`g'') nscore(1)
                            matrix `Ev'=r(eigenvalues)
                            rename `f1`g''1 `f1`g''
                        }
                        local lambda1=`Ev'[1,1]
                        local critT=`critT'+`lambda1'
                    }
                    else {
                        * Singleton group: the latent variable is the variable itself
                        qui gen `f1`g''=`list`g''
                        * It contributes 1 when standardized, and also for the
                        * polychoric methods, where totvar is the number of items
                        if "`standardized'"==""|"`method'"=="polychoric"|"`method'"=="polychoricv2" {
                            local critT=`critT'+1
                        }
                        else {
                            * FIX: -summarize- had no variable, so r(Var) was the
                            * variance of the last variable of the dataset
                            qui su `list`g'' [`weight'`exp']
                            local critT=`critT'+`r(Var)'
                        }
                    }
                }
            }
            di
            forvalues i=1/`max' {
                local col=1
                forvalues g=1/`cons' {
                    local tmpv:word `i' of `list`g''
                    local tmpv=abbrev("`tmpv'",8)
                    di _col(`col') in ye %8s "`tmpv'" _c
                    local col= `col'+10
                }
                di
            }
            di
            di in green "Variance Explained : " in ye %6.3f `=`critT'/`totvar'*100' in green "%"
            di in green "T criterion : " in ye %6.4f `critT'
            di
            di in green "{hline 21}"
            di in green "CONSOLIDATION: STEP `n'"
            di in green "{hline 21}"
            local n=`n'+1
            local env=0
            if "`method'"=="polychoric"|"`method'"=="polychoricv2" {
                local command polychoric
            }
            else {
                local command corr
            }
            * Each variable moves to the group whose latent variable it is most
            * correlated with (signed correlation for centroid, squared otherwise)
            forvalues i=1/`nbitems' {
                local env`i'=0
                local gr=`group'[`i',1]
                qui `command' ``i'' `f1`gr'' [`weight'`exp']
                local corr`i'=r(rho)
                local corrs`i'=r(rho)
                forvalues g=1/`cons' {
                    qui `command' ``i'' `f1`g'' [`weight'`exp']
                    local tmpcorr=r(rho)
                    if `g'!=`gr'&(((`corr`i'')<(`tmpcorr')&"`method'"=="centroid")|((`corr`i'')^2<(`tmpcorr')^2& "`method'"!="centroid")) {
                        local env=1
                        local env`i'=1
                        matrix `group'[`i',1]=`g'
                        local corr`i'=`tmpcorr'
                    }
                }
                if `env`i''==1 {
                    local g=`group'[`i',1]
                    di in green "The variable " in ye "``i'' " in green "is assigned to the `g'th group" _c
                    if "`method'"!="centroid" {
                        di in green " (corr^2=" %6.4f in ye (`corr`i'')^2 in green " vs " in ye %6.4f (`corrs`i'')^2 in green ")"
                    }
                    else {
                        di in green " (corr=" %6.4f in ye (`corr`i'') in green " vs " in ye %6.4f (`corrs`i'') in green ")"
                    }
                }
            }
            if `env'==0 {
                * Stable partition: return it and show the final diagnostics
                local latent
                forvalues g=1/`cons' {
                    label variable `f1`g'' "Latent variable `g'"
                    * NOTE(review): these variables are created in the working copy
                    * of the data, which is replaced by -use `clvfile'- below and by
                    * the automatic restore; they do not appear to survive the call.
                    if "`genlv'"!="" {
                        gen `genlv'`g'=`f1`g''
                    }
                    local latent `latent' `f1`g''
                    return local cluster`g' `list`g''
                }
                matrix `group'=`group''
                matrix colnames `group'=`varlist'
                return matrix affect=`group'
                di in ye "Stability of the partition is achieved"
                if `cons'<=7 {
                    di
                    di in green "{hline 42}"
                    di in green "CORRELATION MATRIX OF THE LATENT VARIABLES"
                    di in green "{hline 42}"
                    di
                    di in green "{hline `=(`cons')*13+15'}"
                    forvalues g=1/`cons' {
                        di _col(`=13*(`g'-1)+23') in green "Latent" _c
                    }
                    di
                    forvalues g=1/`cons' {
                        di _col(`=13*(`g'-1)+19') in green "variable `g'" _c
                    }
                    di
                    di in green "{hline `=(`cons')*13+15'}"
                    forvalues g=1/`cons' {
                        di in green "Latent variable `g'" _c
                        forvalues h=1/`g' {
                            local loc=13*`h'+10
                            qui corr `f1`g'' `f1`h'' [`weight'`exp']
                            local rho=r(rho)
                            di _col(`loc') in ye %6.4f `rho' _c
                        }
                        di
                    }
                    di in green "{hline `=(`cons')*13+15'}"
                    di
                }
                if `nbind'<=800&"`biplot'"==""&"`weight'"=="" {
                    local max=max(`matsize',`nbind')
                    set matsize `max'
                    if "`addvar'"!="" {
                        local add `varlist'
                    }
                    qui biplotvlab `latent' `add', name(biplot,replace) norow colopts(name(latent variables)) alpha(0) title(Biplot of the latent variables) labdes(size(vsmall) color(blue)) stretch(1)
                }
                else if `nbind'>800&"`biplot'"==""&"`weight'"==""{
                    di in green "There is more than 800 individuals, so the {hi:biplot} option is disabled"
                }
                else if "`weight'"!=""&&"`biplot'"==""{
                    di in green "The {hi:biplot} option is disabled because you use weights"
                }
            }
            forvalues g=1/`cons' {
                drop `f1`g''
            }
        }
    }
    * Put matsize and the data back, then return the hierarchy for a later replay
    set matsize `matsize'
    use `clvfile',replace
    capture cluster delete clv,zap
    return matrix vp=`vp'
    return matrix matclus=`matclus'
    return local varlist `varlist'
    return local method `method'
    return local kernel `kernel'
end

@ -0,0 +1,291 @@
program define compart,rclass
    * compart: index measuring how well a partition of the variables agrees with
    * their covariance/correlation matrix.
    *
    * part(numlist) gives the sizes of the consecutive groups of varlist (zeros
    * are ignored) and must sum to the number of variables. The matrix is the
    * covariance matrix of the (by default standardized) variables, the polychoric
    * matrix (type(polychoric)) or a user matrix (matrix()).
    * Returned: r(compart), r(mean), r(list), r(part), r(Pcov), r(Pel), and
    * r(Cvar) with the variables option.
    * The diagnostic lines (varlist, meme/diff, matrix, centiles...) are displayed
    * as in previous versions.
    version 8
    syntax varlist [if] [in] [fweight iweight] [,part(numlist) Matrix(string) type(string) DETails noSTANDardized VARiables SQUare]
    preserve
    unab varlist:`varlist'
    di "`varlist'"
    tokenize `varlist'
    marksample touse
    * FIX: [if] [in] were parsed but only used to count the individuals, while the
    * standardization and the covariance matrix used every observation. The data
    * are preserved, so the estimation sample can simply be kept.
    qui keep if `touse'
    local nbvar:word count `varlist'
    qui count
    local nbind=r(N)
    tempname p
    qui gen `p'=1
    qui su `p' [`weight'`exp'] if `touse'
    local nbind=r(N)
    * quad is the power applied to the coefficients (2 with the square option)
    if "`square'"=="" {
        local quad=1
    }
    else {
        local quad=2
    }
    if "`type'"!=""&"`type'"!="polychoric" {
        di in red "The type of the matrix is not authorized. Please correct your {hi:type} option."
        error 198
    }
    if "`type'"!=""&"`matrix'"!="" {
        di in red "You cannot define in the same time the {hi:type} and the {hi:matrix} options"
        error 198
    }
    /* DEFINITION OF THE PARTITION OF THE VARIABLES*/
    local newpart
    foreach i in `part' {
        if `i'!=0 {
            local newpart `newpart' `i'
        }
    }
    local part `newpart'
    * meme = number of pairs of variables in the same group,
    * diff = number of pairs of variables in two different groups
    local meme=0
    local diff=0
    local nbpart:word count `part'
    forvalues i=1/`nbpart' {
        local iti:word `i' of `part'
        local meme=`meme'+`iti'*(`iti'-1)/2
        forvalues j=`=`i'+1'/`nbpart' {
            local itj:word `j' of `part'
            local diff=`diff'+`iti'*`itj'
        }
    }
    local perc=`meme'/(`meme'+`diff')
    di "meme: `meme' ; diff: `diff' ; perc: `perc'"
    * first_i..last_i: positions in varlist of the variables of group i
    local test=0
    local last0=0
    forvalues i=1/`nbpart' {
        local first`i'=`last`=`i'-1''+1
        local size`i':word `i' of `part'
        local last`i'=`first`i''+`size`i''-1
        local test=`test'+`size`i''
        local list`i'
        forvalues j=`first`i''/`last`i'' {
            local list`i' `list`i'' ``j''
        }
    }
    if `test'!=`nbvar' {
        di in red "{p}The described partition of the variables is composed of a number of variables different of the number of variables of varlist.{p_end}"
        exit 198
    }
    /* BY DEFAULT, STANDARDIZATION*/
    if "`standardized'"=="" {
        forvalues i=1/`nbvar' {
            qui su ``i'' [`weight'`exp']
            qui replace ``i''=(``i''-r(mean))/r(sd)
        }
    }
    tempname Cov W
    if "`matrix'"==""&"`type'"!="polychoric" {
        /* COVARIANCE OR CORRELATION MATRIX*/
        qui matrix accum `Cov'=`varlist' [`weight'`exp'],nocons dev
        qui matrix `Cov'=`Cov'/(`nbind'-1)
    }
    else if "`type'"=="polychoric" {
        * NOTE(review): weights are not passed to -polychoric- here
        qui polychoric `varlist'
        qui matrix `Cov'=r(R)
    }
    else {
        qui matrix `Cov'=`matrix'
    }
    /* WE SAVE THE MATRIX AND WE COMPUTE THE AVERAGE COVARIANCE */
    qui matrix `W'=`Cov'
    local sum=0
    forvalues i=1/`nbvar' {
        forvalues j=`=`i'+1'/`nbvar' {
            local sum=`sum'+ `W'[`i',`j']^`quad'
        }
    }
    /* WE SAVE THE DATA AND WE COMPUTE THE USED PERCENTILES OF THE COVARIANCE*/
    tempfile compartfile
    qui save `compartfile',replace
    drop _all
    * Only the first nbvar*(nbvar-1)/2 observations are filled; the others stay missing
    set obs `=`nbvar'*(`nbvar'-1)'
    local n=1
    qui gen i=.
    qui gen j=.
    qui gen corr=.
    forvalues i=1/`nbvar' {
        forvalues j=`=`i'+1'/`nbvar' {
            qui replace i=`i' in `n'
            qui replace j=`j' in `n'
            qui replace corr=`W'[`i',`j']^`quad' in `n'
            local ++n
        }
    }
    matrix list `W'
    su corr
    sort corr
    centile corr,centile(`=100-`perc'*100')
    local centile=r(c_1)
    if `diff'!=0 {
        local perc2=(`meme'+1)/(`meme'+`diff')
        centile corr,centile(`=100-`perc2'*100')
        local centile2=r(c_1)
        local centile=(`centile'+`centile2')/2
    }
    qui use `compartfile',clear
    /***************************************************/
    * The centring value is 0 in both cases (the alternatives are kept commented out)
    if `nbpart'==1 {
        local mean=0
        * local mean=(2*`sum')/(`nbvar'*(`nbvar'-1))
    }
    else {
        local mean=(2*`sum')/(`nbvar'*(`nbvar'-1))
        local mean=0
        *local mean=`centile'
    }
    /*THE MATRIX IS CENTERED*/
    forvalues i=1/`nbvar' {
        matrix `W'[`i',`i']=0
        forvalues j=`=`i'+1'/`nbvar' {
            matrix `W'[`i',`j']=(`W'[`i',`j']^`quad'-`mean')
            matrix `W'[`j',`i']=`W'[`i',`j']
        }
    }
    /*WE COMPUTE THE INDEX D*/
    local C=0
    local C1=0
    local C2=0
    local minrho=2
    local summeme=0
    local sumdiff=0
    if "`square'"!="" {
        local maxrho=0
    }
    else {
        local maxrho=-2
    }
    * Scan of every cell (k,l): the sign of W is switched for pairs of different
    * groups; minrho/maxrho track the weakest within-group and the strongest
    * between-group coefficients
    forvalues i=1/`nbpart' {
        forvalues j=1/`nbpart' {
            forvalues k=`first`i''/`last`i'' {
                forvalues l=`first`j''/`last`j'' {
                    if `i'!=`j' {
                        if `k'>`l' {
                            local sumdiff=`sumdiff'+`Cov'[`k',`l']
                        }
                        if (`Cov'[`k',`l'])^`quad'>(`maxrho')^`quad' {
                            local maxrho=(`Cov'[`k',`l'])
                        }
                        matrix `W'[`k',`l']=-(`W'[`k',`l'])
                    }
                    else if (`Cov'[`k',`l'])^`quad'<(`minrho')^`quad'&`k'!=`l' {
                        local minrho=`Cov'[`k',`l']
                    }
                    if `i'==`j'&`k'>`l' {
                        local summeme=`summeme'+`Cov'[`k',`l']
                    }
                    local C=`C'+`W'[`k',`l']
                    local C1=`C1'+abs(`W'[`k',`l'])
                    if `W'[`k',`l']>+0 {
                        local ++C2
                    }
                }
            }
        }
    }
    if `meme'!=0 {
        local summeme=`summeme'/`meme'
    }
    if `diff'!=0 {
        local sumdiff=`sumdiff'/`diff'
    }
    local diffsum=`summeme'-`sumdiff'
    * (a stray -set trace off- was removed here: it switched off the caller's tracing)
    di "Summeme: `summeme' ; Sumdiff: `sumdiff'"
    local minrho=(`minrho')^(`quad')
    local maxrho=(`maxrho')^(`quad')
    local C=sign(`C')*(abs(`C'))^(1/`quad')/(`nbvar'*(`nbvar'-1))+`mean'
    local C1=(`C1')^(1/`quad')/(`nbvar'*(`nbvar'-1))
    local C2=(`C2')/(`nbvar'*(`nbvar'-1))*100
    return local Pcov=`=`C'/`C1''
    return local Pel=`=`C2'/100'
    if `nbpart'==1 {
        local C=`C'*(`nbvar'+1)/`nbvar'
    }
    if `nbpart'==1 {
        local maxrho=0
    }
    if `nbpart'==`nbvar' {
        local minrho=0
    }
    * The reported index is recomputed from minrho and maxrho only
    di " C=(`meme'*`minrho'-`diff'*`maxrho')/(`meme'+`diff')"
    local C=(`meme'*`minrho'-`diff'*`maxrho')/(`meme'+`diff')
    di in green "{hline 80}"
    di in green "Number of individuals: " _col(71) in ye %8.0f `nbind'
    di in green "Number of variables: " _col(71) in ye %8.0f `nbvar'
    di in green "COMPART index: " _col(71) in ye %8.6f `C'
    di in green "Proportion of the covariances explained by the COMPART index: " _col(73) in ye %6.2f `=abs(`C')/`C1'*100' "%"
    di in green "Proportion of positive elements in the matrix: " _col(73) in ye %6.2f `C2' "%"
    di in green "Minimum correlation coefficient for 2 variables of the same group: " _col(74) in ye %5.2f `minrho'
    di in green "Maximum correlation coefficient for 2 variables of two different groups: " _col(74) in ye %5.2f `maxrho'
    di in green "Average correlation coefficient: " _col(74) in ye %5.2f `mean'
    di in green "{hline 80}"
    di
    if "`details'"!="" {
        di in green "Matrix of the coefficients"
        di in green "{hline 26}"
        matrix list `W' ,noheader format(%7.4f)
        di
    }
    if "`variables'"!="" {
        di in green "Details for each variable"
        di in green "{hline 26}"
        di
        di in green "{hline 80}"
        di in green "Items" _col(17) "COMPART" _col(26) "Problematic items"
        di in green "{hline 80}"
        tempname Cvar
        matrix `Cvar'=J(1,`nbvar',0)
        * Per-variable index: mean of its row of W; the variables with a negative
        * coefficient are listed as problematic
        forvalues i=1/`nbvar' {
            local C`i'=0
            local pourri`i'
            forvalues j=1/`nbvar' {
                local C`i'=`C`i''+`W'[`i',`j']
                if `W'[`i',`j']<0 {
                    local pourri`i' `pourri`i'' ``j''
                }
            }
            local C`i'=`C`i''/(`nbvar'-1)
            matrix `Cvar'[1,`i']=`C`i''
            di in ye abbrev("``i''",14) _col(15) %9.6f `C`i'' _c
            if "`pourri`i''"!="" {
                di in ye _col(26) "`pourri`i''"
            }
            else {
                di
            }
        }
        di in green "{hline 80}"
        matrix colnames `Cvar'=`varlist'
        matrix rownames `Cvar'=Compart
        return matrix Cvar=`Cvar'
    }
    local test=`maxrho'-`minrho'
    local diffsum=(`summeme'*`meme'-`diff'*`sumdiff')/(`meme'+`diff')
    local diffsum=`summeme'/*-`sumdiff'*/
    return local compart `C'
    return local mean `mean'
    return local list `varlist'
    return local part `part'
    restore
end

@ -0,0 +1,120 @@
program define compart2,rclass
    * compart2: variant of the COMPART index based on the correlations between
    * each variable and the latent variable of each group of the partition.
    *
    * part(numlist) gives the sizes of the consecutive groups of varlist (zeros
    * are ignored). type() is classical (default, first component of a PCA on
    * covariances), polychoric (polychoricpca) or centroid (genscore, mean).
    * Returned: r(compart) and r(part).
    * NOTE(review): weights are accepted by -syntax- but are not passed to
    * pca/polychoricpca/genscore/corr; they only enter the unused count `nbind'.
    version 9
    syntax varlist [if] [in] [fweight iweight] [,part(numlist) type(string)]
    preserve
    unab varlist:`varlist'
    di "`varlist'"
    tokenize `varlist'
    marksample touse
    * FIX: [if] [in] were parsed but never applied to the computations. The data
    * are preserved, so the estimation sample can simply be kept.
    qui keep if `touse'
    local nbvar:word count `varlist'
    qui count
    local nbind=r(N)
    tempname p
    qui gen `p'=1
    qui su `p' [`weight'`exp'] if `touse'
    local nbind=r(N)
    * (dead code on a non-existent -square- option removed)
    if "`type'"=="" {
        local type classical
    }
    if "`type'"!="classical"&"`type'"!="centroid"&"`type'"!="polychoric" {
        di in red "The type of the matrix is not authorized. Please correct your {hi:type} option."
        error 198
    }
    /* DEFINITION OF THE PARTITION OF THE VARIABLES*/
    local newpart
    foreach i in `part' {
        if `i'!=0 {
            local newpart `newpart' `i'
        }
    }
    local part `newpart'
    * meme = number of pairs of variables in the same group,
    * diff = number of pairs of variables in two different groups
    local meme=0
    local diff=0
    local nbpart:word count `part'
    forvalues i=1/`nbpart' {
        local iti:word `i' of `part'
        local meme=`meme'+`iti'*(`iti'-1)/2
        forvalues j=`=`i'+1'/`nbpart' {
            local itj:word `j' of `part'
            local diff=`diff'+`iti'*`itj'
        }
    }
    local perc=`meme'/(`meme'+`diff')
    di "meme: `meme' ; diff: `diff' ; perc: `perc'"
    * first_i..last_i: positions in varlist of the variables of group i
    local test=0
    local last0=0
    forvalues i=1/`nbpart' {
        local first`i'=`last`=`i'-1''+1
        local size`i':word `i' of `part'
        local last`i'=`first`i''+`size`i''-1
        local test=`test'+`size`i''
        local list`i'
        forvalues j=`first`i''/`last`i'' {
            local list`i' `list`i'' ``j''
        }
    }
    if `test'!=`nbvar' {
        di in red "{p}The described partition of the variables is composed of a number of variables different of the number of variables of varlist.{p_end}"
        exit 198
    }
    * Latent variable f1_g of each group (the variable itself for a singleton)
    forvalues g=1/`nbpart' {
        tempname f1`g'
        if `size`g''>1 {
            if "`type'"=="classical" {
                qui pca `list`g'',cov
                qui predict `f1`g''
            }
            else if "`type'"=="polychoric" {
                qui polychoricpca `list`g'',score(`f1`g'') nscore(1)
                rename `f1`g''1 `f1`g''
            }
            else if "`type'"=="centroid" {
                qui genscore `list`g'', score(`f1`g'') mean
            }
        }
        else if `size`g''==1 {
            qui gen `f1`g''=`list`g''
        }
    }
    * minrho: weakest correlation between a variable and its own latent variable;
    * maxrho: strongest correlation between a variable and another latent variable
    local minrho=2
    local maxrho=-2
    forvalue i=1/`nbvar' {
        forvalues g=1/`nbpart' {
            qui corr ``i'' `f1`g''
            if `i'>=`first`g''&`i'<=`last`g'' {
                if r(rho)<`minrho' {
                    local minrho=r(rho)
                }
            }
            else if r(rho)>`maxrho' {
                local maxrho=r(rho)
            }
        }
    }
    di "C=(`meme'*`minrho'-`diff'*`maxrho')/(`meme'+`diff')"
    * (the assignment below was duplicated in the previous version)
    local C=(`meme'*`minrho'-`diff'*`maxrho')/(`meme'+`diff')
    di "C= `C' min=`minrho' max=`maxrho'"
    return local compart `C'
    return local part `part'
    restore
end

@ -0,0 +1,81 @@
capture program drop conc
* conc: displays the table of correlations ("concurrent validity") between
* each variable of varlist (rows) and each variable of comp() (columns).
*   varlist       : variables shown as rows
*   comp(varlist) : variables shown as columns
*   tconc(#)      : threshold, default 0.4; a correlation whose absolute value
*                   exceeds it is displayed "as result", the others "as text"
* Side effect : the global matrix m (rows = varlist, columns = comp) is left
*               in memory, as in previous versions.
* Returns     : r(conc), a copy of that correlation matrix.
program conc,rclass
    syntax varlist, comp(varlist) [tconc(real 0.4)]
    di as result "{hline}"
    di "{bf:Concurrent validity}"
    di as result "{hline}"
    di
    local n : word count `varlist'
    local p : word count `comp'
    matrix m = J(`n',`p',.)
    matrix rownames m = `varlist'
    matrix colnames m = `comp'
    * scratch matrix: a temporary name avoids overwriting a user matrix called e
    tempname e
    local r = 1
    foreach i in `varlist' {
        local c = 1
        foreach j in `comp' {
            qui corr `i' `j'
            mat `e' = r(C)
            mat m[`r',`c'] = `e'[2,1]
            local ++c
        }
        local ++r
    }
    * width of the row-label column: longest name in varlist + 6
    tokenize `varlist'
    local maxv = length("`1'")
    forvalues i=1/`n' {
        local lenv = length("``i''")
        if `lenv' > `maxv' local maxv = `lenv'
    }
    local decv = `maxv'+6
    * width of each data column: longest name in comp() + 4
    tokenize `comp'
    local maxc = length("`1'")
    forvalues i=1/`p' {
        local lenc = length("``i''")
        if `lenc' > `maxc' local maxc = `lenc'
    }
    local decc = `maxc'+4
    * header line
    local col = `decv'
    foreach c in `comp' {
        di as result _col(`col') "`c'" _c
        local col = `col'+`decc'
    }
    di
    local i = 1
    foreach x in `varlist' {
        local var`i' = "`x'"
        local ++i
    }
    * one line per variable of varlist
    forvalues i=1/`n' {
        di as result "`var`i''" _c
        local col = `decv'
        forvalues j=1/`p' {
            local t = m[`i',`j']
            * a missing correlation compares as larger than any number in Stata:
            * it must not be highlighted as exceeding the threshold
            if (`t' > `tconc' | `t' < -`tconc') & `t' < . {
                di as result _col(`=`col'-1') %5.2f `t' _c
            }
            else di as text _col(`=`col'-1') %5.2f `t' _c
            local col = `col'+`decc'
        }
        di
    }
    return matrix conc = m, copy
end
*conc HA-MOC, comp(ioc1 ioc2) tconc(0.4)
*conc x1-x40, comp(x1 x2) tconc(0.4)

@ -0,0 +1,298 @@
capture program drop convdiv
* convdiv: convergent / divergent validity of the items of a multidimensional scale.
*   varlist            : items, ordered dimension by dimension
*   partition(numlist) : number of items in each dimension (must sum to the number of items)
*   scorename(string)  : optional names of the dimensions (default Dim1, Dim2, ...)
*   tconvdiv(#)        : threshold for the item / own-score correlation (default 0.4)
*   convdivboxplots    : also draws one box plot of the correlations per dimension
* Relies on the external command -detect-, which must return r(Corrrestscores) and r(Corrscores).
* Leaves the global matrices A, B and C_h_j in memory; the data are preserved and
* restored automatically when the program ends.
program convdiv
    syntax varlist, PARTition(numlist integer >0) [SCOrename(string) TCONVdiv(real 0.4) convdivboxplots]
    preserve
    qui set autotabgraphs on
    * the partition must describe exactly the items of varlist
    local C = 0
    foreach z in `partition' {
        local C = `C' + `z'
    }
    local nbvars : word count `varlist'
    if `C' != `nbvars' {
        di in red "The sum of the numbers in the partition option is different from the number of variables precised in varlist"
        exit 119
    }
    local P:word count `partition'
    if "`scorename'" !="" {
        local S:word count `scorename'
        if `P'!=`S' {
            di in red "The number of score names given is different from the number of dimensions in the partition option"
            exit 119
        }
    }
    qui detect `varlist', partition(`partition')
    matrix A = r(Corrrestscores)
    matrix B = r(Corrscores)
    * for the items of dimension i, column i of B is replaced by the
    * corresponding entry of A (r(Corrrestscores))
    local i = 1
    local y = 1
    foreach x in `partition' {
        if `i' == 1 local s = `x'
        else local s = `s' +`x'
        forvalues z = `y'/`s' {
            matrix B[`z',`i'] = A[`z',`i']
        }
        local ++i
        local y = `s'+1
    }
    if "`scorename'"!="" {
        matrix colnames B = `scorename'
    }
    else {
        local name
        local nname
        forvalues i = 1/`P' {
            local name "Dim`i'"
            local nname `nname' `name'
        }
        local scorename = "`nname'"
        matrix colnames B = `scorename'
    }
    * abbreviated item and score names used in the table
    local i = 1
    foreach v in `varlist' {
        local var`i' = abbrev("`v'",10)
        local ++i
    }
    local i = 1
    foreach s in `scorename' {
        local s`i' = abbrev("`s'",7)
        local ++i
    }
    di as result "{hline}"
    di "{bf:Correlation matrix}"
    di "{hline}"
    di
    local dec = 10
    local col = `dec'
    local decit = 14
    local col1 = `decit'
    forvalues i=1/`P' {
        di _col(`col1') "{bf:`s`i''}" _c
        local col1 = `col1' + `dec'
    }
    di
    local h = (`P'-1)*`dec'+`decit'+4
    di "{hline `h'}"
    * counters of the items failing each criterion; they must start at 0,
    * otherwise the summary below is a syntax error when no item fails
    local cptconv = 0
    local cptdiv = 0
    local i = 1
    local j = 1
    local y = 1
    foreach p in `partition' {
        if `j' == 1 local s = `p'
        else local s = `s' +`p'
        forvalues z = `y'/`s' {
            di as text "{bf:`var`z''}" _c
            local col = `decit'-1
            local dd = `z' // [counting cptdiv (one per item)]
            forvalues k = 1/`P' {
                local t = B[`z',`k']
                local t : di %6.3f `t'
                if `k' == `i' {
                    * own dimension: in red when below the threshold
                    if `t' < `tconvdiv' {
                        di in red _col(`col') "{bf:`t'}" _c
                        local cptconv = `cptconv'+1
                        local col = `col' + `dec'
                    }
                    else {
                        di _col(`col') "{bf:`t'}" _c
                        local col = `col' + `dec'
                    }
                }
                else {
                    * other dimension: in red when larger than the own-dimension correlation
                    if B[`z',`k'] > B[`z',`i'] {
                        di in red _col(`col') "`t'" _c
                        if `dd' == `z' local cptdiv = `cptdiv'+1 // [one per item]
                        local dd = 0
                        local col = `col' + `dec'
                    }
                    else {
                        di as text _col(`col') "{text:`t'}"_c
                        local col = `col' + `dec'
                    }
                }
            }
            di
        }
        di "{dup `h':-}"
        local ++i
        local ++j
        local y = `s'+1
    }
    * one temporary variable per (dimension h, score j) holding the correlations
    * of the items of dimension h with score j (used by the box plots)
    local y = 1
    local h = 1
    local np : word count `partition'
    foreach p in `partition' {
        if `h' == 1 local s = `p'
        else local s = `s' +`p'
        forvalues j = 1/`np' {
            mat C_`h'_`j' = B[`y'..`s',`j']
            tempvar tp_`h'_`j'
            mat colnames C_`h'_`j' = `tp_`h'_`j''
            svmat C_`h'_`j', names(col)
        }
        local ++h
        local y = `s'+1
    }
    if "`convdivboxplots'" != "" {
        forvalues h = 1/`np' {
            tokenize `scorename'
            local call = ""
            local callbox = ""
            local callleg = ""
            forvalues j = 1/`np' {
                local call `call' `tp_`h'_`j''
                * `color' is never defined here, so the colour options are passed empty
                local callbox `callbox' box(`j',fcolor(`color') lcolor(`color')) marker(`j', mcolor(`color'))
                local lab = "``j''"
                local lab = `"`lab'"'
                local callleg `callleg' `j' "`lab'"
            }
            graph box `call', name("Conv_div_``h''",replace) `callbox' legend(order(`"`callleg'"') stack rows(1) size(small)) title(Correlations between items of ``h'' and dimensions) yline(`tconvdiv', lpattern(dot) lcolor(black))
            qui set autotabgraphs on
        }
    }
    * summary
    local t : di %5.3f `tconvdiv'
    local p1 = (`nbvars'-`cptconv')/`nbvars'*100
    local p1 : di %4.1f `p1'
    local p2 = (`nbvars'-`cptdiv')/`nbvars'*100
    local p2 : di %4.1f `p2'
    di
    di as result "Convergent validity:" _c
    di as text " `=`nbvars'-`cptconv''/`nbvars' items (`p1'%) have a correlation coefficient with the score of "
    di _col(22) "their own dimension greater than `t'"
    di
    di as result "Divergent validity:" _c
    di as text " `=`nbvars'-`cptdiv''/`nbvars' items (`p2'%) have a correlation coefficient with the score"
    di _col(22) "of their own dimension greater than those computed with other scores."
end
*convdiv ioc1-ioc37, partition(4 4 7 3 3 4 7 5) scorename(Hddfdfdffda PSE W BCC Afdfdfererdfc AE LI MOC) tconvdiv(0.4) // convdivboxplots
*convdiv x1-x40, partition(5 5 5 5 5 5 5 5) scorename(Hdfda PSE W BCC Afdfdfererdfc AE LI MOC) tconvdiv(0.4) // convdivboxplots

@ -0,0 +1,16 @@
* dege: reshapes the wide variables <anything>1, <anything>2, ... to long format
* (one line per identifier and item), renames the stacked variable "response"
* and creates, for each item k, the variable <anything>k equal to -1 on the
* lines of item k and 0 elsewhere.
*   anything  : stub of the wide variables
*   i(string) : identifier variable (default: ind)
* The data in memory are replaced (the preserve is cancelled at the end).
program define dege
    syntax anything [, i(string)]
    preserve
    if "`i'"=="" local i ind
    qui reshape long `anything', i(`i') j(item)
    qui rename `anything' response
    * number of distinct values of item
    qui inspect item
    local nlev=r(N_unique)
    forvalues k=1/`nlev' {
        qui gen `anything'`k'=-(item==`k')
    }
    restore, not
end

@ -0,0 +1,108 @@
*! Delta version 1.5 - 5 March 2008
*! Jean-Benoit Hardouin
************************************************************************************************************
* DELTA: delta coefficient
* Version 1.5: March 5, 2008
*
* Historic
* Version 1 (2007-05-21): Jean-Benoit Hardouin
* Version 1.1 (2007-05-22): Jean-Benoit Hardouin /* if in and possibility to use the score*/
* Version 1.2 (2007-05-22): Jean-Benoit Hardouin /*bug when a score is missing*/
* Version 1.3 (2007-06-16): Jean-Benoit Hardouin /*change in the options*/
* Version 1.4 (2007-07-03): Jean-Benoit Hardouin /*correct a bug in the options*/
* Version 1.5 (2008-03-05): Jean-Benoit Hardouin /*correct a bug in the ci option*/
*
* Jean-Benoit Hardouin, Faculty of Pharmaceutical Sciences - University of Nantes - France
* jean-benoit.hardouin@univ-nantes.fr
*
* News about this program : http://www.anaqol.org
* FreeIRT Project : http://www.freeirt.org
*
* Copyright 2007-2008 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
************************************************************************************************************
* delta: computes the delta coefficient of a score.
*   varlist     : the items (the score is then computed by -genscore-), or a
*                 single variable already containing the score (maxscore()
*                 is then required)
*   ci(#)       : if not 0, number of bootstrap replications
*   nodots      : passed to -bootstrap-
*   minscore(#) : minimal possible score (default 0)
*   maxscore(#) : maximal possible score (default 0 = observed maximum)
* Returns r(delta). The data in memory are saved first and reloaded at the end.
* NOTE(review): the formula uses maxscore only, whatever minscore is - confirm
* this is intended when minscore() is not 0.
program define delta , rclass
    version 7.0
    syntax varlist(min=1 numeric) [if] [in] [,ci(integer 0) noDots MINscore(int 0) MAXscore(int 0)]
    preserve
    tempfile deltafile
    qui save `deltafile'
    if "`if'"!=""|"`in'"!="" {
        qui keep `if' `in'
    }
    local nbitems:word count `varlist'
    tokenize `varlist'
    local scoremin=`minscore'
    local scoremax=`maxscore'
    tempvar score
    if `nbitems'==1&`scoremax'==0 {
        * the option is called maxscore() in the syntax
        di in red "If you indicate only the score variable, you must define the {cmd:maxscore} option"
        error 198
    }
    else if `nbitems'==1&`scoremax'!=0 {
        qui gen `score'=`varlist'
    }
    else {
        qui genscore `varlist',score(`score')
    }
    * individuals without score are not used
    qui drop if `score'==.
    qui count
    local nbind=r(N)
    if `scoremax'==0 {
        qui su `score'
        local scoremax=r(max)
    }
    * every score must lie between scoremin and scoremax
    tempvar error
    gen `error'=`score'<`scoremin'|`score'>`scoremax'
    qui count if `error'==1
    local err=r(N)
    if `err'!=0 {
        di in red "`err' individuals has(have) a score inferior to `scoremin' or superior to `scoremax'"
        error 198
    }
    * sum of the squared frequencies of each possible score
    local sumsqscore=0
    forvalues i=`scoremin'/`scoremax' {
        qui count if `score'==`i'
        local score`i'=r(N)
        local sumsqscore=`sumsqscore'+`score`i''^2
    }
    local delta=(1+`scoremax')*(`nbind'^2-`sumsqscore')/(`nbind'^2*`scoremax')
    di in green "Range of the scores : " in ye `scoremin' in gr "/" in ye `scoremax'
    di in green "Number of used individuals : " in ye `nbind'
    if `ci'!=0 {
        * `dots' contains "nodots" when the nodots option is specified
        bootstrap delta=r(delta), reps(`ci') nowarn noheader nolegend `dots': delta `varlist' ,minscore(`scoremin') maxscore(`scoremax')
    }
    else {
        display in green "Delta= " in yellow %8.6f `delta'
    }
    return scalar delta=`delta'
    qui use `deltafile',clear
    restore,not
end

@ -0,0 +1,250 @@
capture program drop descitems
* descitems: descriptive table of the items of a multidimensional scale.
*   varlist            : items, ordered dimension by dimension
*   partition(numlist) : number of items in each dimension (must sum to the number of items)
* For each item: rate of missing data, number of non-missing observations,
* proportion of each response category, and, per dimension, the values taken
* from r(Alpha) of -alpha- and from r(loevHj) and r(nbHjkNS) of the external
* command -loevh-.
* NOTE: the items are rounded to integers IN THE DATA IN MEMORY (no preserve).
* Leaves the global matrices d, a, at, e, et, ns and nst in memory.
program descitems
    syntax varlist, PARTition(numlist integer >0)
    local C = 0
    foreach z in `partition' {
        local C = `C' + `z'
    }
    local nbvars : word count `varlist'
    if `C' != `nbvars' {
        di in red "The sum of the numbers in the partition option is different from the number of variables precised in varlist"
        exit 119
    }
    local i = 1
    foreach x in `varlist' {
        local var`i' = "`x'"
        local ++i
    }
    foreach var in `varlist' {
        qui replace `var' = round(`var')
    }
    * list of all the response categories observed over the items.
    * A macro-list union compares whole elements; a substring test would
    * wrongly consider 1 as already present when 10 or -1 is in the list.
    local lev
    foreach var in `varlist' {
        qui levelsof `var', local(levels)
        local lev : list lev | levels
    }
    _qsort_index `lev'
    local lev = r(slist1)
    * d: one row per item; columns = missing rate, alpha, Hj, number of NS Hjk
    local i = 1
    matrix d = J(`nbvars',4,.)
    foreach var in `varlist'{
        qui count if missing(`var')
        local ct=r(N)
        local tx`i'=`ct'/_N
        matrix d[`i',1] = `tx`i''
        local ++i
    }
    matrix rownames d = `varlist'
    matrix colnames d = "missing" "alpha" "Hj"
    * per-dimension statistics
    local i = 1
    local y = 1
    foreach x in `partition' {
        if `i' == 1 local s = `x'
        else local s = `s' +`x'
        local liste = ""
        forvalues w = `y'/`s' {
            local liste `liste' `var`w''
        }
        qui capture alpha `liste', asi item std
        mat a = r(Alpha)
        mat at = a'
        qui capture loevh `liste', pairwise
        matrix e = r(loevHj)
        matrix et = e'
        matrix ns = r(nbHjkNS)
        matrix nst = ns'
        local k = 0
        forvalues j = `y'/`s' {
            local k = `k'+1
            matrix d[`j',2] = at[`k',1]
            matrix d[`j',3] = et[`k',1]
            matrix d[`j',4] = nst[`k',1]
        }
        local ++i
        local y = `s'+1
    }
    * from here var1, var2, ... hold the abbreviated names used as row labels
    local i = 1
    foreach v in `varlist' {
        local var`i' = abbrev("`v'",8)
        local ++i
    }
    * table header (two lines)
    local dec = 10
    local col = `dec'
    local b : word count `lev'
    local j = 1
    di in blue _col(`dec') "{bf:Missing}" _c
    local col = `col'+11
    di in blue _col(`=`col'+2') "{bf:N}" _c
    local col = `col'+9
    di _col(`col') "{bf:Response categories}" _c
    local col = `dec'+18+8*`b'
    di _col(`col') "{bf:Alpha}" _c
    local col = `col'+9
    di _col(`col') "{bf:Loevinger}" _c
    local col = `col'+12
    di _col(`col') "{bf:Number of}"
    local col = `dec'-1
    di _col(`col') "{bf:data rate}" _c
    local col = `dec'+18
    foreach m in `lev' {
        di _col(`=`col'+2') "`m'" _c
        local col = `col'+8
    }
    local col = `dec'+17+8*`b'
    di as result _col(`col') "- item" _c
    local col = `col'+10
    di as result _col(`col') "Hj coeff" _c
    local col = `col'+12
    di as result _col(`col') "NS Hjk"
    local ch = `dec'+18+8*`b'+29
    di "{hline `ch'}"
    * varo1, varo2, ... keep the full (non abbreviated) names
    local i = 1
    foreach x in `varlist' {
        local varo`i' = "`x'"
        local ++i
    }
    * one line per item, one group of lines per dimension
    local y = 1
    foreach p in `partition' {
        if `j' == 1 local s = `p'
        else local s = `s' +`p'
        forvalues z = `y'/`s' {
            local col = `dec'
            di "{bf:`var`z''}" _c
            local t = d[`z',1]
            local t : di %8.2f `t'
            di _col(`col') "{text:`t'}" _c
            qui count if missing(`varo`z'')
            local m = r(N)
            local N = _N-`m'
            local N : di %4.0f `N'
            local col = `col'+10
            di _col(`col') "{text:`N'}" _c
            local col = `col'+8
            * denominator of the proportions (does not depend on the category)
            qui count if `varo`z'' != .
            local d = r(N)
            foreach m in `lev' {
                qui count if round(`varo`z'') == `m'
                local n = r(N)
                local e = `n'/`d'
                local e : di %4.2f `e'
                if `e' != 0 di _col(`=`col'-1')"{text:`e'}" _c
                else di _col(`=`col'-1')"{text: -}" _c
                local col = `col'+8
            }
            local col = `dec'+18+8*`b'
            local a = d[`z',2]
            local a : di %4.2f `a'
            di _col(`=`col'+1') "{text:`a'}" _c
            local h = d[`z',3]
            local h : di %5.2f `h'
            local col = `col'+10
            di _col(`=`col'+3') "{text:`h'}" _c
            local ns = d[`z',4]
            local ns : di %1.0f `ns'
            local col = `col'+12
            di _col(`=`col'+7') "{text:`ns'}"
        }
        local ++j
        local y = `s'+1
        di "{dup `ch':-}"
    }
end
*descitems iociociociociocicocio1-ioc37, part(4 4 7 3 3 4 7 5)
*descitems ptgi1-peur16, part(4 4 7 3 3 4 7 5)
*descitems x1-x30, part(5 5 5 5 5 4 1)

@ -0,0 +1,293 @@
*! Version 1.2 29 August 2019
*! Jean-Benoit Hardouin
************************************************************************************************************
* Stata program : descscale
* Description of a scale and covariates
* Release 1.2 : August 29, 2019
*
*
* Historic :
* Version 1 (April 12, 2019) [Jean-Benoit Hardouin]
* Version 1.1 (June 4, 2019) [Jean-Benoit Hardouin]
* Version 1.2 (August 29, 2019) [Jean-Benoit Hardouin] /*correction of bugs*/
*
* Jean-benoit Hardouin, PhD, Assistant Professor
* Team of Methods in Patient Centered Outcomes and Health Research (INSERM U1246-SPHERE)
* University of Nantes - Faculty of Pharmaceutical Sciences
* France
* jean-benoit.hardouin@anaqol.org
*
*
* News about this program :http://www.anaqol.org
*
* Copyright 2019 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
***********************************************************************************************************
* descscale: description of the items of a scale, of its score(s) and of covariates.
*   varlist              : items, ordered dimension by dimension
*   min(#)               : first response category displayed (default 0)
*   continuous(varlist)  : covariates described as continuous variables
*   categorical(varlist) : covariates described as categorical variables
*   all                  : classifies every variable of the dataset as a covariate
*   partition(numlist)   : number of items per dimension (default: one dimension)
*   large                : accepts items with more than 11 levels
*   mean                 : passed to the external command -genscore-
*   maxlevels(#)         : categorical covariates with more levels are only listed (default 20)
* The program is declared rclass but no -return- statement appears in it.
program define descscale , rclass
version 8.2
syntax varlist [if] [in] [,MIN(int 0) CONTinuous(varlist) CATegorical(varlist) All PARTition(numlist) Large MEAN MAXlevels(int 20)]
preserve
* touse marks the if/in sample; novarlist: missing items do not exclude an observation
marksample touse,novarlist
label variable `touse' "selection"
* after tokenize, ``i'' is the name of the i-th item
tokenize `varlist'
local nbitems: word count `varlist'
if "`partition'"=="" {
local partition `nbitems'
}
local nbdim:word count `partition'
forvalues i=1/`nbdim' {
local nbitemsdim`i': word `i' of `partition'
}
qui count if `touse'
local N=r(N)
* checks of each item; maxr = largest maximum observed over the items
local maxr=0
forvalues i=1/`nbitems' {
qui levelsof ``i'' if `touse'
local r`i'=r(r)
qui su ``i'' if `touse'
local max`i'=r(max)
local mean`i'=r(mean)
* number of values which are neither an integer between 0 and 10 nor "."
* (this count is not restricted to the `touse' sample)
qui count if ``i''!=0&``i''!=1&``i''!=2&``i''!=3&``i''!=4&``i''!=5&``i''!=6&``i''!=7&``i''!=8&``i''!=9&``i''!=10&``i''!=.
local ninc=r(N)
local ok`i'=1
* NOTE(review): the two -exit- below carry no return code, so the program
* stops after the message without raising an error - confirm this is intended
if `r`i''>11&"`large'"=="" {
di in red "The number of answer categories to the items ``i'' is large (>11). Use the -large- option or correct the list of items."
local ok`i'=0
exit
}
if `ninc'>0 {
di in red "The variable ``i'' has incompatible values with an items (integers between 0 and 10)."
local ok`i'=0
exit
}
if `max`i''>`maxr' {
local maxr `max`i''
}
}
* NOTE(review): looks like a debugging display left active - confirm
di "maxr=`maxr'"
di
di in green "Number of individuals : " as result `N'
di
* table of the items: one column per category from min to maxr, then missing and mean
di in green "Description of the items"
local long=(`maxr'+1-`min')*8+40
di in green "{hline `long'}"
di in green "Items" _col(23) "Obs" _c
local col=33
forvalues j=`min'/`maxr' {
di _col(`col') "`j'" _c
local col=`col'+8
}
di _col(`col') "." _col(`=`col'+4') "Mean"
di in green "{hline `long'}"
* deb/fin: positions of the first and last items of dimension d;
* loi`d' accumulates the list of the items of dimension d
local deb=1
forvalues d=1/`nbdim' {
local loi`d'
*di "local fin=`deb'+`nbitemsdim`d''-1"
local fin=`deb'+`nbitemsdim`d''-1
forvalues i=`deb'/`fin' {
if `ok`i''==1 {
local loi`d' `loi`d'' ``i''
qui count if `touse'&``i''!=.
local k=abbrev("``i''",20)
di in green "`k'" _col(22) as result %4.0f `r(N)' _c
local col=28
* percentages are computed over all the `N' selected individuals
forvalues j=`min'/`maxr' {
qui count if `touse'&``i''==`j'
local per=round(`r(N)'/`N'*100, 0.1)
di _col(`col') %5.1f `per' "%" _c
local col=`col'+8
}
qui count if `touse'&``i''==.
local per=round(`r(N)'/`N'*100,0.1)
di _col(`col') %5.1f `per' "%" _col(`=`col'+9') %4.2f `mean`i''
}
}
if `d'!=`nbdim' {
di in green "{dup `long':-}"
}
local deb=`fin'+1
}
di in green "{hline `long'}"
di
* table of the scores: one score per dimension, computed by -genscore-
di in green "Description of the scores"
local long2=72
di in green "{hline `long2'}"
di in green "Scores" _col(22) "Obs" _col(33) "Mean" _col(40) "Std. Dev." _col(58) "Min" _col(70) "Max"
di in green "{hline `long2'}"
forvalues d=1/`nbdim' {
tempname score`d'
* `standardized' is not an option of the syntax above, so it expands to nothing
genscore `loi`d'' if `touse', score(`score`d'') `mean' `standardized'
qui su `score`d'' if `touse'
di in green "score`d'" _col(20) as result %5.0f `r(N)' _col(30) %7.2f `r(mean)' _col(42) %7.2f `r(sd)' _col(54) %7.2f `r(min)' _col(66) %7.2f `r(max)'
}
di in green "{hline `long2'}"
* continuous covariates: the items of varlist and the string variables are removed
if "`continuous'"!="" {
local continuous2
foreach i of varlist `continuous' {
local candidate=1
forvalues j=1/`nbitems' {
if "`i'"=="``j''" {
local candidate=0
}
}
local type : type `i'
local type=substr("`type'",1,3)
if "`type'"=="str" {
local candidate=0
}
if `candidate'==1 {
qui levelsof `i' if `touse'
local r=r(r)
local continuous2 `continuous2' `i'
}
}
local continuous `continuous2'
}
* categorical covariates: the items of varlist are removed
if "`categorical'"!="" {
local categorical2
foreach i of varlist `categorical' {
local candidate=1
forvalues j=1/`nbitems' {
if "`i'"=="``j''" {
local candidate=0
}
}
if `candidate'==1 {
qui levelsof `i' if `touse'
local r=r(r)
local categorical2 `categorical2' `i'
}
}
local categorical `categorical2'
}
* all: each variable of the dataset is added to the continuous list when it has
* more than 7 levels and is neither an item nor a string, to the categorical list otherwise.
* NOTE(review): the items themselves and any temporary variable matched by *
* therefore end in the categorical list - confirm this is intended
if "`all'"!="" {
*local continuous
*local categorial
foreach i of varlist * {
local candidate=1
forvalues j=1/`nbitems' {
if "`i'"=="``j''" {
local candidate=0
}
}
local type : type `i'
local type=substr("`type'",1,3)
if "`type'"=="str" {
local candidate=0
}
qui levelsof `i' if `touse'
local r=r(r)
if `r'>7&`candidate'==1 {
local continuous `continuous' `i'
}
else {
local categorical `categorical' `i'
}
}
}
*di "CONTINUOUS : `continuous'"
*di "CATEGORICAL : `categorical'"
if "`continuous'"!="" {
di
di "Descriptive analysis of continuous covariates"
local long2=72
di in green "{hline `long2'}"
di in green "Variables" _col(22) "Obs" _col(33) "Mean" _col(40) "Std. Dev." _col(58) "Min" _col(70) "Max"
di in green "{hline `long2'}"
foreach i of varlist `continuous' {
qui su `i' if `touse'
local k=abbrev("`i'",18)
*di in green "`k'" _c
di in green "`k'" _col(20) as result %5.0f `r(N)' _col(30) %7.2f `r(mean)' _col(42) %7.2f `r(sd)' _col(54) %7.2f `r(min)' _col(66) %7.2f `r(max)'
}
di in green "{hline `long2'}"
}
if "`categorical'"!="" {
di
*set trace on
di "Descriptive analysis of categorical covariates"
local long2=55
di in green "{hline `long2'}"
di in green "Variables" _col(25) "Levels" _col(37) "Freq." _col(49) "Percent"
di in green "{hline `long2'}"
* m: rank of the current variable, used to close the table after the last one;
* nonret: variables not described because they have more than maxlevels levels
local nbc: word count `categorical'
local m=1
local nonret
foreach i of varlist `categorical' {
local type : type `i'
local type=substr("`type'",1,3)
* the levels are listed over the whole dataset, the frequencies are counted within `touse'
qui levelsof `i'
local lev "`r(levels)'"
local nblev=r(r)
if (`maxlevels'>=`nblev') {
local k=abbrev("`i'",20)
di in green "`k'" _c
if "`type'"=="str" {
foreach j in `lev' {
qui count if `touse'&`i'=="`j'"
local k=abbrev("`j'",10)
di _col(21) %10s in green "`k'" as result %5.0f _col(37) `r(N)' %6.2f _col(49) `=`r(N)'/`N'*100' "%"
}
* line of the missing values (empty strings), displayed only when there are some
qui count if `touse'&`i'==""
if `r(N)'!=0 {
di as result %5.0f _col(37) `r(N)' %6.2f _col(49) `=`r(N)'/`N'*100' "%"
}
}
else {
foreach j in `lev' {
qui count if `touse'&`i'==`j'
di _col(21) %10s in green "`j'" as result %5.0f _col(37) `r(N)' %6.2f _col(49) `=`r(N)'/`N'*100' "%"
}
* line of the missing values (.), displayed only when there are some
qui count if `touse'&`i'==.
if `r(N)'!=0 {
di as result %5.0f _col(37) `r(N)' %6.2f _col(49) `=`r(N)'/`N'*100' "%"
}
}
if `m'==`nbc' {
di in green "{hline `long2'}"
}
else {
di in green "{dup `long2':-}"
}
}
else {
local nonret `nonret' `i'
*di "local nonret `nonret' `i'"
*di "non retenu `i'"
if `m'==`nbc' {
di in green "{hline `long2'}"
}
}
local ++m
}
if "`nonret'"!="" {
di in green "Not described variables (too more levels) : " as result "`nonret'"
}
}
end

@ -0,0 +1,290 @@
*! Version 1.1 4 June 2019
*! Jean-Benoit Hardouin
************************************************************************************************************
* Stata program : descscale
* Description of a scale and covariates
* Release 1.1 : June 4, 2019
*
*
* Historic :
* Version 1 (April 12, 2019) [Jean-Benoit Hardouin]
* Version 1.1 (June 4, 2019) [Jean-Benoit Hardouin]
*
* Jean-benoit Hardouin, PhD, Assistant Professor
* Team of Methods in Patient Centered Outcomes and Health Research (INSERM U1246-SPHERE)
* University of Nantes - Faculty of Pharmaceutical Sciences
* France
* jean-benoit.hardouin@anaqol.org
*
*
* News about this program :http://www.anaqol.org
*
* Copyright 2019 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
***********************************************************************************************************
* descscale (version 1.1): description of the items of a scale, of its score(s) and of covariates.
*   varlist              : items, ordered dimension by dimension
*   min(#)               : first response category displayed (default 0)
*   continuous(varlist)  : covariates described as continuous variables
*   categorical(varlist) : covariates described as categorical variables
*   all                  : classifies every variable of the dataset as a covariate
*   partition(numlist)   : number of items per dimension (default: one dimension)
*   large                : accepts items with more than 11 levels
*   mean                 : passed to the external command -genscore-
*   maxlevels(#)         : categorical covariates with more levels are only listed (default 20)
* The program is declared rclass but no -return- statement appears in it.
program define descscale , rclass
version 8.2
syntax varlist [if] [in] [,MIN(int 0) CONTinuous(varlist) CATegorical(varlist) All PARTition(numlist) Large MEAN MAXlevels(int 20)]
preserve
* touse marks the if/in sample; novarlist: missing items do not exclude an observation
marksample touse,novarlist
label variable `touse' "selection"
* after tokenize, ``i'' is the name of the i-th item
tokenize `varlist'
local nbitems: word count `varlist'
if "`partition'"=="" {
local partition `nbitems'
}
local nbdim:word count `partition'
forvalues i=1/`nbdim' {
local nbitemsdim`i': word `i' of `partition'
}
qui count if `touse'
local N=r(N)
* checks of each item; here maxr = largest NUMBER OF LEVELS observed over the items
local maxr=0
forvalues i=1/`nbitems' {
qui levelsof ``i'' if `touse'
local r`i'=r(r)
qui su ``i'' if `touse'
local max`i'=r(max)
local mean`i'=r(mean)
* number of values which are neither an integer between 0 and 10 nor "."
* (this count is not restricted to the `touse' sample)
qui count if ``i''!=0&``i''!=1&``i''!=2&``i''!=3&``i''!=4&``i''!=5&``i''!=6&``i''!=7&``i''!=8&``i''!=9&``i''!=10&``i''!=.
local ninc=r(N)
local ok`i'=1
* NOTE(review): the two -exit- below carry no return code, so the program
* stops after the message without raising an error - confirm this is intended
if `r`i''>11&"`large'"=="" {
di in red "The number of answer categories to the items ``i'' is large (>11). Use the -large- option or correct the list of items."
local ok`i'=0
exit
}
if `ninc'>0 {
di in red "The variable ``i'' has incompatible values with an items (integers between 0 and 10)."
local ok`i'=0
exit
}
* NOTE(review): maxr is later used as the last displayed category, but it is
* a count of levels here, not the largest value max`i' - confirm
if `r`i''>`maxr' {
local maxr `r`i''
}
}
*di "maxr=`maxr'"
di
di in green "Number of individuals : " as result `N'
di
* table of the items: one column per category from min to maxr, then missing and mean
di in green "Decription of the items"
local long=(`maxr'+1-`min')*8+40
di in green "{hline `long'}"
di in green "Items" _col(23) "Obs" _c
local col=33
forvalues j=`min'/`maxr' {
di _col(`col') "`j'" _c
local col=`col'+8
}
di _col(`col') "." _col(`=`col'+4') "Mean"
di in green "{hline `long'}"
* deb/fin: positions of the first and last items of dimension d;
* loi`d' accumulates the list of the items of dimension d
local deb=1
forvalues d=1/`nbdim' {
local loi`d'
*di "local fin=`deb'+`nbitemsdim`d''-1"
local fin=`deb'+`nbitemsdim`d''-1
forvalues i=`deb'/`fin' {
if `ok`i''==1 {
local loi`d' `loi`d'' ``i''
qui count if `touse'&``i''!=.
local k=abbrev("``i''",20)
di in green "`k'" _col(22) as result %4.0f `r(N)' _c
local col=28
* percentages are computed over all the `N' selected individuals
forvalues j=`min'/`maxr' {
qui count if `touse'&``i''==`j'
local per=round(`r(N)'/`N'*100, 0.1)
di _col(`col') %5.1f `per' "%" _c
local col=`col'+8
}
qui count if `touse'&``i''==.
local per=round(`r(N)'/`N'*100,0.1)
di _col(`col') %5.1f `per' "%" _col(`=`col'+9') %4.2f `mean`i''
}
}
if `d'!=`nbdim' {
di in green "{dup `long':-}"
}
local deb=`fin'+1
}
di in green "{hline `long'}"
di
* table of the scores: one score per dimension, computed by -genscore-
di in green "Decription of the scores"
local long2=72
di in green "{hline `long2'}"
di in green "Scores" _col(22) "Obs" _col(33) "Mean" _col(40) "Std. Dev." _col(58) "Min" _col(70) "Max"
di in green "{hline `long2'}"
forvalues d=1/`nbdim' {
tempname score`d'
* `standardized' is not an option of the syntax above, so it expands to nothing
genscore `loi`d'' if `touse', score(`score`d'') `mean' `standardized'
qui su `score`d'' if `touse'
di in green "score`d'" _col(20) as result %5.0f `r(N)' _col(30) %7.2f `r(mean)' _col(42) %7.2f `r(sd)' _col(54) %7.2f `r(min)' _col(66) %7.2f `r(max)'
}
di in green "{hline `long2'}"
* filtering of the covariates: the items of varlist are removed from both lists,
* and the string variables from the continuous list.
* NOTE(review): both loops run as soon as ONE of the two options is given, so
* one of them may receive an empty list - confirm foreach accepts that
if "`continuous'"!=""|"`categorical'"!="" {
local continuous2
foreach i of varlist `continuous' {
local candidate=1
forvalues j=1/`nbitems' {
if "`i'"=="``j''" {
local candidate=0
}
}
local type : type `i'
local type=substr("`type'",1,3)
if "`type'"=="str" {
local candidate=0
}
if `candidate'==1 {
qui levelsof `i' if `touse'
local r=r(r)
local continuous2 `continuous2' `i'
}
}
local continuous `continuous2'
local categorical2
foreach i of varlist `categorical' {
local candidate=1
forvalues j=1/`nbitems' {
if "`i'"=="``j''" {
local candidate=0
}
}
if `candidate'==1 {
qui levelsof `i' if `touse'
local r=r(r)
local categorical2 `categorical2' `i'
}
}
local categorical `categorical2'
}
* all: each variable of the dataset is added to the continuous list when it has
* more than 7 levels and is neither an item nor a string, to the categorical list otherwise.
* NOTE(review): the items themselves and any temporary variable matched by *
* therefore end in the categorical list - confirm this is intended
if "`all'"!="" {
*local continuous
*local categorial
foreach i of varlist * {
local candidate=1
forvalues j=1/`nbitems' {
if "`i'"=="``j''" {
local candidate=0
}
}
local type : type `i'
local type=substr("`type'",1,3)
if "`type'"=="str" {
local candidate=0
}
qui levelsof `i' if `touse'
local r=r(r)
if `r'>7&`candidate'==1 {
local continuous `continuous' `i'
}
else {
local categorical `categorical' `i'
}
}
}
*di "CONTINUOUS : `continuous'"
*di "CATEGORICAL : `categorical'"
if "`continuous'"!="" {
di
di "Descriptive analysis of continuous covariates"
local long2=72
di in green "{hline `long2'}"
di in green "Variables" _col(22) "Obs" _col(33) "Mean" _col(40) "Std. Dev." _col(58) "Min" _col(70) "Max"
di in green "{hline `long2'}"
foreach i of varlist `continuous' {
qui su `i' if `touse'
local k=abbrev("`i'",18)
*di in green "`k'" _c
di in green "`k'" _col(20) as result %5.0f `r(N)' _col(30) %7.2f `r(mean)' _col(42) %7.2f `r(sd)' _col(54) %7.2f `r(min)' _col(66) %7.2f `r(max)'
}
di in green "{hline `long2'}"
}
if "`categorical'"!="" {
di
*set trace on
di "Descriptive analysis of categorical covariates"
local long2=55
di in green "{hline `long2'}"
di in green "Variables" _col(25) "Levels" _col(37) "Freq." _col(49) "Percent"
di in green "{hline `long2'}"
* m: rank of the current variable, used to close the table after the last one;
* nonret: variables not described because they have more than maxlevels levels
local nbc: word count `categorical'
local m=1
local nonret
foreach i of varlist `categorical' {
local type : type `i'
local type=substr("`type'",1,3)
* the levels are listed over the whole dataset, the frequencies are counted within `touse'
qui levelsof `i'
local lev "`r(levels)'"
local nblev=r(r)
if (`maxlevels'>=`nblev') {
local k=abbrev("`i'",20)
di in green "`k'" _c
if "`type'"=="str" {
foreach j in `lev' {
qui count if `touse'&`i'=="`j'"
local k=abbrev("`j'",10)
di _col(21) %10s in green "`k'" as result %5.0f _col(37) `r(N)' %6.2f _col(49) `=`r(N)'/`N'*100' "%"
}
* line of the missing values (empty strings), displayed only when there are some
qui count if `touse'&`i'==""
if `r(N)'!=0 {
di as result %5.0f _col(37) `r(N)' %6.2f _col(49) `=`r(N)'/`N'*100' "%"
}
}
else {
foreach j in `lev' {
qui count if `touse'&`i'==`j'
di _col(21) %10s in green "`j'" as result %5.0f _col(37) `r(N)' %6.2f _col(49) `=`r(N)'/`N'*100' "%"
}
* line of the missing values (.), displayed only when there are some
qui count if `touse'&`i'==.
if `r(N)'!=0 {
di as result %5.0f _col(37) `r(N)' %6.2f _col(49) `=`r(N)'/`N'*100' "%"
}
}
if `m'==`nbc' {
di in green "{hline `long2'}"
}
else {
di in green "{dup `long2':-}"
}
}
else {
local nonret `nonret' `i'
*di "local nonret `nonret' `i'"
*di "non retenu `i'"
if `m'==`nbc' {
di in green "{hline `long2'}"
}
}
local ++m
}
if "`nonret'"!="" {
di in green "Not described variables (too more levels) : " as result "`nonret'"
}
}
end

@ -0,0 +1,279 @@
*! Version 1 11 April 2019
*! Jean-Benoit Hardouin
************************************************************************************************************
* Stata program : descscale
* Description of a scale and covariates
* Release 1 : April 12, 2019
*
*
* Historic :
* Version 1 (April 12, 2019) [Jean-Benoit Hardouin]
*
* Jean-benoit Hardouin, PhD, Assistant Professor
* Team of Methods in Patient Centered Outcomes and Health Research (INSERM U1246-SPHERE)
* University of Nantes - Faculty of Pharmaceutical Sciences
* France
* jean-benoit.hardouin@anaqol.org
*
*
* News about this program :http://www.anaqol.org
*
* Copyright 2019 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
***********************************************************************************************************
program define descscale , rclass
    * Describe the items of a scale, the scores of its dimensions and,
    * optionally, a set of covariates. The program only displays tables;
    * it stores nothing in r().
    *
    * Arguments and options (see the syntax line):
    *   varlist        items of the scale, ordered dimension by dimension
    *   min()          lowest answer category shown in the items table
    *   partition()    number of items in each dimension (default: all the
    *                  items form a single dimension)
    *   continuous()   covariates summarized by obs, mean, sd, min and max
    *   categorical()  covariates tabulated level by level
    *   all            also describe the other variables of the dataset
    *   large          accept items with more than 11 answer categories
    *   mean           passed through to genscore
    *   maxlevels()    categorical covariates with more levels are only
    *                  listed, not tabulated
    version 8.2
    syntax varlist [if] [in] [,MIN(int 0) CONTinuous(varlist) CATegorical(varlist) All PARTition(numlist) Large MEAN MAXlevels(int 20)]
    * The data are preserved because score variables are created below.
    preserve
    marksample touse,novarlist
    label variable `touse' "selection"
    tokenize `varlist'
    local nbitems: word count `varlist'
    if "`partition'"=="" {
        local partition `nbitems'
    }
    local nbdim:word count `partition'
    forvalues i=1/`nbdim' {
        local nbitemsdim`i': word `i' of `partition'
    }
    qui count if `touse'
    local N=r(N)

    * Number of levels and mean of each item; maxr is the largest number of
    * levels found and is used as the last category column of the table.
    local maxr=0
    forvalues i=1/`nbitems' {
        qui levelsof ``i'' if `touse'
        local r`i'=r(r)
        qui su ``i'' if `touse'
        local mean`i'=r(mean)
        if `r`i''>11&"`large'"=="" {
            di in red "The number of answer categories to the items is large (>11). Use the -large- option or correct the list of items."
            * Return a nonzero code so that callers can detect the failure.
            exit 198
        }
        if `r`i''>`maxr' {
            local maxr `r`i''
        }
    }

    * ---- Items table ----
    di
    di in green "Number of individuals : " as result `N'
    di
    di in green "Decription of the items"
    local long=(`maxr'+1-`min')*8+40
    di in green "{hline `long'}"
    di in green "Items" _col(23) "Obs" _c
    local col=33
    forvalues j=`min'/`maxr' {
        di _col(`col') "`j'" _c
        local col=`col'+8
    }
    di _col(`col') "." _col(`=`col'+4') "Mean"
    di in green "{hline `long'}"
    local deb=1
    forvalues d=1/`nbdim' {
        * loi# collects the items of dimension #, reused to build the score.
        local loi`d'
        local fin=`deb'+`nbitemsdim`d''-1
        forvalues i=`deb'/`fin' {
            local loi`d' `loi`d'' ``i''
            qui count if `touse'&``i''!=.
            local k=abbrev("``i''",20)
            * The item name is printed once (it used to be printed twice).
            di in green "`k'" _col(22) as result %4.0f `r(N)' _c
            local col=28
            forvalues j=`min'/`maxr' {
                qui count if `touse'&``i''==`j'
                local per=round(`r(N)'/`N'*100, 0.1)
                di _col(`col') %5.1f `per' "%" _c
                local col=`col'+8
            }
            * Last percentage column: missing answers.
            qui count if `touse'&``i''==.
            local per=round(`r(N)'/`N'*100,0.1)
            di _col(`col') %5.1f `per' "%" _col(`=`col'+9') %4.2f `mean`i''
        }
        if `d'!=`nbdim' {
            di in green "{dup `long':-}"
        }
        local deb=`fin'+1
    }
    di in green "{hline `long'}"

    * ---- Scores table ----
    di
    di in green "Decription of the scores"
    local long2=72
    di in green "{hline `long2'}"
    di in green "Scores" _col(22) "Obs" _col(33) "Mean" _col(40) "Std. Dev." _col(58) "Min" _col(70) "Max"
    di in green "{hline `long2'}"
    forvalues d=1/`nbdim' {
        tempname score`d'
        * NOTE(review): no standardized option is declared in the syntax line,
        * so that macro is always empty here - confirm whether it is wanted.
        genscore `loi`d'' if `touse', score(`score`d'') `mean' `standardized'
        qui su `score`d'' if `touse'
        di in green "score`d'" _col(20) as result %5.0f `r(N)' _col(30) %7.2f `r(mean)' _col(42) %7.2f `r(sd)' _col(54) %7.2f `r(min)' _col(66) %7.2f `r(max)'
    }
    di in green "{hline `long2'}"

    * ---- Covariates requested explicitly: drop the items of the scale,
    *      and drop string variables from the continuous list ----
    if "`continuous'"!=""|"`categorical'"!="" {
        local continuous2
        foreach i of varlist `continuous' {
            local candidate=1
            forvalues j=1/`nbitems' {
                if "`i'"=="``j''" {
                    local candidate=0
                }
            }
            local type : type `i'
            local type=substr("`type'",1,3)
            if "`type'"=="str" {
                local candidate=0
            }
            if `candidate'==1 {
                local continuous2 `continuous2' `i'
            }
        }
        local continuous `continuous2'
        local categorical2
        foreach i of varlist `categorical' {
            local candidate=1
            forvalues j=1/`nbitems' {
                if "`i'"=="``j''" {
                    local candidate=0
                }
            }
            if `candidate'==1 {
                local categorical2 `categorical2' `i'
            }
        }
        local categorical `categorical2'
    }

    * ---- Option all: classify every other variable of the dataset ----
    if "`all'"!="" {
        * Names that must not be described as covariates: the selection
        * marker and the score names handed to genscore (presumably created
        * as variables by genscore - confirm).
        local internal `touse'
        forvalues d=1/`nbdim' {
            local internal `internal' `score`d''
        }
        foreach i of varlist * {
            local skip=0
            * Items of the scale are skipped, as for the explicit lists above
            * (they used to end up in the categorical list).
            forvalues j=1/`nbitems' {
                if "`i'"=="``j''" {
                    local skip=1
                }
            }
            foreach v in `internal' {
                if "`i'"=="`v'" {
                    local skip=1
                }
            }
            if `skip'==0 {
                local type : type `i'
                local type=substr("`type'",1,3)
                if "`type'"=="str" {
                    local categorical `categorical' `i'
                }
                else {
                    * More than 7 distinct values: treated as continuous.
                    qui levelsof `i' if `touse'
                    local r=r(r)
                    if `r'>7 {
                        local continuous `continuous' `i'
                    }
                    else {
                        local categorical `categorical' `i'
                    }
                }
            }
        }
    }

    * ---- Continuous covariates table ----
    if "`continuous'"!="" {
        di
        di "Descriptive analysis of continuous covariates"
        local long2=72
        di in green "{hline `long2'}"
        di in green "Variables" _col(22) "Obs" _col(33) "Mean" _col(40) "Std. Dev." _col(58) "Min" _col(70) "Max"
        di in green "{hline `long2'}"
        foreach i of varlist `continuous' {
            qui su `i' if `touse'
            local k=abbrev("`i'",18)
            di in green "`k'" _col(20) as result %5.0f `r(N)' _col(30) %7.2f `r(mean)' _col(42) %7.2f `r(sd)' _col(54) %7.2f `r(min)' _col(66) %7.2f `r(max)'
        }
        di in green "{hline `long2'}"
    }

    * ---- Categorical covariates table ----
    if "`categorical'"!="" {
        di
        * The title used to say continuous covariates by mistake.
        di "Descriptive analysis of categorical covariates"
        local long2=55
        di in green "{hline `long2'}"
        di in green "Variables" _col(25) "Levels" _col(37) "Freq." _col(49) "Percent"
        di in green "{hline `long2'}"
        local nbc: word count `categorical'
        local m=1
        * nonret collects the variables with more levels than maxlevels.
        local nonret
        foreach i of varlist `categorical' {
            local type : type `i'
            local type=substr("`type'",1,3)
            * NOTE(review): the levels are taken over the whole dataset while
            * the frequencies below are restricted to the selected sample.
            qui levelsof `i'
            local lev "`r(levels)'"
            local nblev=r(r)
            if (`maxlevels'>=`nblev') {
                local k=abbrev("`i'",20)
                di in green "`k'" _c
                if "`type'"=="str" {
                    foreach j in `lev' {
                        qui count if `touse'&`i'=="`j'"
                        local k=abbrev("`j'",10)
                        di _col(21) %10s in green "`k'" as result %5.0f _col(37) `r(N)' %6.2f _col(49) `=`r(N)'/`N'*100' "%"
                    }
                    * Extra unlabeled row for empty strings, when there are any.
                    qui count if `touse'&`i'==""
                    if `r(N)'!=0 {
                        di as result %5.0f _col(37) `r(N)' %6.2f _col(49) `=`r(N)'/`N'*100' "%"
                    }
                }
                else {
                    foreach j in `lev' {
                        qui count if `touse'&`i'==`j'
                        di _col(21) %10s in green "`j'" as result %5.0f _col(37) `r(N)' %6.2f _col(49) `=`r(N)'/`N'*100' "%"
                    }
                    * Extra unlabeled row for missing values, when there are any.
                    qui count if `touse'&`i'==.
                    if `r(N)'!=0 {
                        di as result %5.0f _col(37) `r(N)' %6.2f _col(49) `=`r(N)'/`N'*100' "%"
                    }
                }
                if `m'==`nbc' {
                    di in green "{hline `long2'}"
                }
                else {
                    di in green "{dup `long2':-}"
                }
            }
            else {
                local nonret `nonret' `i'
                if `m'==`nbc' {
                    di in green "{hline `long2'}"
                }
            }
            local ++m
        }
        if "`nonret'"!="" {
            di in green "Not described variables (too more levels) : " as result "`nonret'"
        }
    }
end

@ -0,0 +1,302 @@
************************************************************************************************************
* DETECT: detect, Iss and R indexes
* Version 3.1: May 13, 2004
*
* Historic
* Version 1 (2003-06-20): Jean-Benoit Hardouin
* Version 2 (2004-01-18): Jean-Benoit Hardouin
* Version 3 (2004-01-26): Jean-Benoit Hardouin
* Version 3.1 (2004-05-13): Jean-Benoit Hardouin
* Version 4 (2009-11-24): Jean-Benoit Hardouin /*corrections*/
*
* Jean-benoit Hardouin, Regional Health Observatory of Orléans - France
* jean-benoit.hardouin@neuf.fr
*
* News about this program : http://anaqol.free.fr
* FreeIRT Project : http://freeirt.free.fr
*
* Copyright 2003, 2004 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
************************************************************************************************************
program define detect , rclass
    * Compute the DETECT, Iss and R indexes of a partition of items and
    * display the correlations between the items and the dimension scores
    * and rest-scores.
    *
    * Arguments and options:
    *   varlist        numeric items, ordered dimension by dimension
    *   partition()    number of items in each dimension
    *   noscores       do not display the items-scores correlation table
    *   norestscores   do not display the items-rest-scores correlation table
    *
    * Returns r(DETECT), r(Iss), r(R) and the matrices r(Tcov), r(Rcov),
    * r(Covfin), r(Corrscores) and r(Corrrestscores).
    version 7.0
    syntax varlist(min=2 numeric), PARTition(numlist integer >0) [noSCOres noRESTscores]
    local nbitems:word count `varlist'
    local Q:word count `partition'

    * Check the partition before it is used to pick items out of varlist.
    local nbitemstest=0
    forvalues i=1/`Q' {
        local dim`i':word `i' of `partition'
        local nbitemstest=`nbitemstest'+`dim`i''
    }
    if `nbitems'!=`nbitemstest' {
        di in red "The sum of the numbers of items in all the dimensions is different of the total number of items precised in varlist"
        * Nonzero return code so that callers can detect the failure.
        exit 198
    }

    * Score of each dimension = sum of its items. firstitem# is the position
    * in varlist of the first item of dimension #.
    local firstitem0=0
    local dim0=1
    forvalues i=1/`Q' {
        local firstitem`i'=`firstitem`=`i'-1''+`dim`=`i'-1''
        tempvar score`i'
        qui gen `score`i''=0
        forvalues j=`firstitem`i''/`=`firstitem`i''+`dim`i''-1' {
            local item`j': word `j' of `varlist'
            qui replace `score`i''=`score`i''+`item`j''
        }
    }
    tokenize `varlist'

    * Correlations of each item with each score and with each score minus
    * the item.
    * NOTE(review): the item is subtracted from the score of every
    * dimension, including dimensions it does not belong to - confirm.
    tempname Corrscores Corrrestscores
    matrix define `Corrscores'=J(`nbitems',`Q',0)
    matrix define `Corrrestscores'=J(`nbitems',`Q',0)
    forvalues i=1/`nbitems' {
        forvalues j=1/`Q' {
            tempvar restscore`i's`j'
            qui gen `restscore`i's`j''=`score`j''-``i''
            qui corr ``i'' `score`j''
            local corr`i's`j'=r(rho)
            qui corr ``i'' `restscore`i's`j''
            local corr`i'rs`j'=r(rho)
            matrix `Corrscores'[`i',`j']=`corr`i's`j''
            matrix `Corrrestscores'[`i',`j']=`corr`i'rs`j''
        }
    }
    qui count
    local nbind=r(N)

    * Total score and, for each pair of items, total score without the pair.
    tempvar score
    qui gen `score'=0
    forvalues i=1/`nbitems' {
        qui replace `score'=`score'+``i''
    }
    forvalues i=1/`nbitems' {
        local tmp=`i'+1
        forvalues j=`tmp'/`nbitems' {
            tempvar restscorei`i'j`j'
            qui gen `restscorei`i'j`j''=`score'-``i''-``j''
        }
    }

    * Cross-product matrices of the items within each level of the total
    * score, divided by the number of observations at that level.
    forvalues k=0/`nbitems'{
        tempname Tcov`k'
        qui count if `score'==`k'
        local n`k'=r(N)
        if `n`k''>1 {
            qui matrix accum `Tcov`k''=`varlist' if `score'==`k',nocons dev
        }
        else {
            matrix `Tcov`k''=J(`nbitems',`nbitems',0)
        }
        if `n`k''!=0 {
            matrix `Tcov`k''=`Tcov`k''/`n`k''
        }
    }

    * Same computation within each level of the pair-specific rest-score.
    local kmax=`nbitems'-2
    forvalues i=1/`nbitems'{
        local tmp=`i'+1
        forvalues j=`tmp'/`nbitems' {
            forvalues k=0/`kmax' {
                tempname Rcovi`i'j`j'k`k'
                qui count if `restscorei`i'j`j''==`k'
                local ni`i'j`j'k`k'=r(N)
                if `ni`i'j`j'k`k''>1 {
                    qui matrix accum `Rcovi`i'j`j'k`k''=`varlist' if `restscorei`i'j`j''==`k',nocons dev
                }
                else {
                    matrix `Rcovi`i'j`j'k`k''=J(`nbitems',`nbitems',0)
                }
                if `ni`i'j`j'k`k''!=0 {
                    matrix `Rcovi`i'j`j'k`k''=`Rcovi`i'j`j'k`k''/`ni`i'j`j'k`k''
                }
            }
        }
    }

    * delta = 1 for two items of the same dimension, -1 otherwise.
    tempname delta
    matrix `delta'=J(`nbitems',`nbitems',-1)
    local debut=1
    local fin=0
    forvalues i=1/`Q' {
        local fin=`fin'+`dim`i''
        forvalues j=`debut'/`fin' {
            forvalues k=`debut'/`fin' {
                matrix `delta'[`j',`k']=1
            }
        }
        local debut=`debut'+`dim`i''
    }

    * Aggregate the per-level matrices, weighting each level by its size.
    tempname Tcov Rcov Covfin Issm Abscov
    matrix `Tcov'=J(`nbitems',`nbitems',0)
    matrix `Rcov'=J(`nbitems',`nbitems',0)
    forvalues k=0/`nbitems' {
        matrix `Tcov'=`Tcov'+`Tcov`k''*`n`k''
    }
    forvalues i=1/`nbitems'{
        local tmp=`i'+1
        forvalues j=`tmp'/`nbitems' {
            forvalues k=0/`kmax' {
                matrix `Rcov'[`i',`j']=`Rcov'[`i',`j']+`Rcovi`i'j`j'k`k''[`i',`j']*`ni`i'j`j'k`k''
                matrix `Rcov'[`j',`i']=`Rcov'[`i',`j']
            }
        }
    }
    matrix `Covfin'=J(`nbitems',`nbitems',0)
    matrix `Issm'=J(`nbitems',`nbitems',0)
    matrix `Abscov'=J(`nbitems',`nbitems',0)
    forvalues i=1/`nbitems' {
        forvalues j=1/`nbitems' {
            matrix `Covfin'[`i',`j']=(`Tcov'[`i',`j']+`Rcov'[`i',`j'])/2*`delta'[`i',`j']
            matrix `Issm'[`i',`j']=sign(`Tcov'[`i',`j']+`Rcov'[`i',`j'])*`delta'[`i',`j']
            matrix `Abscov'[`i',`j']=abs(`Tcov'[`i',`j']+`Rcov'[`i',`j'])/2
        }
    }

    * Sum over the pairs of items (upper triangle).
    local somme=0
    local Iss=0
    local R=0
    forvalues i=1/`nbitems' {
        local tmp=`i'+1
        forvalues j=`tmp'/`nbitems' {
            local somme=`somme'+`Covfin'[`i',`j']
            local Iss=`Iss'+`Issm'[`i',`j']
            local R=`R'+`Abscov'[`i',`j']
        }
    }
    * NOTE(review): an earlier normalisation also divided by nbind; it was
    * overwritten by the line below and is kept here only as a comment.
    *local DETECT=`somme'/(`nbind'*`nbitems'*(`nbitems'-1))
    local DETECT=`somme'/(`nbitems'*(`nbitems'-1))
    local Iss=`Iss'*2/(`nbitems'*(`nbitems'-1))
    *local R=`DETECT'/(`R'/(`nbind'*`nbitems'*(`nbitems'-1)))
    local R=`DETECT'/(`R'/(`nbitems'*(`nbitems'-1)))
    di
    di in green _col(20) "DETECT : " as result %5.4f `DETECT'
    di in green _col(23) "Iss : " as result %5.4f `Iss'
    di in green _col(25) "R : " as result %5.4f `R'
    di

    * Items-scores correlation table, unless noscores was specified.
    if "`scores'"=="" {
        di _col(5) in green "Correlations Items-Scores"
        di in green _col(5) "{hline 25}"
        di
        di in green _col(5) "Items" _continue
        local col=10
        forvalues q=1/`Q' {
            local col=`col'+10
            di in green _col(`col') "dim `q'" _continue
        }
        di
        local length=`Q'*10+10
        di in green _col(5) "{hline `length'}"
        forvalues i=1/`nbitems' {
            * Separator line before the first item of each new dimension.
            forvalues q=2/`Q' {
                if `i'==`firstitem`q'' {
                    di _col(5) in green _dup(`length') "-"
                }
            }
            di in green _col(5) "``i''" _continue
            local col=5
            forvalues q=1/`Q' {
                local col=`col'+10
                di in yellow _col(`col') %10.4f `corr`i's`q'' _continue
            }
            di
        }
        di in green _col(5) "{hline `length'}"
        di
    }

    * Items-rest-scores correlation table, unless norestscores was specified.
    * The option macro is restscores; the former test on restscore was always
    * true, so the option had no effect.
    if "`restscores'"=="" {
        di _col(5) in green "Correlations Items-Rest-Scores"
        di in green _col(5) "{hline 30}"
        di
        di in green _col(5) "Items" _continue
        local col=10
        forvalues q=1/`Q' {
            local col=`col'+10
            di in green _col(`col') "dim `q'" _continue
        }
        di
        local length=`Q'*10+10
        di in green _col(5) "{hline `length'}"
        forvalues i=1/`nbitems' {
            forvalues q=2/`Q' {
                if `i'==`firstitem`q'' {
                    di _col(5) in green _dup(`length') "-"
                }
            }
            di in green _col(5) "``i''" _continue
            local col=5
            forvalues q=1/`Q' {
                local col=`col'+10
                di in yellow _col(`col') %10.4f `corr`i'rs`q'' _continue
            }
            di
        }
        di in green _col(5) "{hline `length'}"
        di
    }

    * Label and return the results.
    local namesdim
    forvalues q=1/`Q' {
        local namesdim "`namesdim' dim`q'"
    }
    matrix rownames `Tcov'=`varlist'
    matrix rownames `Rcov'= `varlist'
    matrix rownames `Covfin'= `varlist'
    matrix rownames `Corrscores'= `varlist'
    matrix rownames `Corrrestscores'= `varlist'
    matrix colnames `Tcov'= `varlist'
    matrix colnames `Rcov' =`varlist'
    matrix colnames `Covfin'= `varlist'
    matrix colnames `Corrscores'= `namesdim'
    matrix colnames `Corrrestscores'= `namesdim'
    return scalar DETECT=`DETECT'
    return scalar Iss=`Iss'
    return scalar R=`R'
    return matrix Tcov `Tcov'
    return matrix Rcov `Rcov'
    return matrix Covfin `Covfin'
    return matrix Corrscores `Corrscores'
    return matrix Corrrestscores `Corrrestscores'
end

@ -0,0 +1,48 @@
{smcl}
{* 26jan2004}{...}
{hline}
help for {hi:detect}
{hline}
{title:DETECT Index}
{p 8 14 2}{cmd:detect} {it:varlist} {cmd:,} {cmdab:part:ition}({it:numlist}) [{cmdab:nosco:res} {cmdab:norest:scores}]
{p 4 4 2}{it:varlist} is a list of two or more existing dichotomous variables (items). The first items of this list compose the first dimension, the following items define the second dimension, and so on.
{p 4 4 2}{cmd:partition} defines the number of items in each dimension. The user must specify in the {it:numlist} the number of items in each dimension.
{title:Options}
{p 4 4 2}{cmd:noscores} suppresses the table of the correlations between the items and the scores.
{p 4 4 2}{cmd:norestscores} suppresses the table of the correlations between the items and the rest-scores.
{title:Description}
{p 4 4 2}{cmd:detect} computes the DETECT index (Dimensionality Evaluation to Enumerate Contributing Traits), as well as the Iss (Approximate Simple Structure Index) and the R indexes defined by Zhang and Stout (1999).
{title:Remarks}
{p 4 4 2}For detailed information on the DETECT, Iss and R indexes, see Zhang and Stout (1999).
{p 4 4 2}{cmd:detect} does not support polytomous items.
{title:Example}
{cmd:. detect item1 item2 item3 item4 , partition(2 2) nosco norest}
{cmd:. detect item1 item2 item3 item4 item5 item6 item7 , partition(3 2 2)}
{title:References}
{p 4 4 2}Zhang J. and Stout W., The theoretical DETECT index of dimensionality and its application to approximate simple structure, {it: Psychometrika}, vol.64(2), 1999, pp. 213-249.
{title:Author}
{p 4 8 2}Jean-Benoit Hardouin, Regional Health Observatory (ORS) - 1, rue Porte Madeleine - BP 2439 - 45032 Orleans Cedex 1 - France.
You can contact the author at {browse "mailto:jean-benoit.hardouin@neuf.fr":jean-benoit.hardouin@neuf.fr} and visit the websites {browse "http://anaqol.free.fr":AnaQol} and {browse "http://freeirt.free.fr":FreeIRT}

@ -0,0 +1,68 @@
************************************************************************************************************
* based on the command "detect" by Jean-Benoit Hardouin
************************************************************************************************************
program define detect2 , rclass
    * Compute the correlations between each item and the score of each
    * dimension, and between each item and each score minus the item.
    *
    * Arguments and options:
    *   varlist        numeric items, ordered dimension by dimension
    *   partition()    number of items in each dimension
    *   noscores, norestscores   accepted but not used by this program
    *
    * Returns the matrices r(Corrscores) and r(Corrrestscores), with one row
    * per item and one column per dimension.
    version 7.0
    syntax varlist(min=2 numeric), PARTition(numlist integer >0) [noSCOres noRESTscores]
    local nbitems:word count `varlist'
    local Q:word count `partition'

    * Check the partition before it is used to pick items out of varlist.
    local nbitemstest=0
    forvalues i=1/`Q' {
        local dim`i':word `i' of `partition'
        local nbitemstest=`nbitemstest'+`dim`i''
    }
    if `nbitems'!=`nbitemstest' {
        di in red "The sum of the numbers of items in all the dimensions is different of the total number of items precised in varlist"
        * Nonzero return code so that callers can detect the failure.
        exit 198
    }

    * Score of each dimension = sum of its items. firstitem# is the position
    * in varlist of the first item of dimension #.
    local firstitem0=0
    local dim0=1
    forvalues i=1/`Q' {
        local firstitem`i'=`firstitem`=`i'-1''+`dim`=`i'-1''
        tempvar score`i'
        qui gen `score`i''=0
        forvalues j=`firstitem`i''/`=`firstitem`i''+`dim`i''-1' {
            local item`j': word `j' of `varlist'
            qui replace `score`i''=`score`i''+`item`j''
        }
    }
    tokenize `varlist'

    * NOTE(review): the item is subtracted from the score of every
    * dimension, including dimensions it does not belong to - confirm.
    tempname Corrscores Corrrestscores
    matrix define `Corrscores'=J(`nbitems',`Q',0)
    matrix define `Corrrestscores'=J(`nbitems',`Q',0)
    forvalues i=1/`nbitems' {
        forvalues j=1/`Q' {
            tempvar restscore`i's`j'
            qui gen `restscore`i's`j''=`score`j''-``i''
            qui corr ``i'' `score`j''
            local corr`i's`j'=r(rho)
            qui corr ``i'' `restscore`i's`j''
            local corr`i'rs`j'=r(rho)
            matrix `Corrscores'[`i',`j']=`corr`i's`j''
            matrix `Corrrestscores'[`i',`j']=`corr`i'rs`j''
        }
    }

    * Name the columns dim1, dim2, ... and return the two matrices.
    local namesdim
    forvalues q=1/`Q' {
        local namesdim "`namesdim' dim`q'"
    }
    matrix colnames `Corrscores'= `namesdim'
    matrix colnames `Corrrestscores'= `namesdim'
    return matrix Corrscores `Corrscores'
    return matrix Corrrestscores `Corrrestscores'
end

@ -0,0 +1,23 @@
program define dropmissing
    * Recode user-defined missing codes to the system missing value, or drop
    * the observations that carry them.
    *
    * Arguments and options:
    *   varlist     variables to process
    *   missing()   list of codes regarded as missing (default: .)
    *   delete      drop the matching observations instead of recoding them
    syntax varlist [, missing(string) delete]
    if "`missing'"=="" {
        local missing="."
    }
    foreach var of local varlist {
        foreach miss of local missing {
            if "`delete'"!="" {
                drop if `var'==`miss'
            }
            else {
                * The macro quotes around the variable were unbalanced here,
                * which made this branch fail.
                replace `var'=. if `var'==`miss'
            }
        }
    }
end

@ -0,0 +1,344 @@
************************************************************************************************************
* Estpop : Estimation de la population d'une commune, d'un canton, d'un département de la région Centre
* (ou de la région Centre toute entière)
*
* Version 2.1: 7 décembre 2004
* Version 2: 2 décembre 2004
* Version 1.5: 2 décembre 2004
* Version 1.4: 21 octobre 2004
* Version 1.3: 16 aout 2004
* Version 1.2: 20 juillet 2004
* Version 1.1: 16 juillet 2004
* Version 1: 16 juillet 2004
*
* Jean-benoit Hardouin, Observatoire Régional de la Santé du Centre - Orléans - France
* jean-benoit.hardouin@neuf.fr
*
* Copyright 2004 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
************************************************************************************************************
program define estpop,rclass
    * Display and return population estimates by single year of age (0 to 95)
    * for a geographic unit, read from data files stored at fixed paths
    * under C:\ado\personal\files.
    *
    * Options:
    *   annee()    year, from 1982 to 1999 (or 82 to 99)
    *   codegeo()  geographic code; codes above 100 use the census files,
    *              codes below 100 use the France, region and department file
    *   sexe()     0 = both sexes, 1 = men, 2 = women
    *   quinq      display five-year age groups instead of single years
    *   juillet    average of the year and the following year
    *              (only used for codes below 100)
    *
    * Returns r(popr#) for each age #, r(popr) for the total and r(poprq#)
    * for the five-year groups.
    * NOTE(review): codegeo equal to 100 matches neither branch below, so no
    * population is loaded in that case - confirm this value cannot occur.
    version 7.0
    syntax , [annee(int 1999) codegeo(int 1) sexe(int 0) quinq juillet]
    * Save the data in memory, if any, so that they can be reloaded at the end.
    capture su *
    if _rc==0 {
        tempfile saveestpop
        qui save `saveestpop',replace
    }
    preserve
    * annee is kept on two digits, sannee on four digits.
    if `annee'>=1900&`annee'<2000 {
        local annee=`annee'-1900
    }
    local sannee=1900+`annee'
    if `sannee'<1982 {
        * This used to assign to a macro named after the value of sannee,
        * leaving sannee itself unchanged.
        local sannee=`sannee'+100
    }
    if `annee'<82|`annee'>99 {
        di in red "Vous devez indiquer une annee entre 1982 et 1999 (ou entre 82 et 99)"
        exit 198
    }
    if `sexe'==0 {
        local sexel hf
    }
    else if `sexe'==1 {
        local sexel h
    }
    else if `sexe'==2 {
        local sexel f
    }
    else {
        di in red "Vous devez indiquer un sexe parmi 0 (2 sexes confondus), 1 (hommes) et 2 (femmes)"
        exit 198
    }
    * Case 1: municipality, ZE or canton
    if `codegeo'>100 {
        use "C:\ado\personal\files\rp`sexel'829099.dta", clear
        sort codecom
        merge codecom using "C:\ado\personal\files\nais8002.dta"
        * Births are split between sexes with a fixed ratio of 1.05 to 1.
        if `sexe'!=0 {
            foreach i of numlist 80/99 00/02 {
                if `sexe'==1 {
                    local ratio=1.05/2.05
                }
                else if `sexe'==2 {
                    local ratio=1/2.05
                }
                if `i'<10 {
                    qui replace naiss0`i'=naiss0`i'*`ratio'
                }
                else if `i'>=10 {
                    qui replace naiss`i'=naiss`i'*`ratio'
                }
            }
        }
        * flag marks the rows that belong to the requested geographic unit.
        tempvar flag
        gen `flag'=0
        if `codegeo'==0 {
            qui replace `flag'=1 if codecom==0
        }
        if `codegeo'==1 {
            qui replace `flag'=1 if codecom!=0
        }
        if `codegeo'<100 {
            qui replace `flag'=1 if dep==`codegeo'
        }
        if `codegeo'<1000&`codegeo'>100 {
            qui replace `flag'=1 if arr==`codegeo'
        }
        if `codegeo'<10000&`codegeo'>1000&int(`codegeo'/100)!=24 {
            qui replace `flag'=1 if pct==`codegeo'
        }
        if int(`codegeo'/100)==24 {
            qui replace `flag'=1 if ze94==`codegeo'
        }
        if `codegeo'<100000&`codegeo'>10000 {
            qui replace `flag'=1 if codecom==`codegeo'
        }
        qui keep if `flag'==1
        * Years 82, 90 and 99: the rp variables are summed directly.
        if `annee'==82|`annee'==90|`annee'==99 {
            forvalues i=1/95 {
                qui su rp`annee'`sexel'`i'
                local popr`i'=r(sum)
                return scalar popr`i'=`popr`i''
            }
        }
        * Years in between: interpolation between the years anc and suiv.
        if (`annee'>90&`annee'<99)|(`annee'>82&`annee'<90) {
            if `annee'>90&`annee'<99 {
                local first=`annee'-90
                local last=98-`annee'
                local anc=90
                local suiv=99
                local nbans=9
            }
            if `annee'>82&`annee'<90 {
                local first=`annee'-82
                local last=89-`annee'
                local anc=82
                local suiv=90
                local nbans=8
            }
            * Ages 1 to first: interpolation between the births of the
            * matching year and the next census.
            forvalues i=1/`first' {
                local tmp=`suiv'-`annee'+`i'
                local tmp2=`suiv'-`annee'+1
                tempvar rp`annee'`sexel'`i'
                qui gen `rp`annee'`sexel'`i''=naiss`=`annee'-`i''+`i'*(rp`suiv'`sexel'`tmp'-naiss`=`annee'-`i'')/`tmp'
                qui su `rp`annee'`sexel'`i''
                local popr`i'=r(sum)
                return scalar popr`i'=`popr`i''
            }
            local rp`annee'`sexel'94 rp`annee'`sexel'95
            * Ages 94 and 95: linear interpolation for every year in between.
            forvalues i=`=`anc'+1'/`=`suiv'-1' {
                tempname rp`i'`sexel'94 rp`i'`sexel'95
                qui gen `rp`i'`sexel'94'=rp`anc'`sexel'94+(rp`suiv'`sexel'94-rp`anc'`sexel'94)/`nbans'*(`i'-`anc')
                qui gen `rp`i'`sexel'95'=rp`anc'`sexel'95+(rp`suiv'`sexel'95-rp`anc'`sexel'95)/`nbans'*(`i'-`anc')
            }
            qui su `rp`annee'`sexel'94'
            local popr94=r(sum)
            return scalar popr94=`popr94'
            qui su `rp`annee'`sexel'95'
            local popr95=r(sum)
            return scalar popr95=`popr95'
            * Intermediate ages: interpolation between the same cohort in the
            * two surrounding censuses.
            forvalues i=`=`first'+1'/`=95-`nbans'' {
                tempvar rp`annee'`sexel'`i'
                local tmp=`anc'+`i'-`annee'
                local tmp2=`suiv'+`i'-`annee'
                qui gen `rp`annee'`sexel'`i''=rp`anc'`sexel'`tmp'+(`annee'-`anc')*(rp`suiv'`sexel'`tmp2'-rp`anc'`sexel'`tmp')/`nbans'
                qui su `rp`annee'`sexel'`i''
                local popr`i'=r(sum)
                return scalar popr`i'=`popr`i''
            }
            * Oldest ages up to 93: interpolation towards the interpolated
            * age-94 variable of the year tmp2.
            forvalues i=`=95-`nbans'+1'/93 {
                tempvar rp`annee'`sexel'`i'
                local tmp=`anc'+`i'-`annee'
                local tmp2=94-`i'+`annee'
                qui gen `rp`annee'`sexel'`i''=rp`anc'`sexel'`tmp'+`=`annee'-`anc''*(`rp`tmp2'`sexel'94'-rp`anc'`sexel'`tmp')/`=`tmp2'-`anc''
                qui su `rp`annee'`sexel'`i''
                local popr`i'=r(sum)
                return scalar popr`i'=`popr`i''
            }
            /*qui su naiss`annee'
            local popr0=r(sum)
            return scalar popr0=`popr0'*/
        }
        * Age 0 is taken from the births of the year.
        qui su naiss`annee'
        local popr0=r(sum)
        return scalar popr0=`popr0'
    }
    * Case 2: France, region or department
    else if `codegeo'<100 {
        use "C:\ado\personal\files\popfrregdept8201.dta", clear
        if `sexe'==0 {
            replace sexe=0
        }
        if "`juillet'"=="" {
            forvalues i=0/95 {
                qui su age`i' if annee==`sannee'&codegeo==`codegeo'&sexe==`sexe'
                local popr`i'=r(sum)
                return scalar popr`i'=`popr`i''
            }
        }
        else if "`juillet'"!="" {
            * Average of the year and the following year.
            local pannee=`sannee'+1
            forvalues i=0/95 {
                qui su age`i' if annee==`pannee'&codegeo==`codegeo'&sexe==`sexe'
                local poppr`i'=r(sum)
                qui su age`i' if annee==`sannee'&codegeo==`codegeo'&sexe==`sexe'
                local popsr`i'=r(sum)
                local popr`i'=(`poppr`i''+`popsr`i'')/2
                return scalar popr`i'=`popr`i''
            }
        }
    }
    * Total and five-year age groups.
    local popr=0
    forvalues i=0/95 {
        local popr=`popr'+`popr`i''
    }
    return scalar popr=`popr'
    local poprq1=`popr1'+`popr2'+`popr3'+`popr4'
    return scalar poprq1=`poprq1'
    foreach i of numlist 5(5)90 {
        local poprq`i'=`popr`i''+`popr`=`i'+1''+`popr`=`i'+2''+`popr`=`i'+3''+`popr`=`i'+4''
        return scalar poprq`i'=`poprq`i''
    }
    return scalar poprq0=`popr0'
    return scalar poprq95=`popr95'
    * Label of the geographic unit; other codes are looked up with trouve.
    if `codegeo'==0 {
        local libgeo France metropolitaine
    }
    else if `codegeo'==1 {
        local libgeo Region Centre
    }
    else if `codegeo'==18 {
        local libgeo Cher
    }
    else if `codegeo'==28 {
        local libgeo Eure-et-Loir
    }
    else if `codegeo'==36 {
        local libgeo Indre
    }
    else if `codegeo'==37 {
        local libgeo Indre-et-Loire
    }
    else if `codegeo'==41 {
        local libgeo Loir-et-Cher
    }
    else if `codegeo'==45 {
        local libgeo Loiret
    }
    else {
        qui trouve `codegeo'
        local libgeo=r(trouve`codegeo')
    }
    di
    if `codegeo'>1000&`codegeo'<10000 {
        di in green "Niveau geographique : Canton de " in yellow "`libgeo'"
    }
    else if `codegeo'>10000&`codegeo'<100000 {
        di in green "Niveau geographique : Commune de " in yellow "`libgeo'"
    }
    else {
        di in green "Niveau geographique : " in yellow "`libgeo'"
    }
    if `sexe'==0 {
        local libsexe Deux sexes
    }
    else if `sexe'==1 {
        local libsexe Hommes
    }
    else if `sexe'==2 {
        local libsexe Femmes
    }
    if "`juillet'"==""&`codegeo'<100{
        local comm="Estimation au 1er janvier"
    }
    else if "`juillet'"!=""&`codegeo'<100{
        local comm="Estimation au 1er juillet"
    }
    else if `codegeo'>100 {
        local comm="Retropolation entre deux recensements pour"
    }
    di in green "Sexe : " in yellow "`libsexe'"
    di in green "Annee : " in yellow "`comm' `=`annee'+1900'"
    di
    di in green _col(4) "{hline 22}"
    di in green _col(4) "Age" _col(16) "Population"
    di in green _col(4) "{hline 22}"
    if "`quinq'"=="" {
        forvalues i=0/95 {
            if `i'<2 {
                di in green _col(4) "`i' an" _col(18) in yellow %8.0f `popr`i''
            }
            else if `i'==95 {
                di in green _col(4) "95 ans et plus" _col(18) in yellow %8.0f `popr`i''
            }
            else {
                di in green _col(4) "`i' ans" _col(18) in yellow %8.0f `popr`i''
            }
        }
    }
    else {
        di in green _col(4) "0 an" _col(18) in yellow %8.0f `popr0'
        di in green _col(4) "1-4 ans" _col(18) in yellow %8.0f `poprq1'
        foreach i of numlist 5(5)90 {
            di in green _col(4) "`i'-`=`i'+4' ans" _col(18) in yellow %8.0f `poprq`i''
        }
        di in green _col(4) "95 ans et plus" _col(18) in yellow %8.0f `popr95'
    }
    di in green _col(4) "{hline 22}"
    di in green _col(4) "Total" _col(18) in yellow %8.0f `popr'
    * Cancel the preserve and reload the data saved at the start, if any.
    restore , not
    qui drop _all
    if "`saveestpop'"!="" {
        qui use `saveestpop'
    }
end

@ -0,0 +1,113 @@
*! version 2.2 09march2007
*! Jean-Benoit Hardouin
************************************************************************************************************
* Gammasym : Symmetric gamma function
*
* Historic:
* Version 1 (2004-01-29): Jean-Benoit Hardouin
* Version 2 (2004-02-01): Jean-Benoit Hardouin
* Version 2.1 (2005-04-04) : Jean-benoit Hardouin
* Version 2.2 (2007-03-09) : Jean-benoit Hardouin /*Weights*/
*
* Jean-benoit Hardouin, University of Nantes - France
* jean-benoit.hardouin@univ-nantes.fr
*
* News about this program : http://www.anaqol.org
* FreeIRT Project : http://www.freeirt.org
*
* Copyright 2004-2007 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
************************************************************************************************************
program define gammasym,rclass
    * Compute the values of the symmetric gamma function of a list of
    * values, for every level from 0 to the sum of the weights.
    *
    * Arguments and options:
    *   anything   list of values; each one is evaluated as an expression
    *   weight()   numlist of weights, one per value (default: all equal to 1)
    *   display    display the value of the function at each level
    *
    * Returns r(gamma#) for each level #.
    version 7
    syntax anything [, Weight(string) DISplay]
    local B `weight'
    if "`B'"!="" {
        numlist "`B'"
        * Copy the expanded list as a macro; reading it through an expression
        * could truncate a long list.
        local B `r(numlist)'
        local nbw:word count `B'
        local score=0
        forvalues i=1/`nbw' {
            local B`i':word `i' of `B'
            local score=`score'+`B`i''
        }
    }
    local nbgroups:word count `anything'
    if "`B'"=="" {
        forvalues i=1/`nbgroups' {
            local B`i'=1
        }
        local nbw=`nbgroups'
        local score=`nbgroups'
    }
    if `nbgroups'!=`nbw' {
        di in red "There is not the same number of values and of weights. Please correct that."
        * The error command was called here without a return code.
        exit 198
    }
    * Evaluate each value and store the results as positional macros.
    tokenize `anything'
    local list
    forvalues i=1/`nbgroups' {
        local tmp=``i''
        local list "`list' `tmp'"
    }
    tokenize `list'
    * G has one column per value. Rows 1 to score+1 hold the function at
    * levels 0 to score for the first j values; the three extra rows hold
    * the weight, exp(-value)^weight and the cumulated weight.
    tempname G
    matrix `G'=J(`=`score'+4',`nbgroups',0)
    local poids=`score'+2
    local epsilon=`score'+3
    local max=`score'+4
    forvalues j=1/`nbgroups' {
        matrix `G'[`poids',`j']=`B`j''
        matrix `G'[`epsilon',`j']=exp(-``j'')^`B`j''
        if `j'!=1 {
            matrix `G'[`max',`j']=`G'[`max',`=`j'-1']+`B`j''
        }
        * Level 0 is always 1.
        matrix `G'[1,`j']=1
    }
    matrix `G'[`=`B1'+1',1]=exp(-`1')^`B1'
    * Recursion on the number of values: column j is column j-1 plus
    * column j-1 shifted by the weight of value j and multiplied by
    * exp(-value)^weight.
    forvalues j=2/`nbgroups' {
        local w=`B`j''
        forvalues s=2/`=`score'+1' {
            matrix `G'[`s',`j']=`G'[`s',`j']+`G'[`s',`=`j'-1']
        }
        forvalues s=`=`w'+1'/`=`score'+1' {
            matrix `G'[`s',`j']=`G'[`s',`j']+exp(-``j'')^`w'*`G'[`=`s'-`w'',`=`j'-1']
        }
    }
    forvalues r=0/`score' {
        if "`display'"!="" {
            display in green "Level: " in ye `r' in green _col(25) " Gamma=" in ye %10.5f `G'[`=`r'+1',`nbgroups']
        }
        return scalar gamma`r'= `G'[`=`r'+1',`nbgroups']
    }
    *return matrix G=`G'
end

@ -0,0 +1,40 @@
{smcl}
{* 9march2007}{...}
{hline}
help for {hi:gammasym}{right:Jean-Benoit Hardouin}
{hline}
{title:The symmetric gamma function}
{p 8 14 2}{cmd:gammasym} {it:list} [,{cmdab:w:eight}({it:string}) {cmdab:dis:play}]
{p 4 4 2}{it:list} is a list of the values to use.
{title:Description}
{p 4 4 2}{cmd:gammasym} computes the values of the symmetric gamma function for all the possible levels.
{title:Options}
{p 4 4 2}{cmd:weight} defines weights for each value (by default, all the values have a weight of 1)
{p 4 4 2}{cmd:display} displays the value of the symmetric gamma function for each level.
{title:Example}
{cmd:. gammasym -2.1 -1.5 -.7 -.6 1.1 1.00024 2.232447}
{cmd:. gammasym .3 .4 -.1 -.5 0 .6 , weight(1 2 1 1 2 1)}
{cmd:. gammasym 1 1 1 1 1 , w(1 2 3 4 5)}
{title:Outputs}
{p 4 4 2}{cmd:r(gamma#)} returns the value of the symmetric gamma function at the level #.
{title:Author}
{p 4 8 2}Jean-Benoit Hardouin, Regional Health Observatory (ORS) - 1, rue Porte Madeleine - BP 2439 - 45032 Orleans Cedex 1 - France.
You can contact the author at {browse "mailto:jean-benoit.hardouin@neuf.fr":jean-benoit.hardouin@neuf.fr} and visit the websites {browse "http://anaqol.free.fr":AnaQol} and {browse "http://freeirt.free.fr":FreeIRT}

@ -0,0 +1,181 @@
*! version 2 15jan2013
************************************************************************************************************
* gausshermite : Estimate an integral of the form |f(x)g(x/mu,sigma)dx or f(x,y)g(x,y/mu,Sigma)dxdy where g(x/mu,sigma) is the distribution function
* of the gaussian distribution of mean mu and variance sigma^2 and g(x,y/mu,Sigma) is the distribution function
* of the bivariate normal distribution of mean mu and covariance matrix Sigma by Gauss Hermite quadratures
*
* Version 1 : May 5, 2005 (Jean-Benoit Hardouin)
* Version 1.1: June 14, 2012 /*name option*/ (Jean-Benoit Hardouin)
* Version 2: January 15, 2013 /*bivariate normal distribution*/ (Jean-Benoit Hardouin, Mohand-Larbi Feddag, Myriam Blanchin)
*
* Jean-Benoit Hardouin, jean-benoit.hardouin@univ-nantes.fr
* EA 4275 "Biostatistics, Pharmacoepidemiology and Subjectives Measures in Health"
* Faculty of Pharmaceutical Sciences - University of Nantes - France
* http://www.sphere-nantes.org
*
* News about this program : http://anaqol.free.fr
* FreeIRT Project : http://freeirt.free.fr
*
* Copyright 2005, 2013 Jean-Benoit Hardouin, Mohand-Larbi Feddag, Myriam Blanchin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
************************************************************************************************************
program define gausshermite,rclass
* gausshermite (version 2): Gauss-Hermite approximation of the integral of
* f(x) against a normal density (one dimension) or of f(x1,x2) against a
* bivariate normal density (two dimensions).
*
*   anything : the integrand, written with the integration variable name(s)
*   sigma(#) : standard deviation, univariate case (-1 means not specified,
*              in which case 1 is used)
*   var()    : name of a 2x2 covariance matrix, bivariate case (the identity
*              is used when it is not an existing matrix)
*   mu()     : mean; a number or a 1x1 matrix (univariate), or a matrix whose
*              [1,1] and [1,2] elements are read (bivariate); 0 or (0,0) if omitted
*   nodes(#) : number of quadrature nodes per dimension (default 12)
*   name()   : one name (univariate) or two names (bivariate) for the
*              integration variable(s) used in the integrand
*   display  : print the estimate
*
* Returns r(int). Requires the user-written command ghquadm.
* The data in memory are replaced by a working dataset during the computation
* and reloaded afterwards, including when the computation fails.
version 7
syntax anything [, Sigma(real -1) Var(string) MU(string) Nodes(integer 12) Display Name(string)]

* Integrand. A single (possibly quoted) token is used as is. When the
* expression was typed with blanks, keep the whole expression instead of
* silently integrating only its first token.
tokenize `anything'
local func `1'
if `"`2'"'!="" {
    local func `anything'
}

* Dimension of the problem and names of the integration variables.
* Everything is validated BEFORE the data in memory are touched.
if "`name'"=="" {
    if `sigma'!=-1 & "`var'"=="" {
        local name x
        local nb=1
    }
    else if `sigma'==-1 & "`var'"!="" {
        local name1 x1
        local name2 x2
        local nb=2
    }
    else {
        * neither or both of sigma() and var(): the dimension is ambiguous
        di in red "{p}Please fill in the {hi:name} option{p_end}"
        error 198
    }
}
else {
    local nb=wordcount("`name'")
    if `nb'==2 {
        local name1=word("`name'",1)
        local name2=word("`name'",2)
    }
    else if `nb'!=1 {
        di in red "{p}The {hi:name} option must contain one or two variable names{p_end}"
        error 198
    }
}

* Dispersion: Cholesky factor of the covariance matrix (bivariate) or
* standard deviation (univariate).
tempname mean variance C
if `nb'==2 {
    capture confirm matrix `var'
    if !_rc {
        if colsof(`var')==2 & rowsof(`var')==2 {
            matrix `C'=cholesky(`var')
        }
        else {
            di in red "{p}The covariance matrix in the {hi:var} option should be a 2x2 matrix for a bivariate distribution{p_end}"
            error 198
        }
    }
    else {
        matrix `variance'=(1,0\0,1)
        matrix `C'=cholesky(`variance')
    }
}
else {
    local sig=1
    if `sigma'!=-1 {
        local sig=`sigma'
    }
}

* Mean: kept in a local macro (named after the temporary name) for a scalar
* mean, or in a temporary matrix otherwise.
capture confirm matrix `mu'
if !_rc {
    if colsof(`mu')==1 & rowsof(`mu')==1 {
        local `mean'=`mu'[1,1]
    }
    else {
        matrix `mean'=`mu'
    }
}
else {
    if "`mu'"=="" {
        if `nb'==1 {
            local `mean'=0
        }
        else {
            matrix `mean'=(0,0)
        }
    }
    else {
        local `mean'=`mu'
    }
}

* Put the user's data aside (save fails, and nothing is reloaded later,
* when there is no dataset in memory).
tempfile gauss
local save=0
qui capture save `gauss',replace
if _rc==0 {
    local save=1
}

* The computation is captured so that the user's data are reloaded even if
* ghquadm or the evaluation of the integrand fails.
capture noisily {
    drop _all
    qui set obs `=`nodes'*`nodes''
    tempname noeuds poids
    qui ghquadm `nodes' `noeuds' `poids'
    if `nb'==1 {
        qui gen `name'=.
        qui gen poids=.
        forvalues i=1/`nodes' {
            qui replace `name'=`noeuds'[1,`i'] in `i'
            qui replace poids=`poids'[1,`i'] in `i'
        }
        * change of variable from the Hermite node to the N(mu,sigma^2) scale
        qui replace `name'=`name'*(sqrt(2)*`sig')+``mean''
        qui gen f=poids/sqrt(_pi)*(`func')
    }
    else {
        forvalues i=1/`nb' {
            qui gen `name`i''=.
            qui gen poids`i'=.
        }
        * one observation per pair of nodes (i,j)
        local line=1
        forvalues i=1/`nodes' {
            forvalues j=1/`nodes' {
                qui replace `name1'=`noeuds'[1,`i'] *(sqrt(2)*`C'[1,1])+`mean'[1,1] in `line'
                qui replace `name2'=`noeuds'[1,`i'] *(sqrt(2)*`C'[2,1])+`noeuds'[1,`j'] *(sqrt(2)*`C'[2,2])+`mean'[1,2] in `line'
                qui replace poids1=`poids'[1,`i'] in `line'
                qui replace poids2=`poids'[1,`j'] in `line'
                local ++line
            }
        }
        qui gen f=poids1*poids2*(`func')/(_pi)
    }
    * observations beyond the filled ones have a missing f and are ignored
    qui su f
    return scalar int=r(sum)
    if "`display'"!="" {
        if `nb'==1 {
            di in green "int_R (`func')g(`name'/sigma=`sig')d`name'=" in yellow %12.8f `r(sum)'
        }
        else {
            di in green "int_R^2 (`func')g(`name1',`name2'/mu,Sigma)d`name1'd`name2'=" in yellow %12.8f `r(sum)'
        }
    }
}
local rc=_rc

* Reload the user's data, then propagate any error of the computation.
drop _all
if `save'==1 {
    qui use `gauss',clear
}
if `rc'!=0 {
    exit `rc'
}
end

@ -0,0 +1,43 @@
{smcl}
{* 5may2005}{...}
{hline}
help for {hi:gausshermite}{right:Jean-Benoit Hardouin}
{hline}
{title:Estimation of integrals using Gauss Hermite quadratures}
{p 8 14 2}{cmd:gausshermite} {it:function} [, {cmdab:s:igma}({it:#}) {cmd:mu}({it:#}) {cmdab:n:odes}({it:#}) {cmdab:d:isplay}]
{title:Description}
{p 4 4 2}{cmd:gausshermite} approximates integrals of the form f(x)g(x/mu,sigma) over the whole real line, where g(x/mu,sigma) is the density function of the Gaussian distribution with mean mu and variance sigma^2.
{title:Options}
{p 4 4 2}{it:function} defines f(x). For example, if f(x)=x^2, {it:function} is x^2. It is necessary to use x for the variable of integration.
{p 4 4 2}{cmd:mu} defines the mean of x (0 by default).
{p 4 4 2}{cmd:sigma} defines the standard deviation of x (1 by default).
{p 4 4 2}{cmd:nodes} defines the number of quadrature nodes (12 by default).
{p 4 4 2}{cmd:display} allows automatically displaying the estimation.
{p 4 4 2}Note that the quadrature nodes and the associated weights are computed using the {cmd:ghquadm} Stata command. Find this command with {stata findit ghquadm:findit ghquadm}.
{title:Example}
{cmd:. gausshermite x^2}
{cmd:. gausshermite x^4+exp(x)-2, sigma(1.5) mu(-.4) d n(10)}
{title:Outputs}
{p 4 4 2}The estimated value of the integral is saved in {cmd:r(int)}.
{title:Author}
{p 4 8 2}Jean-Benoit Hardouin, Regional Health Observatory (ORS) - 1, rue Porte Madeleine - BP 2439 - 45032 Orleans Cedex 1 - France.
You can contact the author at {browse "mailto:jean-benoit.hardouin@orscentre.org":jean-benoit.hardouin@orscentre.org} and visit the websites {browse "http://anaqol.free.fr":AnaQol} and {browse "http://freeirt.free.fr":FreeIRT}

@ -0,0 +1,72 @@
*! version 1 5may2005
************************************************************************************************************
* gausshermite : Estimate an integral of the form |f(x)g(x/mu,sigma)dx where g(x/mu,sigma) is the distribution function
* of the gaussian distribution of mean mu and variance sigma^2 by Gauss Hermite quadratures
*
* Version 1: May 5, 2005
*
*
* Jean-benoit Hardouin, Regional Health Observatory of Orléans - France
* jean-benoit.hardouin@orscentre.org
*
* News about this program : http://anaqol.free.fr
* FreeIRT Project : http://freeirt.free.fr
*
* Copyright 2005 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
************************************************************************************************************
program define gausshermite,rclass
* gausshermite (version 1): Gauss-Hermite approximation of the integral of
* f(x) against the density of a normal distribution.
*
*   anything : the integrand f(x), written with x as the integration variable
*   sigma(#) : standard deviation of x (default 1)
*   mu(#)    : mean of x (default 0)
*   nodes(#) : number of quadrature nodes (default 12)
*   display  : print the estimate
*
* Returns r(int). Requires the user-written command ghquadm.
* The data in memory are replaced by a working dataset during the computation
* and reloaded afterwards, including when the computation fails.
version 7
syntax anything [, Sigma(real 1) MU(real 0) Nodes(integer 12) Display]

* Integrand. A single (possibly quoted) token is used as is. When the
* expression was typed with blanks, keep the whole expression instead of
* silently integrating only its first token.
tokenize `anything'
local func `1'
if `"`2'"'!="" {
    local func `anything'
}

* Put the user's data aside (save fails, and nothing is reloaded later,
* when there is no dataset in memory).
tempfile gauss
local save=0
qui capture save `gauss',replace
if _rc==0 {
    local save=1
}

* The computation is captured so that the user's data are reloaded even if
* ghquadm or the evaluation of the integrand fails.
capture noisily {
    drop _all
    * at least one observation per node is needed; 100 is kept as a minimum
    local nobs=max(100,`nodes')
    qui set obs `nobs'
    tempname noeuds poids
    qui ghquadm `nodes' `noeuds' `poids'
    qui gen x=.
    qui gen poids=.
    forvalues i=1/`nodes' {
        qui replace x=`noeuds'[1,`i'] in `i'
        qui replace poids=`poids'[1,`i'] in `i'
    }
    * change of variable from the Hermite node to the N(mu,sigma^2) scale
    qui replace x=x*(sqrt(2)*`sigma')+`mu'
    qui gen f=poids/sqrt(_pi)*(`func')
    * observations beyond the nodes have a missing f and are ignored
    qui su f
    return scalar int=r(sum)
    if "`display'"!="" {
        di in green "int_R (`func')g(x/sigma=`sigma')dx=" in yellow %12.8f `r(sum)'
    }
}
local rc=_rc

* Reload the user's data, then propagate any error of the computation.
drop _all
if `save'==1 {
    qui use `gauss',clear
}
if `rc'!=0 {
    exit `rc'
}
end

@ -0,0 +1,104 @@
*! version 1 03 January 2012
************************************************************************************************************
* gausshermite2 : Estimate an integral of the form : f(x,y)g(x,y/mu,Sigma)dxdy where g(x,y/mu,Sigma) is the distribution function
* of the bivariate normal distribution of mean mu and covariance matrix Sigma by Gauss Hermite quadratures
*
* Version 1: 03 January 2012
*
*
* Mohand Feddag, University of Nantes - France
* Mohand-Larbi.Feddag@univ-nantes.fr *
* News about this program : http://anaqol.free.fr
* FreeIRT Project : http://freeirt.free.fr
*
* Copyright 2012 Mohand Feddag
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
************************************************************************************************************
program define gausshermite2,rclass
* gausshermite2: Gauss-Hermite approximation of the double integral of
* f(x1,x2) against the density of a bivariate normal distribution.
*
*   anything : the integrand, written with x1 and x2 as integration variables
*   mu()     : name of the mean matrix, whose [1,1] and [1,2] elements are
*              read ((0,0) if omitted)
*   sigma()  : name of the 2x2 covariance matrix (identity if omitted)
*   nodes(#) : number of quadrature nodes per dimension (default 12)
*   display  : print the estimate
*
* Returns r(int). Requires the user-written command ghquadm.
* The data in memory are replaced by a working dataset during the computation
* and reloaded afterwards, including when the computation fails.
version 11
syntax anything [, Mu(string) Sigma(string) Nodes(integer 12) Display]

* Integrand. A single (possibly quoted) token is used as is. When the
* expression was typed with blanks, keep the whole expression instead of
* silently integrating only its first token.
tokenize `anything'
local func `1'
if `"`2'"'!="" {
    local func `anything'
}

* Defaults for the mean and the covariance matrix. The labels are what the
* display option prints (the temporary matrix names are meaningless to users).
local mulab `mu'
local siglab `sigma'
if "`mu'"=="" {
    tempname mu
    matrix `mu'=(0,0)
    local mulab (0,0)
}
if "`sigma'"=="" {
    tempname sigma
    matrix `sigma'=(1,0\0,1)
    local siglab identity
}

* Cholesky factor of the covariance matrix, held in a temporary matrix so
* that no user matrix is overwritten. Computed before the data are touched.
tempname C
matrix `C'=cholesky(`sigma')

* Put the user's data aside (save fails, and nothing is reloaded later,
* when there is no dataset in memory).
tempfile gauss2
local save=0
qui capture save `gauss2',replace
if _rc==0 {
    local save=1
}

* The computation is captured so that the user's data are reloaded even if
* ghquadm or the evaluation of the integrand fails.
capture noisily {
    drop _all
    qui set obs `=`nodes'*`nodes''
    tempname noeuds poids
    qui ghquadm `nodes' `noeuds' `poids'
    qui gen x1=.
    qui gen x2=.
    qui gen poids1=.
    qui gen poids2=.
    * one observation per pair of nodes (i,j)
    local line=1
    forvalues i=1/`nodes' {
        forvalues j=1/`nodes' {
            qui replace x1=`noeuds'[1,`i'] *(sqrt(2)*`C'[1,1])+`mu'[1,1] in `line'
            qui replace x2=`noeuds'[1,`i'] *(sqrt(2)*`C'[2,1])+`noeuds'[1,`j'] *(sqrt(2)*`C'[2,2])+`mu'[1,2] in `line'
            qui replace poids1=`poids'[1,`i'] in `line'
            qui replace poids2=`poids'[1,`j'] in `line'
            local ++line
        }
    }
    * double sum of weight[i]*weight[j]*f(x1,x2), accumulated through summarize
    qui gen f=poids1*poids2*(`func')/(_pi)
    qui su f
    return scalar int=r(sum)
    if "`display'"!="" {
        di in green "int_R^2 (`func')g(x1,x2/mu=`mulab',Sigma=`siglab')dx1dx2=" in yellow %12.8f `r(sum)'
    }
}
local rc=_rc

* Reload the user's data, then propagate any error of the computation.
drop _all
if `save'==1 {
    qui use `gauss2',clear
}
if `rc'!=0 {
    exit `rc'
}
end

@ -0,0 +1,134 @@
*! version 2 11dec2012
************************************************************************************************************
* gausshermite : Estimate an integral of the form |f(x)g(x/mu,sigma)dx or f(x,y)g(x,y/mu,Sigma)dxdy where g(x/mu,sigma) is the distribution function
* of the gaussian distribution of mean mu and variance sigma^2 and g(x,y/mu,Sigma) is the distribution function
* of the bivariate normal distribution of mean mu and covariance matrix Sigma by Gauss Hermite quadratures
*
* Version 1: May 5, 2005
* Version 1.1: June 14, 2012 /*name option*/
* Version 2: December 11, 2012 /*bivariate normal distribution*/
*
* Jean-benoit Hardouin, Regional Health Observatory of Orléans - France
* jean-benoit.hardouin@orscentre.org
*
* News about this program : http://anaqol.free.fr
* FreeIRT Project : http://freeirt.free.fr
*
* Copyright 2005, 2012 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
************************************************************************************************************
program define gausshermite3,rclass
* gausshermite3: Gauss-Hermite approximation of the integral of f(x) against
* a normal density (one name) or of f(x1,x2) against a bivariate normal
* density (two names).
*
*   anything : the integrand, written with the integration variable name(s)
*   sigma()  : a number or a 1x1 matrix (standard deviation, univariate), or
*              a larger matrix whose Cholesky factor is used (bivariate);
*              1 or the identity if omitted
*   mu()     : a number or a 1x1 matrix (univariate), or a matrix whose [1,1]
*              and [1,2] elements are read (bivariate); 0 or (0,0) if omitted
*   nodes(#) : number of quadrature nodes per dimension (default 12)
*   name()   : one or two names for the integration variable(s); x if omitted
*   display  : print the estimate
*
* Returns r(int). Requires the user-written command ghquadm.
* The data in memory are replaced by a working dataset during the computation
* and reloaded afterwards, including when the computation fails.
version 7
syntax anything [, Sigma(string) MU(string) Nodes(integer 12) Display Name(string)]

* Integrand. A single (possibly quoted) token is used as is. When the
* expression was typed with blanks, keep the whole expression instead of
* silently integrating only its first token.
tokenize `anything'
local func `1'
if `"`2'"'!="" {
    local func `anything'
}

* Dimension of the problem and names of the integration variables.
if "`name'"=="" {
    local name x
    local nb=1
}
else {
    local nb=wordcount("`name'")
    if `nb'==2 {
        local name1=word("`name'",1)
        local name2=word("`name'",2)
    }
    else if `nb'!=1 {
        di in red "{p}The {hi:name} option must contain one or two variable names{p_end}"
        error 198
    }
}

* Dispersion. Temporary matrices are used so that no user matrix is
* overwritten; an omitted option falls back on a standard normal.
tempname C meanmat
local siglab `sigma'
if "`sigma'"=="" {
    if `nb'==1 {
        local sig=1
        local siglab 1
    }
    else {
        matrix `C'=(1,0\0,1)
    }
}
else {
    capture confirm matrix `sigma'
    if !_rc {
        if colsof(`sigma')==1 & rowsof(`sigma')==1 {
            local sig=`sigma'[1,1]
        }
        else {
            matrix `C'=cholesky(`sigma')
        }
    }
    else {
        local sig=`sigma'
    }
}

* Mean.
if "`mu'"=="" {
    if `nb'==1 {
        local mean=0
    }
    else {
        matrix `meanmat'=(0,0)
    }
}
else {
    capture confirm matrix `mu'
    if !_rc {
        if colsof(`mu')==1 & rowsof(`mu')==1 {
            local mean=`mu'[1,1]
        }
        else {
            matrix `meanmat'=`mu'
        }
    }
    else {
        local mean=`mu'
    }
}

* Put the user's data aside (save fails, and nothing is reloaded later,
* when there is no dataset in memory).
tempfile gauss
local save=0
qui capture save `gauss',replace
if _rc==0 {
    local save=1
}

* The computation is captured so that the user's data are reloaded even if
* ghquadm or the evaluation of the integrand fails.
capture noisily {
    drop _all
    qui set obs `=`nodes'*`nodes''
    tempname noeuds poids
    qui ghquadm `nodes' `noeuds' `poids'
    if `nb'==1 {
        qui gen `name'=.
        qui gen poids=.
        forvalues i=1/`nodes' {
            qui replace `name'=`noeuds'[1,`i'] in `i'
            qui replace poids=`poids'[1,`i'] in `i'
        }
        * change of variable from the Hermite node to the N(mu,sigma^2) scale
        qui replace `name'=`name'*(sqrt(2)*`sig')+`mean'
        qui gen f=poids/sqrt(_pi)*(`func')
    }
    else {
        forvalues i=1/`nb' {
            qui gen `name`i''=.
            qui gen poids`i'=.
        }
        * one observation per pair of nodes (i,j)
        local line=1
        forvalues i=1/`nodes' {
            forvalues j=1/`nodes' {
                qui replace `name1'=`noeuds'[1,`i'] *(sqrt(2)*`C'[1,1])+`meanmat'[1,1] in `line'
                qui replace `name2'=`noeuds'[1,`i'] *(sqrt(2)*`C'[2,1])+`noeuds'[1,`j'] *(sqrt(2)*`C'[2,2])+`meanmat'[1,2] in `line'
                qui replace poids1=`poids'[1,`i'] in `line'
                qui replace poids2=`poids'[1,`j'] in `line'
                local ++line
            }
        }
        qui gen f=poids1*poids2*(`func')/(_pi)
    }
    * observations beyond the filled ones have a missing f and are ignored
    qui su f
    return scalar int=r(sum)
    if "`display'"!="" {
        if `nb'==1 {
            di in green "int_R (`func')g(`name'/sigma=`siglab')d`name'=" in yellow %12.8f `r(sum)'
        }
        else {
            di in green "int_R^2 (`func')g(`name1',`name2'/mu,Sigma)d`name1'd`name2'=" in yellow %12.8f `r(sum)'
        }
    }
}
local rc=_rc

* Reload the user's data, then propagate any error of the computation.
drop _all
if `save'==1 {
    qui use `gauss',clear
}
if `rc'!=0 {
    exit `rc'
}
end

@ -0,0 +1,503 @@
*! version 4.3 18january2006
************************************************************************************************************
* GEEkel2d: GEE for estimation of unidimensional or 2-dimensional Latent Trait models (Kelderman and Rijkes 1994)
*
* Version 4.3: January 18, 2006 /*Faster version*/
*
* Historic:
* Version 1 (2003-06-23): Jean-Benoit Hardouin
* Version 2 (2003-08-13): Jean-Benoit Hardouin
* version 3 (2003-11-06): Jean-Benoit Hardouin
* Version 4 (2004-06-08): Jean-Benoit Hardouin
* Version 4.1 (2005-04-02): Jean-Benoit Hardouin
* Version 4.2 (2005-07-02): Jean-Benoit Hardouin
*
* Use the ghquadm program (findit ghquadm)
*
* Jean-benoit Hardouin, Regional Health Observatory of Orléans - France
* jean-benoit.hardouin@neuf.fr
*
* News about this program : http://anaqol.free.fr
* FreeIRT Project : http://freeirt.free.fr
*
* Copyright 2003-2006 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*************************************************************************************************************
program define geekel2d ,rclass
* geekel2d: GEE estimation of a unidimensional or two-dimensional latent
* trait model for dichotomous items.
*
*   varlist     : two or more numeric items (the code counts responses equal to 1)
*   coef()      : name of the matrix B of item coefficients, one row per item
*                 and one column per latent trait (default: a column of 1)
*   novar       : do not compute the standard errors (sets the macro var)
*   ll          : compute the marginal log-likelihood and the AIC by
*                 Gauss-Hermite quadrature (requires the command ghquadm)
*   nbit(#)     : maximal number of iterations (default 30)
*   critconv(#) : convergence criterion on the sum of squared changes of the
*                 parameters between two iterations (default 1e-15)
*   quad(#)     : number of quadrature nodes for ll (default 12)
*
* Returns r(b), r(V) (unless novar), r(ll) and r(AIC) (with ll), r(J), r(N)
* and r(error) (1 when the very first iteration already diverges).
* The data are preserved; observations with a missing item are dropped
* from the working copy.
version 7.0
syntax varlist(min=2 numeric) [, coef(string) novar ll nbit(integer 30) critconv(real 1e-15) quad(integer 12) ]
preserve
local nbitems: word count `varlist'
tokenize `varlist'
qui count
local N=r(N)
* listwise deletion of the observations with a missing item
forvalues i=1/`nbitems' {
    qui drop if ``i''==.
}
qui count
local Naf=r(N)
di _col(3) in green "Initial step (N=" in yellow `Naf' in green ")"
di _col(3) in yellow `=`N'-`Naf'' in green " observations are not used for missing values"
di
qui count
local N=r(N)
tempname B Q
if "`coef'"!="" {
    matrix `B'=`coef'
}
else {
    matrix `B'=J(`nbitems',1,1)
}
* Q = number of latent traits = number of columns of B
scalar `Q'=colsof(`B')
/* INITIAL VALUES OF THE DELTA AND SIGMA PARAMETERS AND OF THE BETA MATRIX */
local sigmath11=0.25
if `Q'==2 {
    local sigmath22=.25
    local sigmath12=0.125
}
tempname beta
matrix `beta'=J(`nbitems'+`Q'*(`Q'+1)/2,1,0)
* Counts of joint positive responses. Every permutation of the indices is
* stored so that later formulas can use the indices in any order.
* The third and fourth order counts are only needed for the standard errors.
forvalues i=1/`nbitems' {
    qui count if ``i''==1
    local si`i'=r(N)
    local delta``i''=-log(`si`i''/(`N'-`si`i''))
    matrix `beta'[`i',1]=`delta``i'''
    forvalues j=`i'/`nbitems' {
        quiet count if ``j''==1&``i''==1
        local si`i'j`j'=r(N)
        local si`j'j`i'=r(N)
        if "`var'"=="" {
            forvalues k=`j'/`nbitems' {
                quiet count if ``i''==1&``j''==1&``k''==1
                local si`i'j`j'k`k'=r(N)
                local si`i'j`k'k`j'=r(N)
                local si`j'j`i'k`k'=r(N)
                local si`j'j`k'k`i'=r(N)
                local si`k'j`i'k`j'=r(N)
                local si`k'j`j'k`i'=r(N)
                forvalues l=`k'/`nbitems' {
                    quiet count if ``i''==1&``j''==1&``k''==1&``l''==1
                    local si`i'j`j'k`k'l`l'=r(N)
                    local si`i'j`j'k`l'l`k'=r(N)
                    local si`i'j`k'k`j'l`l'=r(N)
                    local si`i'j`k'k`l'l`j'=r(N)
                    local si`i'j`l'k`j'l`k'=r(N)
                    local si`i'j`l'k`k'l`j'=r(N)
                    local si`j'j`i'k`k'l`l'=r(N)
                    local si`j'j`i'k`l'l`k'=r(N)
                    local si`j'j`k'k`i'l`l'=r(N)
                    local si`j'j`k'k`l'l`i'=r(N)
                    local si`j'j`l'k`i'l`k'=r(N)
                    local si`j'j`l'k`k'l`i'=r(N)
                    local si`k'j`i'k`j'l`l'=r(N)
                    local si`k'j`i'k`l'l`j'=r(N)
                    local si`k'j`j'k`i'l`l'=r(N)
                    local si`k'j`j'k`l'l`i'=r(N)
                    local si`k'j`l'k`i'l`j'=r(N)
                    local si`k'j`l'k`j'l`i'=r(N)
                    local si`l'j`i'k`j'l`k'=r(N)
                    local si`l'j`i'k`k'l`j'=r(N)
                    local si`l'j`j'k`i'l`k'=r(N)
                    local si`l'j`j'k`k'l`i'=r(N)
                    local si`l'j`k'k`i'l`j'=r(N)
                    local si`l'j`k'k`j'l`i'=r(N)
                }
            }
        }
    }
}
* beta = (item parameters, var1 [, var2, covar])
local l=`nbitems'+1
matrix `beta'[`l',1]=`sigmath11'
if `Q'==2 {
    local l=`nbitems'+2
    matrix `beta'[`l',1]=`sigmath22'
    local l=`nbitems'+3
    matrix `beta'[`l',1]=`sigmath12'
}
tempname variat V11 V12 V21 V22 D11 D12 D21 D22 V D
matrix `variat'=(1)
local compteur=0
local conv=1
/*********ITERATIONS******************/
while (`variat'[1,1]>`critconv'&`compteur'<=`nbit'&`conv'==1) {
    if `compteur'==0 {
        di in green _col(3) "First iteration"
    }
    else {
        di in green _col(3) "iteration:" in yellow _col(14) "`compteur'" in green _col(25) "Convergence index:" in yellow _col(44) %10.7e "`macrovariat'"
    }
    local compteur=`compteur'+1
    forvalues j=1/`nbitems' {
        /* DERIVATIVES 1 TO 6 FOR EACH ITEM */
        local l1``j''=1/(1+exp(`beta'[`j',1]))
        local l2``j''=exp(`beta'[`j',1])/(1+exp(`beta'[`j',1]))^2
        local l3``j''=exp(`beta'[`j',1])*(exp(`beta'[`j',1])-1)/(1+exp(`beta'[`j',1]))^3
        local l4``j''=exp(`beta'[`j',1])*(exp(2*`beta'[`j',1])-4*exp(`beta'[`j',1])+1)/(1+exp(`beta'[`j',1]))^4
        local l5``j''=exp(`beta'[`j',1])*(exp(3*`beta'[`j',1])-11*exp(2*`beta'[`j',1])+11*exp(`beta'[`j',1])-1)/(1+exp(`beta'[`j',1]))^5
        local l6``j''=exp(`beta'[`j',1])*(exp(4*`beta'[`j',1])-26*exp(3*`beta'[`j',1])+66*exp(2*`beta'[`j',1])-26*exp(`beta'[`j',1])+1)/(1+exp(`beta'[`j',1]))^6
        if `Q'==2 {
            * The covariance term carries a factor 2, as in H4i below (which
            * is 3 times the square of this quantity), in H2i for a pair of
            * items and in the derivatives stored in D12.
            local H2i`j'=`B'[`j',1]^2*`sigmath11'+2*`B'[`j',1]*`B'[`j',2]*`sigmath12'+`B'[`j',2]^2*`sigmath22'
            local H4i`j'=3*`B'[`j',1]^4*`sigmath11'^2+12*`B'[`j',1]^3*`B'[`j',2]*`sigmath11'*`sigmath12'+6*`B'[`j',1]^2*`B'[`j',2]^2*(`sigmath11'*`sigmath22'+2*`sigmath12'^2)+12*`B'[`j',1]*`B'[`j',2]^3*`sigmath22'*`sigmath12'+3*`B'[`j',2]^4*`sigmath22'^2
        }
        else if `Q'==1 {
            local H2i`j'=`B'[`j',1]^2*`sigmath11'
            local H4i`j'=3*`B'[`j',1]^4*`sigmath11'^2
        }
        /* MOMENTS OF ORDER 1 AND 2 AND MATRIX V11 */
        local mui`j'=`l1``j'''+`H2i`j''/2*`l3``j'''+`H4i`j''/24*`l5``j'''
        local sigmai`j'j`j'=`l2``j'''+`H2i`j''/2*(`l3``j'''-2*`l1``j'''*`l3``j''')+`H4i`j''/24*(`l5``j'''-2*(`l3``j''')^2-2*`l1``j'''*`l5``j''')
    }
    matrix `V11'=J(`nbitems',`nbitems',0)
    forvalues j=1/`nbitems' {
        matrix `V11'[`j',`j']=`sigmai`j'j`j''
        forvalues l=`=`j'+1'/`nbitems' {
            if `Q'==2 {
                local H2i`j'j`l'=`B'[`j',1]*`B'[`l',1]*`sigmath11'+(`B'[`j',1]*`B'[`l',2]+`B'[`j',2]*`B'[`l',1])*`sigmath12'+`B'[`j',2]*`B'[`l',2]*`sigmath22'
                local H4i`j'1j`l'3=3*`B'[`j',1]*`B'[`l',1]^3*`sigmath11'^2+3*(3*`B'[`j',1]*`B'[`l',1]^2*`B'[`l',2]+`B'[`j',2]*`B'[`l',1]^3)*`sigmath11'*`sigmath12'+(3*`B'[`j',1]*`B'[`l',1]*`B'[`l',2]^2+3*`B'[`j',2]*`B'[`l',1]^2*`B'[`l',2])*(`sigmath11'*`sigmath22'+2*`sigmath12'^2)+3*(`B'[`j',1]*`B'[`l',2]^3+3*`B'[`j',2]*`B'[`l',1]*`B'[`l',2]^2)*`sigmath22'*`sigmath12'+3*`B'[`j',2]*`B'[`l',2]^3*`sigmath22'^2
                local H4i`j'2j`l'2=3*`B'[`j',1]^2*`B'[`l',1]^2*`sigmath11'^2+6*(`B'[`j',1]^2*`B'[`l',1]*`B'[`l',2]+`B'[`j',1]*`B'[`j',2]*`B'[`l',1]^2)*`sigmath11'*`sigmath12'+(`B'[`j',1]^2*`B'[`l',2]^2+4*`B'[`j',1]*`B'[`j',2]*`B'[`l',1]*`B'[`l',2]+`B'[`j',2]^2*`B'[`l',1]^2)*(`sigmath11'*`sigmath22'+2*`sigmath12'^2)+6*(`B'[`j',1]*`B'[`j',2]*`B'[`l',2]^2+`B'[`j',2]^2*`B'[`l',1]*`B'[`l',2])*`sigmath22'*`sigmath12'+3*`B'[`j',2]^2*`B'[`l',2]^2*`sigmath22'^2
                local H4i`j'3j`l'1=3*`B'[`l',1]*`B'[`j',1]^3*`sigmath11'^2+3*(3*`B'[`l',1]*`B'[`j',1]^2*`B'[`j',2]+`B'[`l',2]*`B'[`j',1]^3)*`sigmath11'*`sigmath12'+(3*`B'[`l',1]*`B'[`j',1]*`B'[`j',2]^2+3*`B'[`l',2]*`B'[`j',1]^2*`B'[`j',2])*(`sigmath11'*`sigmath22'+2*`sigmath12'^2)+3*(`B'[`l',1]*`B'[`j',2]^3+3*`B'[`l',2]*`B'[`j',1]*`B'[`j',2]^2)*`sigmath22'*`sigmath12'+3*`B'[`l',2]*`B'[`j',2]^3*`sigmath22'^2
            }
            else if `Q'==1 {
                local H2i`j'j`l'=`B'[`j',1]*`B'[`l',1]*`sigmath11'
                local H4i`j'1j`l'3=3*`B'[`j',1]*`B'[`l',1]^3*`sigmath11'^2
                local H4i`j'2j`l'2=3*`B'[`j',1]^2*`B'[`l',1]^2*`sigmath11'^2
                local H4i`j'3j`l'1=3*`B'[`l',1]*`B'[`j',1]^3*`sigmath11'^2
            }
            local H2i`l'j`j'=`H2i`j'j`l''
            local H4i`l'1j`j'3=`H4i`j'1j`l'3'
            local H4i`l'2j`j'2=`H4i`j'2j`l'2'
            local H4i`l'3j`j'1=`H4i`j'3j`l'1'
            local sigmai`j'j`l'=`H2i`j'j`l''*(`l2``j'''*`l2``l''')+`H4i`j'1j`l'3'/6*`l2``j'''*`l4``l'''+`H4i`j'3j`l'1'/6*`l4``j'''*`l2``l'''+(`H4i`j'2j`l'2'-`H2i`j''*`H2i`l'')/4*`l3``j'''*`l3``l'''
        }
    }
    /* DEFINITION OF THE MATRIX COMPOCARRE */
    * column m of compocarre holds the m-th pair of items (j,l) with j<l
    tempname compocarre m
    local carre=`nbitems'*(`nbitems'-1)/2
    matrix `compocarre'=J(2,`carre',0)
    local m=0
    forvalues j=1/`nbitems' {
        forvalues l=`=`j'+1'/`nbitems' {
            local m=`m'+1
            matrix `compocarre'[1,`m']=`j'
            matrix `compocarre'[2,`m']=`l'
        }
    }
    /* MATRIX V22 */
    matrix `V22'=J(`carre',`carre',0)
    forvalues k=1/`carre' {
        local j=`compocarre'[1,`k']
        local l=`compocarre'[2,`k']
        matrix `V22'[`k',`k']=(1-2*`mui`j'')*(1-2*`mui`l'')*`sigmai`j'j`l''+`sigmai`j'j`j''*`sigmai`l'j`l''-`sigmai`j'j`l''^2
    }
    /* MATRICES V12, V21 AND V */
    matrix `V12'=J(`nbitems',`carre',0)
    forvalues k=1/`carre' {
        local j=`compocarre'[1,`k']
        local l=`compocarre'[2,`k']
        matrix `V12'[`j',`k']=(1-2*`mui`j'')*`sigmai`j'j`l''
        matrix `V12'[`l',`k']=(1-2*`mui`l'')*`sigmai`j'j`l''
    }
    matrix `V21'=`V12' '
    matrix `V'=(`V11',`V12' \ `V21',`V22')
    /* MATRICES D11 AND D12 */
    matrix `D11'=J(`nbitems',`nbitems',0)
    matrix `D12'=J(`nbitems',`Q'*(`Q'+1)/2,0)
    forvalues j=1/`nbitems' {
        matrix `D11'[`j',`j']=-`l2``j'''-`H2i`j''/2*`l4``j'''-`H4i`j''/24*`l6``j'''
        if `Q'==2 {
            matrix `D12'[`j',1]=`B'[`j',1]^2*`l3``j'''/2+(`B'[`j',1]^4*`sigmath11'+2*`B'[`j',1]^3*`B'[`j',2]*`sigmath12'+`B'[`j',1]^2*`B'[`j',2]^2*`sigmath22')/4*`l5``j'''
            matrix `D12'[`j',2]=`B'[`j',2]^2*`l3``j'''/2+(`B'[`j',2]^4*`sigmath22'+2*`B'[`j',2]^3*`B'[`j',1]*`sigmath12'+`B'[`j',2]^2*`B'[`j',1]^2*`sigmath11')/4*`l5``j'''
            matrix `D12'[`j',3]=`B'[`j',1]*`B'[`j',2]*`l3``j'''+(`B'[`j',1]^3*`B'[`j',2]*`sigmath11'+2*`B'[`j',1]^2*`B'[`j',2]^2*`sigmath12'+`B'[`j',1]*`B'[`j',2]^3*`sigmath22')/2*`l5``j'''
        }
        else if `Q'==1 {
            matrix `D12'[`j',1]=`B'[`j',1]^2*`l3``j'''/2+`B'[`j',1]^4*`sigmath11'/4*`l5``j'''
        }
    }
    /* MATRICES D21, D22 AND D */
    matrix `D21'=J(`carre',`nbitems',0)
    matrix `D22'=J(`carre',`Q'*(`Q'+1)/2,0)
    forvalues k=1/`carre' {
        local j=`compocarre'[1,`k']
        local l=`compocarre'[2,`k']
        matrix `D21'[`k',`j']=-`H2i`j'j`l''*`l3``j'''*`l2``l'''-`H4i`j'1j`l'3'/6*`l3``j'''*`l4``l'''-`H4i`j'3j`l'1'/6*`l5``j'''*`l2``l'''-(`H4i`j'2j`l'2'-`H2i`j''*`H2i`l'')/4*`l4``j'''*`l3``l'''
        matrix `D21'[`k',`l']=-`H2i`j'j`l''*`l2``j'''*`l3``l'''-`H4i`j'3j`l'1'/6*`l4``j'''*`l3``l'''-`H4i`j'1j`l'3'/6*`l2``j'''*`l5``l'''-(`H4i`j'2j`l'2'-`H2i`j''*`H2i`l'')/4*`l3``j'''*`l4``l'''
        tempname tmp1 tmp2 tmp3
        if `Q'==2 {
            * column 1: derivative with respect to var1
            scalar `tmp1'=`B'[`j',1]*`B'[`l',1]*`l2``j'''*`l2``l'''+(2*`B'[`j',1]*`B'[`l',1]^3*`sigmath11'+(3*`B'[`j',1]*`B'[`l',1]^2*`B'[`l',2]+`B'[`j',2]*`B'[`l',1]^3)*`sigmath12'+(`B'[`j',1]*`B'[`l',1]*`B'[`l',2]^2+`B'[`j',2]*`B'[`l',1]^2*`B'[`l',2])*`sigmath22')/2*`l2``j'''*`l4``l'''
            scalar `tmp2'=(2*`B'[`j',1]^3*`B'[`l',1]*`sigmath11'+(3*`B'[`j',1]^2*`B'[`j',2]*`B'[`l',1]+`B'[`j',1]^3*`B'[`l',2])*`sigmath12'+(`B'[`j',1]*`B'[`j',2]^2*`B'[`l',1]+`B'[`j',1]^2*`B'[`j',2]*`B'[`l',2])*`sigmath22')/2*`l4``j'''*`l2``l'''
            scalar `tmp3'=(`B'[`j',1]^2*`B'[`l',1]^2*`sigmath11'+(`B'[`j',1]^2*`B'[`l',1]*`B'[`l',2]+`B'[`j',1]*`B'[`j',2]*`B'[`l',1]^2)*`sigmath12'+`B'[`j',1]*`B'[`j',2]*`B'[`l',1]*`B'[`l',2]*`sigmath22')*`l3``j'''*`l3``l'''
            matrix `D22'[`k',1]=`tmp1'+`tmp2'+`tmp3'
            * column 2: derivative with respect to var2, i.e. column 1 with
            * the two dimensions exchanged (tmp3 now exchanges the columns
            * of B as well, as tmp1 and tmp2 do)
            scalar `tmp1'=`B'[`j',2]*`B'[`l',2]*`l2``j'''*`l2``l'''+(2*`B'[`j',2]*`B'[`l',2]^3*`sigmath22'+(3*`B'[`j',2]*`B'[`l',2]^2*`B'[`l',1]+`B'[`j',1]*`B'[`l',2]^3)*`sigmath12'+(`B'[`j',2]*`B'[`l',2]*`B'[`l',1]^2+`B'[`j',1]*`B'[`l',2]^2*`B'[`l',1])*`sigmath11')/2*`l2``j'''*`l4``l'''
            scalar `tmp2'=(2*`B'[`j',2]^3*`B'[`l',2]*`sigmath22'+(3*`B'[`j',2]^2*`B'[`j',1]*`B'[`l',2]+`B'[`j',2]^3*`B'[`l',1])*`sigmath12'+(`B'[`j',2]*`B'[`j',1]^2*`B'[`l',2]+`B'[`j',2]^2*`B'[`j',1]*`B'[`l',1])*`sigmath11')/2*`l4``j'''*`l2``l'''
            scalar `tmp3'=(`B'[`j',2]^2*`B'[`l',2]^2*`sigmath22'+(`B'[`j',2]^2*`B'[`l',1]*`B'[`l',2]+`B'[`j',1]*`B'[`j',2]*`B'[`l',2]^2)*`sigmath12'+`B'[`j',1]*`B'[`j',2]*`B'[`l',1]*`B'[`l',2]*`sigmath11')*`l3``j'''*`l3``l'''
            matrix `D22'[`k',2]=`tmp1'+`tmp2'+`tmp3'
            * column 3: derivative with respect to the covariance (the var1
            * coefficient of tmp3 is the mirror image of its var2 coefficient)
            scalar `tmp1'=(`B'[`j',1]*`B'[`l',2]+`B'[`j',2]*`B'[`l',1])*`l2``j'''*`l2``l'''+((3*`B'[`j',1]*`B'[`l',1]^2*`B'[`l',2]+`B'[`j',2]*`B'[`l',1]^3)*`sigmath11'+4*(`B'[`j',1]*`B'[`l',1]*`B'[`l',2]^2+`B'[`j',2]*`B'[`l',1]^2*`B'[`l',2])*`sigmath12'+(`B'[`j',1]*`B'[`l',2]^3+3*`B'[`j',2]*`B'[`l',1]*`B'[`l',2]^2)*`sigmath22')/2*`l2``j'''*`l4``l'''
            scalar `tmp2'=((3*`B'[`j',1]^2*`B'[`j',2]*`B'[`l',1]+`B'[`j',1]^3*`B'[`l',2])*`sigmath11'+4*(`B'[`j',1]*`B'[`j',2]^2*`B'[`l',1]+`B'[`j',1]^2*`B'[`j',2]*`B'[`l',2])*`sigmath12'+(`B'[`j',2]^3*`B'[`l',1]+3*`B'[`j',1]*`B'[`j',2]^2*`B'[`l',2])*`sigmath22')/2*`l4``j'''*`l2``l'''
            scalar `tmp3'=((`B'[`j',1]^2*`B'[`l',1]*`B'[`l',2]+`B'[`j',1]*`B'[`j',2]*`B'[`l',1]^2)*`sigmath11'+(`B'[`j',1]^2*`B'[`l',2]^2+2*`B'[`j',1]*`B'[`j',2]*`B'[`l',1]*`B'[`l',2]+`B'[`j',2]^2*`B'[`l',1]^2)*`sigmath12'+(`B'[`j',1]*`B'[`j',2]*`B'[`l',2]^2+`B'[`j',2]^2*`B'[`l',1]*`B'[`l',2])*`sigmath22')*`l3``j'''*`l3``l'''
            matrix `D22'[`k',3]=`tmp1'+`tmp2'+`tmp3'
        }
        else if `Q'==1 {
            scalar `tmp1'=`B'[`j',1]*`B'[`l',1]*`l2``j'''*`l2``l'''+(2*`B'[`j',1]*`B'[`l',1]^3*`sigmath11')/2*`l2``j'''*`l4``l'''
            scalar `tmp2'=(2*`B'[`j',1]^3*`B'[`l',1]*`sigmath11')/2*`l4``j'''*`l2``l'''
            scalar `tmp3'=(`B'[`j',1]^2*`B'[`l',1]^2*`sigmath11')*`l3``j'''*`l3``l'''
            matrix `D22'[`k',1]=`tmp1'+`tmp2'+`tmp3'
        }
    }
    matrix `D'=(`D11',`D12' \ `D21',`D22')
    /* MATRIX CHSI */
    * observed minus expected first and second order moments, per observation
    tempname chsi
    matrix `chsi'=J(`nbitems'+`carre',1,0)
    forvalues j=1/`nbitems' {
        matrix `chsi'[`j',1]=(`si`j''-`N'*`mui`j'')/`N'
    }
    forvalues k=1/`carre' {
        local j=`compocarre'[1,`k']
        local l=`compocarre'[2,`k']
        local tmp=`nbitems'+`k'
        matrix `chsi'[`tmp',1]=(`si`j'j`l''-`si`j''*`mui`l''-`si`l''*`mui`j''+`N'*`mui`j''*`mui`l''-`N'*`sigmai`j'j`l'')/`N'
    }
    /* STEP k */
    tempname betaold
    matrix `betaold'=`beta'
    matrix `beta'=`betaold'+inv(`D''*inv(`V')*`D')*`D''*inv(`V')*`chsi'
    local l=`nbitems'+1
    local sigmath11=`beta'[`l',1]
    * the rows nbitems+2 and nbitems+3 only exist with two latent traits
    if `Q'==2 {
        local l=`nbitems'+2
        local sigmath22=`beta'[`l',1]
        local l=`nbitems'+3
        local sigmath12=`beta'[`l',1]
    }
    tempname epsilon variatold
    scalar `variatold'=`variat'[1,1]
    matrix `epsilon'=`betaold'-`beta'
    matrix `variat'=(`epsilon''*`epsilon')
    if `variat'[1,1]>`variatold' {
        * the step is larger than the previous one: go back to the previous
        * estimates (variance parameters included) and stop iterating
        matrix `beta'=`betaold'
        local l=`nbitems'+1
        local sigmath11=`beta'[`l',1]
        if `Q'==2 {
            local l=`nbitems'+2
            local sigmath22=`beta'[`l',1]
            local l=`nbitems'+3
            local sigmath12=`beta'[`l',1]
        }
        local conv=0
    }
    else {
        local macrovariat=`variat'[1,1]
    }
}
/************************* STANDARD ERRORS OF THE PARAMETERS *********************/
if "`var'"==""{
    * xicarre accumulates, over the observations, the cross-products of the
    * residual vector whose mean is chsi
    tempname xicarreA xicarreB xicarreC xicarre
    matrix `xicarreA'=J(`nbitems',`nbitems',0)
    matrix `xicarreB'=J(`nbitems',`carre',0)
    matrix `xicarreC'=J(`carre',`carre',0)
    forvalues i=1/`nbitems' {
        * the diagonal is included, and the value is mirrored into the lower
        * triangle (not overwritten by the still empty lower triangle)
        forvalues j=`i'/`nbitems' {
            matrix `xicarreA'[`i',`j']=`si`i'j`j''-`si`i''*`mui`j''-`si`j''*`mui`i''+`N'*`mui`i''*`mui`j''
            matrix `xicarreA'[`j',`i']=`xicarreA'[`i',`j']
        }
        forvalues col=1/`carre' {
            local j=`compocarre'[1,`col']
            local k=`compocarre'[2,`col']
            matrix `xicarreB'[`i',`col']=`si`i'j`j'k`k''-`mui`i''*`si`j'j`k''-`mui`j''*`si`i'j`k''-`mui`k''*`si`i'j`j''+`mui`i''*`mui`j''*`si`k''+`mui`i''*`mui`k''*`si`j''+`mui`j''*`mui`k''*`si`i''-`N'*`mui`i''*`mui`j''*`mui`k''-`sigmai`j'j`k''*`si`i''+`N'*`mui`i''*`sigmai`j'j`k'''
        }
    }
    forvalues row=1/`carre' {
        forvalues col=`row'/`carre' {
            local i=`compocarre'[1,`row']
            local j=`compocarre'[2,`row']
            local k=`compocarre'[1,`col']
            local l=`compocarre'[2,`col']
            * the second term uses the triple count of items j, k and l
            matrix `xicarreC'[`row',`col']=`si`i'j`j'k`k'l`l''-`mui`i''*`si`j'j`k'k`l''-`mui`j''*`si`i'j`k'k`l''-`mui`k''*`si`i'j`j'k`l''-`mui`l''*`si`i'j`j'k`k''+`mui`i''*`mui`j''*`si`k'j`l''+`mui`i''*`mui`k''*`si`j'j`l''+`mui`i''*`mui`l''*`si`j'j`k''+`mui`j''*`mui`k''*`si`i'j`l''+`mui`j''*`mui`l''*`si`i'j`k''+`mui`k''*`mui`l''*`si`i'j`j''-`mui`i''*`mui`j''*`mui`k''*`si`l''-`mui`i''*`mui`j''*`mui`l''*`si`k''-`mui`i''*`mui`k''*`mui`l''*`si`j''-`mui`j''*`mui`k''*`mui`l''*`si`i''-`sigmai`i'j`j''*`si`k'j`l''-`sigmai`k'j`l''*`si`i'j`j''+`sigmai`i'j`j''*`mui`k''*`si`l''+`sigmai`i'j`j''*`mui`l''*`si`k''+`sigmai`k'j`l''*`mui`i''*`si`j''+`sigmai`k'j`l''*`mui`j''*`si`i''+`N'*`mui`i''*`mui`j''*`mui`k''*`mui`l''-`N'*`sigmai`i'j`j''*`mui`k''*`mui`l''-`N'*`sigmai`k'j`l''*`mui`i''*`mui`j''+`N'*`sigmai`i'j`j''*`sigmai`k'j`l''
            matrix `xicarreC'[`col',`row']=`xicarreC'[`row',`col']
        }
    }
    matrix `xicarre'=(`xicarreA',`xicarreB' \ `xicarreB' ',`xicarreC')
    * sandwich estimator of the covariance matrix of the estimates
    tempname A1 A2 W
    matrix `A1'=`D' '*inv(`V')*`D'
    matrix `A2'=`D' '*inv(`V')*`xicarre'*inv(`V')*`D'
    matrix `W'=1/`N'^2*inv(`A1')*`A2'*inv(`A1')
}
/*****************************DISPLAY THE RESULTS***************************************/
local compteur=`compteur'-1
di ""
di ""
if `compteur'==0 {
    * the data are restored automatically when the program exits
    noi di in red _col(8) "The algorithm does not converge"
    return scalar error=1
    exit
}
if `variat'[1,1]<=`critconv'&`compteur'>0 {
    noi di in green _col(8) "The algorithm converges at the `compteur'th iteration"
}
if `compteur'==`nbit'&`variat'[1,1]>`critconv' {
    noi di in green _col(8) "The algorithm is stopped at the `compteur'th iteration"
}
if `conv'==0&`compteur'>0 {
    noi di in green _col(8) "The algorithm no more converges after the `compteur'th iteration"
}
di ""
if "`var'"=="" {
    noi di in green _col(30) "Parameters" in green _col(43) "Standard errors"
    forvalues j=1/`nbitems' {
        noi di in green _col(20) "``j'': " in yellow _col(30) %10.6f `beta'[`j',1] in yellow _col(50) %8.6f sqrt(`W'[`j',`j'])
    }
    di ""
    noi di in green _col(20) "var1: " in yellow _col(30) %10.6f `beta'[`nbitems'+1,1] in yellow _col(50) %8.6f sqrt(`W'[`nbitems'+1,`nbitems'+1])
    if `Q'==2 {
        noi di in green _col(20) "var2: " in yellow _col(30) %10.6f `beta'[`nbitems'+2,1] in yellow _col(50) %8.6f sqrt(`W'[`nbitems'+2,`nbitems'+2])
        tempname rho
        scalar `rho'=`beta'[`nbitems'+3,1]/sqrt(`beta'[`nbitems'+1,1]*`beta'[`nbitems'+2,1])
        noi di in green _col(20) "covar: " in yellow _col(30) %10.6f `beta'[`nbitems'+3,1] in yellow _col(50) %8.6f sqrt(`W'[`nbitems'+3,`nbitems'+3]) " (rho=" %5.4f `rho' ")"
    }
}
else {
    noi di in green _col(30) "Parameters"
    forvalues j=1/`nbitems' {
        noi di in green _col(20) "``j'': " in yellow _col(30) %10.6f `beta'[`j',1]
    }
    di ""
    noi di in green _col(20) "var1: " in yellow _col(30) %10.6f `beta'[`nbitems'+1,1]
    if `Q'==2 {
        noi di in green _col(20) "var2: " in yellow _col(30) %10.6f `beta'[`nbitems'+2,1]
        tempname rho
        scalar `rho'=`beta'[`nbitems'+3,1]/sqrt(`beta'[`nbitems'+1,1]*`beta'[`nbitems'+2,1])
        noi di in green _col(20) "covar: " in yellow _col(30) %10.6f `beta'[`nbitems'+3,1]
    }
}
di ""
/* MARGINAL LOG-LIKELIHOOD AND AIC BY GAUSS-HERMITE QUADRATURE */
if "`ll'"!="" {
    tempname noeuds poids
    ghquadm `quad' `noeuds' `poids'
    tempvar vrais logvrais P
    qui gen `P'=0
    qui gen `vrais'=0
    if `Q'==1 {
        forvalues u=1/`quad'{
            tempvar vrais`u'
            qui gen `vrais`u''=1/sqrt(_pi)
            forvalues j=1/`nbitems' {
                qui replace `P'=exp(`B'[`j',1]*sqrt(2*`beta'[`nbitems'+1,1])*`noeuds'[1,`u']-`beta'[`j',1])/(1+exp(`B'[`j',1]*sqrt(2*`beta'[`nbitems'+1,1])*`noeuds'[1,`u']-`beta'[`j',1]))
                qui replace `P'=1-`P' if ``j''==0
                qui replace `vrais`u''=`vrais`u''*`P'
            }
            qui replace `vrais'=`vrais'+`poids'[1,`u']*`vrais`u''
        }
        qui gen `logvrais'=log(`vrais')
        qui su `logvrais'
        local ll=r(N)*r(mean)
        noi di in green _col(20) "ll: " in yellow _col(30) %12.4f `ll'
        local AIC=-2*`ll'+2*(`nbitems'+1)
        noi di in green _col(20) "AIC: " in yellow _col(30) %12.4f `AIC'
    }
    if `Q'==2 {
        tempname sigma
        matrix `sigma'=(`beta'[`nbitems'+1,1],`beta'[`nbitems'+3,1] \ `beta'[`nbitems'+3,1],`beta'[`nbitems'+2,1])
        forvalues u=1/`quad'{
            forvalues v=1/`quad'{
                tempvar vraisu`u'v`v'
                qui gen `vraisu`u'v`v''=1/_pi
                forvalues j=1/`nbitems' {
                    * NOTE(review): this change of variable does not look like
                    * the usual Cholesky transformation of the nodes - to be
                    * checked against the reference paper
                    local A1`u'tilde=sqrt(`beta'[`nbitems'+2,1]/(2*det(`sigma')))*`noeuds'[1,`u']
                    local A2`v'tilde=(`noeuds'[1,`v']-`beta'[`nbitems'+3,1]/sqrt(det(`sigma'))*`noeuds'[1,`u'])/(2*`beta'[`nbitems'+2,1])
                    qui replace `P'=exp(`B'[`j',1]*`A1`u'tilde'+`B'[`j',2]*`A2`v'tilde'-`beta'[`j',1])/(1+exp(`B'[`j',1]*`A1`u'tilde'+`B'[`j',2]*`A2`v'tilde'-`beta'[`j',1]))
                    qui replace `P'=1-`P' if ``j''==0
                    qui replace `vraisu`u'v`v''=`vraisu`u'v`v''*`P'
                }
                qui replace `vrais'=`vrais'+`poids'[1,`u']*`poids'[1,`v']*`vraisu`u'v`v''
            }
        }
        qui gen `logvrais'=log(`vrais')
        qui su `logvrais'
        local ll=r(N)*r(mean)
        noi di in green _col(20) "ll: " in yellow _col(27) %12.4f `ll'
        local AIC=-2*`ll'+2*(`nbitems'+3)
        noi di in green _col(20) "AIC: " in yellow _col(27) %12.4f `AIC'
    }
}
/* RETURNED RESULTS */
if "`var'"=="" {
    return matrix V `W'
}
matrix `beta'=`beta''
return matrix b= `beta'
if "`ll'"!="" {
    return scalar ll= `ll'
    return scalar AIC= `AIC'
}
return scalar J= `nbitems'
return scalar N= `N'
return scalar error=0
restore
end

@ -0,0 +1,78 @@
{smcl}
{* 2july2005}{...}
{hline}
help for {hi:geekel2d}{right:Jean-Benoit Hardouin}
{hline}
{title:Estimation of the parameters of unidimensional and bidimensional IRT models}
{p 8 14 12}{cmd:geekel2d} {it:varlist} [{cmd:,} {cmdab:coef}({it:matrixname}) {cmdab:nbit}({it:#}) {cmdab:critconv}({it:#}) {cmdab:ll} {cmdab:quad}({it:#}) {cmdab:novar}]
{p 8 14 12}{it:varlist} is a list of two existing dichotomous variables or more.
{title:Description}
{p 4 8 2}{cmd:geekel2d} estimates, by Generalized Estimating
Equations (GEE), the parameters of the model defined by Kelderman (1994) with
one or two dimensions and dichotomic items. This model includes the Rasch model
and the One Parameter Logistic Model (OPLM) for the unidimensional models, the
Multidimensional Generalized Rasch Model (MGRM) and the Multidimensional
Completely Sufficient Rasch Model (MMSRM) for the two-dimensional models.
{title:Options}
{p 4 8 2}{cmd:coef} is the name of a matrix which contains the coefficients B. This
matrix links the items to the latent traits. Each row represents an item and
there are as many columns as the supposed number of latent traits (one or two).
The coefficients are chosen, in general, among the first integers, but
{cmd:geekel2d} allows using real coefficients. By default, the Rasch model is
assumed (the matrix {cmd:coef} is a vector of 1s).
{p 4 8 2}{cmd:nbit} defines the maximal number of iterations in the estimation
algorithm. By default, this number is fixed to 30.
{p 4 8 2}{cmd:critconv} is the value of the convergence criterion, calculated
as the square of the cross-product of the vector containing the difference
between two successive iterations of the parameters estimations. By default,
this criterion is fixed to 1e-15.
{p 4 8 2}{cmd:ll} estimates the marginal log-likelihood and the Akaike
Information Criterion (AIC) by Gauss-Hermite quadratures.
{p 4 8 2}{cmd:quad} defines the number of nodes of quadratures.
{p 4 8 2}{cmd:novar} skips the computation of the standard errors of the estimators (faster).
{title:Remarks}
{p 4 8 2}For detailed information on the Kelderman model, see Kelderman and
Rijkes (1994) or Adams et al. (1997).
{p 4 8 2}{cmd:geekel2d} does not allow polytomous items.
{p 4 8 2}The {cmd:ghquadm} Stata module is needed (use {cmd:findit ghquadm} to obtain it).
{title:Example}
{p 4 8 2}{cmd:. geekel2d item1 item2 item3 item4} /*Rasch model*/
{p 4 8 2}{cmd:. matrix B=(1,0\1,0\0,1\0,1)}
{p 4 8 2}{cmd:. geekel2d item1 item2 item3 item4 , coef(B) nbit(50) critconv(1e-30)}
{title:References}
{p 4 8 2}Kelderman H. and Rijkes C. P. M., Loglinear multidimensional IRT models for polytomously scored items. {it:Psychometrika}, 1994, {it:59}, 149-176.
{p 4 8 2}Adams R. J., Wilson M. R. and Wang W., The multidimensional random coefficient multinomial logit model. {it:Applied Psychological Measurement}, 1997, {it:21}, 1-23.
{title:Author}
{p 4 8 2}Jean-Benoit Hardouin, Regional Health Observatory (ORS) - 1, rue Porte
Madeleine - BP 2439 - 45032 Orleans Cedex 1 - France. You can contact the
author at
{browse "mailto:jean-benoit.hardouin@orscentre.org":jean-benoit.hardouin@orscentre.org}
and visit the websites {browse "http://anaqol.free.fr":AnaQol}
and {browse "http://freeirt.free.fr":FreeIRT}

@ -0,0 +1,534 @@
************************************************************************************************************
* GEEkel2d: GEE for estimation of unidimensional or 2-dimensional Latent Trait models (Kelderman and Rijkes 1994)
* Version 4 : June 8, 2004
*
* Historic:
* Version 1 (2003-06-23): Jean-Benoit Hardouin
* Version 2 (2003-08-13): Jean-Benoit Hardouin
* version 3 (2003-11-06): Jean-Benoit Hardouin
*
* Use the ghquadm program (findit ghquadm)
*
* Jean-benoit Hardouin, Regional Health Observatory of Orléans - France
* jean-benoit.hardouin@neuf.fr
*
* News about this program : http://anaqol.free.fr
* FreeIRT Project : http://freeirt.free.fr
*
* Copyright 2003, 2004 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*************************************************************************************************************
program define GEEkel2d ,rclass
/* GEEkel2d: estimates by Generalized Estimating Equations the item parameters
   and the variance components of a one- or two-dimensional latent trait model
   for dichotomous items.
   Arguments:
     varlist   two or more numeric items; an observation with a missing value
               on any item is dropped (the data are preserved and restored)
     coef()    name of the matrix B (one row per item, one or two columns);
               a column of ones when omitted
     novar     skip the computation of the standard errors
     ll        also compute the marginal log-likelihood and the AIC by
               Gauss-Hermite quadrature (calls the external ghquadm program)
     nbit()    iteration limit (default 30)
     critconv() convergence threshold on the squared norm of the step
     quad()    number of quadrature nodes (default 12)
   Returns in r(): beta (column vector: item parameters, then var1, and for two
   dimensions var2 and covar), W (covariance matrix, unless novar), ll and AIC
   (with ll), J (number of items), N (observations used), error (0, or 1 when
   the first iteration already fails to improve). */
version 7.0
syntax varlist(min=2 numeric) [, coef(string) novar ll nbit(integer 30) critconv(real 1e-15) quad(integer 12)]
preserve
local nbitems: word count `varlist'
tokenize `varlist'
qui count
local N=r(N)
/* Listwise deletion: keep only the observations complete on every item */
forvalues i=1/`nbitems' {
qui drop if ``i''==.
}
qui count
local Naf=r(N)
di _col(3) in green "Initial step (N=" in yellow `Naf' in green ")"
di _col(3) in yellow `=`N'-`Naf'' in green " observations are not used for missing values"
di
qui count
local N=r(N)
tempname B Q
/* Work scalars and the latent covariance matrix get temporary names so that
   they neither overwrite objects of the caller nor get shadowed by dataset
   variables with the same name when used inside expressions */
tempname tmp1 tmp2 tmp3 rho covmat
if "`coef'"!="" {
matrix `B'=`coef'
}
else {
matrix `B'=J(`nbitems',1,1)
}
scalar `Q'=colsof(`B')
/* INITIAL VALUES OF THE DELTA AND SIGMA PARAMETERS AND OF THE BETA MATRIX */
local sigmath11=.25
if `Q'==2 {
local sigmath22=.25
local sigmath12=0.125
}
tempname beta
matrix `beta'=J(`nbitems'+`Q'*(`Q'+1)/2,1,0)
/* Starting value of each item parameter: minus the logit of the observed
   proportion of positive answers */
forvalues j=1/`nbitems' {
qui count if ``j''==1
local s``j''=r(N)
local delta``j''=-log(`s``j'''/(`N'-`s``j'''))
matrix `beta'[`j',1]=`delta``j'''
}
/* Counts of joint positive answers for every pair of items */
forvalues j=1/`nbitems' {
forvalues l=1/`nbitems' {
quiet count if ``j''==1&``l''==1
local s``j''``l''=r(N)
}
}
/* Third and fourth order joint counts are only needed for the standard errors */
if "`var'"==""{
forvalues i=1/`nbitems' {
forvalues j=1/`nbitems' {
forvalues k=1/`nbitems' {
quiet count if ``i''==1&``j''==1&``k''==1
local s``i''``j''``k''=r(N)
}
}
}
forvalues i=1/`nbitems' {
forvalues j=1/`nbitems' {
forvalues k=1/`nbitems' {
forvalues l=1/`nbitems' {
quiet count if ``i''==1&``j''==1&``k''==1&``l''==1
local s``i''``j''``k''``l''=r(N)
}
}
}
}
}
local l=`nbitems'+1
matrix `beta'[`l',1]=`sigmath11'
if `Q'==2 {
local l=`nbitems'+2
matrix `beta'[`l',1]=`sigmath22'
local l=`nbitems'+3
matrix `beta'[`l',1]=`sigmath12'
}
tempname variat V11 V12 V21 V22 D11 D12 D21 D22 V D
matrix `variat'=(1)
local compteur=0
local conv=1
/*********ITERATIONS******************/
/* Loop until the squared step norm falls below critconv, the iteration limit
   is passed, or a step is larger than the previous one (conv set to 0) */
while (`variat'[1,1]>`critconv'&`compteur'<=`nbit'&`conv'==1) {
if `compteur'==0{
di in green _col(3) "First iteration"
}
else {
di in green _col(3) "iteration:" in yellow _col(14) "`compteur'" in green _col(25) "Convergence index:" in yellow _col(44) %10.7e "`macrovariat'"
}
local compteur=`compteur'+1
forvalues j=1/`nbitems' {
/* DERIVATIVES 1 TO 6 FOR EACH ITEM */
local l1``j''=1/(1+exp(`beta'[`j',1]))
local l2``j''=exp(`beta'[`j',1])/(1+exp(`beta'[`j',1]))^2
local l3``j''=exp(`beta'[`j',1])*(exp(`beta'[`j',1])-1)/(1+exp(`beta'[`j',1]))^3
local l4``j''=exp(`beta'[`j',1])*(exp(2*`beta'[`j',1])-4*exp(`beta'[`j',1])+1)/(1+exp(`beta'[`j',1]))^4
local l5``j''=exp(`beta'[`j',1])*(exp(3*`beta'[`j',1])-11*exp(2*`beta'[`j',1])+11*exp(`beta'[`j',1])-1)/(1+exp(`beta'[`j',1]))^5
local l6``j''=exp(`beta'[`j',1])*(exp(4*`beta'[`j',1])-26*exp(3*`beta'[`j',1])+66*exp(2*`beta'[`j',1])-26*exp(`beta'[`j',1])+1)/(1+exp(`beta'[`j',1]))^6
if `Q'==2 {
/* H2 is the quadratic form B Sigma B: the covariance term enters twice.
   The factor 2 was missing; with it H4 below equals 3*H2^2 and the
   derivative stored in D12[j,3] matches */
local H2``j''=`B'[`j',1]^2*`sigmath11'+2*`B'[`j',1]*`B'[`j',2]*`sigmath12'+`B'[`j',2]^2*`sigmath22'
local H4``j''=3*`B'[`j',1]^4*`sigmath11'^2+12*`B'[`j',1]^3*`B'[`j',2]*`sigmath11'*`sigmath12'+6*`B'[`j',1]^2*`B'[`j',2]^2*(`sigmath11'*`sigmath22'+2*`sigmath12'^2)+12*`B'[`j',1]*`B'[`j',2]^3*`sigmath22'*`sigmath12'+3*`B'[`j',2]^4*`sigmath22'^2
}
if `Q'==1 {
local H2``j''=`B'[`j',1]^2*`sigmath11'
local H4``j''=3*`B'[`j',1]^4*`sigmath11'^2
}
/* FIRST AND SECOND ORDER MOMENTS AND MATRIX V11 */
local mu``j''=`l1``j'''+`H2``j'''/2*`l3``j'''+`H4``j'''/24*`l5``j'''
local sigma``j''``j''=`l2``j'''+`H2``j'''/2*(`l3``j'''-2*`l1``j'''*`l3``j''')+`H4``j'''/24*(`l5``j'''-2*(`l3``j''')^2-2*`l1``j'''*`l5``j''')
}
matrix `V11'=J(`nbitems',`nbitems',0)
forvalue j=1/`nbitems' {
forvalue l=`j'/`nbitems' {
if `j'!=`l' {
if `Q'==2 {
local H2``j''``l''=`B'[`j',1]*`B'[`l',1]*`sigmath11'+(`B'[`j',1]*`B'[`l',2]+`B'[`j',2]*`B'[`l',1])*`sigmath12'+`B'[`j',2]*`B'[`l',2]*`sigmath22'
local H4``j''1``l''3=3*`B'[`j',1]*`B'[`l',1]^3*`sigmath11'^2+3*(3*`B'[`j',1]*`B'[`l',1]^2*`B'[`l',2]+`B'[`j',2]*`B'[`l',1]^3)*`sigmath11'*`sigmath12'+(3*`B'[`j',1]*`B'[`l',1]*`B'[`l',2]^2+3*`B'[`j',2]*`B'[`l',1]^2*`B'[`l',2])*(`sigmath11'*`sigmath22'+2*`sigmath12'^2)+3*(`B'[`j',1]*`B'[`l',2]^3+3*`B'[`j',2]*`B'[`l',1]*`B'[`l',2]^2)*`sigmath22'*`sigmath12'+3*`B'[`j',2]*`B'[`l',2]^3*`sigmath22'^2
local H4``j''2``l''2=3*`B'[`j',1]^2*`B'[`l',1]^2*`sigmath11'^2+6*(`B'[`j',1]^2*`B'[`l',1]*`B'[`l',2]+`B'[`j',1]*`B'[`j',2]*`B'[`l',1]^2)*`sigmath11'*`sigmath12'+(`B'[`j',1]^2*`B'[`l',2]^2+4*`B'[`j',1]*`B'[`j',2]*`B'[`l',1]*`B'[`l',2]+`B'[`j',2]^2*`B'[`l',1]^2)*(`sigmath11'*`sigmath22'+2*`sigmath12'^2)+6*(`B'[`j',1]*`B'[`j',2]*`B'[`l',2]^2+`B'[`j',2]^2*`B'[`l',1]*`B'[`l',2])*`sigmath22'*`sigmath12'+3*`B'[`j',2]^2*`B'[`l',2]^2*`sigmath22'^2
local H4``j''3``l''1=3*`B'[`l',1]*`B'[`j',1]^3*`sigmath11'^2+3*(3*`B'[`l',1]*`B'[`j',1]^2*`B'[`j',2]+`B'[`l',2]*`B'[`j',1]^3)*`sigmath11'*`sigmath12'+(3*`B'[`l',1]*`B'[`j',1]*`B'[`j',2]^2+3*`B'[`l',2]*`B'[`j',1]^2*`B'[`j',2])*(`sigmath11'*`sigmath22'+2*`sigmath12'^2)+3*(`B'[`l',1]*`B'[`j',2]^3+3*`B'[`l',2]*`B'[`j',1]*`B'[`j',2]^2)*`sigmath22'*`sigmath12'+3*`B'[`l',2]*`B'[`j',2]^3*`sigmath22'^2
}
if `Q'==1 {
local H2``j''``l''=`B'[`j',1]*`B'[`l',1]*`sigmath11'
local H4``j''1``l''3=3*`B'[`j',1]*`B'[`l',1]^3*`sigmath11'^2
local H4``j''2``l''2=3*`B'[`j',1]^2*`B'[`l',1]^2*`sigmath11'^2
local H4``j''3``l''1=3*`B'[`l',1]*`B'[`j',1]^3*`sigmath11'^2
}
local sigma``j''``l''=`H2``j''``l'''*(`l2``j'''*`l2``l''')+`H4``j''1``l''3'/6*`l2``j'''*`l4``l'''+`H4``j''3``l''1'/6*`l4``j'''*`l2``l'''+(`H4``j''2``l''2'-`H2``j'''*`H2``l''')/4*`l3``j'''*`l3``l'''
}
matrix `V11'[`j',`l']=`sigma``j''``l'''
matrix `V11'[`l',`j']=`sigma``j''``l'''
}
}
/* DEFINITION OF THE MATRIX COMPOCARRE: column m holds the item numbers (j,l), j<l, of the m-th pair */
tempname compocarre m
local carre=`nbitems'*(`nbitems'-1)/2
matrix `compocarre'=J(2,`carre',0)
scalar `m'=0
forvalue j=1/`nbitems' {
local lbis=`j'+1
forvalue l=`lbis'/`nbitems' {
scalar `m'=`m'+1
matrix `compocarre'[1,`m']=`j'
matrix `compocarre'[2,`m']=`l'
}
}
/* MATRIX V22 (only its diagonal is filled) */
matrix `V22'=J(`carre',`carre',0)
forvalue k=1/`carre' {
local j=`compocarre'[1,`k']
local l=`compocarre'[2,`k']
matrix `V22'[`k',`k']=(1-2*`mu``j''')*(1-2*`mu``l''')*`sigma``j''``l'''+`sigma``j''``j'''*`sigma``l''``l'''-`sigma``j''``l'''^2
}
/* MATRICES V12, V21 AND V */
matrix `V12'=J(`nbitems',`carre',0)
forvalue k=1/`carre' {
local j=`compocarre'[1,`k']
local l=`compocarre'[2,`k']
matrix `V12'[`j',`k']=(1-2*`mu``j''')*`sigma``j''``l'''
matrix `V12'[`l',`k']=(1-2*`mu``l''')*`sigma``j''``l'''
}
matrix `V21'=`V12' '
matrix `V'=(`V11',`V12' \ `V21',`V22')
/* MATRICES D11 AND D12 */
matrix `D11'=J(`nbitems',`nbitems',0)
matrix `D12'=J(`nbitems',`Q'*(`Q'+1)/2,0)
forvalue j=1/`nbitems' {
matrix `D11'[`j',`j']=-`l2``j'''-`H2``j'''/2*`l4``j'''-`H4``j'''/24*`l6``j'''
if `Q'==2 {
matrix `D12'[`j',1]=`B'[`j',1]^2*`l3``j'''/2+(`B'[`j',1]^4*`sigmath11'+2*`B'[`j',1]^3*`B'[`j',2]*`sigmath12'+`B'[`j',1]^2*`B'[`j',2]^2*`sigmath22')/4*`l5``j'''
matrix `D12'[`j',2]=`B'[`j',2]^2*`l3``j'''/2+(`B'[`j',2]^4*`sigmath22'+2*`B'[`j',2]^3*`B'[`j',1]*`sigmath12'+`B'[`j',2]^2*`B'[`j',1]^2*`sigmath11')/4*`l5``j'''
matrix `D12'[`j',3]=`B'[`j',1]*`B'[`j',2]*`l3``j'''+(`B'[`j',1]^3*`B'[`j',2]*`sigmath11'+2*`B'[`j',1]^2*`B'[`j',2]^2*`sigmath12'+`B'[`j',1]*`B'[`j',2]^3*`sigmath22')/2*`l5``j'''
}
if `Q'==1 {
matrix `D12'[`j',1]=`B'[`j',1]^2*`l3``j'''/2+`B'[`j',1]^4*`sigmath11'/4*`l5``j'''
}
}
/* MATRICES D21, D22 AND D */
matrix `D21'=J(`carre',`nbitems',0)
matrix `D22'=J(`carre',`Q'*(`Q'+1)/2,0)
forvalue k=1/`carre' {
local j=`compocarre'[1,`k']
local l=`compocarre'[2,`k']
matrix `D21'[`k',`j']=-`H2``j''``l'''*`l3``j'''*`l2``l'''-`H4``j''1``l''3'/6*`l3``j'''*`l4``l'''-`H4``j''3``l''1'/6*`l5``j'''*`l2``l'''-(`H4``j''2``l''2'-`H2``j'''*`H2``l''')/4*`l4``j'''*`l3``l'''
matrix `D21'[`k',`l']=-`H2``j''``l'''*`l2``j'''*`l3``l'''-`H4``j''3``l''1'/6*`l4``j'''*`l3``l'''-`H4``j''1``l''3'/6*`l2``j'''*`l5``l'''-(`H4``j''2``l''2'-`H2``j'''*`H2``l''')/4*`l3``j'''*`l4``l'''
if `Q'==2 {
/* Column 1: derivative of the pair covariance with respect to var1 */
scalar `tmp1'=`B'[`j',1]*`B'[`l',1]*`l2``j'''*`l2``l'''+(2*`B'[`j',1]*`B'[`l',1]^3*`sigmath11'+(3*`B'[`j',1]*`B'[`l',1]^2*`B'[`l',2]+`B'[`j',2]*`B'[`l',1]^3)*`sigmath12'+(`B'[`j',1]*`B'[`l',1]*`B'[`l',2]^2+`B'[`j',2]*`B'[`l',1]^2*`B'[`l',2])*`sigmath22')/2*`l2``j'''*`l4``l'''
scalar `tmp2'=(2*`B'[`j',1]^3*`B'[`l',1]*`sigmath11'+(3*`B'[`j',1]^2*`B'[`j',2]*`B'[`l',1]+`B'[`j',1]^3*`B'[`l',2])*`sigmath12'+(`B'[`j',1]*`B'[`j',2]^2*`B'[`l',1]+`B'[`j',1]^2*`B'[`j',2]*`B'[`l',2])*`sigmath22')/2*`l4``j'''*`l2``l'''
scalar `tmp3'=(`B'[`j',1]^2*`B'[`l',1]^2*`sigmath11'+(`B'[`j',1]^2*`B'[`l',1]*`B'[`l',2]+`B'[`j',1]*`B'[`j',2]*`B'[`l',1]^2)*`sigmath12'+`B'[`j',1]*`B'[`j',2]*`B'[`l',1]*`B'[`l',2]*`sigmath22')*`l3``j'''*`l3``l'''
matrix `D22'[`k',1]=`tmp1'+`tmp2'+`tmp3'
/* Column 2: derivative with respect to var2, i.e. column 1 with the two
   dimensions exchanged. The third term previously kept the indices of
   column 1 on its first two coefficients */
scalar `tmp1'=`B'[`j',2]*`B'[`l',2]*`l2``j'''*`l2``l'''+(2*`B'[`j',2]*`B'[`l',2]^3*`sigmath22'+(3*`B'[`j',2]*`B'[`l',2]^2*`B'[`l',1]+`B'[`j',1]*`B'[`l',2]^3)*`sigmath12'+(`B'[`j',2]*`B'[`l',2]*`B'[`l',1]^2+`B'[`j',1]*`B'[`l',2]^2*`B'[`l',1])*`sigmath11')/2*`l2``j'''*`l4``l'''
scalar `tmp2'=(2*`B'[`j',2]^3*`B'[`l',2]*`sigmath22'+(3*`B'[`j',2]^2*`B'[`j',1]*`B'[`l',2]+`B'[`j',2]^3*`B'[`l',1])*`sigmath12'+(`B'[`j',2]*`B'[`j',1]^2*`B'[`l',2]+`B'[`j',2]^2*`B'[`j',1]*`B'[`l',1])*`sigmath11')/2*`l4``j'''*`l2``l'''
scalar `tmp3'=(`B'[`j',2]^2*`B'[`l',2]^2*`sigmath22'+(`B'[`j',2]^2*`B'[`l',1]*`B'[`l',2]+`B'[`j',1]*`B'[`j',2]*`B'[`l',2]^2)*`sigmath12'+`B'[`j',1]*`B'[`j',2]*`B'[`l',1]*`B'[`l',2]*`sigmath11')*`l3``j'''*`l3``l'''
matrix `D22'[`k',2]=`tmp1'+`tmp2'+`tmp3'
/* Column 3: derivative with respect to the covariance. In the third term
   the var1 coefficient is the product of the cross loading sum with the
   dimension-1 loadings, hence B[l,1]^2 (it previously read B[l,2]^2) */
scalar `tmp1'=(`B'[`j',1]*`B'[`l',2]+`B'[`j',2]*`B'[`l',1])*`l2``j'''*`l2``l'''+((3*`B'[`j',1]*`B'[`l',1]^2*`B'[`l',2]+`B'[`j',2]*`B'[`l',1]^3)*`sigmath11'+4*(`B'[`j',1]*`B'[`l',1]*`B'[`l',2]^2+`B'[`j',2]*`B'[`l',1]^2*`B'[`l',2])*`sigmath12'+(`B'[`j',1]*`B'[`l',2]^3+3*`B'[`j',2]*`B'[`l',1]*`B'[`l',2]^2)*`sigmath22')/2*`l2``j'''*`l4``l'''
scalar `tmp2'=((3*`B'[`j',1]^2*`B'[`j',2]*`B'[`l',1]+`B'[`j',1]^3*`B'[`l',2])*`sigmath11'+4*(`B'[`j',1]*`B'[`j',2]^2*`B'[`l',1]+`B'[`j',1]^2*`B'[`j',2]*`B'[`l',2])*`sigmath12'+(`B'[`j',2]^3*`B'[`l',1]+3*`B'[`j',1]*`B'[`j',2]^2*`B'[`l',2])*`sigmath22')/2*`l4``j'''*`l2``l'''
scalar `tmp3'=((`B'[`j',1]^2*`B'[`l',1]*`B'[`l',2]+`B'[`j',1]*`B'[`j',2]*`B'[`l',1]^2)*`sigmath11'+(`B'[`j',1]^2*`B'[`l',2]^2+2*`B'[`j',1]*`B'[`j',2]*`B'[`l',1]*`B'[`l',2]+`B'[`j',2]^2*`B'[`l',1]^2)*`sigmath12'+(`B'[`j',1]*`B'[`j',2]*`B'[`l',2]^2+`B'[`j',2]^2*`B'[`l',1]*`B'[`l',2])*`sigmath22')*`l3``j'''*`l3``l'''
matrix `D22'[`k',3]=`tmp1'+`tmp2'+`tmp3'
}
if `Q'==1 {
scalar `tmp1'=`B'[`j',1]*`B'[`l',1]*`l2``j'''*`l2``l'''+(2*`B'[`j',1]*`B'[`l',1]^3*`sigmath11')/2*`l2``j'''*`l4``l'''
scalar `tmp2'=(2*`B'[`j',1]^3*`B'[`l',1]*`sigmath11')/2*`l4``j'''*`l2``l'''
scalar `tmp3'=(`B'[`j',1]^2*`B'[`l',1]^2*`sigmath11')*`l3``j'''*`l3``l'''
matrix `D22'[`k',1]=`tmp1'+`tmp2'+`tmp3'
}
}
matrix `D'=(`D11',`D12' \ `D21',`D22')
/* MATRIX CHSI: observed minus expected first and second order moments, divided by N */
tempname chsi
matrix `chsi'=J(`nbitems'+`carre',1,0)
forvalue j=1/`nbitems' {
matrix `chsi'[`j',1]=(`s``j'''-`N'*`mu``j''')/`N'
}
forvalue k=1/`carre' {
local j=`compocarre'[1,`k']
local l=`compocarre'[2,`k']
local tmp=`nbitems'+`k'
matrix `chsi'[`tmp',1]=(`s``j''``l'''-`s``j'''*`mu``l'''-`s``l'''*`mu``j'''+`N'*`mu``j'''*`mu``l'''-`N'*`sigma``j''``l''')/`N'
}
/* UPDATE STEP */
tempname betaold
matrix `betaold'=`beta'
matrix `beta'=`betaold'+inv(`D''*inv(`V')*`D')*`D''*inv(`V')*`chsi'
/* Refresh the variance macros from the new beta. With one dimension the
   rows nbitems+2 and nbitems+3 do not exist; sigmath22 and sigmath12 are
   not used in that case */
local l=`nbitems'+1
local sigmath11=`beta'[`l',1]
local l=`nbitems'+2
local sigmath22=`beta'[`l',1]
local l=`nbitems'+3
local sigmath12=`beta'[`l',1]
tempname epsilon variatold
scalar `variatold'=`variat'[1,1]
matrix `epsilon'=`betaold'-`beta'
matrix `variat'=(`epsilon''*`epsilon')
/* A step larger than the previous one is rejected: go back to the previous
   estimates and leave the loop */
if `variat'[1,1]>`variatold' {
matrix `beta'=`betaold'
local l=`nbitems'+1
local sigmath11=`beta'[`l',1]
local l=`nbitems'+2
local sigmath22=`beta'[`l',1]
local l=`nbitems'+3
local sigmath12=`beta'[`l',1]
local conv=0
}
else {
local macrovariat=`variat'[1,1]
}
}
/*************************STANDARD ERRORS OF THE PARAMETERS *********************/
if "`var'"==""{
tempname xicarreA xicarreB xicarreC xicarre
matrix `xicarreA'=J(`nbitems',`nbitems',0)
matrix `xicarreB'=J(`nbitems',`carre',0)
matrix `xicarreC'=J(`carre',`carre',0)
forvalues i=1/`nbitems' {
forvalues j=1/`nbitems' {
matrix `xicarreA'[`i',`j']=`s``i''``j'''-`s``i'''*`mu``j'''-`s``j'''*`mu``i'''+`N'*`mu``i'''*`mu``j'''
}
}
forvalues i=1/`nbitems' {
forvalues col=1/`carre' {
local j=`compocarre'[1,`col']
local k=`compocarre'[2,`col']
matrix `xicarreB'[`i',`col']=`s``i''``j''``k'''-`mu``i'''*`s``j''``k'''-`mu``j'''*`s``i''``k'''-`mu``k'''*`s``i''``j'''+`mu``i'''*`mu``j'''*`s``k'''+`mu``i'''*`mu``k'''*`s``j'''+`mu``j'''*`mu``k'''*`s``i'''-`N'*`mu``i'''*`mu``j'''*`mu``k'''-`sigma``j''``k'''*`s``i'''+`N'*`mu``i'''*`sigma``j''``k'''
}
}
forvalues row=1/`carre' {
forvalues col=1/`carre' {
local i=`compocarre'[1,`row']
local j=`compocarre'[2,`row']
local k=`compocarre'[1,`col']
local l=`compocarre'[2,`col']
matrix `xicarreC'[`row',`col']=`s``i''``j''``k''``l'''-`mu``i'''*`s``j''``k''``l'''-`mu``j'''*`s``i''``k''``l'''-`mu``k'''*`s``i''``j''``l'''-`mu``l'''*`s``i''``j''``k'''+`mu``i'''*`mu``j'''*`s``k''``l'''+`mu``i'''*`mu``k'''*`s``j''``l'''+`mu``i'''*`mu``l'''*`s``j''``k'''+`mu``j'''*`mu``k'''*`s``i''``l'''+`mu``j'''*`mu``l'''*`s``i''``k'''+`mu``k'''*`mu``l'''*`s``i''``j'''-`mu``i'''*`mu``j'''*`mu``k'''*`s``l'''-`mu``i'''*`mu``j'''*`mu``l'''*`s``k'''-`mu``i'''*`mu``k'''*`mu``l'''*`s``j'''-`mu``j'''*`mu``k'''*`mu``l'''*`s``i'''-`sigma``i''``j'''*`s``k''``l'''-`sigma``k''``l'''*`s``i''``j'''+`sigma``i''``j'''*`mu``k'''*`s``l'''+`sigma``i''``j'''*`mu``l'''*`s``k'''+`sigma``k''``l'''*`mu``i'''*`s``j'''+`sigma``k''``l'''*`mu``j'''*`s``i'''+`N'*`mu``i'''*`mu``j'''*`mu``k'''*`mu``l'''-`N'*`sigma``i''``j'''*`mu``k'''*`mu``l'''-`N'*`sigma``k''``l'''*`mu``i'''*`mu``j'''+`N'*`sigma``i''``j'''*`sigma``k''``l'''
}
}
matrix `xicarre'=(`xicarreA',`xicarreB' \ `xicarreB' ',`xicarreC')
/* Sandwich form: inv(A1)*A2*inv(A1), scaled by 1/N^2 */
tempname A1 A2 W
matrix `A1'=`D' '*inv(`V')*`D'
matrix `A2'=`D' '*inv(`V')*`xicarre'*inv(`V')*`D'
matrix `W'=1/`N'^2*inv(`A1')*`A2'*inv(`A1')
}
/*****************************DISPLAY THE RESULTS***************************************/
local compteur=`compteur'-1
di ""
di ""
if `compteur'==0 {
noi di in red _col(8) "The algorithm can not converge"
return scalar error=1
exit
}
if `variat'[1,1]<=`critconv'&`compteur'>0 {
noi di in green _col(8) "The algorithm converge at the `compteur'th iteration"
}
if `compteur'==`nbit'&`variat'[1,1]>`critconv' {
noi di in green _col(8) "The algorithm is stopped at the `compteur'th iteration"
}
if `conv'==0&`compteur'>0 {
noi di in green _col(8) "The algorithm converge no more after the `compteur'th iteration"
}
di ""
if "`var'"=="" {
noi di in green _col(30) "Parameters" in green _col(43) "Standard errors"
forvalue j=1/`nbitems' {
noi di in green _col(20) "``j'': " in yellow _col(30) %10.6f `beta'[`j',1] in yellow _col(50) %8.6f sqrt(`W'[`j',`j'])
}
di ""
noi di in green _col(20) "var1: " in yellow _col(30) %10.6f `beta'[`nbitems'+1,1] in yellow _col(50) %8.6f sqrt(`W'[`nbitems'+1,`nbitems'+1])
if `Q'==2 {
noi di in green _col(20) "var2: " in yellow _col(30) %10.6f `beta'[`nbitems'+2,1] in yellow _col(50) %8.6f sqrt(`W'[`nbitems'+2,`nbitems'+2])
scalar `rho'=`beta'[`nbitems'+3,1]/sqrt(`beta'[`nbitems'+1,1]*`beta'[`nbitems'+2,1])
noi di in green _col(20) "covar: " in yellow _col(30) %10.6f `beta'[`nbitems'+3,1] in yellow _col(50) %8.6f sqrt(`W'[`nbitems'+3,`nbitems'+3]) " (rho=" %5.4f `rho' ")"
}
}
else {
noi di in green _col(30) "Parameters"
forvalue j=1/`nbitems' {
noi di in green _col(20) "``j'': " in yellow _col(30) %10.6f `beta'[`j',1]
}
di ""
noi di in green _col(20) "var1: " in yellow _col(30) %10.6f `beta'[`nbitems'+1,1]
if `Q'==2 {
noi di in green _col(20) "var2: " in yellow _col(30) %10.6f `beta'[`nbitems'+2,1]
scalar `rho'=`beta'[`nbitems'+3,1]/sqrt(`beta'[`nbitems'+1,1]*`beta'[`nbitems'+2,1])
noi di in green _col(20) "covar: " in yellow _col(30) %10.6f `beta'[`nbitems'+3,1]
}
}
di ""
/* MARGINAL LOG-LIKELIHOOD AND AIC BY GAUSS-HERMITE QUADRATURE */
if "`ll'"!="" {
tempname noeuds poids
ghquadm `quad' `noeuds' `poids'
/* A single work variable P is reused for every node and item instead of
   one temporary variable per (node, item) combination */
tempvar vrais logvrais P
qui gen `P'=0
qui gen `vrais'=0
if `Q'==1 {
forvalues u=1/`quad'{
tempvar vrais`u'
qui gen `vrais`u''=1/sqrt(_pi)
forvalues j=1/`nbitems' {
qui replace `P'=exp(`B'[`j',1]*sqrt(2*`beta'[`nbitems'+1,1])*`noeuds'[1,`u']-`beta'[`j',1])/(1+exp(`B'[`j',1]*sqrt(2*`beta'[`nbitems'+1,1])*`noeuds'[1,`u']-`beta'[`j',1]))
qui replace `P'=1-`P' if ``j''==0
qui replace `vrais`u''=`vrais`u''*`P'
}
qui replace `vrais'=`vrais'+`poids'[1,`u']*`vrais`u''
}
gen `logvrais'=log(`vrais')
qui su `logvrais'
local ll=r(N)*r(mean)
noi di in green _col(20) "ll: " in yellow _col(30) %12.4f `ll'
local AIC=-2*`ll'+2*(`nbitems'+1)
noi di in green _col(20) "AIC: " in yellow _col(30) %12.4f `AIC'
}
if `Q'==2 {
matrix `covmat'=(`beta'[`nbitems'+1,1],`beta'[`nbitems'+3,1] \ `beta'[`nbitems'+3,1],`beta'[`nbitems'+2,1])
forvalues u=1/`quad'{
forvalues v=1/`quad'{
tempvar vraisu`u'v`v'
qui gen `vraisu`u'v`v''=1/_pi
forvalues j=1/`nbitems' {
local A1`u'tilde=sqrt(`beta'[`nbitems'+2,1]/(2*det(`covmat')))*`noeuds'[1,`u']
local A2`v'tilde=(`noeuds'[1,`v']-`beta'[`nbitems'+3,1]/sqrt(det(`covmat'))*`noeuds'[1,`u'])/(2*`beta'[`nbitems'+2,1])
qui replace `P'=exp(`B'[`j',1]*`A1`u'tilde'+`B'[`j',2]*`A2`v'tilde'-`beta'[`j',1])/(1+exp(`B'[`j',1]*`A1`u'tilde'+`B'[`j',2]*`A2`v'tilde'-`beta'[`j',1]))
qui replace `P'=1-`P' if ``j''==0
qui replace `vraisu`u'v`v''=`vraisu`u'v`v''*`P'
}
qui replace `vrais'=`vrais'+`poids'[1,`u']*`poids'[1,`v']*`vraisu`u'v`v''
}
}
qui gen `logvrais'=log(`vrais')
qui su `logvrais'
local ll=r(N)*r(mean)
noi di in green _col(20) "ll: " in yellow _col(27) %12.4f `ll'
local AIC=-2*`ll'+2*(`nbitems'+3)
noi di in green _col(20) "AIC: " in yellow _col(27) %12.4f `AIC'
}
}
if "`var'"=="" {
return matrix W= `W'
}
return matrix beta= `beta'
if "`ll'"!="" {
return scalar ll= `ll'
return scalar AIC= `AIC'
}
return scalar J= `nbitems'
return scalar N= `N'
return scalar error=0
restore
end

@ -0,0 +1,158 @@
*! version 1.3 1 June 2013
*! Jean-Benoit Hardouin
*
************************************************************************************************************
* Stata program : gengroup
* Generate groups of individuals based on the values of an ordinal or continuous variable
*
* Historic
* Version 1 (2007-05-27): Jean-Benoit Hardouin
* Version 1.1 (2007-06-21): Jean-Benoit Hardouin /*Correction of a bug without -if- */
* Version 1.2 (2009-10-28): Jean-Benoit Hardouin /*-continuous- option*/
* Version 1.3 (2013-06-01): Jean-Benoit Hardouin /*Correction in presence of missing values*/
*
* Jean-benoit Hardouin, phD, Assistant Professor
* EA4275 - SPHERE
* Team of Biostatistics, Pharmacoepidemiology and Subjective Measures in Health Sciences
* University of Nantes - Faculty of Pharmaceutical Sciences
* France
* jean-benoit.hardouin@univ-nantes.fr
*
* News about this program :http://www.anaqol.org
*
* Copyright 2007, 2009, 2013 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
************************************************************************************************************
program define gengroup ,rclass
    /* gengroup: builds a grouping variable from one numeric variable.
       Default (ordinal) mode: adjacent values are pooled, starting from the
       minimum, until a group holds at least minsize observations; a last
       group smaller than minsize is merged into the previous one.
       continuous mode: the observations are sorted and cut every minsize-th
       rank, after minsize has been rescaled to spread the observations over
       floor(n/minsize) groups.
       Options: newvariable() name of the created variable (group when
       omitted), replace, minsize() (default 30), details, continuous.
       Returns r(list): the cut points between groups. */
    version 7.0
    syntax varlist(numeric min=1 max=1) [if/] [in] [, NEWvariable(namelist min=1 max=1) REPlace MINsize(integer 30) DETails CONTinuous]
    /* remember the current order of the data; the continuous branch re-sorts them */
    tempvar sort
    qui gen `sort'=_n
    /* if/ delivers the condition without the if keyword: build both a complete
       qualifier and a fragment that can be appended to another condition */
    if "`if'"!="" {
        local if2="if `if'"
        local if3="&(`if')"
    }
    marksample touse
    if "`newvariable'"=="" {
        local newvariable group
    }
    capture confirm new variable `newvariable'
    if _rc!=0&"`replace'"=="" {
        di in red "The variable {hi:`newvariable'} is already defined"
        exit 198
    }
    else if _rc!=0&"`replace'"!="" {
        qui drop `newvariable'
    }
    if "`continuous'"=="" {
        qui gen `newvariable'=`varlist' `if2' `in'
        qui su `newvariable' `if2' `in'
        local min=r(min)
        local max=r(max)
        local groupmin=`min'
        local groupmax=`min'-1
        local numgroup=1
        local recode
        local list
        /* one pass per group: widen [groupmin,groupmax] by steps of 1 until it
           holds minsize observations or goes beyond the maximum */
        while (`groupmin'<`max'+1) {
            local n=0
            while (`n'<`minsize') {
                local groupmax=`groupmax'+1
                qui count if `newvariable'>=`groupmin'&`newvariable'<=`groupmax'`if3' `in'
                local n=r(N)
                if `groupmax'>`max' {
                    /* nothing left to add: force the end of the inner loop */
                    local n=`minsize'+1
                }
            }
            if `groupmax'<`max' {
                local list `list' `groupmax'
            }
            local recode `recode' `groupmin'/`groupmax'=`numgroup'
            if "`details'"!="" {
                di in gr "Group " in ye `numgroup' in gr ": Values " in ye `groupmin' in gr " to " in ye `groupmax'
            }
            local groupmin=`groupmax'+1
            local groupmax=`groupmin'-1
            local numgroup=`numgroup'+1
        }
        qui recode `newvariable' `recode' `if2' `in'
        /* numgroup is one past the last group created */
        qui count if `newvariable'==`numgroup'-1`if3' `in'
        local dernier=r(N)
        if `dernier'<`minsize' {
            qui recode `newvariable' `=`numgroup'-1'=`=`numgroup'-2' `if2' `in'
            if "`details'"!="" {
                di in gr "The group " in ye `=`numgroup'-1' in gr " is recoded in " in ye `=`numgroup'-2'
            }
            /* the merge removes the last cut point from the returned list */
            local list2
            forvalues i=1/`=`numgroup'-3' {
                local w:word `i' of `list'
                local list2 `list2' `w'
            }
            local list `list2'
        }
    }
    else {
        local list
        qui sort `varlist'
        qui tempvar sort2
        qui gen `sort2'=_n
        qui gen `newvariable'=0 `if2' `in'
        qui replace `newvariable'=. if `varlist'==.
        qui count if `varlist'!=.`if3' `in'
        local nbind=r(N)
        /* rescale minsize so that the groups have about the same size */
        local tmpgp=floor(`nbind'/`minsize')
        local minsize=floor(`nbind'/`tmpgp')
        local nbused=0
        tempvar used
        qui gen `used'=0 `if2' `in'
        local num=1
        while (`=`nbused'+`minsize''<`nbind'+1) {
            /* value of the observation ranked nbused+minsize: the next cut point */
            qui su `varlist' if `sort2'==`=`nbused'+`minsize''`if3' `in'
            local mean=r(mean)
            local list `list' `mean'
            /* small upward shift so that values equal to the cut point fall in the group */
            local mean=round(`mean',0.0000001)+0.0000001
            if "`details'"!="" {
                di in gr "The values inferior to " in ye `mean' in gr " are recoded in " in ye `num'
            }
            qui replace `newvariable'=`num' if `varlist'<=`mean'&`used'==0
            qui replace `used'=1 if `newvariable'!=0&`newvariable'!=.
            qui count if `used'==1
            local nbused=`r(N)'
            local num=`num'+1
        }
        /* observations left over join the last group created */
        qui replace `newvariable'=`num'-1 if `newvariable'==0
        if "`details'"!="" {
            qui su `varlist'
            local max=r(max)
            if `max'>`mean' {
                di in gr "Individuals between " in ye `mean' in gr "and " in ye `max' " are recoded in " in ye `=`num'-1'
            }
        }
        /* put the data back in their initial order */
        qui sort `sort'
    }
    return local list `list'
end

@ -0,0 +1,51 @@
{smcl}
{* 6May2013}{...}
{hline}
help for {hi:gengroup}{right:Jean-benoit Hardouin}
{hline}
{title:Module to generate groups of individuals based on an ordinal or continuous variable}
{p 8 14 2}{cmd:gengroup} {it:varname} [{cmd:if} {it:exp}] [{cmd:in} {it:range}] [{cmd:,} {cmdab:new:variable}({it:newvarname}) {cmdab:rep:lace} {cmdab:min:size}(#) {cmdab:det:ails} {cmdab:cont:inuous}]
{title:Description}
{p 4 8 2}{cmd:gengroup} creates groups of individuals by using the values of an ordinal or continuous variable.
The module creates groups by recoding several adjacent values of the variable, until each group contains at least the number of individuals defined in the {cmd:minsize} option.
{title:Options}
{p 4 8 2}{cmd:newvariable} defines the name of the new variable ({it:group} by default).
{p 4 8 2}{cmd:replace} replaces the variable defined in the {cmd:newvariable} option if it already exists.
{p 4 8 2}{cmd:minsize} defines the minimal number of individuals in each group (30 by default).
{p 4 8 2}{cmd:details} displays the composition of each group.
{p 4 8 2}{cmd:continuous} allows handling a continuous variable.
{title:Examples}
{p 4 8 2}{inp:. gengroup score}
{p 4 8 2}{inp:. gengroup score, newvariable(grouptocreate) replace minsize(80)}
{p 4 8 2}{inp:. gengroup score, details}
{title:Author}
{p 4 8 2}Jean-Benoit Hardouin, PhD, assistant professor{p_end}
{p 4 8 2}Team of Biostatistics, Clinical Research and Subjective Measures in Health Sciences{p_end}
{p 4 8 2}University of Nantes - Faculty of Pharmaceutical Sciences{p_end}
{p 4 8 2}1, rue Gaston Veil - BP 53508{p_end}
{p 4 8 2}44035 Nantes Cedex 1 - FRANCE{p_end}
{p 4 8 2}Email:
{browse "mailto:jean-benoit.hardouin@univ-nantes.fr":jean-benoit.hardouin@univ-nantes.fr}{p_end}
{p 4 8 2}Website {browse "http://www.anaqol.org":AnaQol}
{title:Also see}
{p 4 13 2}Online: help for {help egen}, {help generate} and {help genscore} if installed.{p_end}

@ -0,0 +1,133 @@
*! Version 1.1 24 March 2015
*! Jean-Benoit Hardouin
************************************************************************************************************
* Stata program : gengutt
* Generation of Guttman errors per individual
* Release 1.1 : March 24, 2015 /*no required -order- option*/
*
*
* Jean-benoit Hardouin, phD, Assistant Professor
* Team of Biostatistics, Pharmacoepidemiology and Subjective Measures in Health Sciences (UPRES EA 4275 SPHERE)
* University of Nantes - Faculty of Pharmaceutical Sciences
* France
* jean-benoit.hardouin@anaqol.org
*
* Release 1 : March 20, 2015 /*initial version with only -order- option*/
*
* News about this program :http://www.anaqol.org
*
* Copyright 2015 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
***********************************************************************************************************
program define gengutt , rclass
    /* gengutt: counts, for each individual, the number of Guttman errors over
       all pairs of item steps (a step is the indicator: item >= level).
       Creates the variable named in gen() with the total count, one variable
       gen()_item per item with the errors involving that item, and gen()_gp,
       the concatenation of the step indicators taken in the step order.
       order() lists item numbers, one entry per step, from the first step to
       the last; when omitted the steps are ranked by decreasing frequency. */
    syntax varlist(min=2 numeric) [if] , Gen(string) [ Replace Order(string) PAIRWise] /*[,PAIRWise PAIR ppp pmm noADJust GENERror(string) REPlace GRaph MONOtonicity(string) NIRESTscore(string) NIPmatrix(string) HTML(string)]*/
    preserve
    local nitems : word count `varlist'
    tokenize `varlist'

    /* highest observed level of each item; their sum is the number of steps */
    local totsteps=0
    forvalues it=1/`nitems' {
        qui su ``it''
        local top`it'=r(max)
        local totsteps=`totsteps'+`top`it''
        local seen`it'=0
    }

    local sequence `order'
    if "`sequence'"=="" {
        /* rows of the work matrix: item number, level, number of individuals reaching the level */
        tempname freq
        matrix `freq'=J(3,`totsteps',0)
        local pos=0
        forvalues it=1/`nitems' {
            forvalues lev=1/`top`it'' {
                local pos=`pos'+1
                qui count if ``it''>=`lev'
                matrix `freq'[1,`pos']=`it'
                matrix `freq'[2,`pos']=`lev'
                matrix `freq'[3,`pos']=r(N)
            }
        }
        /* repeatedly pick the most frequent remaining step (first one on ties), then clear it */
        forvalues pick=1/`totsteps' {
            local best=0
            local bestpos=0
            local bestitem=0
            forvalues c=1/`totsteps' {
                if `freq'[3,`c']>`best' {
                    local best=`freq'[3,`c']
                    local bestpos=`c'
                    local bestitem=`freq'[1,`c']
                }
            }
            local sequence `sequence' `bestitem'
            matrix `freq'[3,`bestpos']=0
        }
    }

    /* one indicator variable per step, following the step order */
    local nsteps: word count `sequence'
    tempname stepinfo
    matrix `stepinfo'=J(3,`nsteps',0)
    forvalues k=1/`nsteps' {
        local it: word `k' of `sequence'
        local seen`it'=`seen`it''+1
        matrix `stepinfo'[1,`k']=`it'
        matrix `stepinfo'[2,`k']=`seen`it''
        tempname stepvar`k'
        qui gen `stepvar`k''=``it''>=`seen`it''
    }

    /* every item needs as many steps in the order as it has observed levels */
    local bad=0
    forvalues i=1/`nitems' {
        if `top`i''>`seen`i'' {
            di in red "The item `i' presents more answer categories than defined in the -order- option "
            local bad=1
        }
    }
    if `bad'!=0 {
        error 198
    }

    if "`replace'"!="" {
        qui capture drop `gen'
    }
    qui gen `gen'=0
    forvalues it=1/`nitems' {
        if "`replace'"!="" {
            qui capture drop `gen'_``it''
        }
        qui gen `gen'_``it''=0
    }

    /* an error occurs when a later step is passed while an earlier one is failed */
    forvalues a=1/`nsteps' {
        local first=`a'+1
        forvalues b=`first'/`nsteps' {
            local cond `stepvar`a''<`stepvar`b''&`stepvar`a''!=.&`stepvar`b''!=.
            local ita=`stepinfo'[1,`a']
            local itb=`stepinfo'[1,`b']
            qui replace `gen'=`gen'+1 if `cond'
            qui replace `gen'_``ita''=`gen'_``ita''+1 if `cond'
            qui replace `gen'_``itb''=`gen'_``itb''+1 if `cond'
        }
    }

    if "`replace'"!="" {
        capture drop `gen'_gp
    }
    egen `gen'_gp=concat(`stepvar1'-`stepvar`nsteps'')
    /* keep the new variables: cancel the pending restore */
    capture restore, not
end

@ -0,0 +1,86 @@
*! version 1.4 27december2005
*! Jean-Benoit Hardouin
*
************************************************************************************************************
* Stata program : genscore
* Generate scores from a list of variables
* Version 1.4 : December 27, 2005 /*corrects a bug with the mean option*/
*
* Historic
* Version 1.2 (2005-10-01): Jean-Benoit Hardouin
* Version 1.3 (2005-12-09): Jean-Benoit Hardouin /*centered and standardized options*/
*
* Jean-benoit Hardouin, Regional Health Observatory of Orléans - France
* jean-benoit.hardouin@orscentre.org
*
* News about this program : http://anaqol.free.fr
* FreeIRT Project : http://freeirt.free.fr
*
* Copyright 2005 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
************************************************************************************************************
program define genscore
    version 7.0
    syntax varlist(min=1) [if] [in] [fweight] [, SCore(namelist min=1 max=1) CENTered STAndardized MEan MIssing(string) REPlace]
    * Purpose: create one variable holding the sum (or the mean) of the items of varlist.
    * Options:
    *   score()       name of the variable to create (score when omitted)
    *   centered      subtract the item mean before summing
    *   standardized  subtract the item mean and divide by the item standard deviation
    *   mean          divide the sum by the number of items
    *   missing()     item value to be treated as a missing answer (. when omitted)
    *   replace       overwrite the score variable when it already exists
    * The score is missing for every observation with at least one missing item.
    * Exits with return code 198 when the score variable exists and replace is not given.
    marksample touse
    if "`score'"=="" {
        local score score
    }
    local nbitems:word count `varlist'
    tokenize `varlist'
    if "`missing'"=="" {
        local missing .
    }
    * The existence test is done outside of the quiet block so the message is always shown.
    capture confirm new variable `score'
    local rc=_rc
    if `rc'!=0&"`replace'"=="" {
        di in red "The variable {hi:`score'} already defined"
        exit 198
    }
    else if `rc'!=0&"`replace'"!="" {
        drop `score'
    }
    quietly {
        forvalues i=1/`nbitems' {
            * tempvar (not tempname) so the working copies are dropped when the program ends
            tempvar var`i'
            local sd=1
            local moy=0
            if "`standardized'"!=""|"`centered'"!="" {
                su ``i'' [`weight'`exp']
                local moy=r(mean)
                local sd=r(sd)
                if "`standardized'"=="" {
                    local sd=1
                }
            }
            gen `var`i''=(``i''-`moy')/`sd'
        }
        gen `score'=0 if `touse'
        forvalues i=1/`nbitems' {
            replace `score'=`score'+`var`i'' if `touse'
            * an item is unusable when it equals the user code OR is a system missing value
            replace `score'=. if `touse'&(``i''==`missing'|``i''>=.)
        }
        if "`mean'"!="" {
            replace `score'=`score'/`nbitems' if `touse'
        }
    }
end

@ -0,0 +1,54 @@
{smcl}
{* 9december2005}{...}
{hline}
help for {hi:genscore}{right:Jean-benoit Hardouin}
{hline}
{title:Module to generate a score}
{p 8 14 2}{cmd:genscore} {it:varlist} [{cmd:if} {it:exp}] [{cmd:in} {it:range}] [{it:weights}] [{cmd:,} {cmdab:sc:ore}({it:newvarname}) {cmdab:me:an} {cmdab:cent:ered} {cmdab:sta:ndardized} {cmdab:mi:ssing}(string) {cmdab:rep:lace}]
{title:Description}
{p 4 8 2}{cmd:genscore} allows creating a new variable containing the score
computed as the sum (or the mean) of the responses to the variables defined in
{it:varlist}. Variables of {it:varlist} can be centered or standardized. The new
variable will contain a missing value for each individual with at least one
missing value among the variables of {it:varlist}.
{p 4 8 2}Only {cmd:fweights} are allowed, and the weights have an influence only with
the {cmd:centered} or {cmd:standardized} options.
{title:Options}
{p 4 8 2}{cmd:score} defines the name of the new variable ({it:score} by default).
{p 4 8 2}{cmd:mean} allows obtaining a mean score (the sum of the responses is divided by the number of variables).
{p 4 8 2}{cmd:centered} computes the scores with centered variables.
{p 4 8 2}{cmd:standardized} computes the scores with standardized variables.
{p 4 8 2}{cmd:missing} defines a specific modality of the variables which can be considered as a missing value.
{p 4 8 2}{cmd:replace} allows replacing the variable defined in the {cmd:score} option if it already exists.
{title:Examples}
{p 4 8 2}{cmd:. genscore turn trunk mpg [fweight=w],standardized mean}
{p 4 8 2}{cmd:. genscore item1-item20, score(totscore) missing(9)}
{p 4 8 2}{cmd:. genscore var*, score(scorevar) replace}
{title:Author}
{p 4 8 2}Jean-Benoit Hardouin, Regional Health Observatory (ORS) - 1, rue Porte
Madeleine - BP 2439 - 45032 Orleans Cedex 1 - France. You can contact the author at
{browse "mailto:jean-benoit.hardouin@orscentre.org":jean-benoit.hardouin@orscentre.org}
and visit the websites {browse "http://anaqol.free.fr":AnaQol} and
{browse "http://freeirt.free.fr":FreeIRT}
{title:Also see}
{p 4 8 2}Online: help for {help egen}, {help generate} and {help mvdecode}{p_end}

@ -0,0 +1,181 @@
*! Version 1 19 November 2008
*! Jean-Benoit Hardouin
************************************************************************************************************
* Stata program : guttmax
* Research of the maximal number of Guttman Errors for a specific score
*
* Historic :
* Version 1 (November 19, 2008) [Jean-Benoit Hardouin]
*
* Jean-benoit Hardouin, phD, Assistant Professor
* Team of Biostatistics, Clinical Research and Subjective Measures in Health Sciences (UPRES EA 4275)
* University of Nantes - Faculty of Pharmaceutical Sciences
* France
* jean-benoit.hardouin@anaqol.org
*
* Required Stata modules:
* -anaoption- (version 1)
* -traces- (version 3.2)
* -gengroup- (version 1)
*
* News about this program :http://www.anaqol.org
* FreeIRT Project website : http://www.freeirt.org
*
* Copyright 2008 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
***********************************************************************************************************
program define guttmax , rclass
    version 10
    syntax anything [,Score(real -1) Response(string)]
    * Purpose: count Guttman errors for a list of ordered steps.
    * Arguments:
    *   anything    one item number per step, given in step order; the largest
    *               number found is taken as the number of items
    *   score()     build a response profile for this score with a greedy search
    *               and report its number of Guttman errors as the maximum
    *   response()  one response per item; the number of Guttman errors of this
    *               given profile is reported instead
    * The two options are mutually exclusive (return code 198).
    * Returns: r(maxegutt), the number of Guttman errors of the profile.
    if `score'!=-1&"`response'"!="" {
        di in red "You cannot defined in the same time the score and the response options"
        error 198
    }
    local step:word count `anything'
    local nbitems=0
    forvalues i=1/`step' {
        local step`i':word `i' of `anything'
        if `step`i''>`nbitems' {
            local nbitems=`step`i''
        }
    }
    di in green "Number of steps: " in ye `step'
    di in green "Number of items: " in ye `nbitems'
    * mate is the 0/1 profile over the steps (1 = step passed)
    tempname mate
    matrix `mate'=J(1,`step',0)
    if `score'!=-1 {
        if `score'>`step' {
            di in red "You cannot defined a score greater to the maximal possible score"
            error 198
        }
        di in green "Score: " in ye `score'
        * number of steps of each item, and the largest of these numbers
        local maxstep=0
        forvalues i=1/`nbitems' {
            local nstep`i'=0
            forvalues j=1/`step' {
                if `step`j''==`i' {
                    local ++nstep`i'
                    if `nstep`i''>`maxstep' {
                        local maxstep=`nstep`i''
                    }
                }
            }
        }
        * calcul[i,s] = mean position, in the step list, of the first s steps of item i
        tempname calcul
        matrix `calcul'=J(`nbitems',`maxstep',0)
        forvalues i=1/`nbitems' {
            local n=1
            forvalues j=1/`step' {
                if `step`j''==`i' {
                    forvalues s=`n'/`nstep`i'' {
                        matrix `calcul'[`i',`s']=`calcul'[`i',`s']+`j'
                    }
                    local ++n
                }
            }
            forvalues j=2/`nstep`i'' {
                matrix `calcul'[`i',`j']=`calcul'[`i',`j']/`j'
            }
        }
        * Greedy search: while some score remains, take the item and number of steps
        * with the largest mean position that still fits in the remaining score.
        while (`score'>0) {
            local max=0
            forvalues i=1/`nbitems' {
                local s=min(`score',`nstep`i'')
                forvalues j=1/`s' {
                    if `calcul'[`i',`j']>`max' {
                        local maxi=`i'
                        local maxj=`j'
                        local max=`calcul'[`i',`j']
                    }
                }
            }
            * NOTE(review): the test d<=maxj lets up to maxj+1 steps of the item be set
            * to 1 while the score is reduced by maxj only. This cannot matter when each
            * item has a single step; to be confirmed for polytomous items.
            local d=0
            forvalues l=1/`step' {
                if `step`l''==`maxi'&`mate'[1,`l']==0&`d'<=`maxj' {
                    matrix `mate'[1,`l']=1
                    local ++d
                }
            }
            forvalues l=1/`maxj' {
                matrix `calcul'[`maxi',`l']=0
            }
            local score=`score'-`maxj'
        }
    }
    else { /*if reponse*/
        local nbrep: word count `response'
        if `nbrep'!=`nbitems' {
            di in red "You cannot defined a number of responses different of the number of items"
            error 198
        }
        * cpt[i,1] = number of steps of item i still to be marked as passed
        tempname cpt
        qui matrix `cpt'=J(`nbitems',1,0)
        forvalues i=1/`nbitems' {
            local rep`i':word `i' of `response'
            qui matrix `cpt'[`i',1]=`rep`i''
        }
        forvalues i=1/`step' {
            if `cpt'[`step`i'',1]>0 {
                qui matrix `mate'[1,`i']=1
                qui matrix `cpt'[`step`i'',1]=`cpt'[`step`i'',1]-1
            }
            else {
                qui matrix `mate'[1,`i']=0
            }
        }
    }
    * A Guttman error is a pair of steps where the earlier one is failed and the later one passed
    local emax=0
    forvalues i=1/`step' {
        forvalues j=`=`i'+1'/`step' {
            if `mate'[1,`i']==0&`mate'[1,`j']==1 {
                local ++emax
            }
        }
    }
    * score is -1 only when the response option was used (the loop above ends at 0 otherwise)
    if `score'!=-1 {
        di in green "Responses profile generating the most important number of Guttman error"
        matrix list `mate' ,noheader nonames
        di in green "Max number of Guttman errors : " in ye `emax'
    }
    else {
        di in green "Responses profile"
        matrix list `mate' ,noheader nonames
        di in green "Number of Guttman errors : " in ye `emax'
    }
    return scalar maxegutt =`emax'
end

@ -0,0 +1,136 @@
*! Version 1 19 November 2008
*! Jean-Benoit Hardouin
************************************************************************************************************
* Stata program : guttmax
* Research of the maximal number of Guttman Errors for a specific score
*
* Historic :
* Version 1 (November 19, 2008) [Jean-Benoit Hardouin]
*
* Jean-benoit Hardouin, phD, Assistant Professor
* Team of Biostatistics, Clinical Research and Subjective Measures in Health Sciences (UPRES EA 4275)
* University of Nantes - Faculty of Pharmaceutical Sciences
* France
* jean-benoit.hardouin@anaqol.org
*
* Required Stata modules:
* -anaoption- (version 1)
* -traces- (version 3.2)
* -gengroup- (version 1)
*
* News about this program :http://www.anaqol.org
* FreeIRT Project website : http://www.freeirt.org
*
* Copyright 2008 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
***********************************************************************************************************
program define guttmax1 , rclass
    version 10
    syntax anything [,Score(int 0)]
    * Purpose: build, with a greedy search, a response profile for the requested
    * score and count its Guttman errors.
    *   anything   one item number per step, given in step order
    *   score()    the score to reach (0 when omitted)
    * Returns: r(maxegutt), the number of Guttman errors of the profile found.

    * Read the step list; the largest item number is the number of items.
    local nbsteps:word count `anything'
    local nitems=0
    forvalues k=1/`nbsteps' {
        local owner`k':word `k' of `anything'
        if `owner`k''>`nitems' {
            local nitems=`owner`k''
        }
    }
    di in green "Number of steps: " in ye `nbsteps'
    di in green "Number of items: " in ye `nitems'
    di in green "Score: " in ye `score'

    * profile is the 0/1 row vector over the steps (1 = step passed)
    tempname profile
    matrix `profile'=J(1,`nbsteps',0)

    * Number of steps owned by each item, and the largest of these numbers.
    local widest=0
    forvalues it=1/`nitems' {
        local cnt`it'=0
        forvalues k=1/`nbsteps' {
            if `owner`k''==`it' {
                local ++cnt`it'
            }
        }
        local widest=max(`widest',`cnt`it'')
    }

    * avgpos[it,c] = mean position in the list of the first c steps of item it
    tempname avgpos
    matrix `avgpos'=J(`nitems',`widest',0)
    forvalues it=1/`nitems' {
        local first=1
        forvalues k=1/`nbsteps' {
            if `owner`k''==`it' {
                forvalues c=`first'/`cnt`it'' {
                    matrix `avgpos'[`it',`c']=`avgpos'[`it',`c']+`k'
                }
                local ++first
            }
        }
        forvalues c=2/`cnt`it'' {
            matrix `avgpos'[`it',`c']=`avgpos'[`it',`c']/`c'
        }
    }

    * Greedy search: repeatedly pick the cell with the largest mean position among
    * those that fit in the remaining score, mark steps of that item, clear the
    * cells used and lower the remaining score.
    while (`score'>0) {
        local best=0
        forvalues it=1/`nitems' {
            local lim=min(`score',`cnt`it'')
            forvalues c=1/`lim' {
                if `avgpos'[`it',`c']>`best' {
                    local bi=`it'
                    local bj=`c'
                    local best=`avgpos'[`it',`c']
                }
            }
        }
        local filled=0
        forvalues k=1/`nbsteps' {
            if `owner`k''==`bi'&`profile'[1,`k']==0&`filled'<=`bj' {
                matrix `profile'[1,`k']=1
                local ++filled
            }
        }
        forvalues c=1/`bj' {
            matrix `avgpos'[`bi',`c']=0
        }
        local score=`score'-`bj'
    }

    * Count the pairs made of a failed step followed, later in the list, by a passed step.
    local nerr=0
    forvalues a=1/`nbsteps' {
        forvalues b=`=`a'+1'/`nbsteps' {
            if `profile'[1,`a']==0&`profile'[1,`b']==1 {
                local ++nerr
            }
        }
    }
    di in green "Responses profile generating the most important number of Guttman error"
    matrix list `profile' ,noheader nonames
    di in green "Max number of Guttman errors : " in ye `nerr'
    return scalar maxegutt =`nerr'
end

@ -0,0 +1,25 @@
StataFileTM:00001:01100:GREC: :
00008:00008:00001:
*! classname: twowaygraph_g
*! family: twoway
*! date: 8 Jul 2023
*! time: 23:18:16
*! graph_scheme: stcolor
*! naturallywhite: 1
*! end
// File created by Graph Editor Recorder.
// Edit only if you know what you are doing.
.plotregion1.AddTextBox added_text editor -39.54864687610738 -1.346531835179192
.plotregion1.added_text_new = 1
.plotregion1.added_text_rec = 2
.plotregion1.added_text[2].style.editstyle angle(default) size( sztype(relative) val(3.4722) allow_pct(1)) color(black) horizontal(left) vertical(middle) margin( gleft( sztype(relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) linegap( sztype(relative) val(0) allow_pct(1)) drawbox(no) boxmargin( gleft( sztype(relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) fillcolor(bluishgray) linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) box_alignment(east) editcopy
.plotregion1.added_text[2].text = {}
.plotregion1.added_text[2].text.Arrpush 1
// editor text[2] edits
// editor text[2] edits
// <end>

@ -0,0 +1,501 @@
************************************************************************************************************
* hcaccprox: Hierarchical Clusters Analysis/CCPROX
* Version 1: May 12, 2004
* Add-on: Partition version 2 (2004-04-10)
*
* Use the Detect Stata program (http://freeirt.free.fr)
*
* Historic :
* Version 1 [2004-01-18], Jean-Benoit Hardouin
*
* Jean-benoit Hardouin, Regional Health Observatory of Orléans - France
* jean-benoit.hardouin@neuf.fr
*
* News about this program : http://anaqol.free.fr
* FreeIRT Project : http://freeirt.free.fr
*
* Copyright 2004 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
************************************************************************************************************
program define hcaccprox , rclass
    version 8.0
    syntax varlist(min=2 numeric) [,PROX(string) METHod(string) PARTition(numlist) MEASures DETails DETect(integer 0)]
    * Purpose: hierarchical clustering of the items of varlist from a matrix of
    * proximities between items.
    * Options:
    *   prox()       proximity measure: a, ad, cor, ccov (default), ccor or mh
    *   method()     linkage: UPGMA (default), single or complete
    *   partition()  numbers of clusters for which the composition is displayed
    *   measures     display the proximity matrix
    *   details      display each aggregation step
    *   detect()     compute the DETECT, Iss and R indexes (external detect program)
    *                for the partitions with at most this number of clusters
    * Returns: r(nodes), r(mempart), r(nbitems), r(varlist), and when requested
    * r(affect#) for each listed partition and r(indexes).
    * Exits with return code 198 when prox() or method() is not a known keyword.
    local nbpart:word count `partition'
    tokenize `partition'
    forvalues k=1/`nbpart' {
        local part`k'=``k''
    }
    local nbitems : word count `varlist'
    tokenize `varlist'
    tempname proximity whereitems
    matrix define `proximity'=J(`nbitems',`nbitems',0)
    matrix define `whereitems'=J(`=`nbitems'-1',`nbitems',0)
    * Indexes are only computed for partitions in 1 to nbitems-1 clusters, so any
    * larger request (nbitems included) is reduced to nbitems-1.
    if `detect'>`nbitems'-1 {
        di _col(3) in green "The number of partitions analyzed by the DETECT criterion must be inferior to the number of possible partitions"
        di _col(3) in green "This number of possible partitions is `=`nbitems'-1', so your detect option is put to this number"
        local detect=`nbitems'-1
        di
    }
    if "`prox'"!="a"&"`prox'"!="ad"&"`prox'"!="cor"&"`prox'"!="ccov"&"`prox'"!="ccor"&"`prox'"!="mh" {
        if "`prox'"=="" {
            local prox="ccov"
        }
        else {
            di in red "You must define an existing measure of proximity (a, ad, cor, ccov, ccor, mh)."
            di in red "Please correct your prox option."
            exit 198
        }
    }
    if "`method'"!="UPGMA"&"`method'"!="single"&"`method'"!="complete" {
        if "`method'"=="" {
            local method="UPGMA"
        }
        else {
            di in red "Tou must define an existing method to define the proximity between two clusters of items:"
            di in red _col(10) "- UPGMA: Unweighted Pair-Group Method of Average"
            di in red _col(10) "- single: single linkage"
            di in red _col(10) "- complete: complete linkage "
            di in red "Please correct your method option"
            exit 198
        }
    }
    forvalues i=1/`nbitems' {
        matrix `whereitems'[1,`i']=`i'
        if "`details'"!="" {
            di in green _col(3) "The item " _col(13) in yellow "``i''" in green " correspond to the node " in yellow "`i'"
        }
    }
    * Total score (row mean times number of items) and size of each score stratum
    tempvar score
    egen `score'=rmean(`varlist')
    qui replace `score'=`score'*`nbitems'
    forvalues k=0/`nbitems' {
        qui count if `score'==`k'
        local nk`k'=r(N)
    }
    qui count
    local N=r(N)
    if "`prox'"=="ccov"|"`prox'"=="mh" {
        local proxmin=0
    }
    /*************************Measure of proximities*********************************/
    forvalues i=1/`nbitems' {
        forvalues j=`=`i'+1'/`nbitems' {
            /***********************************Proximity A**************************/
            if "`prox'"=="a" {
                qui count if ``i''==1&``j''==1
                local tmp11=r(N)
                qui count if ``i''==0&``j''==0
                local tmp00=r(N)
                matrix `proximity'[`i',`j']=sqrt(1-`tmp11'/(`N'-`tmp00'))
                matrix `proximity'[`j',`i']=`proximity'[`i',`j']
            }
            /***********************************Proximity AD*************************/
            if "`prox'"=="ad" {
                qui count if ``i''==1&``j''==1
                local tmp11=r(N)
                qui count if ``i''==0&``j''==0
                local tmp00=r(N)
                matrix `proximity'[`i',`j']=sqrt(1-(`tmp11'+`tmp00')/`N')
                matrix `proximity'[`j',`i']=`proximity'[`i',`j']
            }
            /**********************************Proximity COR*************************/
            if "`prox'"=="cor" {
                qui count if ``i''==1&``j''==1
                local tmp11=r(N)
                qui count if ``i''==0&``j''==0
                local tmp00=r(N)
                qui count if ``i''==1&``j''==0
                local tmp10=r(N)
                qui count if ``i''==0&``j''==1
                local tmp01=r(N)
                matrix `proximity'[`i',`j']=sqrt(2*(1-(`tmp11'*`tmp00'-`tmp10'*`tmp01')/(sqrt((`tmp11'+`tmp10')*(`tmp11'+`tmp01')*(`tmp00'+`tmp10')*(`tmp00'+`tmp01')))))
                matrix `proximity'[`j',`i']=`proximity'[`i',`j']
            }
            /***********************************Proximity CCOV**********************/
            * Covariance computed inside each score stratum and weighted by the stratum
            * size. Strata with fewer than two individuals, or where the covariance
            * cannot be computed, are skipped.
            if "`prox'"=="ccov" {
                local dij=0
                forvalues k=1/`=`nbitems'-1' {
                    if `nk`k''>1 {
                        capture corr ``i'' ``j'' if `score'==`k',cov
                        if _rc==0 {
                            local cijk=r(cov_12)
                            if `cijk'<. {
                                local dij=`dij'+`cijk'*`nk`k''
                            }
                        }
                    }
                }
                matrix `proximity'[`i',`j']=-`dij'/`N'
                matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                if `proxmin'<`dij'/`N' {
                    local proxmin=`dij'/`N'
                }
            }
            /***********************************Proximity CCOR**********************/
            * Same scheme as CCOV with the correlation inside each score stratum.
            if "`prox'"=="ccor" {
                local dij=0
                forvalues k=1/`=`nbitems'-1' {
                    if `nk`k''>1 {
                        capture corr ``i'' ``j'' if `score'==`k'
                        if _rc==0 {
                            local cijk=r(rho)
                            if `cijk'<. {
                                local dij=`dij'+`cijk'*`nk`k''
                            }
                        }
                    }
                }
                matrix `proximity'[`i',`j']=sqrt(2*(1-`dij'/`N'))
                matrix `proximity'[`j',`i']=`proximity'[`i',`j']
            }
            /***********************************Proximity MH************************/
            * The 2x2 table is counted inside each score stratum. Both sums restart
            * at zero for every pair of items.
            if "`prox'"=="mh" {
                local numij=0
                local denomij=0
                forvalues k=1/`=`nbitems'-1' {
                    if `nk`k''!=0 {
                        qui count if ``i''==1&``j''==1&`score'==`k'
                        local A=r(N)
                        qui count if ``i''==0&``j''==1&`score'==`k'
                        local B=r(N)
                        qui count if ``i''==1&``j''==0&`score'==`k'
                        local C=r(N)
                        qui count if ``i''==0&``j''==0&`score'==`k'
                        local D=r(N)
                        if `B'!=0&`C'!=0 {
                            local numij=`numij'+`A'*`D'/`nk`k''
                            local denomij=`denomij'+`B'*`C'/`nk`k''
                        }
                    }
                }
                matrix `proximity'[`i',`j']=-log(`numij'/`denomij')
                matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                if `proxmin'<log(`numij'/`denomij') {
                    local proxmin=-`proximity'[`i',`j']
                }
            }
        }
    }
    * ccov and mh can be negative: every off-diagonal cell is shifted by proxmin
    if "`prox'"=="ccov"|"`prox'"=="mh" {
        forvalues i=1/`nbitems' {
            forvalues j=`=`i'+1'/`nbitems' {
                matrix `proximity'[`i',`j']=`proximity'[`i',`j']+`proxmin'
                matrix `proximity'[`j',`i']=`proximity'[`i',`j']
            }
        }
    }
    /**********************END OF THE COMPUTING OF THE PROXIMITIES**************************************/
    if "`measures'"!="" {
        di
        matrix rowname `proximity'=`varlist'
        matrix colname `proximity'=`varlist'
        di in green _col(3) "Measures of proximity between the items"
        matrix list `proximity', noheader
        di
    }
    /**********************STEP 0**********************************************************************/
    * nodes has one column per node (the items first, then one node per aggregation):
    *   row 1 = number of items in the node, row 2 = 1 while the node is not merged,
    *   rows 3 and 4 = the two nodes merged to form it, rows 5 and following = its items.
    * conclinesnodes maps the rows of currentprox to node numbers.
    * mempart column k lists the nodes that are active after the k-th aggregation.
    tempname currentprox nodes conclinesnodes mempart
    matrix `currentprox'=`proximity'
    matrix define `nodes'=J(`=`nbitems'+4',`=2*`nbitems'-1',0)
    matrix define `conclinesnodes'=J(1,`nbitems',0)
    matrix define `mempart'=J(`=`nbitems'+2',`=`nbitems'-1',0)
    forvalues i=1/`nbitems' {
        matrix `nodes'[1,`i']=1
        matrix `nodes'[2,`i']=1
        matrix `nodes'[5,`i']=`i'
        matrix `conclinesnodes'[1,`i']=`i'
    }
    /*********************************CLUSTERING PROCEDURE*************************************/
    forvalues k=1/`=`nbitems'-1' {
        local nbclusters=`nbitems'-`k'+1
        * search of the two closest clusters
        local distmin=`currentprox'[1,2]
        local cl1=1
        local cl2=2
        forvalues i=1/`nbclusters' {
            forvalues j=`=`i'+1'/`nbclusters' {
                if `distmin'>`currentprox'[`i',`j'] {
                    local distmin=`currentprox'[`i',`j']
                    local cl1=`i'
                    local cl2=`j'
                }
            }
        }
        * the two nodes are closed and the new node nbitems+k is created
        local linescl1=`conclinesnodes'[1,`cl1']
        local nbitemscl1=`nodes'[1,`linescl1']
        matrix `nodes'[2,`linescl1']=0
        local linescl2=`conclinesnodes'[1,`cl2']
        local nbitemscl2=`nodes'[1,`linescl2']
        matrix `nodes'[2,`linescl2']=0
        matrix `nodes'[1,`=`nbitems'+`k'']=`nbitemscl1'+`nbitemscl2'
        matrix `nodes'[2,`=`nbitems'+`k'']=1
        matrix `nodes'[3,`=`nbitems'+`k'']=`linescl1'
        matrix `nodes'[4,`=`nbitems'+`k'']=`linescl2'
        if "`details'"!="" {
            di in green _col(3) "The nodes" _col(13) in yellow "`linescl1'" _col(17) in green "and" _col(21) in yellow "`linescl2'" _col(25) in green "are been aggregated to form the node " in yellow "`=`nbitems'+`k''"
        }
        forvalues i=5/`=`nbitemscl1'+4' {
            local item=`nodes'[`i',`linescl1']
            matrix `nodes'[`i',`=`nbitems'+`k'']=`item'
            matrix `whereitems'[`k',`item']=`=`nbitems'+`k''
        }
        forvalues i=5/`=`nbitemscl2'+4' {
            local item=`nodes'[`i',`linescl2']
            matrix `nodes'[`=`i'+`nbitemscl1'',`=`nbitems'+`k'']=`item'
            matrix `whereitems'[`k',`item']=`=`nbitems'+`k''
        }
        local tmp=1
        forvalues i=1/`=`nbitems'+`k'' {
            if `nodes'[2,`i']==1 {
                matrix `mempart'[`tmp',`k']=`i'
                local tmp=`tmp'+1
            }
        }
        * indexes of the current partition (nbitems-k clusters) when requested
        if `detect'>=`=`nbitems'-`k'' {
            local partdetect
            local compteur=1
            local scaledetect
            forvalues i=1/`=`nbitems'-`k'' {
                local scaledetect`i'
            }
            forvalues i=1/`=`nbitems'+`k'' {
                if `nodes'[2,`i']==1 {
                    local tmp=`nodes'[1,`i']
                    local partdetect `partdetect' `tmp'
                    local tmp2=4+`tmp'
                    forvalues j=5/`tmp2' {
                        local tmp3=`nodes'[`j',`i']
                        local scaledetect`compteur' `scaledetect`compteur'' ``tmp3''
                    }
                    local scaledetect `scaledetect' `scaledetect`compteur''
                    local compteur=`compteur'+1
                }
            }
            qui detect `scaledetect' , partition(`partdetect')
            local detect`=`nbclusters'-1'=r(DETECT)
            local R`=`nbclusters'-1'=r(R)
            local Iss`=`nbclusters'-1'=r(Iss)
        }
        * proximities between the remaining clusters are rebuilt from the item proximities
        matrix drop `currentprox'
        matrix define `currentprox'=J(`=`nbclusters'-1',`=`nbclusters'-1',0)
        matrix drop `conclinesnodes'
        matrix define `conclinesnodes'=J(1,`=`nbclusters'-1',0)
        local tmp=1
        forvalues i=1/`=`nbitems'+`k'' {
            if `nodes'[2,`i']==1 {
                matrix `conclinesnodes'[1,`tmp']=`i'
                local tmp=`tmp'+1
            }
        }
        forvalues i=1/`=`nbclusters'-1' {
            forvalues j=`=`i'+1'/`=`nbclusters'-1' {
                if "`method'"=="UPGMA" {
                    local moy=0
                    local linescl1=`conclinesnodes'[1,`i']
                    local nbitemscl1=`nodes'[1,`linescl1']
                    local linescl2=`conclinesnodes'[1,`j']
                    local nbitemscl2=`nodes'[1,`linescl2']
                    forvalues l=5/`=`nbitemscl1'+4' {
                        forvalues m=5/`=`nbitemscl2'+4' {
                            local item1=`nodes'[`l',`linescl1']
                            local item2=`nodes'[`m',`linescl2']
                            local tmp=`proximity'[`item1',`item2']
                            local moy=`moy'+`tmp'
                        }
                    }
                    matrix `currentprox'[`i',`j']=`moy'/(`nbitemscl1'*`nbitemscl2')
                    matrix `currentprox'[`j',`i']=`moy'/(`nbitemscl1'*`nbitemscl2')
                }
                if "`method'"=="single" {
                    local linescl1=`conclinesnodes'[1,`i']
                    local nbitemscl1=`nodes'[1,`linescl1']
                    local linescl2=`conclinesnodes'[1,`j']
                    local nbitemscl2=`nodes'[1,`linescl2']
                    forvalues l=5/`=`nbitemscl1'+4' {
                        forvalues m=5/`=`nbitemscl2'+4' {
                            local item1=`nodes'[`l',`linescl1']
                            local item2=`nodes'[`m',`linescl2']
                            if `l'==5&`m'==5 {
                                local distmin=`proximity'[`item1',`item2']
                            }
                            else {
                                if `distmin'>`proximity'[`item1',`item2'] {
                                    local distmin=`proximity'[`item1',`item2']
                                }
                            }
                        }
                    }
                    matrix `currentprox'[`i',`j']=`distmin'
                    matrix `currentprox'[`j',`i']=`distmin'
                }
                if "`method'"=="complete" {
                    local linescl1=`conclinesnodes'[1,`i']
                    local nbitemscl1=`nodes'[1,`linescl1']
                    local linescl2=`conclinesnodes'[1,`j']
                    local nbitemscl2=`nodes'[1,`linescl2']
                    local distmax=0
                    forvalues l=5/`=`nbitemscl1'+4' {
                        forvalues m=5/`=`nbitemscl2'+4' {
                            local item1=`nodes'[`l',`linescl1']
                            local item2=`nodes'[`m',`linescl2']
                            if `distmax'<`proximity'[`item1',`item2'] {
                                local distmax=`proximity'[`item1',`item2']
                            }
                        }
                    }
                    matrix `currentprox'[`i',`j']=`distmax'
                    matrix `currentprox'[`j',`i']=`distmax'
                }
            }
        }
    }
    if `detect'!=0 {
        tempname indexes
        matrix define `indexes'=J(`detect',4,0)
        matrix colnames `indexes'=Clusters DETECT Iss R
        di ""
        di in green _col(7) "Indexes to test the `detect' latest partitions of the items"
        di ""
        di in green _col(29) "DETECT" _col(43) "Iss" _col(56) "R"
        di _col(5) in green "Only one cluster:" _col(27) in yellow %8.5f `detect1' _col(38) %8.5f `Iss1' _col(49) %8.5f `R1'
        matrix `indexes'[1,1]=1
        matrix `indexes'[1,2]=`detect1'
        matrix `indexes'[1,3]=`Iss1'
        matrix `indexes'[1,4]=`R1'
        forvalues k=2/`detect' {
            matrix `indexes'[`k',1]=`k'
            matrix `indexes'[`k',2]=`detect`k''
            matrix `indexes'[`k',3]=`Iss`k''
            matrix `indexes'[`k',4]=`R`k''
            di _col(5) in green "`k' clusters:" _col(27) in yellow %8.5f `detect`k'' _col(38) %8.5f `Iss`k'' _col(49) %8.5f `R`k''
        }
        return matrix indexes=`indexes'
    }
    * composition of each requested partition; affect# gives the cluster of each item
    forvalues k=1/`nbpart' {
        di ""
        local rowmempart=`nbitems'-`part`k''
        di in green _col(8) "Number of clusters : `part`k''"
        tempname affect`part`k''
        matrix define `affect`part`k'''=J(1,`nbitems',0)
        forvalues i=1/`part`k'' {
            di
            di in green _col(12) "Cluster `i':"
            local rownodes=`mempart'[`i',`rowmempart']
            local itemsinthecluster=`nodes'[1,`rownodes']
            forvalues j=5/`=4+`itemsinthecluster'' {
                local tmp=`nodes'[`j',`rownodes']
                matrix `affect`part`k'''[1,`tmp']=`i'
                di in yellow _col(13)"``tmp''"
            }
        }
        matrix colnames `affect`part`k'''=`varlist'
        return matrix affect`part`k''=`affect`part`k'''
    }
    return matrix mempart `mempart'
    return matrix nodes `nodes'
    return local nbitems=`nbitems'
    return local varlist `varlist'
end
/*********************************************************
*Partition
*Version 2 (May 10, 2004)
*
*Historic
*Version 1 (January 18, 2004)
***********************************************************/
program define partition
    version 8.0
    syntax anything(name=partition)
    * Purpose: display the composition of the partitions whose numbers of clusters
    * are listed in the argument. The description of the clustering is read from
    * the r() results present when the program is called: r(nbitems), r(mempart),
    * r(nodes) and r(varlist).

    * The r() results are copied first, before anything else is computed.
    local nbitems=r(nbitems)
    tempname mempart nodes
    matrix `mempart'=r(mempart)
    matrix `nodes'=r(nodes)
    local varlist "`r(varlist)'"

    * One local per requested number of clusters.
    local npart:word count `partition'
    forvalues p=1/`npart' {
        local w:word `p' of `partition'
        local want`p'=`w'
    }

    * The positional macros now hold the item names.
    tokenize `varlist'
    forvalues p=1/`npart' {
        di ""
        * the partition in want clusters is stored in column nbitems-want of mempart
        local col=`nbitems'-`want`p''
        di in green _col(8) "Number of clusters : `want`p''"
        forvalues c=1/`want`p'' {
            di
            di in green _col(12) "Cluster `c':"
            local node=`mempart'[`c',`col']
            local size=`nodes'[1,`node']
            * the items of a node are listed from row 5 of its column in nodes
            forvalues r=5/`=4+`size'' {
                local idx=`nodes'[`r',`node']
                di in yellow _col(13)"``idx''"
            }
        }
    }
end

@ -0,0 +1,90 @@
{smcl}
{* 12may2004}{...}
{hline}
help for {hi:hcaccprox}
{hline}
{title:Hierarchical Clusters Analysis with conditional proximity measures}
{p 8 14 2}{cmd:hcaccprox} {it:varlist}
[{cmd:,} {cmdab:prox:(}{it:keyword}{cmd:)} {cmdab:meth:od(}{it:keyword}{cmd:)} {cmdab:part:ition(}{it:numlist}{cmd:)}
{cmdab:meas:ures} {cmdab:det:ails} {cmdab:det:ect(}{it:#}{cmd:)} ]
{p 8 14 2}{cmd:partition} {it:numlist}
{title:Description}
{p 4 8 2}
{cmd:hcaccprox} performs a Hierarchical Clusters Analysis on dichotomous items
based on specific measures of proximity such as conditional proximity measures. The program
can compute indexes to test the obtained partition (the {help detect} program is
necessary in this case).
{p 4 8 2}
{cmd:partition} permit, after a {cmd:hcaccprox} step to obtain
the composition of some specific partitions of the items.
{title:Options}
{p 4 8 2}{cmd:prox(}{it:keyword}{cmd:)} defines the method to compute the proximity between the items.
Six measures are possible. The three first ones are unconditional measures named {it:a}, {it:ad} and {it:cor}.
The three last ones are conditional measures named {it:ccov}, {it:ccor} and {it:mh}. See Roussos, Stout and Marden (1998)
for details of these six measures. By default, the {it:ccov} option is used.
{p 4 8 2}{cmd:method} define the method to aggregate two clusters, {it:single} for a single linkage, {it:complete} for a complete
linkage, and {it:UPGMA} for the Unweighted Pair-Group Method of Average. By default, the {it:UPGMA} option is used.
{p 4 8 2}{cmd:partition(}{it:numlist}{cmd:)} lists the partitions to detail by the program. List like {it:(2 4 6)} or {it:(2(2)6)}
are authorized.
{p 4 8 2}{cmd:measures} display the used proximity measures between the items.
{p 4 8 2}{cmd:details} display the results of the algorithm of aggregation.
{p 4 8 2}{cmd:detect(}{it:#}{cmd:)} specifies for all the partitions with a number of clusters inferior or equal to {it:#}
to compute the DETECT, Iss and R indexes.
{p 4 8 2}{it:numlist}, for the {cmd:partition} program, define the partitions with the number of clusters indicated in the {it:numlist}
to detail.
{title:Examples}
{p 4 8 2}{cmd:. hcaccprox q1-q10}
{p 4 8 2}{cmd:. partition 3 5 6}
{p 4 8 2}{cmd:. hcaccprox item1-item9 dotest1-dotest6, detect(6) measures}
{p 4 8 2}{cmd:. hcaccprox c1 c2 c3 c4 c5 c6 c7, prox(a) method(single)}
{title:Outputs}
{p 4 8 2}{cmd:. r(varlist)} is a macro who contain {it:varlist}
{p 4 8 2}{cmd:. r(nbitems)} is a macro who contain the number of items
{p 4 8 2}{cmd:. r(nodes)} is a matrix who contain all the informations about all the possible clusters of items. Each column represent a node (the first ones represent each item of {it:varlist}, and the following columns represent each
aggregation of clusters), the first line represent the number of items in each cluster, the third and the fourth lines represent the two cluster who are aggregated to form the new cluster, and the following lines represent the list of
items composing each cluster
{p 4 8 2}{cmd:. r(mempart)} lists the clusters composing each possible partition: the last column is the partition in only one cluster, the preceding column represents the partition in two clusters, and so on
{p 4 8 2}{cmd:. r(affect#)} is obtained with the {it:partition} option. In this vector, the number of the cluster (of the partition in # clusters) is associated to each item
{p 4 8 2}{cmd:. r(indexes)} is obtained with the {it:detect} option. This matrix contain the DETECT, Iss and R indexes associated to each partition with a number of clusters inferior to the number defined in the {it:detect} option
{title:Reference}
{p 4 8 2}{cmd:Roussos L. A, Stout W. F. and Marden J. I.}, {it:Using new proximity measures with hierarchical cluster analysis to detect multidimensionality}. Journal of Educational Measurement, {cmd:35}(1), pp 1-30, 1998.
{p 4 8 2}{cmd:Zhang J. and Stout W. F.}, {it:The theoretical DETECT index of dimensionality and its application to approximate simple structure}. Psychometrika, {cmd:64}(2), pp 213-249, 1999.
{title:Also see}
{p 4 13 2} help for {help detect}
{title:Author}
{p 4 8 2}Jean-Benoit Hardouin, Regional Health Observatory (ORS) - 1, rue Porte Madeleine - BP 2439 - 45032 Orleans Cedex 1 - France.
You can contact the author at {browse "mailto:jean-benoit.hardouin@neuf.fr":jean-benoit.hardouin@neuf.fr} and visit the websites {browse "http://anaqol.free.fr":AnaQol} and {browse "http://freeirt.free.fr":FreeIRT}

@ -0,0 +1,509 @@
*! Version 3.6 19 July 2019
*! Jean-Benoit Hardouin
************************************************************************************************************
* hcavar: Hierarchical Clusters Analysis (HCA) of variables
* Version 3.6 [2019-07-19], Jean-Benoit Hardouin /* loevH proximity */
*
* Use the Detect Stata program (ssc install detect)
*
* Historic :
* Under the name of -hcaccprox-
* Version 1 [2004-01-18], Jean-Benoit Hardouin
* Version 2 [2004-05-12], Jean-Benoit Hardouin
* Version 3 [2005-12-31], Jean-Benoit Hardouin
* Version 3.1 [2006-01-15], Jean-Benoit Hardouin /* correction if there is only one individual with a given score*/
* Version 3.2 [2010-04-15], Jean-Benoit Hardouin /* Possibility to use Polytomous Items with CCOR, CCOV and MH*/
* Version 3.3 [2014-05-07], Jean-Benoit Hardouin, Bastien Perrot /* HTML option, if option*/
* Version 3.4 [2014-05-26], Jean-Benoit Hardouin, Bastien Perrot /* DETECT option available for polytomous items */
* Version 3.5 [2019-07-18], Jean-Benoit Hardouin /* dirsave and filesave options */
* Version 3.6 [2019-07-19], Jean-Benoit Hardouin /* loevH proximity */
*
* Jean-benoit Hardouin - University of Nantes - France
* INSERM UMR 1246-SPHERE "Methods in Patient Centered Outcomes and Health Research", Nantes University, University of Tours
* jean-benoit.hardouin@univ-nantes.fr
*
* News about this program :http://www.anaqol.org
*
* Copyright 2004-2006, 2010, 2019 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
************************************************************************************************************
program define hcavar, rclass
    * hcavar: hierarchical cluster analysis of a set of numeric variables.
    *
    * Options
    *   prox()       proximity measure: jaccard (a), matching (ad), pearson (corr),
    *                russel, dice, ccov, ccor, mh or loevh. pearson when omitted.
    *   matrix()     name of a user supplied distance matrix used instead of prox()
    *   method()     linkage handed to clustermat: single, complete, average (upgma),
    *                waverage (wpgma), median, centroid or wards, with or without
    *                the linkage suffix. average when omitted.
    *   partition()  numbers of clusters for which the partition is displayed
    *   measures     display the proximity matrix
    *   detect       run the external detect command on each requested partition
    *   nodendrogram do not draw the dendrogram
    *   html()       emit HTML wrappers and export the dendrogram to c(tmpdir)
    *   dirsave() filesave   save the dendrogram as dendrogram.gph in dirsave()
    *
    * Returns r(nbvar), r(measures) and, depending on the options, r(clusters)
    * and r(indexes).
    *
    * Relies on the external commands genscore, detect and loevh.
    version 9
    syntax varlist(min=2 numeric) [if] [in] [,PROX(string) METHod(string) PARTition(numlist) MEASures DETect MATrix(string) noDENDROgram HTML(string) DIRSave(string) FILESave]

    * Full copy of the data: reloaded before the DETECT step and left in memory at exit
    tempfile hcaccproxfile
    qui save `hcaccproxfile',replace
    preserve

    * Restrict to the requested sample. The in qualifier was previously parsed but ignored.
    if "`if'"!=""|"`in'"!="" {
        marksample touse, novarlist
        qui keep if `touse'
    }
    if "`html'"!="" {
        di "<!-- SphereCalc start of response -->"
        di "<pre>"
    }

    * Listwise deletion of missing values and maximal number of modalities per item
    local nbitems : word count `varlist'
    tokenize `varlist'
    local type=0
    forvalues i=1/`nbitems' {
        qui drop if ``i''==.
        qui inspect ``i''
        if r(N_unique)>`type'&r(N_unique)!=. {
            local type=r(N_unique)
        }
        else if r(N_unique)>`type'&r(N_unique)==. {
            * inspect returns a missing N_unique beyond 99 distinct values
            local type "100"
        }
    }
    * Numeric flag taken before type becomes a display string: comparing the
    * strings wrongly classified items with 10 to 19 modalities as binary
    local polytomous=(`type'>2)
    if `type'==100 {
        local type ">99"
    }

    tempname proximity
    local prox=lower("`prox'")
    local method=lower("`method'")
    matrix define `proximity'=J(`nbitems',`nbitems',0)

    /**************************PROXIMITIES MEASURES DESCRIPTION************************/
    if "`prox'"=="" {
        local prox="pearson"
    }
    else if "`prox'"=="a" {
        local prox="jaccard"
    }
    else if "`prox'"=="ad" {
        local prox="matching"
    }
    else if "`prox'"=="corr" {
        local prox="pearson"
    }
    if `polytomous'&"`prox'"!="pearson"&"`prox'"!="ccov"&"`prox'"!="ccor"&"`prox'"!="mh"&"`prox'"!="loevh" {
        di in red "Only the {hi:pearson}, {hi:ccov}, {hi:ccor}, {hi:mh} and {hi:loevh} measures of proximity are available with ordinal or numerous variables"
        di in red "Please correct your {hi:prox} option."
        exit
    }
    if "`partition'"==""&"`detect'"!="" {
        di in ye "option partition() required"
        error 198
    }
    local existmeas=0
    foreach i in jaccard matching pearson russel dice ccor mh ccov loevh {
        if "`prox'"=="`i'" {
            local existmeas=1
        }
    }
    if `existmeas'==0 {
        di in red "You must define an existing measure of proximity (jaccard(a), matching(ad), pearson(cor), russel, dice, ccov, ccor, mh, loevh)."
        di in red "Please correct your {hi:prox} option."
        exit
    }
    if "`prox'"=="ccov"|"`prox'"=="mh" {
        local proxmin=0
    }
    if "`prox'"=="matching" {
        local desprox="Matching"
    }
    else if "`prox'"=="jaccard" {
        local desprox="Jaccard"
    }
    else if "`prox'"=="russel" {
        local desprox="Russel"
    }
    else if "`prox'"=="dice" {
        local desprox="Dice"
    }
    else if "`prox'"=="pearson" {
        local desprox="Pearson"
    }
    else if "`prox'"=="ccov" {
        local desprox="Conditional covariances"
    }
    else if "`prox'"=="ccor" {
        local desprox="Conditional correlations"
    }
    else if "`prox'"=="mh" {
        local desprox="Mantel Hanzel"
    }
    else if "`prox'"=="loevh" {
        * This label was missing: the summary and the dendrogram axis were blank
        local desprox="Loevinger H coefficients"
    }
    * Set last so that the label of the default measure does not overwrite it
    if "`matrix'"!="" {
        local desprox="Defined by the user"
    }

    /**************************AGGREGATION METHOD************************/
    if "`method'"=="upgma"|"`method'"=="" {
        local method="average"
    }
    if "`method'"=="wpgma" {
        local method="waverage"
    }
    * Accept the long names (singlelinkage, wardslinkage, ...) as well as the short ones
    local method=subinstr("`method'","linkage","",1)
    local vermethod=0
    foreach i in average waverage single centroid median complete wards {
        if "`method'"=="`i'" {
            local vermethod=1
        }
    }
    if `vermethod'==0 {
        di in red "You must define an existing method to define the proximity between two clusters of items:"
        di in red _col(10) "- single: single linkage"
        di in red _col(10) "- complete: complete linkage "
        di in red _col(10) "- average(UPGMA): Unweighted Pair-Group Method of Average"
        di in red _col(10) "- waverage(WPGMA): Weighted Pair-Group Method of Average"
        di in red _col(10) "- wards: Ward's linkage"
        di in red "Please correct your method option"
        exit
    }
    if "`method'"=="single" {
        local desmethod="Single linkage"
    }
    else if "`method'"=="complete" {
        local desmethod="Complete linkage"
    }
    else if "`method'"=="median" {
        local desmethod="Median linkage (no dendrogram)"
    }
    else if "`method'"=="centroid" {
        local desmethod="Centroid linkage (no dendrogram)"
    }
    else if "`method'"=="average" {
        local desmethod="Unweighted Pair-Group Method of Average"
    }
    else if "`method'"=="waverage" {
        local desmethod="Weighted Pair-Group Method of Average"
    }
    else if "`method'"=="wards" {
        local desmethod="Ward's linkage"
    }

    * Score of each individual and number of individuals per score level,
    * used by the conditional measures (ccov, ccor, mh)
    tempvar score
    genscore `varlist',score(`score')
    qui su `score'
    local maxscore=r(max)
    forvalues k=0/`maxscore' {
        qui count if `score'==`k'
        local nk`k'=r(N)
    }
    qui count
    local N=r(N)
    di in green "{hline 80}"
    di in green "Number of individuals with none missing values: " in ye `N'
    di in green "Maximal number of modalities for a variable: " in ye "`type'"
    di in green "Proximity measures: " in ye "`desprox'"
    di in green "Method to aggregate clusters: " in ye "`desmethod'"
    di in green "{hline 80}"
    di
    di

    /*************************Measure of proximities*********************************/
    if "`matrix'"=="" {
        if "`prox'"=="loevh" {
            * Pairwise H coefficients, turned into distances in the loop below
            qui loevh `varlist', pairw pair
            matrix `proximity'=r(loevHjk)
        }
        forvalues i=1/`nbitems' {
            forvalues j=`=`i'+1'/`nbitems' {
                /***********************************Proximity AD*************************/
                if "`prox'"=="matching" { /*ad*/
                    qui count if ``i''==1&``j''==1
                    local tmp11=r(N)
                    qui count if ``i''==0&``j''==0
                    local tmp00=r(N)
                    matrix `proximity'[`i',`j']=sqrt(1-(`tmp11'+`tmp00')/`N')
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                }
                /***********************************Proximity A**************************/
                else if "`prox'"=="jaccard" { /*a*/
                    qui count if ``i''==1&``j''==1
                    local tmp11=r(N)
                    qui count if ``i''==0&``j''==0
                    local tmp00=r(N)
                    matrix `proximity'[`i',`j']=sqrt(1-`tmp11'/(`N'-`tmp00'))
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                }
                /***********************************Proximity Russel*********************/
                else if "`prox'"=="russel" {
                    qui count if ``i''==1&``j''==1
                    local tmp11=r(N)
                    matrix `proximity'[`i',`j']=sqrt(1-`tmp11'/`N')
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                }
                /***********************************Proximity Dice***********************/
                else if "`prox'"=="dice" {
                    qui count if ``i''==1&``j''==1
                    local tmp11=r(N)
                    qui count if ``i''==0&``j''==0
                    local tmp00=r(N)
                    matrix `proximity'[`i',`j']=sqrt(1-2*`tmp11'/(`N'+`tmp11'-`tmp00'))
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                }
                /**********************************Proximity COR*************************/
                else if "`prox'"=="pearson" { /*corr*/
                    qui corr ``i'' ``j''
                    matrix `proximity'[`i',`j']=sqrt(2*(1-r(rho)))
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                }
                /***********************************Proximity CCOV**********************/
                else if "`prox'"=="ccov" {
                    local dij=0
                    local Ntemp=`N'
                    forvalues k=1/`=`maxscore'-1' {
                        if `nk`k''!=0 {
                            if `nk`k''>1 {
                                qui corr ``i'' ``j'' if `score'==`k',cov
                                local covi`i'j`j'k`k'=r(cov_12)
                            }
                            else if `nk`k''==1 {
                                local Ntemp=`Ntemp'-1
                                local covi`i'j`j'k`k'=0
                            }
                            else {
                                local covi`i'j`j'k`k'=0
                            }
                            local dij=`dij'+`covi`i'j`j'k`k''*`nk`k''
                        }
                    }
                    matrix `proximity'[`i',`j']=-`dij'/`Ntemp'
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                    if `proxmin'<`dij'/`Ntemp' {
                        local proxmin=`dij'/`Ntemp'
                    }
                }
                /***********************************Proximity CCOR**********************/
                else if "`prox'"=="ccor" {
                    local dij=0
                    local nnull=0
                    local Ntemp=`N'
                    forvalues k=1/`=`maxscore'-1' {
                        if `nk`k''!=0 {
                            if `nk`k''>1 {
                                qui corr ``i'' ``j'' if `score'==`k'
                                local cori`i'j`j'k`k'=r(rho)
                            }
                            else if `nk`k''==1 {
                                local Ntemp=`Ntemp'-1
                                local cori`i'j`j'k`k'=0
                            }
                            else {
                                local cori`i'j`j'k`k'=0
                            }
                            if `cori`i'j`j'k`k''!=. {
                                local dij=`dij'+`cori`i'j`j'k`k''*`nk`k''
                            }
                            else if `cori`i'j`j'k`k''==. {
                                local nnull=`nnull'+`nk`k''
                            }
                        }
                    }
                    matrix `proximity'[`i',`j']=sqrt(2*(1-`dij'/(`Ntemp'-`nnull')))
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                }
                /***********************************Proximity MH************************/
                else if "`prox'"=="mh" {
                    * Both sums restart for every pair. The denominator was never
                    * reset before and accumulated over the successive pairs.
                    local numij=0
                    local denomij=0
                    forvalues k=1/`=`maxscore'-1' {
                        if `nk`k''!=0 {
                            qui count if ``i''==1&``j''==1&`score'==`k'
                            local A=r(N)
                            qui count if ``i''==0&``j''==1&`score'==`k'
                            local B=r(N)
                            qui count if ``i''==1&``j''==0&`score'==`k'
                            local C=r(N)
                            qui count if ``i''==0&``j''==0&`score'==`k'
                            local D=r(N)
                            local numij=`numij'+`A'*`D'/`nk`k''
                            local denomij=`denomij'+`B'*`C'/`nk`k''
                        }
                    }
                    matrix `proximity'[`i',`j']=-log(`numij'/`denomij')
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                    if `proxmin'<log(`numij'/`denomij') {
                        local proxmin=-`proximity'[`i',`j']
                    }
                }
                /***********************************Proximity LoevH************************/
                else if "`prox'"=="loevh" {
                    matrix `proximity'[`i',`j']=1-`proximity'[`i',`j']
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                }
            }
        }
        if "`prox'"=="ccov"|"`prox'"=="mh" {
            * Shift by the largest similarity so that the distances are not negative
            forvalues i=1/`nbitems' {
                forvalues j=`=`i'+1'/`nbitems' {
                    matrix `proximity'[`i',`j']=`proximity'[`i',`j']+`proxmin'
                    if `proximity'[`i',`j']<0 {
                        matrix `proximity'[`i',`j']=0
                    }
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                }
            }
        }
    }
    /**********************END OF THE COMPUTING OF THE PROXIMITIES**************************************/
    else {
        matrix `proximity'=`matrix'
    }
    * The unconditional listing of the temporary matrix was debugging output:
    * the matrix is displayed on request through the measures option below
    matrix rowname `proximity'=`varlist'
    matrix colname `proximity'=`varlist'
    if "`measures'"!="" {
        di in green "{hline 50}"
        di in green "Measures of proximity between the items"
        di in green "{hline 50}"
        matrix list `proximity', noheader
        di
    }

    /**********************CLUSTERING PROCEDURE **********************************************/
    * clustermat replaces the data in memory with one observation per item
    qui clustermat `method' `proximity',clear labelvar(name)
    if "`method'"!="centroid"&"`method'"!="median"&"`dendrogram'"=="" {
        if "`html'" != "" {
            local saving "saving(`c(tmpdir)'/`html'_dendro,replace) nodraw"
            qui cluster dendro ,labels(name) hor ylabel(,angle(0)) title("Hierarchical Cluster Analysis on variables") subtitle("`desmethod'") xtitle("`desprox' proximities") `saving'
            qui graph use `c(tmpdir)'/`html'_dendro.gph
            qui graph export `c(tmpdir)'/`html'_dendro.eps, replace
            di "<br />"
            di "<img src=" _char(34) "/data/`html'_dendro.png" _char(34)
            di " class=" _char(34) "resgraph" _char(34) " alt=" _char(34) "dendro" _char(34) " title= " _char(34) "Hierarchical Cluster Analysis on variables - click to enlarge" _char(34) " width=" _char(34) "350" _char(34) " height=" _char(34) "240" _char(34) " >"
        }
        else {
            local saving
            if "`filesave'"!="" {
                * Without dirsave() the file used to be written to the root directory
                if "`dirsave'"=="" {
                    local dirsave "`c(pwd)'"
                }
                local saving `"saving("`dirsave'/dendrogram",replace) nodraw"'
            }
            qui cluster dendro ,labels(name) hor ylabel(,angle(0)) title("Hierarchical Cluster Analysis on variables") subtitle("`desmethod'") xtitle("`desprox' proximities") `saving'
        }
    }

    if "`partition'"!="" {
        foreach i of numlist `partition' {
            qui cluster gen cluster`i'=group(`i')
        }
        tempname clusters
        mkmat cluster* ,mat(`clusters')
        matrix rownames `clusters'=`varlist'
        * compteur is the column of the clusters matrix holding the current partition
        local compteur=0
        foreach i of numlist `partition' {
            local ++compteur
            di
            di in green "{hline 30}"
            di in green "Partition in `i' cluster(s)"
            di in green "{hline 30}"
            di
            forvalues j=1/`i' {
                * Items and number of items of cluster j, reused by the DETECT step
                local cluster`i'_`j'
                local nbi`i'_`j'=0
                forvalues k=1/`nbitems' {
                    if `clusters'[`k',`compteur']==`j' {
                        local cluster`i'_`j' `cluster`i'_`j'' ``k''
                        local ++nbi`i'_`j'
                    }
                }
                di in green "Cluster `j': " in ye "`cluster`i'_`j''"
            }
        }
        return matrix clusters=`clusters'
    }

    /**********************DETECT OPTION **************************************************/
    * NOTE(review): the file reloaded here was saved before the if/in restriction,
    * so detect runs on the complete dataset - confirm that this is intended
    use `hcaccproxfile',clear
    if "`detect'"!=""&"`partition'"!="" {
        foreach i of numlist `partition' {
            local liste
            local part
            forvalues j=1/`i' {
                local liste "`liste' `cluster`i'_`j''"
                local part "`part' `nbi`i'_`j''"
            }
            qui detect `liste',part(`part')
            local detect`i'=r(DETECT)
            local Iss`i'=r(Iss)
            local R`i'=r(R)
        }
        tempname indexes
        matrix define `indexes'=J(`compteur',4,0)
        matrix colnames `indexes'=Clusters DETECT Iss R
        di ""
        di in green "{hline 50}"
        di in green "Indexes to compare the partitions of the items"
        di in green "{hline 50}"
        di ""
        di in green _col(29) "DETECT" _col(43) "Iss" _col(56) "R"
        local compteur=0
        foreach k of numlist `partition' {
            local ++compteur
            matrix `indexes'[`compteur',1]=`k'
            matrix `indexes'[`compteur',2]=`detect`k''
            matrix `indexes'[`compteur',3]=`Iss`k''
            matrix `indexes'[`compteur',4]=`R`k''
            di _col(5) in green "`k' cluster(s):" _col(27) in yellow %8.5f `detect`k'' _col(38) %8.5f `Iss`k'' _col(49) %8.5f `R`k''
        }
        return matrix indexes=`indexes'
    }
    return local nbvar=`nbitems'
    return matrix measures=`proximity'
    * Cancel the preserve: the reloaded original data stay in memory
    restore, not
end

@ -0,0 +1,100 @@
{smcl}
{* 6december2012}{...}
{hline}
help for {hi:hcavar}{right:Jean-Benoit Hardouin}
{hline}
{title:Hierarchical Clusters Analysis of variables}
{p 8 14 2}{cmd:hcavar} {it:varlist}
[{cmd:,} {cmdab:prox}{it:(keyword)} {cmdab:mat:rix}{it:(matrix)} {cmdab:meth:od}{it:(keyword)}
{cmdab:part:ition}{it:(numlist)} {cmdab:meas:ures} {cmdab:det:ect} {cmdab:nodendro:gram}]
{title:Description}
{p 4 8 2}
{cmd:hcavar} is the new name of the old {cmd:hcaccprox} module.
{p 4 8 2}
{cmd:hcavar} realizes a Hierarchical Clusters Analysis on variables.
The variables can be numerical, ordinal or binary. The distances (dissimilarity
measures for binary variables) between two variables are computed as the square
root of 2 times one minus the Pearson correlation. For binary variables, it is
possible to use other similarity coefficients as Matching, Jaccard, Russel or Dice
(See {help measure option} for more details). The distance matrix is computed as
the square root of one minus the value of these coefficients.
In the field of Item Response Theory, it is possible to define conditional measures
to the score as defined by Roussos, Stout and Marden (1998): conditional correlations,
conditional covariance, or Mantel-Haenszel measures of similarity. In the same field,
it is possible to compute, for a set of obtained partition of the items, the DETECT,
Iss and R indexes defined by Zhang and Stout (1999).
{title:Options}
{p 4 8 2}{cmd:prox} defines the proximity measures to use : {it:jaccard}
(alias {it:a}), {it: russel}, {it:dice}, {it:matching} (alias {it:ad}), {it:pearson}
(alias {it:corr}), conditional covariance ({it:ccov}), conditional correlation
({it:ccor}), or Mantel Haenszel ({it:mh}). By default, this option is put to
{it:pearson}. Only {it:pearson}, {it:ccov}, {it:ccor} and {it:mh} are available
with ordinal or numerical variables.
{p 4 8 2}{cmd:matrix} allows using a matrix as distance matrix.
{p 4 8 2}{cmd:method} defines the method to aggregate two clusters. See {help cluster}
for more details about these methods. The complete name of the method
must be indicated (with or without "linkage"), no abbreviation is allowed.
{it:average} (UPGMA) is used by default.
{p 4 8 2}{cmd:partition} lists the partitions of variables to detail by
the program.
{p 4 8 2}{cmd:measures} displays the used proximity measures matrix between
the variables.
{p 4 8 2}{cmd:detect} computes the DETECT, Iss and R indexes
for the partitions indicated in the {cmd:partition} option.
{p 4 8 2}{cmd:nodendrogram} disables the display of the dendrogram.
{title:Examples}
{p 4 8 2}{cmd:. hcavar var1-var10} /*displays only the dendrogram*/
{p 4 8 2}{cmd:. hcavar var*, partition(1/6) measures method(single)} /*Single linkage, details of 6 partitions*/
{p 4 8 2}{cmd:. hcavar itemA1-itemA7 itemB1-itemB7, prox(ccor) method(single) detect part(1/4)} /*details of 4 partitions, conditional correlations*/
{title:Outputs}
{p 4 8 2}{cmd:. r(nbvar)} contains the number of variables
{p 4 8 2}{cmd:. r(measures)} is the distances measures matrix between the variables
{p 4 8 2}{cmd:. r(clusters)} is a matrix obtained with the {cmd:partition} option
containing the composition of the partitions defined with this option.
{p 4 8 2}{cmd:. r(indexes)} is obtained with the {cmd:detect} option.
This matrix contains the DETECT, Iss and R indexes associated to each partition
defined with the {cmd:partition} option.
{title:Reference}
{p 4 8 2}{cmd:Roussos L. A, Stout W. F. and Marden J. I.}, {it:Using new proximity measures with hierarchical cluster analysis to detect multidimensionality}. Journal of Educational Measurement, {cmd:35}(1), pp 1-30, 1998.
{p 4 8 2}{cmd:Zhang J. and Stout W. F.}, {it:The theoretical DETECT index of dimensionality and its application to approximate simple structure}. Psychometrika, {cmd:64}(2), pp 213-249, 1999.
{title:Also see}
{p 4 13 2} help for {help cluster}, help for {help detect} (if installed)
{title:Author}
{p 4 8 2}Jean-Benoit Hardouin, PhD, assistant professor{p_end}
{p 4 8 2}EA 4275 SPHERE "Team of Biostatistics, Clinical Research and Subjective Measures in Health Sciences"{p_end}
{p 4 8 2}University of Nantes - Faculty of Pharmaceutical Sciences{p_end}
{p 4 8 2}1, rue Gaston Veil - BP 53508{p_end}
{p 4 8 2}44035 Nantes Cedex 1 - FRANCE{p_end}
{p 4 8 2}Email:
{browse "mailto:jean-benoit.hardouin@univ-nantes.fr":jean-benoit.hardouin@univ-nantes.fr}{p_end}
{p 4 8 2}Website {browse "http://www.anaqol.org":AnaQol}

@ -0,0 +1,464 @@
*! Version 3.2 15 April 2010
*! Jean-Benoit Hardouin
************************************************************************************************************
* hcavar: Hierarchical Clusters Analysis (HCA) of variables
* Version 3.2: April 15, 2010 /* Possibility to use Polytomous Items with CCOR, CCOV and MH*/
*
* Use the Detect Stata program (ssc install detect)
*
* Historic :
* Under the name of -hcaccprox-
* Version 1 [2004-01-18], Jean-Benoit Hardouin
* Version 2 [2004-05-12], Jean-Benoit Hardouin
* Version 3 [2005-12-31], Jean-Benoit Hardouin
* Version 3.1 [2006-01-15], Jean-Benoit Hardouin /* correction if there is only one individual with a given score*/
*
* Jean-benoit Hardouin - Department of Biomathematics and Biostatistics - University of Nantes - France
* EA 4275 "Biostatistics, Clinical Research and Subjective Measures in Health Sciences"
* jean-benoit.hardouin@univ-nantes.fr
*
* News about this program :http://www.anaqol.org
*
* Copyright 2004-2006, 2010 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
************************************************************************************************************
program define hcavar , rclass
    * hcavar (version 3.2): hierarchical cluster analysis of a set of numeric variables.
    *
    * Options
    *   prox()       proximity measure: jaccard (a), matching (ad), pearson (corr),
    *                russel, dice, ccov, ccor or mh. pearson when omitted.
    *   matrix()     name of a user supplied distance matrix used instead of prox()
    *   method()     linkage handed to clustermat: single, complete, average (upgma),
    *                waverage (wpgma), median, centroid or wards, with or without
    *                the linkage suffix. average when omitted.
    *   partition()  numbers of clusters for which the partition is displayed
    *   measures     display the proximity matrix
    *   detect       run the external detect command on each requested partition
    *                (binary items only, requires partition())
    *   nodendrogram do not draw the dendrogram
    *
    * Returns r(nbvar), r(measures) and, depending on the options, r(clusters)
    * and r(indexes).
    *
    * Relies on the external commands genscore and detect.
    version 9
    syntax varlist(min=2 numeric) [,PROX(string) METHod(string) PARTition(numlist) MEASures DETect MATrix(string) noDENDROgram]

    * Full copy of the data: reloaded before the DETECT step and left in memory at exit
    tempfile hcaccproxfile
    qui save `hcaccproxfile',replace
    preserve

    * Listwise deletion of missing values and maximal number of modalities per item
    local nbitems : word count `varlist'
    tokenize `varlist'
    local type=0
    forvalues i=1/`nbitems' {
        qui drop if ``i''==.
        qui inspect ``i''
        if r(N_unique)>`type'&r(N_unique)!=. {
            local type=r(N_unique)
        }
        else if r(N_unique)>`type'&r(N_unique)==. {
            * inspect returns a missing N_unique beyond 99 distinct values
            local type "100"
        }
    }
    * Numeric flag taken before type becomes a display string: comparing the
    * strings wrongly classified items with 10 to 19 modalities as binary
    local polytomous=(`type'>2)
    if `type'==100 {
        local type ">99"
    }

    tempname proximity
    local prox=lower("`prox'")
    local method=lower("`method'")
    matrix define `proximity'=J(`nbitems',`nbitems',0)

    /**************************PROXIMITIES MEASURES DESCRIPTION************************/
    if "`prox'"=="" {
        local prox="pearson"
    }
    else if "`prox'"=="a" {
        local prox="jaccard"
    }
    else if "`prox'"=="ad" {
        local prox="matching"
    }
    else if "`prox'"=="corr" {
        local prox="pearson"
    }
    if `polytomous'&"`prox'"!="pearson"&"`prox'"!="ccov"&"`prox'"!="ccor"&"`prox'"!="mh" {
        di in red "Only the {hi:pearson}, {hi:ccov} and {hi:ccor} measures of proximity are available with ordinal or numerous variables"
        di in red "Please correct your {hi:prox} option."
        exit
    }
    if `polytomous'&"`detect'"!="" {
        di in ye "The {hi:detect} option is available only with binary variables. This option is disabled."
        local detect
        di
    }
    * detect without partition() used to fail later on an invalid J() call
    if "`partition'"==""&"`detect'"!="" {
        di in ye "option partition() required"
        error 198
    }
    local existmeas=0
    foreach i in jaccard matching pearson russel dice ccor mh ccov {
        if "`prox'"=="`i'" {
            local existmeas=1
        }
    }
    if `existmeas'==0 {
        di in red "You must define an existing measure of proximity (jaccard(a), matching(ad), pearson(cor), russel, dice, ccov, ccor, mh)."
        di in red "Please correct your {hi:prox} option."
        exit
    }
    if "`prox'"=="ccov"|"`prox'"=="mh" {
        local proxmin=0
    }
    if "`prox'"=="matching" {
        local desprox="Matching"
    }
    else if "`prox'"=="jaccard" {
        local desprox="Jaccard"
    }
    else if "`prox'"=="russel" {
        local desprox="Russel"
    }
    else if "`prox'"=="dice" {
        local desprox="Dice"
    }
    else if "`prox'"=="pearson" {
        local desprox="Pearson"
    }
    else if "`prox'"=="ccov" {
        local desprox="Conditional covariances"
    }
    else if "`prox'"=="ccor" {
        local desprox="Conditional correlations"
    }
    else if "`prox'"=="mh" {
        local desprox="Mantel Hanzel"
    }
    * Set last so that the label of the default measure does not overwrite it
    if "`matrix'"!="" {
        local desprox="Defined by the user"
    }

    /**************************AGGREGATION METHOD************************/
    if "`method'"=="upgma"|"`method'"=="" {
        local method="average"
    }
    if "`method'"=="wpgma" {
        local method="waverage"
    }
    * Accept the long names (singlelinkage, wardslinkage, ...) as well as the short ones
    local method=subinstr("`method'","linkage","",1)
    local vermethod=0
    foreach i in average waverage single centroid median complete wards {
        if "`method'"=="`i'" {
            local vermethod=1
        }
    }
    if `vermethod'==0 {
        di in red "You must define an existing method to define the proximity between two clusters of items:"
        di in red _col(10) "- single: single linkage"
        di in red _col(10) "- complete: complete linkage "
        di in red _col(10) "- average(UPGMA): Unweighted Pair-Group Method of Average"
        di in red _col(10) "- waverage(WPGMA): Weighted Pair-Group Method of Average"
        di in red _col(10) "- wards: Ward's linkage"
        di in red "Please correct your method option"
        exit
    }
    if "`method'"=="single" {
        local desmethod="Single linkage"
    }
    else if "`method'"=="complete" {
        local desmethod="Complete linkage"
    }
    else if "`method'"=="median" {
        local desmethod="Median linkage (no dendrogram)"
    }
    else if "`method'"=="centroid" {
        local desmethod="Centroid linkage (no dendrogram)"
    }
    else if "`method'"=="average" {
        local desmethod="Unweighted Pair-Group Method of Average"
    }
    else if "`method'"=="waverage" {
        local desmethod="Weighted Pair-Group Method of Average"
    }
    else if "`method'"=="wards" {
        local desmethod="Ward's linkage"
    }

    * Score of each individual and number of individuals per score level,
    * used by the conditional measures (ccov, ccor, mh)
    tempvar score
    genscore `varlist',score(`score')
    qui su `score'
    local maxscore=r(max)
    forvalues k=0/`maxscore' {
        qui count if `score'==`k'
        local nk`k'=r(N)
    }
    qui count
    local N=r(N)
    di in green "{hline 80}"
    di in green "Number of individuals with none missing values: " in ye `N'
    di in green "Maximal number of modalities for a variable: " in ye "`type'"
    di in green "Proximity measures: " in ye "`desprox'"
    di in green "Method to aggregate clusters: " in ye "`desmethod'"
    di in green "{hline 80}"
    di
    di

    /*************************Measure of proximities*********************************/
    if "`matrix'"=="" {
        forvalues i=1/`nbitems' {
            forvalues j=`=`i'+1'/`nbitems' {
                /***********************************Proximity AD*************************/
                if "`prox'"=="matching" { /*ad*/
                    qui count if ``i''==1&``j''==1
                    local tmp11=r(N)
                    qui count if ``i''==0&``j''==0
                    local tmp00=r(N)
                    matrix `proximity'[`i',`j']=sqrt(1-(`tmp11'+`tmp00')/`N')
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                }
                /***********************************Proximity A**************************/
                else if "`prox'"=="jaccard" { /*a*/
                    qui count if ``i''==1&``j''==1
                    local tmp11=r(N)
                    qui count if ``i''==0&``j''==0
                    local tmp00=r(N)
                    matrix `proximity'[`i',`j']=sqrt(1-`tmp11'/(`N'-`tmp00'))
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                }
                /***********************************Proximity Russel*********************/
                else if "`prox'"=="russel" {
                    qui count if ``i''==1&``j''==1
                    local tmp11=r(N)
                    matrix `proximity'[`i',`j']=sqrt(1-`tmp11'/`N')
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                }
                /***********************************Proximity Dice***********************/
                else if "`prox'"=="dice" {
                    qui count if ``i''==1&``j''==1
                    local tmp11=r(N)
                    qui count if ``i''==0&``j''==0
                    local tmp00=r(N)
                    matrix `proximity'[`i',`j']=sqrt(1-2*`tmp11'/(`N'+`tmp11'-`tmp00'))
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                }
                /**********************************Proximity COR*************************/
                else if "`prox'"=="pearson" { /*corr*/
                    qui corr ``i'' ``j''
                    matrix `proximity'[`i',`j']=sqrt(2*(1-r(rho)))
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                }
                /***********************************Proximity CCOV**********************/
                else if "`prox'"=="ccov" {
                    local dij=0
                    local Ntemp=`N'
                    forvalues k=1/`=`maxscore'-1' {
                        if `nk`k''!=0 {
                            if `nk`k''>1 {
                                qui corr ``i'' ``j'' if `score'==`k',cov
                                local covi`i'j`j'k`k'=r(cov_12)
                            }
                            else if `nk`k''==1 {
                                local Ntemp=`Ntemp'-1
                                local covi`i'j`j'k`k'=0
                            }
                            else {
                                local covi`i'j`j'k`k'=0
                            }
                            local dij=`dij'+`covi`i'j`j'k`k''*`nk`k''
                        }
                    }
                    matrix `proximity'[`i',`j']=-`dij'/`Ntemp'
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                    if `proxmin'<`dij'/`Ntemp' {
                        local proxmin=`dij'/`Ntemp'
                    }
                }
                /***********************************Proximity CCOR**********************/
                else if "`prox'"=="ccor" {
                    local dij=0
                    local nnull=0
                    local Ntemp=`N'
                    forvalues k=1/`=`maxscore'-1' {
                        if `nk`k''!=0 {
                            if `nk`k''>1 {
                                qui corr ``i'' ``j'' if `score'==`k'
                                local cori`i'j`j'k`k'=r(rho)
                            }
                            else if `nk`k''==1 {
                                local Ntemp=`Ntemp'-1
                                local cori`i'j`j'k`k'=0
                            }
                            else {
                                local cori`i'j`j'k`k'=0
                            }
                            if `cori`i'j`j'k`k''!=. {
                                local dij=`dij'+`cori`i'j`j'k`k''*`nk`k''
                            }
                            else if `cori`i'j`j'k`k''==. {
                                local nnull=`nnull'+`nk`k''
                            }
                        }
                    }
                    matrix `proximity'[`i',`j']=sqrt(2*(1-`dij'/(`Ntemp'-`nnull')))
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                }
                /***********************************Proximity MH************************/
                else if "`prox'"=="mh" {
                    * Both sums restart for every pair. The denominator was never
                    * reset before and accumulated over the successive pairs.
                    local numij=0
                    local denomij=0
                    forvalues k=1/`=`maxscore'-1' {
                        if `nk`k''!=0 {
                            qui count if ``i''==1&``j''==1&`score'==`k'
                            local A=r(N)
                            qui count if ``i''==0&``j''==1&`score'==`k'
                            local B=r(N)
                            qui count if ``i''==1&``j''==0&`score'==`k'
                            local C=r(N)
                            qui count if ``i''==0&``j''==0&`score'==`k'
                            local D=r(N)
                            local numij=`numij'+`A'*`D'/`nk`k''
                            local denomij=`denomij'+`B'*`C'/`nk`k''
                        }
                    }
                    matrix `proximity'[`i',`j']=-log(`numij'/`denomij')
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                    if `proxmin'<log(`numij'/`denomij') {
                        local proxmin=-`proximity'[`i',`j']
                    }
                }
            }
        }
        if "`prox'"=="ccov"|"`prox'"=="mh" {
            * Shift by the largest similarity so that the distances are not negative
            forvalues i=1/`nbitems' {
                forvalues j=`=`i'+1'/`nbitems' {
                    matrix `proximity'[`i',`j']=`proximity'[`i',`j']+`proxmin'
                    if `proximity'[`i',`j']<0 {
                        matrix `proximity'[`i',`j']=0
                    }
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                }
            }
        }
    }
    /**********************END OF THE COMPUTING OF THE PROXIMITIES**************************************/
    else {
        matrix `proximity'=`matrix'
    }
    matrix rowname `proximity'=`varlist'
    matrix colname `proximity'=`varlist'
    if "`measures'"!="" {
        di in green "{hline 50}"
        di in green "Measures of proximity between the items"
        di in green "{hline 50}"
        matrix list `proximity', noheader
        di
    }

    /**********************CLUSTERING PROCEDURE **********************************************/
    * clustermat replaces the data in memory with one observation per item
    qui clustermat `method' `proximity',clear labelvar(name)
    if "`method'"!="centroid"&"`method'"!="median"&"`dendrogram'"=="" {
        qui cluster dendro ,labels(name) hor ylabel(,angle(0)) title("Hierarchical Cluster Analysis on variables") subtitle("`desmethod'") xtitle("`desprox' proximities")
    }

    if "`partition'"!="" {
        foreach i of numlist `partition' {
            qui cluster gen cluster`i'=group(`i')
        }
        tempname clusters
        mkmat cluster* ,mat(`clusters')
        matrix rownames `clusters'=`varlist'
        * compteur is the column of the clusters matrix holding the current partition
        local compteur=0
        foreach i of numlist `partition' {
            local ++compteur
            di
            di in green "{hline 30}"
            di in green "Partition in `i' cluster(s)"
            di in green "{hline 30}"
            di
            forvalues j=1/`i' {
                * Items and number of items of cluster j, reused by the DETECT step
                local cluster`i'_`j'
                local nbi`i'_`j'=0
                forvalues k=1/`nbitems' {
                    if `clusters'[`k',`compteur']==`j' {
                        local cluster`i'_`j' `cluster`i'_`j'' ``k''
                        local ++nbi`i'_`j'
                    }
                }
                di in green "Cluster `j': " in ye "`cluster`i'_`j''"
            }
        }
        return matrix clusters=`clusters'
    }

    /**********************DETECT OPTION **************************************************/
    * Reload the original data: detect needs the items, not the cluster results
    use `hcaccproxfile',clear
    if "`detect'"!=""&"`partition'"!="" {
        foreach i of numlist `partition' {
            local liste
            local part
            forvalues j=1/`i' {
                local liste "`liste' `cluster`i'_`j''"
                local part "`part' `nbi`i'_`j''"
            }
            qui detect `liste',part(`part')
            local detect`i'=r(DETECT)
            local Iss`i'=r(Iss)
            local R`i'=r(R)
        }
        tempname indexes
        matrix define `indexes'=J(`compteur',4,0)
        matrix colnames `indexes'=Clusters DETECT Iss R
        di ""
        di in green "{hline 50}"
        di in green "Indexes to compare the partitions of the items"
        di in green "{hline 50}"
        di ""
        di in green _col(29) "DETECT" _col(43) "Iss" _col(56) "R"
        local compteur=0
        foreach k of numlist `partition' {
            local ++compteur
            matrix `indexes'[`compteur',1]=`k'
            matrix `indexes'[`compteur',2]=`detect`k''
            matrix `indexes'[`compteur',3]=`Iss`k''
            matrix `indexes'[`compteur',4]=`R`k''
            di _col(5) in green "`k' cluster(s):" _col(27) in yellow %8.5f `detect`k'' _col(38) %8.5f `Iss`k'' _col(49) %8.5f `R`k''
        }
        return matrix indexes=`indexes'
    }
    return local nbvar=`nbitems'
    return matrix measures=`proximity'
    * Cancel the preserve: the reloaded original data stay in memory
    restore, not
end

@ -0,0 +1,488 @@
*! Version 3.3 6 May 2014
*! Jean-Benoit Hardouin
************************************************************************************************************
* hcavar: Hierarchical Cluster Analysis (HCA) of variables
* Version 3.3: May 7, 2014 /* HTML option, if option*/
*
* Use the Detect Stata program (ssc install detect)
*
* Historic :
* Under the name of -hcaccprox-
* Version 1 [2004-01-18], Jean-Benoit Hardouin
* Version 2 [2004-05-12], Jean-Benoit Hardouin
* Version 3 [2005-12-31], Jean-Benoit Hardouin
* Version 3.1 [2006-01-15], Jean-Benoit Hardouin /* correction if there is only one individual with a given score*/
* Version 3.2 [2010-04-15], Jean-Benoit Hardouin /* Possibility to use Polytomous Items with CCOR, CCOV and MH*/
* Version 3.3 [2014-05-07], Jean-Benoit Hardouin, Bastien Perrot /* HTML option, if option*/
*
* Jean-benoit Hardouin - Department of Biomathematics and Biostatistics - University of Nantes - France
* EA 4275 "Biostatistics, Clinical Research and Subjective Measures in Health Sciences"
* jean-benoit.hardouin@univ-nantes.fr
*
* News about this program :http://www.anaqol.org
*
* Copyright 2004-2006, 2010 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
************************************************************************************************************
program define hcavar33, rclass
    * Hierarchical cluster analysis of variables (items), version 3.3.
    * Steps:
    *   1. keep the complete cases of varlist (after the optional if filter),
    *   2. build an item-by-item proximity matrix from prox(), or take the
    *      matrix named in matrix(),
    *   3. run clustermat with the linkage given in method(),
    *   4. optionally draw the dendrogram, list the partitions requested in
    *      partition() and compute the DETECT indexes for each of them.
    * Returned results: r(nbvar), r(measures) and, when requested,
    * r(clusters) and r(indexes).
    * NOTE(review): the in qualifier is accepted by syntax but is never
    * applied to the data; only the if qualifier is honoured.
    version 9
    syntax varlist(min=2 numeric) [if] [in] [,PROX(string) METHod(string) PARTition(numlist) MEASures DETect MATrix(string) noDENDROgram HTML(string)]
    * The original data are saved so they can be reloaded before DETECT runs
    tempfile hcaccproxfile
    qui save `hcaccproxfile',replace
    preserve
    if "`if'"!="" {
        qui keep `if'
    }
    if "`html'"!="" {
        //set scheme sj
        //local htmlregion "graphregion(fcolor(white) ifcolor(white))"
        di "<!-- SphereCalc start of response -->"
        di "<pre>"
    }
    local nbitems : word count `varlist'
    tokenize `varlist'
    * Listwise deletion is done for all items BEFORE counting modalities, so
    * that every item is inspected on the same complete-case sample
    forvalues i=1/`nbitems' {
        qui drop if missing(``i'')
    }
    * type = largest number of distinct values over the items
    * (inspect leaves r(N_unique) missing above 99 distinct values)
    local type=0
    forvalues i=1/`nbitems' {
        qui inspect ``i''
        if r(N_unique)>=. {
            local type=100
        }
        else if r(N_unique)>`type' {
            local type=r(N_unique)
        }
    }
    * Numeric flag: comparing the strings "10" and "2" would wrongly
    * classify items with 10 to 99 modalities as binary
    local polytomous=(`type'>2)
    if `type'==100 {
        local type ">99"
    }
    tempname proximity
    local prox=lower("`prox'")
    local method=lower("`method'")
    matrix define `proximity'=J(`nbitems',`nbitems',0)
    /**************************PROXIMITIES MEASURES DESCRIPTION************************/
    if "`prox'"==""|"`prox'"=="corr" {
        local prox="pearson"
    }
    else if "`prox'"=="a" {
        local prox="jaccard"
    }
    else if "`prox'"=="ad" {
        local prox="matching"
    }
    if `polytomous'&"`prox'"!="pearson"&"`prox'"!="ccov"&"`prox'"!="ccor"&"`prox'"!="mh" {
        di in red "Only the {hi:pearson}, {hi:ccov} and {hi:ccor} measures of proximity are available with ordinal or numerous variables"
        di in red "Please correct your {hi:prox} option."
        exit 198
    }
    if `polytomous'&"`detect'"!="" {
        di in ye "The {hi:detect} option is available only with binary variables. This option is disabled."
        local detect
        di
    }
    * DETECT is computed per partition, so it cannot run without partition()
    if "`partition'"==""&"`detect'"!="" {
        di in ye "option partition() required"
        error 198
    }
    local existmeas=0
    foreach i in jaccard matching pearson russel dice ccor mh ccov {
        if "`prox'"=="`i'" {
            local existmeas=1
        }
    }
    if `existmeas'==0 {
        di in red "You must define an existing measure of proximity (jaccard(a), matching(ad), pearson(cor), russel, dice, ccov, ccor, mh)."
        di in red "Please correct your {hi:prox} option."
        exit 198
    }
    * proxmin tracks the shift later applied to ccov and mh proximities
    if "`prox'"=="ccov"|"`prox'"=="mh" {
        local proxmin=0
    }
    if "`prox'"=="matching" {
        local desprox="Matching"
    }
    else if "`prox'"=="jaccard" {
        local desprox="Jaccard"
    }
    else if "`prox'"=="russel" {
        local desprox="Russel"
    }
    else if "`prox'"=="dice" {
        local desprox="Dice"
    }
    else if "`prox'"=="pearson" {
        local desprox="Pearson"
    }
    else if "`prox'"=="ccov" {
        local desprox="Conditional covariances"
    }
    else if "`prox'"=="ccor" {
        local desprox="Conditional correlations"
    }
    else if "`prox'"=="mh" {
        local desprox="Mantel Hanzel"
    }
    * A user-supplied matrix overrides the label; this is set last because
    * prox always has a value (default pearson) at this point
    if "`matrix'"!="" {
        local desprox="Defined by the user"
    }
    /**************************AGGREGATION METHOD DESCRIPTION************************/
    if "`method'"==""|"`method'"=="upgma" {
        local method="average"
    }
    else if "`method'"=="wpgma" {
        local method="waverage"
    }
    * Accept the long names (singlelinkage, ...) and map them to the short ones
    foreach m in average waverage single centroid median complete wards {
        if "`method'"=="`m'linkage" {
            local method `m'
        }
    }
    local vermethod=0
    foreach i in average waverage single centroid median complete wards {
        if "`method'"=="`i'" {
            local vermethod=1
        }
    }
    if `vermethod'==0 {
        di in red "You must define an existing method to define the proximity between two clusters of items:"
        di in red _col(10) "- single: single linkage"
        di in red _col(10) "- complete: complete linkage "
        di in red _col(10) "- average(UPGMA): Unweighted Pair-Group Method of Average"
        di in red _col(10) "- waverage(WPGMA): Weighted Pair-Group Method of Average"
        di in red _col(10) "- wards: Ward's linkage"
        di in red "Please correct your method option"
        exit 198
    }
    if "`method'"=="single" {
        local desmethod="Single linkage"
    }
    else if "`method'"=="complete" {
        local desmethod="Complete linkage"
    }
    else if "`method'"=="median" {
        local desmethod="Median linkage (no dendrogram)"
    }
    else if "`method'"=="centroid" {
        local desmethod="Centroid linkage (no dendrogram)"
    }
    else if "`method'"=="average" {
        local desmethod="Unweighted Pair-Group Method of Average"
    }
    else if "`method'"=="waverage" {
        local desmethod="Weighted Pair-Group Method of Average"
    }
    else if "`method'"=="wards" {
        local desmethod="Ward's linkage"
    }
    * Score built by the external genscore command; nk<k> holds the number
    * of individuals whose score equals k
    tempvar score
    genscore `varlist',score(`score')
    qui su `score'
    local maxscore=r(max)
    forvalues k=0/`maxscore' {
        qui count if `score'==`k'
        local nk`k'=r(N)
    }
    qui count
    local N=r(N)
    di in green "{hline 80}"
    di in green "Number of individuals with none missing values: " in ye `N'
    di in green "Maximal number of modalities for a variable: " in ye "`type'"
    di in green "Proximity measures: " in ye "`desprox'"
    di in green "Method to aggregate clusters: " in ye "`desmethod'"
    di in green "{hline 80}"
    di
    di
    /*************************Measure of proximities*********************************/
    if "`matrix'"=="" {
        forvalues i=1/`nbitems' {
            forvalues j=`=`i'+1'/`nbitems' {
                /***********************************Proximity AD*************************/
                if "`prox'"=="matching" { /*ad*/
                    qui count if ``i''==1&``j''==1
                    local tmp11=r(N)
                    qui count if ``i''==0&``j''==0
                    local tmp00=r(N)
                    matrix `proximity'[`i',`j']=sqrt(1-(`tmp11'+`tmp00')/`N')
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                }
                /***********************************Proximity A**************************/
                else if "`prox'"=="jaccard" { /*a*/
                    qui count if ``i''==1&``j''==1
                    local tmp11=r(N)
                    qui count if ``i''==0&``j''==0
                    local tmp00=r(N)
                    matrix `proximity'[`i',`j']=sqrt(1-`tmp11'/(`N'-`tmp00'))
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                }
                /***********************************Proximity Russel**************************/
                else if "`prox'"=="russel" {
                    qui count if ``i''==1&``j''==1
                    local tmp11=r(N)
                    matrix `proximity'[`i',`j']=sqrt(1-`tmp11'/`N')
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                }
                /***********************************Proximity Dice**************************/
                else if "`prox'"=="dice" {
                    qui count if ``i''==1&``j''==1
                    local tmp11=r(N)
                    qui count if ``i''==0&``j''==0
                    local tmp00=r(N)
                    matrix `proximity'[`i',`j']=sqrt(1-2*`tmp11'/(`N'+`tmp11'-`tmp00'))
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                }
                /**********************************Proximity COR*************************/
                else if "`prox'"=="pearson" { /*corr*/
                    qui corr ``i'' ``j''
                    matrix `proximity'[`i',`j']=sqrt(2*(1-r(rho)))
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                }
                /***********************************Proximity CCOV**********************/
                else if "`prox'"=="ccov" {
                    * Covariances within each score level 1..maxscore-1,
                    * weighted by the size of the level
                    local dij=0
                    local Ntemp=`N'
                    forvalues k=1/`=`maxscore'-1' {
                        if `nk`k''!=0 {
                            if `nk`k''>1 {
                                qui corr ``i'' ``j'' if `score'==`k',cov
                                local covi`i'j`j'k`k'=r(cov_12)
                            }
                            else if `nk`k''==1 {
                                * a single individual gives no covariance
                                local Ntemp=`Ntemp'-1
                                local covi`i'j`j'k`k'=0
                            }
                            else {
                                local covi`i'j`j'k`k'=0
                            }
                            local dij=`dij'+`covi`i'j`j'k`k''*`nk`k''
                        }
                    }
                    matrix `proximity'[`i',`j']=-`dij'/`Ntemp'
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                    if `proxmin'<`dij'/`Ntemp' {
                        local proxmin=`dij'/`Ntemp'
                    }
                }
                /***********************************Proximity CCOR**********************/
                else if "`prox'"=="ccor" {
                    local dij=0
                    local nnull=0
                    local Ntemp=`N'
                    forvalues k=1/`=`maxscore'-1' {
                        if `nk`k''!=0 {
                            if `nk`k''>1 {
                                qui corr ``i'' ``j'' if `score'==`k'
                                local cori`i'j`j'k`k'=r(rho)
                            }
                            else if `nk`k''==1 {
                                local Ntemp=`Ntemp'-1
                                local cori`i'j`j'k`k'=0
                            }
                            else {
                                local cori`i'j`j'k`k'=0
                            }
                            * levels with an undefined correlation are
                            * removed from the denominator through nnull
                            if `cori`i'j`j'k`k''!=. {
                                local dij=`dij'+`cori`i'j`j'k`k''*`nk`k''
                            }
                            else if `cori`i'j`j'k`k''==. {
                                local nnull=`nnull'+`nk`k''
                            }
                        }
                    }
                    matrix `proximity'[`i',`j']=sqrt(2*(1-`dij'/(`Ntemp'-`nnull')))
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                }
                /***********************************Proximity MH************************/
                else if "`prox'"=="mh" {
                    * Both sums must restart at 0 for every pair of items;
                    * otherwise the denominator of the previous pairs leaks
                    * into the current one
                    local numij=0
                    local denomij=0
                    forvalues k=1/`=`maxscore'-1' {
                        if `nk`k''!=0 {
                            qui count if ``i''==1&``j''==1&`score'==`k'
                            local A=r(N)
                            qui count if ``i''==0&``j''==1&`score'==`k'
                            local B=r(N)
                            qui count if ``i''==1&``j''==0&`score'==`k'
                            local C=r(N)
                            qui count if ``i''==0&``j''==0&`score'==`k'
                            local D=r(N)
                            local numij=`numij'+`A'*`D'/`nk`k''
                            local denomij=`denomij'+`B'*`C'/`nk`k''
                        }
                    }
                    matrix `proximity'[`i',`j']=-log(`numij'/`denomij')
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                    if `proxmin'<log(`numij'/`denomij') {
                        local proxmin=-`proximity'[`i',`j']
                    }
                }
            }
        }
        * ccov and mh are shifted by proxmin and floored at 0 so that the
        * dissimilarities handed to clustermat are non-negative
        if "`prox'"=="ccov"|"`prox'"=="mh" {
            forvalues i=1/`nbitems' {
                forvalues j=`=`i'+1'/`nbitems' {
                    matrix `proximity'[`i',`j']=`proximity'[`i',`j']+`proxmin'
                    if `proximity'[`i',`j']<0 {
                        matrix `proximity'[`i',`j']=0
                    }
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                }
            }
        }
    }
    else {
        matrix `proximity'=`matrix'
    }
    /**********************END OF THE COMPUTING OF THE PROXIMITIES**************************************/
    matrix rowname `proximity'=`varlist'
    matrix colname `proximity'=`varlist'
    if "`measures'"!="" {
        di in green "{hline 50}"
        di in green "Measures of proximity between the items"
        di in green "{hline 50}"
        matrix list `proximity', noheader
        di
    }
    /**********************CLUSTERING PROCEDURE **********************************************/
    * clustermat replaces the data in memory (clear) by one row per item
    qui clustermat `method' `proximity',clear labelvar(name)
    if "`method'"!="centroid"&"`method'"!="median"&"`dendrogram'"=="" {
        if "`html'" != "" {
            qui local saving "saving(`c(tmpdir)'/`html'_dendro,replace) nodraw"
            qui cluster dendro ,labels(name) hor ylabel(,angle(0)) title("Hierarchical Cluster Analysis on variables") subtitle("`desmethod'") xtitle("`desprox' proximities") `saving'
            qui graph use `c(tmpdir)'/`html'_dendro.gph
            qui graph export `c(tmpdir)'/`html'_dendro.eps, replace
            di "<br />"
            di "<img src=" _char(34) "/data/`html'_dendro.png" _char(34)
            di " class=" _char(34) "resgraph" _char(34) " alt=" _char(34) "dendro" _char(34) " title= " _char(34) "Hierarchical Cluster Analysis on variables - click to enlarge" _char(34) " width=" _char(34) "350" _char(34) " height=" _char(34) "240" _char(34) " >"
        }
        else {
            qui cluster dendro ,labels(name) hor ylabel(,angle(0)) title("Hierarchical Cluster Analysis on variables") subtitle("`desmethod'") xtitle("`desprox' proximities")
        }
    }
    if "`partition'"!="" {
        * One membership variable per requested number of clusters
        foreach i of numlist `partition' {
            qui cluster gen cluster`i'=group(`i')
        }
        tempname clusters
        mkmat cluster* ,mat(`clusters')
        matrix rownames `clusters'=`varlist'
        * compteur = column of the clusters matrix for the current partition
        local compteur=0
        foreach i of numlist `partition' {
            local ++compteur
            di
            di in green "{hline 30}"
            di in green "Partition in `i' cluster(s)"
            di in green "{hline 30}"
            di
            forvalues j=1/`i' {
                * cluster<i>_<j> = items of cluster j, nbi<i>_<j> = their count
                local cluster`i'_`j'
                local nbi`i'_`j'=0
                forvalues k=1/`nbitems' {
                    if `clusters'[`k',`compteur']==`j' {
                        local cluster`i'_`j' `cluster`i'_`j'' ``k''
                        local ++nbi`i'_`j'
                    }
                }
                di in green "Cluster `j': " in ye "`cluster`i'_`j''"
            }
        }
        return matrix clusters=`clusters'
    }
    /**********************DETECT OPTION **************************************************/
    * NOTE(review): the file saved at entry is reloaded here, so detect runs
    * without the if restriction and without the complete-case filter applied
    * above; confirm this is intended
    use `hcaccproxfile',clear
    if "`detect'"!="" {
        foreach i of numlist `partition' {
            local liste
            local part
            forvalues j=1/`i' {
                local liste "`liste' `cluster`i'_`j''"
                local part "`part' `nbi`i'_`j''"
            }
            qui detect `liste',part(`part')
            local detect`i'=r(DETECT)
            local Iss`i'=r(Iss)
            local R`i'=r(R)
        }
        tempname indexes
        matrix define `indexes'=J(`compteur',4,0)
        matrix colnames `indexes'=Clusters DETECT Iss R
        di ""
        di in green "{hline 50}"
        di in green "Indexes to compare the partitions of the items"
        di in green "{hline 50}"
        di ""
        di in green _col(29) "DETECT" _col(43) "Iss" _col(56) "R"
        local compteur=0
        foreach k of numlist `partition' {
            local ++compteur
            matrix `indexes'[`compteur',1]=`k'
            matrix `indexes'[`compteur',2]=`detect`k''
            matrix `indexes'[`compteur',3]=`Iss`k''
            matrix `indexes'[`compteur',4]=`R`k''
            di _col(5) in green "`k' cluster(s):" _col(27) in yellow %8.5f `detect`k'' _col(38) %8.5f `Iss`k'' _col(49) %8.5f `R`k''
        }
        return matrix indexes=`indexes'
    }
    return local nbvar=`nbitems'
    return matrix measures=`proximity'
    * The original data are already back in memory, so the preserved copy
    * is simply discarded
    restore, not
    *use `hcaccproxfile',clear
end

@ -0,0 +1,489 @@
*! Version 3.4 26 May 2014
*! Jean-Benoit Hardouin
************************************************************************************************************
* hcavar: Hierarchical Cluster Analysis (HCA) of variables
* Version 3.4: May 26, 2014 /* DETECT option available for polytomous items */
*
* Use the Detect Stata program (ssc install detect)
*
* Historic :
* Under the name of -hcaccprox-
* Version 1 [2004-01-18], Jean-Benoit Hardouin
* Version 2 [2004-05-12], Jean-Benoit Hardouin
* Version 3 [2005-12-31], Jean-Benoit Hardouin
* Version 3.1 [2006-01-15], Jean-Benoit Hardouin /* correction if there is only one individual with a given score*/
* Version 3.2 [2010-04-15], Jean-Benoit Hardouin /* Possibility to use Polytomous Items with CCOR, CCOV and MH*/
* Version 3.3 [2014-05-07], Jean-Benoit Hardouin, Bastien Perrot /* HTML option, if option*/
* Version 3.4 [2014-05-26], Jean-Benoit Hardouin, Bastien Perrot /* DETECT option available for polytomous items */
*
* Jean-benoit Hardouin - Department of Biomathematics and Biostatistics - University of Nantes - France
* EA 4275 "Biostatistics, Clinical Research and Subjective Measures in Health Sciences"
* jean-benoit.hardouin@univ-nantes.fr
*
* News about this program :http://www.anaqol.org
*
* Copyright 2004-2006, 2010 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
************************************************************************************************************
program define hcavar34, rclass
    * Hierarchical cluster analysis of variables (items), version 3.4.
    * Steps:
    *   1. keep the complete cases of varlist (after the optional if filter),
    *   2. build an item-by-item proximity matrix from prox(), or take the
    *      matrix named in matrix(),
    *   3. run clustermat with the linkage given in method(),
    *   4. optionally draw the dendrogram, list the partitions requested in
    *      partition() and compute the DETECT indexes for each of them.
    * Returned results: r(nbvar), r(measures) and, when requested,
    * r(clusters) and r(indexes).
    * NOTE(review): the in qualifier is accepted by syntax but is never
    * applied to the data; only the if qualifier is honoured.
    version 9
    syntax varlist(min=2 numeric) [if] [in] [,PROX(string) METHod(string) PARTition(numlist) MEASures DETect MATrix(string) noDENDROgram HTML(string)]
    * The original data are saved so they can be reloaded before DETECT runs
    tempfile hcaccproxfile
    qui save `hcaccproxfile',replace
    preserve
    if "`if'"!="" {
        qui keep `if'
    }
    if "`html'"!="" {
        //set scheme sj
        //local htmlregion "graphregion(fcolor(white) ifcolor(white))"
        di "<!-- SphereCalc start of response -->"
        di "<pre>"
    }
    local nbitems : word count `varlist'
    tokenize `varlist'
    * Listwise deletion is done for all items BEFORE counting modalities, so
    * that every item is inspected on the same complete-case sample
    forvalues i=1/`nbitems' {
        qui drop if missing(``i'')
    }
    * type = largest number of distinct values over the items
    * (inspect leaves r(N_unique) missing above 99 distinct values)
    local type=0
    forvalues i=1/`nbitems' {
        qui inspect ``i''
        if r(N_unique)>=. {
            local type=100
        }
        else if r(N_unique)>`type' {
            local type=r(N_unique)
        }
    }
    * Numeric flag: comparing the strings "10" and "2" would wrongly
    * classify items with 10 to 99 modalities as binary
    local polytomous=(`type'>2)
    if `type'==100 {
        local type ">99"
    }
    tempname proximity
    local prox=lower("`prox'")
    local method=lower("`method'")
    matrix define `proximity'=J(`nbitems',`nbitems',0)
    /**************************PROXIMITIES MEASURES DESCRIPTION************************/
    if "`prox'"==""|"`prox'"=="corr" {
        local prox="pearson"
    }
    else if "`prox'"=="a" {
        local prox="jaccard"
    }
    else if "`prox'"=="ad" {
        local prox="matching"
    }
    if `polytomous'&"`prox'"!="pearson"&"`prox'"!="ccov"&"`prox'"!="ccor"&"`prox'"!="mh" {
        di in red "Only the {hi:pearson}, {hi:ccov} and {hi:ccor} measures of proximity are available with ordinal or numerous variables"
        di in red "Please correct your {hi:prox} option."
        exit 198
    }
    * DETECT is computed per partition, so it cannot run without partition()
    if "`partition'"==""&"`detect'"!="" {
        di in ye "option partition() required"
        error 198
    }
    local existmeas=0
    foreach i in jaccard matching pearson russel dice ccor mh ccov {
        if "`prox'"=="`i'" {
            local existmeas=1
        }
    }
    if `existmeas'==0 {
        di in red "You must define an existing measure of proximity (jaccard(a), matching(ad), pearson(cor), russel, dice, ccov, ccor, mh)."
        di in red "Please correct your {hi:prox} option."
        exit 198
    }
    * proxmin tracks the shift later applied to ccov and mh proximities
    if "`prox'"=="ccov"|"`prox'"=="mh" {
        local proxmin=0
    }
    if "`prox'"=="matching" {
        local desprox="Matching"
    }
    else if "`prox'"=="jaccard" {
        local desprox="Jaccard"
    }
    else if "`prox'"=="russel" {
        local desprox="Russel"
    }
    else if "`prox'"=="dice" {
        local desprox="Dice"
    }
    else if "`prox'"=="pearson" {
        local desprox="Pearson"
    }
    else if "`prox'"=="ccov" {
        local desprox="Conditional covariances"
    }
    else if "`prox'"=="ccor" {
        local desprox="Conditional correlations"
    }
    else if "`prox'"=="mh" {
        local desprox="Mantel Hanzel"
    }
    * A user-supplied matrix overrides the label; this is set last because
    * prox always has a value (default pearson) at this point
    if "`matrix'"!="" {
        local desprox="Defined by the user"
    }
    /**************************AGGREGATION METHOD DESCRIPTION************************/
    if "`method'"==""|"`method'"=="upgma" {
        local method="average"
    }
    else if "`method'"=="wpgma" {
        local method="waverage"
    }
    * Accept the long names (singlelinkage, ...) and map them to the short ones
    foreach m in average waverage single centroid median complete wards {
        if "`method'"=="`m'linkage" {
            local method `m'
        }
    }
    local vermethod=0
    foreach i in average waverage single centroid median complete wards {
        if "`method'"=="`i'" {
            local vermethod=1
        }
    }
    if `vermethod'==0 {
        di in red "You must define an existing method to define the proximity between two clusters of items:"
        di in red _col(10) "- single: single linkage"
        di in red _col(10) "- complete: complete linkage "
        di in red _col(10) "- average(UPGMA): Unweighted Pair-Group Method of Average"
        di in red _col(10) "- waverage(WPGMA): Weighted Pair-Group Method of Average"
        di in red _col(10) "- wards: Ward's linkage"
        di in red "Please correct your method option"
        exit 198
    }
    if "`method'"=="single" {
        local desmethod="Single linkage"
    }
    else if "`method'"=="complete" {
        local desmethod="Complete linkage"
    }
    else if "`method'"=="median" {
        local desmethod="Median linkage (no dendrogram)"
    }
    else if "`method'"=="centroid" {
        local desmethod="Centroid linkage (no dendrogram)"
    }
    else if "`method'"=="average" {
        local desmethod="Unweighted Pair-Group Method of Average"
    }
    else if "`method'"=="waverage" {
        local desmethod="Weighted Pair-Group Method of Average"
    }
    else if "`method'"=="wards" {
        local desmethod="Ward's linkage"
    }
    * Score built by the external genscore command; nk<k> holds the number
    * of individuals whose score equals k
    tempvar score
    genscore `varlist',score(`score')
    qui su `score'
    local maxscore=r(max)
    forvalues k=0/`maxscore' {
        qui count if `score'==`k'
        local nk`k'=r(N)
    }
    qui count
    local N=r(N)
    di in green "{hline 80}"
    di in green "Number of individuals with none missing values: " in ye `N'
    di in green "Maximal number of modalities for a variable: " in ye "`type'"
    di in green "Proximity measures: " in ye "`desprox'"
    di in green "Method to aggregate clusters: " in ye "`desmethod'"
    di in green "{hline 80}"
    di
    di
    /*************************Measure of proximities*********************************/
    if "`matrix'"=="" {
        forvalues i=1/`nbitems' {
            forvalues j=`=`i'+1'/`nbitems' {
                /***********************************Proximity AD*************************/
                if "`prox'"=="matching" { /*ad*/
                    qui count if ``i''==1&``j''==1
                    local tmp11=r(N)
                    qui count if ``i''==0&``j''==0
                    local tmp00=r(N)
                    matrix `proximity'[`i',`j']=sqrt(1-(`tmp11'+`tmp00')/`N')
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                }
                /***********************************Proximity A**************************/
                else if "`prox'"=="jaccard" { /*a*/
                    qui count if ``i''==1&``j''==1
                    local tmp11=r(N)
                    qui count if ``i''==0&``j''==0
                    local tmp00=r(N)
                    matrix `proximity'[`i',`j']=sqrt(1-`tmp11'/(`N'-`tmp00'))
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                }
                /***********************************Proximity Russel**************************/
                else if "`prox'"=="russel" {
                    qui count if ``i''==1&``j''==1
                    local tmp11=r(N)
                    matrix `proximity'[`i',`j']=sqrt(1-`tmp11'/`N')
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                }
                /***********************************Proximity Dice**************************/
                else if "`prox'"=="dice" {
                    qui count if ``i''==1&``j''==1
                    local tmp11=r(N)
                    qui count if ``i''==0&``j''==0
                    local tmp00=r(N)
                    matrix `proximity'[`i',`j']=sqrt(1-2*`tmp11'/(`N'+`tmp11'-`tmp00'))
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                }
                /**********************************Proximity COR*************************/
                else if "`prox'"=="pearson" { /*corr*/
                    qui corr ``i'' ``j''
                    matrix `proximity'[`i',`j']=sqrt(2*(1-r(rho)))
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                }
                /***********************************Proximity CCOV**********************/
                else if "`prox'"=="ccov" {
                    * Covariances within each score level 1..maxscore-1,
                    * weighted by the size of the level
                    local dij=0
                    local Ntemp=`N'
                    forvalues k=1/`=`maxscore'-1' {
                        if `nk`k''!=0 {
                            if `nk`k''>1 {
                                qui corr ``i'' ``j'' if `score'==`k',cov
                                local covi`i'j`j'k`k'=r(cov_12)
                            }
                            else if `nk`k''==1 {
                                * a single individual gives no covariance
                                local Ntemp=`Ntemp'-1
                                local covi`i'j`j'k`k'=0
                            }
                            else {
                                local covi`i'j`j'k`k'=0
                            }
                            local dij=`dij'+`covi`i'j`j'k`k''*`nk`k''
                        }
                    }
                    matrix `proximity'[`i',`j']=-`dij'/`Ntemp'
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                    if `proxmin'<`dij'/`Ntemp' {
                        local proxmin=`dij'/`Ntemp'
                    }
                }
                /***********************************Proximity CCOR**********************/
                else if "`prox'"=="ccor" {
                    local dij=0
                    local nnull=0
                    local Ntemp=`N'
                    forvalues k=1/`=`maxscore'-1' {
                        if `nk`k''!=0 {
                            if `nk`k''>1 {
                                qui corr ``i'' ``j'' if `score'==`k'
                                local cori`i'j`j'k`k'=r(rho)
                            }
                            else if `nk`k''==1 {
                                local Ntemp=`Ntemp'-1
                                local cori`i'j`j'k`k'=0
                            }
                            else {
                                local cori`i'j`j'k`k'=0
                            }
                            * levels with an undefined correlation are
                            * removed from the denominator through nnull
                            if `cori`i'j`j'k`k''!=. {
                                local dij=`dij'+`cori`i'j`j'k`k''*`nk`k''
                            }
                            else if `cori`i'j`j'k`k''==. {
                                local nnull=`nnull'+`nk`k''
                            }
                        }
                    }
                    matrix `proximity'[`i',`j']=sqrt(2*(1-`dij'/(`Ntemp'-`nnull')))
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                }
                /***********************************Proximity MH************************/
                else if "`prox'"=="mh" {
                    * Both sums must restart at 0 for every pair of items;
                    * otherwise the denominator of the previous pairs leaks
                    * into the current one
                    local numij=0
                    local denomij=0
                    forvalues k=1/`=`maxscore'-1' {
                        if `nk`k''!=0 {
                            qui count if ``i''==1&``j''==1&`score'==`k'
                            local A=r(N)
                            qui count if ``i''==0&``j''==1&`score'==`k'
                            local B=r(N)
                            qui count if ``i''==1&``j''==0&`score'==`k'
                            local C=r(N)
                            qui count if ``i''==0&``j''==0&`score'==`k'
                            local D=r(N)
                            local numij=`numij'+`A'*`D'/`nk`k''
                            local denomij=`denomij'+`B'*`C'/`nk`k''
                        }
                    }
                    matrix `proximity'[`i',`j']=-log(`numij'/`denomij')
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                    if `proxmin'<log(`numij'/`denomij') {
                        local proxmin=-`proximity'[`i',`j']
                    }
                }
            }
        }
        * ccov and mh are shifted by proxmin and floored at 0 so that the
        * dissimilarities handed to clustermat are non-negative
        if "`prox'"=="ccov"|"`prox'"=="mh" {
            forvalues i=1/`nbitems' {
                forvalues j=`=`i'+1'/`nbitems' {
                    matrix `proximity'[`i',`j']=`proximity'[`i',`j']+`proxmin'
                    if `proximity'[`i',`j']<0 {
                        matrix `proximity'[`i',`j']=0
                    }
                    matrix `proximity'[`j',`i']=`proximity'[`i',`j']
                }
            }
        }
    }
    else {
        matrix `proximity'=`matrix'
    }
    /**********************END OF THE COMPUTING OF THE PROXIMITIES**************************************/
    matrix rowname `proximity'=`varlist'
    matrix colname `proximity'=`varlist'
    if "`measures'"!="" {
        di in green "{hline 50}"
        di in green "Measures of proximity between the items"
        di in green "{hline 50}"
        matrix list `proximity', noheader
        di
    }
    /**********************CLUSTERING PROCEDURE **********************************************/
    * clustermat replaces the data in memory (clear) by one row per item
    qui clustermat `method' `proximity',clear labelvar(name)
    if "`method'"!="centroid"&"`method'"!="median"&"`dendrogram'"=="" {
        if "`html'" != "" {
            qui local saving "saving(`c(tmpdir)'/`html'_dendro,replace) nodraw"
            qui cluster dendro ,labels(name) hor ylabel(,angle(0)) title("Hierarchical Cluster Analysis on variables") subtitle("`desmethod'") xtitle("`desprox' proximities") `saving'
            qui graph use `c(tmpdir)'/`html'_dendro.gph
            qui graph export `c(tmpdir)'/`html'_dendro.eps, replace
            di "<br />"
            di "<img src=" _char(34) "/data/`html'_dendro.png" _char(34)
            di " class=" _char(34) "resgraph" _char(34) " alt=" _char(34) "dendro" _char(34) " title= " _char(34) "Hierarchical Cluster Analysis on variables - click to enlarge" _char(34) " width=" _char(34) "350" _char(34) " height=" _char(34) "240" _char(34) " >"
        }
        else {
            qui cluster dendro ,labels(name) hor ylabel(,angle(0)) title("Hierarchical Cluster Analysis on variables") subtitle("`desmethod'") xtitle("`desprox' proximities")
        }
    }
    if "`partition'"!="" {
        * One membership variable per requested number of clusters
        foreach i of numlist `partition' {
            qui cluster gen cluster`i'=group(`i')
        }
        tempname clusters
        mkmat cluster* ,mat(`clusters')
        matrix rownames `clusters'=`varlist'
        * compteur = column of the clusters matrix for the current partition
        local compteur=0
        foreach i of numlist `partition' {
            local ++compteur
            di
            di in green "{hline 30}"
            di in green "Partition in `i' cluster(s)"
            di in green "{hline 30}"
            di
            forvalues j=1/`i' {
                * cluster<i>_<j> = items of cluster j, nbi<i>_<j> = their count
                local cluster`i'_`j'
                local nbi`i'_`j'=0
                forvalues k=1/`nbitems' {
                    if `clusters'[`k',`compteur']==`j' {
                        local cluster`i'_`j' `cluster`i'_`j'' ``k''
                        local ++nbi`i'_`j'
                    }
                }
                di in green "Cluster `j': " in ye "`cluster`i'_`j''"
            }
        }
        return matrix clusters=`clusters'
    }
    /**********************DETECT OPTION **************************************************/
    * NOTE(review): the file saved at entry is reloaded here, so detect runs
    * without the if restriction and without the complete-case filter applied
    * above; confirm this is intended
    use `hcaccproxfile',clear
    if "`detect'"!="" {
        foreach i of numlist `partition' {
            local liste
            local part
            forvalues j=1/`i' {
                local liste "`liste' `cluster`i'_`j''"
                local part "`part' `nbi`i'_`j''"
            }
            qui detect `liste',part(`part')
            local detect`i'=r(DETECT)
            local Iss`i'=r(Iss)
            local R`i'=r(R)
        }
        tempname indexes
        matrix define `indexes'=J(`compteur',4,0)
        matrix colnames `indexes'=Clusters DETECT Iss R
        di ""
        di in green "{hline 50}"
        di in green "Indexes to compare the partitions of the items"
        di in green "{hline 50}"
        di ""
        di in green _col(29) "DETECT" _col(43) "Iss" _col(56) "R"
        local compteur=0
        foreach k of numlist `partition' {
            local ++compteur
            matrix `indexes'[`compteur',1]=`k'
            matrix `indexes'[`compteur',2]=`detect`k''
            matrix `indexes'[`compteur',3]=`Iss`k''
            matrix `indexes'[`compteur',4]=`R`k''
            di _col(5) in green "`k' cluster(s):" _col(27) in yellow %8.5f `detect`k'' _col(38) %8.5f `Iss`k'' _col(49) %8.5f `R`k''
        }
        return matrix indexes=`indexes'
    }
    return local nbvar=`nbitems'
    return matrix measures=`proximity'
    * The original data are already back in memory, so the preserved copy
    * is simply discarded
    restore, not
    *use `hcaccproxfile',clear
end

@ -0,0 +1,193 @@
*! version 2.4 3 May 2013
*! Jean-Benoit Hardouin
************************************************************************************************************
* imputeitems: Imputation of missing data of binary items
*
* Version 1 : November 25, 2006 (Jean-Benoit Hardouin) /*Dichotomous data*/
* Version 1.1 : January 26, 2007 (Jean-Benoit Hardouin) /*Correction of a bug with the BIL method*/
* Version 1.2 : March 9, 2007 (Jean-Benoit Hardouin) /*IF*/
* Version 2 : June 30, 2008 (Jean-Benoit Hardouin) /*new names of the methods, MAX option*/
* Version 2.1 : December 3, 2008 (Jean-Benoit Hardouin) /*correction of a bug with the MAX option*/
* Version 2.2 : January 28, 2013 (Jean-Benoit Hardouin) /*noround option*/
* Version 2.3 : February 19, 2013 (Jean-Benoit Hardouin) /*polytomous items with PMS method*/
* Version 2.4 : May 3, 2013 (Jean-Benoit Hardouin) /*minor correction*/
*
* Jean-benoit Hardouin, phD, Assistant Professor
* Team of Biostatistics, Pharmacoepidemiology and Subjective Measures in Health Sciences (UPRES EA 4275 SPHERE)
* University of Nantes - Faculty of Pharmaceutical Sciences
* France
* jean-benoit.hardouin@anaqol.org
*
* News about this program :http://www.anaqol.org
*
* Copyright 2006-2008,2013 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
************************************************************************************************************/
program define imputeitems
    * Impute the missing values of a set of items and store the completed
    * items in new variables named <prefix><item> (prefix defaults to imp).
    * Options:
    *   method()  pms (default), ims, cim, ics, log, worst, or directly one
    *             of the random variants bip, bii, bic, bil
    *   random    switches pms, ims, cim, log to bip, bii, bic, bil, which
    *             draw a 0/1 value from the imputed probability
    *   max()     an individual with more than max missing items keeps
    *             missing values (default: number of items)
    *   noround   keeps the non-rounded imputed value
    *   prefix()  prefix of the created variables
    * Observed values are never modified; observations outside the if
    * condition get missing values in the new variables.
    version 9
    syntax varlist(min=2 numeric) [if/] [, PREFix(string) METHod(string) RANDom max(int -1) noround]
    if "`if'"=="" {
        local if 1
        local ifif
    }
    else {
        * The condition is spliced after & in several commands below; the
        * parentheses keep a condition containing | from being mis-parsed
        local if (`if')
        local ifif if `if'
    }
    local nbitems : word count `varlist'
    tokenize `varlist'
    if `max'==-1 {
        local max=`nbitems'
    }
    if "`method'"=="" {
        local method pms
    }
    local knownmethod=0
    foreach m in pms ims cim ics bip bil bic bii log worst {
        if "`method'"=="`m'" {
            local knownmethod=1
        }
    }
    if `knownmethod'==0 {
        di in red "The method option is unknow (choose among pms, ims, cim, ics, log and worst)"
        exit 198
    }
    forvalues i=1/`nbitems' {
        * Every method except pms requires items coded 0/1: any other
        * observed value in the sample is rejected
        if "`method'"!="pms" {
            qui count if ``i''!=0&``i''!=1&!missing(``i'')&`if'
            if r(N)>0 {
                di in red "The {hi:imputeitems} command runs only with dichotomous items"
                exit 198
            }
        }
        qui su ``i'' `ifif'
        local mean`i'=r(mean)
    }
    if "`method'"=="pms"&"`random'"!="" {
        local method bip
    }
    else if "`method'"=="ims"&"`random'"!="" {
        local method bii
    }
    else if "`method'"=="log"&"`random'"!="" {
        local method bil
    }
    else if "`method'"=="cim"&"`random'"!="" {
        local method bic
    }
    else if ("`method'"=="ics"|"`method'"=="worst")&"`random'"!="" {
        di in green "The random process is not available with the {hi:ics} or {hi:worst} methods. The {hi:random} option is ignored."
        local random
    }
    forvalues i=1/`nbitems' {
        tempvar imp`i' tmp`i'
        if "`method'"=="pms"|"`method'"=="bip"|"`method'"=="cim"|"`method'"=="bic" {
            * Mean of the observed items of the individual
            qui egen `imp`i''=rowtotal(`varlist') `ifif'
            qui egen `tmp`i''=rownonmiss(`varlist') `ifif'
            qui replace `imp`i''=`imp`i''/`tmp`i'' `ifif'
            qui replace `imp`i''=``i'' if ``i''!=.&`if'
            if "`method'"=="pms"&"`round'"=="" {
                qui replace `imp`i''=round(`imp`i'') `ifif'
            }
            else if "`method'"=="bip" {
                qui replace `imp`i''=uniform()<`imp`i'' `ifif'
            }
            else if "`method'"=="cim"|"`method'"=="bic"{
                * The individual mean is rescaled by the mean of the item
                * relative to the means of the items the individual
                * answered, then bounded to [0,1]
                qui replace `imp`i''=`imp`i''*`tmp`i''*`mean`i'' `ifif'
                qui replace `tmp`i''=0 `ifif'
                forvalues j=1/`nbitems' {
                    qui replace `tmp`i''=`tmp`i''+`mean`j'' if ``j''!=.&`if'
                }
                qui replace `imp`i''=`imp`i''/`tmp`i'' `ifif'
                qui replace `imp`i''=1 if `imp`i''>1&`imp`i''!=.&`if'
                qui replace `imp`i''=0 if `imp`i''<0&`imp`i''!=.&`if'
                if "`method'"=="cim"&"`round'"=="" {
                    qui replace `imp`i''=round(`imp`i'') `ifif'
                }
                else if "`method'"=="bic" {
                    qui replace `imp`i''=uniform()<`imp`i'' `ifif'
                }
            }
        }
        else if "`method'"=="ims"|"`method'"=="bii" {
            * Mean of the item over the sample
            qui gen `imp`i''=`mean`i'' `ifif'
            if "`method'"=="ims"&"`round'"=="" {
                qui replace `imp`i''=round(`imp`i'') `ifif'
            }
            else if "`method'"=="bii" {
                qui replace `imp`i''=uniform()<`imp`i'' `ifif'
            }
        }
        else if "`method'"=="ics" {
            * Value of the item that has the highest correlation with item i
            local item=0
            local corrmax=-2
            forvalues j=1/`nbitems' {
                if `i'!=`j' {
                    qui corr ``i'' ``j'' `ifif'
                    if r(rho)>`corrmax'&r(rho)!=. {
                        local item `j'
                        local corrmax=r(rho)
                    }
                }
            }
            qui gen `imp`i''=``i'' `ifif'
            * item stays 0 when no correlation could be computed; there is
            * then no donor item and the missing values are left as is
            if `item'>0 {
                di "A missing value for the item ``i'' is replaced by the value of the item `item'"
                qui replace `imp`i''=``item'' if ``i''==.&`if'
            }
        }
        else if "`method'"=="log"|"`method'"=="bil" {
            * Logistic prediction from the other items kept by a stepwise
            * selection
            local liste`i'
            forvalues j=1/`nbitems' {
                if `i'!=`j' {
                    local liste`i' `liste`i'' ``j''
                }
            }
            qui sw ,pr(0.05): logit ``i'' `liste`i'' `ifif'
            * Macro list operations are used instead of substr() so that a
            * long list of selected items is not truncated
            local select : colnames e(b)
            local cons _cons
            local select : list select - cons
            qui logit ``i'' `select' `ifif'
            qui predict `imp`i'' `ifif'
            if "`method'"=="log"&"`round'"=="" {
                qui replace `imp`i''=round(`imp`i'') if `imp`i''!=.&`if'
            }
            else if "`method'"=="bil" {
                qui replace `imp`i''=uniform()<`imp`i'' if `imp`i''!=.&`if'
            }
        }
        else if "`method'"=="worst" {
            qui gen `imp`i''=0 `ifif'
        }
    }
    if "`prefix'"=="" {
        local prefix imp
    }
    forvalues i=1/`nbitems' {
        * Observed values always take precedence over imputed ones
        qui replace `imp`i''=``i'' if ``i''!=.&`if'
        qui gen `prefix'``i''=`imp`i'' `ifif'
    }
    * No imputation for the individuals with more than max missing items
    tempvar miss
    qui egen `miss'=rowmiss(`varlist')
    forvalues i=1/`nbitems' {
        qui replace `prefix'``i''=. if ``i''==.&`miss'>`max'
    }
end

@ -0,0 +1,193 @@
*! version 2.4 3 May 2013
*! Jean-Benoit Hardouin
************************************************************************************************************
* imputeitems: Imputation of missing data of binary items
*
* Version 1 : November 25, 2006 (Jean-Benoit Hardouin) /*Dichotomous data*/
* Version 1.1 : January 26, 2007 (Jean-Benoit Hardouin) /*Correction of a bug with the BIL method*/
* Version 1.2 : March 9, 2007 (Jean-Benoit Hardouin) /*IF*/
* Version 2 : June 30, 2008 (Jean-Benoit Hardouin) /*new names of the methods, MAX option*/
* Version 2.1 : December 3, 2008 (Jean-Benoit Hardouin) /*correction of a bug with the MAX option*/
* Version 2.2 : January 28, 2013 (Jean-Benoit Hardouin) /*noround option*/
* Version 2.3 : February 19, 2013 (Jean-Benoit Hardouin) /*polytomous items with PMS method*/
* Version 2.4 : May 3, 2013 (Jean-Benoit Hardouin) /*minor correction*/
*
* Jean-benoit Hardouin, phD, Assistant Professor
* Team of Biostatistics, Pharmacoepidemiology and Subjective Measures in Health Sciences (UPRES EA 4275 SPHERE)
* University of Nantes - Faculty of Pharmaceutical Sciences
* France
* jean-benoit.hardouin@anaqol.org
*
* News about this program :http://www.anaqol.org
*
* Copyright 2006-2008,2013 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
************************************************************************************************************/
program define imputeitems
    * imputeitems: impute missing item responses and store the completed
    * items in new variables named <prefix><item name>.
    *
    * Syntax: imputeitems varlist [if] [, prefix() method() random max() noround]
    *   prefix   prefix of the generated variables (default: imp)
    *   method   pms (default), ims, cim, ics, log or worst; the legacy names
    *            bip, bii, bic and bil are pms, ims, cim and log with random
    *   random   draw the imputed value at random instead of rounding
    *   max      rows with more than max missing items are left unimputed
    *            (default -1: no limit)
    *   noround  keep the unrounded imputed value (syntax stores this flag
    *            in the local macro round)
    * Exits with return code 198 on an unknown method or when an item is
    * rejected by the coding check.
    version 9
    syntax varlist(min=2 numeric) [if/] [, PREFix(string) METHod(string) RANDom max(int -1) noround]
    * The if expression is pasted after an & in several conditions below, so
    * it is wrapped in parentheses; without them a user condition containing
    * the | operator would be split by operator precedence.
    if "`if'"=="" {
        local if 1
        local ifif
    }
    else {
        local if (`if')
        local ifif if `if'
    }
    local nbitems : word count `varlist'
    tokenize `varlist'
    if `max'==-1 {
        local max=`nbitems'
    }
    if "`method'"=="" {
        local method pms
    }
    if "`prefix'"=="" {
        local prefix imp
    }
    forvalues i=1/`nbitems' {
        qui su ``i'' `ifif'
        * Compared as strings so that an item without any observation (empty
        * r(min)) triggers the message below instead of a syntax error.
        * NOTE(review): the condition only rejects items whose minimum is not 0
        * AND whose maximum is not 1; confirm whether OR was intended.
        if "`r(min)'"!="0"&("`r(max)'"!="1"&"`method'"!="pms") {
            di in red "The {hi:imputeitems} command runs only with dichotomous items"
            exit 198
        }
        * item mean over the selected sample, used by the ims and cim methods
        local mean`i'=r(mean)
    }
    if "`method'"!="pms"&"`method'"!="ims"&"`method'"!="cim"&"`method'"!="ics"&"`method'"!="bip"&"`method'"!="bil"&"`method'"!="bic"&"`method'"!="bii"&"`method'"!="log"&"`method'"!="worst" {
        di in red "The method option is unknow (choose among pms, ims, cim, ics, log and worst)"
        exit 198
    }
    * The random option switches each deterministic method to its random variant
    if "`method'"=="pms"&"`random'"!="" {
        local method bip
    }
    else if "`method'"=="ims"&"`random'"!="" {
        local method bii
    }
    else if "`method'"=="log"&"`random'"!="" {
        local method bil
    }
    else if "`method'"=="cim"&"`random'"!="" {
        local method bic
    }
    else if ("`method'"=="ics"|"`method'"=="worst")&"`random'"!="" {
        di in green "The random process is not available with the {hi:ics} or {hi:worst} methods. The {hi:random} option is ignored."
        local random
    }
    forvalues i=1/`nbitems' {
        tempvar imp`i' tmp`i'
        if "`method'"=="pms"|"`method'"=="bip"|"`method'"=="cim"|"`method'"=="bic" {
            * row mean of the answered items
            qui egen `imp`i''=rowtotal(`varlist') `ifif'
            qui egen `tmp`i''=rownonmiss(`varlist') `ifif'
            qui replace `imp`i''=`imp`i''/`tmp`i'' `ifif'
            qui replace `imp`i''=``i'' if ``i''!=.&`if'
            if "`method'"=="pms"&"`round'"=="" {
                qui replace `imp`i''=round(`imp`i'') `ifif'
            }
            else if "`method'"=="bip" {
                * a missing probability must stay missing: uniform()<. is true
                qui replace `imp`i''=uniform()<`imp`i'' if `imp`i''!=.&`if'
            }
            else if "`method'"=="cim"|"`method'"=="bic"{
                * row total weighted by the item mean, divided by the sum of
                * the means of the items answered in that row, clipped to 0-1
                qui replace `imp`i''=`imp`i''*`tmp`i''*`mean`i'' `ifif'
                qui replace `tmp`i''=0 `ifif'
                forvalues j=1/`nbitems' {
                    qui replace `tmp`i''=`tmp`i''+`mean`j'' if ``j''!=.&`if'
                }
                qui replace `imp`i''=`imp`i''/`tmp`i'' `ifif'
                qui replace `imp`i''=1 if `imp`i''>1&`imp`i''!=.&`if'
                qui replace `imp`i''=0 if `imp`i''<0&`imp`i''!=.&`if'
                if "`method'"=="cim"&"`round'"=="" {
                    qui replace `imp`i''=round(`imp`i'') `ifif'
                }
                else if "`method'"=="bic" {
                    qui replace `imp`i''=uniform()<`imp`i'' if `imp`i''!=.&`if'
                }
            }
        }
        else if "`method'"=="ims"|"`method'"=="bii" {
            qui gen `imp`i''=`mean`i'' `ifif'
            if "`method'"=="ims"&"`round'"=="" {
                qui replace `imp`i''=round(`imp`i'') `ifif'
            }
            else if "`method'"=="bii" {
                qui replace `imp`i''=uniform()<`imp`i'' if `imp`i''!=.&`if'
            }
        }
        else if "`method'"=="ics" {
            * pick the other item with the highest computable correlation
            * NOTE(review): if no correlation can be computed, item stays 0
            * and the replace below fails; confirm whether a guard is wanted.
            local item=0
            local corrmax=-2
            forvalues j=1/`nbitems' {
                if `i'!=`j' {
                    qui corr ``i'' ``j'' `ifif'
                    if r(rho)>`corrmax'&r(rho)!=. {
                        local item `j'
                        local corrmax=r(rho)
                    }
                }
            }
            di "A missing value for the item ``i'' is replaced by the value of the item `item'"
            qui gen `imp`i''=``i'' `ifif'
            qui replace `imp`i''=``item'' if ``i''==.&`if'
        }
        else if "`method'"=="log"|"`method'"=="bil" {
            local liste`i'
            forvalues j=1/`nbitems' {
                if `i'!=`j' {
                    local liste`i' `liste`i'' ``j''
                }
            }
            qui sw ,pr(0.05): logit ``i'' `liste`i'' `ifif'
            * retained predictors are the coefficient names minus the constant;
            * macro list operations avoid the length limit of string expressions
            local select : colnames e(b)
            local cons _cons
            local select : list select - cons
            qui logit ``i'' `select' `ifif'
            qui predict `imp`i'' `ifif'
            if "`method'"=="log"&"`round'"=="" {
                qui replace `imp`i''=round(`imp`i'') if `imp`i''!=.&`if'
            }
            else if "`method'"=="bil" {
                qui replace `imp`i''=uniform()<`imp`i'' if `imp`i''!=.&`if'
            }
        }
        else if "`method'"=="worst" {
            qui gen `imp`i''=0 `ifif'
        }
    }
    * observed answers always win over the imputed value
    forvalues i=1/`nbitems' {
        qui replace `imp`i''=``i'' if ``i''!=.&`if'
        qui gen `prefix'``i''=`imp`i'' `ifif'
    }
    * cancel the imputation for rows with more than max missing items
    tempvar miss
    qui egen `miss'=rowmiss(`varlist')
    forvalues i=1/`nbitems' {
        qui replace `prefix'``i''=. if ``i''==.&`miss'>`max'
    }
end

@ -0,0 +1,193 @@
*! version 2.5 17 December 2021
*! Jean-Benoit Hardouin
************************************************************************************************************
* imputeitems: Imputation of missing data of binary items
*
* Version 1 : November 25, 2006 (Jean-Benoit Hardouin) /*Dichotomous data*/
* Version 1.1 : January 26, 2007 (Jean-Benoit Hardouin) /*Correction of a bug with the BIL method*/
* Version 1.2 : March 9, 2007 (Jean-Benoit Hardouin) /*IF*/
* Version 2 : June 30, 2008 (Jean-Benoit Hardouin) /*new names of the methods, MAX option*/
* Version 2.1 : December 3, 2008 (Jean-Benoit Hardouin) /*correction of a bug with the MAX option*/
* Version 2.2 : January 28, 2013 (Jean-Benoit Hardouin) /*noround option*/
* Version 2.3 : February 19, 2013 (Jean-Benoit Hardouin) /*polytomous items with PMS method*/
* Version 2.4 : May 3, 2013 (Jean-Benoit Hardouin) /*minor correction*/
*
* Jean-benoit Hardouin, phD, Assistant Professor
* Team of Biostatistics, Pharmacoepidemiology and Subjective Measures in Health Sciences (UPRES EA 4275 SPHERE)
* University of Nantes - Faculty of Pharmaceutical Sciences
* France
* jean-benoit.hardouin@anaqol.org
*
* News about this program :http://www.anaqol.org
*
* Copyright 2006-2008,2013 2021 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
************************************************************************************************************/
program define imputeitems
    * imputeitems: impute missing item responses and store the completed
    * items in new variables named <prefix><item name>.
    *
    * Syntax: imputeitems varlist [if] [, prefix() method() random max() noround]
    *   prefix   prefix of the generated variables (default: imp)
    *   method   pms (default), ims, cim, ics, log or worst; the legacy names
    *            bip, bii, bic and bil are pms, ims, cim and log with random
    *   random   draw the imputed value at random instead of rounding
    *   max      rows with more than max missing items are left unimputed
    *            (default -1: no limit)
    *   noround  keep the unrounded imputed value (syntax stores this flag
    *            in the local macro round)
    * Exits with return code 198 on an unknown method or when an item is
    * rejected by the coding check.
    version 9
    syntax varlist(min=2 numeric) [if/] [, PREFix(string) METHod(string) RANDom max(int -1) noround]
    * The if expression is pasted after an & in several conditions below, so
    * it is wrapped in parentheses; without them a user condition containing
    * the | operator would be split by operator precedence.
    if "`if'"=="" {
        local if 1
        local ifif
    }
    else {
        local if (`if')
        local ifif if `if'
    }
    local nbitems : word count `varlist'
    tokenize `varlist'
    if `max'==-1 {
        local max=`nbitems'
    }
    if "`method'"=="" {
        local method pms
    }
    if "`prefix'"=="" {
        local prefix imp
    }
    forvalues i=1/`nbitems' {
        qui su ``i'' `ifif'
        * Compared as strings so that an item without any observation (empty
        * r(min)) triggers the message below instead of a syntax error.
        * NOTE(review): the condition only rejects items whose minimum is not 0
        * AND whose maximum is not 1; confirm whether OR was intended.
        if "`r(min)'"!="0"&("`r(max)'"!="1"&"`method'"!="pms") {
            di in red "The {hi:imputeitems} command runs only with dichotomous items"
            exit 198
        }
        * item mean over the selected sample, used by the ims and cim methods
        local mean`i'=r(mean)
    }
    if "`method'"!="pms"&"`method'"!="ims"&"`method'"!="cim"&"`method'"!="ics"&"`method'"!="bip"&"`method'"!="bil"&"`method'"!="bic"&"`method'"!="bii"&"`method'"!="log"&"`method'"!="worst" {
        di in red "The method option is unknow (choose among pms, ims, cim, ics, log and worst)"
        exit 198
    }
    * The random option switches each deterministic method to its random variant
    if "`method'"=="pms"&"`random'"!="" {
        local method bip
    }
    else if "`method'"=="ims"&"`random'"!="" {
        local method bii
    }
    else if "`method'"=="log"&"`random'"!="" {
        local method bil
    }
    else if "`method'"=="cim"&"`random'"!="" {
        local method bic
    }
    else if ("`method'"=="ics"|"`method'"=="worst")&"`random'"!="" {
        di in green "The random process is not available with the {hi:ics} or {hi:worst} methods. The {hi:random} option is ignored."
        local random
    }
    forvalues i=1/`nbitems' {
        tempvar imp`i' tmp`i'
        if "`method'"=="pms"|"`method'"=="bip"|"`method'"=="cim"|"`method'"=="bic" {
            * row mean of the answered items
            qui egen `imp`i''=rowtotal(`varlist') `ifif'
            qui egen `tmp`i''=rownonmiss(`varlist') `ifif'
            qui replace `imp`i''=`imp`i''/`tmp`i'' `ifif'
            qui replace `imp`i''=``i'' if ``i''!=.&`if'
            if "`method'"=="pms"&"`round'"=="" {
                qui replace `imp`i''=round(`imp`i'') `ifif'
            }
            else if "`method'"=="bip" {
                * a missing probability must stay missing: uniform()<. is true
                qui replace `imp`i''=uniform()<`imp`i'' if `imp`i''!=.&`if'
            }
            else if "`method'"=="cim"|"`method'"=="bic"{
                * row total weighted by the item mean, divided by the sum of
                * the means of the items answered in that row, clipped to 0-1
                qui replace `imp`i''=`imp`i''*`tmp`i''*`mean`i'' `ifif'
                qui replace `tmp`i''=0 `ifif'
                forvalues j=1/`nbitems' {
                    qui replace `tmp`i''=`tmp`i''+`mean`j'' if ``j''!=.&`if'
                }
                qui replace `imp`i''=`imp`i''/`tmp`i'' `ifif'
                qui replace `imp`i''=1 if `imp`i''>1&`imp`i''!=.&`if'
                qui replace `imp`i''=0 if `imp`i''<0&`imp`i''!=.&`if'
                if "`method'"=="cim"&"`round'"=="" {
                    qui replace `imp`i''=round(`imp`i'') `ifif'
                }
                else if "`method'"=="bic" {
                    qui replace `imp`i''=uniform()<`imp`i'' if `imp`i''!=.&`if'
                }
            }
        }
        else if "`method'"=="ims"|"`method'"=="bii" {
            qui gen `imp`i''=`mean`i'' `ifif'
            if "`method'"=="ims"&"`round'"=="" {
                qui replace `imp`i''=round(`imp`i'') `ifif'
            }
            else if "`method'"=="bii" {
                qui replace `imp`i''=uniform()<`imp`i'' if `imp`i''!=.&`if'
            }
        }
        else if "`method'"=="ics" {
            * pick the other item with the highest computable correlation
            * NOTE(review): if no correlation can be computed, item stays 0
            * and the replace below fails; confirm whether a guard is wanted.
            local item=0
            local corrmax=-2
            forvalues j=1/`nbitems' {
                if `i'!=`j' {
                    qui corr ``i'' ``j'' `ifif'
                    if r(rho)>`corrmax'&r(rho)!=. {
                        local item `j'
                        local corrmax=r(rho)
                    }
                }
            }
            di "A missing value for the item ``i'' is replaced by the value of the item `item'"
            qui gen `imp`i''=``i'' `ifif'
            qui replace `imp`i''=``item'' if ``i''==.&`if'
        }
        else if "`method'"=="log"|"`method'"=="bil" {
            local liste`i'
            forvalues j=1/`nbitems' {
                if `i'!=`j' {
                    local liste`i' `liste`i'' ``j''
                }
            }
            qui sw ,pr(0.05): logit ``i'' `liste`i'' `ifif'
            * retained predictors are the coefficient names minus the constant;
            * macro list operations avoid the length limit of string expressions
            local select : colnames e(b)
            local cons _cons
            local select : list select - cons
            qui logit ``i'' `select' `ifif'
            qui predict `imp`i'' `ifif'
            if "`method'"=="log"&"`round'"=="" {
                qui replace `imp`i''=round(`imp`i'') if `imp`i''!=.&`if'
            }
            else if "`method'"=="bil" {
                qui replace `imp`i''=uniform()<`imp`i'' if `imp`i''!=.&`if'
            }
        }
        else if "`method'"=="worst" {
            qui gen `imp`i''=0 `ifif'
        }
    }
    * observed answers always win over the imputed value
    forvalues i=1/`nbitems' {
        qui replace `imp`i''=``i'' if ``i''!=.&`if'
        qui gen `prefix'``i''=`imp`i'' `ifif'
    }
    * cancel the imputation for rows with more than max missing items
    tempvar miss
    qui egen `miss'=rowmiss(`varlist')
    forvalues i=1/`nbitems' {
        qui replace `prefix'``i''=. if ``i''==.&`miss'>`max'
    }
end

@ -0,0 +1,70 @@
{smcl}
{* 6May2013}{...}
{hline}
help for {hi:imputeitems}{right:Jean-Benoit Hardouin}
{hline}
{title:Imputation of missing item responses}
{p 8 14 2}{cmd:imputeitems} {it:varlist} [{it:if}] [,{cmdab:pref:ix}({it:string}) {cmdab:meth:od}({it:string}) {cmdab:rand:om} {cmd:max}({it:#}) {cmd:noround}]
{title:Description}
{p 4 4 2}{cmd:imputeitems} imputes missing item responses in different ways: Item Mean Substitution (IMS), Person Mean Substitution (PMS), Corrected Item Mean Substitution (CIM), Interitem Correlation Substitution (ICS), logistic model (LOG) and Worst Case (WORST). A random process can be added to several methods.
{title:Options}
{p 4 8 2}{cmd:prefix} defines the prefix used to name the imputed variables (this prefix is followed by the name of the initial variable). By default, this prefix is "imp".
{p 4 8 2}{cmd:method} defines the method to impute missing data :
{p 8 8 2}{it:pms} computes the proportion of positive response of each individual on non missing items, and impute a deterministic result (if p<.5 then 0, else 1),
{p 8 8 2}{it:ims} computes the proportion of positive response to each items, and impute a deterministic result (if p<.5 then 0, else 1),
{p 8 8 2}{it:cim} computes the proportion of positive response to each items, corrected by the ability of the individual and impute a deterministic result (if p<.5 then 0, else 1),
{p 8 8 2}{it:ics} searches, for each item, for the most correlated item and replaces a missing value with the value of this most correlated item (if the other response is missing too, there is no imputation),
{p 8 8 2}{it:log} explains the responses of each item by a logistic model where the independent variables are the responses to the other items. Only significant variables are retained (5%). This method imputes a deterministic result (if p<.5 then 0, else 1) [{it:log}] to missing responses (if the response to an independent variable is missing, there is no imputation),
{p 8 8 2}{it:worst} replaces the missing data by a 0.
{p 4 8 2}{cmd:random} adds a random effect to the imputation process (available only with {it:pms}, {it:ims}, {it:cim} or {it:log}). In these cases, the imputed value is randomly drawn from a binomial distribution using the parameter p.
{p 4 8 2}{cmd:noround} avoids to round the imputed values to the nearest integer.
{p 4 8 2}{cmd:max} allows imputing missing values only for individuals with a maximal number of missing values defined with this option.
{p 4 8 2}By default, {it:pms} method is working.
{p 4 8 2}The old method names ({it:bip}, {it:bii}, {it:bic} and {it:bil}) continue to work. They correspond to adding the {cmd:random} option to the {it:pms}, {it:ims}, {it:cim} and {it:log} methods.
{title:Example}
{cmd:. imputeitems itemA*} /*PMS method, IMP prefix*/
{cmd:. imputeitems itemA*, prefix(cim) method(cim)}
{cmd:. imputeitems itemA*, method(log) random}
{title:Reference}
{p 4 8 2}{cmd:Huisman M.} (2000), Imputation of missing item responses: some simple techniques. {it: Quality & Quantity}, {cmd:34}, 331-351.
{title:Author}
{p 4 8 2}Jean-Benoit Hardouin, PhD, assistant professor{p_end}
{p 4 8 2}EA 4275 "Biostatistics, Clinical Research and Subjective Measures in Health Sciences"{p_end}
{p 4 8 2}University of Nantes - Faculty of Pharmaceutical Sciences{p_end}
{p 4 8 2}1, rue Gaston Veil - BP 53508{p_end}
{p 4 8 2}44035 Nantes Cedex 1 - FRANCE{p_end}
{p 4 8 2}Email:
{browse "mailto:jean-benoit.hardouin@univ-nantes.fr":jean-benoit.hardouin@univ-nantes.fr}{p_end}
{p 4 8 2}Websites {browse "http://www.anaqol.org":AnaQol}
and {browse "http://www.freeirt.org":FreeIRT}

@ -0,0 +1,190 @@
*! version 2.3 19 February 2013
*! Jean-Benoit Hardouin
************************************************************************************************************
* imputeitems: Imputation of missing data of binary items
*
* Version 1 : November 25, 2006 (Jean-Benoit Hardouin) /*Dichotomous data*/
* Version 1.1 : January 26, 2007 (Jean-Benoit Hardouin) /*Correction of a bug with the BIL method*/
* Version 1.2 : March 9, 2007 (Jean-Benoit Hardouin) /*IF*/
* Version 2 : June 30, 2008 (Jean-Benoit Hardouin) /*new names of the methods, MAX option*/
* Version 2.1 : December 3, 2008 (Jean-Benoit Hardouin) /*correction of a bug with the MAX option*/
* Version 2.2 : January 28, 2013 (Jean-Benoit Hardouin) /*noround option*/
* Version 2.3 : February 19, 2013 (Jean-Benoit Hardouin) /*polytomous items with PMS method*/
*
* Jean-benoit Hardouin, Faculty of Pharmaceutical Sciences - University of Nantes - France
* jean-benoit.hardouin@univ-nantes.fr
*
* News about this program : http://www.anaqol.org
* FreeIRT Project : http://www.freeirt.org
*
* Copyright 2006-2008,2013 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
************************************************************************************************************/
program define imputeitems
    * imputeitems: impute missing item responses and store the completed
    * items in new variables named <prefix><item name>.
    *
    * Syntax: imputeitems varlist [if] [, prefix() method() random max() noround]
    *   prefix   prefix of the generated variables (default: imp)
    *   method   pms (default), ims, cim, ics, log or worst; the legacy names
    *            bip, bii, bic and bil are pms, ims, cim and log with random
    *   random   draw the imputed value at random instead of rounding
    *   max      rows with more than max missing items are left unimputed;
    *            0 is the default sentinel and means no limit
    *   noround  keep the unrounded imputed value (syntax stores this flag
    *            in the local macro round)
    * Exits with return code 198 on an unknown method or when an item is
    * rejected by the coding check.
    version 9
    syntax varlist(min=2 numeric) [if/] [, PREFix(string) METHod(string) RANDom max(int 0) noround]
    * The if expression is pasted after an & in several conditions below, so
    * it is wrapped in parentheses; without them a user condition containing
    * the | operator would be split by operator precedence.
    if "`if'"=="" {
        local if 1
        local ifif
    }
    else {
        local if (`if')
        local ifif if `if'
    }
    local nbitems : word count `varlist'
    tokenize `varlist'
    if `max'==0 {
        local max=`nbitems'
    }
    if "`method'"=="" {
        local method pms
    }
    if "`prefix'"=="" {
        local prefix imp
    }
    forvalues i=1/`nbitems' {
        qui su ``i'' `ifif'
        * The method macro was missing its closing quote here, which broke the
        * whole condition. Values are compared as strings so that an item
        * without any observation (empty r(min)) triggers the message below
        * instead of a syntax error.
        * NOTE(review): the condition only rejects items whose minimum is not 0
        * AND whose maximum is not 1; confirm whether OR was intended.
        if "`r(min)'"!="0"&("`r(max)'"!="1"&"`method'"!="pms") {
            di in red "The {hi:imputeitems} command runs only with dichotomous items"
            exit 198
        }
        * item mean over the selected sample, used by the ims and cim methods
        local mean`i'=r(mean)
    }
    if "`method'"!="pms"&"`method'"!="ims"&"`method'"!="cim"&"`method'"!="ics"&"`method'"!="bip"&"`method'"!="bil"&"`method'"!="bic"&"`method'"!="bii"&"`method'"!="log"&"`method'"!="worst" {
        di in red "The method option is unknow (choose among pms, ims, cim, ics, log and worst)"
        exit 198
    }
    * The random option switches each deterministic method to its random variant
    if "`method'"=="pms"&"`random'"!="" {
        local method bip
    }
    else if "`method'"=="ims"&"`random'"!="" {
        local method bii
    }
    else if "`method'"=="log"&"`random'"!="" {
        local method bil
    }
    else if "`method'"=="cim"&"`random'"!="" {
        local method bic
    }
    else if ("`method'"=="ics"|"`method'"=="worst")&"`random'"!="" {
        di in green "The random process is not available with the {hi:ics} or {hi:worst} methods. The {hi:random} option is ignored."
        local random
    }
    forvalues i=1/`nbitems' {
        tempvar imp`i' tmp`i'
        if "`method'"=="pms"|"`method'"=="bip"|"`method'"=="cim"|"`method'"=="bic" {
            * row mean of the answered items
            qui egen `imp`i''=rowtotal(`varlist') `ifif'
            qui egen `tmp`i''=rownonmiss(`varlist') `ifif'
            qui replace `imp`i''=`imp`i''/`tmp`i'' `ifif'
            qui replace `imp`i''=``i'' if ``i''!=.&`if'
            if "`method'"=="pms"&"`round'"=="" {
                qui replace `imp`i''=round(`imp`i'') `ifif'
            }
            else if "`method'"=="bip" {
                * a missing probability must stay missing: uniform()<. is true
                qui replace `imp`i''=uniform()<`imp`i'' if `imp`i''!=.&`if'
            }
            else if "`method'"=="cim"|"`method'"=="bic"{
                * row total weighted by the item mean, divided by the sum of
                * the means of the items answered in that row, clipped to 0-1
                qui replace `imp`i''=`imp`i''*`tmp`i''*`mean`i'' `ifif'
                qui replace `tmp`i''=0 `ifif'
                forvalues j=1/`nbitems' {
                    qui replace `tmp`i''=`tmp`i''+`mean`j'' if ``j''!=.&`if'
                }
                qui replace `imp`i''=`imp`i''/`tmp`i'' `ifif'
                qui replace `imp`i''=1 if `imp`i''>1&`imp`i''!=.&`if'
                qui replace `imp`i''=0 if `imp`i''<0&`imp`i''!=.&`if'
                if "`method'"=="cim"&"`round'"=="" {
                    qui replace `imp`i''=round(`imp`i'') `ifif'
                }
                else if "`method'"=="bic" {
                    qui replace `imp`i''=uniform()<`imp`i'' if `imp`i''!=.&`if'
                }
            }
        }
        else if "`method'"=="ims"|"`method'"=="bii" {
            qui gen `imp`i''=`mean`i'' `ifif'
            if "`method'"=="ims"&"`round'"=="" {
                qui replace `imp`i''=round(`imp`i'') `ifif'
            }
            else if "`method'"=="bii" {
                qui replace `imp`i''=uniform()<`imp`i'' if `imp`i''!=.&`if'
            }
        }
        else if "`method'"=="ics" {
            * pick the other item with the highest computable correlation
            * NOTE(review): if no correlation can be computed, item stays 0
            * and the replace below fails; confirm whether a guard is wanted.
            local item=0
            local corrmax=-2
            forvalues j=1/`nbitems' {
                if `i'!=`j' {
                    qui corr ``i'' ``j'' `ifif'
                    if r(rho)>`corrmax'&r(rho)!=. {
                        local item `j'
                        local corrmax=r(rho)
                    }
                }
            }
            di "A missing value for the item ``i'' is replaced by the value of the item `item'"
            qui gen `imp`i''=``i'' `ifif'
            qui replace `imp`i''=``item'' if ``i''==.&`if'
        }
        else if "`method'"=="log"|"`method'"=="bil" {
            local liste`i'
            forvalues j=1/`nbitems' {
                if `i'!=`j' {
                    local liste`i' `liste`i'' ``j''
                }
            }
            qui sw ,pr(0.05): logit ``i'' `liste`i'' `ifif'
            * retained predictors are the coefficient names minus the constant;
            * macro list operations avoid the length limit of string expressions
            local select : colnames e(b)
            local cons _cons
            local select : list select - cons
            qui logit ``i'' `select' `ifif'
            qui predict `imp`i'' `ifif'
            if "`method'"=="log"&"`round'"=="" {
                qui replace `imp`i''=round(`imp`i'') if `imp`i''!=.&`if'
            }
            else if "`method'"=="bil" {
                qui replace `imp`i''=uniform()<`imp`i'' if `imp`i''!=.&`if'
            }
        }
        else if "`method'"=="worst" {
            qui gen `imp`i''=0 `ifif'
        }
    }
    * observed answers always win over the imputed value
    forvalues i=1/`nbitems' {
        qui replace `imp`i''=``i'' if ``i''!=.&`if'
        qui gen `prefix'``i''=`imp`i'' `ifif'
    }
    * cancel the imputation for rows with more than max missing items
    tempvar miss
    qui egen `miss'=rowmiss(`varlist')
    forvalues i=1/`nbitems' {
        qui replace `prefix'``i''=. if ``i''==.&`miss'>`max'
    }
end

@ -0,0 +1,201 @@
*! version 2.3 19 February 2013
*! Jean-Benoit Hardouin
************************************************************************************************************
* imputeitems: Imputation of missing data of binary items
*
* Version 1 : November 25, 2006 (Jean-Benoit Hardouin) /*Dichotomous data*/
* Version 1.1 : January 26, 2007 (Jean-Benoit Hardouin) /*Correction of a bug with the BIL method*/
* Version 1.2 : March 9, 2007 (Jean-Benoit Hardouin) /*IF*/
* Version 2 : June 30, 2008 (Jean-Benoit Hardouin) /*new names of the methods, MAX option*/
* Version 2.1 : December 3, 2008 (Jean-Benoit Hardouin) /*correction of a bug with the MAX option*/
* Version 2.2 : January 28, 2013 (Jean-Benoit Hardouin) /*noround option*/
* Version 2.3 : February 19, 2013 (Jean-Benoit Hardouin) /*polytomous items with PMS method*/
*
* Jean-benoit Hardouin, Faculty of Pharmaceutical Sciences - University of Nantes - France
* jean-benoit.hardouin@univ-nantes.fr
*
* News about this program : http://www.anaqol.org
* FreeIRT Project : http://www.freeirt.org
*
* Copyright 2006-2008,2013 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
************************************************************************************************************/
program define imputeitems2
    * imputeitems2: in-place variant of imputeitems. Missing item responses
    * are imputed and written directly into the original item variables.
    *
    * Syntax: imputeitems2 varlist [if] [, prefix() method() random max() noround replace]
    *   method   pms (default), ims, cim, ics, log or worst; the legacy names
    *            bip, bii, bic and bil are pms, ims, cim and log with random
    *   random   draw the imputed value at random instead of rounding
    *   max      rows with more than max missing items are left unimputed;
    *            0 is the default sentinel and means no limit
    *   noround  keep the unrounded imputed value (syntax stores this flag
    *            in the local macro round)
    *   prefix and replace are accepted for compatibility; no new variable is
    *   created, so neither changes the result.
    * Exits with return code 198 on an unknown method or when an item is
    * rejected by the coding check.
    version 9
    syntax varlist(min=2 numeric) [if/] [, PREFix(string) METHod(string) RANDom max(int 0) noround replace]
    * The if expression is pasted after an & in several conditions below, so
    * it is wrapped in parentheses; without them a user condition containing
    * the | operator would be split by operator precedence.
    if "`if'"=="" {
        local if 1
        local ifif
    }
    else {
        local if (`if')
        local ifif if `if'
    }
    local nbitems : word count `varlist'
    tokenize `varlist'
    if `max'==0 {
        local max=`nbitems'
    }
    if "`method'"=="" {
        local method pms
    }
    forvalues i=1/`nbitems' {
        qui su ``i'' `ifif'
        * Compared as strings so that an item without any observation (empty
        * r(min)) triggers the message below instead of a syntax error.
        * NOTE(review): the condition only rejects items whose minimum is not 0
        * AND whose maximum is not 1; confirm whether OR was intended.
        if "`r(min)'"!="0"&("`r(max)'"!="1"&"`method'"!="pms") {
            di in red "The {hi:imputeitems2} command runs only with dichotomous items"
            exit 198
        }
        * item mean over the selected sample, used by the ims and cim methods
        local mean`i'=r(mean)
    }
    if "`method'"!="pms"&"`method'"!="ims"&"`method'"!="cim"&"`method'"!="ics"&"`method'"!="bip"&"`method'"!="bil"&"`method'"!="bic"&"`method'"!="bii"&"`method'"!="log"&"`method'"!="worst" {
        di in red "The method option is unknow (choose among pms, ims, cim, ics, log and worst)"
        exit 198
    }
    * The random option switches each deterministic method to its random variant
    if "`method'"=="pms"&"`random'"!="" {
        local method bip
    }
    else if "`method'"=="ims"&"`random'"!="" {
        local method bii
    }
    else if "`method'"=="log"&"`random'"!="" {
        local method bil
    }
    else if "`method'"=="cim"&"`random'"!="" {
        local method bic
    }
    else if ("`method'"=="ics"|"`method'"=="worst")&"`random'"!="" {
        di in green "The random process is not available with the {hi:ics} or {hi:worst} methods. The {hi:random} option is ignored."
        local random
    }
    forvalues i=1/`nbitems' {
        tempvar imp`i' tmp`i'
        if "`method'"=="pms"|"`method'"=="bip"|"`method'"=="cim"|"`method'"=="bic" {
            * row mean of the answered items
            qui egen `imp`i''=rowtotal(`varlist') `ifif'
            qui egen `tmp`i''=rownonmiss(`varlist') `ifif'
            qui replace `imp`i''=`imp`i''/`tmp`i'' `ifif'
            qui replace `imp`i''=``i'' if ``i''!=.&`if'
            if "`method'"=="pms"&"`round'"=="" {
                qui replace `imp`i''=round(`imp`i'') `ifif'
            }
            else if "`method'"=="bip" {
                * a missing probability must stay missing: uniform()<. is true
                qui replace `imp`i''=uniform()<`imp`i'' if `imp`i''!=.&`if'
            }
            else if "`method'"=="cim"|"`method'"=="bic"{
                * row total weighted by the item mean, divided by the sum of
                * the means of the items answered in that row, clipped to 0-1
                qui replace `imp`i''=`imp`i''*`tmp`i''*`mean`i'' `ifif'
                qui replace `tmp`i''=0 `ifif'
                forvalues j=1/`nbitems' {
                    qui replace `tmp`i''=`tmp`i''+`mean`j'' if ``j''!=.&`if'
                }
                qui replace `imp`i''=`imp`i''/`tmp`i'' `ifif'
                qui replace `imp`i''=1 if `imp`i''>1&`imp`i''!=.&`if'
                qui replace `imp`i''=0 if `imp`i''<0&`imp`i''!=.&`if'
                if "`method'"=="cim"&"`round'"=="" {
                    qui replace `imp`i''=round(`imp`i'') `ifif'
                }
                else if "`method'"=="bic" {
                    qui replace `imp`i''=uniform()<`imp`i'' if `imp`i''!=.&`if'
                }
            }
        }
        else if "`method'"=="ims"|"`method'"=="bii" {
            qui gen `imp`i''=`mean`i'' `ifif'
            if "`method'"=="ims"&"`round'"=="" {
                qui replace `imp`i''=round(`imp`i'') `ifif'
            }
            else if "`method'"=="bii" {
                qui replace `imp`i''=uniform()<`imp`i'' if `imp`i''!=.&`if'
            }
        }
        else if "`method'"=="ics" {
            * pick the other item with the highest computable correlation
            * NOTE(review): if no correlation can be computed, item stays 0
            * and the replace below fails; confirm whether a guard is wanted.
            local item=0
            local corrmax=-2
            forvalues j=1/`nbitems' {
                if `i'!=`j' {
                    qui corr ``i'' ``j'' `ifif'
                    if r(rho)>`corrmax'&r(rho)!=. {
                        local item `j'
                        local corrmax=r(rho)
                    }
                }
            }
            di "A missing value for the item ``i'' is replaced by the value of the item `item'"
            qui gen `imp`i''=``i'' `ifif'
            qui replace `imp`i''=``item'' if ``i''==.&`if'
        }
        else if "`method'"=="log"|"`method'"=="bil" {
            local liste`i'
            forvalues j=1/`nbitems' {
                if `i'!=`j' {
                    local liste`i' `liste`i'' ``j''
                }
            }
            qui sw ,pr(0.05): logit ``i'' `liste`i'' `ifif'
            * retained predictors are the coefficient names minus the constant;
            * macro list operations avoid the length limit of string expressions
            local select : colnames e(b)
            local cons _cons
            local select : list select - cons
            qui logit ``i'' `select' `ifif'
            qui predict `imp`i'' `ifif'
            if "`method'"=="log"&"`round'"=="" {
                qui replace `imp`i''=round(`imp`i'') if `imp`i''!=.&`if'
            }
            else if "`method'"=="bil" {
                qui replace `imp`i''=uniform()<`imp`i'' if `imp`i''!=.&`if'
            }
        }
        else if "`method'"=="worst" {
            qui gen `imp`i''=0 `ifif'
        }
    }
    * The number of missing items per row is counted BEFORE any item is
    * overwritten, otherwise the max filter would see the imputed data.
    tempvar miss
    qui egen `miss'=rowmiss(`varlist')
    * Only missing cells of selected rows within the max limit are filled;
    * observed answers and rows outside the if sample are never touched.
    forvalues i=1/`nbitems' {
        replace ``i''=`imp`i'' if ``i''==.&`if'&`miss'<=`max'
    }
end

@ -0,0 +1,193 @@
*! version 2.4 3 May 2013
*! Jean-Benoit Hardouin
************************************************************************************************************
* imputeitems: Imputation of missing data of binary items
*
* Version 1 : November 25, 2006 (Jean-Benoit Hardouin) /*Dichotomous data*/
* Version 1.1 : January 26, 2007 (Jean-Benoit Hardouin) /*Correction of a bug with the BIL method*/
* Version 1.2 : March 9, 2007 (Jean-Benoit Hardouin) /*IF*/
* Version 2 : June 30, 2008 (Jean-Benoit Hardouin) /*new names of the methods, MAX option*/
* Version 2.1 : December 3, 2008 (Jean-Benoit Hardouin) /*correction of a bug with the MAX option*/
* Version 2.2 : January 28, 2013 (Jean-Benoit Hardouin) /*noround option*/
* Version 2.3 : February 19, 2013 (Jean-Benoit Hardouin) /*polytomous items with PMS method*/
* Version 2.4 : May 3, 2013 (Jean-Benoit Hardouin) /*minor correction*/
*
* Jean-benoit Hardouin, phD, Assistant Professor
* Team of Biostatistics, Pharmacoepidemiology and Subjective Measures in Health Sciences (UPRES EA 4275 SPHERE)
* University of Nantes - Faculty of Pharmaceutical Sciences
* France
* jean-benoit.hardouin@anaqol.org
*
* News about this program :http://www.anaqol.org
*
* Copyright 2006-2008,2013 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
************************************************************************************************************/
program define imputeitems
    * imputeitems: impute missing item responses and store the completed
    * items in new variables named <prefix><item name>.
    *
    * Syntax: imputeitems varlist [if] [, prefix() method() random max() noround]
    *   prefix   prefix of the generated variables (default: imp)
    *   method   pms (default), ims, cim, ics, log or worst; the legacy names
    *            bip, bii, bic and bil are pms, ims, cim and log with random
    *   random   draw the imputed value at random instead of rounding
    *   max      rows with more than max missing items are left unimputed
    *            (default -1: no limit)
    *   noround  keep the unrounded imputed value (syntax stores this flag
    *            in the local macro round)
    * Exits with return code 198 on an unknown method or when an item is
    * rejected by the coding check.
    version 9
    syntax varlist(min=2 numeric) [if/] [, PREFix(string) METHod(string) RANDom max(int -1) noround]
    * The if expression is pasted after an & in several conditions below, so
    * it is wrapped in parentheses; without them a user condition containing
    * the | operator would be split by operator precedence.
    if "`if'"=="" {
        local if 1
        local ifif
    }
    else {
        local if (`if')
        local ifif if `if'
    }
    local nbitems : word count `varlist'
    tokenize `varlist'
    if `max'==-1 {
        local max=`nbitems'
    }
    if "`method'"=="" {
        local method pms
    }
    if "`prefix'"=="" {
        local prefix imp
    }
    forvalues i=1/`nbitems' {
        qui su ``i'' `ifif'
        * Compared as strings so that an item without any observation (empty
        * r(min)) triggers the message below instead of a syntax error.
        * NOTE(review): the condition only rejects items whose minimum is not 0
        * AND whose maximum is not 1; confirm whether OR was intended.
        if "`r(min)'"!="0"&("`r(max)'"!="1"&"`method'"!="pms") {
            di in red "The {hi:imputeitems} command runs only with dichotomous items"
            exit 198
        }
        * item mean over the selected sample, used by the ims and cim methods
        local mean`i'=r(mean)
    }
    if "`method'"!="pms"&"`method'"!="ims"&"`method'"!="cim"&"`method'"!="ics"&"`method'"!="bip"&"`method'"!="bil"&"`method'"!="bic"&"`method'"!="bii"&"`method'"!="log"&"`method'"!="worst" {
        di in red "The method option is unknow (choose among pms, ims, cim, ics, log and worst)"
        exit 198
    }
    * The random option switches each deterministic method to its random variant
    if "`method'"=="pms"&"`random'"!="" {
        local method bip
    }
    else if "`method'"=="ims"&"`random'"!="" {
        local method bii
    }
    else if "`method'"=="log"&"`random'"!="" {
        local method bil
    }
    else if "`method'"=="cim"&"`random'"!="" {
        local method bic
    }
    else if ("`method'"=="ics"|"`method'"=="worst")&"`random'"!="" {
        di in green "The random process is not available with the {hi:ics} or {hi:worst} methods. The {hi:random} option is ignored."
        local random
    }
    forvalues i=1/`nbitems' {
        tempvar imp`i' tmp`i'
        if "`method'"=="pms"|"`method'"=="bip"|"`method'"=="cim"|"`method'"=="bic" {
            * row mean of the answered items
            qui egen `imp`i''=rowtotal(`varlist') `ifif'
            qui egen `tmp`i''=rownonmiss(`varlist') `ifif'
            qui replace `imp`i''=`imp`i''/`tmp`i'' `ifif'
            qui replace `imp`i''=``i'' if ``i''!=.&`if'
            if "`method'"=="pms"&"`round'"=="" {
                qui replace `imp`i''=round(`imp`i'') `ifif'
            }
            else if "`method'"=="bip" {
                * a missing probability must stay missing: uniform()<. is true
                qui replace `imp`i''=uniform()<`imp`i'' if `imp`i''!=.&`if'
            }
            else if "`method'"=="cim"|"`method'"=="bic"{
                * row total weighted by the item mean, divided by the sum of
                * the means of the items answered in that row, clipped to 0-1
                qui replace `imp`i''=`imp`i''*`tmp`i''*`mean`i'' `ifif'
                qui replace `tmp`i''=0 `ifif'
                forvalues j=1/`nbitems' {
                    qui replace `tmp`i''=`tmp`i''+`mean`j'' if ``j''!=.&`if'
                }
                qui replace `imp`i''=`imp`i''/`tmp`i'' `ifif'
                qui replace `imp`i''=1 if `imp`i''>1&`imp`i''!=.&`if'
                qui replace `imp`i''=0 if `imp`i''<0&`imp`i''!=.&`if'
                if "`method'"=="cim"&"`round'"=="" {
                    qui replace `imp`i''=round(`imp`i'') `ifif'
                }
                else if "`method'"=="bic" {
                    qui replace `imp`i''=uniform()<`imp`i'' if `imp`i''!=.&`if'
                }
            }
        }
        else if "`method'"=="ims"|"`method'"=="bii" {
            qui gen `imp`i''=`mean`i'' `ifif'
            if "`method'"=="ims"&"`round'"=="" {
                qui replace `imp`i''=round(`imp`i'') `ifif'
            }
            else if "`method'"=="bii" {
                qui replace `imp`i''=uniform()<`imp`i'' if `imp`i''!=.&`if'
            }
        }
        else if "`method'"=="ics" {
            * pick the other item with the highest computable correlation
            * NOTE(review): if no correlation can be computed, item stays 0
            * and the replace below fails; confirm whether a guard is wanted.
            local item=0
            local corrmax=-2
            forvalues j=1/`nbitems' {
                if `i'!=`j' {
                    qui corr ``i'' ``j'' `ifif'
                    if r(rho)>`corrmax'&r(rho)!=. {
                        local item `j'
                        local corrmax=r(rho)
                    }
                }
            }
            di "A missing value for the item ``i'' is replaced by the value of the item `item'"
            qui gen `imp`i''=``i'' `ifif'
            qui replace `imp`i''=``item'' if ``i''==.&`if'
        }
        else if "`method'"=="log"|"`method'"=="bil" {
            local liste`i'
            forvalues j=1/`nbitems' {
                if `i'!=`j' {
                    local liste`i' `liste`i'' ``j''
                }
            }
            qui sw ,pr(0.05): logit ``i'' `liste`i'' `ifif'
            * retained predictors are the coefficient names minus the constant;
            * macro list operations avoid the length limit of string expressions
            local select : colnames e(b)
            local cons _cons
            local select : list select - cons
            qui logit ``i'' `select' `ifif'
            qui predict `imp`i'' `ifif'
            if "`method'"=="log"&"`round'"=="" {
                qui replace `imp`i''=round(`imp`i'') if `imp`i''!=.&`if'
            }
            else if "`method'"=="bil" {
                qui replace `imp`i''=uniform()<`imp`i'' if `imp`i''!=.&`if'
            }
        }
        else if "`method'"=="worst" {
            qui gen `imp`i''=0 `ifif'
        }
    }
    * observed answers always win over the imputed value
    forvalues i=1/`nbitems' {
        qui replace `imp`i''=``i'' if ``i''!=.&`if'
        qui gen `prefix'``i''=`imp`i'' `ifif'
    }
    * cancel the imputation for rows with more than max missing items
    tempvar miss
    qui egen `miss'=rowmiss(`varlist')
    forvalues i=1/`nbitems' {
        qui replace `prefix'``i''=. if ``i''==.&`miss'>`max'
    }
end

@ -0,0 +1,135 @@
*! version 2.1 24 November 2008
*! Jean-Benoit Hardouin
************************************************************************************************************
* imputemok: Imputation of missing data by a Mokken model
*
* Version 1 : November 25, 2006 (Jean-Benoit Hardouin) /*Dichotomous data*/
* Version 2 : June 30, 2008 (Jean-Benoit Hardouin) /*MAX option*/
* Version 2.1 : November 24, 2008 (Jean-Benoit Hardouin) /*correction of a bug with the MAX option*/
*
* Jean-benoit Hardouin, Faculty of Pharmaceutical Sciences - University of Nantes - France
* jean-benoit.hardouin@univ-nantes.fr
*
* News about this program : http://anaqol.free.fr
* FreeIRT Project : http://freeirt.free.fr
*
* Copyright 2006, 2008 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
************************************************************************************************************/
program define imputemok , rclass
    * Impute missing responses to dichotomous (0/1) items using the ordering
    * of the items by proportion of positive responses (Mokken scaling).
    *   varlist  : two or more numeric items coded 0/1
    *   prefix() : prefix of the created variables (default: imp)
    *   max()    : imputed values are kept only for observations having at
    *              most this many missing items; 0 (default) means no limit
    * One new variable, named prefix followed by the item name, is created
    * for each item. Exits with code 198 if an item is not coded 0/1.
    version 9
    syntax varlist(min=2 numeric) [, PREFix(string) max(int 0)]
    local nbitems : word count `varlist'
    tokenize `varlist'
    if `max'==0 {
        local max=`nbitems'
    }
    if "`prefix'"=="" {
        local prefix imp
    }
    * Row 1 of the matrix flags the items not yet ranked, row 2 stores the
    * item index found at each rank and row 3 the matching proportion
    tempname p
    matrix `p'=J(3,`nbitems',0)
    forvalues i=1/`nbitems' {
        * Any observed value other than 0 or 1 makes the item non dichotomous
        qui count if ``i''!=0&``i''!=1&``i''<.
        if r(N)>0 {
            di in red "The -imputemok- command runs only with dichotomous items"
            exit 198
        }
        qui su ``i''
        if r(N)==0 {
            di in red "The item ``i'' has no observed response"
            exit 2000
        }
        local p`i'=r(mean)
        matrix `p'[1,`i']=`i'
    }
    * Rank the items by decreasing proportion of positive responses.
    * pmax starts below zero so that an item nobody endorsed is still ranked.
    forvalues place=1/`nbitems' {
        local pmax=-1
        local itemax=0
        forvalues i=1/`nbitems' {
            local t=`p'[1,`i']
            if `p`i''>`pmax'&`t'!=0 {
                local pmax=`p`i''
                local itemax=`i'
            }
        }
        matrix `p'[1,`itemax']=0
        matrix `p'[2,`place']=`itemax'
        matrix `p'[3,`place']=`pmax'
    }
    * Working copies of the items, held in temporary variables so that
    * nothing is left behind in the dataset when the program ends
    forvalues i=1/`nbitems' {
        tempvar imp`i'
        qui gen `imp`i''=``i''
    }
    * Step 1: a missing response followed by a positive response becomes 1
    forvalues j=`=`nbitems'-1'(-1)1 {
        local i=`p'[2,`j']
        local suiv=`p'[2,`=`j'+1']
        qui replace `imp`i''=1 if `imp`suiv''==1&`imp`i''==.
    }
    * Step 2: a missing response preceded by a negative response becomes 0
    forvalues j=2/`nbitems'{
        local i=`p'[2,`j']
        local prec=`p'[2,`=`j'-1']
        qui replace `imp`i''=0 if `imp`prec''==0&`imp`i''==.
    }
    * Step 3: running counts of negative (prec0) and positive (prec1)
    * responses among the preceding items; impute 0 when the negative
    * responses are at least as numerous as the positive ones
    forvalues j=1/`nbitems' {
        local i=`p'[2,`j']
        tempvar prec0`i' prec1`i'
        qui gen `prec0`i''=0
        qui gen `prec1`i''=0
        if `j'!=1 {
            local prec=`p'[2,`=`j'-1']
            qui replace `prec0`i''=`prec0`prec''+1 if `imp`prec''==0
            qui replace `prec0`i''=`prec0`prec'' if `imp`prec''!=0
            qui replace `prec1`i''=`prec1`prec''+1 if `imp`prec''==1
            qui replace `prec1`i''=`prec1`prec'' if `imp`prec''!=1
            qui replace `imp`i''=0 if `prec0`i''!=0&`prec0`i''>=`prec1`i''&`imp`i''==.
        }
    }
    * Step 4: same counts among the following items; impute 1 when the
    * positive responses are at least as numerous as the negative ones
    forvalues j=`nbitems'(-1)1 {
        local i=`p'[2,`j']
        tempvar suiv0`i' suiv1`i'
        qui gen `suiv0`i''=0
        qui gen `suiv1`i''=0
        if `j'!=`nbitems' {
            local suiv=`p'[2,`=`j'+1']
            qui replace `suiv0`i''=`suiv0`suiv''+1 if `imp`suiv''==0
            qui replace `suiv0`i''=`suiv0`suiv'' if `imp`suiv''!=0
            qui replace `suiv1`i''=`suiv1`suiv''+1 if `imp`suiv''==1
            qui replace `suiv1`i''=`suiv1`suiv'' if `imp`suiv''!=1
            qui replace `imp`i''=1 if `suiv0`i''<=`suiv1`i''&`suiv1`i''!=0&`imp`i''==.
        }
    }
    * Step 5: remaining missing values are drawn at random with the observed
    * proportion of positive responses, then the result is saved
    forvalues j=1/`nbitems' {
        local i=`p'[2,`j']
        qui replace `imp`i''=uniform()<=`p`i'' if `imp`i''==.
        qui gen `prefix'``i''=`imp`i''
    }
    * Undo the imputation for observations with more than max missing items
    tempvar miss
    qui egen `miss'=rowmiss(`varlist')
    forvalues i=1/`nbitems' {
        qui replace `prefix'``i''=. if ``i''==.&`miss'>`max'
    }
end

@ -0,0 +1,58 @@
{smcl}
{* 30June2008}{...}
{hline}
help for {hi:imputemok}{right:Jean-Benoit Hardouin}
{hline}
{title:Imputation of missing item responses with the Mokken scaling}
{p 8 14 2}{cmd:imputemok} {it:varlist} [,{cmdab:pref:ix}({it:string}) {cmdab:max}({it:#})]
{title:Description}
{p 4 4 2}{cmd:imputemok} imputes missing item responses with the Mokken scaling as defined in Huisman and Molenaar (2001). This module runs only with dichotomous items.
{p 4 4 2}The following algorithm is used:
{p 8 4 2}First, the items are ordered according to the percentage of positive responses (in a decreasing order).
{p 8 4 2}For each individual, if a positive response follows a missing response, it is imputed to 1.
{p 8 4 2}Else if a negative response precedes a missing response, it is imputed to 0.
{p 8 4 2}Else we count the number of positive and negative responses preceding a missing response and, if the number of negative responses is greater than or equal to the number of positive responses, the missing value is imputed to 0.
{p 8 4 2}Else we count the number of positive and negative responses following a missing response and, if the number of positive responses is greater than or equal to the number of negative responses, the missing value is imputed to 1.
{p 8 4 2}Else, the missing value is imputed by drawing a random number based on the observed proportion of positive responses to the item.
{title:Options}
{p 4 8 2}{cmd:prefix} defines the prefix used to name the imputed variables (this prefix is followed by the name of the initial variable). By default, this prefix is "imp".
{p 4 8 2}{cmd:max} allows imputing missing values only for individuals with a maximal number of missing values defined with this option.
{title:Example}
{cmd:. imputemok itemA*}
{cmd:. imputemok itemA*,prefix(new)}
{title:Reference}
{p 4 8 2}{cmd:Huisman M. and Molenaar I. W.}, {it:Imputation of missing scale data with item response models}. In A. Boomsma, M.A.J. van Duijn, & T.A.B. Snijders (Eds.), {it: Essays on item response theory} (pp. 221-244).
New York: Springer-Verlag, 2001.
{title:Author}
{p 4 8 2}Jean-Benoit Hardouin, PhD, assistant professor{p_end}
{p 4 8 2}EA 4275 "Biostatistics, Clinical Research and Subjective Measures in Health Sciences"{p_end}
{p 4 8 2}University of Nantes - Faculty of Pharmaceutical Sciences{p_end}
{p 4 8 2}1, rue Gaston Veil - BP 53508{p_end}
{p 4 8 2}44035 Nantes Cedex 1 - FRANCE{p_end}
{p 4 8 2}Email:
{browse "mailto:jean-benoit.hardouin@univ-nantes.fr":jean-benoit.hardouin@univ-nantes.fr}{p_end}
{p 4 8 2}Websites {browse "http://www.anaqol.org":AnaQol}
and {browse "http://www.freeirt.org":FreeIRT}

@ -0,0 +1,146 @@
*! version 2 30 June 2008
*! Jean-Benoit Hardouin
************************************************************************************************************
* imputerasch: Imputation of missing data by a Rasch model
*
* Version 1 : November 25, 2006 (Jean-Benoit Hardouin) /*Dichotomous data*/
* Version 1.1 : January 26, 2007 (Jean-Benoit Hardouin) /*Correction of a bug with the Binomial option*/
* Version 2 : June 30, 2008 (Jean-Benoit Hardouin) /*norandom option, max option*/
*
* Jean-benoit Hardouin, Faculty of Pharmaceutical Sciences - University of Nantes - France
* jean-benoit.hardouin@univ-nantes.fr
*
* News about this program : http://www.anaqol.org
* FreeIRT Project : http://www.freeirt.org
*
* Copyright 2006-2008 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
************************************************************************************************************/
program define imputerasch
    * Impute missing responses to dichotomous items from a Rasch model
    * estimated with gllamm.
    *   prefix()      : prefix of the created variables (default: imp)
    *   norandom      : round the expected probability instead of drawing the
    *                   imputed value at random (nobinomial is the old name)
    *   saveproba()   : prefix of variables storing the expected probabilities
    *   nbiteration() : maximal number of passes of the iterative procedure
    *   details       : display the number of missing data per item
    *   max()         : imputed values are kept only for observations having
    *                   at most this many missing items; 0 means no limit
    version 9
    syntax varlist(min=2 numeric) [, PREFix(string) noBINomial noRANDom SAVEProba(string) NBITeration(integer 1) DETails MAX(int 0) ]
    preserve
    local nbitems : word count `varlist'
    tokenize `varlist'
    if `max'==0 {
        local max=`nbitems'
    }
    if "`prefix'"=="" {
        local prefix imp
    }
    if "`random'"!="" {
        local binomial nobinomial
    }
    if "`binomial'"==""&`nbiteration'!=1 {
        local binomial nobinomial
        di in green "You must use the {hi:norandom} option when you use iterative process. This option is assumed."
    }
    if `nbiteration'!=1 {
        di in ye "Iteration : 1"
    }
    * Reshape to one row per individual and item, with one indicator
    * variable per item, which is the layout passed to gllamm below
    tempvar lt0 lt1 score id item lt name
    qui gen `id'=_n
    qui egen `score'=rowtotal(`varlist')
    forvalues i=1/`nbitems' {
        qui rename ``i'' `name'`i'
    }
    qui reshape long `name' ,i(`id') j(`item')
    forvalues i=1/`nbitems' {
        qui gen ``i''=`item'==`i'
    }
    qui gllamm `name' `varlist' ,family(bin) nocons link(logit) i(`id') it(1)
    qui gllapred `lt' ,u
    qui bysort `id':egen `lt'=min(`lt'm1)
    drop `lt's1 `lt'm1
    tempname diff
    matrix `diff'=e(b)
    drop `varlist'
    qui reshape wide `name' ,i(`id') j(`item')
    * Expected probability of a positive response for each item
    forvalues i=1/`nbitems' {
        qui rename `name'`i' ``i''
        tempvar imp`i'
        local diff`i'=`diff'[1,`i']
        qui gen `imp`i''=exp(`lt'-`diff`i'')/(1+exp(`lt'-`diff`i''))
        if "`saveproba'"!="" {
            qui gen `saveproba'``i''=`imp`i''
        }
        if "`binomial'"!="" {
            qui replace `imp`i''=round(`imp`i'')
        }
        else {
            qui replace `imp`i''=uniform()<`imp`i''
        }
    }
    * Cancel the preserve: the data in memory, with the new variables, is kept
    restore,not
    * Observed responses always take precedence over the model-based values
    forvalues i=1/`nbitems' {
        qui replace `imp`i''=``i'' if ``i''!=.
        qui gen `prefix'``i''=`imp`i''
    }
    if "`details'"!="" {
        forvalues i=1/`nbitems' {
            qui count if ``i''==.
            local nbmiss`i'=r(N)
            di in ye "``i'':" in gr " Number of missing data: " in ye "`nbmiss`i''"
        }
    }
    if `nbiteration'>1 {
        local flag=0
        local it=2
        tempname p new
        * Re-estimate the model on the completed data until the imputed
        * values no longer change or the number of iterations is reached
        while `flag'!=1&`it'<=`nbiteration' {
            di in ye "Iteration : `it'"
            imputerasch `prefix'`1'-`prefix'``nbitems'', savep(`p') prefix(`new') nobin
            local flag=1
            forvalues i=1/`nbitems' {
                qui replace `new'`prefix'``i''=round(`p'`prefix'``i'') if ``i''==.
                qui count if ``i''==.
                local nbmiss`i'=r(N)
                * The agreement count must be read before any other count
                qui count if `prefix'``i''==`new'`prefix'``i''&``i''==.
                local coher=r(N)
                * An item without missing data is stable by definition
                local txcoher=100
                if `nbmiss`i''>0 {
                    local txcoher=`coher'/`nbmiss`i''*100
                }
                di in ye "``i'':" in gr " Coherence rate between iterations `it' and `=`it'-1': " in ye %6.2f `txcoher' in gr "%"
                if int(`txcoher')!=100 {
                    local flag=0
                }
                qui replace `prefix'``i''=`new'`prefix'``i''
            }
            drop `p'`prefix'`1'-`p'`prefix'``nbitems'' `new'`prefix'`1'-`new'`prefix'``nbitems''
            local ++it
        }
    }
    * Undo the imputation for observations with more than max missing items
    tempvar miss
    qui egen `miss'=rowmiss(`varlist')
    forvalues i=1/`nbitems' {
        qui replace `prefix'``i''=. if ``i''==.&`miss'>`max'
    }
end

@ -0,0 +1,63 @@
{smcl}
{* 30june2008}{...}
{hline}
help for {hi:imputerasch}{right:Jean-Benoit Hardouin}
{hline}
{title:Imputation of missing binary variables by a Rasch model}
{p 8 14 2}{cmd:imputerasch} {it:varlist} [{cmd:,} {cmdab:pref:ix}({it:string}) {cmdab:noran:dom} {cmdab:savep:roba}({it:string}) {cmdab:nbit:eration}({it:#}) {cmdab:det:ails} {cmdab:max}({it:#})]
{p 8 14 2}{it:varlist} is a list of two or more existing dichotomous variables.
{title:Description}
{p 4 8 2}{cmd:imputerasch} imputes missing binary data by a Rasch model.
The parameters of the Rasch model are estimated on the complete data, then the missing data are imputed from the estimated probability
for each individual to respond positively to each item.
By default, the imputed value is the result of a random draw from a Bernoulli random variable with this probability as its
parameter, but it is possible to assign the value of the missing data deterministically (0 if p<0.5 and 1 if p>=.5) with
the {cmd:norandom} option.
An iterative procedure can then be run by estimating the parameters of the Rasch model on the existing and imputed data,
and by possibly correcting the imputed data at each step (see the {cmd:nbiteration} option).
This procedure stops as soon as the maximal number of iterations allowed is reached, or as soon as the imputed values are stable.
{title:Options}
{p 4 8 2}{cmd:prefix}. The original variables (with missing data) are kept. New variables are created by imputing new values to the missing data.
The names of these new variables are the names of the original variables preceded by the prefix defined in this option. By default, this prefix is "imp".
{p 4 8 2}{cmd:norandom} disables the random draw of the imputed value (which is the default behaviour). A deterministic process is used instead: if the expected probability is <0.5, the imputed value is 0, else the imputed value is 1
(the old name of this option, {cmd:nobinomial}, is still accepted).
{p 4 8 2}{cmd:saveproba} saves the expected probabilities in variables whose names begin with the string defined in this option.
{p 4 8 2}{cmd:nbiteration} realizes an iterative procedure which is stopped as soon as the maximal number of iterations is attained, or as soon as the imputed data are stable.
{p 4 8 2}{cmd:details} gives details on the imputation.
{p 4 8 2}{cmd:max} allows imputing missing values only for individuals with a maximal number of missing values defined with this option.
{title:Example}
{inp:. imputerasch item*}
{inp:. imputerasch item*, norandom saveproba(p) prefix(dataimputed) max(4)}
{inp:. imputerasch item1-item5, nbiteration(5) details}
{title:Author}
{p 4 8 2}Jean-Benoit Hardouin, PhD, assistant professor{p_end}
{p 4 8 2}EA 4275 "Biostatistics, Clinical Research and Subjective Measures in Health Sciences"{p_end}
{p 4 8 2}University of Nantes - Faculty of Pharmaceutical Sciences{p_end}
{p 4 8 2}1, rue Gaston Veil - BP 53508{p_end}
{p 4 8 2}44035 Nantes Cedex 1 - FRANCE{p_end}
{p 4 8 2}Email:
{browse "mailto:jean-benoit.hardouin@univ-nantes.fr":jean-benoit.hardouin@univ-nantes.fr}{p_end}
{p 4 8 2}Websites {browse "http://www.anaqol.org":AnaQol}
and {browse "http://www.freeirt.org":FreeIRT}

@ -0,0 +1,414 @@
program define irtpoly,eclass
    * Fit a polytomous IRT model (pcm by default, rsm or rasch on request) by
    * writing a SAS program that calls the anaqol SAS macro, running SAS, and
    * reading the exported text files back into Stata.
    * All exchange files are written under c:/data/irtpoly/ followed by the
    * project() name; the SAS executable and macro paths are hard coded below.
    * Main options, as used in this program:
    *   group() latent() : names under which the group and latent trait
    *                      variables are kept (dropped when not specified)
    *   fixed()          : name of a matrix of fixed item parameters
    *   last             : skip the SAS run and reuse the existing output files
    *   long             : launch SAS with winexec and exit immediately
    *   sasoutput        : open the SAS listing in the viewer
    *   graph test       : per item plots and fit tests
    *   replace          : drop existing group and latent variables first
    version 11.0
    syntax varlist(min=3 numeric) [if] [in] [,test Graph group(string) latent(string) REPlace Fixed(string) FIXEDVar(real -1) rsm rasch Last SASOUTput long Covariables(varlist) Covariablemean(varname) noCentered Project(string)]
    preserve
    capture mkdir "c:/data/irtpoly//`project'/"
    if !_rc {
        di in green "The directory c:/data/irtpoly//`project' has been created"
    }
    local dir="c:/data/irtpoly//`project'/"
    * savegroup and savelatent record whether the user asked to keep the
    * group and latent variables; otherwise temporary names are used
    local savegroup=1
    if "`group'"=="" {
        tempname group
        local savegroup=0
    }
    local savelatent=1
    if "`latent'"=="" {
        tempname latent
        local savelatent=0
    }
    tempvar order
    gen `order'=_n
    tempfile pcmsasfile
    qui save `pcmsasfile'
    qui count `if' `in'
    local nbind=r(N)
    tokenize `varlist'
    local nbitems:word count `varlist'
    * Highest observed category of each item and over all items
    local max=0
    forvalues i=1/`nbitems' {
        qui su ``i''
        local max`i'=r(max)
        if `max`i''>`max' {
            local max=`max`i''
        }
    }
    * Export one row per distinct response pattern with its frequency
    tempname freq
    contract `varlist' `covariables' `covariablemean', freq(`freq')
    qui sort `varlist'
    qui outsheet `varlist' `covariables' `covariablemean' `freq' using "`dir'irtpoly_data.txt",replace
    drop _all
    * Export the fixed parameters, one named row per item and modality
    if "`fixed'"!="" {
        capture confirm matrix `fixed'
        if _rc {
            di as error "The {hi:`fixed'} matrix does not exist"
            error 198
        }
        tempname matfix
        matrix `matfix'=`fixed''
        qui svmat `matfix'
        qui rename `matfix'1 estimate
        qui gen parameter=""
        order parameter estimate
        local l=1
        forvalues j=1/`nbitems' {
            forvalues m=1/`max`j'' {
                if "`rasch'"=="" {
                    qui replace parameter="beta``j''_`m'" in `l'
                }
                else {
                    qui replace parameter="beta``j''" in `l'
                }
                local ++l
            }
        }
        qui outsheet using "`dir'/irtpoly_fixedparameters.txt",replace
    }
    * Build the SAS program line by line in a string variable
    drop _all
    qui set obs 1000
    qui generate str txt="%include 'C:\ado\macros SAS\anaqolv48.sas';" in 1
    qui replace txt="%include 'C:\Documents and Settings\Jean-Benoit Hardouin\Mes documents\Boulot JB\Enseignement\ENSAI\2009-2010\macros\gammasymv1.sas';" in 2
    qui replace txt="PROC IMPORT OUT=WORK.data DATAFILE='`dir'irtpoly_data.txt' DBMS=TAB REPLACE;GETNAMES=YES;DATAROW=2; RUN;" in 3
    if "`fixed'"!="" {
        qui replace txt="PROC IMPORT OUT=WORK.fixed DATAFILE='`dir'irtpoly_fixedparameters.txt' DBMS=TAB REPLACE;GETNAMES=YES;DATAROW=2; RUN;" in 4
    }
    local txt="%anaqol(DATASET=data,ITEMS=`varlist',DETAILS=yes,WEIGHT=`freq',MODEL="
    if "`rsm'"==""&"`rasch'"=="" {
        local txt `txt'pcm
    }
    else if "`rasch'"!="" {
        local txt `txt'rasch, TEST=no
    }
    else {
        local txt `txt'rsm
    }
    if "`fixed'"!="" {
        local txt `txt',FIXED=fixed
    }
    if `fixedvar'>0 {
        local txt `txt',FIXEDVAR=`fixedvar'
    }
    * NOTE(review): fixedvar is a real option with default -1, so the second
    * test below is always true and only fixed() matters here. Confirm whether
    * a test on fixedvar being positive was intended.
    if "`fixed'"!=""&"`fixedvar'"!="" {
        local centered nocentered
    }
    if "`centered'"!="" {
        local txt `txt',CENTERED=yes
    }
    if "`covariables'"!="" {
        local txt `txt',COVARIABLES=`covariables'
    }
    if "`covariablemean'"!="" {
        local txt `txt',COVARIABLEMEAN=`covariablemean'
    }
    local txt `txt');
    qui replace txt="`txt'" in 10
    qui replace txt="PROC EXPORT DATA= WORK.Out_parameters OUTFILE='`dir'irtpoly_parameters.txt' DBMS=TAB REPLACE;RUN;" in 11
    qui replace txt="PROC EXPORT DATA= WORK.Out_latent OUTFILE='`dir'irtpoly_latent.txt' DBMS=TAB REPLACE;RUN;" in 12
    qui replace txt="PROC EXPORT DATA= WORK.Out_rep OUTFILE='`dir'irtpoly_rep.txt' DBMS=TAB REPLACE;RUN;" in 13
    qui replace txt="PROC EXPORT DATA= WORK.Out_fit OUTFILE='`dir'irtpoly_fit.txt' DBMS=TAB REPLACE;RUN;" in 14
    qui outsheet txt using "`dir'irtpoly_pgmsas.txt", replace nonames noquote
    * Run SAS unless the last option asks to reuse the previous output
    if "`last'"=="" {
        /*local date=c(current_date)
        local jour=substr("`date'",1,2)
        local mois=substr("`date'",4,3)
        local an=substr("`date'",8,4)
        if "`mois'"=="Jan" {local moisd 01}
        if "`mois'"=="Feb" {local moisd 02}
        if "`mois'"=="Mar" {local moisd 03}
        if "`mois'"=="Apr" {local moisd 04}
        if "`mois'"=="May" {local moisd 05}
        if "`mois'"=="Jun" {local moisd 06}
        if "`mois'"=="Jul" {local moisd 07}
        if "`mois'"=="Aug" {local moisd 08}
        if "`mois'"=="Sep" {local moisd 09}
        if "`mois'"=="Oct" {local moisd 10}
        if "`mois'"=="Nov" {local moisd 11}
        if "`mois'"=="Dec" {local moisd 12}
        di "`jour' `mois' `an' `moisd'"
        shell "date" "01/01/2009"
        */
        if "`long'"!=""{
            local cmd winexec
        }
        else {
            local cmd shell
        }
        `cmd' "C:\Program Files\SAS\SAS 9.2\SASFoundation\9.2\sas.exe" "`dir'irtpoly_pgmsas.txt" -print "`dir'irtpoly_pgmsas.lst" -nolog
        *shell "cmd.exe" "date `jour'/`moid'/`an'"
        if "`long'"!="" {
            exit
        }
    }
    if "`sasoutput'"!="" {
        view "`dir'irtpoly_pgmsas.lst"
    }
    *set trace on
    *set trace on
    * Read back the fit statistics exported by SAS
    drop _all
    qui insheet using "`dir'irtpoly_fit.txt"
    qui su value if descr=="-2 Log Likelihood"
    local m2ll=r(mean)
    local ll=-`m2ll'/2
    qui su value if descr=="AIC (smaller is better)"
    local aic=r(mean)
    qui su value if descr=="BIC (smaller is better)"
    local bic=r(mean)
    * Read back the parameter estimates exported by SAS
    drop _all
    qui insheet using "`dir'irtpoly_parameters.txt"
    tempname parameters separameters
    qui su estimate if parameter=="var"
    local variance=r(mean)
    qui su standarderror if parameter=="var"
    local sevariance=r(mean)
    *set trace on
    local nbcov:word count `covariables'
    forvalues i=1/`nbcov' {
        local cov`i':word `i' of `covariables'
        qui su estimate if parameter=="beta`cov`i''"
        local betacov`i'=r(mean)
        qui su standarderror if parameter=="beta`cov`i''"
        local secov`i'=r(mean)
    }
    *set trace off
    *su
    di in gr "Number of individuals: " in ye `nbind'
    di in gr "Number of items: " in ye `nbitems'
    di in gr "log-likelihood: " in ye %10.4f `ll'
    di in gr "AIC: " in ye %10.4f `aic'
    di in gr "BIC: " in ye %10.4f `bic'
    di
    di
    if "`rsm'"=="" {
        * One row per item and one column per modality
        matrix `parameters'=J(`nbitems',`max',0)
        matrix `separameters'=J(`nbitems',`max',0)
        local l=1
        forvalues i=1/`nbitems' {
            forvalues j=1/`max`i'' {
                if "`fixed'"=="" {
                    qui su estimate if parameter=="beta``i''_`j'"
                    matrix `parameters'[`i',`j']=r(mean)
                    qui su standarderror if parameter=="beta``i''_`j'"
                    matrix `separameters'[`i',`j']=r(mean)
                }
                else {
                    matrix `parameters'[`i',`j']=`fixed'[1,`l']
                }
                local ++l
            }
        }
        di in gr "{hline 52}"
        di in gr "Items" _col(18) "Modality" _col(30) "Estimate" _col(39) "Standard error"
        di in gr "{hline 52}"
        forvalues j=1/`nbitems' {
            di in gr "``j''" _c
            * The number of modalities listed is that of the item displayed
            forvalues m=1/`max`j'' {
                di _col(25) in gr `m' _col(30) %8.4f in ye `parameters'[`j',`m'] _col(45) %8.4f in ye `separameters'[`j',`m']
            }
        }
    }
    else {
        * Rating scale model: item parameters first, then the tau parameters
        matrix `parameters'=J(`=`nbitems'+`max'-1',1,0)
        matrix `separameters'=J(`=`nbitems'+`max'-1',1,0)
        local l=1
        if "`fixed'"=="" {
            forvalues i=1/`nbitems' {
                qui su estimate if parameter=="beta``i''"
                matrix `parameters'[`i',1]=r(mean)
                qui su standarderror if parameter=="beta``i''"
                matrix `separameters'[`i',1]=r(mean)
                local ++l
            }
            forvalues l=`=`nbitems'+1'/`=`nbitems'+`max'-1' {
                local m=`l'-`nbitems'+1
                qui su estimate if parameter=="t`m'"
                matrix `parameters'[`l',1]=r(mean)
                local tau`m'=r(mean)
                qui su standarderror if parameter=="t`m'"
                matrix `separameters'[`l',1]=r(mean)
            }
        }
        else {
            matrix `parameters'=`fixed'
        }
        di in gr "{hline 52}"
        di in gr "Items" _col(30) "Estimate" _col(39) "Standard error"
        di in gr "{hline 52}"
        forvalues j=1/`nbitems' {
            di in gr "``j''" _col(30) %8.4f in ye `parameters'[`j',1] _col(45) %8.4f in ye `separameters'[`j',1]
        }
        forvalues l=`=`nbitems'+1'/`=`nbitems'+`max'-1' {
            di in gr "tau`=`l'-`nbitems''" _col(30) %8.4f in ye `parameters'[`l',1] _col(45) %8.4f in ye `separameters'[`l',1]
        }
    }
    di in gr "{hline 52}"
    di in gr "Variance" _col(30) %8.4f in ye `variance' _col(45) %8.4f in ye `sevariance'
    di in gr "{hline 52}"
    forvalues i=1/`nbcov' {
        di in gr "`cov`i''" _col(30) %8.4f in ye `betacov`i'' _col(45) %8.4f in ye `secov`i''
    }
    if "`covariables'"!="" {
        di in gr "{hline 52}"
    }
    *matrix list `parameters'
    *fdsjklgvsjf
    *set trace on
    * Attach the predicted latent trait to the original data by matching on
    * the response pattern and the covariates
    drop _all
    qui insheet using "`dir'irtpoly_rep.txt"
    qui sort anaqol_id
    qui sort `varlist' `covariables' `covariablemean'
    qui tempfile pcmsas
    qui rename theta `latent'
    qui rename stderrpred se`latent'
    qui save `pcmsas',replace
    qui use `pcmsasfile', clear
    qui sort `varlist'
    qui gen anaqol_id=_n
    qui sort anaqol_id
    qui sort `varlist' `covariables' `covariablemean'
    /***********************************************
    qui merge 1:1 anaqol_id using "`pcmsas'",nogen
    ***********************************************/
    qui merge m:1 `varlist' `covariables' `covariablemean' using "`pcmsas'",nogen
    *tempvar group
    *set trace on
    forvalues i=1/`nbcov' {
        qui replace `latent'=`latent'+`betacov`i''*`cov`i''
    }
    *qui save `latent' using c:\latent.dta
    * Groups of individuals built from the latent trait by gengroup
    qui gengroup `latent', det replace continuous newvariable(`group')
    qui su `group'
    local nbgroup=r(max)
    forvalues g=1/`nbgroup' {
        qui count if `group'==`g'
        local group`g'=r(N)
    }
    forvalues i=1/`nbitems' {
        *set trace on
        * Observed proportion of each modality within each group
        tempname freq`i'
        qui tab `group' ``i'',matcell(`freq`i'') row nofreq m
        *matrix list `freq`i''
        forvalues g=1/`nbgroup' {
            qui count if `group'==`g'&``i''!=.
            local freq`g'_`i'=r(N)
            forvalues j=0/`max`i'' {
                matrix `freq`i''[`g',`=`j'+1']=`freq`i''[`g',`=`j'+1']/`freq`g'_`i''
            }
        }
        * Build, as text, the expression of the numerator of each modality
        * probability and of their sum, evaluated later with gen
        local D`i'=0
        forvalues j=0/`max`i'' {
            local D`i'_`j' exp(`j'*`latent'
            forvalues l=1/`j' {
                if "`rsm'"=="" {
                    local D`i'_`j' `D`i'_`j''-`parameters'[`i',`l']
                }
                else {
                    local D`i'_`j' `D`i'_`j''-`parameters'[`i',1]
                }
            }
            if "`rsm'"!="" {
                forvalues m=2/`j' {
                    local D`i'_`j' `D`i'_`j''-`tau`m''
                }
            }
            local D`i'_`j' `D`i'_`j'')
            local D`i' `D`i''+`D`i'_`j''
        }
    }
    * Mean of the latent trait within each group
    tempvar theta2
    qui gen `theta2'=0
    forvalues g=1/`nbgroup' {
        qui su `latent' if `group'==`g'
        local thetag`g'=r(mean)
        qui replace `theta2'=`thetag`g'' if `group'==`g'
    }
    local colors="blue red green gray pink purple"
    *local chi2=0
    forvalues i=1/`nbitems' {
        local line`i'
        local scatter`i'
        tempvar propE``i'' propO``i''
        qui gen `propE``i'''=0
        qui gen `propO``i'''=0
        forvalues j=0/`max`i'' {
            local color:word `=`j'+1' of `colors'
            tempvar propE``i''_`j' propO``i''_`j'
            *matrix list `parameters'
            *di "qui gen `propE``i''_`j''=`D`i'_`j''/(`D`i'')"
            qui gen `propE``i''_`j''=`D`i'_`j''/(`D`i'')
            *su `propE``i''_`j''
            label variable `propE``i''_`j'' "Expected values / modality `j'"
            local line`i' `line`i'' (line `propE``i''_`j'' `latent', lcolor(`color') lwidth(thick))
            qui gen `propO``i''_`j''=0
            forvalues g=1/`nbgroup' {
                local tmp=`freq`i''[`g',`=`j'+1']
                qui replace `propO``i''_`j''=`tmp' if `group'==`g'
            }
            label variable `propO``i''_`j'' "Observed values / modality `j'"
            qui replace `propO``i'''=`propO``i'''+`j'*`propO``i''_`j''
            qui replace `propE``i'''=`propE``i'''+`j'*`propE``i''_`j''
            local scatter`i' `scatter`i'' (scatter `propO``i''_`j'' `theta2', mcolor(`color'))
        }
        qui sort `latent'
        if "`graph'"!="" {
            twoway `line`i'' `scatter`i'',name(``i'', replace)
        }
        label variable `propE``i''' "Expected values"
        label variable `propO``i''' "Observed values"
        if "`graph'"!="" {
            twoway (line `propE``i''' `latent', lcolor(green) lwidth(thick)) (scatter `propO``i''' `theta2',mcolor(green)),name(``i''2, replace)
        }
        *set trace on
        if "`test'"!="" {
            * Sum over the groups of the squared t statistics comparing
            * expected and observed item scores
            local chi2=0
            forvalues g=1/`nbgroup' {
                qui ttest `propE``i'''=`propO``i''' if `group'==`g'
                local t`g'=r(t)
                qui count if `group'==`g'
                local nb`g'=r(N)
                di "local chi2=`chi2'+/*`nb`g''**/(`t`g'')^2"
                local chi2=`chi2'+/*`nb`g''**/(`t`g'')^2
            }
            di "Chi-square statistics: " %8.4f `chi2'
            * The p-value is the upper tail probability of the statistic
            local pchi2=chi2tail(`=`nbgroup'-1',`chi2')
            di "p-values: " %8.4f `pchi2'
        }
    }
    *set trace on
    * Keep only what must be merged back into the data of the user
    tempfile saveu
    qui keep `order' `latent' se`latent' `group'
    if `savegroup'==0 {
        drop `group'
    }
    if `savelatent'==0 {
        drop `latent'
        drop se`latent'
    }
    sort `order'
    qui save `saveu' ,replace
    restore
    qui gen `order'=_n
    qui sort `order'
    if "`replace'"!="" {
        capture drop `group'
        capture drop `latent'
        capture drop se`latent'
    }
    qui merge 1:1 `order' using `saveu',nogen
end

@ -0,0 +1,27 @@
*! NJC 1.0.0 19 Sept 2005
program isvar, rclass
    * Sort the tokens typed by the user into those that expand to existing
    * variables and those that do not; both lists are displayed and returned
    * in r(varlist) and r(badlist).
    version 8
    syntax anything
    local found
    local notfound
    foreach token of local anything {
        capture unab expanded : `token'
        if _rc {
            local notfound `notfound' `token'
        }
        else {
            local found `found' `expanded'
        }
    }
    di
    * Existing variables, with the noun in singular or plural form
    if "`found'" != "" {
        local k : word count `found'
        local noun = plural(`k', "variable")
        di as txt "{p}`noun': `found'{p_end}"
        return local varlist "`found'"
    }
    * Tokens that do not match any variable
    if "`notfound'" != "" {
        local k : word count `notfound'
        local noun = plural(`k', "not variable")
        di as txt "{p}`noun': `notfound'{p_end}"
        return local badlist "`notfound'"
    }
end

@ -0,0 +1,495 @@
* version 1.0.5 23set2004 MER version 8.0
capture program drop kapci
program define kapci, rclass byable(recall)
version 8.0
if "`1'" == "" {
di _n in gr " Syntax is:" _n
di in wh " kapci " in gr "[varlist] [if] [in] , [ " _c
di in wh "est" in gr "im(" in wh "an bc p n bsall" in gr ") "
di in wh _col(22) "w" in gr "gt" _c
di in gr "(" in wh "w w2 any_wgt" in gr ") " _c
di in wh "r" in gr "eps(" in wh "#" in gr ") " _c
di in wh "si" in gr "ze(" in wh "#" in gr ") "
di in wh _col(22) "se" in gr "ed(" in wh "#" in gr ") " _c
di in wh "ev" in gr "ery(" in wh "#" in gr ") " _c
di in wh "le" in gr "vel(" in wh "#" in gr ") " _c
di in wh "t" in gr "ab " in wh "w" in gr "ide"
di in wh _col(22) "sa" in gr "ving(" in wh "filename" in gr ") " _c
di in wh "replace nom" in gr "sg ]"
exit
}
* Setup
version 8
syntax varlist [if] [in] [ , Reps(numlist) SIze(numlist) SEed(numlist) ///
EVery(numlist) Wgt(str) ESTim(str) Level(integer $S_level) ///
SAving(str) REPLACE Tab WIDE NOMsg ]
if "`options'" ~= "" {
local options = ", `options'"
}
tokenize "`varlist'"
marksample touse
preserve
if "`if'"!="" {
keep `if'
}
if ("`in'"!="") {
keep `in'
}
* Level value, etc.
global zsc = invnorm(1-(1-`level'/100)/2)
global N = _N
* Checking if estim=an is compatible with data
qui inspect `1'
local numlev `r(N_unique)'
local nummeas : word count `varlist'
if `numlev' < 3 & `nummeas' < 3 {
local bs 0
}
else local bs 1
if "`estim'" == "an" & `bs' == 1 {
di in gr " "
di in gr "Note: Option " in wh "estim(an) " in gr "is only suitable for 2x2 data."
di in wh " bs " in gr " will be used."
}
* Defaults
if ("`estim'" == "") & (`bs' == 0) {
local estim "an"
}
if ("`estim'" == "") & (`bs' == 1) {
local estim "bc"
}
*************************************
* Displaying table if it is requested
*************************************
if "`wide'" ~= "" {
local wide_str ", wrap"
}
if "`wide'" == "" {
local wide_str ""
}
if ("`tab'" ~= "") & (`nummeas' < 3) {
tab `varlist' if `touse' `wide_str'
}
if ("`tab'" ~= "") & (`nummeas' >= 3) {
tab2 `varlist'if `touse' `wide_str'
}
****************************************************
* Calculating analytical CI for kappa when estim=an
****************************************************
if (`bs' == 0) & ("`estim'" == "an") { /* Start of no bs situation */
* First ... extracting effective sample size used (noting byable!)
qui summ `varlist' if `touse'
local N = `r(N)'
* Call kappa and get midpoints
qui kap `varlist' if `touse' `options'
* Saving scalars from kap as locals for return list
local prop_e = r(prop_e)
local prop_o = r(prop_o)
* Working macro ...
local k = r(kappa)
* Extract table data
tempname tab2x2 a b c d agrN
qui tab2 `varlist' , matcell(`tab2x2')
scalar `a'=`tab2x2'[1,1]
scalar `b'=`tab2x2'[1,2]
scalar `c'=`tab2x2'[2,1]
scalar `d'=`tab2x2'[2,2]
scalar `agrN'=`a'+`b'+`c'+`d'
* Locals - marginals
local p1 = (`a'+`b')/`agrN'
local p2 = (`a'+`c')/`agrN'
* Quantity Q based on Fleiss, (1981), equations 13.15 - 13.18
local Q = ( ( ( (-1 + `k' ) * ( (-2*`k' *`p1' ) + /*
*/ ((`k' ^2)*`p1' ) + (4*`k' *(`p1'^2)) - /*
*/ (2*(`k'^2)*(`p1'^2)) - (2*`k' *`p2' ) + /*
*/ ((`k'^2)*`p2' ) - (4*`p1' *`p2' ) + /*
*/ (8*`k' *`p1' *`p2' ) - (6*(`k'^2)*`p1' *`p2' ) + /*
*/ (4*(`p1'^2)*`p2' ) - (12*`k' *(`p1'^2)*`p2' ) + /*
*/ (8*(`k'^2)*(`p1'^2)*`p2' ) + (4*`k' *(`p2'^2)) - /*
*/ (2*(`k'^2)*(`p2'^2)) + (4*`p1' *(`p2'^2)) - /*
*/ (12*`k' *`p1' *(`p2'^2)) + /*
*/ (8*(`k'^2)*`p1' *(`p2'^2)) - /*
*/ (4*(`p1'^2)*(`p2'^2)) + /*
*/ (12*`k' *(`p1'^2)*(`p2'^2)) - /*
*/ (8*(`k'^2)*(`p1'^2)*(`p2'^2))) /*
*/ ) / ( (`p1' + `p2' - (2*`p1' *`p2' ) )^2 ) ) )
* Standard error, given kappa estimate k_hat=`k'
local sek = (sqrt(`Q')) / (sqrt(`agrN') )
* CI
local klow = `k'-($zsc*`sek')
local kup = `k'+($zsc*`sek')
if `kup' >= 1 {
local kup = 1
}
if `klow' < -1 {
local klow = -1
}
* Display
local type "analytical "
local typeab "A"
di _n in gr _col(42) "N=" `N'
di in gr "{hline 48}"
di in gr " Kappa (" %2.0f `level' "% CI) = " in ye %5.3f `k' _c
di in gr _col(24) " (" in ye %5.3f `klow' in gr " - " in ye %5.3f `kup' in gr ")" _c
di in gr _col(44) "(" "`typeab'" ")"
di in gr "{hline 48}"
di in gr _col(2) "`typeab'" " = " "`type'"
* Return list
return scalar N = `agrN'
return scalar z = $zsc
return scalar se = `sek'
return scalar prop_o = `prop_o'
return scalar prop_e = `prop_e'
return scalar ub_an = `kup'
return scalar lb_an = `klow'
return scalar kappa = `k'
} /* End of no bs situation */
*****************************************************
* Calculating analytical CI for kappa when estim!=an
*****************************************************
if (`bs' == 1) | ((`bs' == 0) & ("`estim'" ~= "an")) { /* Start of bs situation */
if "`estim'" ~= "an" {
if "`estim'" ~= "" {
if "`estim'" ~= "bc" {
if "`estim'" ~= "n" {
if "`estim'" ~= "p" {
if "`estim'" ~= "bsall" {
local estim "bc"
di in bl " "
di in bl "Note: Unknown bs CI type specified."
di in wh " bc " in gr "will be used."
}
}
}
}
}
}
if "`estim'" == "an" {
local estim "bc"
}
* Preparing ...
tempfile mainfile
qui save `mainfile', replace
qui use `mainfile', clear
tempfile tmpsave0 tmpsave1
local byindex = _byindex()
*----------------------------------------------------------------------------
if ("`saving'" ~= "") & (_by()==1) {
local sa_str "saving(`tmpsave1')"
}
if ("`saving'" ~= "") & (_by()==0) {
local sa_str "saving(`tmpsave0')"
}
if "`saving'" == "" & (_by()==0) {
local sa_str ""
}
*----------------------------------------------------------------------------
if "`wgt'" ~= "" {
local wgt_str "wgt(`wgt')"
}
local n : word count `varlist'
if `n' > 2 & "`wgt'" ~= "" {
di _n in gr "Note: wgt() not allowed if varlist > 2. Option ignored."
local wgt_str ""
}
if "`wgt'" == "" {
local wgt_str ""
}
*----------------------------------------------------------------------------
if "`reps'" ~= "" {
local reps_str "reps(`reps')"
}
if "`reps'" == "" {
local reps_str "reps(5)"
local reps 5
di _n in gr "Note: default number of bootstrap replications " _c
di in gr "has been
di in gr " set to " in wh "5 " in gr "for syntax testing only." _c
di in wh "reps() " in gr "needs to "
di in gr " be increased when analysing real data." _n
}
*----------------------------------------------------------------------------
if "`seed'" ~= "" {
set seed `seed'
local seed_str "seed(`seed')"
}
*----------------------------------------------------------------------------
if "`size'" ~= "" {
if `size' < 5 {
di in gr "Note: size() set to N"
local size $N
}
local size_str "size(`size')"
}
if "`size'" == "" {
local size_str ""
local size $N
}
*----------------------------------------------------------------------------
if "`every'" ~= "" {
local every_str "every(`every')"
}
if "`every'" == "" {
local every_str ""
}
*----------------------------------------------------------------------------
if _by()==0 {
local first "kap `varlist' , `wgt_str'"
}
if _by()==1 {
local first "kap `varlist' if `touse' , `wgt_str'"
}
* Calling bs
if `reps' > 100 & "`nomsg'" == "" {
di _n in gr "This may take quite a long time. Please wait ..."
}
qui bs " `first' " r(kappa), `reps_str' `sa_str' level(`level') `size_str' `every_str' nowarn
if ("`saving'" ~= "") & (_by()==0) {
qui use `tmpsave0'
qui label data "kapci: varlist is `varlist'"
qui rename _bs_1 _kapci_bs
label var _kapci_bs "Options: `wgt_str' `reps_str' `seed_str' `size_str' `every_str'"
qui save `saving', `replace'
restore
}
if ("`saving'" ~= "") & (_by()==1) {
qui use `tmpsave1'
qui label data "kapci: varlist is `varlist'; byvars is `_byvars'; by-group is (`byindex')"
qui rename _bs_1 _kapci_bs__`byindex'
label var _kapci_bs__`byindex' "Options: `wgt_str' `reps_str' `seed_str' `size_str' `every_str'"
qui save `saving'`byindex', `replace'
restore
}
* Extracting sample size used (noting byable !)
qui summ `varlist' if `touse'
local N = `r(N)'
* Matrix extraction
matrix tmp_mtx = e(b)
local k = tmp_mtx[1,1]
matrix tmp_mtx = e(ci_bc)
local klow_bc = tmp_mtx[1,1]
local kup_bc = tmp_mtx[2,1]
matrix tmp_mtx = e(ci_percentile)
local klow_p = tmp_mtx[1,1]
local kup_p = tmp_mtx[2,1]
matrix tmp_mtx = e(ci_normal)
local klow_n = tmp_mtx[1,1]
local kup_n = tmp_mtx[2,1]
matrix tmp_mtx = e(reps)
local numreps = tmp_mtx[1,1]
matrix tmp_mtx = e(bias)
local bias = tmp_mtx[1,1]
matrix tmp_mtx = e(se)
local se = tmp_mtx[1,1]
matrix drop tmp_mtx
* Display
local dotdot "{hline 48}"
local col1 "_col(34)"
local col2 "_col(43)"
if "`estim'" ~= "bsall" {
if "`estim'" == "bc" {
local klow = `klow_bc'
local kup = `kup_bc'
}
if "`estim'" == "n" {
local klow = `klow_n'
local kup = `kup_n'
}
if "`estim'" == "p" {
local klow = `klow_p'
local kup = `kup_p'
}
if (`kup' >= 1) & (`kup' ~= .) {
local kup = 1
}
if (`klow' < -1) & (`klow' ~= .) {
local klow = -1
}
if "`estim'" == "bc" {
local type "bias corrected "
local typeab "BC"
}
if "`estim'" == "n" {
local type "normal "
local typeab "N"
}
if "`estim'" == "p" {
local type "percentile "
local typeab "P"
}
di _n in gr _col(34) "B=" `numreps' _col(42) "N=" `N'
di in gr "{hline 48}"
di in gr " Kappa (" %2.0f `level' "% CI) = " in ye %5.3f `k' _c
di in gr _col(24) " (" in ye %5.3f `klow' in gr " - " in ye %5.3f `kup' in gr ")" _c
di in gr _col(44) "(" "`typeab'" ")"
di in gr "{hline 48}"
di in gr _col(2) "`typeab'" " = " "`type'"
}
if "`estim'" == "bsall" {
if `kup_n' >= 1 {
local kup_n = 1
}
if `klow_n' < -1 {
local klow_n = -1
}
local type1 "bias corrected"
local typeab1 "BC"
local type2 "percentile"
local typeab2 "P"
local type3 "normal"
local typeab3 "N"
di _n in gr _col(34) "B=" `numreps' _col(42) "N=" `N'
di in gr "{hline 48}"
di in gr " Kappa (" %2.0f `level' "% CI) = " in ye %5.3f `k' _c
di in gr _col(24) " (" in ye %5.3f `klow_bc' in gr " - " in ye %5.3f `kup_bc' in gr ")" _c
di in gr _col(44) "(" "`typeab1'" ")"
di in gr _col(24) " (" in ye %5.3f `klow_p' in gr " - " in ye %5.3f `kup_p' in gr ")" _c
di in gr _col(44) "(" "`typeab2'" ")"
di in gr _col(24) " (" in ye %5.3f `klow_n' in gr " - " in ye %5.3f `kup_n' in gr ")" _c
di in gr _col(44) "(" "`typeab3'" ")"
di in gr "{hline 48}"
di in gr _col(2) "`typeab1'" " = " "`type1'" ", " _c
di in gr "`typeab2'" " = " "`type2'" ", " _c
di in gr "`typeab3'" " = " "`type3'"
}
* Return list
return scalar N_bs = `size'
return scalar N = $N
return scalar z = $zsc
return scalar reps = `numreps'
return scalar bias = `bias'
return scalar se = `se'
return scalar lb_n = `klow_n'
return scalar ub_n = `kup_n'
return scalar lb_p = `klow_p'
return scalar ub_p = `kup_p'
return scalar lb_bc = `klow_bc'
return scalar ub_bc = `kup_bc'
return scalar kappa = `k'
} /* End of no bs situation */
* Cleaning up
macro drop zsc
macro drop N
end

@ -0,0 +1,212 @@
capture program drop kgv
*------------------------------------------------------------------------------
* kgv : known-groups validity table
*
* Syntax
*   kgv varlist, categ(varlist) [KGVBoxplots KGVGroupboxplots]
*
*   varlist           score variables, one block of rows per score
*   categ()           grouping variables; each one is used as the single
*                     factor of an anova on every score
*   kgvboxplots       draw one box plot per score and grouping variable
*   kgvgroupboxplots  only honoured together with kgvboxplots: the box plots
*                     are built with nodraw and combined into a single graph
*                     named Known_groups_validity
*
* Output
*   For every score and grouping variable the table shows, per level of the
*   grouping variable, the number of non-missing observations, the mean and
*   the value of r(sd) from summarize, plus the anova p-value computed as
*   Ftail(e(df_m), e(df_r), e(F)) on the first row of each score.
*   Grouping variables are laid out two per horizontal band.
*
* NOTE(review): the column header reads "standard error" but the figure shown
* is r(sd), i.e. a standard deviation. Header text left unchanged; confirm
* which one is intended.
* NOTE(review): the program is declared rclass but stores nothing in r().
*------------------------------------------------------------------------------
program kgv,rclass
    syntax varlist, categ(varlist) [KGVBoxplots KGVGroupboxplots]
    local i = 1                      // score index
    local j = 1                      // grouping-variable index
    local k = 0                      // level index within a grouping variable
    local max = 0                    // largest number of levels over all grouping variables
    local a : word count `categ'
    local nb:word count `varlist'
    di as result "{hline}"
    di "{bf:Known-groups validity}"
    di as result "{hline}"
    di

    * ---- Pass 1: collect every statistic into indexed local macros ----------
    *   p<i>_<j>        anova p-value of score i by grouping variable j
    *   eff<i>_<j>_<k>  non-missing count of score i in level k of variable j
    *   m / s<i>_<j>_<k> mean and r(sd) of score i in that level
    *   ll<j>_<k>       display label of level k of variable j
    *   maxlen<j>       longest display label of variable j
    foreach sco in `varlist' {
        foreach cat in `categ' {
            local nblev = 0
            local maxlen`j' = 0
            qui anova `sco' `cat'
            local p`i'_`j' = Ftail(e(df_m), e(df_r), e(F))
            qui levelsof `cat', local(levels)
            local lbe : value label `cat'
            foreach l of local levels {
                qui count if `sco' !=. & `cat' == `l'
                local ++k
                local eff`i'_`j'_`k' = r(N)
                * Use the value label when one is attached, the raw level otherwise.
                if "`lbe'" != "" {
                    local ll`j'_`k' : label `lbe' `l'
                }
                else {
                    local ll`j'_`k' = `l'
                }
                * Labels longer than 10 characters are shortened to their first
                * 9 characters, a tilde and their last character.
                * The pieces are glued inside ONE pair of quotes: writing three
                * separately quoted strings would store the quotes themselves in
                * the macro and break the length() call just below.
                local len = length("`ll`j'_`k''")
                if `len' > 10 {
                    local head = substr("`ll`j'_`k''",1,9)
                    local tail = substr("`ll`j'_`k''",-1,1)
                    local ll`j'_`k' "`head'~`tail'"
                }
                local w = length("`ll`j'_`k''")
                if `w' > `maxlen`j'' local maxlen`j' = `w'
                qui su `sco' if `cat' == `l'
                local m`i'_`j'_`k' = r(mean)
                local s`i'_`j'_`k' = r(sd)
                local nblev = `nblev' + 1
            }
            if `nblev' > `max' local max = `nblev'
            local ++j
            local k = 0
        }
        local ++i
        local j = 1
    }

    * ---- Score names, abbreviated to 7 characters for the row headers -------
    local i = 1
    foreach s in `varlist' {
        local s`i' = abbrev("`s'",7)
        local ++i
    }
    local maxs = 0
    forvalues j=1/`nb' {
        local len`j' = length("`s`j''")
        if `len`j'' > `maxs' local maxs = `len`j''
    }

    * ---- Group the grouping variables two by two: c1 = "cat1 cat2", ... -----
    * Only the first ceil(a/2) of the c<k> macros are non-empty.
    tokenize `categ'
    local i = 1
    local k = 0
    local j = 2
    foreach cat in `categ'{
        local ++k
        local c`k' = "``i'' ``j''"
        local i = `i' + 2
        local j = `j' + 2
    }

    * ---- Pass 2: print one band per pair of grouping variables --------------
    * d and f are the indices of the first and last grouping variable of the
    * current band.
    local d = 1
    local f = 2
    forvalues h = 1/`a' {
        if `f' > `a' local f = `f'-1

        * Header line 1: grouping-variable names.
        * j starts at d (not 1) so that the column offsets use the label
        * widths of the variables actually shown in this band, exactly as the
        * two header lines below and the data rows do.
        local j = `d'
        local col = `maxs'+6
        foreach cat in `c`h'' {
            di _col(`col') "{bf:`cat'}" _c
            local col = `col' + `maxlen`j'' + 5 + 40
            local ++j
        }
        di

        * Header line 2: column titles.
        local j = `d'
        local col = `maxs'+6
        foreach cat in `c`h'' {
            di _col(`=`col'+`maxlen`j''+5') "{bf: mean }" _c
            di "{bf:standard }" _c
            di "{bf:p-value}" _c
            local col = `col' + `maxlen`j'' + 5 + 40
            local ++j
        }
        di

        * Header line 3: second half of the "standard error" title.
        local j = `d'
        local col = `maxs'+6
        foreach cat in `c`h'' {
            di _col(`=`col'+`maxlen`j''+6') "{bf: error}" _c
            local col = `col' + `maxlen`j'' + 5 + 40
            local ++j
        }
        di
        di

        * Data rows: one block per score, one row per level slot.
        local i = 1
        local col = `maxs'+6
        forvalues g = 1/`nb' {
            di "{bf:`s`g''}" _c
            forvalues k = 1/`max' {
                forvalues j = `d'/`f' {
                    * A grouping variable with fewer than max levels has no
                    * statistics for the remaining slots: leave the cell blank
                    * instead of formatting undefined (empty) macros.
                    if "`eff`i'_`j'_`k''" != "" {
                        di _col(`col') "{bf:`ll`j'_`k''} " _c
                        di as text "(n=`eff`i'_`j'_`k'')" _c
                        local m : di %6.2f `m`i'_`j'_`k''
                        di _col(`=`col'+`maxlen`j''+10') "{text:`m'} " _c
                        local s : di %8.2f `s`i'_`j'_`k''
                        di "{text: `s'} " _c
                        * The p-value belongs to the whole variable, so it is
                        * printed once, on the first level row.
                        if `k' == 1 {
                            local p : di %8.3f `p`i'_`j''
                            di _col(`=`col'+31') "{text:`p'} " _c
                        }
                    }
                    local col = `col' + `maxlen`j'' + 5 + 40
                }
                di
                local col = `maxs'+6
            }
            di
            local ++i
        }
        local d = `d'+2
        local f = `f'+2
        if `d' > `a' continue, break
        di
    }

    * ---- Optional box plots -------------------------------------------------
    if "`kgvboxplots'" != "" {
        * With kgvgroupboxplots the individual plots are not drawn; they are
        * only stored by name and then combined into one graph.
        local nodraw
        if "`kgvgroupboxplots'" != "" local nodraw nodraw
        local graphlist
        foreach c in `categ' {
            foreach s in `varlist' {
                graph box `s', over(`c') name("`s'_`c'",replace) b1title("`c'") `nodraw'
                local graphlist `graphlist' `s'_`c'
            }
        }
        if "`kgvgroupboxplots'" != "" {
            gr combine `graphlist', name(Known_groups_validity,replace)
        }
    }
end
*kgv HA-MOC, categ(radio chim) //kgvboxplots kgvgroupboxplots // radio_01 etud_01 actu_01)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -0,0 +1,103 @@
{smcl}
{* 6november2004}{...}
{hline}
help for {hi:loevH}{right:Jean-Benoit Hardouin}
{hline}
{title:Loevinger's H coefficients and non parametric Item Responses Models}
{p 8 14 2}{cmd:loevH} {it:varlist} [{cmd:,} {cmdab:pairw:ise} {cmdab:pair} {cmdab:ppp} {cmdab:pmm} {cmdab:noadj:ust} {cmdab:gener:ror}({it:newvar}) {cmdab:rep:lace} {cmdab:gr:aph} {cmdab:mono:tonicity}({it:string}) {cmdab:nip:matrix}({it:string})]
{p 8 14 2}{it:varlist} is a list of two or more existing dichotomous or polytomous variables.
{title:Description}
{p 4 8 2}{cmd:loevH} allows verifying the fit of data to the Monotonely Homogeneous Mokken Model or to the Doubly Monotone Mokken Model.
It computes the Loevinger H scalability coefficients, and several indexes in the field of the Non parametric Item Response Theory.
{title:Options}
{p 4 8 2}{cmd:pairwise}. By default, all the individuals with one or more missing values are omitted. {cmd:pairwise} allows to use the complete information by pair of items.
{p 4 8 2}{cmd:pair} displays statistics and the value of the Loevinger H coefficient for each pair of items.
{p 4 8 2}{cmd:ppp} displays the P++ matrix.
{p 4 8 2}{cmd:pmm} displays the P-- matrix.
{p 4 8 2}{cmd:noadjust} approximates the tests statistics like the MSP software (Molenaar et al. (2000)).
{p 4 8 2}{cmd:generror} creates a new variable containing the number of Guttman errors produced by each individual.
{p 4 8 2}{cmd:replace} allows replacing the variable defined by the {cmd:generror} option.
{p 4 8 2}{cmd:graph} displays graphs (only with the {cmd:ppp}, {cmd:pmm} and {cmd:generror} options).
{p 4 8 2}{cmd:monotonicity} displays indexes in order to check the monotonicity of the data (Monotone Homogeneity Mokken Model). This option produces outputs similar to the MSP software.
The string contains several suboptions: {cmd:minvi}, {cmd:minsize}, {cmd:siglevel} and {cmd:details}. If you want to use all the default values, type *.{p_end}
{p 10 12 10}{cmd:minvi} defines the minimal size of a violation of monotonicity (0.03 by default){p_end}
{p 10 12 10}{cmd:minsize} defines the minimum size of groups of patients to check the monotonicity (by default, the number of individuals divided by 10 with more than 500 individuals, the same number divided by 5 with more than 250 individuals, and the same number divided by 3 for a smaller number, with a minimum of 50){p_end}
{p 10 12 10}{cmd:siglevel} defines the significance level for the tests (0.05 by default){p_end}
{p 10 12 10}{cmd:details} displays more details with polytomous items{p_end}
{p 4 8 2}{cmd:nipmatrix} displays indexes in order to check the non-intersection (Doubly Monotone Mokken Model). This option produces outputs similar to the MSP software.
The string contains several suboptions: {cmd:minvi} and {cmd:siglevel}. If you want to use all the default values, type *.{p_end}
{p 10 12 10}{cmd:minvi} defines the minimal size of a violation of non-intersection (0.03 by default){p_end}
{p 10 12 10}{cmd:siglevel} defines the significance level for the tests (0.05 by default){p_end}
{title:Remarks}
{p 4 8 2}For detailed information on Loevinger's H coefficients, see Loevinger (1948) or Hemker et al. (1995). For details about the analysis of non parametric Mokken models, see for example the MSP 5.0 manual.
{title:Example}
{p 8 8}{inp:. loevH itemA1-itemA7}
{p 8 8}{inp:. loevH itemA*, pair monotonicity(*) ppp pmm nipmatrix(minvi(0.05) siglevel(0.01))}
{p 8 8}{inp:. loevH item*, pairwise generror(error) graph}
{title:Results}
{p 4 8 2}The Loevinger's H coefficients between all the pairs of items, for each item with respect to all the other items and for the set of items are respectively saved in the matrices {it:r(loevHjk)}, {it:r(loevHj)}
and in the scalar {it:r(loevH)}.
{p 4 8 2}The empirical Guttman errors between all the pairs of items, associated with each item and related to the scale are respectively saved in the matrices {it:r(eGuttjk)}, {it:r(eGuttj)} and in the scalar {it:e(Gutt)}.
{p 4 8 2}The theoretical Guttman errors between all the pairs of items, associated with each item and related to the scale are respectively saved in the matrices {it:r(eGuttjk0)}, {it:r(eGuttj0)} and in the scalar {it:e(Gutt0)}.
{p 4 8 2}The values of the Z statistics and the corresponding p-values associated to the Loevinger H coefficients are respectively saved in the matrices {it:r(zHjk)}, {it:r(pvalHjk)}, {it:r(zHj)}, {it:r(pvalHj)}
and in the scalars {it:e(zH)} and {it:r(pvalH)}.
{p 4 8 2}The P++ and P-- matrices are saved in {it:r(ppp)} and {it:r(pmm)}.
{p 4 8 2}The used number of individuals per items pair is saved in {it:r(Obs)}.
{title:References}
{p 4 8 2}Hemker B. T., Sijtsma K. and Molenaar I. W., Selection of unidimensional scales from a multidimensional item bank in the polytomous Mokken IRT
model, {it: Applied Psychological Measurement}, vol.19(4), 1995, pp. 337-352.
{p 4 8 2}Loevinger J., The technique of homogeneous tests compared with some aspects of "scale analysis" and factor analysis. {it:Psychological bulletin},
vol. 45, 1948, pp. 507-530.
{p 4 8 2}Molenaar I. W., Sijtsma K. and Boer P. {it:MSP5 for Windows - User's Manual}, 2000, 105 pages.
{title:Author}
{p 4 8 2}Jean-Benoit Hardouin, PhD, assistant professor{p_end}
{p 4 8 2}EA 4275 "Team of Biostatistics, Clinical Research and Subjective Measures in Health Sciences"{p_end}
{p 4 8 2}University of Nantes - Faculty of Pharmaceutical Sciences{p_end}
{p 4 8 2}1, rue Gaston Veil - BP 53508{p_end}
{p 4 8 2}44035 Nantes Cedex 1 - FRANCE{p_end}
{p 4 8 2}Email:
{browse "mailto:jean-benoit.hardouin@univ-nantes.fr":jean-benoit.hardouin@univ-nantes.fr}{p_end}
{p 4 8 2}Websites {browse "http://www.anaqol.org":AnaQol}
and {browse "http://www.freeirt.org":FreeIRT}
{title:Also see}
{p 4 13 2}Online: help for {help traces}, {help msp}, {help gengroup}, {help mokken} if installed.{p_end}

@ -0,0 +1,71 @@
{smcl}
{* 31Aug2002}{...}
{hline}
help for {hi:loevH}
{hline}
{title:Loevinger's H coefficients}
{p 8 14}{cmd:loevH} {it:varlist} [{cmd:,} {cmdab:h} {cmdab:hj} {cmdab:hjk}
{cmdab:e} {cmdab:ej} {cmdab:ejk} {cmdab:e0} {cmdab:ej0} {cmdab:ejk0}]
{p}{it:varlist} is a list of two or more existing dichotomous variables.
{title:Options}
{p 0 4}{cmd:h} displays the Loevinger H coefficient for all the items of the scale.
{p 0 4}{cmd:hj} displays the Loevinger H coefficient for each item of the scale with respect to all the other items.
{p 0 4}{cmd:hjk} displays the Loevinger H coefficient for each pair of items of the scale.
{p 0 4}{cmd:e} displays the sum of the number of Guttman errors between all the pairs of items.
{p 0 4}{cmd:ej} displays the sum of the number of Guttman errors for each item of the scale.
{p 0 4}{cmd:ejk} displays the sum of the number of Guttman errors for each pair of items of the scale.
{p 0 4}{cmd:e0} displays the sum of the number of expected Guttman errors between all the pairs of items.
{p 0 4}{cmd:ej0} displays the sum of the number of expected Guttman errors for each item of the scale.
{p 0 4}{cmd:ejk0} displays the sum of the number of expected Guttman errors for each pair of items of the scale.
{title:Description}
{p}{cmd:loevH} calculates the Loevinger's H coefficient between all the pairs of items of {it:varlist}, and computes the Loevinger's Hj coefficient for each item of {it:varlist} with respect to all the other items and the Loevinger's H coefficient for the set of items of {it:varlist}.
{title:Remarks}
For detailed information on the Loevinger's H coefficients, see Loevinger (1948) or Hemker et al. (1995).
{cmd:loevH} displays no results by default. {cmd:loevH} does not permit the use of polytomous items.
{title:Example}
{inp:. loevH item1 item2 item3 item4}
{title:Results}
The Loevinger's H coefficients between all the pairs of items are saved in the matrix r(loevHjk).
The Loevinger's H coefficients for each item with respect to all the other items are saved in the matrix r(loevHj).
The Loevinger's H coefficient for the set of items is saved in the scalar r(loevH).
{title:References}
Hemker B. T., Sijtsma K. and Molenaar I. W., Selection of unidimensional scales from a multidimensional item bank in the polytomous Mokken IRT
model, {it: Applied Psychological Measurement}, vol.19(4), 1995, pp. 337-352.
Loevinger J., The technique of homogeneous tests compared with some aspects of "scale analysis" and factor analysis. {it:Psychological bulletin},
vol. 45, 1948, pp. 507-530.
{title:Author}
{p 4 8 2}Jean-Benoit Hardouin, Regional Health Observatory (ORS) - 1, rue Porte Madeleine - BP 2439 - 45032 Orleans Cedex 1 - France.
You can contact the author at {browse "mailto:jean-benoit.hardouin@neuf.fr":jean-benoit.hardouin@neuf.fr} and visit the websites {browse "http://anaqol.free.fr":AnaQol} and {browse "http://freeirt.free.fr":FreeIRT}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -0,0 +1,103 @@
{smcl}
{* 8december2010}{...}
{hline}
help for {hi:loevh}{right:Jean-Benoit Hardouin}
{hline}
{title:Loevinger's H coefficients and non parametric Item Responses Models}
{p 8 14 2}{cmd:loevh} {it:varlist} [{cmd:,} {cmdab:pairw:ise} {cmdab:pair} {cmdab:ppp} {cmdab:pmm} {cmdab:noadj:ust} {cmdab:gener:ror}({it:newvar}) {cmdab:rep:lace} {cmdab:gr:aph} {cmdab:mono:tonicity}({it:string}) {cmdab:nip:matrix}({it:string})]
{p 8 14 2}{it:varlist} is a list of two or more existing dichotomous or polytomous variables.
{title:Description}
{p 4 8 2}{cmd:loevh} allows verifying the fit of data to the Monotonely Homogeneous Mokken Model or to the Doubly Monotone Mokken Model.
It computes the Loevinger H scalability coefficients, and several indexes in the field of the Non parametric Item Response Theory.
{title:Options}
{p 4 8 2}{cmd:pairwise}. By default, all the individuals with one or more missing values are omitted. {cmd:pairwise} allows to use the complete information by pair of items.
{p 4 8 2}{cmd:pair} displays statistics and the value of the Loevinger H coefficient for each pair of items.
{p 4 8 2}{cmd:ppp} displays the P++ matrix.
{p 4 8 2}{cmd:pmm} displays the P-- matrix.
{p 4 8 2}{cmd:noadjust} approximates the tests statistics like the MSP software (Molenaar et al. (2000)).
{p 4 8 2}{cmd:generror} creates a new variable containing the number of Guttman errors produced by each individual.
{p 4 8 2}{cmd:replace} allows replacing the variable defined by the {cmd:generror} option.
{p 4 8 2}{cmd:graph} displays graphs (only with the {cmd:ppp}, {cmd:pmm} and {cmd:generror} options).
{p 4 8 2}{cmd:monotonicity} displays indexes in order to check the monotonicity of the data (Monotone Homogeneity Mokken Model). This option produces outputs similar to the MSP software.
The string contains several suboptions: {cmd:minvi}, {cmd:minsize}, {cmd:siglevel} and {cmd:details}. If you want to use all the default values, type *.{p_end}
{p 10 12 10}{cmd:minvi} defines the minimal size of a violation of monotonicity (0.03 by default){p_end}
{p 10 12 10}{cmd:minsize} defines the minimum size of groups of patients to check the monotonicity (by default, the number of individuals divided by 10 with more than 500 individuals, the same number divided by 5 with more than 250 individuals, and the same number divided by 3 for a smaller number, with a minimum of 50){p_end}
{p 10 12 10}{cmd:siglevel} defines the significance level for the tests (0.05 by default){p_end}
{p 10 12 10}{cmd:details} displays more details with polytomous items{p_end}
{p 4 8 2}{cmd:nipmatrix} displays indexes in order to check the non-intersection (Doubly Monotone Mokken Model). This option produces outputs similar to the MSP software.
The string contains several suboptions: {cmd:minvi} and {cmd:siglevel}. If you want to use all the default values, type *.{p_end}
{p 10 12 10}{cmd:minvi} defines the minimal size of a violation of non-intersection (0.03 by default){p_end}
{p 10 12 10}{cmd:siglevel} defines the significance level for the tests (0.05 by default){p_end}
{title:Remarks}
{p 4 8 2}For detailed information on Loevinger's H coefficients, see Loevinger (1948) or Hemker et al. (1995). For details about the analysis of non parametric Mokken models, see for example the MSP 5.0 manual.
{title:Example}
{p 8 8}{inp:. loevh itemA1-itemA7}
{p 8 8}{inp:. loevh itemA*, pair monotonicity(*) ppp pmm nipmatrix(minvi(0.05) siglevel(0.01))}
{p 8 8}{inp:. loevh item*, pairwise generror(error) graph}
{title:Results}
{p 4 8 2}The Loevinger's H coefficients between all the pairs of items, for each item with respect to all the other items and for the set of items are respectively saved in the matrices {it:r(loevHjk)}, {it:r(loevHj)}
and in the scalar {it:r(loevh)}.
{p 4 8 2}The empirical Guttman errors between all the pairs of items, associated with each item and related to the scale are respectively saved in the matrices {it:r(eGuttjk)}, {it:r(eGuttj)} and in the scalar {it:e(Gutt)}.
{p 4 8 2}The theoretical Guttman errors between all the pairs of items, associated with each item and related to the scale are respectively saved in the matrices {it:r(eGuttjk0)}, {it:r(eGuttj0)} and in the scalar {it:e(Gutt0)}.
{p 4 8 2}The values of the Z statistics and the corresponding p-values associated to the Loevinger H coefficients are respectively saved in the matrices {it:r(zHjk)}, {it:r(pvalHjk)}, {it:r(zHj)}, {it:r(pvalHj)}
and in the scalars {it:e(zH)} and {it:r(pvalH)}.
{p 4 8 2}The P++ and P-- matrices are saved in {it:r(ppp)} and {it:r(pmm)}.
{p 4 8 2}The used number of individuals per items pair is saved in {it:r(Obs)}.
{title:References}
{p 4 8 2}Hemker B. T., Sijtsma K. and Molenaar I. W., Selection of unidimensional scales from a multidimensional item bank in the polytomous Mokken IRT
model, {it: Applied Psychological Measurement}, vol.19(4), 1995, pp. 337-352.
{p 4 8 2}Loevinger J., The technique of homogeneous tests compared with some aspects of "scale analysis" and factor analysis. {it:Psychological bulletin},
vol. 45, 1948, pp. 507-530.
{p 4 8 2}Molenaar I. W., Sijtsma K. and Boer P. {it:MSP5 for Windows - User's Manual}, 2000, 105 pages.
{title:Author}
{p 4 8 2}Jean-Benoit Hardouin, PhD, assistant professor{p_end}
{p 4 8 2}EA 4275 "Team of Biostatistics, Clinical Research and Subjective Measures in Health Sciences"{p_end}
{p 4 8 2}University of Nantes - Faculty of Pharmaceutical Sciences{p_end}
{p 4 8 2}1, rue Gaston Veil - BP 53508{p_end}
{p 4 8 2}44035 Nantes Cedex 1 - FRANCE{p_end}
{p 4 8 2}Email:
{browse "mailto:jean-benoit.hardouin@univ-nantes.fr":jean-benoit.hardouin@univ-nantes.fr}{p_end}
{p 4 8 2}Websites {browse "http://www.anaqol.org":AnaQol}
and {browse "http://www.freeirt.org":FreeIRT}
{title:Also see}
{p 4 13 2}Online: help for {help traces}, {help msp}, {help gengroup}, {help mokken} if installed.{p_end}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save