You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
502 lines
15 KiB
Plaintext
502 lines
15 KiB
Plaintext
************************************************************************************************************
|
|
* hcaccprox: Hierachical Clusters Analysis/CCPROX
|
|
* Version 1: May 12, 2004
|
|
* Add-on: Partition version 2 (2004-04-10)
|
|
*
|
|
* Use the Detect Stata program (http://freeirt.free.fr)
|
|
*
|
|
* Historic :
|
|
* Version 1 [2004-01-18], Jean-Benoit Hardouin
|
|
*
|
|
* Jean-benoit Hardouin, Regional Health Observatory of Orléans - France
|
|
* jean-benoit.hardouin@neuf.fr
|
|
*
|
|
* News about this program : http://anaqol.free.fr
|
|
* FreeIRT Project : http://freeirt.free.fr
|
|
*
|
|
* Copyright 2004 Jean-Benoit Hardouin
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
*
|
|
************************************************************************************************************
|
|
|
|
|
|
program define hcaccprox , rclass
|
|
version 8.0
|
|
syntax varlist(min=2 numeric) [,PROX(string) METHod(string) PARTition(numlist) MEASures DETails DETect(integer 0)]
|
|
|
|
local nbpart:word count `partition'
|
|
tokenize `partition'
|
|
forvalues k=1/`nbpart' {
|
|
local part`k'=``k''
|
|
}
|
|
|
|
local nbitems : word count `varlist'
|
|
tokenize `varlist'
|
|
|
|
tempname proximity whereitems
|
|
|
|
matrix define `proximity'=J(`nbitems',`nbitems',0)
|
|
matrix define `whereitems'=J(`=`nbitems'-1',`nbitems',0)
|
|
|
|
if `detect'>`nbitems' {
|
|
di _col(3) in green "The number of partitions analyzed by the DETECT criterion must be inferior to the number of possible partitions"
|
|
di _col(3) in green "This number of possible partitions is `=`nbitems'-1', so your detect option is put to this number"
|
|
local detect=`nbitems'-1
|
|
di
|
|
}
|
|
|
|
if "`prox'"!="a"&"`prox'"!="ad"&"`prox'"!="cor"&"`prox'"!="ccov"&"`prox'"!="ccor"&"`prox'"!="mh" {
|
|
if "`prox'"=="" {
|
|
local prox="ccov"
|
|
}
|
|
else {
|
|
di in red "You must define an existing measure of proximity (a, ad, cor, ccov, ccor, mh)."
|
|
di in red "Please correct your prox option."
|
|
exit
|
|
}
|
|
}
|
|
|
|
if "`method'"!="UPGMA"&"`method'"!="single"&"`method'"!="complete" {
|
|
if "`method'"=="" {
|
|
local method="UPGMA"
|
|
}
|
|
else {
|
|
di in red "Tou must define an existing method to define the proximity between two clusters of items:"
|
|
di in red _col(10) "- UPGMA: Unweighted Pair-Group Method of Average"
|
|
di in red _col(10) "- single: single linkage"
|
|
di in red _col(10) "- complete: complete linkage "
|
|
di in red "Please correct your method option"
|
|
exit
|
|
}
|
|
}
|
|
|
|
forvalues i=1/`nbitems' {
|
|
matrix `whereitems'[1,`i']=`i'
|
|
if "`details'"!="" {
|
|
di in green _col(3) "The item " _col(13) in yellow "``i''" in green " correspond to the node " in yellow "`i'"
|
|
}
|
|
}
|
|
|
|
tempvar score
|
|
egen `score'=rmean(`varlist')
|
|
qui replace `score'=`score'*`nbitems'
|
|
forvalues k=0/`nbitems' {
|
|
qui count if `score'==`k'
|
|
local nk`k'=r(N)
|
|
}
|
|
|
|
qui count
|
|
local N=r(N)
|
|
|
|
if "`prox'"=="ccov"|"`prox'"=="mh" {
|
|
local proxmin=0
|
|
}
|
|
|
|
/*************************Measure of proximities*********************************/
|
|
|
|
forvalues i=1/`nbitems' {
|
|
forvalues j=`=`i'+1'/`nbitems' {
|
|
/***********************************Proximity A**************************/
|
|
if "`prox'"=="a" {
|
|
qui count if ``i''==1&``j''==1
|
|
local tmp11=r(N)
|
|
qui count if ``i''==0&``j''==0
|
|
local tmp00=r(N)
|
|
|
|
matrix `proximity'[`i',`j']=sqrt(1-`tmp11'/(`N'-`tmp00'))
|
|
matrix `proximity'[`j',`i']=`proximity'[`i',`j']
|
|
}
|
|
|
|
/***********************************Proximity AD*************************/
|
|
if "`prox'"=="ad" {
|
|
qui count if ``i''==1&``j''==1
|
|
local tmp11=r(N)
|
|
qui count if ``i''==0&``j''==0
|
|
local tmp00=r(N)
|
|
|
|
matrix `proximity'[`i',`j']=sqrt(1-(`tmp11'+`tmp00')/`N')
|
|
matrix `proximity'[`j',`i']=`proximity'[`i',`j']
|
|
}
|
|
|
|
/**********************************Proximity COR*************************/
|
|
if "`prox'"=="cor" {
|
|
qui count if ``i''==1&``j''==1
|
|
local tmp11=r(N)
|
|
qui count if ``i''==0&``j''==0
|
|
local tmp00=r(N)
|
|
qui count if ``i''==1&``j''==0
|
|
local tmp10=r(N)
|
|
qui count if ``i''==0&``j''==1
|
|
local tmp01=r(N)
|
|
|
|
matrix `proximity'[`i',`j']=sqrt(2*(1-(`tmp11'*`tmp00'-`tmp10'*`tmp01')/(sqrt((`tmp11'+`tmp10')*(`tmp11'+`tmp01')*(`tmp00'+`tmp10')*(`tmp00'+`tmp01')))))
|
|
matrix `proximity'[`j',`i']=`proximity'[`i',`j']
|
|
}
|
|
|
|
/***********************************Proximity CCOV**********************/
|
|
if "`prox'"=="ccov" {
|
|
local dij=0
|
|
forvalues k=1/`=`nbitems'-1' {
|
|
if `nk`k''!=0 {
|
|
qui corr ``i'' ``j'',cov
|
|
local covi`i'j`j'k`k'=r(cov_12)
|
|
local dij=`dij'+`covi`i'j`j'k`k''*`nk`k''
|
|
}
|
|
}
|
|
|
|
matrix `proximity'[`i',`j']=-`dij'/`N'
|
|
matrix `proximity'[`j',`i']=`proximity'[`i',`j']
|
|
if `proxmin'<`dij'/`N' {
|
|
local proxmin=`dij'/`N'
|
|
}
|
|
}
|
|
|
|
/***********************************Proximity CCOR**********************/
|
|
|
|
if "`prox'"=="ccor" {
|
|
local dij=0
|
|
forvalues k=1/`=`nbitems'-1' {
|
|
if `nk`k''!=0 {
|
|
qui corr ``i'' ``j''
|
|
local cori`i'j`j'k`k'=r(rho)
|
|
local dij=`dij'+`cori`i'j`j'k`k''*`nk`k''
|
|
}
|
|
}
|
|
|
|
matrix `proximity'[`i',`j']=sqrt(2*(1-`dij'/`N'))
|
|
matrix `proximity'[`j',`i']=`proximity'[`i',`j']
|
|
}
|
|
|
|
|
|
/***********************************Proximity MH************************/
|
|
|
|
if "`prox'"=="mh" {
|
|
local numij=0
|
|
local denom=0
|
|
forvalues k=1/`=`nbitems'-1' {
|
|
if `nk`k''!=0 {
|
|
qui count if ``i''==1&``j''==1
|
|
local A=r(N)
|
|
qui count if ``i''==0&``j''==1
|
|
local B=r(N)
|
|
qui count if ``i''==1&``j''==0
|
|
local C=r(N)
|
|
qui count if ``i''==0&``j''==0
|
|
local D=r(N)
|
|
|
|
if `B'!=0&`C'!=0 {
|
|
local numij=`numij'+`A'*`D'/`nk`k''
|
|
local denomij=`denomij'+`B'*`C'/`nk`k''
|
|
}
|
|
}
|
|
}
|
|
|
|
matrix `proximity'[`i',`j']=-log(`numij'/`denomij')
|
|
matrix `proximity'[`j',`i']=`proximity'[`i',`j']
|
|
if `proxmin'<log(`numij'/`denomij') {
|
|
local proxmin=-`proximity'[`i',`j']
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if "`prox'"=="ccov"|"`prox'"=="mh" {
|
|
forvalues i=1/`nbitems' {
|
|
forvalues j=`=`i'+1'/`nbitems' {
|
|
matrix `proximity'[`i',`j']=`proximity'[`i',`j']+`proxmin'
|
|
matrix `proximity'[`j',`i']=`proximity'[`i',`j']
|
|
}
|
|
}
|
|
}
|
|
|
|
/**********************END OD THE COMPUTING OF THE PROXIMITIES**************************************/
|
|
if "`measures'"!="" {
|
|
di
|
|
matrix rowname `proximity'=`varlist'
|
|
matrix colname `proximity'=`varlist'
|
|
di in green _col(3) "Measures of proximity between the items"
|
|
matrix list `proximity', noheader
|
|
di
|
|
}
|
|
|
|
/**********************STEP 0**********************************************************************/
|
|
|
|
tempname currentprox nodes conclinesnodes mempart
|
|
|
|
matrix `currentprox'=`proximity'
|
|
matrix define `nodes'=J(`=`nbitems'+4',`=2*`nbitems'-1',0)
|
|
matrix define `conclinesnodes'=J(1,`nbitems',0)
|
|
matrix define `mempart'=J(`=`nbitems'+2',`=`nbitems'-1',0)
|
|
forvalues i=1/`nbitems' {
|
|
matrix `nodes'[1,`i']=1
|
|
matrix `nodes'[2,`i']=1
|
|
matrix `nodes'[5,`i']=`i'
|
|
matrix `conclinesnodes'[1,`i']=`i'
|
|
}
|
|
|
|
|
|
/*********************************CLUSTERING PROCEDURE*************************************/
|
|
|
|
forvalues k=1/`=`nbitems'-1' {
|
|
local nbclusters=`nbitems'-`k'+1
|
|
local distmin=`currentprox'[1,2]
|
|
local cl1=1
|
|
local cl2=2
|
|
|
|
forvalues i=1/`nbclusters' {
|
|
forvalues j=`=`i'+1'/`nbclusters' {
|
|
if `distmin'>`currentprox'[`i',`j'] {
|
|
local distmin=`currentprox'[`i',`j']
|
|
local cl1=`i'
|
|
local cl2=`j'
|
|
}
|
|
}
|
|
}
|
|
|
|
local linescl1=`conclinesnodes'[1,`cl1']
|
|
local nbitemscl1=`nodes'[1,`linescl1']
|
|
matrix `nodes'[2,`linescl1']=0
|
|
local linescl2=`conclinesnodes'[1,`cl2']
|
|
local nbitemscl2=`nodes'[1,`linescl2']
|
|
matrix `nodes'[2,`linescl2']=0
|
|
matrix `nodes'[1,`=`nbitems'+`k'']=`nbitemscl1'+`nbitemscl2'
|
|
matrix `nodes'[2,`=`nbitems'+`k'']=1
|
|
matrix `nodes'[3,`=`nbitems'+`k'']=`linescl1'
|
|
matrix `nodes'[4,`=`nbitems'+`k'']=`linescl2'
|
|
if "`details'"!="" {
|
|
di in green _col(3) "The nodes" _col(13) in yellow "`linescl1'" _col(17) in green "and" _col(21) in yellow "`linescl2'" _col(25) in green "are been aggregated to form the node " in yellow "`=`nbitems'+`k''"
|
|
}
|
|
|
|
forvalues i=5/`=`nbitemscl1'+4' {
|
|
local item=`nodes'[`i',`linescl1']
|
|
matrix `nodes'[`i',`=`nbitems'+`k'']=`item'
|
|
matrix `whereitems'[`k',`item']=`=`nbitems'+`k''
|
|
}
|
|
forvalues i=5/`=`nbitemscl2'+4' {
|
|
local item=`nodes'[`i',`linescl2']
|
|
matrix `nodes'[`=`i'+`nbitemscl1'',`=`nbitems'+`k'']=`item'
|
|
matrix `whereitems'[`k',`item']=`=`nbitems'+`k''
|
|
}
|
|
|
|
local tmp=1
|
|
forvalues i=1/`=`nbitems'+`k'' {
|
|
if `nodes'[2,`i']==1 {
|
|
matrix `mempart'[`tmp',`k']=`i'
|
|
local tmp=`tmp'+1
|
|
}
|
|
}
|
|
|
|
|
|
|
|
if `detect'>=`=`nbitems'-`k'' {
|
|
local partdetect
|
|
local compteur=1
|
|
local scaledetect
|
|
forvalues i=1/`=`nbitems'-`k'' {
|
|
local scaledetect`i'
|
|
}
|
|
forvalues i=1/`=`nbitems'+`k'' {
|
|
if `nodes'[2,`i']==1{
|
|
local tmp=`nodes'[1,`i']
|
|
local partdetect `partdetect' `tmp'
|
|
local tmp2=4+`tmp'
|
|
forvalues j=5/`tmp2' {
|
|
local tmp3=`nodes'[`j',`i']
|
|
local scaledetect`compteur' `scaledetect`compteur'' ``tmp3''
|
|
}
|
|
local scaledetect `scaledetect' `scaledetect`compteur''
|
|
local compteur=`compteur'+1
|
|
}
|
|
}
|
|
qui detect `scaledetect' , partition(`partdetect')
|
|
local detect`=`nbclusters'-1'=r(DETECT)
|
|
local R`=`nbclusters'-1'=r(R)
|
|
local Iss`=`nbclusters'-1'=r(Iss)
|
|
}
|
|
|
|
|
|
matrix drop `currentprox'
|
|
matrix define `currentprox'=J(`=`nbclusters'-1',`=`nbclusters'-1',0)
|
|
matrix drop `conclinesnodes'
|
|
matrix define `conclinesnodes'=J(1,`=`nbclusters'-1',0)
|
|
|
|
local tmp=1
|
|
forvalues i=1/`=`nbitems'+`k'' {
|
|
if `nodes'[2,`i']==1 {
|
|
matrix `conclinesnodes'[1,`tmp']=`i'
|
|
local tmp=`tmp'+1
|
|
}
|
|
}
|
|
forvalues i=1/`=`nbclusters'-1' {
|
|
forvalues j=`=`i'+1'/`=`nbclusters'-1' {
|
|
if "`method'"=="UPGMA" {
|
|
local moy=0
|
|
local linescl1=`conclinesnodes'[1,`i']
|
|
local nbitemscl1=`nodes'[1,`linescl1']
|
|
local linescl2=`conclinesnodes'[1,`j']
|
|
local nbitemscl2=`nodes'[1,`linescl2']
|
|
forvalues l=5/`=`nbitemscl1'+4' {
|
|
forvalues m=5/`=`nbitemscl2'+4' {
|
|
local item1=`nodes'[`l',`linescl1']
|
|
local item2=`nodes'[`m',`linescl2']
|
|
local tmp=`proximity'[`item1',`item2']
|
|
local moy=`moy'+`tmp'
|
|
}
|
|
}
|
|
matrix `currentprox'[`i',`j']=`moy'/(`nbitemscl1'*`nbitemscl2')
|
|
matrix `currentprox'[`j',`i']=`moy'/(`nbitemscl1'*`nbitemscl2')
|
|
}
|
|
if "`method'"=="single" {
|
|
local moy=0
|
|
local linescl1=`conclinesnodes'[1,`i']
|
|
local nbitemscl1=`nodes'[1,`linescl1']
|
|
local linescl2=`conclinesnodes'[1,`j']
|
|
local nbitemscl2=`nodes'[1,`linescl2']
|
|
forvalues l=5/`=`nbitemscl1'+4' {
|
|
forvalues m=5/`=`nbitemscl2'+4' {
|
|
local item1=`nodes'[`l',`linescl1']
|
|
local item2=`nodes'[`m',`linescl2']
|
|
if `l'==5&`m'==5 {
|
|
local distmin=`proximity'[`item1',`item2']
|
|
}
|
|
else {
|
|
if `distmin'>`proximity'[`item1',`item2'] {
|
|
local distmin=`proximity'[`item1',`item2']
|
|
}
|
|
}
|
|
}
|
|
}
|
|
matrix `currentprox'[`i',`j']=`distmin'
|
|
matrix `currentprox'[`j',`i']=`distmin'
|
|
}
|
|
if "`method'"=="complete" {
|
|
local moy=0
|
|
local linescl1=`conclinesnodes'[1,`i']
|
|
local nbitemscl1=`nodes'[1,`linescl1']
|
|
local linescl2=`conclinesnodes'[1,`j']
|
|
local nbitemscl2=`nodes'[1,`linescl2']
|
|
local distmax=0
|
|
forvalues l=5/`=`nbitemscl1'+4' {
|
|
forvalues m=5/`=`nbitemscl2'+4' {
|
|
local item1=`nodes'[`l',`linescl1']
|
|
local item2=`nodes'[`m',`linescl2']
|
|
if `distmax'<`proximity'[`item1',`item2'] {
|
|
local distmax=`proximity'[`item1',`item2']
|
|
}
|
|
}
|
|
}
|
|
matrix `currentprox'[`i',`j']=`distmax'
|
|
matrix `currentprox'[`j',`i']=`distmax'
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if `detect'!=0 {
|
|
tempname indexes
|
|
matrix define `indexes'=J(`detect',4,0)
|
|
matrix colnames `indexes'=Clusters DETECT Iss R
|
|
di ""
|
|
di in green _col(7) "Indexes to test the `detect' latest partitions of the items"
|
|
di ""
|
|
di in green _col(29) "DETECT" _col(43) "Iss" _col(56) "R"
|
|
di _col(5) in green "Only one cluster:" _col(27) in yellow %8.5f `detect1' _col(38) %8.5f `Iss1' _col(49) %8.5f `R1'
|
|
matrix `indexes'[1,1]=1
|
|
matrix `indexes'[1,2]=`detect1'
|
|
matrix `indexes'[1,3]=`Iss1'
|
|
matrix `indexes'[1,4]=`R1'
|
|
forvalues k=2/`detect' {
|
|
matrix `indexes'[`k',1]=`k'
|
|
matrix `indexes'[`k',2]=`detect`k''
|
|
matrix `indexes'[`k',3]=`Iss`k''
|
|
matrix `indexes'[`k',4]=`R`k''
|
|
di _col(5) in green "`k' clusters:" _col(27) in yellow %8.5f `detect`k'' _col(38) %8.5f `Iss`k'' _col(49) %8.5f `R`k''
|
|
}
|
|
return matrix indexes=`indexes'
|
|
}
|
|
|
|
|
|
forvalues k=1/`nbpart' {
|
|
di ""
|
|
local rowmempart=`nbitems'-`part`k''
|
|
di in green _col(8) "Number of clusters : `part`k''"
|
|
tempname affect`part`k''
|
|
matrix define `affect`part`k'''=J(1,`nbitems',0)
|
|
forvalues i=1/`part`k'' {
|
|
di
|
|
di in green _col(12) "Cluster `i':"
|
|
local rownodes=`mempart'[`i',`rowmempart']
|
|
local itemsinthecluster=`nodes'[1,`rownodes']
|
|
forvalues j=5/`=4+`itemsinthecluster'' {
|
|
local tmp=`nodes'[`j',`rownodes']
|
|
matrix `affect`part`k'''[1,`tmp']=`i'
|
|
di in yellow _col(13)"``tmp''"
|
|
}
|
|
}
|
|
matrix colnames `affect`part`k'''=`varlist'
|
|
return matrix affect`part`k''=`affect`part`k'''
|
|
}
|
|
|
|
return matrix mempart `mempart'
|
|
return matrix nodes `nodes'
|
|
return local nbitems=`nbitems'
|
|
return local varlist `varlist'
|
|
end
|
|
|
|
/*********************************************************
|
|
*Partition
|
|
*Version 2 (May 10, 2004)
|
|
*
|
|
*Historic
|
|
*Version 1 (January 18, 2004)
|
|
***********************************************************/
|
|
|
|
program define partition
|
|
version 8.0
|
|
syntax anything(name=partition)
|
|
|
|
local nbitems=r(nbitems)
|
|
tempname mempart nodes
|
|
matrix `mempart'=r(mempart)
|
|
matrix `nodes'=r(nodes)
|
|
local varlist "`r(varlist)'"
|
|
|
|
local nbpart:word count `partition'
|
|
tokenize `partition'
|
|
|
|
forvalues k=1/`nbpart' {
|
|
local part`k'=``k''
|
|
}
|
|
tokenize `varlist'
|
|
|
|
forvalues k=1/`nbpart' {
|
|
di ""
|
|
local rowmempart=`nbitems'-`part`k''
|
|
di in green _col(8) "Number of clusters : `part`k''"
|
|
forvalues i=1/`part`k'' {
|
|
di
|
|
di in green _col(12) "Cluster `i':"
|
|
local rownodes=`mempart'[`i',`rowmempart']
|
|
local itemsinthecluster=`nodes'[1,`rownodes']
|
|
forvalues j=5/`=4+`itemsinthecluster'' {
|
|
local tmp=`nodes'[`j',`rownodes']
|
|
di in yellow _col(13)"``tmp''"
|
|
}
|
|
}
|
|
}
|
|
end
|
|
|