You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

502 lines
15 KiB
Plaintext

************************************************************************************************************
* hcaccprox: Hierachical Clusters Analysis/CCPROX
* Version 1: May 12, 2004
* Add-on: Partition version 2 (2004-04-10)
*
* Use the Detect Stata program (http://freeirt.free.fr)
*
* Historic :
* Version 1 [2004-01-18], Jean-Benoit Hardouin
*
* Jean-benoit Hardouin, Regional Health Observatory of Orl<72>ans - France
* jean-benoit.hardouin@neuf.fr
*
* News about this program : http://anaqol.free.fr
* FreeIRT Project : http://freeirt.free.fr
*
* Copyright 2004 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
************************************************************************************************************
program define hcaccprox , rclass
version 8.0
syntax varlist(min=2 numeric) [,PROX(string) METHod(string) PARTition(numlist) MEASures DETails DETect(integer 0)]
local nbpart:word count `partition'
tokenize `partition'
forvalues k=1/`nbpart' {
local part`k'=``k''
}
local nbitems : word count `varlist'
tokenize `varlist'
tempname proximity whereitems
matrix define `proximity'=J(`nbitems',`nbitems',0)
matrix define `whereitems'=J(`=`nbitems'-1',`nbitems',0)
if `detect'>`nbitems' {
di _col(3) in green "The number of partitions analyzed by the DETECT criterion must be inferior to the number of possible partitions"
di _col(3) in green "This number of possible partitions is `=`nbitems'-1', so your detect option is put to this number"
local detect=`nbitems'-1
di
}
if "`prox'"!="a"&"`prox'"!="ad"&"`prox'"!="cor"&"`prox'"!="ccov"&"`prox'"!="ccor"&"`prox'"!="mh" {
if "`prox'"=="" {
local prox="ccov"
}
else {
di in red "You must define an existing measure of proximity (a, ad, cor, ccov, ccor, mh)."
di in red "Please correct your prox option."
exit
}
}
if "`method'"!="UPGMA"&"`method'"!="single"&"`method'"!="complete" {
if "`method'"=="" {
local method="UPGMA"
}
else {
di in red "Tou must define an existing method to define the proximity between two clusters of items:"
di in red _col(10) "- UPGMA: Unweighted Pair-Group Method of Average"
di in red _col(10) "- single: single linkage"
di in red _col(10) "- complete: complete linkage "
di in red "Please correct your method option"
exit
}
}
forvalues i=1/`nbitems' {
matrix `whereitems'[1,`i']=`i'
if "`details'"!="" {
di in green _col(3) "The item " _col(13) in yellow "``i''" in green " correspond to the node " in yellow "`i'"
}
}
tempvar score
egen `score'=rmean(`varlist')
qui replace `score'=`score'*`nbitems'
forvalues k=0/`nbitems' {
qui count if `score'==`k'
local nk`k'=r(N)
}
qui count
local N=r(N)
if "`prox'"=="ccov"|"`prox'"=="mh" {
local proxmin=0
}
/*************************Measure of proximities*********************************/
forvalues i=1/`nbitems' {
forvalues j=`=`i'+1'/`nbitems' {
/***********************************Proximity A**************************/
if "`prox'"=="a" {
qui count if ``i''==1&``j''==1
local tmp11=r(N)
qui count if ``i''==0&``j''==0
local tmp00=r(N)
matrix `proximity'[`i',`j']=sqrt(1-`tmp11'/(`N'-`tmp00'))
matrix `proximity'[`j',`i']=`proximity'[`i',`j']
}
/***********************************Proximity AD*************************/
if "`prox'"=="ad" {
qui count if ``i''==1&``j''==1
local tmp11=r(N)
qui count if ``i''==0&``j''==0
local tmp00=r(N)
matrix `proximity'[`i',`j']=sqrt(1-(`tmp11'+`tmp00')/`N')
matrix `proximity'[`j',`i']=`proximity'[`i',`j']
}
/**********************************Proximity COR*************************/
if "`prox'"=="cor" {
qui count if ``i''==1&``j''==1
local tmp11=r(N)
qui count if ``i''==0&``j''==0
local tmp00=r(N)
qui count if ``i''==1&``j''==0
local tmp10=r(N)
qui count if ``i''==0&``j''==1
local tmp01=r(N)
matrix `proximity'[`i',`j']=sqrt(2*(1-(`tmp11'*`tmp00'-`tmp10'*`tmp01')/(sqrt((`tmp11'+`tmp10')*(`tmp11'+`tmp01')*(`tmp00'+`tmp10')*(`tmp00'+`tmp01')))))
matrix `proximity'[`j',`i']=`proximity'[`i',`j']
}
/***********************************Proximity CCOV**********************/
if "`prox'"=="ccov" {
local dij=0
forvalues k=1/`=`nbitems'-1' {
if `nk`k''!=0 {
qui corr ``i'' ``j'',cov
local covi`i'j`j'k`k'=r(cov_12)
local dij=`dij'+`covi`i'j`j'k`k''*`nk`k''
}
}
matrix `proximity'[`i',`j']=-`dij'/`N'
matrix `proximity'[`j',`i']=`proximity'[`i',`j']
if `proxmin'<`dij'/`N' {
local proxmin=`dij'/`N'
}
}
/***********************************Proximity CCOR**********************/
if "`prox'"=="ccor" {
local dij=0
forvalues k=1/`=`nbitems'-1' {
if `nk`k''!=0 {
qui corr ``i'' ``j''
local cori`i'j`j'k`k'=r(rho)
local dij=`dij'+`cori`i'j`j'k`k''*`nk`k''
}
}
matrix `proximity'[`i',`j']=sqrt(2*(1-`dij'/`N'))
matrix `proximity'[`j',`i']=`proximity'[`i',`j']
}
/***********************************Proximity MH************************/
if "`prox'"=="mh" {
local numij=0
local denom=0
forvalues k=1/`=`nbitems'-1' {
if `nk`k''!=0 {
qui count if ``i''==1&``j''==1
local A=r(N)
qui count if ``i''==0&``j''==1
local B=r(N)
qui count if ``i''==1&``j''==0
local C=r(N)
qui count if ``i''==0&``j''==0
local D=r(N)
if `B'!=0&`C'!=0 {
local numij=`numij'+`A'*`D'/`nk`k''
local denomij=`denomij'+`B'*`C'/`nk`k''
}
}
}
matrix `proximity'[`i',`j']=-log(`numij'/`denomij')
matrix `proximity'[`j',`i']=`proximity'[`i',`j']
if `proxmin'<log(`numij'/`denomij') {
local proxmin=-`proximity'[`i',`j']
}
}
}
}
if "`prox'"=="ccov"|"`prox'"=="mh" {
forvalues i=1/`nbitems' {
forvalues j=`=`i'+1'/`nbitems' {
matrix `proximity'[`i',`j']=`proximity'[`i',`j']+`proxmin'
matrix `proximity'[`j',`i']=`proximity'[`i',`j']
}
}
}
/**********************END OD THE COMPUTING OF THE PROXIMITIES**************************************/
if "`measures'"!="" {
di
matrix rowname `proximity'=`varlist'
matrix colname `proximity'=`varlist'
di in green _col(3) "Measures of proximity between the items"
matrix list `proximity', noheader
di
}
/**********************STEP 0**********************************************************************/
tempname currentprox nodes conclinesnodes mempart
matrix `currentprox'=`proximity'
matrix define `nodes'=J(`=`nbitems'+4',`=2*`nbitems'-1',0)
matrix define `conclinesnodes'=J(1,`nbitems',0)
matrix define `mempart'=J(`=`nbitems'+2',`=`nbitems'-1',0)
forvalues i=1/`nbitems' {
matrix `nodes'[1,`i']=1
matrix `nodes'[2,`i']=1
matrix `nodes'[5,`i']=`i'
matrix `conclinesnodes'[1,`i']=`i'
}
/*********************************CLUSTERING PROCEDURE*************************************/
forvalues k=1/`=`nbitems'-1' {
local nbclusters=`nbitems'-`k'+1
local distmin=`currentprox'[1,2]
local cl1=1
local cl2=2
forvalues i=1/`nbclusters' {
forvalues j=`=`i'+1'/`nbclusters' {
if `distmin'>`currentprox'[`i',`j'] {
local distmin=`currentprox'[`i',`j']
local cl1=`i'
local cl2=`j'
}
}
}
local linescl1=`conclinesnodes'[1,`cl1']
local nbitemscl1=`nodes'[1,`linescl1']
matrix `nodes'[2,`linescl1']=0
local linescl2=`conclinesnodes'[1,`cl2']
local nbitemscl2=`nodes'[1,`linescl2']
matrix `nodes'[2,`linescl2']=0
matrix `nodes'[1,`=`nbitems'+`k'']=`nbitemscl1'+`nbitemscl2'
matrix `nodes'[2,`=`nbitems'+`k'']=1
matrix `nodes'[3,`=`nbitems'+`k'']=`linescl1'
matrix `nodes'[4,`=`nbitems'+`k'']=`linescl2'
if "`details'"!="" {
di in green _col(3) "The nodes" _col(13) in yellow "`linescl1'" _col(17) in green "and" _col(21) in yellow "`linescl2'" _col(25) in green "are been aggregated to form the node " in yellow "`=`nbitems'+`k''"
}
forvalues i=5/`=`nbitemscl1'+4' {
local item=`nodes'[`i',`linescl1']
matrix `nodes'[`i',`=`nbitems'+`k'']=`item'
matrix `whereitems'[`k',`item']=`=`nbitems'+`k''
}
forvalues i=5/`=`nbitemscl2'+4' {
local item=`nodes'[`i',`linescl2']
matrix `nodes'[`=`i'+`nbitemscl1'',`=`nbitems'+`k'']=`item'
matrix `whereitems'[`k',`item']=`=`nbitems'+`k''
}
local tmp=1
forvalues i=1/`=`nbitems'+`k'' {
if `nodes'[2,`i']==1 {
matrix `mempart'[`tmp',`k']=`i'
local tmp=`tmp'+1
}
}
if `detect'>=`=`nbitems'-`k'' {
local partdetect
local compteur=1
local scaledetect
forvalues i=1/`=`nbitems'-`k'' {
local scaledetect`i'
}
forvalues i=1/`=`nbitems'+`k'' {
if `nodes'[2,`i']==1{
local tmp=`nodes'[1,`i']
local partdetect `partdetect' `tmp'
local tmp2=4+`tmp'
forvalues j=5/`tmp2' {
local tmp3=`nodes'[`j',`i']
local scaledetect`compteur' `scaledetect`compteur'' ``tmp3''
}
local scaledetect `scaledetect' `scaledetect`compteur''
local compteur=`compteur'+1
}
}
qui detect `scaledetect' , partition(`partdetect')
local detect`=`nbclusters'-1'=r(DETECT)
local R`=`nbclusters'-1'=r(R)
local Iss`=`nbclusters'-1'=r(Iss)
}
matrix drop `currentprox'
matrix define `currentprox'=J(`=`nbclusters'-1',`=`nbclusters'-1',0)
matrix drop `conclinesnodes'
matrix define `conclinesnodes'=J(1,`=`nbclusters'-1',0)
local tmp=1
forvalues i=1/`=`nbitems'+`k'' {
if `nodes'[2,`i']==1 {
matrix `conclinesnodes'[1,`tmp']=`i'
local tmp=`tmp'+1
}
}
forvalues i=1/`=`nbclusters'-1' {
forvalues j=`=`i'+1'/`=`nbclusters'-1' {
if "`method'"=="UPGMA" {
local moy=0
local linescl1=`conclinesnodes'[1,`i']
local nbitemscl1=`nodes'[1,`linescl1']
local linescl2=`conclinesnodes'[1,`j']
local nbitemscl2=`nodes'[1,`linescl2']
forvalues l=5/`=`nbitemscl1'+4' {
forvalues m=5/`=`nbitemscl2'+4' {
local item1=`nodes'[`l',`linescl1']
local item2=`nodes'[`m',`linescl2']
local tmp=`proximity'[`item1',`item2']
local moy=`moy'+`tmp'
}
}
matrix `currentprox'[`i',`j']=`moy'/(`nbitemscl1'*`nbitemscl2')
matrix `currentprox'[`j',`i']=`moy'/(`nbitemscl1'*`nbitemscl2')
}
if "`method'"=="single" {
local moy=0
local linescl1=`conclinesnodes'[1,`i']
local nbitemscl1=`nodes'[1,`linescl1']
local linescl2=`conclinesnodes'[1,`j']
local nbitemscl2=`nodes'[1,`linescl2']
forvalues l=5/`=`nbitemscl1'+4' {
forvalues m=5/`=`nbitemscl2'+4' {
local item1=`nodes'[`l',`linescl1']
local item2=`nodes'[`m',`linescl2']
if `l'==5&`m'==5 {
local distmin=`proximity'[`item1',`item2']
}
else {
if `distmin'>`proximity'[`item1',`item2'] {
local distmin=`proximity'[`item1',`item2']
}
}
}
}
matrix `currentprox'[`i',`j']=`distmin'
matrix `currentprox'[`j',`i']=`distmin'
}
if "`method'"=="complete" {
local moy=0
local linescl1=`conclinesnodes'[1,`i']
local nbitemscl1=`nodes'[1,`linescl1']
local linescl2=`conclinesnodes'[1,`j']
local nbitemscl2=`nodes'[1,`linescl2']
local distmax=0
forvalues l=5/`=`nbitemscl1'+4' {
forvalues m=5/`=`nbitemscl2'+4' {
local item1=`nodes'[`l',`linescl1']
local item2=`nodes'[`m',`linescl2']
if `distmax'<`proximity'[`item1',`item2'] {
local distmax=`proximity'[`item1',`item2']
}
}
}
matrix `currentprox'[`i',`j']=`distmax'
matrix `currentprox'[`j',`i']=`distmax'
}
}
}
}
if `detect'!=0 {
tempname indexes
matrix define `indexes'=J(`detect',4,0)
matrix colnames `indexes'=Clusters DETECT Iss R
di ""
di in green _col(7) "Indexes to test the `detect' latest partitions of the items"
di ""
di in green _col(29) "DETECT" _col(43) "Iss" _col(56) "R"
di _col(5) in green "Only one cluster:" _col(27) in yellow %8.5f `detect1' _col(38) %8.5f `Iss1' _col(49) %8.5f `R1'
matrix `indexes'[1,1]=1
matrix `indexes'[1,2]=`detect1'
matrix `indexes'[1,3]=`Iss1'
matrix `indexes'[1,4]=`R1'
forvalues k=2/`detect' {
matrix `indexes'[`k',1]=`k'
matrix `indexes'[`k',2]=`detect`k''
matrix `indexes'[`k',3]=`Iss`k''
matrix `indexes'[`k',4]=`R`k''
di _col(5) in green "`k' clusters:" _col(27) in yellow %8.5f `detect`k'' _col(38) %8.5f `Iss`k'' _col(49) %8.5f `R`k''
}
return matrix indexes=`indexes'
}
forvalues k=1/`nbpart' {
di ""
local rowmempart=`nbitems'-`part`k''
di in green _col(8) "Number of clusters : `part`k''"
tempname affect`part`k''
matrix define `affect`part`k'''=J(1,`nbitems',0)
forvalues i=1/`part`k'' {
di
di in green _col(12) "Cluster `i':"
local rownodes=`mempart'[`i',`rowmempart']
local itemsinthecluster=`nodes'[1,`rownodes']
forvalues j=5/`=4+`itemsinthecluster'' {
local tmp=`nodes'[`j',`rownodes']
matrix `affect`part`k'''[1,`tmp']=`i'
di in yellow _col(13)"``tmp''"
}
}
matrix colnames `affect`part`k'''=`varlist'
return matrix affect`part`k''=`affect`part`k'''
}
return matrix mempart `mempart'
return matrix nodes `nodes'
return local nbitems=`nbitems'
return local varlist `varlist'
end
/*********************************************************
*Partition
*Version 2 (May 10, 2004)
*
*Historic
*Version 1 (January 18, 2004)
***********************************************************/
program define partition
version 8.0
syntax anything(name=partition)
local nbitems=r(nbitems)
tempname mempart nodes
matrix `mempart'=r(mempart)
matrix `nodes'=r(nodes)
local varlist "`r(varlist)'"
local nbpart:word count `partition'
tokenize `partition'
forvalues k=1/`nbpart' {
local part`k'=``k''
}
tokenize `varlist'
forvalues k=1/`nbpart' {
di ""
local rowmempart=`nbitems'-`part`k''
di in green _col(8) "Number of clusters : `part`k''"
forvalues i=1/`part`k'' {
di
di in green _col(12) "Cluster `i':"
local rownodes=`mempart'[`i',`rowmempart']
local itemsinthecluster=`nodes'[1,`rownodes']
forvalues j=5/`=4+`itemsinthecluster'' {
local tmp=`nodes'[`j',`rownodes']
di in yellow _col(13)"``tmp''"
}
}
}
end