************************************************************************************************************
* hcaccprox: Hierachical Clusters Analysis/CCPROX
* Version 1: May 12, 2004
* Add-on: Partition version 2 (2004-04-10)
*
* Use the Detect Stata program (http://freeirt.free.fr)
*
* Historic :
* Version 1 [2004-01-18], Jean-Benoit Hardouin
*
* Jean-benoit Hardouin, Regional Health Observatory of Orléans - France
* jean-benoit.hardouin@neuf.fr
*
* News about this program : http://anaqol.free.fr
* FreeIRT Project : http://freeirt.free.fr
*
* Copyright 2004 Jean-Benoit Hardouin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*
************************************************************************************************************


program define hcaccprox , rclass
version 8.0
syntax varlist(min=2 numeric) [,PROX(string) METHod(string) PARTition(numlist) MEASures DETails DETect(integer 0)]

local nbpart:word count `partition'
tokenize `partition'
forvalues k=1/`nbpart' {
	local part`k'=``k''
}

local nbitems : word count `varlist'
tokenize `varlist'

tempname proximity whereitems 

matrix define `proximity'=J(`nbitems',`nbitems',0)
matrix define `whereitems'=J(`=`nbitems'-1',`nbitems',0)

if `detect'>`nbitems' {
	di _col(3) in green "The number of partitions analyzed by the DETECT criterion must be inferior to the number of possible partitions"
	di _col(3) in green "This number of possible partitions is `=`nbitems'-1', so your detect option is put to this number"
	local detect=`nbitems'-1
	di
}

if "`prox'"!="a"&"`prox'"!="ad"&"`prox'"!="cor"&"`prox'"!="ccov"&"`prox'"!="ccor"&"`prox'"!="mh" {
	if "`prox'"=="" {
		local prox="ccov"
	}
	else {
		di in red "You must define an existing measure of proximity (a, ad, cor, ccov, ccor, mh)."
		di in red "Please correct your prox option."
		exit
	}
}

if "`method'"!="UPGMA"&"`method'"!="single"&"`method'"!="complete" {
	if "`method'"=="" {
		local method="UPGMA"
	}
	else {
		di in red "Tou must define an existing method to define the proximity between two clusters of items:"
		di in red _col(10) "- UPGMA: Unweighted Pair-Group Method of Average"
		di in red _col(10) "- single: single linkage"
		di in red _col(10) "- complete: complete linkage "
		di in red "Please correct your method option"
		exit
	}
}

forvalues i=1/`nbitems' {
	matrix `whereitems'[1,`i']=`i'
	if "`details'"!="" {
		di in green _col(3) "The item " _col(13) in yellow "``i''"  in green " correspond to the node " in yellow "`i'" 
	}
}

tempvar score
egen `score'=rmean(`varlist')
qui replace `score'=`score'*`nbitems'
forvalues k=0/`nbitems' {
	qui count if `score'==`k'
	local nk`k'=r(N)
}

qui count
local N=r(N)

if "`prox'"=="ccov"|"`prox'"=="mh" {
	local proxmin=0
}

/*************************Measure of proximities*********************************/

forvalues i=1/`nbitems' {
	forvalues j=`=`i'+1'/`nbitems' {	
		/***********************************Proximity A**************************/
		if "`prox'"=="a" {
			qui count if ``i''==1&``j''==1
			local tmp11=r(N)
			qui count if ``i''==0&``j''==0
			local tmp00=r(N)

			matrix `proximity'[`i',`j']=sqrt(1-`tmp11'/(`N'-`tmp00'))
			matrix `proximity'[`j',`i']=`proximity'[`i',`j']
		}

		/***********************************Proximity AD*************************/
		if "`prox'"=="ad" {
			qui count if ``i''==1&``j''==1
			local tmp11=r(N)
			qui count if ``i''==0&``j''==0
			local tmp00=r(N)

			matrix `proximity'[`i',`j']=sqrt(1-(`tmp11'+`tmp00')/`N')
			matrix `proximity'[`j',`i']=`proximity'[`i',`j']
		}

		/**********************************Proximity COR*************************/
		if "`prox'"=="cor" {
			qui count if ``i''==1&``j''==1
			local tmp11=r(N)
			qui count if ``i''==0&``j''==0
			local tmp00=r(N)
			qui count if ``i''==1&``j''==0
			local tmp10=r(N)
			qui count if ``i''==0&``j''==1
			local tmp01=r(N)

			matrix `proximity'[`i',`j']=sqrt(2*(1-(`tmp11'*`tmp00'-`tmp10'*`tmp01')/(sqrt((`tmp11'+`tmp10')*(`tmp11'+`tmp01')*(`tmp00'+`tmp10')*(`tmp00'+`tmp01')))))
			matrix `proximity'[`j',`i']=`proximity'[`i',`j']
		}

		/***********************************Proximity CCOV**********************/
		if "`prox'"=="ccov" {
			local dij=0
			forvalues k=1/`=`nbitems'-1' {
				if `nk`k''!=0 {
					qui corr ``i'' ``j'',cov
					local covi`i'j`j'k`k'=r(cov_12)
					local dij=`dij'+`covi`i'j`j'k`k''*`nk`k''
				}
			}

			matrix `proximity'[`i',`j']=-`dij'/`N'
			matrix `proximity'[`j',`i']=`proximity'[`i',`j']
			if `proxmin'<`dij'/`N' {
				local proxmin=`dij'/`N'
			}
		}

		/***********************************Proximity CCOR**********************/

		if "`prox'"=="ccor" {
			local dij=0
			forvalues k=1/`=`nbitems'-1' {
				if `nk`k''!=0 {
					qui corr ``i'' ``j''
					local cori`i'j`j'k`k'=r(rho)
					local dij=`dij'+`cori`i'j`j'k`k''*`nk`k''
				}
			}

			matrix `proximity'[`i',`j']=sqrt(2*(1-`dij'/`N'))
			matrix `proximity'[`j',`i']=`proximity'[`i',`j']
		}
	
		
		/***********************************Proximity MH************************/

		if "`prox'"=="mh" {
			local numij=0
			local denom=0
			forvalues k=1/`=`nbitems'-1' {
				if `nk`k''!=0 {
					qui count if ``i''==1&``j''==1
					local A=r(N)
					qui count if ``i''==0&``j''==1
					local B=r(N)
					qui count if ``i''==1&``j''==0
					local C=r(N)
					qui count if ``i''==0&``j''==0
					local D=r(N)
				 
					if `B'!=0&`C'!=0 {
						local numij=`numij'+`A'*`D'/`nk`k''
						local denomij=`denomij'+`B'*`C'/`nk`k''
					}
				}
			}

			matrix `proximity'[`i',`j']=-log(`numij'/`denomij')
			matrix `proximity'[`j',`i']=`proximity'[`i',`j']
			if `proxmin'<log(`numij'/`denomij') {
				local proxmin=-`proximity'[`i',`j']
			}
		}
	}
}

if "`prox'"=="ccov"|"`prox'"=="mh" {
	forvalues i=1/`nbitems' {
		forvalues j=`=`i'+1'/`nbitems' {
			matrix `proximity'[`i',`j']=`proximity'[`i',`j']+`proxmin'
			matrix `proximity'[`j',`i']=`proximity'[`i',`j']
		}
	}
}

/**********************END OD THE COMPUTING OF THE PROXIMITIES**************************************/
if "`measures'"!="" {
	di
	matrix rowname `proximity'=`varlist'
	matrix colname `proximity'=`varlist'
	di in green _col(3) "Measures of proximity between the items"
	matrix list `proximity', noheader
	di
}

/**********************STEP 0**********************************************************************/

tempname currentprox nodes conclinesnodes mempart

matrix `currentprox'=`proximity'
matrix define `nodes'=J(`=`nbitems'+4',`=2*`nbitems'-1',0)
matrix define `conclinesnodes'=J(1,`nbitems',0)
matrix define `mempart'=J(`=`nbitems'+2',`=`nbitems'-1',0)
forvalues i=1/`nbitems' {
	matrix `nodes'[1,`i']=1
	matrix `nodes'[2,`i']=1
	matrix `nodes'[5,`i']=`i'
	matrix `conclinesnodes'[1,`i']=`i'
}


/*********************************CLUSTERING PROCEDURE*************************************/

forvalues k=1/`=`nbitems'-1' {
	local nbclusters=`nbitems'-`k'+1
	local distmin=`currentprox'[1,2]
	local cl1=1
	local cl2=2

	forvalues i=1/`nbclusters' {
		forvalues j=`=`i'+1'/`nbclusters' {
			if `distmin'>`currentprox'[`i',`j'] {
				local distmin=`currentprox'[`i',`j']
				local cl1=`i'
				local cl2=`j'
			}
		}
	}

	local linescl1=`conclinesnodes'[1,`cl1']
	local nbitemscl1=`nodes'[1,`linescl1']
	matrix `nodes'[2,`linescl1']=0
	local linescl2=`conclinesnodes'[1,`cl2']
	local nbitemscl2=`nodes'[1,`linescl2']
	matrix `nodes'[2,`linescl2']=0
	matrix `nodes'[1,`=`nbitems'+`k'']=`nbitemscl1'+`nbitemscl2'
	matrix `nodes'[2,`=`nbitems'+`k'']=1
	matrix `nodes'[3,`=`nbitems'+`k'']=`linescl1'
	matrix `nodes'[4,`=`nbitems'+`k'']=`linescl2'
	if "`details'"!="" {
		di in green _col(3) "The nodes" _col(13) in yellow "`linescl1'" _col(17) in green "and" _col(21) in yellow "`linescl2'" _col(25) in green "are been aggregated to form the node " in yellow "`=`nbitems'+`k''"
	}

	forvalues i=5/`=`nbitemscl1'+4' {
		local item=`nodes'[`i',`linescl1']
		matrix `nodes'[`i',`=`nbitems'+`k'']=`item'
		matrix `whereitems'[`k',`item']=`=`nbitems'+`k''
	}
	forvalues i=5/`=`nbitemscl2'+4' {
		local item=`nodes'[`i',`linescl2']
		matrix `nodes'[`=`i'+`nbitemscl1'',`=`nbitems'+`k'']=`item'
		matrix `whereitems'[`k',`item']=`=`nbitems'+`k''
	}

	local tmp=1
	forvalues i=1/`=`nbitems'+`k'' {
		if `nodes'[2,`i']==1 {
			matrix `mempart'[`tmp',`k']=`i'
			local tmp=`tmp'+1
		}
	}



	if `detect'>=`=`nbitems'-`k'' {
		local partdetect
		local compteur=1
		local scaledetect
		forvalues i=1/`=`nbitems'-`k'' {
			local scaledetect`i'
		}
		forvalues i=1/`=`nbitems'+`k'' {
			if `nodes'[2,`i']==1{
				local tmp=`nodes'[1,`i']
				local partdetect `partdetect' `tmp'
				local tmp2=4+`tmp'
				forvalues j=5/`tmp2' {
					local tmp3=`nodes'[`j',`i']
					local scaledetect`compteur' `scaledetect`compteur'' ``tmp3''
				}
			local scaledetect `scaledetect' `scaledetect`compteur''
			local compteur=`compteur'+1
			}
		}
	qui detect `scaledetect' , partition(`partdetect')
	local detect`=`nbclusters'-1'=r(DETECT)
	local R`=`nbclusters'-1'=r(R)
	local Iss`=`nbclusters'-1'=r(Iss)
	}


	matrix drop `currentprox'
	matrix define `currentprox'=J(`=`nbclusters'-1',`=`nbclusters'-1',0)
	matrix drop `conclinesnodes'
	matrix define `conclinesnodes'=J(1,`=`nbclusters'-1',0)

	local tmp=1
	forvalues i=1/`=`nbitems'+`k'' {
		if `nodes'[2,`i']==1 {
			matrix `conclinesnodes'[1,`tmp']=`i'
			local tmp=`tmp'+1
		}
	}
	forvalues i=1/`=`nbclusters'-1' {
		forvalues j=`=`i'+1'/`=`nbclusters'-1' {
			if "`method'"=="UPGMA" {
				local moy=0
				local linescl1=`conclinesnodes'[1,`i']
				local nbitemscl1=`nodes'[1,`linescl1']
				local linescl2=`conclinesnodes'[1,`j']
				local nbitemscl2=`nodes'[1,`linescl2']
				forvalues l=5/`=`nbitemscl1'+4' {
					forvalues m=5/`=`nbitemscl2'+4' {
						local item1=`nodes'[`l',`linescl1']
						local item2=`nodes'[`m',`linescl2']
						local tmp=`proximity'[`item1',`item2']
						local moy=`moy'+`tmp'
					}
				}
				matrix `currentprox'[`i',`j']=`moy'/(`nbitemscl1'*`nbitemscl2')
				matrix `currentprox'[`j',`i']=`moy'/(`nbitemscl1'*`nbitemscl2')
			}
			if "`method'"=="single" {
				local moy=0
				local linescl1=`conclinesnodes'[1,`i']
				local nbitemscl1=`nodes'[1,`linescl1']
				local linescl2=`conclinesnodes'[1,`j']
				local nbitemscl2=`nodes'[1,`linescl2']
				forvalues l=5/`=`nbitemscl1'+4' {
					forvalues m=5/`=`nbitemscl2'+4' {
						local item1=`nodes'[`l',`linescl1']
						local item2=`nodes'[`m',`linescl2']
						if `l'==5&`m'==5 {
							local distmin=`proximity'[`item1',`item2']
						}
						else {
							if `distmin'>`proximity'[`item1',`item2'] {
								local distmin=`proximity'[`item1',`item2']
							}
						}
					}
				}
				matrix `currentprox'[`i',`j']=`distmin'
				matrix `currentprox'[`j',`i']=`distmin'
			}
			if "`method'"=="complete" {
				local moy=0
				local linescl1=`conclinesnodes'[1,`i']
				local nbitemscl1=`nodes'[1,`linescl1']
				local linescl2=`conclinesnodes'[1,`j']
				local nbitemscl2=`nodes'[1,`linescl2']
				local distmax=0
				forvalues l=5/`=`nbitemscl1'+4' {
					forvalues m=5/`=`nbitemscl2'+4' {
						local item1=`nodes'[`l',`linescl1']
						local item2=`nodes'[`m',`linescl2']
						if `distmax'<`proximity'[`item1',`item2'] {
							local distmax=`proximity'[`item1',`item2']
						}
					}
				}
				matrix `currentprox'[`i',`j']=`distmax'
				matrix `currentprox'[`j',`i']=`distmax'
			}
		}
	}
}

if `detect'!=0 {
        tempname indexes
        matrix define `indexes'=J(`detect',4,0)
        matrix colnames `indexes'=Clusters DETECT Iss R
	di ""
	di in green _col(7) "Indexes to test the `detect' latest partitions of the items"
	di ""
	di in green _col(29) "DETECT" _col(43) "Iss" _col(56) "R"
	di _col(5) in green "Only one cluster:" _col(27) in yellow %8.5f `detect1' _col(38) %8.5f  `Iss1' _col(49) %8.5f  `R1'
	matrix `indexes'[1,1]=1
	matrix `indexes'[1,2]=`detect1'
	matrix `indexes'[1,3]=`Iss1'
	matrix `indexes'[1,4]=`R1'
        forvalues k=2/`detect' {
	        matrix  `indexes'[`k',1]=`k'
	        matrix  `indexes'[`k',2]=`detect`k''
	        matrix  `indexes'[`k',3]=`Iss`k''
	        matrix  `indexes'[`k',4]=`R`k''
		di  _col(5) in green "`k' clusters:" _col(27) in yellow %8.5f `detect`k'' _col(38) %8.5f  `Iss`k'' _col(49) %8.5f `R`k''
	}
        return matrix indexes=`indexes'
}


forvalues k=1/`nbpart' {
	di ""
	local rowmempart=`nbitems'-`part`k''
	di in green _col(8) "Number of clusters : `part`k''"
	tempname affect`part`k''
	matrix define `affect`part`k'''=J(1,`nbitems',0)
	forvalues i=1/`part`k'' {
		di
		di in green _col(12) "Cluster `i':"
		local rownodes=`mempart'[`i',`rowmempart']
		local itemsinthecluster=`nodes'[1,`rownodes']
		forvalues j=5/`=4+`itemsinthecluster'' {
			local tmp=`nodes'[`j',`rownodes']
			matrix `affect`part`k'''[1,`tmp']=`i'
			di in yellow _col(13)"``tmp''"
		}
	}
	matrix colnames `affect`part`k'''=`varlist'
        return matrix affect`part`k''=`affect`part`k'''
}

return matrix mempart `mempart'
return matrix nodes `nodes'
return local nbitems=`nbitems'
return local varlist `varlist'
end

/*********************************************************
*Partition
*Version 2 (May 10, 2004)
*
*Historic
*Version 1 (January 18, 2004)
***********************************************************/

program define partition
version 8.0
syntax anything(name=partition)
 
local nbitems=r(nbitems)
tempname mempart nodes
matrix `mempart'=r(mempart)
matrix `nodes'=r(nodes)
local varlist "`r(varlist)'"

local nbpart:word count `partition'
tokenize `partition'

forvalues k=1/`nbpart' {
	local part`k'=``k''
}
tokenize `varlist'

forvalues k=1/`nbpart' {
	di ""
	local rowmempart=`nbitems'-`part`k''
	di in green _col(8) "Number of clusters : `part`k''"
	forvalues i=1/`part`k'' {
		di
		di in green _col(12) "Cluster `i':"
		local rownodes=`mempart'[`i',`rowmempart']
		local itemsinthecluster=`nodes'[1,`rownodes']
		forvalues j=5/`=4+`itemsinthecluster'' {
			local tmp=`nodes'[`j',`rownodes']
			di in yellow _col(13)"``tmp''"
		}
	}
}
end