*! Confirmatory factor analysis with a single factor: v.2.2 *! Stas Kolenikov, skolenik-gmail-com program define cfa1, eclass version 9.1 if replay() { if ("`e(cmd)'" != "cfa1") error 301 Replay `0' } else Estimate `0' end program Estimate `0', eclass syntax varlist(numeric min=3) [if] [in] [aw pw / ] /// , [unitvar FREE POSvar FROM(str) CONSTRaint(numlist) LEVel(int $S_level) /// ROBust VCE(string) CLUster(passthru) SVY SEArch(passthru) * ] * syntax: cfa1 <list of the effect indicators> * untivar is for the identification condition of the unit variance of the latent variable unab varlist : `varlist' tokenize `varlist' local q: word count `varlist' marksample touse preserve qui keep if `touse' * weights! global CFA1N = _N /* * we'll estimate the means instead qui foreach x of varlist `varlist' { sum `x', meanonly replace `x' = `x'-r(mean) * deviations from the mean } */ if "`weight'" != "" { local mywgt [`weight'=`exp'] } if "`robust'`cluster'`svy'`weight'"~="" { local needed 1 } else { local needed 0 } Parse `varlist' , `unitvar' local toml `r(toml)' if "`from'" == "" { local from `r(tostart)', copy } * identification constraint free global CFA1constr `r(free)' if "`unitvar'" ~= "" { * identification by unit variance constraint $CFA1constr [phi]_cons = 1 } else if "`free'"=="" { * identification by the first variable constraint $CFA1constr [`1']_cons = 1 } else { * identification imposed by user global CFA1constr } local nconstr : word count `constraint' global CFA1PV = ("`posvar'" != "") if "`posvar'" ~= "" { di as text _n "Fitting the model without restrictions on error variances..." } * variance estimation local vce = trim("`vce'") if "`vce'" == "boot" local vce bootstrap if "`vce'" == "sbentler" { global CFA1SBV = 1 local vce } else { if index("robustoimopg","`vce'") { local vce vce(`vce') } else { di as err "`vce' estimation is not supported" if "`vce'" == "bootstrap" | "`vce'" == "boot" { di as err "try {help bootstrap} command directly" } exit 198 } } tempname ilog1 ilog2 Tryit ml model lf cfa1_lf `toml' `mywgt', constraint($CFA1constr `constraint') /// init (`from') maximize nooutput `options' `search' /// `svy' `cluster' `robust' `vce' /* ml model lf cfa1_lf `toml' `mywgt', /// constraint($CFA1constr `constraint') `svy' `robust' `cluster' /// maximize * ml check * ml search, rep(5) ml init `from', copy cap noi ml maximize , `options' */ mat `ilog1' = e(ilog) local nz = 0 if $CFA1PV { * determine if refitting the model is needed tempname ll_unr scalar `ll_unr' = e(ll) forvalues i=1/`q' { if [``i''_v]_cons <0 { constraint free local ccc = `r(free)' const define `ccc' [``i''_v]_cons = 0 local zerolist `zerolist' `ccc' local ++nz } } local zerolist = trim("`zerolist'") global CFA1constr $CFA1constr `zerolist' if "`zerolist'" ~= "" { di as text _n "Fitting the model with some error variances set to zero..." Tryit ml model lf cfa1_lf `toml' `mywgt' , constraint($CFA1constr `constraint') /// init (`from') maximize nooutput `options' `search' /// `svy' `robust' `cluster' `vce' mat `ilog2' = e(ilog) } * adjust degrees of freedom! } * we better have this before Satorra-Bentler if "`unitvar'" ~= "" { ereturn local normalized Latent Variance } else if "`free'"=="" { ereturn local normalized `1' } * work out Satorra-Bentler estimates if "$CFA1SBV"!="" { * repost Satorra-Bentler covariance matrix tempname SBVar SBV Delta Gamma cap SatorraBentler if _rc { di as err "Satorra-Bentler standard errors are not supported for this circumstance; revert to vce(oim)" global CFA1SBV } else { mat `SBVar' = r(SBVar) mat `Delta' = r(Delta) mat `Gamma' = r(Gamma) mat `SBV' = r(SBV) ereturn repost V = `SBVar' ereturn matrix SBGamma = `Gamma', copy ereturn matrix SBDelta = `Delta', copy ereturn matrix SBV = `SBV', copy ereturn local vce SatorraBentler } } * get the covariance matrix and the number of observations! *********************************************************** tempname lambda vars phi S Sindep Sigma trind eb qui mat accum `S' = `varlist', dev nocons mat `S' = `S' / $CFA1N * implied matrix mat `eb' = e(b) mat `lambda' = `eb'[1,1..`q'] mat `vars' = `eb'[1,`q'+1..2*`q'] scalar `phi' = `eb'[1,3*`q'+1] mat `Sigma' = `lambda''*`phi'*`lambda' + diag(`vars') mat `Sindep' = diag(vecdiag(`S')) * test against independence mat `trind' = trace( syminv(`Sindep') * `S' ) local trind = `trind'[1,1] ereturn scalar ll_indep = -0.5 * `q' * $CFA1N * ln(2*_pi) - 0.5 * $CFA1N * ln(det(`Sindep')) - 0.5 * $CFA1N * `trind' ereturn scalar lr_indep = 2*(e(ll)-e(ll_indep)) ereturn scalar df_indep = `q'-`nz'-`nconstr' ereturn scalar p_indep = chi2tail(e(df_indep),e(lr_indep)) * goodness of fit test ereturn scalar ll_u = -0.5 * `q' * $CFA1N * ln(2*_pi) - 0.5 * $CFA1N * ln(det(`S')) - 0.5 * `q' * $CFA1N ereturn scalar lr_u = -2*(e(ll)-e(ll_u)) ereturn scalar df_u = `q'*(`q'+1)*.5 - (2*`q' - `nz' - `nconstr') * wrong if there are any extra constraints in -constraint- command!!! ereturn scalar p_u = chi2tail(e(df_u),e(lr_u)) ereturn matrix ilog1 `ilog1' cap ereturn matrix ilog2 `ilog2' * Satorra-Bentler corrections if "$CFA1SBV"!="" { * compute the corrected tests, too * Satorra-Bentler 1994 tempname U trUG2 Tdf mat `U' = `SBV' - `SBV'*`Delta'*syminv(`Delta''*`SBV'*`Delta')*`Delta''*`SBV' ereturn matrix SBU = `U' mat `U' = trace( e(SBU)*`Gamma' ) ereturn scalar SBc = `U'[1,1]/e(df_u) ereturn scalar Tscaled = e(lr_u)/e(SBc) ereturn scalar p_Tscaled = chi2tail( e(df_u), e(Tscaled) ) mat `trUG2' = trace( e(SBU)*`Gamma'*e(SBU)*`Gamma') ereturn scalar SBd = `U'[1,1]*`U'[1,1]/`trUG2'[1,1] ereturn scalar Tadj = ( e(SBd)/`U'[1,1]) * e(lr_u) ereturn scalar p_Tadj = chi2tail( e(SBd), e(Tadj) ) * Yuan-Bentler 1997 * weights! ereturn scalar T2 = e(lr_u)/(1+e(lr_u)/e(N) ) ereturn scalar p_T2 = chi2tail( e(df_u), e(T2) ) } if "`posvar'" ~= "" { ereturn scalar lr_zerov = 2*(`ll_unr' - e(ll)) ereturn scalar df_zerov = `nz' local replay_opt posvar llu(`ll_unr') } ereturn local cmd cfa1 Replay , `replay_opt' level(`level') Finish restore ereturn repost, esample(`touse') end program define Tryit cap noi `0' local rc=_rc if `rc' { Finish exit `rc' } end program define Finish * finishing off constraint drop $CFA1constr global CFA1S global CFA1N global CFA1PV global CFA1theta global CFA1arg global CFA1data global CFA1constr global CFA1vars global CFA1SBV end program define Replay syntax, [posvar llu(str) level(passthru)] di _n as text "Log likelihood = " as res e(ll) _col(59) as text "Number of obs = " as res e(N) di as text "{hline 13}{c TT}{hline 64}" di as text " {c |} Coef. Std. Err. z P>|z| [$S_level% Conf. Interval]" di as text "{hline 13}{c +}{hline 64}" tempname vce mat `vce' = e(V) local q = colsof(`vce') local q = (`q'-1)/3 local a : colfullnames(`vce') tokenize `a' di as text "Lambda{col 14}{c |}" forvalues i = 1/`q' { gettoken v`i' : `i' , parse(":") _diparm `v`i'' , label("`v`i''") prob `level' } di as text "Var[error]{col 14}{c |}" forvalues i = 1/`q' { _diparm `v`i''_v , label("`v`i''") prob `level' } di as text "Means{col 14}{c |}" forvalues i = 1/`q' { _diparm `v`i''_m , label("`v`i''") prob `level' } di as text "Var[latent]{col 14}{c |}" _diparm phi , label("phi1") prob di as text "{hline 13}{c +}{hline 64}" di as text "R2{col 14}{c |}" forvalues i = 1/`q' { di as text %12s "`v`i''" "{col 14}{c |}{col 20}" /// as res %6.4f (_b[`v`i'':_cons]^2*_b[phi:_cons]) / /// (_b[`v`i'':_cons]^2*_b[phi:_cons] + _b[`v`i''_v:_cons]) } di as text "{hline 13}{c BT}{hline 64}" if e(df_u)>0 { di as text _n "Goodness of fit test: LR = " as res %6.3f e(lr_u) /// as text _col(40) "; Prob[chi2(" as res %2.0f e(df_u) as text ") > LR] = " as res %6.4f e(p_u) } else { di as text "No degrees of freedom to perform the goodness of fit test" } di as text "Test vs independence: LR = " as res %6.3f e(lr_indep) /// as text _col(40) "; Prob[chi2(" as res %2.0f e(df_indep) as text ") > LR] = " as res %6.4f e(p_indep) if "`e(vce)'" == "SatorraBentler" & e(df_u)>0 { * need to report all those corrected statistics di as text _n "Satorra-Bentler Tbar" _col(26) "= " as res %6.3f e(Tscaled) /// as text _col(40) "; Prob[chi2(" as res %2.0f e(df_u) as text ") > Tbar] = " as res %6.4f e(p_Tscaled) di as text "Satorra-Bentler Tbarbar" _col(26) "= " as res %6.3f e(Tadj) /// as text _col(40) "; Prob[chi2(" as res %4.1f e(SBd) as text ") > Tbarbar] = " as res %6.4f e(p_Tadj) di as text "Yuan-Bentler T2" _col(26) "= " as res %6.3f e(T2) /// as text _col(40) "; Prob[chi2(" as res %2.0f e(df_u) as text ") > T2] = " as res %6.4f e(p_T2) } if "`posvar'" ~= "" { * just estimated? if "`llu'" == "" { di as err "cannot specify -posvar- option, need to refit the whole model" } else { if e(df_zerov)>0 { di as text "Likelihood ratio against negative variances: LR = " as res %6.3f e(lr_zerov) di as text "Conservative Prob[chi2(" as res %2.0f e(df_zerov) as text ") > LR] = " /// as res %6.4f chi2tail(e(df_zerov),e(lr_zerov)) } else { di as text "All variances are non-negative, no need to test against zero variances" } } } end program define Parse , rclass * takes the list of variables and returns the appropriate ml model statement syntax varlist , [unitvar] global CFA1arg global CFA1theta global CFA1vars local q : word count `varlist' * lambdas forvalues i = 1/`q' { local toml `toml' (``i'': ``i'' = ) local tostart `tostart' 1 global CFA1arg $CFA1arg g_``i'' global CFA1theta $CFA1theta l_`i' global CFA1vars $CFA1vars ``i'' } * variances forvalues i = 1/`q' { local toml `toml' (``i''_v: ) local tostart `tostart' 1 global CFA1arg $CFA1arg g_``i''_v global CFA1theta $CFA1theta v_`i' } * means forvalues i = 1/`q' { local toml `toml' (``i''_m: ) qui sum ``i'', mean local mean = r(mean) local tostart `tostart' `mean' global CFA1arg $CFA1arg g_``i''_m global CFA1theta $CFA1theta m_`i' } * variance of the factor local toml `toml' (phi: ) local tostart `tostart' 1 global CFA1arg $CFA1arg g_Phi global CFA1theta $CFA1theta phi * done! return local toml `toml' return local tostart `tostart' end **************************** Satorra-Bentler covariance matrix code program SatorraBentler, rclass version 9.1 syntax [, noisily] * assume the maximization completed, the results are in memory as -ereturn data- * we shall just return the resulting matrix if "`e(normalized)'" == "" { di as err "cannot compute Satorra-Bentler variance estimator with arbitrary identification... yet" exit 198 } * assume sample is restricted to e(sample) * preserve * keep if e(sample) * get the variable names tempname VV bb mat `VV' = e(V) local q = rowsof(`VV') local p = (`q'-1)/3 local eqlist : coleq `VV' tokenize `eqlist' forvalues k=1/`p' { local varlist `varlist' ``k'' } * compute the implied covariance matrix tempname Lambda Theta phi Sigma mat `bb' = e(b) mat `Lambda' = `bb'[1,1..`p'] mat `Theta' = `bb'[1,`p'+1..2*`p'] scalar `phi' = `bb'[1,`q'] mat `Sigma' = `Lambda''*`phi'*`Lambda' + diag(`Theta') * compute the empirical cov matrix tempname SampleCov qui mat accum `SampleCov' = `varlist' , nocons dev * weights!!! mat `SampleCov' = `SampleCov' / (r(N)-1) * compute the matrix Gamma `noisily' di as text "Computing the Gamma matrix of fourth moments..." tempname Gamma SBGamma `varlist' mat `Gamma' = r(Gamma) return add * compute the duplication matrix * Dupl `p' * let's call it from within SBV! * compute the V matrix `noisily' di as text "Computing the V matrix..." SBV `SampleCov' `noisily' tempname V mat `V' = r(SBV) return add * compute the Delta matrix `noisily' di as text "Computing the Delta matrix..." tempname Delta mata : SBDelta("`bb'","`Delta'") *** put the pieces together now tempname DeltaId * enact the constraints! SBconstr `bb' mat `DeltaId' = `Delta' * diag( r(Fixed) ) * those should be in there, but it never hurts to fix! if "`e(normalized)'" == "Latent Variance" { * make the last column null mat `DeltaId' = ( `DeltaId'[1...,1...3*`p'] , J(rowsof(`Delta'), 1, 0) ) } else if "`e(normalized)'" ~= "" { * normalization by first variable local idvar `e(normalized)' if "`idvar'" ~= "`1'" { di as err "cannot figure out the identification variable" exit 198 } mat `DeltaId' = ( J(rowsof(`Delta'), 1, 0) , `DeltaId'[1...,2...] ) } local dcnames : colfullnames `bb' local drnames : rownames `Gamma' mat colnames `DeltaId' = `dcnames' mat rownames `DeltaId' = `drnames' return matrix Delta = `DeltaId', copy tempname VVV mat `VVV' = ( `DeltaId'' * `V' * `DeltaId' ) mat `VVV' = syminv(`VVV') mat `VVV' = `VVV' * ( `DeltaId'' * `V' * `Gamma' * `V' * `DeltaId' ) * `VVV' * add the covariance matrix for the means, which is just Sigma/_N * weights! tempname CovM mat `CovM' = ( J(2*`p',colsof(`bb'),0) \ J(`p',2*`p',0) , `Sigma', J(`p',1,0) \ J(1, colsof(`bb'), 0) ) mat `VVV' = (`VVV' + `CovM')/_N return matrix SBVar = `VVV' end * of satorrabentler program define SBGamma, rclass syntax varlist unab varlist : `varlist' tokenize `varlist' local p: word count `varlist' forvalues k=1/`p' { * make up the deviations * weights!!! qui sum ``k'', meanonly tempvar d`k' qui g double `d`k'' = ``k'' - r(mean) local dlist `dlist' `d`k'' } local pstar = `p'*(`p'+1)/2 forvalues k=1/`pstar' { tempvar b`k' qui g double `b`k'' = . local blist `blist' `b`k'' } * convert into vech (z_i-bar z)(z_i-bar z)' mata : SBvechZZtoB("`dlist'","`blist'") * blist now should contain the moments around the sample means * we need to get their covariance matrix tempname Gamma qui mat accum `Gamma' = `blist', dev nocons * weights! mat `Gamma' = `Gamma'/(_N-1) mata : Gamma = st_matrix( "`Gamma'" ) * make nice row and column names forvalues i=1/`p' { forvalues j=`i'/`p' { local namelist `namelist' ``i''_X_``j'' } } mat colnames `Gamma' = `namelist' mat rownames `Gamma' = `namelist' return matrix Gamma = `Gamma' end * of computing Gamma program define SBV, rclass args A noisily tempname D Ainv V local p = rowsof(`A') `noisily' di as text "Computing the duplication matrix..." mata : Dupl(`p',"`D'") mat `Ainv' = syminv(`A') mat `V' = .5*`D''* (`Ainv' # `Ainv') * `D' return matrix SBV = `V' end * of computing V * need to figure out whether a constraint has the form parameter = value, * and to nullify the corresponding column program define SBconstr, rclass args bb tempname Iq mat `Iq' = J(1,colsof(`bb'),1) tokenize $CFA1constr while "`1'" ~= "" { constraint get `1' local constr `r(contents)' gettoken param value : constr, parse("=") * is the RHS indeed a number? local value = substr("`value'",2,.) confirm number `value' * parse the square brackets and turn them into colon * replace the opening brackets with nothing, and closing brackets, with : local param = subinstr("`param'","["," ",1) local param = subinstr("`param'","]",":",1) local param = trim("`param'") local coln = colnumb(`bb',"`param'" ) mat `Iq'[1,`coln']=0 mac shift } return matrix Fixed = `Iq' end cap mata : mata drop SBvechZZtoB() cap mata : mata drop Dupl() cap mata : mata drop SBDelta() mata: void SBvechZZtoB(string dlist, string blist) { // view the deviation variables st_view(data=.,.,tokens(dlist)) // view the moment variables // blist=st_local("blist") st_view(moments=.,.,tokens(blist)) // vectorize! for(i=1; i<=rows(data); i++) { B = data[i,.]'*data[i,.] moments[i,.] = vech(B)' } } void Dupl(scalar p, string Dname) { pstar = p*(p+1)/2 Ipstar = I(pstar) D = J(p*p,0,.) for(k=1;k<=pstar;k++) { D = (D, vec(invvech(Ipstar[.,k]))) } st_matrix(Dname,D) } void SBDelta(string bbname, string DeltaName) { bb = st_matrix(bbname) p = (cols(bb)-1)/3 Lambda = bb[1,1..p] Theta = bb[1,p+1..2*p] phi = bb[1,cols(bb)] Delta = J(0,cols(bb),.) for(i=1;i<=p;i++) { for(j=i;j<=p;j++) { DeltaRow = J(1,cols(Delta),0) for(k=1;k<=p;k++) { // derivative wrt lambda_k DeltaRow[k] = (k==i)*Lambda[j]*phi + (j==k)*Lambda[i]*phi // derivative wrt sigma^2_k DeltaRow[p+k] = (i==k)*(j==k) } DeltaRow[cols(Delta)] = Lambda[i]*Lambda[j] Delta = Delta \ DeltaRow } } st_matrix(DeltaName,Delta) } end * of mata piece ***************************** end of Satorra-Bentler covariance matrix code exit History: v.1.0 -- May 19, 2004: basic operation, method d0 v.1.1 -- May 19, 2004: identification by -constraint- common -cfa1_ll- from() v.1.2 -- May 21, 2004: method -lf-, robust constraint free v.1.3 -- unknown v.1.4 -- Feb 15, 2005: pweights, arbitrary constraints v.2.0 -- Feb 28, 2006: update to version 9 using Mata v.2.1 -- Apr 11, 2006: whatever v.2.2 -- Apr 13, 2006: Satorra-Bentler standard errors and test corrections -vce- option Apr 14, 2006: degrees of freedom corrected for # constraints July 5, 2006: minor issue with -from(, copy)-