From 28e43dee33e8596abe2fef84a86be6138e0e7815 Mon Sep 17 00:00:00 2001
From: corentinchoisy <corentin.choisy@proton.me>
Date: Tue, 6 May 2025 08:31:24 +0200
Subject: [PATCH] Added theta estimation methods

---
 DESCRIPTION    |  3 ++-
 NAMESPACE      |  1 +
 R/pcm.R        | 14 ++++++++++++--
 R/residif.R    |  9 ++++++---
 man/pcm.Rd     |  9 ++++++++-
 man/residif.Rd |  6 +++++-
 6 files changed, 34 insertions(+), 8 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 2e3de79..eecc87c 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -14,4 +14,5 @@ RoxygenNote: 7.3.2
 Imports:
     vcrpart,
     rjags,
-    dclone
+    dclone,
+    PP
diff --git a/NAMESPACE b/NAMESPACE
index a765abf..4aa249a 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -4,6 +4,7 @@ export(bpcm)
 export(pcm)
 export(res_ij)
 export(residif)
+import(PP)
 import(dclone)
 import(rjags)
 import(vcrpart)
diff --git a/R/pcm.R b/R/pcm.R
index bb3ebc0..d4d2b61 100644
--- a/R/pcm.R
+++ b/R/pcm.R
@@ -10,11 +10,15 @@
 #' @param grp string containing the name of the column where an optional group membership variable is stored in df
 #' @param dif.items vector containing the list of indexes in "items" corresponding to dif items
 #' @param type.dif vector containing DIF form for each item specified in dif.items. 1 is homogeneous DIF, 0 is heterogeneous DIF
+#' @param verbose set to TRUE to print a detailed output, FALSE otherwise
+#' @param fit string determining the optimization algorithm. Values "ucminf" or "nlminb" are recommended
+#' @param method.theta string determining the estimation method for individual latent variable values. Either "eap", "mle" or "wle"
 #' @return A data.frame containing various model outputs
 #' @import vcrpart
+#' @import PP
 #' @export
 
-pcm <- function(df=NULL,items=NULL,grp=NULL,dif.items=NULL,type.dif=NULL,verbose=T,fit="ucminf") {
+pcm <- function(df=NULL,items=NULL,grp=NULL,dif.items=NULL,type.dif=NULL,verbose=T,fit="ucminf",method.theta="eap") {
   ##### Detecting errors
 
   if (any(!(items %in% colnames(df)))) {
@@ -205,7 +209,13 @@ pcm <- function(df=NULL,items=NULL,grp=NULL,dif.items=NULL,type.dif=NULL,verbose
     }
 
   }
-  theta <- -1*ranef(mod,norm=F)+ifelse(grp==1,beta,0)
+  if (method.theta=="eap") {
+    theta <- c(-1*ranef(mod,norm=F)+ifelse(grp==1,beta,0))
+  } else if (method.theta=="wle") {
+    theta <- PP::PP_gpcm(as.matrix(df[,items]),t(restab),rep(1,length(items)))$resPP$resPP[,1]
+  } else if (method.theta=="mle") {
+    theta <- PP::PP_gpcm(as.matrix(df[,items]),t(restab),rep(1,length(items)),type="mle")$resPP$resPP[,1]
+  }
   resid <- apply(matrix(1:nbitems,ncol=length(nbitems)),1, function(k) sapply(1:nrow(df), function(j) res_ij(theta[j],restab[k,],df[j,items[k]],beta=0)))
   colnames(resid) <- items_o
 
diff --git a/R/residif.R b/R/residif.R
index 418d558..e425b1f 100644
--- a/R/residif.R
+++ b/R/residif.R
@@ -8,11 +8,14 @@
 #' @param df data.frame containing the data
 #' @param items vector containing the names of columns where item responses are stored in df
 #' @param grp vector containing the name of the column where an optional group membership variable is stored in df
+#' @param method.theta string determining the estimation method for individual latent variable values. Either "eap", "mle" or "wle"
+#' @param verbose set to TRUE to print a detailed output, FALSE otherwise
 #' @return A data.frame containing a column listing the detected DIF item and another listing detected DIF forms
 #' @import vcrpart
+#' @import PP
 #' @export
 
-residif <- function(df=NULL,items=NULL,grp=NULL,verbose=T) {
+residif <- function(df=NULL,items=NULL,grp=NULL,method.theta="eap",verbose=T) {
   if (any(!(items %in% colnames(df)))) {
     stop("ERROR: provided item name does not exist in df")
   }
@@ -37,7 +40,7 @@ residif <- function(df=NULL,items=NULL,grp=NULL,verbose=T) {
     cat("#################################################################################################\n")
   }
   startt <- Sys.time()
-  pcm_initial <- pcm(df = df,items = items,grp = grp,verbose=F)
+  pcm_initial <- pcm(df = df,items = items,grp = grp,verbose=F,method.theta = method.theta)
   dat <- df
   dat$score <- rowSums(dat[,items])
   nqt <- ifelse(length(unique(quantile(dat$score,seq(0,1,0.2))))==6,5,length(unique(quantile(dat$score,seq(0,1,0.2))))-1)
@@ -73,7 +76,7 @@ residif <- function(df=NULL,items=NULL,grp=NULL,verbose=T) {
     res.items <- c(res.items,res.item)
     res.uni <- res.anova[[numitem]][3,"Pr(>F)"]>0.05
     res.uniform <- c(res.uniform,res.uni)
-    pcm_while <- pcm(df = df,items = items,grp = grp,dif.items = res.items,type.dif = res.uniform,verbose=F)
+    pcm_while <- pcm(df = df,items = items,grp = grp,dif.items = res.items,type.dif = res.uniform,verbose=F,method.theta = method.theta)
     res.anova <- rep(NA,nbitems)
     pval <- rep(NA,nbitems_o)
     fval <- rep(NA,nbitems_o)
diff --git a/man/pcm.Rd b/man/pcm.Rd
index 21cae17..ec90d33 100644
--- a/man/pcm.Rd
+++ b/man/pcm.Rd
@@ -11,7 +11,8 @@ pcm(
   dif.items = NULL,
   type.dif = NULL,
   verbose = T,
-  fit = "ucminf"
+  fit = "ucminf",
+  method.theta = "eap"
 )
 }
 \arguments{
@@ -24,6 +25,12 @@ pcm(
 \item{dif.items}{vector containing the list of indexes in "items" corresponding to dif items}
 
 \item{type.dif}{vector containing DIF form for each item specified in dif.items. 1 is homogeneous DIF, 0 is heterogeneous DIF}
+
+\item{verbose}{set to TRUE to print a detailed output, FALSE otherwise}
+
+\item{fit}{string determining the optimization algorithm. Values "ucminf" or "nlminb" are recommended}
+
+\item{method.theta}{string determining the estimation method for individual latent variable values. Either "eap", "mle" or "wle"}
 }
 \value{
 A data.frame containing various model outputs
diff --git a/man/residif.Rd b/man/residif.Rd
index 3b4695e..d0753f5 100644
--- a/man/residif.Rd
+++ b/man/residif.Rd
@@ -4,7 +4,7 @@
 \alias{residif}
 \title{RESIDIF procedure for DIF detection as per Andrich and Hagquist (2015)}
 \usage{
-residif(df = NULL, items = NULL, grp = NULL, verbose = T)
+residif(df = NULL, items = NULL, grp = NULL, method.theta = "eap", verbose = T)
 }
 \arguments{
 \item{df}{data.frame containing the data}
@@ -12,6 +12,10 @@ residif(df = NULL, items = NULL, grp = NULL, verbose = T)
 \item{items}{vector containing the names of columns where item responses are stored in df}
 
 \item{grp}{vector containing the name of the column where an optional group membership variable is stored in df}
+
+\item{method.theta}{string determining the estimation method for individual latent variable values. Either "eap", "mle" or "wle"}
+
+\item{verbose}{set to TRUE to print a detailed output, FALSE otherwise}
 }
 \value{
 A data.frame containing a column listing the detected DIF item and another listing detected DIF forms