Files
SPT/R/residif.R

121 lines
5.1 KiB
R

## File Name: residif.R
## File version: 1.0
#' RESIDIF procedure for DIF detection as per Andrich and Hagquist (2015)
#'
#' This function detects DIF on PCM items using ANOVA of person-item residuals
#'
#' @param df data.frame containing the data
#' @param items vector containing the names of columns where item responses are stored in df
#' @param grp vector containing the name of the column where an optional group membership variable is stored in df
#' @param method.theta string determining the estimation method for individual latent variable values. Either "eap", "mle" or "wle"
#' @param verbose set to TRUE to print a detailed output, FALSE otherwise
#' @return A data.frame containing a column listing the detected DIF item and another listing detected DIF forms
#' @import vcrpart
#' @import PP
#' @export
residif <- function(df=NULL,items=NULL,grp=NULL,method.theta="eap",verbose=T) {
if (any(!(items %in% colnames(df)))) {
stop("ERROR: provided item name does not exist in df")
}
if (any(!(grp %in% colnames(df)))) {
stop("ERROR: provided grp variable name does not exist in df")
}
if (any(is.null(grp))) {
stop("ERROR: no grp variable provided")
}
if (any(is.null(items))) {
stop("ERROR: no items provided")
}
maxcat <- max(df[,items])
nbitems <- length(items)
nbitems_o <- nbitems
if (verbose) {
cat('\n')
cat("#################################################################################################\n")
cat("##################################### COMPUTING INITIAL PCM #####################################\n")
cat("#################################################################################################\n")
}
startt <- Sys.time()
pcm_initial <- pcm(df = df,items = items,grp = grp,verbose=F,method.theta = method.theta)
dat <- df
dat$score <- rowSums(dat[,items])
nqt <- ifelse(length(unique(quantile(dat$score,seq(0,1,0.2))))==6,5,length(unique(quantile(dat$score,seq(0,1,0.2))))-1)
while (length(unique(quantile(dat$score,seq(0,1,1/nqt))))!=nqt+1) {
nqt <- nqt-1
}
dat$score_q5 <- cut(dat$score,unique(quantile(dat$score,seq(0,1,1/nqt))),labels=1:nqt,include.lowest=T)
res.anova <- rep(NA,nbitems)
pval <- rep(NA,nbitems)
fval <- rep(NA,nbitems)
for (i in 1:nbitems) {
dat[,paste0('res_',i)] <- pcm_initial$residuals[,i]
res.anova[i] <- summary(aov(dat[,paste0('res_',i)]~get(grp)*score_q5,data=dat))
pval[c(i,i+nbitems)] <- c(res.anova[[i]][1,"Pr(>F)"],res.anova[[i]][3,"Pr(>F)"])
fval[c(i,i+nbitems)] <- c(res.anova[[i]][1,'F value'],res.anova[[i]][3,"F value"])
}
if (verbose) {
cat('DONE\n')
cat('#################################################################################################\n')
}
res.items <- c()
res.uniform <- c()
resp <- df[,items]
k <- 1
while(any(pval<0.05/(nbitems_o*3))) {
k <- k+1
if (verbose) {
cat(paste("######################################## COMPUTING STEP",k,"#######################################\n"))
cat("#################################################################################################\n")
}
numitem <- ifelse(which.max(fval)%%(length(fval)/2)!=0,which.max(fval)%%(length(fval)/2),length(fval)/2)
res.item <- gsub("[a-z]", "",colnames(resp)[numitem])
res.items <- c(res.items,res.item)
res.uni <- res.anova[[numitem]][3,"Pr(>F)"]>0.05
res.uniform <- c(res.uniform,res.uni)
pcm_while <- pcm(df = df,items = items,grp = grp,dif.items = res.items,type.dif = res.uniform,verbose=F,method.theta = method.theta)
res.anova <- rep(NA,nbitems)
pval <- rep(NA,nbitems_o)
fval <- rep(NA,nbitems_o)
numitems <- 1:nbitems_o
numitems <- numitems[-which(numitems%in%res.items)]
for (i in numitems) {
dat[,paste0('res_',i)] <- pcm_while$residuals[,i]
res.anova[i] <- summary(aov(dat[,paste0('res_',i)]~dat[,grp]*score_q5,data=dat))
pval[c(i,i+nbitems)] <- c(res.anova[[i]][1,"Pr(>F)"],res.anova[[i]][3,"Pr(>F)"])
fval[c(i,i+nbitems)] <- c(res.anova[[i]][1,'F value'],res.anova[[i]][3,"F value"])
}
for (i in 1:nbitems_o) {
pval[i] <- ifelse(is.na(pval[i]),999,pval[i])
fval[i] <- ifelse(is.na(fval[i]),-999,fval[i])
}
if (verbose) {
cat('DONE\n')
if (any(pval<0.05/(nbitems_o*3))) {
cat('#################################################################################################\n')
}
}
}
endt <- Sys.time()
cat(paste(c('Algorithm ran for',round(endt-startt,4),"seconds\n")))
if (verbose) {
cat('#################################################################################################\n')
cat("###################################### DETECTED DIF ITEMS #######################################\n")
cat("#################################################################################################\n")
}
if (length(res.items>0)) {
results <- data.frame(dif.items=res.items,
uniform=ifelse(res.uniform==1,TRUE,FALSE))
return(results)
}
else {
if (verbose) {
cat("No DIF was detected\n")
}
return(NULL)
}
}