118 lines
4.8 KiB
R
118 lines
4.8 KiB
R
## File Name: residif.R
|
|
## File version: 1.0
|
|
|
|
#' RESIDIF procedure for DIF detection as per Andrich and Hagquist (2015)
|
|
#'
|
|
#' This function detects DIF on PCM items using ANOVA of person-item residuals
|
|
#'
|
|
#' @param df data.frame containing the data
|
|
#' @param items vector containing the names of columns where item responses are stored in df
|
|
#' @param grp vector containing the name of the column where an optional group membership variable is stored in df
|
|
#' @return A data.frame containing a column listing the detected DIF item and another listing detected DIF forms
|
|
#' @import vcrpart
|
|
#' @export
|
|
|
|
residif <- function(df=NULL,items=NULL,grp=NULL,verbose=T) {
|
|
if (any(!(items %in% colnames(df)))) {
|
|
stop("ERROR: provided item name does not exist in df")
|
|
}
|
|
if (any(!(grp %in% colnames(df)))) {
|
|
stop("ERROR: provided grp variable name does not exist in df")
|
|
}
|
|
if (any(is.null(grp))) {
|
|
stop("ERROR: no grp variable provided")
|
|
}
|
|
if (any(is.null(items))) {
|
|
stop("ERROR: no items provided")
|
|
}
|
|
|
|
maxcat <- max(df[,items])
|
|
nbitems <- length(items)
|
|
nbitems_o <- nbitems
|
|
|
|
if (verbose) {
|
|
cat('\n')
|
|
cat("#################################################################################################\n")
|
|
cat("##################################### COMPUTING INITIAL PCM #####################################\n")
|
|
cat("#################################################################################################\n")
|
|
}
|
|
startt <- Sys.time()
|
|
pcm_initial <- pcm(df = df,items = items,grp = grp,verbose=F)
|
|
dat <- df
|
|
dat$score <- rowSums(dat[,items])
|
|
nqt <- ifelse(length(unique(quantile(dat$score,seq(0,1,0.2))))==6,5,length(unique(quantile(dat$score,seq(0,1,0.2))))-1)
|
|
while (length(unique(quantile(dat$score,seq(0,1,1/nqt))))!=nqt+1) {
|
|
nqt <- nqt-1
|
|
}
|
|
dat$score_q5 <- cut(dat$score,unique(quantile(dat$score,seq(0,1,1/nqt))),labels=1:nqt,include.lowest=T)
|
|
res.anova <- rep(NA,nbitems)
|
|
pval <- rep(NA,nbitems)
|
|
fval <- rep(NA,nbitems)
|
|
for (i in 1:nbitems) {
|
|
dat[,paste0('res_',i)] <- pcm_initial$residuals[,i]
|
|
res.anova[i] <- summary(aov(dat[,paste0('res_',i)]~get(grp)*score_q5,data=dat))
|
|
pval[c(i,i+nbitems)] <- c(res.anova[[i]][1,"Pr(>F)"],res.anova[[i]][3,"Pr(>F)"])
|
|
fval[c(i,i+nbitems)] <- c(res.anova[[i]][1,'F value'],res.anova[[i]][3,"F value"])
|
|
}
|
|
if (verbose) {
|
|
cat('DONE\n')
|
|
cat('#################################################################################################\n')
|
|
}
|
|
res.items <- c()
|
|
res.uniform <- c()
|
|
resp <- df[,items]
|
|
k <- 1
|
|
while(any(pval<0.05/(nbitems_o*3))) {
|
|
k <- k+1
|
|
if (verbose) {
|
|
cat(paste("######################################## COMPUTING STEP",k,"#######################################\n"))
|
|
cat("#################################################################################################\n")
|
|
}
|
|
numitem <- ifelse(which.max(fval)%%(length(fval)/2)!=0,which.max(fval)%%(length(fval)/2),length(fval)/2)
|
|
res.item <- gsub("[a-z]", "",colnames(resp)[numitem])
|
|
res.items <- c(res.items,res.item)
|
|
res.uni <- res.anova[[numitem]][3,"Pr(>F)"]>0.05
|
|
res.uniform <- c(res.uniform,res.uni)
|
|
pcm_while <- pcm(df = df,items = items,grp = grp,dif.items = res.items,type.dif = res.uniform,verbose=F)
|
|
res.anova <- rep(NA,nbitems)
|
|
pval <- rep(NA,nbitems_o)
|
|
fval <- rep(NA,nbitems_o)
|
|
numitems <- 1:nbitems_o
|
|
numitems <- numitems[-which(numitems%in%res.items)]
|
|
for (i in numitems) {
|
|
dat[,paste0('res_',i)] <- pcm_while$residuals[,i]
|
|
res.anova[i] <- summary(aov(dat[,paste0('res_',i)]~dat[,grp]*score_q5,data=dat))
|
|
pval[c(i,i+nbitems)] <- c(res.anova[[i]][1,"Pr(>F)"],res.anova[[i]][3,"Pr(>F)"])
|
|
fval[c(i,i+nbitems)] <- c(res.anova[[i]][1,'F value'],res.anova[[i]][3,"F value"])
|
|
}
|
|
for (i in 1:nbitems_o) {
|
|
pval[i] <- ifelse(is.na(pval[i]),999,pval[i])
|
|
fval[i] <- ifelse(is.na(fval[i]),-999,fval[i])
|
|
}
|
|
if (verbose) {
|
|
cat('DONE\n')
|
|
if (any(pval<0.05/(nbitems_o*3))) {
|
|
cat('#################################################################################################\n')
|
|
}
|
|
}
|
|
}
|
|
endt <- Sys.time()
|
|
cat(paste(c('Algorithm ran for',round(endt-startt,4),"seconds\n")))
|
|
if (verbose) {
|
|
cat('#################################################################################################\n')
|
|
cat("###################################### DETECTED DIF ITEMS #######################################\n")
|
|
cat("#################################################################################################\n")
|
|
}
|
|
if (length(res.items>0)) {
|
|
results <- data.frame(dif.items=res.items,
|
|
uniform=ifelse(res.uniform==1,TRUE,FALSE))
|
|
return(results)
|
|
}
|
|
else {
|
|
if (verbose) {
|
|
cat("No DIF was detected\n")
|
|
}
|
|
return(NULL)
|
|
}
|
|
}
|