## File Name: residif.R ## File version: 1.0 #' RESIDIF procedure for DIF detection as per Andrich and Hagquist (2015) #' #' This function detects DIF on PCM items using ANOVA of person-item residuals #' #' @param df data.frame containing the data #' @param items vector containing the names of columns where item responses are stored in df #' @param grp vector containing the name of the column where an optional group membership variable is stored in df #' @param method.theta string determining the estimation method for individual latent variable values. Either "eap", "mle" or "wle" #' @param verbose set to TRUE to print a detailed output, FALSE otherwise #' @return A data.frame containing a column listing the detected DIF item and another listing detected DIF forms #' @import vcrpart #' @import PP #' @export residif <- function(df=NULL,items=NULL,grp=NULL,method.theta="eap",verbose=T) { if (any(!(items %in% colnames(df)))) { stop("ERROR: provided item name does not exist in df") } if (any(!(grp %in% colnames(df)))) { stop("ERROR: provided grp variable name does not exist in df") } if (any(is.null(grp))) { stop("ERROR: no grp variable provided") } if (any(is.null(items))) { stop("ERROR: no items provided") } maxcat <- max(df[,items]) nbitems <- length(items) nbitems_o <- nbitems if (verbose) { cat('\n') cat("#################################################################################################\n") cat("##################################### COMPUTING INITIAL PCM #####################################\n") cat("#################################################################################################\n") } startt <- Sys.time() pcm_initial <- pcm(df = df,items = items,grp = grp,verbose=F,method.theta = method.theta) dat <- df dat$score <- rowSums(dat[,items]) nqt <- ifelse(length(unique(quantile(dat$score,seq(0,1,0.2))))==6,5,length(unique(quantile(dat$score,seq(0,1,0.2))))-1) while (length(unique(quantile(dat$score,seq(0,1,1/nqt))))!=nqt+1) { nqt <- nqt-1 } dat$score_q5 <- cut(dat$score,unique(quantile(dat$score,seq(0,1,1/nqt))),labels=1:nqt,include.lowest=T) res.anova <- rep(NA,nbitems) pval <- rep(NA,nbitems) fval <- rep(NA,nbitems) for (i in 1:nbitems) { dat[,paste0('res_',i)] <- pcm_initial$residuals[,i] res.anova[i] <- summary(aov(dat[,paste0('res_',i)]~get(grp)*score_q5,data=dat)) pval[c(i,i+nbitems)] <- c(res.anova[[i]][1,"Pr(>F)"],res.anova[[i]][3,"Pr(>F)"]) fval[c(i,i+nbitems)] <- c(res.anova[[i]][1,'F value'],res.anova[[i]][3,"F value"]) } if (verbose) { cat('DONE\n') cat('#################################################################################################\n') } res.items <- c() res.uniform <- c() resp <- df[,items] k <- 1 while(any(pval<0.05/(nbitems_o*3))) { k <- k+1 if (verbose) { cat(paste("######################################## COMPUTING STEP",k,"#######################################\n")) cat("#################################################################################################\n") } numitem <- ifelse(which.max(fval)%%(length(fval)/2)!=0,which.max(fval)%%(length(fval)/2),length(fval)/2) res.item <- gsub("[a-z]", "",colnames(resp)[numitem]) res.items <- c(res.items,res.item) res.uni <- res.anova[[numitem]][3,"Pr(>F)"]>0.05 res.uniform <- c(res.uniform,res.uni) pcm_while <- pcm(df = df,items = items,grp = grp,dif.items = res.items,type.dif = res.uniform,verbose=F,method.theta = method.theta) res.anova <- rep(NA,nbitems) pval <- rep(NA,nbitems_o) fval <- rep(NA,nbitems_o) numitems <- 1:nbitems_o numitems <- numitems[-which(numitems%in%res.items)] for (i in numitems) { dat[,paste0('res_',i)] <- pcm_while$residuals[,i] res.anova[i] <- summary(aov(dat[,paste0('res_',i)]~dat[,grp]*score_q5,data=dat)) pval[c(i,i+nbitems)] <- c(res.anova[[i]][1,"Pr(>F)"],res.anova[[i]][3,"Pr(>F)"]) fval[c(i,i+nbitems)] <- c(res.anova[[i]][1,'F value'],res.anova[[i]][3,"F value"]) } for (i in 1:nbitems_o) { pval[i] <- ifelse(is.na(pval[i]),999,pval[i]) fval[i] <- ifelse(is.na(fval[i]),-999,fval[i]) } if (verbose) { cat('DONE\n') if (any(pval<0.05/(nbitems_o*3))) { cat('#################################################################################################\n') } } } endt <- Sys.time() cat(paste(c('Algorithm ran for',round(endt-startt,4),"seconds\n"))) if (verbose) { cat('#################################################################################################\n') cat("###################################### DETECTED DIF ITEMS #######################################\n") cat("#################################################################################################\n") } if (length(res.items>0)) { results <- data.frame(dif.items=res.items, uniform=ifelse(res.uniform==1,TRUE,FALSE)) return(results) } else { if (verbose) { cat("No DIF was detected\n") } return(NULL) } }