## File Name: pcm.R ## File version: 1.1 #' Compute Partial Credit Model (PCM) for polytomous and dichotomous items #' #' This function computes a frequentist PCM, potentially accounting for DIF on specified items #' #' @param df data.frame containing the data #' @param items vector containing the names of columns where item responses are stored in df #' @param grp string containing the name of the column where an optional group membership variable is stored in df #' @param dif.items vector containing the list of indexes in "items" corresponding to dif items #' @param type.dif vector containing DIF form for each item specified in dif.items. 1 is homogeneous DIF, 0 is heterogeneous DIF #' @param weights string containing the name of the column where an optional variable containing weights is stored in df #' @param verbose set to TRUE to print a detailed output, FALSE otherwise #' @param fit string determining the optimization algorithm. Values "ucminf" or "nlminb" ar recommended #' @param method.theta string determining the estimation method for individual latent variable values. Either "eap", "mle" or "wle" #' @return A data.frame containing various model outputs #' @import vcrpart #' @import PP #' @export pcm <- function(df=NULL,items=NULL,grp=NULL,dif.items=NULL,type.dif=NULL,weights=NULL,verbose=T,fit="ucminf",method.theta="eap") { ##### Detecting errors if (any(!(items %in% colnames(df)))) { stop("ERROR: provided item name does not exist in df") } if (any(!(grp %in% colnames(df)))) { stop("ERROR: provided group variable name does not exist in df") } if (any(!is.null(grp))) { if (any(!(grp%in%colnames(df)))) { stop("ERROR: group name does not exist in df") } } if (!is.null(dif.items) & length(dif.items)!=length(type.dif)) { stop('ERROR: type.dif is not the same length as dif.items') } if (!is.null(dif.items) & is.null(type.dif)) { warning("WARNING: no type.dif provided, assuming non-homogeneous DIF on all items") } if (!("id"%in%colnames(df))) { stop('ERROR: no column named id provided') } if ( any(apply(df[df[,grp]==0,items],2,max)0),"+",""),ifelse(length(difvar.unif>0),paste0(difvar.unif,":grp",collapse="+"),""),")+ce(item",ifelse(length(difvar.nonunif>0),"+",""),ifelse(length(difvar.nonunif)>0,paste0(difvar.nonunif,":grp",collapse="+"),""),")+re(0|id)") formudif <- as.formula(formudif) if (is.null(weights)) { mod <- olmm(formudif,data=df.long,family = adjacent(link = "logit"),control=olmm_control(fit=fit)) } else { mod <- olmm(formudif,data=df.long,family = adjacent(link = "logit"),control=olmm_control(fit=fit),weights = df.long$weights) } comod <- coef(mod) # output results nbcoef <- nbitems+length(difvar.nonunif) if (is.null(weights)) { restab <- t(sapply(1:nbcoef,function(x) comod[seq(x,length(comod)-2-length(difvar.unif),nbitems+length(difvar.nonunif))])) } else { restab <- t(sapply(1:nbcoef,function(x) comod[seq(x,length(comod)-2-length(difvar.unif),nbitems+length(difvar.nonunif))])) } difcoef.unif <- NULL if (length(difvar.unif)>0) { difcoef.unif <- comod[(length(comod)-length(difvar.unif)):(length(comod)-1)] if (length(difvar.unif)!=1) { difcoef.unif <- as.matrix(difcoef.unif) } else { difcoef.unif <- t(as.matrix(difcoef.unif)) } rname <- paste0("item",dif.items[type.dif==1]) rownames(difcoef.unif) <- paste0("dif.",items_o[which(items%in%rname)]) colnames(difcoef.unif) <- "gamma" difcoef.unif <- as.data.frame(difcoef.unif) for (k in 1:maxmod) { difcoef.unif[,paste0("gamma_",k)] <- difcoef.unif[,"gamma"] } difcoef.unif <- as.matrix(difcoef.unif[,2:ncol(difcoef.unif)]) } difcoef.nonunif <- NULL if (length(difvar.nonunif)>0) { difcoef.nonunif <- restab[nbitems+c(1:length(difvar.nonunif)),] if (length(difvar.nonunif)==1) { difcoef.nonunif <- t(as.matrix(difcoef.nonunif)) } else { difcoef.nonunif <- as.matrix(difcoef.nonunif) } rname <- paste0("item",dif.items[type.dif==0]) rownames(difcoef.nonunif) <- paste0("dif.",items_o[which(items%in%rname)]) colnames(difcoef.nonunif) <- paste0("gamma_",1:maxmod) } restab <- restab[1:nbitems,] rownames(restab) <- items_o colnames(restab) <- paste0("delta_",1:maxmod) restab.dif <- rbind(difcoef.nonunif,difcoef.unif) restab.diftype <- matrix(ifelse(type.dif==1,"HOMOGENEOUS","NON-HOMOGENEOUS")) restab.diftype <- noquote(restab.diftype) rownames(restab.diftype) <- rownames(restab.dif) colnames(restab.diftype) <- "dif.type" beta <- comod["grp"] se.beta <- (confint(mod)["grp",2]-beta)/1.96 beta.ci <- confint(mod)["grp",] beta.p <- 2*pnorm(-abs(beta/se.beta)) beta <- as.numeric(beta) se.beta <- as.numeric(se.beta) beta.p <- as.numeric(beta.p) beta <- -1*beta beta.ci <- -1*c(beta.ci[2],beta.ci[1]) } else { # If group no DIF if (verbose) { cat('\n') cat("#################################################################################################\n") cat("######################################### FITTING MODEL #########################################\n") cat("#################################################################################################\n") } # prepare data if (is.null(weights)) { df <- df[,c('id',items,"grp")] colnames(df)[2:(length(colnames(df))-1)] <- paste0("item",seq(1,length(colnames(df))-2)) } else { df <- df[,c('id',items,"grp",weights)] colnames(df)[2:(length(colnames(df))-2)] <- paste0("item",seq(1,length(colnames(df))-3)) } df.long <- reshape(df,v.names=c("item"),direction="long",varying=c(items)) if (is.null(weights)) { colnames(df.long) <- c("id","grp","item","resp") nbitems <- length(2:(length(colnames(df))-1)) maxmod <- max(df[,2:(length(colnames(df))-1)]) df.long$item <- factor(df.long$item,levels=seq(1,length(colnames(df))-2),ordered = F) } else { colnames(df.long) <- c("id","grp","weights","item","resp") nbitems <- length(2:(length(colnames(df))-2)) maxmod <- max(df[,2:(length(colnames(df))-2)]) df.long$item <- factor(df.long$item,levels=seq(1,length(colnames(df))-3),ordered = F) } df.long$resp <- factor(df.long$resp,0:maxmod,ordered=T) df.long$id <- factor(df.long$id) # fit pcm if (is.null(weights)) { mod <- olmm(resp ~ 0 + ge(grp) + ce(item) + re(0|id),data=df.long,family = adjacent(link = "logit"),control=olmm_control(fit=fit)) } else { mod <- olmm(resp ~ 0 + ge(grp) + ce(item) + re(0|id),data=df.long,family = adjacent(link = "logit"),control=olmm_control(fit=fit),weights=df.long$weights) } comod <- coef(mod) # output results if (is.null(weights)) { restab <- t(sapply(1:nbitems,function(x) comod[seq(x,length(comod)-2,nbitems)])) } else { restab <- t(sapply(1:nbitems,function(x) comod[seq(x,length(comod)-2,nbitems)])) } rownames(restab) <- items_o colnames(restab) <- paste0("delta_",1:maxmod) restab.dif <- NULL beta <- comod[length(comod)-1] se.beta <- (confint(mod)["grp",2]-beta)/1.96 beta.ci <- confint(mod)["grp",] beta.p <- 2*pnorm(-abs(beta/se.beta)) beta <- as.numeric(beta) se.beta <- as.numeric(se.beta) beta.p <- as.numeric(beta.p) beta <- -1*beta beta.ci <- -1*c(beta.ci[2],beta.ci[1]) } } if (method.theta=="eap") { theta <- c(-1*ranef(mod,norm=F)+ifelse(grp==1,beta,0)) } else if (method.theta=="wle") { theta <- PP::PP_gpcm(as.matrix(df[,items]),t(restab),rep(1,length(items)))$resPP$resPP[,1] } else if (method.theta=="mle") { theta <- PP::PP_gpcm(as.matrix(df[,items]),t(restab),rep(1,length(items)),type="mle")$resPP$resPP[,1] } resid <- apply(matrix(1:nbitems,ncol=length(nbitems)),1, function(k) sapply(1:nrow(df), function(j) resi(theta[j],restab[k,],df[j,items[k]],beta=0))) colnames(resid) <- items_o ##### Output if (verbose) { cat(paste0('Number of individuals: ',nrow(df),"\n")) cat(paste0('Number of items: ',length(items),"\n")) cat(paste0('Item Thresholds and DIF parameters: ',"\n")) } out <- list( beta=beta, beta.se=se.beta, beta.ci=beta.ci, beta.p=beta.p, dif.items=dif.items, dif.type=restab.diftype, thresholds=restab, dif.param=restab.dif, theta=theta, residuals=resid ) return(out) }