From e8f7bd6eb984f64d54c95c7283f203085f9879ee Mon Sep 17 00:00:00 2001 From: corentinchoisy Date: Thu, 14 Mar 2024 16:12:16 +0100 Subject: [PATCH] Corrected bugs + TAM dif analysis --- RProject/.Rhistory | 998 ++++++++++++++++---------------- RProject/dif.R | 27 + Scripts/Analysis/DIF/pcm_dif.do | 32 +- 3 files changed, 542 insertions(+), 515 deletions(-) create mode 100644 RProject/dif.R diff --git a/RProject/.Rhistory b/RProject/.Rhistory index af4c9a9..d810d6f 100644 --- a/RProject/.Rhistory +++ b/RProject/.Rhistory @@ -1,512 +1,512 @@ -if (unique(s$J)==4) { -if (unique(s$M)==2) { -a <- data.frame(m.item1=mean(s$item1),m.item2=mean(s$item2),m.item3=mean(s$item3),m.item4=mean(s$item4)) -} else { -a <- data.frame(m.item1_1=mean(s$item1_1),m.item1_2=mean(s$item1_2),m.item1_3=mean(s$item1_3), -m.item2_1=mean(s$item2_1),m.item2_2=mean(s$item2_2),m.item2_3=mean(s$item2_3), -m.item3_1=mean(s$item3_1),m.item3_2=mean(s$item3_2),m.item3_3=mean(s$item3_3), -m.item4_1=mean(s$item4_1),m.item4_2=mean(s$item4_2),m.item4_3=mean(s$item4_3) -) +splits_evtl[[count+1]][[item]][[variable]][(knoten+1),splits_evtl[[count+1]][[item]][[variable]][(knoten+1),]<=split] <- NA +# any split? +anysplit <- !all(is.na(unlist(splits_evtl[[count+1]]))) +# passe vars_evtl an +vars_evtl[[count+1]] <- vars_evtl[[count]] +vars_evtl[[count+1]][[item]] <- rep(0,n_knots) +vars_evtl[[count+1]][[item]][c(knoten,knoten+1)] <- rep(vars_evtl[[count]][[item]][knoten],2) +vars_evtl[[count+1]][[item]][-c(knoten,knoten+1)]<- vars_evtl[[count]][[item]][-knoten] +if(length(which(!is.na(splits_evtl[[count+1]][[item]][[variable]][knoten,])))==0){ +vars_evtl[[count+1]][[item]][knoten] <- vars_evtl[[count+1]][[item]][knoten]-1 } -} else { -if (unique(s$M)==2) { -a <- data.frame(m.item1=mean(s$item1),m.item2=mean(s$item2),m.item3=mean(s$item3),m.item4=mean(s$item4), -m.item5=mean(s$item5),m.item6=mean(s$item6),m.item7=mean(s$item7)) -} else { -a <- data.frame(m.item1_1=mean(s$item1_1),m.item1_2=mean(s$item1_2),m.item1_3=mean(s$item1_3), -m.item2_1=mean(s$item2_1),m.item2_2=mean(s$item2_2),m.item2_3=mean(s$item2_3), -m.item3_1=mean(s$item3_1),m.item3_2=mean(s$item3_2),m.item3_3=mean(s$item3_3), -m.item4_1=mean(s$item4_1),m.item4_2=mean(s$item4_2),m.item4_3=mean(s$item4_3), -m.item5_1=mean(s$item5_1),m.item5_2=mean(s$item5_2),m.item5_3=mean(s$item5_3), -m.item6_1=mean(s$item6_1),m.item6_2=mean(s$item6_2),m.item6_3=mean(s$item6_3), -m.item7_1=mean(s$item7_1),m.item7_2=mean(s$item7_2),m.item7_3=mean(s$item7_3) -) +if(length(which(!is.na(splits_evtl[[count+1]][[item]][[variable]][knoten+1,])))==0){ +vars_evtl[[count+1]][[item]][knoten+1] <- vars_evtl[[count+1]][[item]][knoten+1]-1 } +# passe which_obs an +which_obs[[count+1]] <- which_obs[[count]] +which_obs[[count+1]][[item]] <- matrix(0,nrow=n_knots,ncol=npersons) +which_obs[[count+1]][[item]][c(knoten,knoten+1),] <- matrix(rep(which_obs[[count]][[item]][knoten,],2),nrow=2,byrow=T) +which_obs[[count+1]][[item]][-c(knoten,knoten+1),] <- which_obs[[count]][[item]][-knoten,] +thresh <- ordered_values[[variable]][1:n_s[variable]][split] +which_obs[[count+1]][[item]][knoten,DM_kov[,variable]>thresh] <- NA +which_obs[[count+1]][[item]][(knoten+1),DM_kov[,variable]<=thresh] <- NA +# passe numbers an +numbers[[count+1]] <- numbers[[count]] +numbers[[count+1]][[item]] <- numeric(length=n_knots) +numbers[[count+1]][[item]][c(knoten,knoten+1)] <- c(left,right) +numbers[[count+1]][[item]][-c(knoten,knoten+1)] <- numbers[[count]][[item]][-knoten] +# trace +if(trace){ +cat(paste0("\n Split"," ",count,";"," ","Item"," ",item,"\n")) } -N <- ifelse(substr(scenario,start=nchar(scenario)-1,stop=nchar(scenario))=="50","50",substr(scenario,start=nchar(scenario)-2,stop=nchar(scenario))) -zz <- ifelse(N=="50",substr(scenario,start=0,stop=nchar(scenario)-3),substr(scenario,start=0,stop=nchar(scenario)-4)) -b <- data.frame(scenario=zz, -scenario.type=substr(zz,start=nchar(zz),stop=nchar(zz)), -N=N, -J=unique(s$J), -M=unique(s$M), -eff.size=unique(s$eff.size), -nb.dif=unique(s$nb.dif), -dif.size=unique(s$dif.size) -) -z <- data.frame(m.beta=mean(s$beta), -se.empirical.beta=sd(s$beta), -se.analytical.beta=mean(s$se.beta), -m.low.ci.beta=mean(s$low.ci.beta), -m.high.ci.beta=mean(s$high.ci.beta), -true.value.in.ci.p=mean(s$true.value.in.ci), -h0.rejected.p=mean(s$h0.rejected), -beta.same.sign.truebeta.p=mean(s$beta.same.sign.truebeta,na.rm=T), -beta.same.sign.truebeta.signif.p=mean(s[s$h0.rejected==1,]$beta.same.sign.truebeta,na.rm=T)) -d <- cbind(b,a,z) -d$prop. -return(d) -} -#### Compiled results -res.dat <- compile_simulation('1A_100') -for (x in results[seq(2,length(results))]) { -y <- compile_simulation(x) -res.dat <- bind_rows(res.dat,y) -} -res.dat[res.dat$scenario.type=='A','dif.size'] <- -res.dat[res.dat$scenario.type=='A','dif.size'] -res.dat[is.na(res.dat$dif.size),'dif.size'] <- 0 -res.dat[193:417,'nb.dif'] <- 2 -res.dat[417:528,'nb.dif'] <- 3 -res.dat[res.dat$scenario.type=="B",] <- 0.2 -res.dat[res.dat$scenario.type=="C" & res.dat$dif.size==0,] <- 0.4 -res.dat[res.dat$scenario.type=="C" & res.dat$dif.size!=0,] <- 0.2 -res.dat[res.dat$scenario.type=="D" & res.dat$dif.size==0,] <- -0.2 -res.dat[res.dat$scenario.type=="D" & res.dat$dif.size!=0,] <- 0.4 -res.dat[res.dat$scenario.type=="E" & res.dat$dif.size==0,] <- -0.4 -res.dat[res.dat$scenario.type=="E" & res.dat$dif.size!=0,] <- 0.4 -res.dat[res.dat$scenario.type=="F",] <- -0.2 -res.dat[res.dat$scenario.type=="G",] <- -0.4 -View(res.dat) -res.dat.simple <- res.dat[,c(1:8,13,16:18)] -res.dat.simple$m.beta <- round(res.dat.simple$m.beta,3) -res.dat.simple -#### Create data.frame -results <- c(sapply(1:4,function(x) paste0(x,c('A','B','C','D','E'))),sapply(5:9,function(x) paste0(x,c('A','B','C','D','E','F','G')))) -results2 <- c(sapply(10:20,function(x) paste0(x,c('A','B','C','D','E','F','G')))) -results <- c(sapply(c(50,100,200,300),function(x) paste0(results,'_',x))) -results2 <- c(sapply(c(50,100,200,300),function(x) paste0(results2,'_',x))) -results <- sort(results) -results2 <- sort(results2) -results <- c(results,results2) -#### Compiler function -compile_simulation <- function(scenario) { -name <- as.numeric(gsub("[^0-9.-]", "", substr(scenario,start=0,stop=2))) -if (substr(scenario,start=nchar(scenario)-1,stop=nchar(scenario))=="50" & name<=4) { -s <- read.csv(paste0('/home/corentin/Documents/These/Recherche/Simulations/Analysis/NoDIF/N50/scenario_',scenario,'.csv')) -} -if (substr(scenario,start=nchar(scenario)-1,stop=nchar(scenario))=="50" & name>4) { -s <- read.csv(paste0('/home/corentin/Documents/These/Recherche/Simulations/Analysis/NoDIF/N50/scenario_',scenario,'.csv')) -} -if (substr(scenario,start=nchar(scenario)-2,stop=nchar(scenario))=="100" & name<=4) { -s <- read.csv(paste0('/home/corentin/Documents/These/Recherche/Simulations/Analysis/NoDIF/N100/scenario_',scenario,'.csv')) -} -if (substr(scenario,start=nchar(scenario)-2,stop=nchar(scenario))=="100" & name>4) { -s <- read.csv(paste0('/home/corentin/Documents/These/Recherche/Simulations/Analysis/NoDIF/N100/scenario_',scenario,'_nodif.csv')) -} -if (substr(scenario,start=nchar(scenario)-2,stop=nchar(scenario))=="200" & name<=4) { -s <- read.csv(paste0('/home/corentin/Documents/These/Recherche/Simulations/Analysis/NoDIF/N200/scenario_',scenario,'.csv')) -} -if (substr(scenario,start=nchar(scenario)-2,stop=nchar(scenario))=="200" & name>4) { -s <- read.csv(paste0('/home/corentin/Documents/These/Recherche/Simulations/Analysis/NoDIF/N200/scenario_',scenario,'_nodif.csv')) -} -if (substr(scenario,start=nchar(scenario)-2,stop=nchar(scenario))=="300" & name<=4) { -s <- read.csv(paste0('/home/corentin/Documents/These/Recherche/Simulations/Analysis/NoDIF/N300/scenario_',scenario,'.csv')) -} -if (substr(scenario,start=nchar(scenario)-2,stop=nchar(scenario))=="300" & name>4) { -s <- read.csv(paste0('/home/corentin/Documents/These/Recherche/Simulations/Analysis/NoDIF/N300/scenario_',scenario,'_nodif.csv')) -} -if (unique(s$J)==4) { -if (unique(s$M)==2) { -a <- data.frame(m.item1=mean(s$item1),m.item2=mean(s$item2),m.item3=mean(s$item3),m.item4=mean(s$item4)) -} else { -a <- data.frame(m.item1_1=mean(s$item1_1),m.item1_2=mean(s$item1_2),m.item1_3=mean(s$item1_3), -m.item2_1=mean(s$item2_1),m.item2_2=mean(s$item2_2),m.item2_3=mean(s$item2_3), -m.item3_1=mean(s$item3_1),m.item3_2=mean(s$item3_2),m.item3_3=mean(s$item3_3), -m.item4_1=mean(s$item4_1),m.item4_2=mean(s$item4_2),m.item4_3=mean(s$item4_3) -) +# erhoehe counter +count <- count+1 +} else{ +sig <- FALSE } -} else { -if (unique(s$M)==2) { -a <- data.frame(m.item1=mean(s$item1),m.item2=mean(s$item2),m.item3=mean(s$item3),m.item4=mean(s$item4), -m.item5=mean(s$item5),m.item6=mean(s$item6),m.item7=mean(s$item7)) -} else { -a <- data.frame(m.item1_1=mean(s$item1_1),m.item1_2=mean(s$item1_2),m.item1_3=mean(s$item1_3), -m.item2_1=mean(s$item2_1),m.item2_2=mean(s$item2_2),m.item2_3=mean(s$item2_3), -m.item3_1=mean(s$item3_1),m.item3_2=mean(s$item3_2),m.item3_3=mean(s$item3_3), -m.item4_1=mean(s$item4_1),m.item4_2=mean(s$item4_2),m.item4_3=mean(s$item4_3), -m.item5_1=mean(s$item5_1),m.item5_2=mean(s$item5_2),m.item5_3=mean(s$item5_3), -m.item6_1=mean(s$item6_1),m.item6_2=mean(s$item6_2),m.item6_3=mean(s$item6_3), -m.item7_1=mean(s$item7_1),m.item7_2=mean(s$item7_2),m.item7_3=mean(s$item7_3) -) } +################################################################################### +# prettify results +mod_opt <- mod_potential[[count]] +ip_opt <- names(coef(mod_opt))[-c(1:(npersons-1))] +theta_hat <- c(coef(mod_opt)[1:(npersons-1)],0) +delta_hat <- coef(mod_opt)[npersons:length(coef(mod_opt))] +if(count>1){ +dif_items <- unique(splits[,2]) +nodif_items <- c(1:nitems)[-dif_items] +delta_hat_nodif <- sapply(nodif_items,function(j) delta_hat[grep(paste0("delta",j,":"),ip_opt)]) +rownames(delta_hat_nodif) <- 1:nrow(delta_hat_nodif) +colnames(delta_hat_nodif) <- paste0("delta", nodif_items) +delta_hat_dif <- lapply(dif_items, function(j) delta_hat[grep(paste0("delta",j,":"),ip_opt)]) +names(delta_hat_dif) <- dif_items +help9 <- cumsum(c(0,(n_levels-1))) +colnames(splits) <- c("var","item","split","level","node","number","left","right") +splits <- data.frame(cbind(splits[,1:5,drop=FALSE],"variable"=rep(NA,nrow(splits)),"threshold"=rep(NA,nrow(splits)),splits[,6:8,drop=FALSE])) +for(i in 1:nrow(splits)){ +if(!is.null(colnames(DM_kov))){ +splits[i,6] <- colnames(DM_kov)[splits[i,1]] +} else{ +splits[i,6] <- splits[i,1] } -N <- ifelse(substr(scenario,start=nchar(scenario)-1,stop=nchar(scenario))=="50","50",substr(scenario,start=nchar(scenario)-2,stop=nchar(scenario))) -zz <- ifelse(N=="50",substr(scenario,start=0,stop=nchar(scenario)-3),substr(scenario,start=0,stop=nchar(scenario)-4)) -b <- data.frame(scenario=zz, -scenario.type=substr(zz,start=nchar(zz),stop=nchar(zz)), -N=N, -J=unique(s$J), -M=unique(s$M), -eff.size=unique(s$eff.size), -nb.dif=unique(s$nb.dif), -dif.size=unique(s$dif.size) -) -z <- data.frame(m.beta=mean(s$beta), -se.empirical.beta=sd(s$beta), -se.analytical.beta=mean(s$se.beta), -m.low.ci.beta=mean(s$low.ci.beta), -m.high.ci.beta=mean(s$high.ci.beta), -true.value.in.ci.p=mean(s$true.value.in.ci), -h0.rejected.p=mean(s$h0.rejected), -beta.same.sign.truebeta.p=mean(s$beta.same.sign.truebeta,na.rm=T), -beta.same.sign.truebeta.signif.p=mean(s[s$h0.rejected==1,]$beta.same.sign.truebeta,na.rm=T)) -d <- cbind(b,a,z) -d$prop. -return(d) -} -#### Compiled results -res.dat <- compile_simulation('1A_100') -for (x in results[seq(2,length(results))]) { -y <- compile_simulation(x) -res.dat <- bind_rows(res.dat,y) -} -res.dat[res.dat$scenario.type=='A','dif.size'] <- -res.dat[res.dat$scenario.type=='A','dif.size'] -res.dat[is.na(res.dat$dif.size),'dif.size'] <- 0 -res.dat[193:417,'nb.dif'] <- 2 -res.dat[417:528,'nb.dif'] <- 3 -res.dat[res.dat$scenario.type=="B",]$eff.size <- 0.2 -res.dat[res.dat$scenario.type=="C" & res.dat$dif.size==0,]$eff.size <- 0.4 -res.dat[res.dat$scenario.type=="C" & res.dat$dif.size!=0,]$eff.size <- 0.2 -res.dat[res.dat$scenario.type=="D" & res.dat$dif.size==0,]$eff.size <- -0.2 -res.dat[res.dat$scenario.type=="D" & res.dat$dif.size!=0,]$eff.size <- 0.4 -res.dat[res.dat$scenario.type=="E" & res.dat$dif.size==0,]$eff.size <- -0.4 -res.dat[res.dat$scenario.type=="E" & res.dat$dif.size!=0,]$eff.size <- 0.4 -res.dat[res.dat$scenario.type=="F",]$eff.size <- -0.2 -res.dat[res.dat$scenario.type=="G",]$eff.size <- -0.4 -View(res.dat) -res.dat.simple <- res.dat[,c(1:8,13,16:18)] -res.dat.simple$m.beta <- round(res.dat.simple$m.beta,3) -res.dat.simple -############################################################################## -#----------------------------------------------------------------------------# -########################### AGGREGATION DIF MATRICES ######################### -#----------------------------------------------------------------------------# -############################################################################## -#### Create data.frame -results <- c(sapply(1:4,function(x) paste0(x,c('A','B','C','D','E'))),sapply(5:9,function(x) paste0(x,c('A','B','C','D','E','F','G')))) -results2 <- c(sapply(10:20,function(x) paste0(x,c('A','B','C','D','E','F','G')))) -results <- c(sapply(c(50,100,200,300),function(x) paste0(results,'_',x))) -results2 <- c(sapply(c(50,100,200,300),function(x) paste0(results2,'_',x))) -results <- sort(results) -results2 <- sort(results2) -results <- c(results,results2)[81:528] -#### Compiler function -compile_simulation2 <- function(scenario) { -name <- as.numeric(gsub("[^0-9.-]", "", substr(scenario,start=0,stop=2))) -if (substr(scenario,start=nchar(scenario)-1,stop=nchar(scenario))=="50" & name>4) { -s <- read_excel(paste0('/home/corentin/Documents/These/Recherche/Simulations/Analysis/DIF/N50/',scenario,'.xls')) -} -if (substr(scenario,start=nchar(scenario)-2,stop=nchar(scenario))=="100" & name>4) { -s <- read_excel(paste0('/home/corentin/Documents/These/Recherche/Simulations/Analysis/DIF/N100/',scenario,'.xls')) -} -if (substr(scenario,start=nchar(scenario)-2,stop=nchar(scenario))=="200" & name>4) { -s <- read_excel(paste0('/home/corentin/Documents/These/Recherche/Simulations/Analysis/DIF/N200/',scenario,'.xls')) -} -if (substr(scenario,start=nchar(scenario)-2,stop=nchar(scenario))=="300" & name>4) { -s <- read_excel(paste0('/home/corentin/Documents/These/Recherche/Simulations/Analysis/DIF/N300/',scenario,'.xls')) -} -J <- max(which(sapply(1:7,function(x) paste0('item',x) %in% colnames(s) | paste0('item',x,'_1') %in% colnames(s)))) -M <- 1+sum(sapply(1:3,function(x) paste0('item1_',x) %in% colnames(s) )) -if (M==1) {M <- 2} -nb.dif <- max(which(sapply(1:3,function(x) paste0('dif',x) %in% colnames(s) | paste0('dif',x,'_1') %in% colnames(s)))) -if (J==4) { -if (M==2) { -a <- data.frame(m.item1=mean(s$item1),m.item2=mean(s$item2),m.item3=mean(s$item3),m.item4=mean(s$item4)) -} else { -a <- data.frame(m.item1_1=mean(s$item1_1),m.item1_2=mean(s$item1_2),m.item1_3=mean(s$item1_3), -m.item2_1=mean(s$item2_1),m.item2_2=mean(s$item2_2),m.item2_3=mean(s$item2_3), -m.item3_1=mean(s$item3_1),m.item3_2=mean(s$item3_2),m.item3_3=mean(s$item3_3), -m.item4_1=mean(s$item4_1),m.item4_2=mean(s$item4_2),m.item4_3=mean(s$item4_3) -) +v2 <- lapply(1:nvar,function(j) ordered_values[[j]][-length(ordered_values[[j]])]) +splits[i,7] <- v2[[splits[i,1]]][splits[i,3]] } -} else { -if (M==2) { -a <- data.frame(m.item1=mean(s$item1),m.item2=mean(s$item2),m.item3=mean(s$item3),m.item4=mean(s$item4), -m.item5=mean(s$item5),m.item6=mean(s$item6),m.item7=mean(s$item7)) -} else { -a <- data.frame(m.item1_1=mean(s$item1_1),m.item1_2=mean(s$item1_2),m.item1_3=mean(s$item1_3), -m.item2_1=mean(s$item2_1),m.item2_2=mean(s$item2_2),m.item2_3=mean(s$item2_3), -m.item3_1=mean(s$item3_1),m.item3_2=mean(s$item3_2),m.item3_3=mean(s$item3_3), -m.item4_1=mean(s$item4_1),m.item4_2=mean(s$item4_2),m.item4_3=mean(s$item4_3), -m.item5_1=mean(s$item5_1),m.item5_2=mean(s$item5_2),m.item5_3=mean(s$item5_3), -m.item6_1=mean(s$item6_1),m.item6_2=mean(s$item6_2),m.item6_3=mean(s$item6_3), -m.item7_1=mean(s$item7_1),m.item7_2=mean(s$item7_2),m.item7_3=mean(s$item7_3) -) +splits <- splits[,-1] +nthres <- length(unique(y))-1 +for(i in dif_items){ +info <- splits[splits[,"item"]==i,] +endnodes <- get_endnodes(info) +names(delta_hat_dif[[paste(i)]]) <- paste(rep(endnodes,each=nthres),rep(1:nthres,length(endnodes)),sep=":") } +} else{ +delta_hat_nodif <- sapply(1:nitems,function(j) delta_hat[grep(paste0("delta",j,":"),ip_opt)]) +delta_hat_dif <- c() } -N <- ifelse(substr(scenario,start=nchar(scenario)-1,stop=nchar(scenario))=="50","50",substr(scenario,start=nchar(scenario)-2,stop=nchar(scenario))) -zz <- ifelse(N=="50",substr(scenario,start=0,stop=nchar(scenario)-3),substr(scenario,start=0,stop=nchar(scenario)-4)) -eff.size <- unique(res.dat[res.dat$scenario==zz & res.dat$N==N,'eff.size']) -dif.size <- unique(res.dat[res.dat$scenario==zz & res.dat$N==N,'dif.size']) -b <- data.frame(scenario=zz, -scenario.type=substr(zz,start=nchar(zz),stop=nchar(zz)), -N=N, -J=J, -M=M, -eff.size=eff.size, -nb.dif=nb.dif, -dif.size=dif.size -) -true.value.in.ci <- eff.size <= s$beta+1.96*s$se_beta & eff.size >= s$beta-1.96*s$se_beta -beta.same.sign.truebeta.p <- ifelse(rep(eff.size,nrow(s))==0,NA,(rep(eff.size,nrow(s))/s$beta)>0) -num.reject <- which((s$beta-1.96*s$se_beta)>0 | (s$beta+1.96*s$se_beta)<0) -z <- data.frame(m.beta=mean(s$beta), -se.empirical.beta=sd(s$beta), -se.analytical.beta=mean(s$se_beta), -m.low.ci.beta=mean(s$beta-1.96*s$se_beta), -m.high.ci.beta=mean(s$beta+1.96*s$se_beta), -true.value.in.ci.p=mean(true.value.in.ci), -h0.rejected.p=mean( (s$beta-1.96*s$se_beta)>0 | (s$beta+1.96*s$se_beta)<0 ), -beta.same.sign.truebeta.p=mean(beta.same.sign.truebeta.p), -beta.same.sign.truebeta.signif.p=mean(beta.same.sign.truebeta.p[num.reject]) -) -d <- cbind(b,a,z) -d$prop. -return(d) -} -#### Compiled results -res.dat.dif <- compile_simulation2('5A_100') -for (x in results[seq(2,length(results))]) { -y <- compile_simulation2(x) -res.dat.dif <- bind_rows(res.dat.dif,y) -} -res.dat$bias <- res.dat$eff.size-res.dat$m.beta -res.dat.dif$bias <- res.dat.dif$eff.size-res.dat.dif$m.beta -View(res.dat.dif) -############################################################################## -#----------------------------------------------------------------------------# -################################## RASCHPOWER ################################ -#----------------------------------------------------------------------------# -############################################################################## -###### Puissance théorique -res.dat$theoretical.power <- 0 -### Scénarios N=100 -## Scénarios J=4 / M=2 -res.dat[res.dat$scenario %in% paste0(c(1,5,7,9,11),'A') & res.dat$N==100,]$theoretical.power <- 0.05 -res.dat[res.dat$scenario %in% paste0(c(1,5,7,9,11),'B') & res.dat$N==100,]$theoretical.power <- 0.1543 -res.dat[res.dat$scenario %in% paste0(c(5,7,9,11),'C') & res.dat$N==100,]$theoretical.power <- 0.1543 -res.dat[res.dat$scenario %in% paste0(c(5,7,9,11),'D') & res.dat$N==100,]$theoretical.power <- 0.4627 -res.dat[res.dat$scenario %in% paste0(c(5,7,9,11),'E') & res.dat$N==100,]$theoretical.power <- 0.4627 -res.dat[res.dat$scenario %in% paste0(c(5,7,9,11),'F') & res.dat$N==100,]$theoretical.power <- 0.1543 -res.dat[res.dat$scenario %in% paste0(c(5,7,9,11),'G') & res.dat$N==100,]$theoretical.power <- 0.4627 -res.dat[res.dat$scenario %in% paste0(1,'C') & res.dat$N==100,]$theoretical.power <- 0.4627 -res.dat[res.dat$scenario %in% paste0(1,'D') & res.dat$N==100,]$theoretical.power <- 0.1543 -res.dat[res.dat$scenario %in% paste0(1,'E') & res.dat$N==100,]$theoretical.power <- 0.4627 -## Scénarios J=4 / M=4 -res.dat[res.dat$scenario %in% paste0(c(2,6,8,10,12),'A') & res.dat$N==100,]$theoretical.power <- 0.05 -res.dat[res.dat$scenario %in% paste0(c(2,6,8,10,12),'B') & res.dat$N==100,]$theoretical.power <- 0.2177 -res.dat[res.dat$scenario %in% paste0(c(6,8,10,12),'C') & res.dat$N==100,]$theoretical.power <- 0.2177 -res.dat[res.dat$scenario %in% paste0(c(6,8,10,12),'D') & res.dat$N==100,]$theoretical.power <- 0.6586 -res.dat[res.dat$scenario %in% paste0(c(6,8,10,12),'E') & res.dat$N==100,]$theoretical.power <- 0.6586 -res.dat[res.dat$scenario %in% paste0(c(6,8,10,12),'F') & res.dat$N==100,]$theoretical.power <- 0.2177 -res.dat[res.dat$scenario %in% paste0(c(6,8,10,12),'G') & res.dat$N==100,]$theoretical.power <- 0.6586 -res.dat[res.dat$scenario %in% paste0(2,'C') & res.dat$N==100,]$theoretical.power <- 0.6586 -res.dat[res.dat$scenario %in% paste0(2,'D') & res.dat$N==100,]$theoretical.power <- 0.2177 -res.dat[res.dat$scenario %in% paste0(2,'E') & res.dat$N==100,]$theoretical.power <- 0.6586 -## Scénarios J=7 / M=2 -res.dat[res.dat$scenario %in% paste0(c(3,13,15,17,19),'A') & res.dat$N==100,]$theoretical.power <- 0.05 -res.dat[res.dat$scenario %in% paste0(c(3,13,15,17,19),'B') & res.dat$N==100,]$theoretical.power <- 0.1870 -res.dat[res.dat$scenario %in% paste0(c(13,15,17,19),'C') & res.dat$N==100,]$theoretical.power <- 0.1870 -res.dat[res.dat$scenario %in% paste0(c(13,15,17,19),'D') & res.dat$N==100,]$theoretical.power <- 0.5666 -res.dat[res.dat$scenario %in% paste0(c(13,15,17,19),'E') & res.dat$N==100,]$theoretical.power <- 0.5666 -res.dat[res.dat$scenario %in% paste0(c(13,15,17,19),'F') & res.dat$N==100,]$theoretical.power <- 0.1870 -res.dat[res.dat$scenario %in% paste0(c(13,15,17,19),'G') & res.dat$N==100,]$theoretical.power <- 0.5666 -res.dat[res.dat$scenario %in% paste0(3,'C') & res.dat$N==100,]$theoretical.power <- 0.5666 -res.dat[res.dat$scenario %in% paste0(3,'D') & res.dat$N==100,]$theoretical.power <- 0.1870 -res.dat[res.dat$scenario %in% paste0(3,'E') & res.dat$N==100,]$theoretical.power <- 0.5666 -## Scénarios J=7 / M=4 -res.dat[res.dat$scenario %in% paste0(c(4,14,16,18,20),'A') & res.dat$N==100,]$theoretical.power <- 0.05 -res.dat[res.dat$scenario %in% paste0(c(4,14,16,18,20),'B') & res.dat$N==100,]$theoretical.power <- 0.2450 -res.dat[res.dat$scenario %in% paste0(c(14,16,18,20),'C') & res.dat$N==100,]$theoretical.power <- 0.2450 -res.dat[res.dat$scenario %in% paste0(c(14,16,18,20),'D') & res.dat$N==100,]$theoretical.power <- 0.7136 -res.dat[res.dat$scenario %in% paste0(c(14,16,18,20),'E') & res.dat$N==100,]$theoretical.power <- 0.7136 -res.dat[res.dat$scenario %in% paste0(c(14,16,18,20),'F') & res.dat$N==100,]$theoretical.power <- 0.2450 -res.dat[res.dat$scenario %in% paste0(c(14,16,18,20),'G') & res.dat$N==100,]$theoretical.power <- 0.7136 -res.dat[res.dat$scenario %in% paste0(4,'C') & res.dat$N==100,]$theoretical.power <- 0.7136 -res.dat[res.dat$scenario %in% paste0(4,'D') & res.dat$N==100,]$theoretical.power <- 0.2450 -res.dat[res.dat$scenario %in% paste0(4,'E') & res.dat$N==100,]$theoretical.power <- 0.7136 -### Scénarios N=200 -## Scénarios J=4 / M=2 -res.dat[res.dat$scenario %in% paste0(c(1,5,7,9,11),'A') & res.dat$N==200,]$theoretical.power <- 0.05 -res.dat[res.dat$scenario %in% paste0(c(1,5,7,9,11),'B') & res.dat$N==200,]$theoretical.power <- 0.2618 -res.dat[res.dat$scenario %in% paste0(c(5,7,9,11),'C') & res.dat$N==200,]$theoretical.power <- 0.2618 -res.dat[res.dat$scenario %in% paste0(c(5,7,9,11),'D') & res.dat$N==200,]$theoretical.power <- 0.7507 -res.dat[res.dat$scenario %in% paste0(c(5,7,9,11),'E') & res.dat$N==200,]$theoretical.power <- 0.7507 -res.dat[res.dat$scenario %in% paste0(c(5,7,9,11),'F') & res.dat$N==200,]$theoretical.power <- 0.2618 -res.dat[res.dat$scenario %in% paste0(c(5,7,9,11),'G') & res.dat$N==200,]$theoretical.power <- 0.7507 -res.dat[res.dat$scenario %in% paste0(1,'C') & res.dat$N==200,]$theoretical.power <- 0.7507 -res.dat[res.dat$scenario %in% paste0(1,'D') & res.dat$N==200,]$theoretical.power <- 0.2618 -res.dat[res.dat$scenario %in% paste0(1,'E') & res.dat$N==200,]$theoretical.power <- 0.7507 -## Scénarios J=4 / M=4 -res.dat[res.dat$scenario %in% paste0(c(2,6,8,10,12),'A') & res.dat$N==200,]$theoretical.power <- 0.05 -res.dat[res.dat$scenario %in% paste0(c(2,6,8,10,12),'B') & res.dat$N==200,]$theoretical.power <- 0.3875 -res.dat[res.dat$scenario %in% paste0(c(6,8,10,12),'C') & res.dat$N==200,]$theoretical.power <- 0.3875 -res.dat[res.dat$scenario %in% paste0(c(6,8,10,12),'D') & res.dat$N==200,]$theoretical.power <- 0.9161 -res.dat[res.dat$scenario %in% paste0(c(6,8,10,12),'E') & res.dat$N==200,]$theoretical.power <- 0.9161 -res.dat[res.dat$scenario %in% paste0(c(6,8,10,12),'F') & res.dat$N==200,]$theoretical.power <- 0.3875 -res.dat[res.dat$scenario %in% paste0(c(6,8,10,12),'G') & res.dat$N==200,]$theoretical.power <- 0.9161 -res.dat[res.dat$scenario %in% paste0(2,'C') & res.dat$N==200,]$theoretical.power <- 0.9161 -res.dat[res.dat$scenario %in% paste0(2,'D') & res.dat$N==200,]$theoretical.power <- 0.3875 -res.dat[res.dat$scenario %in% paste0(2,'E') & res.dat$N==200,]$theoretical.power <- 0.9161 -## Scénarios J=7 / M=2 -res.dat[res.dat$scenario %in% paste0(c(3,13,15,17,19),'A') & res.dat$N==200,]$theoretical.power <- 0.05 -res.dat[res.dat$scenario %in% paste0(c(3,13,15,17,19),'B') & res.dat$N==200,]$theoretical.power <- 0.3258 -res.dat[res.dat$scenario %in% paste0(c(13,15,17,19),'C') & res.dat$N==200,]$theoretical.power <- 0.3258 -res.dat[res.dat$scenario %in% paste0(c(13,15,17,19),'D') & res.dat$N==200,]$theoretical.power <- 0.8538 -res.dat[res.dat$scenario %in% paste0(c(13,15,17,19),'E') & res.dat$N==200,]$theoretical.power <- 0.8538 -res.dat[res.dat$scenario %in% paste0(c(13,15,17,19),'F') & res.dat$N==200,]$theoretical.power <- 0.3258 -res.dat[res.dat$scenario %in% paste0(c(13,15,17,19),'G') & res.dat$N==200,]$theoretical.power <- 0.8538 -res.dat[res.dat$scenario %in% paste0(3,'C') & res.dat$N==200,]$theoretical.power <- 0.8538 -res.dat[res.dat$scenario %in% paste0(3,'D') & res.dat$N==200,]$theoretical.power <- 0.3258 -res.dat[res.dat$scenario %in% paste0(3,'E') & res.dat$N==200,]$theoretical.power <- 0.8538 -## Scénarios J=7 / M=4 -res.dat[res.dat$scenario %in% paste0(c(4,14,16,18,20),'A') & res.dat$N==200,]$theoretical.power <- 0.05 -res.dat[res.dat$scenario %in% paste0(c(4,14,16,18,20),'B') & res.dat$N==200,]$theoretical.power <- 0.4321 -res.dat[res.dat$scenario %in% paste0(c(14,16,18,20),'C') & res.dat$N==200,]$theoretical.power <- 0.4321 -res.dat[res.dat$scenario %in% paste0(c(14,16,18,20),'D') & res.dat$N==200,]$theoretical.power <- 0.9471 -res.dat[res.dat$scenario %in% paste0(c(14,16,18,20),'E') & res.dat$N==200,]$theoretical.power <- 0.9471 -res.dat[res.dat$scenario %in% paste0(c(14,16,18,20),'F') & res.dat$N==200,]$theoretical.power <- 0.4321 -res.dat[res.dat$scenario %in% paste0(c(14,16,18,20),'G') & res.dat$N==200,]$theoretical.power <- 0.9471 -res.dat[res.dat$scenario %in% paste0(4,'C') & res.dat$N==200,]$theoretical.power <- 0.9471 -res.dat[res.dat$scenario %in% paste0(4,'D') & res.dat$N==200,]$theoretical.power <- 0.4321 -res.dat[res.dat$scenario %in% paste0(4,'E') & res.dat$N==200,]$theoretical.power <- 0.9471 -### Scénarios N=300 -## Scénarios J=4 / M=2 -res.dat[res.dat$scenario %in% paste0(c(1,5,7,9,11),'A') & res.dat$N==300,]$theoretical.power <- 0.05 -res.dat[res.dat$scenario %in% paste0(c(1,5,7,9,11),'B') & res.dat$N==300,]$theoretical.power <- 0.3660 -res.dat[res.dat$scenario %in% paste0(c(5,7,9,11),'C') & res.dat$N==300,]$theoretical.power <- 0.3660 -res.dat[res.dat$scenario %in% paste0(c(5,7,9,11),'D') & res.dat$N==300,]$theoretical.power <- 0.8981 -res.dat[res.dat$scenario %in% paste0(c(5,7,9,11),'E') & res.dat$N==300,]$theoretical.power <- 0.8981 -res.dat[res.dat$scenario %in% paste0(c(5,7,9,11),'F') & res.dat$N==300,]$theoretical.power <- 0.3660 -res.dat[res.dat$scenario %in% paste0(c(5,7,9,11),'G') & res.dat$N==300,]$theoretical.power <- 0.8981 -res.dat[res.dat$scenario %in% paste0(1,'C') & res.dat$N==300,]$theoretical.power <- 0.8981 -res.dat[res.dat$scenario %in% paste0(1,'D') & res.dat$N==300,]$theoretical.power <- 0.3660 -res.dat[res.dat$scenario %in% paste0(1,'E') & res.dat$N==300,]$theoretical.power <- 0.8981 -## Scénarios J=4 / M=4 -res.dat[res.dat$scenario %in% paste0(c(2,6,8,10,12),'A') & res.dat$N==300,]$theoretical.power <- 0.05 -res.dat[res.dat$scenario %in% paste0(c(2,6,8,10,12),'B') & res.dat$N==300,]$theoretical.power <- 0.5373 -res.dat[res.dat$scenario %in% paste0(c(6,8,10,12),'C') & res.dat$N==300,]$theoretical.power <- 0.5373 -res.dat[res.dat$scenario %in% paste0(c(6,8,10,12),'D') & res.dat$N==300,]$theoretical.power <- 0.9834 -res.dat[res.dat$scenario %in% paste0(c(6,8,10,12),'E') & res.dat$N==300,]$theoretical.power <- 0.9834 -res.dat[res.dat$scenario %in% paste0(c(6,8,10,12),'F') & res.dat$N==300,]$theoretical.power <- 0.5373 -res.dat[res.dat$scenario %in% paste0(c(6,8,10,12),'G') & res.dat$N==300,]$theoretical.power <- 0.9834 -res.dat[res.dat$scenario %in% paste0(2,'C') & res.dat$N==300,]$theoretical.power <- 0.9834 -res.dat[res.dat$scenario %in% paste0(2,'D') & res.dat$N==300,]$theoretical.power <- 0.5373 -res.dat[res.dat$scenario %in% paste0(2,'E') & res.dat$N==300,]$theoretical.power <- 0.9834 -## Scénarios J=7 / M=2 -res.dat[res.dat$scenario %in% paste0(c(3,13,15,17,19),'A') & res.dat$N==300,]$theoretical.power <- 0.05 -res.dat[res.dat$scenario %in% paste0(c(3,13,15,17,19),'B') & res.dat$N==300,]$theoretical.power <- 0.4550 -res.dat[res.dat$scenario %in% paste0(c(13,15,17,19),'C') & res.dat$N==300,]$theoretical.power <- 0.4550 -res.dat[res.dat$scenario %in% paste0(c(13,15,17,19),'D') & res.dat$N==300,]$theoretical.power <- 0.9584 -res.dat[res.dat$scenario %in% paste0(c(13,15,17,19),'E') & res.dat$N==300,]$theoretical.power <- 0.9584 -res.dat[res.dat$scenario %in% paste0(c(13,15,17,19),'F') & res.dat$N==300,]$theoretical.power <- 0.4550 -res.dat[res.dat$scenario %in% paste0(c(13,15,17,19),'G') & res.dat$N==300,]$theoretical.power <- 0.9584 -res.dat[res.dat$scenario %in% paste0(3,'C') & res.dat$N==300,]$theoretical.power <- 0.9584 -res.dat[res.dat$scenario %in% paste0(3,'D') & res.dat$N==300,]$theoretical.power <- 0.4550 -res.dat[res.dat$scenario %in% paste0(3,'E') & res.dat$N==300,]$theoretical.power <- 0.9584 -## Scénarios J=7 / M=4 -res.dat[res.dat$scenario %in% paste0(c(4,14,16,18,20),'A') & res.dat$N==300,]$theoretical.power <- 0.05 -res.dat[res.dat$scenario %in% paste0(c(4,14,16,18,20),'B') & res.dat$N==300,]$theoretical.power <- 0.5907 -res.dat[res.dat$scenario %in% paste0(c(14,16,18,20),'C') & res.dat$N==300,]$theoretical.power <- 0.5907 -res.dat[res.dat$scenario %in% paste0(c(14,16,18,20),'D') & res.dat$N==300,]$theoretical.power <- 0.9919 -res.dat[res.dat$scenario %in% paste0(c(14,16,18,20),'E') & res.dat$N==300,]$theoretical.power <- 0.9919 -res.dat[res.dat$scenario %in% paste0(c(14,16,18,20),'F') & res.dat$N==300,]$theoretical.power <- 0.5907 -res.dat[res.dat$scenario %in% paste0(c(14,16,18,20),'G') & res.dat$N==300,]$theoretical.power <- 0.9919 -res.dat[res.dat$scenario %in% paste0(4,'C') & res.dat$N==300,]$theoretical.power <- 0.9919 -res.dat[res.dat$scenario %in% paste0(4,'D') & res.dat$N==300,]$theoretical.power <- 0.5907 -res.dat[res.dat$scenario %in% paste0(4,'E') & res.dat$N==300,]$theoretical.power <- 0.9919 -### Scénarios N=50 -## Scénarios J=4 / M=2 -res.dat[res.dat$scenario %in% paste0(c(1,5,7,9,11),'A') & res.dat$N==50,]$theoretical.power <- 0.05 -res.dat[res.dat$scenario %in% paste0(c(1,5,7,9,11),'B') & res.dat$N==50,]$theoretical.power <- 0.1013 -res.dat[res.dat$scenario %in% paste0(c(5,7,9,11),'C') & res.dat$N==50,]$theoretical.power <- 0.1013 -res.dat[res.dat$scenario %in% paste0(c(5,7,9,11),'D') & res.dat$N==50,]$theoretical.power <- 0.2615 -res.dat[res.dat$scenario %in% paste0(c(5,7,9,11),'E') & res.dat$N==50,]$theoretical.power <- 0.2615 -res.dat[res.dat$scenario %in% paste0(c(5,7,9,11),'F') & res.dat$N==50,]$theoretical.power <- 0.1013 -res.dat[res.dat$scenario %in% paste0(c(5,7,9,11),'G') & res.dat$N==50,]$theoretical.power <- 0.2615 -res.dat[res.dat$scenario %in% paste0(1,'C') & res.dat$N==50,]$theoretical.power <- 0.2615 -res.dat[res.dat$scenario %in% paste0(1,'D') & res.dat$N==50,]$theoretical.power <- 0.1013 -res.dat[res.dat$scenario %in% paste0(1,'E') & res.dat$N==50,]$theoretical.power <- 0.2615 -## Scénarios J=4 / M=4 -res.dat[res.dat$scenario %in% paste0(c(2,6,8,10,12),'A') & res.dat$N==50,]$theoretical.power <- 0.05 -res.dat[res.dat$scenario %in% paste0(c(2,6,8,10,12),'B') & res.dat$N==50,]$theoretical.power <- 0.1339 -res.dat[res.dat$scenario %in% paste0(c(6,8,10,12),'C') & res.dat$N==50,]$theoretical.power <- 0.1339 -res.dat[res.dat$scenario %in% paste0(c(6,8,10,12),'D') & res.dat$N==50,]$theoretical.power <- 0.3863 -res.dat[res.dat$scenario %in% paste0(c(6,8,10,12),'E') & res.dat$N==50,]$theoretical.power <- 0.3863 -res.dat[res.dat$scenario %in% paste0(c(6,8,10,12),'F') & res.dat$N==50,]$theoretical.power <- 0.1339 -res.dat[res.dat$scenario %in% paste0(c(6,8,10,12),'G') & res.dat$N==50,]$theoretical.power <- 0.3863 -res.dat[res.dat$scenario %in% paste0(2,'C') & res.dat$N==50,]$theoretical.power <- 0.3863 -res.dat[res.dat$scenario %in% paste0(2,'D') & res.dat$N==50,]$theoretical.power <- 0.1339 -res.dat[res.dat$scenario %in% paste0(2,'E') & res.dat$N==50,]$theoretical.power <- 0.3863 -## Scénarios J=7 / M=2 -res.dat[res.dat$scenario %in% paste0(c(3,13,15,17,19),'A') & res.dat$N==50,]$theoretical.power <- 0.05 -res.dat[res.dat$scenario %in% paste0(c(3,13,15,17,19),'B') & res.dat$N==50,]$theoretical.power <- 0.1171 -res.dat[res.dat$scenario %in% paste0(c(13,15,17,19),'C') & res.dat$N==50,]$theoretical.power <- 0.1171 -res.dat[res.dat$scenario %in% paste0(c(13,15,17,19),'D') & res.dat$N==50,]$theoretical.power <- 0.3236 -res.dat[res.dat$scenario %in% paste0(c(13,15,17,19),'E') & res.dat$N==50,]$theoretical.power <- 0.3236 -res.dat[res.dat$scenario %in% paste0(c(13,15,17,19),'F') & res.dat$N==50,]$theoretical.power <- 0.1171 -res.dat[res.dat$scenario %in% paste0(c(13,15,17,19),'G') & res.dat$N==50,]$theoretical.power <- 0.3236 -res.dat[res.dat$scenario %in% paste0(3,'C') & res.dat$N==50,]$theoretical.power <- 0.3236 -res.dat[res.dat$scenario %in% paste0(3,'D') & res.dat$N==50,]$theoretical.power <- 0.1171 -res.dat[res.dat$scenario %in% paste0(3,'E') & res.dat$N==50,]$theoretical.power <- 0.3236 -## Scénarios J=7 / M=4 -res.dat[res.dat$scenario %in% paste0(c(4,14,16,18,20),'A') & res.dat$N==50,]$theoretical.power <- 0.05 -res.dat[res.dat$scenario %in% paste0(c(4,14,16,18,20),'B') & res.dat$N==50,]$theoretical.power <- 0.1448 -res.dat[res.dat$scenario %in% paste0(c(14,16,18,20),'C') & res.dat$N==50,]$theoretical.power <- 0.1448 -res.dat[res.dat$scenario %in% paste0(c(14,16,18,20),'D') & res.dat$N==50,]$theoretical.power <- 0.4328 -res.dat[res.dat$scenario %in% paste0(c(14,16,18,20),'E') & res.dat$N==50,]$theoretical.power <- 0.4328 -res.dat[res.dat$scenario %in% paste0(c(14,16,18,20),'F') & res.dat$N==50,]$theoretical.power <- 0.1448 -res.dat[res.dat$scenario %in% paste0(c(14,16,18,20),'G') & res.dat$N==50,]$theoretical.power <- 0.4328 -res.dat[res.dat$scenario %in% paste0(4,'C') & res.dat$N==50,]$theoretical.power <- 0.4328 -res.dat[res.dat$scenario %in% paste0(4,'D') & res.dat$N==50,]$theoretical.power <- 0.1448 -res.dat[res.dat$scenario %in% paste0(4,'E') & res.dat$N==50,]$theoretical.power <- 0.4328 -### DIF scenarios -res.dat.dif$theoretical.power <- res.dat[81:nrow(res.dat),]$theoretical.power -install.packages("lordif") -library(lordif) -library(TAM) -library(doMC) -library(parallel) -library(pbmcapply) -library(funprog) -library(dplyr) -library(readxl) -dat1 <- read.csv(file = '/home/corentin/Documents/These/Recherche/Simulations/Data/NoDIF/N50/scenario_1A_50.csv') -dat1 <- dat1[dat1$replication==1,] -lordif(dat1,group=dat1$TT) -dat1 -lordif(dat1,group=dat1$TT,selection = dat[,c('item1','item2',"item3",'item4')],model = "GPCM") -lordif(dat1,group=dat1$TT,selection = dat1[,c('item1','item2',"item3",'item4')],model = "GPCM") -lordif(dat1[,c('item1','item2',"item3",'item4')],group=dat1$TT,model = "GPCM") -dat1 <- read.csv(file = '/home/corentin/Documents/These/Recherche/Simulations/Data/DIF/N50/scenario_5A_50.csv') -dat1 <- dat1[dat1$replication==1,] -lordif(dat1[,c('item1','item2',"item3",'item4')],group=dat1$TT,model = "GPCM") -lordif(dat1[,c('item1','item2',"item3",'item4')],group=dat1$TT,model = "GPCM",maxIter = 10000) -lordif(dat1[,c('item1','item2',"item3",'item4')],group=dat1$TT,model = "GPCM",maxIter = 10000,criterion = "Beta") -dat1 <- read.csv(file = '/home/corentin/Documents/These/Recherche/Simulations/Data/DIF/N300/scenario_5A_300.csv') +to_return <- list("splits"=splits, +"thetas"=theta_hat, +"deltas_nodif"=delta_hat_nodif, +"deltas_dif"=delta_hat_dif, +"pvalues"=pvalues, +"devs"=devs, +"crits"=crits) +return(to_return) +} +DIFtree(Y=dat1[,c('item1','item2','item3','item4','item5','item6','item7')],X=as.data.frame(dat1[,'TT']),model="PCM") +DIFtree +View(DIFtree) +DIFtree <- function (Y, X, model = c("Rasch", "Logistic", "PCM"), type = c("udif", +"dif", "nudif"), alpha = 0.05, nperm = 1000, trace = FALSE, +penalize = FALSE, ...) +{ +UseMethod("DIFtree") +} +DIFtree <- function (Y, X, model = c("Rasch", "Logistic", "PCM"), type = c("udif", +"dif", "nudif"), alpha = 0.05, nperm = 1000, trace = FALSE, +penalize = FALSE, ...) +{ +browser() +UseMethod("DIFtree") +} +DIFtree(Y=dat1[,c('item1','item2','item3','item4','item5','item6','item7')],X=as.data.frame(dat1[,'TT']),model="PCM") +library(DIFtree) +dat1 <- read.csv(file = '/home/corentin/Documents/These/Recherche/Simulations/Data/DIF/N300/scenario_13A_300.csv') +dat1 <- dat1[dat1$replication==1] dat1 <- dat1[dat1$replication==1,] -lordif(dat1[,c('item1','item2',"item3",'item4')],group=dat1$TT,model = "GPCM",maxIter = 10000,criterion = "Beta") -dat1 <- read.csv(file = '/home/corentin/Documents/These/Recherche/Simulations/Data/DIF/N300/scenario_5D_300.csv') +DIFtree(Y=dat1[,c('item1','item2','item3','item4','item5','item6','item7')],X=as.data.frame(dat1[,'TT']),model="PCM") +function (Y, X, model = c("Rasch", "Logistic", "PCM"), type = c("udif", +"dif", "nudif"), alpha = 0.05, nperm = 1000, trace = FALSE, +penalize = FALSE, ...) +{ +UseMethod("DIFtree") +} +DIFtree <- function (Y, X, model = c("Rasch", "Logistic", "PCM"), type = c("udif", +"dif", "nudif"), alpha = 0.05, nperm = 1000, trace = FALSE, +penalize = FALSE, ...) +{ +UseMethod("DIFtree") +} +DIFtree(Y=dat1[,c('item1','item2','item3','item4','item5','item6','item7')],X=as.data.frame(dat1[,'TT']),model="PCM") +library(DIFtree) +dat1 <- read.csv(file = '/home/corentin/Documents/These/Recherche/Simulations/Data/DIF/N300/scenario_13A_300.csv') dat1 <- dat1[dat1$replication==1,] -lordif(dat1[,c('item1','item2',"item3",'item4')],group=dat1$TT,model = "GPCM",maxIter = 10000,criterion = "Beta") -dat1 <- read.csv(file = '/home/corentin/Documents/These/Recherche/Simulations/Data/DIF/N300/scenario_7D_300.csv') +DIFtree(Y=dat1[,c('item1','item2','item3','item4','item5','item6','item7')],X=as.data.frame(dat1[,'TT']),model="PCM") +DIFtree(Y=dat1[,c('item1','item2','item3','item4','item5','item6','item7')],X=as.data.frame(dat1[,'TT']),model="PCM",trace=T) +dat1 <- read.csv(file = '/home/corentin/Documents/These/Recherche/Simulations/Data/DIF/N300/scenario_14A_300.csv') dat1 <- dat1[dat1$replication==1,] -lordif(dat1[,c('item1','item2',"item3",'item4')],group=dat1$TT,model = "GPCM",maxIter = 10000,criterion = "Beta") -dat1 <- dat1[dat1$replication<=20,] -dat1 <- read.csv(file = '/home/corentin/Documents/These/Recherche/Simulations/Data/DIF/N300/scenario_7D_300.csv') +DIFtree(Y=dat1[,c('item1','item2','item3','item4','item5','item6','item7')],X=as.data.frame(dat1[,'TT']),model="PCM",trace=T) +dat1$item1 <- as.factor(dat1$item1) +dat1$item2 <- as.factor(dat1$item2) +dat1$item3 <- as.factor(dat1$item3) +dat1$item4 <- as.factor(dat1$item4) +dat1$item5 <- as.factor(dat1$item5) +dat1$item6 <- as.factor(dat1$item6) +dat1$item7 <- as.factor(dat1$item7) +DIFtree(Y=dat1[,c('item1','item2','item3','item4','item5','item6','item7')],X=as.data.frame(dat1[,'TT']),model="PCM",trace=T) +data("data_sim_PCM") +data_sim_PCM +dat1 <- read.csv(file = '/home/corentin/Documents/These/Recherche/Simulations/Data/DIF/N300/scenario_14A_300.csv') dat1 <- dat1[dat1$replication==1,] -lordif(dat1[,c('item1','item2',"item3",'item4')],group=dat1$TT,model = "GPCM",maxIter = 10000,criterion = "Beta") -lordif(dat1[,c('item1','item2',"item3",'item4')],group=dat1$TT,model = "GRM",maxIter = 10000,criterion = "Beta") -lordif(dat1[,c('item1','item2',"item3",'item4')],group=dat1$TT,model = "GPCM",maxIter = 10000,criterion = "Beta") -lordif(dat1[,c('item1','item2',"item3",'item4')],group=dat1$TT,model = "GPCM",maxIter = 10000,criterion = "Beta",MonteCarlo = T) -lordif(dat1[,c('item1','item2',"item3",'item4')],group=dat1$TT,model = "GPCM",maxIter = 10000,criterion = "Beta",inc=0.01) -lordif(dat1[,c('item1','item2',"item3",'item4')],group=dat1$TT,model = "GPCM",maxIter = 10000,criterion = "Beta",minCell=0.01) -lordif(dat1[,c('item1','item2',"item3",'item4')],group=dat1$TT,model = "GPCM",maxIter = 10000,criterion = "Beta",minCell=1) -lordif(dat1[,c('item1','item2',"item3",'item4')],group=dat1$TT,model = "GPCM",maxIter = 10000,criterion = "Beta",minCell=10) -lordif(dat1[,c('item1','item2',"item3",'item4')],group=dat1$TT,model = "GPCM",maxIter = 10000,criterion = "Beta") -dat1 <- read.csv(file = '/home/corentin/Documents/These/Recherche/Simulations/Data/DIF/N300/scenario_7A_300.csv') -dat1 <- dat1[dat1$replication<=20,] -lordif(dat1[,c('item1','item2',"item3",'item4')],group=dat1$TT,model = "GPCM",maxIter = 10000,criterion = "Beta") -lordif(dat1[,c('item1','item2',"item3",'item4')],group=dat1$TT,model = "GPCM",maxIter = 10000,criterion = "Beta",beta.change = 0.01) -lordif(dat1[,c('item1','item2',"item3",'item4')],group=dat1$TT,model = "GPCM",maxIter = 10000,criterion = "Beta",beta.change = 0.001) -lordif(dat1[,c('item1','item2',"item3",'item4')],group=dat1$TT,model = "GPCM",maxIter = 10000,criterion = "Beta",beta.change = 0.2) +dat1 +DIFtree(Y=dat1[,c('item1','item2','item3','item4','item5','item6','item7')],X=as.data.frame(dat1[,'TT']),model="PCM",trace=T) +dat1[,c('item1','item2','item3','item4','item5','item6','item7')] +dat1[,c('item1','item2','item3','item4','item5','item6','item7')] <- dat1[,c('item1','item2','item3','item4','item5','item6','item7')]+1 +dat1[,c('item1','item2','item3','item4','item5','item6','item7')] +DIFtree(Y=dat1[,c('item1','item2','item3','item4','item5','item6','item7')],X=as.data.frame(dat1[,'TT']),model="PCM",trace=T) +DIFtree(Y=dat1[,c('item1')],X=as.data.frame(dat1[,'TT']),model="PCM",trace=T) +dat1[,c('item1')] +dat1[,c('TT')] +DIFtree(Y=dat1[,c('item1')],X=as.data.frame(dat1[,'TT']),model="PCM",trace=T) +as.data.frame(dat1[,'TT']) +DIFtree(Y=data.matrix(dat1[,c('item1','item2','item3','item4','item5','item6','item7')]),X=as.matrix(dat1[,'TT']),model="PCM",trace=T) +DIFtree(Y=data.matrix(dat1[,c('item1','item2','item3','item4','item5','item6','item7')]),X=data.frame(x1=dat1[,'TT']),model="PCM",trace=T) +data.matrix(dat1[,c('item1','item2','item3','item4','item5','item6','item7')]) +Y2 <- data_sim_PCM[,1] +X2 <- data_sim_PCM[,-1] +Y2 +X2 +DIFtree(Y=as.matrix(dat1[,c('item1','item2','item3','item4','item5','item6','item7')]),X=data.frame(x1=dat1[,'TT']),model="PCM",trace=T) +mod2 <- DIFtree(Y=Y2,X=X2,model="PCM",alpha=0.05,nperm=100,trace=TRUE) +library(DIFtree) +data(data_sim_Rasch) +data(data_sim_PCM) +Y1 <- data_sim_Rasch[,1] +X1 <- data_sim_Rasch[,-1] +Y2 <- data_sim_PCM[,1] +X2 <- data_sim_PCM[,-1] +## Not run: +mod1 <- DIFtree(Y=Y1,X=X1,model="Logistic",type="udif",alpha=0.05,nperm=1000,trace=TRUE) +mod2 <- DIFtree(Y=Y2,X=X2,model="PCM",alpha=0.05,nperm=100,trace=TRUE) +remove.packages("DIFtree") +library(devtools) +install_version("DIFtree","3.1.4") +library(DIFtree) +data(data_sim_Rasch) +data(data_sim_PCM) +Y1 <- data_sim_Rasch[,1] +X1 <- data_sim_Rasch[,-1] +Y2 <- data_sim_PCM[,1] +X2 <- data_sim_PCM[,-1] +## Not run: +mod1 <- DIFtree(Y=Y1,X=X1,model="Logistic",type="udif",alpha=0.05,nperm=1000,trace=TRUE) +mod2 <- DIFtree(Y=Y2,X=X2,model="PCM",alpha=0.05,nperm=100,trace=TRUE) +remove.packages("DIFtree") +install_version("DIFtree","2.0.1") +library(DIFtree) +data(data_sim_Rasch) +data(data_sim_PCM) +Y1 <- data_sim_Rasch[,1] +X1 <- data_sim_Rasch[,-1] +Y2 <- data_sim_PCM[,1] +X2 <- data_sim_PCM[,-1] +## Not run: +mod1 <- DIFtree(Y=Y1,X=X1,model="Logistic",type="udif",alpha=0.05,nperm=1000,trace=TRUE) +mod2 <- DIFtree(Y=Y2,X=X2,model="PCM",alpha=0.05,nperm=100,trace=TRUE) +tree_PCM <- +function(y, +DM_kov, +npersons, +nitems, +nvar, +ordered_values, +n_levels, +n_s, +alpha, +nperm, +trace +){ +# design of PCM +pp_design <- diag(npersons) # persons, person P reference +pp_design <- pp_design[rep(1:nrow(pp_design),each=nitems),] +pp_design <- pp_design[,-npersons] +ip_design <- -1*diag(nitems) # item parameter +ip_design <- ip_design[rep(1:nrow(ip_design),times=npersons),] +dm_pcm <- cbind(pp_design,ip_design) +names_pcm <- c(paste("theta",1:(npersons-1),sep=""),paste("delta",1:nitems,sep="")) +colnames(dm_pcm) <- names_pcm +# functions to build design +thresholds <- lapply(1:nvar, function(j) ordered_values[[j]][-length(ordered_values[[j]])]) +v <- lapply(1:nvar,function(j) 1:(n_levels[j]-1)) +w <- lapply(1:nvar, function(j) rep(paste0("s",j),n_s[j])) +design_one <- function(x,threshold,upper){ +if(upper){ +ret <- ifelse(x > threshold,1,0) +} else{ +ret <- ifelse(x > threshold,0,1) +} +return(ret) +} +design <- function(x,thresholds,upper){ +ret <- sapply(thresholds, function(j) design_one(x,j,upper)) +return(ret) +} +whole_design <- function(X,var,item,thresholds,upper=TRUE){ +design_tree <- matrix(0,nrow=nitems*npersons,ncol=length(thresholds[[var]])) +rows <- seq(item,(nitems*npersons),by=nitems) +design_tree[rows,] <- design(X[,var],thresholds[[var]],upper) +z <- rep(paste0(ifelse(upper,"_u","_l"),item),length(thresholds[[var]])) +colnames(design_tree) <- paste0(w[[var]],v[[var]],z) +return(design_tree) +} +designlists <- function(X,thresholds,upper=TRUE){ +ret <- lapply(1:nitems, function(j){ +lapply(1:nvar, function(var){ +whole_design(X,var,j,thresholds,upper) +}) +}) +return(ret) +} +######################################################################################### +mod_potential <- list() +devs <- c() +crits <- c() +splits <- c() +pvalues <- c() +ip <- list() +vars_evtl <- list() +splits_evtl <- list() +which_obs <- list() +numbers <- list() +count <- 1 +numbers[[1]] <- lapply(1:nitems,function(j) 1) +which_obs[[1]] <- lapply(1:nitems,function(j) matrix(1:npersons,nrow=1)) +splits_evtl[[1]] <- lapply(1:nitems,function(j) lapply(1:nvar, function(var) matrix(1:n_s[var],nrow=1))) +vars_evtl[[1]] <- lapply(1:nitems,function(j) nvar) +ip[[1]] <- lapply(1:nitems,function(j) paste0("delta",j)) +### PCM ### +pp <- paste("theta",1:(npersons-1),sep="") +help_p <- paste0(pp,collapse="+") +help01 <- formula(paste("y~",help_p,"+",paste0(unlist(ip[[1]]),collapse="+"),"-1")) +help02 <- formula(paste0("FALSE~",paste0(unlist(ip[[1]]),collapse="+"))) +dat0 <- data.frame(y,dm_pcm) +mod0 <- tryCatch(vglm(help01, +family=acat(parallel=help02, reverse=FALSE), +data=dat0, +na.action=na.omit, +checkwz=FALSE), +error = function(e) stop("PCM not identified!", call. =FALSE)) +start <- VGAM::predict(mod0) +mod_potential[[1]] <- mod0 +design_upper <- designlists(DM_kov,thresholds) +design_lower <- designlists(DM_kov,thresholds,upper=FALSE) +sig <- TRUE +anysplit <- TRUE +# function to compute all models in one knot +allmodels <- function(i,var,kn,design_lower,design_upper){ +deviances <- rep(0,n_s[var]) +help_kn <- ip[[count]][[i]][kn] +help1 <- paste0(unlist(ip[[count]])[-which(unlist(ip[[count]])==help_kn)],collapse="+") +splits_aktuell <- splits_evtl[[count]][[i]][[var]][kn,] +splits_aktuell <- splits_aktuell[!is.na(splits_aktuell)] +obs0 <- which(!is.na(which_obs[[count]][[i]][kn,])) +if(length(splits_aktuell)>0){ +for(j in splits_aktuell){ +n_lower <- sum(DM_kov[obs0,var]<=ordered_values[[var]][j]) +n_upper <- sum(DM_kov[obs0,var]>ordered_values[[var]][j]) +if(n_lower>=30 & n_upper>=30){ +dat <- data.frame(dat0,design_lower[[i]][[var]][,j,drop=FALSE],design_upper[[i]][[var]][,j,drop=FALSE]) +help2 <- paste(ip[[count]][[i]][kn],c(colnames(design_lower[[i]][[var]])[j],colnames(design_upper[[i]][[var]])[j]),sep=":") +help3 <- paste(help2,collapse="+") +help41 <- formula(paste("y~",help1,"+",help3,"-1")) +help42 <- formula(paste0("FALSE~",help1,"+",help3)) +suppressWarnings( +mod <- try(vglm(help41, +family=acat(parallel=help42, reverse=FALSE), +data=dat, +checkwz=FALSE, +na.action=na.omit, +offset=start)) +) +if(class(mod)!="try-error"){ +deviances[j] <- deviance(mod0)-deviance(mod) +} +} +} +} +return(deviances) +} +# estimate tree +while(sig & anysplit){ +# compute all models +dv <- lapply(1:nvar,function(var) { +lapply(1:nitems,function(i) { +n_knots <- length(ip[[count]][[i]]) +deviances <- matrix(rep(0,n_s[var]*n_knots),ncol=n_knots) +for(kn in 1:n_knots){ +deviances[,kn] <- allmodels(i,var,kn,design_lower,design_upper) +} +return(deviances) +}) +}) +# select optimum +variable <- which.max(lapply(1:nvar,function(j) max(unlist(dv[[j]])))) +item <- which.max(lapply(1:nitems, function(j) max(dv[[variable]][[j]]))) +split <- as.numeric(which(dv[[variable]][[item]]==max(dv[[variable]][[item]]),arr.ind=TRUE)[,1]) +knoten <- as.numeric(which(dv[[variable]][[item]]==max(dv[[variable]][[item]]),arr.ind=TRUE)[,2]) +if(length(split)>1){ +split <- split[1] +knoten <- knoten[1] +warning(paste("Maximum in iteration ",count," not uniquely defined")) +} +ip_old <- ip[[count]][[item]][knoten] +level <- length(strsplit(ip_old,":")[[1]]) +number <- numbers[[count]][[item]][knoten] +left <- max(numbers[[count]][[item]])+1 +right <- max(numbers[[count]][[item]])+2 +# compute permutation test +dev <- rep(NA,nperm) +for(perm in 1:nperm){ +dv_perm <- rep(0,n_s[variable]) +obs_aktuell <- which_obs[[count]][[item]][knoten,] +obs_aktuell <- obs_aktuell[!is.na(obs_aktuell)] +DM_kov_perm <- DM_kov +DM_kov_perm[obs_aktuell,variable] <- sample(DM_kov_perm[obs_aktuell,variable],length(obs_aktuell)) +design_upper_perm <- design_upper +design_upper_perm[[item]][[variable]] <- whole_design(DM_kov_perm,variable,item,thresholds) +design_lower_perm <- design_lower +design_lower_perm[[item]][[variable]] <- whole_design(DM_kov_perm,variable,item,thresholds,upper=FALSE) +dv_perm <- allmodels(item,variable,knoten,design_lower_perm,design_upper_perm) +dev[perm] <- max(dv_perm) +if(trace){ +cat(".") +} +} +# test decision +crit_val <- quantile(dev,1-(alpha/vars_evtl[[count]][[item]][knoten])) +proof <- max(dv[[variable]][[item]]) > crit_val +devs[count] <- max(dv[[variable]][[item]]) +crits[count] <- crit_val +pvalues[count] <- length(which(dev>max(dv[[variable]][[item]])))/nperm +if(proof){ +# get new formula +help_kn2 <- ip[[count]][[item]][knoten] +help5 <- paste0(unlist(ip[[count]])[-which(unlist(ip[[count]])==help_kn2)],collapse="+") +help6 <- paste(ip[[count]][[item]][knoten],c(colnames(design_lower[[item]][[variable]])[split],colnames(design_upper[[item]][[variable]])[split]),sep=":") +help7 <- paste(help6,collapse="+") +help81 <- formula(paste("y~",help_p,"+",help5,"+",help7,"-1")) +help82 <- formula(paste0("FALSE~",help5,"+",help7)) +###################### +if(level>1){ +help_kn4 <- lu(c(),1,level-1,c()) +help_kn5 <- unlist(strsplit(help_kn2,"")) +help_kn6 <- paste0(help_kn5[which(help_kn5=="_")+1],collapse="") +knoten2 <- which(help_kn4==help_kn6) +} else{ +knoten2 <- knoten +} +###################### +splits <- rbind(splits,c(variable,item,split,level,knoten2,number,left,right)) +# fit new model +dat <- dat0 <- data.frame(dat0,design_lower[[item]][[variable]][,split,drop=FALSE],design_upper[[item]][[variable]][,split,drop=FALSE]) +suppressWarnings( +mod0 <- mod_potential[[count+1]] <- tryCatch(vglm(help81, +family=acat(parallel=help82, reverse=FALSE), +data=dat, +na.action=na.omit, +checkwz=FALSE, +etastart=start), +error = function(e) stop("IFT_PCM not identified!", call. =FALSE)) +) +start <- VGAM::predict(mod0) +# generiere neue itemparameter +ip[[count+1]] <- ip[[count]] +ip[[count+1]][[item]] <- rep("",length(ip[[count]][[item]])+1) +ip[[count+1]][[item]][c(knoten,knoten+1)] <- help6 +ip[[count+1]][[item]][-c(knoten,knoten+1)]<- ip[[count]][[item]][-knoten] +# passe splits_evtl an +n_knots <- length(ip[[count+1]][[item]]) +splits_evtl[[count+1]] <- splits_evtl[[count]] +for(var in 1:nvar){ +splits_evtl[[count+1]][[item]][[var]] <- matrix(0,nrow=n_knots,ncol=n_s[var]) +splits_evtl[[count+1]][[item]][[var]][c(knoten,knoten+1),] <- matrix(rep(splits_evtl[[count]][[item]][[var]][knoten,],2),nrow=2,byrow=T) +splits_evtl[[count+1]][[item]][[var]][-c(knoten,knoten+1),] <- splits_evtl[[count]][[item]][[var]][-knoten,] +} +splits_evtl[[count+1]][[item]][[variable]][knoten,splits_evtl[[count+1]][[item]][[variable]][knoten,]>=split] <- NA +splits_evtl[[count+1]][[item]][[variable]][(knoten+1),splits_evtl[[count+1]][[item]][[variable]][(knoten+1),]<=split] <- NA +# any split? +anysplit <- !all(is.na(unlist(splits_evtl[[count+1]]))) +# passe vars_evtl an +vars_evtl[[count+1]] <- vars_evtl[[count]] +vars_evtl[[count+1]][[item]] <- rep(0,n_knots) +vars_evtl[[count+1]][[item]][c(knoten,knoten+1)] <- rep(vars_evtl[[count]][[item]][knoten],2) +vars_evtl[[count+1]][[item]][-c(knoten,knoten+1)]<- vars_evtl[[count]][[item]][-knoten] +if(length(which(!is.na(splits_evtl[[count+1]][[item]][[variable]][knoten,])))==0){ +vars_evtl[[count+1]][[item]][knoten] <- vars_evtl[[count+1]][[item]][knoten]-1 +} +if(length(which(!is.na(splits_evtl[[count+1]][[item]][[variable]][knoten+1,])))==0){ +vars_evtl[[count+1]][[item]][knoten+1] <- vars_evtl[[count+1]][[item]][knoten+1]-1 +} +# passe which_obs an +which_obs[[count+1]] <- which_obs[[count]] +which_obs[[count+1]][[item]] <- matrix(0,nrow=n_knots,ncol=npersons) +which_obs[[count+1]][[item]][c(knoten,knoten+1),] <- matrix(rep(which_obs[[count]][[item]][knoten,],2),nrow=2,byrow=T) +which_obs[[count+1]][[item]][-c(knoten,knoten+1),] <- which_obs[[count]][[item]][-knoten,] +thresh <- ordered_values[[variable]][1:n_s[variable]][split] +which_obs[[count+1]][[item]][knoten,DM_kov[,variable]>thresh] <- NA +which_obs[[count+1]][[item]][(knoten+1),DM_kov[,variable]<=thresh] <- NA +# passe numbers an +numbers[[count+1]] <- numbers[[count]] +numbers[[count+1]][[item]] <- numeric(length=n_knots) +numbers[[count+1]][[item]][c(knoten,knoten+1)] <- c(left,right) +numbers[[count+1]][[item]][-c(knoten,knoten+1)] <- numbers[[count]][[item]][-knoten] +# trace +if(trace){ +cat(paste0("\n Split"," ",count,";"," ","Item"," ",item,"\n")) +} +# erhoehe counter +count <- count+1 +} else{ +sig <- FALSE +} +} +################################################################################### +# prettify results +mod_opt <- mod_potential[[count]] +ip_opt <- names(coef(mod_opt))[-c(1:(npersons-1))] +theta_hat <- c(coef(mod_opt)[1:(npersons-1)],0) +delta_hat <- coef(mod_opt)[npersons:length(coef(mod_opt))] +if(count>1){ +dif_items <- unique(splits[,2]) +nodif_items <- c(1:nitems)[-dif_items] +delta_hat_nodif <- sapply(nodif_items,function(j) delta_hat[grep(paste0("delta",j,":"),ip_opt)]) +rownames(delta_hat_nodif) <- 1:nrow(delta_hat_nodif) +colnames(delta_hat_nodif) <- paste0("delta", nodif_items) +delta_hat_dif <- lapply(dif_items, function(j) delta_hat[grep(paste0("delta",j,":"),ip_opt)]) +names(delta_hat_dif) <- dif_items +help9 <- cumsum(c(0,(n_levels-1))) +colnames(splits) <- c("var","item","split","level","node","number","left","right") +splits <- data.frame(cbind(splits[,1:5,drop=FALSE],"variable"=rep(NA,nrow(splits)),"threshold"=rep(NA,nrow(splits)),splits[,6:8,drop=FALSE])) +for(i in 1:nrow(splits)){ +if(!is.null(colnames(DM_kov))){ +splits[i,6] <- colnames(DM_kov)[splits[i,1]] +} else{ +splits[i,6] <- splits[i,1] +} +v2 <- lapply(1:nvar,function(j) ordered_values[[j]][-length(ordered_values[[j]])]) +splits[i,7] <- v2[[splits[i,1]]][splits[i,3]] +} +splits <- splits[,-1] +nthres <- length(unique(y))-1 +for(i in dif_items){ +info <- splits[splits[,"item"]==i,] +endnodes <- get_endnodes(info) +names(delta_hat_dif[[paste(i)]]) <- paste(rep(endnodes,each=nthres),rep(1:nthres,length(endnodes)),sep=":") +} +} else{ +delta_hat_nodif <- sapply(1:nitems,function(j) delta_hat[grep(paste0("delta",j,":"),ip_opt)]) +delta_hat_dif <- c() +} +to_return <- list("splits"=splits, +"thetas"=theta_hat, +"deltas_nodif"=delta_hat_nodif, +"deltas_dif"=delta_hat_dif, +"pvalues"=pvalues, +"devs"=devs, +"crits"=crits) +return(to_return) +} +library(VGAM) +mod2 <- DIFtree(Y=Y2,X=X2,model="PCM",alpha=0.05,nperm=100,trace=TRUE) +colnames(res.dat.dif) +max(res.dat[res.dat$scenario.type=="A",]$h0.rejected.p) +max(1-res.dat[res.dat$scenario.type!="A",]$beta.same.sign.truebeta.signif.p) +max(res.dat[res.dat$scenario.type!="A",]$beta.same.sign.truebeta.signif.p) +res.dat[res.dat$scenario.type!="A",]$beta.same.sign.truebeta.signif.p +View(res.dat) +res.dat[res.dat$beta.same.sign.truebeta.p==NaN & res.dat$eff.size!=0,] +res.dat[res.dat$beta.same.sign.truebeta.p==NaN & res.dat$eff.size!=0,]$N +res.dat[is.nan(res.dat$beta.same.sign.truebeta.p) & res.dat$eff.size!=0,]$N +res.dat[res.dat$N==50,] +res.dat[res.dat$N==50,]$eff.size +max(res.dat[res.dat$scenario.type!="A" & res.dat$N!=50,]$beta.same.sign.truebeta.signif.p) +max(1-res.dat[res.dat$scenario.type!="A" & res.dat$N!=50,]$beta.same.sign.truebeta.signif.p) +max(1-res.dat[res.dat$scenario.type!="A" & res.dat$N!=50,]$beta.same.sign.truebeta.p) diff --git a/RProject/dif.R b/RProject/dif.R new file mode 100644 index 0000000..6064cfd --- /dev/null +++ b/RProject/dif.R @@ -0,0 +1,27 @@ +library(mirt) + +pcm_analysis <- function(df=NULL,treatment='TT',irtmodel='PCM2',method='MML') { + nbitems <- sum(sapply(1:20,function(x) paste0('item',x)) %in% colnames(df)) + resp <- df[,sapply(seq(1,nbitems),function(x) paste0('item',x))] + if (method=='MML') { + tam1 <- tam.mml(resp=resp,Y=df[,treatment],irtmodel = irtmodel,est.variance = T,verbose=F) + } + if (method=='JML') { + tam1 <- tam.jml(resp=resp,group=1+df[,treatment]) + } + if (method!='MML' & method!='JML') { + stop('Invalid method. Please choose among MML or JML') + } + return(tam1) +} + +dff <- read.csv(file = '/home/corentin/Documents/These/Recherche/Simulations/Data/DIF/N50/scenario_5A_50.csv') +dfff <- dff[dff$replication==1,] +facets <- dfff$TT +dfff$item2_noTT <- NA +dfff$item2_TT <- NA +dfff[dfff$TT==0,]$item2_noTT <- dfff[dfff$TT==0,"item2"] +dfff[dfff$TT==1,]$item2_TT <- dfff[dfff$TT==1,"item2"] + +mml.mod <- tam.mml(resp=dfff[,c('item1','item2_noTT','item2_TT',"item3","item4")],Y=dfff$TT,constraint='cases' + ,irtmodel = "PCM2",est.variance = T,verbose=F) diff --git a/Scripts/Analysis/DIF/pcm_dif.do b/Scripts/Analysis/DIF/pcm_dif.do index de880d1..a936341 100644 --- a/Scripts/Analysis/DIF/pcm_dif.do +++ b/Scripts/Analysis/DIF/pcm_dif.do @@ -71,7 +71,7 @@ mat outmat[`k',`nbitems'+4] = `difitems1' // numéro item de dif restore } - putexcel set "`path_res'/out/5`scen'_`Nn'.xls", sheet("outmat") replace + putexcel set "`path_res'/5`scen'_`Nn'.xls", sheet("outmat") replace putexcel A1=matrix(outmat), colnames } } @@ -149,7 +149,7 @@ forvalues k=1/1000 { mat outmat[`k',3*`nbitems'+6] = `difitems1' // numéro item de dif restore } -putexcel set "`path_res'/out/6`scen'_`Nn'.xls", sheet("outmat") replace +putexcel set "`path_res'/6`scen'_`Nn'.xls", sheet("outmat") replace putexcel A1=matrix(outmat), colnames } } @@ -208,7 +208,7 @@ putexcel A1=matrix(outmat), colnames mat outmat[`k',`nbitems'+4] = `difitems1' // numéro item de dif restore } - putexcel set "`path_res'/out/7`scen'_`Nn'.xls", sheet("outmat") replace + putexcel set "`path_res'/7`scen'_`Nn'.xls", sheet("outmat") replace putexcel A1=matrix(outmat), colnames } } @@ -286,7 +286,7 @@ forvalues k=1/1000 { mat outmat[`k',3*`nbitems'+6] = `difitems1' // numéro item de dif restore } -putexcel set "`path_res'/out/8`scen'_`Nn'.xls", sheet("outmat") replace +putexcel set "`path_res'/8`scen'_`Nn'.xls", sheet("outmat") replace putexcel A1=matrix(outmat), colnames } } @@ -364,7 +364,7 @@ adopath+"/home/corentin/Documents/These/Recherche/Simulations/Modules/" mat outmat[`k',`nbitems'+6] = `difitemsmax' // numéro item de dif restore } - putexcel set "`path_res'/out/9`scen'_`Nn'.xls", sheet("outmat") replace + putexcel set "`path_res'/9`scen'_`Nn'.xls", sheet("outmat") replace putexcel A1=matrix(outmat), colnames } } @@ -474,7 +474,7 @@ forvalues k=1/1000 { mat outmat[`k',3*`nbitems'+10] = `difitemsmax' // numéro item de dif restore } -putexcel set "`path_res'/out/10`scen'_`Nn'.xls", sheet("outmat") replace +putexcel set "`path_res'/10`scen'_`Nn'.xls", sheet("outmat") replace putexcel A1=matrix(outmat), colnames } } @@ -548,7 +548,7 @@ putexcel A1=matrix(outmat), colnames mat outmat[`k',`nbitems'+6] = `difitemsmax' // numéro item de dif restore } - putexcel set "`path_res'/out/11`scen'_`Nn'.xls", sheet("outmat") replace + putexcel set "`path_res'/11`scen'_`Nn'.xls", sheet("outmat") replace putexcel A1=matrix(outmat), colnames } } @@ -658,7 +658,7 @@ forvalues k=1/1000 { mat outmat[`k',3*`nbitems'+10] = `difitemsmax' // numéro item de dif restore } -putexcel set "`path_res'/out/12`scen'_`Nn'.xls", sheet("outmat") replace +putexcel set "`path_res'/12`scen'_`Nn'.xls", sheet("outmat") replace putexcel A1=matrix(outmat), colnames } } @@ -742,7 +742,7 @@ adopath+"/home/corentin/Documents/These/Recherche/Simulations/Modules/" mat outmat[`k',`nbitems'+6] = `difitemsmax' // numéro item de dif restore } - putexcel set "`path_res'/out/13`scen'_`Nn'.xls", sheet("outmat") replace + putexcel set "`path_res'/13`scen'_`Nn'.xls", sheet("outmat") replace putexcel A1=matrix(outmat), colnames } } @@ -852,7 +852,7 @@ forvalues k=1/1000 { mat outmat[`k',3*`nbitems'+10] = `difitemsmax' // numéro item de dif restore } -putexcel set "`path_res'/out/14`scen'_`Nn'.xls", sheet("outmat") replace +putexcel set "`path_res'/14`scen'_`Nn'.xls", sheet("outmat") replace putexcel A1=matrix(outmat), colnames } } @@ -926,7 +926,7 @@ putexcel A1=matrix(outmat), colnames mat outmat[`k',`nbitems'+6] = `difitemsmax' // numéro item de dif restore } - putexcel set "`path_res'/out/15`scen'_`Nn'.xls", sheet("outmat") replace + putexcel set "`path_res'/15`scen'_`Nn'.xls", sheet("outmat") replace putexcel A1=matrix(outmat), colnames } } @@ -1036,7 +1036,7 @@ forvalues k=1/1000 { mat outmat[`k',3*`nbitems'+10] = `difitemsmax' // numéro item de dif restore } -putexcel set "`path_res'/out/16`scen'_`Nn'.xls", sheet("outmat") replace +putexcel set "`path_res'/16`scen'_`Nn'.xls", sheet("outmat") replace putexcel A1=matrix(outmat), colnames } } @@ -1130,7 +1130,7 @@ adopath+"/home/corentin/Documents/These/Recherche/Simulations/Modules/" mat outmat[`k',`nbitems'+8] = `difitemsmax' // numéro item de dif restore } - putexcel set "`path_res'/out/17`scen'_`Nn'.xls", sheet("outmat") replace + putexcel set "`path_res'/17`scen'_`Nn'.xls", sheet("outmat") replace putexcel A1=matrix(outmat), colnames } } @@ -1274,7 +1274,7 @@ forvalues k=1/1000 { mat outmat[`k',3*`nbitems'+14] = `difitemsmax' // numéro item de dif restore } -putexcel set "`path_res'/out/18`scen'_`Nn'.xls", sheet("outmat") replace +putexcel set "`path_res'/18`scen'_`Nn'.xls", sheet("outmat") replace putexcel A1=matrix(outmat), colnames } } @@ -1363,7 +1363,7 @@ putexcel A1=matrix(outmat), colnames mat outmat[`k',`nbitems'+8] = `difitemsmax' // numéro item de dif restore } - putexcel set "`path_res'/out/19`scen'_`Nn'.xls", sheet("outmat") replace + putexcel set "`path_res'/19`scen'_`Nn'.xls", sheet("outmat") replace putexcel A1=matrix(outmat), colnames } } @@ -1507,7 +1507,7 @@ forvalues k=1/1000 { mat outmat[`k',3*`nbitems'+14] = `difitemsmax' // numéro item de dif restore } -putexcel set "`path_res'/out/20`scen'_`Nn'.xls", sheet("outmat") replace +putexcel set "`path_res'/20`scen'_`Nn'.xls", sheet("outmat") replace putexcel A1=matrix(outmat), colnames } }