diff --git a/.gitignore b/.gitignore index a52f3bc..9782371 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,5 @@ .Rproj.user *.RData *.Rhistory +rapport.Rmd +rapport.pdf diff --git a/Rapport/criteria.png b/Rapport/criteria.png new file mode 100644 index 0000000..a67492c Binary files /dev/null and b/Rapport/criteria.png differ diff --git a/Rapport/falsepos.png b/Rapport/falsepos.png new file mode 100644 index 0000000..b8fb2ae Binary files /dev/null and b/Rapport/falsepos.png differ diff --git a/Rproject/analyse_rosali.R b/Rproject/analyse_rosali.R index 564eb8c..e3df237 100644 --- a/Rproject/analyse_rosali.R +++ b/Rproject/analyse_rosali.R @@ -11,11 +11,137 @@ library(pbmcapply) library(funprog) library(dplyr) library(readxl) +library(furrr) +library(tibble) lastChar <- function(str){ substr(str, nchar(str), nchar(str)) } +############################################################################## +#----------------------------------------------------------------------------# +################################### RESALI ################################### +#----------------------------------------------------------------------------# +############################################################################## + +generate_resali <- function(scenario=NULL,grp=NULL) { + scen <- as.numeric(gsub("[A,B,C,_]","",substr(scenario,0,3))) + if (substr(scenario,start=nchar(scenario)-1,stop=nchar(scenario))=="50") { + N <- 50 + } + if (substr(scenario,start=nchar(scenario)-2,stop=nchar(scenario))=="300") { + N <- 300 + } + dat <- read.csv(paste0('/home/corentin/Documents/These/Recherche/ROSALI-SIM/Data/N',N,'/scenario_',scenario,'.csv')) + if (scen%in%c(4,16)) { + res <- resali(df=dat[dat$replication==1,],items = seq(1,7),group=grp,verbose=FALSE) + df_res <- data.frame(dif.detect.1=ifelse(length(res$dif.items)>=1,res$dif.items[1],NA), + dif.detect.2=ifelse(length(res$dif.items)>=2,res$dif.items[2],NA), + dif.detect.3=ifelse(length(res$dif.items)>=3,res$dif.items[3],NA), + dif.detect.4=ifelse(length(res$dif.items)>=4,res$dif.items[4],NA), + dif.detect.5=ifelse(length(res$dif.items)>=5,res$dif.items[5],NA), + dif.detect.6=ifelse(length(res$dif.items)>=6,res$dif.items[6],NA), + dif.detect.7=ifelse(length(res$dif.items)>=7,res$dif.items[7],NA), + dif.detect.unif.1=ifelse(length(res$uniform)>=1,res$uniform[1],NA), + dif.detect.unif.2=ifelse(length(res$uniform)>=2,res$uniform[2],NA), + dif.detect.unif.3=ifelse(length(res$uniform)>=3,res$uniform[3],NA), + dif.detect.unif.4=ifelse(length(res$uniform)>=4,res$uniform[4],NA), + dif.detect.unif.5=ifelse(length(res$uniform)>=5,res$uniform[5],NA), + dif.detect.unif.6=ifelse(length(res$uniform)>=6,res$uniform[6],NA), + dif.detect.unif.7=ifelse(length(res$uniform)>=7,res$uniform[7],NA), + N=N, + nbdif=ifelse(scen==16,2,0), + true.dif.1=ifelse(scen==16,unique(dat[dat$replication==1,]$dif1),NA), + true.dif.2=ifelse(scen==16,unique(dat[dat$replication==1,]$dif2),NA) + ) + for (k in 2:1000) { + if (k%%100==0) { + cat(paste0('N=',k,'/1000\n')) + } + res <- resali(df=dat[dat$replication==k,],items = seq(1,7),group=grp,verbose=FALSE) + df_res2 <- data.frame(dif.detect.1=ifelse(length(res$dif.items)>=1,res$dif.items[1],NA), + dif.detect.2=ifelse(length(res$dif.items)>=2,res$dif.items[2],NA), + dif.detect.3=ifelse(length(res$dif.items)>=3,res$dif.items[3],NA), + dif.detect.4=ifelse(length(res$dif.items)>=4,res$dif.items[4],NA), + dif.detect.5=ifelse(length(res$dif.items)>=5,res$dif.items[5],NA), + dif.detect.6=ifelse(length(res$dif.items)>=6,res$dif.items[6],NA), + dif.detect.7=ifelse(length(res$dif.items)>=7,res$dif.items[7],NA), + dif.detect.unif.1=ifelse(length(res$uniform)>=1,res$uniform[1],NA), + dif.detect.unif.2=ifelse(length(res$uniform)>=2,res$uniform[2],NA), + dif.detect.unif.3=ifelse(length(res$uniform)>=3,res$uniform[3],NA), + dif.detect.unif.4=ifelse(length(res$uniform)>=4,res$uniform[4],NA), + dif.detect.unif.5=ifelse(length(res$uniform)>=5,res$uniform[5],NA), + dif.detect.unif.6=ifelse(length(res$uniform)>=6,res$uniform[6],NA), + dif.detect.unif.7=ifelse(length(res$uniform)>=7,res$uniform[7],NA), + N=N, + nbdif=ifelse(scen==16,2,0), + true.dif.1=ifelse(scen==16,unique(dat[dat$replication==k,]$dif1),NA), + true.dif.2=ifelse(scen==16,unique(dat[dat$replication==k,]$dif2),NA)) + df_res <- rbind(df_res,df_res2) + } + } + else if (scen%in%c(2,8)) { + res <- resali(df=dat[dat$replication==1,],items = seq(1,4),group=grp,verbose=FALSE) + df_res <- data.frame(dif.detect.1=ifelse(length(res$dif.items)>=1,res$dif.items[1],NA), + dif.detect.2=ifelse(length(res$dif.items)>=2,res$dif.items[2],NA), + dif.detect.3=ifelse(length(res$dif.items)>=3,res$dif.items[3],NA), + dif.detect.4=ifelse(length(res$dif.items)>=4,res$dif.items[4],NA), + dif.detect.unif.1=ifelse(length(res$uniform)>=1,res$uniform[1],NA), + dif.detect.unif.2=ifelse(length(res$uniform)>=2,res$uniform[2],NA), + dif.detect.unif.3=ifelse(length(res$uniform)>=3,res$uniform[3],NA), + dif.detect.unif.4=ifelse(length(res$uniform)>=4,res$uniform[4],NA), + N=N, + nbdif=ifelse(scen==8,1,0), + true.dif=ifelse(scen==8,unique(dat[dat$replication==1,]$dif1),NA) + ) + for (k in 2:1000) { + if (k%%100==0) { + cat(paste0('N=',k,'/1000\n')) + } + res <- resali(df=dat[dat$replication==k,],items = seq(1,4),group=grp,verbose=FALSE) + df_res2 <- data.frame(dif.detect.1=ifelse(length(res$dif.items)>=1,res$dif.items[1],NA), + dif.detect.2=ifelse(length(res$dif.items)>=2,res$dif.items[2],NA), + dif.detect.3=ifelse(length(res$dif.items)>=3,res$dif.items[3],NA), + dif.detect.4=ifelse(length(res$dif.items)>=4,res$dif.items[4],NA), + dif.detect.unif.1=ifelse(length(res$uniform)>=1,res$uniform[1],NA), + dif.detect.unif.2=ifelse(length(res$uniform)>=2,res$uniform[2],NA), + dif.detect.unif.3=ifelse(length(res$uniform)>=3,res$uniform[3],NA), + dif.detect.unif.4=ifelse(length(res$uniform)>=4,res$uniform[4],NA), + N=N, + nbdif=ifelse(scen==8,1,0), + true.dif=ifelse(scen==8,unique(dat[dat$replication==k,]$dif1),NA)) + df_res <- rbind(df_res,df_res2) + } + } + return(df_res) +} + + + + + + +results <- c(sapply(c(2,4),function(x) paste0(x,c('A','B','C'))),sapply(8,function(x) paste0(x,c('A','B','C')))) + +results2 <- c(sapply(16,function(x) paste0(x,c('A','B','C')))) + +results <- c(sapply(c(50,300),function(x) paste0(results,'_',x))) + +results2 <- c(sapply(c(50,300),function(x) paste0(results2,'_',x))) + +results <- sort(results) + +results2 <- sort(results2) + +results <- c(results,results2) + +for (r in results) { + cat(paste0(r,"\n")) + cat(paste0("-------------------------------------------","\n")) + write.csv(generate_resali(r,"TT"),paste0("/home/corentin/Documents/These/Recherche/ROSALI-SIM/Analysis/resali/",r,".csv")) + cat(paste0("-------------------------------------------","\n")) +} + ############################################################################## @@ -40,7 +166,7 @@ results2 <- sort(results2) results <- c(results,results2) -results <- c(sapply(c('noBF','noLRT','noLRT_noBF','original'),function(x) paste0(results,'_',x))) +results <- c(sapply(c("noBF","noLRT","noLRT_noBF","original","resali"),function(x) paste0(results,'_',x))) #### Compiler function @@ -56,15 +182,32 @@ compile_simulation2 <- function(scenario) { } if (substr(scenario,start=nchar(scenario)-9,stop=nchar(scenario))=="noLRT_noBF") { s <- read_excel(paste0('/home/corentin/Documents/These/Recherche/ROSALI-SIM/Analysis/noLRTnoBF/',scenario,'.xls')) - s$version <- "noLRT & noBF" + s$version <- "noLRTnoBF" } if (substr(scenario,start=nchar(scenario)-7,stop=nchar(scenario))=="original") { s <- read_excel(paste0('/home/corentin/Documents/These/Recherche/ROSALI-SIM/Analysis/original/',scenario,'.xls')) s$version <- "original" } + if (substr(scenario,start=nchar(scenario)-5,stop=nchar(scenario))=="resali") { + s <- read.csv(paste0('/home/corentin/Documents/These/Recherche/ROSALI-SIM/Analysis/resali/',gsub("_resali","",scenario),'.csv'),row.names = 1,header = T) + if (as.numeric(gsub("[A-Z]","", substr(scenario,start=0,stop=2)))%in%c(2,4)) { + s <- s[,-c(ncol(s))] + } + if (as.numeric(gsub("[A-Z]","", substr(scenario,start=0,stop=2)))%in%c(4)) { + s <- s[,-c(ncol(s))] + } + if (as.numeric(gsub("[A-Z]","", substr(scenario,start=0,stop=2)))%in%c(2,8)) { + s <- add_column(s,J=4,.after='N') + } + if (as.numeric(gsub("[A-Z]","", substr(scenario,start=0,stop=2)))%in%c(4,16)) { + s <- add_column(s,J=7,.after='N') + } + s$version <- "resali" + } name <- gsub("_", "", substr(scenario,start=0,stop=3)) if (as.numeric(gsub("[A,B,C]","",name))==2){ colnames(s) <- c("dif.detect.1","dif.detect.2","dif.detect.3","dif.detect.4", + "dif.detect.unif.1","dif.detect.unif.2","dif.detect.unif.3","dif.detect.unif.4", "N","J","nb.dif","version") s$dif.detected <- sapply(1:1000,function(x) any(!is.na(s[x,1:4]))) s$prop.perfect <- NA @@ -73,32 +216,40 @@ compile_simulation2 <- function(scenario) { } if (as.numeric(gsub("[A,B,C]","",name))==4){ colnames(s) <- c("dif.detect.1","dif.detect.2","dif.detect.3","dif.detect.4", - "dif.detect.5","dif.detect.6","dif.detect.7","N","J","nb.dif","version") + "dif.detect.5","dif.detect.6","dif.detect.7", + "dif.detect.unif.1","dif.detect.unif.2","dif.detect.unif.3","dif.detect.unif.4", + "dif.detect.unif.5","dif.detect.unif.6","dif.detect.unif.7", + "N","J","nb.dif","version") s$dif.detected <- sapply(1:1000,function(x) any(!is.na(s[x,1:7]))) s$prop.perfect <- NA s$prop.flexible <- NA s$prop.moreflexible <- NA } if (as.numeric(gsub("[A,B,C]","",name))==8){ - colnames(s) <- c("dif.detect.1","dif.detect.2","dif.detect.3","dif.detect.4","N","J","nb.dif","true.dif","version") + colnames(s) <- c("dif.detect.1","dif.detect.2","dif.detect.3","dif.detect.4", + "dif.detect.unif.1","dif.detect.unif.2","dif.detect.unif.3","dif.detect.unif.4", + "N","J","nb.dif","true.dif","version") s$dif.detected <- sapply(1:1000,function(x) any(!is.na(s[x,1:4]))) - s$perfect.detection <- sapply(1:1000,function(x) ifelse(sum(!is.na(s[x,1:4]))==1,s[x,which(sapply(1:4,function(y) !is.na(s[x,y])))]==s[x,"true.dif"],0) ) + s$perfect.detection <- sapply(1:1000,function(x) ifelse(sum(!is.na(s[x,1:4]))==1,s[x,"dif.detect.unif.1"]==1 & s[x,which(sapply(1:4,function(y) !is.na(s[x,y])))]==s[x,"true.dif"],0) ) s$prop.perfect <- mean(s$perfect.detection) - s$flexible.detect <- sapply(1:1000,function(x) ifelse(sum(!is.na(s[x,1:4]))!=0,s[x,"true.dif"]%in%s[x,which(sapply(1:4,function(y) !is.na(s[x,y])))],0) ) + s$flexible.detect <- sapply(1:1000,function(x) ifelse(sum(!is.na(s[x,1:4]))==1,s[x,which(sapply(1:4,function(y) !is.na(s[x,y])))]==s[x,"true.dif"],0) ) s$prop.flexible <- mean(s$flexible.detect) - s$prop.moreflexible <- s$prop.flexible + s$moreflexible.detect <- sapply(1:1000,function(x) ifelse(sum(!is.na(s[x,1:4]))!=0,s[x,"true.dif"]%in%c(s[x,which(sapply(1:4,function(y) !is.na(s[x,y])))]),0) ) + s$prop.moreflexible <- mean(s$moreflexible.detect) } if (as.numeric(gsub("[A,B,C]","",name))==16){ colnames(s) <- c("dif.detect.1","dif.detect.2","dif.detect.3","dif.detect.4", - "dif.detect.5","dif.detect.6","dif.detect.7","N","J","nb.dif","true.dif.1","true.dif.2","version") + "dif.detect.5","dif.detect.6","dif.detect.7", + "dif.detect.unif.1","dif.detect.unif.2","dif.detect.unif.3","dif.detect.unif.4", + "dif.detect.unif.5","dif.detect.unif.6","dif.detect.unif.7", + "N","J","nb.dif","true.dif.1","true.dif.2","version") s$dif.detected <- sapply(1:1000,function(x) any(!is.na(s[x,1:7]))) - perfect.detection <- sapply(1:1000,function(x) ifelse(sum(!is.na(s[x,1:7]))==2,s[x,which(sapply(1:7,function(y) !is.na(s[x,y])))[1]]%in%c(s[x,c("true.dif.1","true.dif.2")]) & s[x,which(sapply(1:7,function(y) !is.na(s[x,y])))[2]]%in%c(s[x,c("true.dif.1","true.dif.2")]) + perfect.detection <- sapply(1:1000,function(x) ifelse(sum(!is.na(s[x,1:7]))==2,s[x,"dif.detect.unif.1"]==1 & s[x,"dif.detect.unif.2"]==1 & s[x,which(sapply(1:7,function(y) !is.na(s[x,y])))[1]]%in%c(s[x,c("true.dif.1","true.dif.2")]) & s[x,which(sapply(1:7,function(y) !is.na(s[x,y])))[2]]%in%c(s[x,c("true.dif.1","true.dif.2")]) ,0) ) s$prop.perfect <- mean(perfect.detection) - s$flexible.detect <- sapply(1:1000,function(x) ifelse(sum(!is.na(s[x,1:7]))!=0,s[x,"true.dif.1"]%in%c(s[x,which(sapply(1:7,function(y) !is.na(s[x,y])))]) & - s[x,"true.dif.2"]%in%c(s[x,which(sapply(1:7,function(y) !is.na(s[x,y])))]),0) ) + s$flexible.detect <- sapply(1:1000,function(x) ifelse(sum(!is.na(s[x,1:7]))==2,s[x,which(sapply(1:7,function(y) !is.na(s[x,y])))[1]]%in%c(s[x,c("true.dif.1","true.dif.2")]) & s[x,which(sapply(1:7,function(y) !is.na(s[x,y])))[2]]%in%c(s[x,c("true.dif.1","true.dif.2")]),0 )) s$prop.flexible <- mean(s$flexible.detect) - s$moreflexible.detect <- sapply(1:1000,function(x) ifelse(sum(!is.na(s[x,1:7]))!=0,s[x,"true.dif.1"]%in%c(s[x,which(sapply(1:7,function(y) !is.na(s[x,y])))]) | + s$moreflexible.detect <- sapply(1:1000,function(x) ifelse(sum(!is.na(s[x,1:7]))!=0,s[x,"true.dif.1"]%in%c(s[x,which(sapply(1:7,function(y) !is.na(s[x,y])))]) & s[x,"true.dif.2"]%in%c(s[x,which(sapply(1:7,function(y) !is.na(s[x,y])))]),0) ) s$prop.moreflexible <- mean(s$moreflexible.detect) } @@ -119,8 +270,6 @@ compile_simulation2 <- function(scenario) { #### Compiled results -results <- results[-c(21,23,93,95)] - res.dat.dif <- compile_simulation2("2A_300_noBF") for (x in results[seq(2,length(results))]) { @@ -139,5 +288,146 @@ res.dat.dif # False positives -plot.dat <- res.dat.dif[res.dat.dif$J==4 & res.dat.dif$M==4 & res.dat.dif$nb.dif==0 & res.dat.dif$version=="original",] -boxplot(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05)) +plot.dat <- data.frame(N=c(50,300), + prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif==0 & res.dat.dif$version=='original',]$prop.dif.detect) ) +) +plot(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,1),lty=1,pch=3,xlab='N',ylab="Proportions of scenarios where DIF was detected") +lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1) +title('Proportion of DIF detection in scenarios without DIF') + +plot.dat <- data.frame(N=c(50,300), + prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif==0 & res.dat.dif$version=='noBF',]$prop.dif.detect) ) +) +points(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="blue",pch=2) +lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="blue") + +plot.dat <- data.frame(N=c(50,300), + prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif==0 & res.dat.dif$version=='noLRTnoBF',]$prop.dif.detect) ) +) +points(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="red",pch=4) +lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="red") + +plot.dat <- data.frame(N=c(50,300), + prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif==0 & res.dat.dif$version=='noLRT',]$prop.dif.detect) ) +) +points(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="green",pch=5) +lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="green") + +plot.dat <- data.frame(N=c(50,300), + prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif==0 & res.dat.dif$version=='resali',]$prop.dif.detect) ) +) +points(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="purple",pch=6) +lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="purple") + +legend(x='topleft',legend=c('original','no Bonferroni correction',"no LRT","no LRT and no Bonferroni",'Residuals'), + col=c('black',"blue","green","red",'purple'),lty=c(1,1,1,1,1),pch=c(3,2,4,5,6)) + +par(mfrow=c(2,2)) + +# Perfect detection + +plot.dat <- data.frame(N=c(50,300), + prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif!=0 & res.dat.dif$version=='original',]$prop.perfect) ) +) +plot(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,1),lty=1,pch=3,xlab='N',ylab="Proportions of scenarios with perfect DIF detection") +lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1) +title('Proportion of perfect DIF detection in scenarios with DIF') + +plot.dat <- data.frame(N=c(50,300), + prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif!=0 & res.dat.dif$version=='noBF',]$prop.perfect) ) +) +points(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="blue",pch=2) +lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="blue") + +plot.dat <- data.frame(N=c(50,300), + prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif!=0 & res.dat.dif$version=='noLRTnoBF',]$prop.perfect) ) +) +points(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="red",pch=4) +lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="red") + +plot.dat <- data.frame(N=c(50,300), + prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif!=0 & res.dat.dif$version=='noLRT',]$prop.perfect) ) +) +points(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="green",pch=5) +lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="green") + +plot.dat <- data.frame(N=c(50,300), + prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif!=0 & res.dat.dif$version=='resali',]$prop.perfect) ) +) +points(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="purple",pch=6) +lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="purple") +legend(x='topleft',legend=c('original','no Bonferroni correction',"no LRT","no LRT and no Bonferroni",'Residuals'), + col=c('black',"blue","green","red",'purple'),lty=c(1,1,1,1,1),pch=c(3,2,4,5,6)) + + + +# Flexible detection + +plot.dat <- data.frame(N=c(50,300), + prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif!=0 & res.dat.dif$version=='original',]$prop.flexible) ) +) +plot(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,1),lty=1,pch=3,xlab='N',ylab="Proportions of scenarios with flexible DIF detection") +lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1) +title('Proportion of flexible DIF detection in scenarios with DIF') + +plot.dat <- data.frame(N=c(50,300), + prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif!=0 & res.dat.dif$version=='noBF',]$prop.flexible) ) +) +points(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="blue",pch=2) +lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="blue") + +plot.dat <- data.frame(N=c(50,300), + prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif!=0 & res.dat.dif$version=='noLRTnoBF',]$prop.flexible) ) +) +points(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="red",pch=4) +lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="red") + +plot.dat <- data.frame(N=c(50,300), + prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif!=0 & res.dat.dif$version=='noLRT',]$prop.flexible) ) +) +points(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="green",pch=5) +lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="green") + +plot.dat <- data.frame(N=c(50,300), + prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif!=0 & res.dat.dif$version=='resali',]$prop.flexible) ) +) +points(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="purple",pch=6) +lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="purple") +legend(x='topleft',legend=c('original','no Bonferroni correction',"no LRT","no LRT and no Bonferroni",'Residuals'), + col=c('black',"blue","green","red",'purple'),lty=c(1,1,1,1,1),pch=c(3,2,4,5,6)) + + +# Most flexible detection + +plot.dat <- data.frame(N=c(50,300), + prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif!=0 & res.dat.dif$version=='original',]$prop.moreflexible) ) +) +plot(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,1),lty=1,pch=3,xlab='N',ylab="Proportions of scenarios with most flexible DIF detection") +lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1) +title('Proportion of most flexible DIF detection in scenarios with DIF') + +plot.dat <- data.frame(N=c(50,300), + prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif!=0 & res.dat.dif$version=='noBF',]$prop.moreflexible) ) +) +points(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="blue",pch=2) +lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="blue") + +plot.dat <- data.frame(N=c(50,300), + prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif!=0 & res.dat.dif$version=='noLRTnoBF',]$prop.moreflexible) ) +) +points(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="red",pch=4) +lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="red") + +plot.dat <- data.frame(N=c(50,300), + prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif!=0 & res.dat.dif$version=='noLRT',]$prop.moreflexible) ) +) +points(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="green",pch=5) +lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="green") + +plot.dat <- data.frame(N=c(50,300), + prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif!=0 & res.dat.dif$version=='resali',]$prop.moreflexible) ) +) +points(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="purple",pch=6) +lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="purple") +legend(x='topleft',legend=c('original','no Bonferroni correction',"no LRT","no LRT and no Bonferroni",'Residuals'), + col=c('black',"blue","green","red",'purple'),lty=c(1,1,1,1,1),pch=c(3,2,4,5,6)) \ No newline at end of file diff --git a/Rproject/resali.R b/Rproject/resali.R new file mode 100644 index 0000000..304f32a --- /dev/null +++ b/Rproject/resali.R @@ -0,0 +1,267 @@ +library(TAM) + +resali <- function(df=NULL,items=NULL,group=NULL,verbose=T) { + if (verbose) { + cat('-----------------------------------------------------------\n') + cat('COMPUTING INITIAL PCM\n') + cat('-----------------------------------------------------------\n') + } + nbitems <- length(items) + nbitems_o <- nbitems + items_n <- paste0('item',items) + resp <- df[,items_n] + grp <- df[,group] + pcm_initial <- TAM::tam.mml(resp=resp,Y=grp,irtmodel = "PCM",est.variance = T,verbose=F) + dat <- df + dat$score <- rowSums(dat[,items_n]) + nqt <- ifelse(length(unique(quantile(dat$score,seq(0,1,0.2))))==6,5,length(unique(quantile(dat$score,seq(0,1,0.2))))-1) + dat$score_q5 <- cut(dat$score,unique(quantile(dat$score,seq(0,1,1/nqt))),labels=1:nqt,include.lowest=T) + res.anova <- rep(NA,nbitems) + pval <- rep(NA,nbitems) + fval <- rep(NA,nbitems) + for (i in items) { + dat[,paste0('res_',i)] <- IRT.residuals(pcm_initial)$stand_residuals[,i] + res.anova[i] <- summary(aov(dat[,paste0('res_',i)]~TT*score_q5,data=dat)) + pval[i] <- res.anova[[i]][1,"Pr(>F)"] + fval[i] <- res.anova[[i]][1,'F value'] + } + if (verbose) { + cat('DONE\n') + cat('-----------------------------------------------------------\n') + } + res.items <- c() + res.uniform <- c() + k <- 1 + while(any(pval<0.05/(nbitems_o*3))) { + k <- k+1 + if (verbose) { + cat(paste('COMPUTING STEP',k,'\n')) + cat('-----------------------------------------------------------\n') + } + res.item <- gsub("[a-z]", "",colnames(resp)[which.max(fval)]) + res.items <- c(res.items,res.item) + res.uni <- res.anova[[which.max(fval)]][3,"Pr(>F)"]>0.05 + res.uniform <- c(res.uniform,res.uni) + items_n <- c(items_n[items_n!=paste0('item',res.item)],paste0("item",res.item,c("noTT","TT"))) + dat[,paste0("item",res.item,'TT')] <- dat[dat$TT==1,paste0('item',res.item)] + dat[,paste0("item",res.item,'noTT')] <- dat[dat$TT==0,paste0('item',res.item)] + resp <- dat[,items_n] + grp <- dat[,group] + pcm_while <- TAM::tam.mml(resp=resp,Y=grp,irtmodel = "PCM",est.variance = T,verbose=F) + nbitems <- length(items_n) + res.anova <- rep(NA,nbitems) + pval <- rep(NA,nbitems) + fval <- rep(NA,nbitems) + for (i in 1:nbitems) { + dat[,paste0('res_',i)] <- IRT.residuals(pcm_while)$stand_residuals[,i] + res.anova[i] <- summary(aov(dat[,paste0('res_',i)]~TT*score_q5,data=dat)) + pval[i] <- res.anova[[i]][1,"Pr(>F)"] + fval[i] <- res.anova[[i]][1,'F value'] + } + if (verbose) { + cat('DONE\n') + cat('-----------------------------------------------------------\n') + } + } + if (verbose) { + cat("DETECTED DIF ITEMS\n") + cat('-----------------------------------------------------------\n') + } + if (length(res.items>0)) { + results <- data.frame(dif.items=res.items, + uniform=1*res.uniform) + return(results) + } + else { + if (verbose) { + cat("No DIF was detected\n") + } + return(NULL) + } +} + + + + + +resali_BF2 <- function(df=NULL,items=NULL,group=NULL,verbose=T) { + if (verbose) { + cat('-----------------------------------------------------------\n') + cat('COMPUTING INITIAL PCM\n') + cat('-----------------------------------------------------------\n') + } + nbitems <- length(items) + nbitems_o <- nbitems + items_n <- paste0('item',items) + resp <- df[,items_n] + grp <- df[,group] + pcm_initial <- TAM::tam.mml(resp=resp,Y=grp,irtmodel = "PCM",est.variance = T,verbose=F) + dat <- df + dat$score <- rowSums(dat[,items_n]) + nqt <- ifelse(length(unique(quantile(dat$score,seq(0,1,0.2))))==6,5,length(unique(quantile(dat$score,seq(0,1,0.2))))-1) + dat$score_q5 <- cut(dat$score,unique(quantile(dat$score,seq(0,1,1/nqt))),labels=1:nqt,include.lowest=T) + res.anova <- rep(NA,nbitems) + pval <- rep(NA,nbitems) + fval <- rep(NA,nbitems) + for (i in items) { + dat[,paste0('res_',i)] <- rowMeans(predict(pcm_initial)$stand.resid[,,i]) + res.anova[i] <- summary(aov(dat[,paste0('res_',i)]~TT*score_q5,data=dat)) + pval[i] <- res.anova[[i]][1,"Pr(>F)"] + fval[i] <- res.anova[[i]][1,'F value'] + } + if (verbose) { + cat('DONE\n') + cat('-----------------------------------------------------------\n') + } + res.items <- c() + res.uniform <- c() + k <- 1 + while(any(pval<0.05/(nbitems_o-k+1))) { + k <- k+1 + if (verbose) { + cat(paste('COMPUTING STEP',k,'\n')) + cat('-----------------------------------------------------------\n') + } + res.item <- gsub("[a-z]", "",colnames(resp)[which.max(fval)]) + res.items <- c(res.items,res.item) + res.uni <- res.anova[[which.max(fval)]][3,"Pr(>F)"]>0.05 + res.uniform <- c(res.uniform,res.uni) + items_n <- c(items_n[items_n!=paste0('item',res.item)],paste0("item",res.item,c("noTT","TT"))) + dat[,paste0("item",res.item,'TT')] <- dat[dat$TT==1,paste0('item',res.item)] + dat[,paste0("item",res.item,'noTT')] <- dat[dat$TT==0,paste0('item',res.item)] + resp <- dat[,items_n] + grp <- dat[,group] + pcm_while <- TAM::tam.mml(resp=resp,Y=grp,irtmodel = "PCM",est.variance = T,verbose=F) + nbitems <- length(items_n) + res.anova <- rep(NA,nbitems) + pval <- rep(NA,nbitems) + fval <- rep(NA,nbitems) + for (i in 1:nbitems) { + dat[,paste0('res_',i)] <- rowMeans(predict(pcm_while)$stand.resid[,,i]) + res.anova[i] <- summary(aov(dat[,paste0('res_',i)]~TT*score_q5,data=dat)) + pval[i] <- res.anova[[i]][1,"Pr(>F)"] + fval[i] <- res.anova[[i]][1,'F value'] + } + if (verbose) { + cat('DONE\n') + cat('-----------------------------------------------------------\n') + } + } + if (verbose) { + cat("DETECTED DIF ITEMS\n") + cat('-----------------------------------------------------------\n') + } + if (length(res.items>0)) { + results <- data.frame(dif.items=res.items, + uniform=1*res.uniform) + return(results) + } + else { + if (verbose) { + cat("No DIF was detected\n") + } + return(NULL) + } +} + + + + + + + + +resali_LRT <- function(df=NULL,items=NULL,group=NULL,verbose=T) { + if (verbose) { + cat('-----------------------------------------------------------\n') + cat('COMPUTING INITIAL PCM\n') + cat('-----------------------------------------------------------\n') + } + nbitems <- length(items) + nbitems_o <- nbitems + items_n <- paste0('item',items) + resp <- df[,items_n] + grp <- df[,group] + items_n_alt <- paste0(items_n,c("_TT","_noTT")) + for (i in items_n) { + df[df$TT==0,paste0(i,"_noTT")] <- df[df$TT==0,i] + df[df$TT==1,paste0(i,"_TT")] <- df[df$TT==1,i] + } + resp_alt <- df[,items_n_alt] + pcm_initial <- TAM::tam.mml(resp=resp,Y=grp,irtmodel = "PCM",est.variance = T,verbose=F) + pcm_alt <- TAM::tam.mml(resp=resp_alt,Y=grp,irtmodel = "PCM",est.variance = T,verbose=F) + dat <- df + dat$score <- rowSums(dat[,items_n]) + nqt <- ifelse(length(unique(quantile(dat$score,seq(0,1,0.2))))==6,5,length(unique(quantile(dat$score,seq(0,1,0.2))))-1) + dat$score_q5 <- cut(dat$score,unique(quantile(dat$score,seq(0,1,1/nqt))),labels=1:nqt,include.lowest=T) + res.anova <- rep(NA,nbitems) + pval <- rep(NA,nbitems) + fval <- rep(NA,nbitems) + for (i in items) { + dat[,paste0('res_',i)] <- rowMeans(predict(pcm_initial)$stand.resid[,,i]) + res.anova[i] <- summary(aov(dat[,paste0('res_',i)]~TT*score_q5,data=dat)) + pval[i] <- res.anova[[i]][1,"Pr(>F)"] + fval[i] <- res.anova[[i]][1,'F value'] + } + if (verbose) { + cat('DONE\n') + cat('-----------------------------------------------------------\n') + } + res.items <- c() + res.uniform <- c() + k <- 1 + if (anova(pcm_initial,pcm_alt)$p[1]<0.05) { + while(any(pval<0.05/nbitems_o)) { + k <- k+1 + if (verbose) { + cat(paste('COMPUTING STEP',k,'\n')) + cat('-----------------------------------------------------------\n') + } + res.item <- gsub("[a-z]", "",colnames(resp)[which.max(fval)]) + res.items <- c(res.items,res.item) + res.uni <- res.anova[[which.max(fval)]][3,"Pr(>F)"]>0.05 + res.uniform <- c(res.uniform,res.uni) + items_n <- c(items_n[items_n!=paste0('item',res.item)],paste0("item",res.item,c("noTT","TT"))) + dat[,paste0("item",res.item,'TT')] <- dat[dat$TT==1,paste0('item',res.item)] + dat[,paste0("item",res.item,'noTT')] <- dat[dat$TT==0,paste0('item',res.item)] + resp <- dat[,items_n] + grp <- dat[,group] + pcm_while <- TAM::tam.mml(resp=resp,Y=grp,irtmodel = "PCM",est.variance = T,verbose=F) + nbitems <- length(items_n) + res.anova <- rep(NA,nbitems) + pval <- rep(NA,nbitems) + fval <- rep(NA,nbitems) + for (i in 1:nbitems) { + dat[,paste0('res_',i)] <- rowMeans(predict(pcm_while)$stand.resid[,,i]) + res.anova[i] <- summary(aov(dat[,paste0('res_',i)]~TT*score_q5,data=dat)) + pval[i] <- res.anova[[i]][1,"Pr(>F)"] + fval[i] <- res.anova[[i]][1,'F value'] + } + if (verbose) { + cat('DONE\n') + cat('-----------------------------------------------------------\n') + } + } + if (verbose) { + cat("DETECTED DIF ITEMS\n") + cat('-----------------------------------------------------------\n') + } + if (length(res.items>0)) { + results <- data.frame(dif.items=res.items, + uniform=1*res.uniform) + return(results) + } + else { + if (verbose) { + cat("No DIF was detected\n") + } + return(NULL) + } + } + else { + if (verbose) { + cat("No DIF was detected\n") + } + return(NULL) + } +} \ No newline at end of file