Updated RESALI with new bonferroni correction

2024-04-02 14:38:29 +02:00
parent 6c3e75fc42
commit 635c7c081e
5 changed files with 575 additions and 16 deletions
--- a/.gitignore
+++ b/.gitignore
@ -3,3 +3,5 @@
 .Rproj.user
 *.RData
 *.Rhistory
+rapport.Rmd
+rapport.pdf
--- a/Rapport/criteria.png
+++ b/Rapport/criteria.png
--- a/Rapport/falsepos.png
+++ b/Rapport/falsepos.png
--- a/Rproject/analyse_rosali.R
+++ b/Rproject/analyse_rosali.R
@ -11,11 +11,137 @@ library(pbmcapply)
 library(funprog)
 library(dplyr)
 library(readxl)
+library(furrr)
+library(tibble)

 lastChar <- function(str){
  substr(str, nchar(str), nchar(str))
 }

+##############################################################################
+#----------------------------------------------------------------------------#
+################################### RESALI ###################################
+#----------------------------------------------------------------------------#
+##############################################################################
+
+generate_resali <- function(scenario=NULL,grp=NULL) {
+  scen <- as.numeric(gsub("[A,B,C,_]","",substr(scenario,0,3)))
+  if (substr(scenario,start=nchar(scenario)-1,stop=nchar(scenario))=="50") {
+    N <- 50
+  }
+  if (substr(scenario,start=nchar(scenario)-2,stop=nchar(scenario))=="300") {
+    N <- 300
+  }
+  dat <- read.csv(paste0('/home/corentin/Documents/These/Recherche/ROSALI-SIM/Data/N',N,'/scenario_',scenario,'.csv'))
+  if (scen%in%c(4,16)) {
+  res <- resali(df=dat[dat$replication==1,],items = seq(1,7),group=grp,verbose=FALSE)
+  df_res <- data.frame(dif.detect.1=ifelse(length(res$dif.items)>=1,res$dif.items[1],NA),
+                       dif.detect.2=ifelse(length(res$dif.items)>=2,res$dif.items[2],NA),
+                       dif.detect.3=ifelse(length(res$dif.items)>=3,res$dif.items[3],NA),
+                       dif.detect.4=ifelse(length(res$dif.items)>=4,res$dif.items[4],NA),
+                       dif.detect.5=ifelse(length(res$dif.items)>=5,res$dif.items[5],NA),
+                       dif.detect.6=ifelse(length(res$dif.items)>=6,res$dif.items[6],NA),
+                       dif.detect.7=ifelse(length(res$dif.items)>=7,res$dif.items[7],NA),
+                       dif.detect.unif.1=ifelse(length(res$uniform)>=1,res$uniform[1],NA),
+                       dif.detect.unif.2=ifelse(length(res$uniform)>=2,res$uniform[2],NA),
+                       dif.detect.unif.3=ifelse(length(res$uniform)>=3,res$uniform[3],NA),
+                       dif.detect.unif.4=ifelse(length(res$uniform)>=4,res$uniform[4],NA),
+                       dif.detect.unif.5=ifelse(length(res$uniform)>=5,res$uniform[5],NA),
+                       dif.detect.unif.6=ifelse(length(res$uniform)>=6,res$uniform[6],NA),
+                       dif.detect.unif.7=ifelse(length(res$uniform)>=7,res$uniform[7],NA),
+                       N=N,
+                       nbdif=ifelse(scen==16,2,0),
+                       true.dif.1=ifelse(scen==16,unique(dat[dat$replication==1,]$dif1),NA),
+                       true.dif.2=ifelse(scen==16,unique(dat[dat$replication==1,]$dif2),NA)
+                      )
+  for (k in 2:1000) {
+    if (k%%100==0) {
+      cat(paste0('N=',k,'/1000\n'))
+    }
+    res <- resali(df=dat[dat$replication==k,],items = seq(1,7),group=grp,verbose=FALSE)
+    df_res2 <- data.frame(dif.detect.1=ifelse(length(res$dif.items)>=1,res$dif.items[1],NA),
+                         dif.detect.2=ifelse(length(res$dif.items)>=2,res$dif.items[2],NA),
+                         dif.detect.3=ifelse(length(res$dif.items)>=3,res$dif.items[3],NA),
+                         dif.detect.4=ifelse(length(res$dif.items)>=4,res$dif.items[4],NA),
+                         dif.detect.5=ifelse(length(res$dif.items)>=5,res$dif.items[5],NA),
+                         dif.detect.6=ifelse(length(res$dif.items)>=6,res$dif.items[6],NA),
+                         dif.detect.7=ifelse(length(res$dif.items)>=7,res$dif.items[7],NA),
+                         dif.detect.unif.1=ifelse(length(res$uniform)>=1,res$uniform[1],NA),
+                         dif.detect.unif.2=ifelse(length(res$uniform)>=2,res$uniform[2],NA),
+                         dif.detect.unif.3=ifelse(length(res$uniform)>=3,res$uniform[3],NA),
+                         dif.detect.unif.4=ifelse(length(res$uniform)>=4,res$uniform[4],NA),
+                         dif.detect.unif.5=ifelse(length(res$uniform)>=5,res$uniform[5],NA),
+                         dif.detect.unif.6=ifelse(length(res$uniform)>=6,res$uniform[6],NA),
+                         dif.detect.unif.7=ifelse(length(res$uniform)>=7,res$uniform[7],NA),
+                         N=N,
+                         nbdif=ifelse(scen==16,2,0),
+                         true.dif.1=ifelse(scen==16,unique(dat[dat$replication==k,]$dif1),NA),
+                         true.dif.2=ifelse(scen==16,unique(dat[dat$replication==k,]$dif2),NA))
+    df_res <- rbind(df_res,df_res2)
+  }
+  }
+  else if (scen%in%c(2,8)) {
+    res <- resali(df=dat[dat$replication==1,],items = seq(1,4),group=grp,verbose=FALSE)
+    df_res <- data.frame(dif.detect.1=ifelse(length(res$dif.items)>=1,res$dif.items[1],NA),
+                         dif.detect.2=ifelse(length(res$dif.items)>=2,res$dif.items[2],NA),
+                         dif.detect.3=ifelse(length(res$dif.items)>=3,res$dif.items[3],NA),
+                         dif.detect.4=ifelse(length(res$dif.items)>=4,res$dif.items[4],NA),
+                         dif.detect.unif.1=ifelse(length(res$uniform)>=1,res$uniform[1],NA),
+                         dif.detect.unif.2=ifelse(length(res$uniform)>=2,res$uniform[2],NA),
+                         dif.detect.unif.3=ifelse(length(res$uniform)>=3,res$uniform[3],NA),
+                         dif.detect.unif.4=ifelse(length(res$uniform)>=4,res$uniform[4],NA),
+                         N=N,
+                         nbdif=ifelse(scen==8,1,0),
+                         true.dif=ifelse(scen==8,unique(dat[dat$replication==1,]$dif1),NA)
+    )
+    for (k in 2:1000) {
+      if (k%%100==0) {
+        cat(paste0('N=',k,'/1000\n'))
+      }
+      res <- resali(df=dat[dat$replication==k,],items = seq(1,4),group=grp,verbose=FALSE)
+      df_res2 <- data.frame(dif.detect.1=ifelse(length(res$dif.items)>=1,res$dif.items[1],NA),
+                            dif.detect.2=ifelse(length(res$dif.items)>=2,res$dif.items[2],NA),
+                            dif.detect.3=ifelse(length(res$dif.items)>=3,res$dif.items[3],NA),
+                            dif.detect.4=ifelse(length(res$dif.items)>=4,res$dif.items[4],NA),
+                            dif.detect.unif.1=ifelse(length(res$uniform)>=1,res$uniform[1],NA),
+                            dif.detect.unif.2=ifelse(length(res$uniform)>=2,res$uniform[2],NA),
+                            dif.detect.unif.3=ifelse(length(res$uniform)>=3,res$uniform[3],NA),
+                            dif.detect.unif.4=ifelse(length(res$uniform)>=4,res$uniform[4],NA),
+                            N=N,
+                            nbdif=ifelse(scen==8,1,0),
+                            true.dif=ifelse(scen==8,unique(dat[dat$replication==k,]$dif1),NA))
+      df_res <- rbind(df_res,df_res2)
+    }
+  }
+  return(df_res)
+}
+
+
+
+
+
+
+results <- c(sapply(c(2,4),function(x) paste0(x,c('A','B','C'))),sapply(8,function(x) paste0(x,c('A','B','C'))))
+
+results2 <- c(sapply(16,function(x) paste0(x,c('A','B','C'))))
+
+results <- c(sapply(c(50,300),function(x) paste0(results,'_',x)))
+
+results2 <- c(sapply(c(50,300),function(x) paste0(results2,'_',x)))
+
+results <- sort(results)
+
+results2 <- sort(results2)
+
+results <- c(results,results2)
+
+for (r in results) {
+  cat(paste0(r,"\n"))
+  cat(paste0("-------------------------------------------","\n"))
+  write.csv(generate_resali(r,"TT"),paste0("/home/corentin/Documents/These/Recherche/ROSALI-SIM/Analysis/resali/",r,".csv"))
+  cat(paste0("-------------------------------------------","\n"))
+}
+


 ##############################################################################
@ -40,7 +166,7 @@ results2 <- sort(results2)

 results <- c(results,results2)

-results <- c(sapply(c('noBF','noLRT','noLRT_noBF','original'),function(x) paste0(results,'_',x)))
+results <- c(sapply(c("noBF","noLRT","noLRT_noBF","original","resali"),function(x) paste0(results,'_',x)))

 #### Compiler function

@ -56,15 +182,32 @@ compile_simulation2 <- function(scenario) {
  }
  if (substr(scenario,start=nchar(scenario)-9,stop=nchar(scenario))=="noLRT_noBF") {
    s <- read_excel(paste0('/home/corentin/Documents/These/Recherche/ROSALI-SIM/Analysis/noLRTnoBF/',scenario,'.xls'))
-    s$version <- "noLRT & noBF"
+    s$version <- "noLRTnoBF"
  }
  if (substr(scenario,start=nchar(scenario)-7,stop=nchar(scenario))=="original") {
    s <- read_excel(paste0('/home/corentin/Documents/These/Recherche/ROSALI-SIM/Analysis/original/',scenario,'.xls'))
    s$version <- "original"
  }
+  if (substr(scenario,start=nchar(scenario)-5,stop=nchar(scenario))=="resali") {
+    s <- read.csv(paste0('/home/corentin/Documents/These/Recherche/ROSALI-SIM/Analysis/resali/',gsub("_resali","",scenario),'.csv'),row.names = 1,header = T)
+    if (as.numeric(gsub("[A-Z]","", substr(scenario,start=0,stop=2)))%in%c(2,4)) {
+      s <- s[,-c(ncol(s))]
+    }
+    if (as.numeric(gsub("[A-Z]","", substr(scenario,start=0,stop=2)))%in%c(4)) {
+      s <- s[,-c(ncol(s))]
+    }
+    if (as.numeric(gsub("[A-Z]","", substr(scenario,start=0,stop=2)))%in%c(2,8)) {
+      s <- add_column(s,J=4,.after='N')
+    }
+    if (as.numeric(gsub("[A-Z]","", substr(scenario,start=0,stop=2)))%in%c(4,16)) {
+      s <- add_column(s,J=7,.after='N')
+    }
+    s$version <- "resali"
+  }
  name <- gsub("_", "", substr(scenario,start=0,stop=3))
  if (as.numeric(gsub("[A,B,C]","",name))==2){
    colnames(s) <- c("dif.detect.1","dif.detect.2","dif.detect.3","dif.detect.4",
+                     "dif.detect.unif.1","dif.detect.unif.2","dif.detect.unif.3","dif.detect.unif.4",
                     "N","J","nb.dif","version")
    s$dif.detected <- sapply(1:1000,function(x) any(!is.na(s[x,1:4])))
    s$prop.perfect <- NA
@ -73,32 +216,40 @@ compile_simulation2 <- function(scenario) {
  }
  if (as.numeric(gsub("[A,B,C]","",name))==4){
    colnames(s) <- c("dif.detect.1","dif.detect.2","dif.detect.3","dif.detect.4",
-                     "dif.detect.5","dif.detect.6","dif.detect.7","N","J","nb.dif","version")
+                     "dif.detect.5","dif.detect.6","dif.detect.7",
+                     "dif.detect.unif.1","dif.detect.unif.2","dif.detect.unif.3","dif.detect.unif.4",
+                     "dif.detect.unif.5","dif.detect.unif.6","dif.detect.unif.7",
+                     "N","J","nb.dif","version")
    s$dif.detected <- sapply(1:1000,function(x) any(!is.na(s[x,1:7])))
    s$prop.perfect <- NA
    s$prop.flexible <- NA
    s$prop.moreflexible <- NA
  }
  if (as.numeric(gsub("[A,B,C]","",name))==8){
-    colnames(s) <- c("dif.detect.1","dif.detect.2","dif.detect.3","dif.detect.4","N","J","nb.dif","true.dif","version")
+    colnames(s) <- c("dif.detect.1","dif.detect.2","dif.detect.3","dif.detect.4",
+                     "dif.detect.unif.1","dif.detect.unif.2","dif.detect.unif.3","dif.detect.unif.4",
+                     "N","J","nb.dif","true.dif","version")
    s$dif.detected <- sapply(1:1000,function(x) any(!is.na(s[x,1:4])))
-    s$perfect.detection <- sapply(1:1000,function(x) ifelse(sum(!is.na(s[x,1:4]))==1,s[x,which(sapply(1:4,function(y) !is.na(s[x,y])))]==s[x,"true.dif"],0)  )
+    s$perfect.detection <- sapply(1:1000,function(x) ifelse(sum(!is.na(s[x,1:4]))==1,s[x,"dif.detect.unif.1"]==1 & s[x,which(sapply(1:4,function(y) !is.na(s[x,y])))]==s[x,"true.dif"],0)  )
    s$prop.perfect <- mean(s$perfect.detection)
-    s$flexible.detect <- sapply(1:1000,function(x) ifelse(sum(!is.na(s[x,1:4]))!=0,s[x,"true.dif"]%in%s[x,which(sapply(1:4,function(y) !is.na(s[x,y])))],0)  )
+    s$flexible.detect <- sapply(1:1000,function(x) ifelse(sum(!is.na(s[x,1:4]))==1,s[x,which(sapply(1:4,function(y) !is.na(s[x,y])))]==s[x,"true.dif"],0)  )
    s$prop.flexible <- mean(s$flexible.detect)
-    s$prop.moreflexible <- s$prop.flexible
+    s$moreflexible.detect <- sapply(1:1000,function(x) ifelse(sum(!is.na(s[x,1:4]))!=0,s[x,"true.dif"]%in%c(s[x,which(sapply(1:4,function(y) !is.na(s[x,y])))]),0)  )
+    s$prop.moreflexible <- mean(s$moreflexible.detect)
  }
  if (as.numeric(gsub("[A,B,C]","",name))==16){
    colnames(s) <- c("dif.detect.1","dif.detect.2","dif.detect.3","dif.detect.4",
-                     "dif.detect.5","dif.detect.6","dif.detect.7","N","J","nb.dif","true.dif.1","true.dif.2","version")
+                     "dif.detect.5","dif.detect.6","dif.detect.7",
+                     "dif.detect.unif.1","dif.detect.unif.2","dif.detect.unif.3","dif.detect.unif.4",
+                     "dif.detect.unif.5","dif.detect.unif.6","dif.detect.unif.7",
+                     "N","J","nb.dif","true.dif.1","true.dif.2","version")
    s$dif.detected <- sapply(1:1000,function(x) any(!is.na(s[x,1:7])))
-    perfect.detection <- sapply(1:1000,function(x) ifelse(sum(!is.na(s[x,1:7]))==2,s[x,which(sapply(1:7,function(y) !is.na(s[x,y])))[1]]%in%c(s[x,c("true.dif.1","true.dif.2")]) & s[x,which(sapply(1:7,function(y) !is.na(s[x,y])))[2]]%in%c(s[x,c("true.dif.1","true.dif.2")]) 
+    perfect.detection <- sapply(1:1000,function(x) ifelse(sum(!is.na(s[x,1:7]))==2,s[x,"dif.detect.unif.1"]==1 & s[x,"dif.detect.unif.2"]==1 & s[x,which(sapply(1:7,function(y) !is.na(s[x,y])))[1]]%in%c(s[x,c("true.dif.1","true.dif.2")]) & s[x,which(sapply(1:7,function(y) !is.na(s[x,y])))[2]]%in%c(s[x,c("true.dif.1","true.dif.2")]) 
                                                            ,0)  )
    s$prop.perfect <- mean(perfect.detection)
-    s$flexible.detect <- sapply(1:1000,function(x) ifelse(sum(!is.na(s[x,1:7]))!=0,s[x,"true.dif.1"]%in%c(s[x,which(sapply(1:7,function(y) !is.na(s[x,y])))]) &
-                                                            s[x,"true.dif.2"]%in%c(s[x,which(sapply(1:7,function(y) !is.na(s[x,y])))]),0)  )
+    s$flexible.detect <- sapply(1:1000,function(x) ifelse(sum(!is.na(s[x,1:7]))==2,s[x,which(sapply(1:7,function(y) !is.na(s[x,y])))[1]]%in%c(s[x,c("true.dif.1","true.dif.2")]) & s[x,which(sapply(1:7,function(y) !is.na(s[x,y])))[2]]%in%c(s[x,c("true.dif.1","true.dif.2")]),0 ))
    s$prop.flexible <- mean(s$flexible.detect)
-    s$moreflexible.detect <- sapply(1:1000,function(x) ifelse(sum(!is.na(s[x,1:7]))!=0,s[x,"true.dif.1"]%in%c(s[x,which(sapply(1:7,function(y) !is.na(s[x,y])))]) |
+    s$moreflexible.detect <- sapply(1:1000,function(x) ifelse(sum(!is.na(s[x,1:7]))!=0,s[x,"true.dif.1"]%in%c(s[x,which(sapply(1:7,function(y) !is.na(s[x,y])))]) &
                                                            s[x,"true.dif.2"]%in%c(s[x,which(sapply(1:7,function(y) !is.na(s[x,y])))]),0)  )
    s$prop.moreflexible <- mean(s$moreflexible.detect)
  }
@ -119,8 +270,6 @@ compile_simulation2 <- function(scenario) {

 #### Compiled results

-results <- results[-c(21,23,93,95)]
-
 res.dat.dif <- compile_simulation2("2A_300_noBF")

 for (x in results[seq(2,length(results))]) {
@ -139,5 +288,146 @@ res.dat.dif

 # False positives

-plot.dat <- res.dat.dif[res.dat.dif$J==4 & res.dat.dif$M==4 & res.dat.dif$nb.dif==0 & res.dat.dif$version=="original",]
-boxplot(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05))
+plot.dat <- data.frame(N=c(50,300),
+                       prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif==0 & res.dat.dif$version=='original',]$prop.dif.detect) )
+)
+plot(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,1),lty=1,pch=3,xlab='N',ylab="Proportions of scenarios where DIF was detected")
+lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1)
+title('Proportion of DIF detection in scenarios without DIF')
+
+plot.dat <- data.frame(N=c(50,300),
+                       prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif==0 & res.dat.dif$version=='noBF',]$prop.dif.detect) )
+)
+points(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="blue",pch=2)
+lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="blue")
+
+plot.dat <- data.frame(N=c(50,300),
+                       prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif==0 & res.dat.dif$version=='noLRTnoBF',]$prop.dif.detect) )
+)
+points(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="red",pch=4)
+lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="red")
+
+plot.dat <- data.frame(N=c(50,300),
+                       prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif==0 & res.dat.dif$version=='noLRT',]$prop.dif.detect) )
+)
+points(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="green",pch=5)
+lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="green")
+
+plot.dat <- data.frame(N=c(50,300),
+                       prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif==0 & res.dat.dif$version=='resali',]$prop.dif.detect) )
+)
+points(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="purple",pch=6)
+lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="purple")
+
+legend(x='topleft',legend=c('original','no Bonferroni correction',"no LRT","no LRT and no Bonferroni",'Residuals'),
+       col=c('black',"blue","green","red",'purple'),lty=c(1,1,1,1,1),pch=c(3,2,4,5,6))
+
+par(mfrow=c(2,2))
+
+# Perfect detection
+
+plot.dat <- data.frame(N=c(50,300),
+                       prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif!=0 & res.dat.dif$version=='original',]$prop.perfect) )
+)
+plot(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,1),lty=1,pch=3,xlab='N',ylab="Proportions of scenarios with perfect DIF detection")
+lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1)
+title('Proportion of perfect DIF detection in scenarios with DIF')
+
+plot.dat <- data.frame(N=c(50,300),
+                       prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif!=0 & res.dat.dif$version=='noBF',]$prop.perfect) )
+)
+points(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="blue",pch=2)
+lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="blue")
+
+plot.dat <- data.frame(N=c(50,300),
+                       prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif!=0 & res.dat.dif$version=='noLRTnoBF',]$prop.perfect) )
+)
+points(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="red",pch=4)
+lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="red")
+
+plot.dat <- data.frame(N=c(50,300),
+                       prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif!=0 & res.dat.dif$version=='noLRT',]$prop.perfect) )
+)
+points(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="green",pch=5)
+lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="green")
+
+plot.dat <- data.frame(N=c(50,300),
+                       prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif!=0 & res.dat.dif$version=='resali',]$prop.perfect) )
+)
+points(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="purple",pch=6)
+lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="purple")
+legend(x='topleft',legend=c('original','no Bonferroni correction',"no LRT","no LRT and no Bonferroni",'Residuals'),
+       col=c('black',"blue","green","red",'purple'),lty=c(1,1,1,1,1),pch=c(3,2,4,5,6))
+
+
+
+# Flexible detection
+
+plot.dat <- data.frame(N=c(50,300),
+                       prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif!=0 & res.dat.dif$version=='original',]$prop.flexible) )
+)
+plot(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,1),lty=1,pch=3,xlab='N',ylab="Proportions of scenarios with flexible DIF detection")
+lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1)
+title('Proportion of flexible DIF detection in scenarios with DIF')
+
+plot.dat <- data.frame(N=c(50,300),
+                       prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif!=0 & res.dat.dif$version=='noBF',]$prop.flexible) )
+)
+points(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="blue",pch=2)
+lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="blue")
+
+plot.dat <- data.frame(N=c(50,300),
+                       prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif!=0 & res.dat.dif$version=='noLRTnoBF',]$prop.flexible) )
+)
+points(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="red",pch=4)
+lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="red")
+
+plot.dat <- data.frame(N=c(50,300),
+                       prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif!=0 & res.dat.dif$version=='noLRT',]$prop.flexible) )
+)
+points(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="green",pch=5)
+lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="green")
+
+plot.dat <- data.frame(N=c(50,300),
+                       prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif!=0 & res.dat.dif$version=='resali',]$prop.flexible) )
+)
+points(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="purple",pch=6)
+lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="purple")
+legend(x='topleft',legend=c('original','no Bonferroni correction',"no LRT","no LRT and no Bonferroni",'Residuals'),
+       col=c('black',"blue","green","red",'purple'),lty=c(1,1,1,1,1),pch=c(3,2,4,5,6))
+
+
+# Most flexible detection
+
+plot.dat <- data.frame(N=c(50,300),
+                       prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif!=0 & res.dat.dif$version=='original',]$prop.moreflexible) )
+)
+plot(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,1),lty=1,pch=3,xlab='N',ylab="Proportions of scenarios with most flexible DIF detection")
+lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1)
+title('Proportion of most flexible DIF detection in scenarios with DIF')
+
+plot.dat <- data.frame(N=c(50,300),
+                       prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif!=0 & res.dat.dif$version=='noBF',]$prop.moreflexible) )
+)
+points(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="blue",pch=2)
+lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="blue")
+
+plot.dat <- data.frame(N=c(50,300),
+                       prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif!=0 & res.dat.dif$version=='noLRTnoBF',]$prop.moreflexible) )
+)
+points(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="red",pch=4)
+lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="red")
+
+plot.dat <- data.frame(N=c(50,300),
+                       prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif!=0 & res.dat.dif$version=='noLRT',]$prop.moreflexible) )
+)
+points(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="green",pch=5)
+lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="green")
+
+plot.dat <- data.frame(N=c(50,300),
+                       prop.dif.detect=sapply(c(50,300),function(x) mean(res.dat.dif[res.dat.dif$N==x & res.dat.dif$nb.dif!=0 & res.dat.dif$version=='resali',]$prop.moreflexible) )
+)
+points(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="purple",pch=6)
+lines(plot.dat$prop.dif.detect~plot.dat$N,ylim=c(0,0.05),lty=1,col="purple")
+legend(x='topleft',legend=c('original','no Bonferroni correction',"no LRT","no LRT and no Bonferroni",'Residuals'),
+       col=c('black',"blue","green","red",'purple'),lty=c(1,1,1,1,1),pch=c(3,2,4,5,6))
--- a/Rproject/resali.R
+++ b/Rproject/resali.R
@ -0,0 +1,267 @@
+library(TAM)
+
+resali <- function(df=NULL,items=NULL,group=NULL,verbose=T) {
+  if (verbose) {
+  cat('-----------------------------------------------------------\n')
+  cat('COMPUTING INITIAL PCM\n')
+  cat('-----------------------------------------------------------\n')
+  }
+  nbitems <- length(items)
+  nbitems_o <- nbitems
+  items_n <- paste0('item',items)
+  resp <- df[,items_n]
+  grp <- df[,group]
+  pcm_initial <- TAM::tam.mml(resp=resp,Y=grp,irtmodel = "PCM",est.variance = T,verbose=F)
+  dat <- df
+  dat$score <- rowSums(dat[,items_n])
+  nqt <- ifelse(length(unique(quantile(dat$score,seq(0,1,0.2))))==6,5,length(unique(quantile(dat$score,seq(0,1,0.2))))-1)
+  dat$score_q5 <- cut(dat$score,unique(quantile(dat$score,seq(0,1,1/nqt))),labels=1:nqt,include.lowest=T)
+  res.anova <- rep(NA,nbitems)
+  pval <- rep(NA,nbitems)
+  fval <- rep(NA,nbitems)
+  for (i in items) {
+    dat[,paste0('res_',i)] <- IRT.residuals(pcm_initial)$stand_residuals[,i]
+    res.anova[i] <- summary(aov(dat[,paste0('res_',i)]~TT*score_q5,data=dat))
+    pval[i] <- res.anova[[i]][1,"Pr(>F)"]
+    fval[i] <- res.anova[[i]][1,'F value']
+  }
+  if (verbose) {
+  cat('DONE\n')
+  cat('-----------------------------------------------------------\n')
+  }
+  res.items <- c()
+  res.uniform <- c()
+  k <- 1
+  while(any(pval<0.05/(nbitems_o*3))) {
+    k <- k+1
+    if (verbose) {
+    cat(paste('COMPUTING STEP',k,'\n'))
+    cat('-----------------------------------------------------------\n')
+    }
+    res.item <- gsub("[a-z]", "",colnames(resp)[which.max(fval)])
+    res.items <- c(res.items,res.item)
+    res.uni <- res.anova[[which.max(fval)]][3,"Pr(>F)"]>0.05
+    res.uniform <- c(res.uniform,res.uni)
+    items_n <- c(items_n[items_n!=paste0('item',res.item)],paste0("item",res.item,c("noTT","TT")))
+    dat[,paste0("item",res.item,'TT')] <- dat[dat$TT==1,paste0('item',res.item)]
+    dat[,paste0("item",res.item,'noTT')] <- dat[dat$TT==0,paste0('item',res.item)]
+    resp <- dat[,items_n]
+    grp <- dat[,group]
+    pcm_while <- TAM::tam.mml(resp=resp,Y=grp,irtmodel = "PCM",est.variance = T,verbose=F)
+    nbitems <- length(items_n)
+    res.anova <- rep(NA,nbitems)
+    pval <- rep(NA,nbitems)
+    fval <- rep(NA,nbitems)
+    for (i in 1:nbitems) {
+      dat[,paste0('res_',i)] <- IRT.residuals(pcm_while)$stand_residuals[,i]
+      res.anova[i] <- summary(aov(dat[,paste0('res_',i)]~TT*score_q5,data=dat))
+      pval[i] <- res.anova[[i]][1,"Pr(>F)"]
+      fval[i] <- res.anova[[i]][1,'F value']
+    }
+    if (verbose) {
+    cat('DONE\n')
+    cat('-----------------------------------------------------------\n')
+    }
+  }
+  if (verbose) {
+  cat("DETECTED DIF ITEMS\n")
+  cat('-----------------------------------------------------------\n')
+  }
+  if (length(res.items>0)) {
+    results <- data.frame(dif.items=res.items,
+                          uniform=1*res.uniform)
+    return(results)
+  }
+  else {
+    if (verbose) {
+      cat("No DIF was detected\n")
+    }
+    return(NULL)
+  }
+}
+
+
+
+
+
+resali_BF2 <- function(df=NULL,items=NULL,group=NULL,verbose=T) {
+  if (verbose) {
+    cat('-----------------------------------------------------------\n')
+    cat('COMPUTING INITIAL PCM\n')
+    cat('-----------------------------------------------------------\n')
+  }
+  nbitems <- length(items)
+  nbitems_o <- nbitems
+  items_n <- paste0('item',items)
+  resp <- df[,items_n]
+  grp <- df[,group]
+  pcm_initial <- TAM::tam.mml(resp=resp,Y=grp,irtmodel = "PCM",est.variance = T,verbose=F)
+  dat <- df
+  dat$score <- rowSums(dat[,items_n])
+  nqt <- ifelse(length(unique(quantile(dat$score,seq(0,1,0.2))))==6,5,length(unique(quantile(dat$score,seq(0,1,0.2))))-1)
+  dat$score_q5 <- cut(dat$score,unique(quantile(dat$score,seq(0,1,1/nqt))),labels=1:nqt,include.lowest=T)
+  res.anova <- rep(NA,nbitems)
+  pval <- rep(NA,nbitems)
+  fval <- rep(NA,nbitems)
+  for (i in items) {
+    dat[,paste0('res_',i)] <- rowMeans(predict(pcm_initial)$stand.resid[,,i])
+    res.anova[i] <- summary(aov(dat[,paste0('res_',i)]~TT*score_q5,data=dat))
+    pval[i] <- res.anova[[i]][1,"Pr(>F)"]
+    fval[i] <- res.anova[[i]][1,'F value']
+  }
+  if (verbose) {
+    cat('DONE\n')
+    cat('-----------------------------------------------------------\n')
+  }
+  res.items <- c()
+  res.uniform <- c()
+  k <- 1
+  while(any(pval<0.05/(nbitems_o-k+1))) {
+    k <- k+1
+    if (verbose) {
+      cat(paste('COMPUTING STEP',k,'\n'))
+      cat('-----------------------------------------------------------\n')
+    }
+    res.item <- gsub("[a-z]", "",colnames(resp)[which.max(fval)])
+    res.items <- c(res.items,res.item)
+    res.uni <- res.anova[[which.max(fval)]][3,"Pr(>F)"]>0.05
+    res.uniform <- c(res.uniform,res.uni)
+    items_n <- c(items_n[items_n!=paste0('item',res.item)],paste0("item",res.item,c("noTT","TT")))
+    dat[,paste0("item",res.item,'TT')] <- dat[dat$TT==1,paste0('item',res.item)]
+    dat[,paste0("item",res.item,'noTT')] <- dat[dat$TT==0,paste0('item',res.item)]
+    resp <- dat[,items_n]
+    grp <- dat[,group]
+    pcm_while <- TAM::tam.mml(resp=resp,Y=grp,irtmodel = "PCM",est.variance = T,verbose=F)
+    nbitems <- length(items_n)
+    res.anova <- rep(NA,nbitems)
+    pval <- rep(NA,nbitems)
+    fval <- rep(NA,nbitems)
+    for (i in 1:nbitems) {
+      dat[,paste0('res_',i)] <- rowMeans(predict(pcm_while)$stand.resid[,,i])
+      res.anova[i] <- summary(aov(dat[,paste0('res_',i)]~TT*score_q5,data=dat))
+      pval[i] <- res.anova[[i]][1,"Pr(>F)"]
+      fval[i] <- res.anova[[i]][1,'F value']
+    }
+    if (verbose) {
+      cat('DONE\n')
+      cat('-----------------------------------------------------------\n')
+    }
+  }
+  if (verbose) {
+    cat("DETECTED DIF ITEMS\n")
+    cat('-----------------------------------------------------------\n')
+  }
+  if (length(res.items>0)) {
+    results <- data.frame(dif.items=res.items,
+                          uniform=1*res.uniform)
+    return(results)
+  }
+  else {
+    if (verbose) {
+      cat("No DIF was detected\n")
+    }
+    return(NULL)
+  }
+}
+
+
+
+
+
+
+
+
+resali_LRT <- function(df=NULL,items=NULL,group=NULL,verbose=T) {
+  if (verbose) {
+    cat('-----------------------------------------------------------\n')
+    cat('COMPUTING INITIAL PCM\n')
+    cat('-----------------------------------------------------------\n')
+  }
+  nbitems <- length(items)
+  nbitems_o <- nbitems
+  items_n <- paste0('item',items)
+  resp <- df[,items_n]
+  grp <- df[,group]
+  items_n_alt <- paste0(items_n,c("_TT","_noTT"))
+  for (i in items_n) {
+    df[df$TT==0,paste0(i,"_noTT")] <- df[df$TT==0,i]
+    df[df$TT==1,paste0(i,"_TT")] <- df[df$TT==1,i]
+  }
+  resp_alt <- df[,items_n_alt]
+  pcm_initial <- TAM::tam.mml(resp=resp,Y=grp,irtmodel = "PCM",est.variance = T,verbose=F)
+  pcm_alt <- TAM::tam.mml(resp=resp_alt,Y=grp,irtmodel = "PCM",est.variance = T,verbose=F)
+  dat <- df
+  dat$score <- rowSums(dat[,items_n])
+  nqt <- ifelse(length(unique(quantile(dat$score,seq(0,1,0.2))))==6,5,length(unique(quantile(dat$score,seq(0,1,0.2))))-1)
+  dat$score_q5 <- cut(dat$score,unique(quantile(dat$score,seq(0,1,1/nqt))),labels=1:nqt,include.lowest=T)
+  res.anova <- rep(NA,nbitems)
+  pval <- rep(NA,nbitems)
+  fval <- rep(NA,nbitems)
+  for (i in items) {
+    dat[,paste0('res_',i)] <- rowMeans(predict(pcm_initial)$stand.resid[,,i])
+    res.anova[i] <- summary(aov(dat[,paste0('res_',i)]~TT*score_q5,data=dat))
+    pval[i] <- res.anova[[i]][1,"Pr(>F)"]
+    fval[i] <- res.anova[[i]][1,'F value']
+  }
+  if (verbose) {
+    cat('DONE\n')
+    cat('-----------------------------------------------------------\n')
+  }
+  res.items <- c()
+  res.uniform <- c()
+  k <- 1
+  if (anova(pcm_initial,pcm_alt)$p[1]<0.05) {
+  while(any(pval<0.05/nbitems_o)) {
+    k <- k+1
+    if (verbose) {
+      cat(paste('COMPUTING STEP',k,'\n'))
+      cat('-----------------------------------------------------------\n')
+    }
+    res.item <- gsub("[a-z]", "",colnames(resp)[which.max(fval)])
+    res.items <- c(res.items,res.item)
+    res.uni <- res.anova[[which.max(fval)]][3,"Pr(>F)"]>0.05
+    res.uniform <- c(res.uniform,res.uni)
+    items_n <- c(items_n[items_n!=paste0('item',res.item)],paste0("item",res.item,c("noTT","TT")))
+    dat[,paste0("item",res.item,'TT')] <- dat[dat$TT==1,paste0('item',res.item)]
+    dat[,paste0("item",res.item,'noTT')] <- dat[dat$TT==0,paste0('item',res.item)]
+    resp <- dat[,items_n]
+    grp <- dat[,group]
+    pcm_while <- TAM::tam.mml(resp=resp,Y=grp,irtmodel = "PCM",est.variance = T,verbose=F)
+    nbitems <- length(items_n)
+    res.anova <- rep(NA,nbitems)
+    pval <- rep(NA,nbitems)
+    fval <- rep(NA,nbitems)
+    for (i in 1:nbitems) {
+      dat[,paste0('res_',i)] <- rowMeans(predict(pcm_while)$stand.resid[,,i])
+      res.anova[i] <- summary(aov(dat[,paste0('res_',i)]~TT*score_q5,data=dat))
+      pval[i] <- res.anova[[i]][1,"Pr(>F)"]
+      fval[i] <- res.anova[[i]][1,'F value']
+    }
+    if (verbose) {
+      cat('DONE\n')
+      cat('-----------------------------------------------------------\n')
+    }
+  }
+  if (verbose) {
+    cat("DETECTED DIF ITEMS\n")
+    cat('-----------------------------------------------------------\n')
+  }
+  if (length(res.items>0)) {
+    results <- data.frame(dif.items=res.items,
+                          uniform=1*res.uniform)
+    return(results)
+  }
+  else {
+    if (verbose) {
+      cat("No DIF was detected\n")
+    }
+    return(NULL)
+  }
+  }
+  else {
+    if (verbose) {
+      cat("No DIF was detected\n")
+    }
+    return(NULL)
+  }
+}