Modified RESALI (corrected score quantile calculation)

main
Corentin Choisy 8 months ago
parent 8bffc896b9
commit ad3ce98415

Binary file not shown.

File diff suppressed because it is too large Load Diff

@ -2050,7 +2050,7 @@ write.csv(res_nodif[[3]],'/home/corentin/Documents/These/Recherche/Simulations/A
############################################################################## ##############################################################################
#----------------------------------------------------------------------------# #----------------------------------------------------------------------------#
########################## SCENARIO ANALYSIS N=50 ########################## ########################### SCENARIO ANALYSIS N=50 ###########################
#----------------------------------------------------------------------------# #----------------------------------------------------------------------------#
############################################################################## ##############################################################################
@ -2706,6 +2706,374 @@ for (x in results[seq(2,length(results))]) {
res.dat$bias <- res.dat$eff.size-res.dat$m.beta res.dat$bias <- res.dat$eff.size-res.dat$m.beta
res.dat.dif$bias <- res.dat.dif$eff.size-res.dat.dif$m.beta res.dat.dif$bias <- res.dat.dif$eff.size-res.dat.dif$m.beta
##############################################################################
#----------------------------------------------------------------------------#
####################### AGGREGATION DIF MATRICES ROSALI ######################
#----------------------------------------------------------------------------#
##############################################################################
#### Create data.frame
# Scenario labels "<number><type>_<N>": scenarios 1-4 come in types A-E,
# scenarios 5-20 in types A-G, each crossed with N = 50, 100, 200 and 300.
# Scenarios 1-9 and 10-20 are sorted separately and then concatenated.
results <- c(
  paste0(rep(1:4, each = 5), c('A','B','C','D','E')),
  paste0(rep(5:9, each = 7), c('A','B','C','D','E','F','G'))
)
results2 <- paste0(rep(10:20, each = 7), c('A','B','C','D','E','F','G'))
results <- sort(paste0(rep(results, times = 4), '_',
                       rep(c(50,100,200,300), each = length(results))))
results2 <- sort(paste0(rep(results2, times = 4), '_',
                        rep(c(50,100,200,300), each = length(results2))))
results <- c(results, results2)
#### Compiler function
# Summarise the per-replication ROSALI results of one simulation scenario.
#
# Arguments:
#   scenario  Scenario label "<number><type>_<N>" (e.g. "12C_200") where <N>
#             is 50, 100, 200 or 300.
#
# Returns a one-row data.frame made of, in this order: the scenario
# descriptors, the mean of every item column (prefixed with "m.") and summary
# statistics of the `beta` / `se_beta` columns.
#
# Reads '<scenario>_original.xls' from the ROSALI-DIF/N<N>/ directory and looks
# up `eff.size` and `dif.size` in the global data frame `res.dat`.
compile_simulation2_rosali <- function(scenario) {
  name <- as.numeric(gsub("[^0-9.-]", "", substr(scenario, start = 0, stop = 2)))

  # Sample size is the label suffix: two characters for "50", three otherwise.
  len <- nchar(scenario)
  if (substr(scenario, start = len - 1, stop = len) == "50") {
    N <- "50"
  } else {
    N <- substr(scenario, start = len - 2, stop = len)
  }
  if (!(N %in% c("50", "100", "200", "300")) || !isTRUE(name > 0)) {
    stop("Unrecognised scenario label: ", scenario, call. = FALSE)
  }
  s <- read_excel(paste0('/home/corentin/Documents/These/Recherche/Simulations/Analysis/ROSALI-DIF/N', N, '/', scenario, '_original.xls'))

  # J: highest item index found in the columns (item<j> or item<j>_1).
  # M: 1 + number of item1_<m> columns present, with a floor of 2.
  cols <- colnames(s)
  J <- max(which(paste0('item', 1:7) %in% cols | paste0('item', 1:7, '_1') %in% cols))
  M <- 1 + sum(paste0('item1_', 1:3) %in% cols)
  if (M == 1) {
    M <- 2
  }
  nb.dif <- max(which(paste0('dif', 1:3) %in% cols | paste0('dif', 1:3, '_1') %in% cols))

  # One mean per item column: item<j> when M == 2, item<j>_1..3 otherwise.
  # Any J other than 4 is treated as the 7-item layout.
  item_ids <- if (J == 4) 1:4 else 1:7
  if (M == 2) {
    item_cols <- paste0('item', item_ids)
  } else {
    item_cols <- paste0('item', rep(item_ids, each = 3), '_', 1:3)
  }
  a <- as.data.frame(lapply(setNames(item_cols, paste0('m.', item_cols)),
                            function(col) mean(s[[col]])))

  # Scenario id = label without the "_<N>" suffix.
  zz <- substr(scenario, start = 0, stop = len - (if (N == "50") 3 else 4))
  eff.size <- unique(res.dat[res.dat$scenario == zz & res.dat$N == N, 'eff.size'])
  dif.size <- unique(res.dat[res.dat$scenario == zz & res.dat$N == N, 'dif.size'])
  b <- data.frame(scenario = zz,
                  scenario.type = substr(zz, start = nchar(zz), stop = nchar(zz)),
                  N = N,
                  J = J,
                  M = M,
                  eff.size = eff.size,
                  nb.dif = nb.dif,
                  dif.size = dif.size
  )

  # beta +/- 1.96 * se_beta interval of every replication.
  ci.low <- s$beta - 1.96 * s$se_beta
  ci.high <- s$beta + 1.96 * s$se_beta
  true.value.in.ci <- eff.size <= ci.high & eff.size >= ci.low
  # The interval excludes zero.
  h0.rejected <- ci.low > 0 | ci.high < 0
  # NA for every replication when eff.size is 0, so both means below are NA.
  beta.same.sign.truebeta.p <- ifelse(rep(eff.size, nrow(s)) == 0, NA,
                                      (rep(eff.size, nrow(s)) / s$beta) > 0)
  num.reject <- which(h0.rejected)
  z <- data.frame(m.beta = mean(s$beta),
                  se.empirical.beta = sd(s$beta),
                  se.analytical.beta = mean(s$se_beta),
                  m.low.ci.beta = mean(ci.low),
                  m.high.ci.beta = mean(ci.high),
                  true.value.in.ci.p = mean(true.value.in.ci),
                  h0.rejected.p = mean(h0.rejected),
                  beta.same.sign.truebeta.p = mean(beta.same.sign.truebeta.p),
                  beta.same.sign.truebeta.signif.p = mean(beta.same.sign.truebeta.p[num.reject])
  )
  cbind(b, a, z)
}
#### Compiled results
# Compile every scenario listed in `results` and stack the one-row summaries
# in one call. Iterating over the whole vector avoids assuming that the
# locale-dependent sort() placed '1A_100' first, and avoids growing the data
# frame one row at a time.
res.dat.dif.rosali <- bind_rows(lapply(results, compile_simulation2_rosali))
res.dat.dif.rosali$bias <- res.dat.dif.rosali$eff.size - res.dat.dif.rosali$m.beta
##############################################################################
#----------------------------------------------------------------------------#
################################### RESALI ###################################
#----------------------------------------------------------------------------#
##############################################################################
# Run resali() on each replication of one simulated scenario and collect what
# it reports, one row per replication.
#
# Arguments:
#   scenario  Scenario label "<number><type>_<N>" (e.g. "12C_200") where <N>
#             is 50, 100, 200 or 300.
#   grp       Passed unchanged to resali() as its `group` argument.
#   nrep      Number of replications to process (replication ids 1..nrep).
#             Defaults to 1000, the value previously hard-coded.
#
# Returns a data.frame with one row per replication and the columns
#   dif.detect.<i>       i-th element of res$dif.items (NA when absent)
#   dif.detect.unif.<i>  i-th element of res$uniform (NA when absent)
#   N, nbdif             sample size and a scenario-dependent DIF item count
#   true.dif.<j>         unique dif<j> value of the replication (NA for
#                        scenarios where that column is not used)
# Scenarios 3, 4 and 13-20 use items 1-7 (and true.dif.1-3); scenarios 1, 2
# and 5-12 use items 1-4 (and true.dif.1-2).
generate_resali <- function(scenario=NULL, grp=NULL, nrep=1000) {
  scen <- as.numeric(gsub("[A,B,C,D,E,F,G,_]", "", substr(scenario, 0, 3)))

  # Sample size is the label suffix: two characters for "50", three otherwise.
  len <- nchar(scenario)
  if (substr(scenario, start = len - 1, stop = len) == "50") {
    N <- 50
  } else {
    suffix <- substr(scenario, start = len - 2, stop = len)
    if (!(suffix %in% c("100", "200", "300"))) {
      stop("Unrecognised sample size in scenario label: ", scenario, call. = FALSE)
    }
    N <- as.numeric(suffix)
  }

  # Item count, DIF item count and the last scenario for which true.dif.2 is
  # NA all depend on the scenario number.
  if (scen %in% c(3, 4, 13:20)) {
    n.items <- 7
    nbdif <- ifelse(scen <= 4, 0, ifelse(scen <= 16, 2, 3))
    dif2.na.upto <- 4
  } else if (scen %in% c(1, 2, 5:12)) {
    n.items <- 4
    nbdif <- ifelse(scen <= 4, 0, ifelse(scen <= 8, 1, 2))
    dif2.na.upto <- 8
  } else {
    stop("Unrecognised scenario number in label: ", scenario, call. = FALSE)
  }

  # Scenarios 1-4 are stored under NoDIF, all others under DIF.
  data.dir <- if (scen < 5) 'NoDIF' else 'DIF'
  dat <- read.csv(paste0('/home/corentin/Documents/These/Recherche/Simulations/Data/', data.dir, '/N', N, '/scenario_', scenario, '.csv'))

  # i-th element of v, or NA when v is shorter. ifelse() is kept on purpose:
  # it drops names/attributes of v[i], which would otherwise leak into the
  # row names of the one-row data frame built below.
  pick <- function(v, i) ifelse(length(v) >= i, v[i], NA)

  one.replication <- function(k) {
    if (k %% 100 == 0) {
      cat(paste0('N=', k, '/', nrep, '\n'))
    }
    rep.dat <- dat[dat$replication == k, ]
    res <- resali(df = rep.dat, items = seq(1, n.items), group = grp, verbose = FALSE)

    detect <- lapply(seq_len(n.items), function(i) pick(res$dif.items, i))
    names(detect) <- paste0('dif.detect.', seq_len(n.items))
    unif <- lapply(seq_len(n.items), function(i) pick(res$uniform, i))
    names(unif) <- paste0('dif.detect.unif.', seq_len(n.items))

    row <- c(detect, unif,
             list(N = N,
                  nbdif = nbdif,
                  true.dif.1 = ifelse(scen <= 4, NA, unique(rep.dat$dif1)),
                  true.dif.2 = ifelse(scen <= dif2.na.upto, NA, unique(rep.dat$dif2))))
    if (n.items == 7) {
      row$true.dif.3 <- ifelse(scen <= 16, NA, unique(rep.dat$dif3))
    }
    as.data.frame(row)
  }

  # Build all rows first, then bind once instead of growing with rbind().
  do.call(rbind, lapply(seq_len(nrep), one.replication))
}
# Scenario labels "<number><type>_<N>": scenarios 1-4 come in types A-E,
# scenarios 5-20 in types A-G, each crossed with N = 50, 100, 200 and 300.
# Scenarios 1-9 and 10-20 are sorted separately and then concatenated.
results <- c(
  paste0(rep(1:4, each = 5), c('A','B','C','D','E')),
  paste0(rep(5:9, each = 7), c('A','B','C','D','E','F','G'))
)
results2 <- paste0(rep(10:20, each = 7), c('A','B','C','D','E','F','G'))
results <- sort(paste0(rep(results, times = 4), '_',
                       rep(c(50,100,200,300), each = length(results))))
results2 <- sort(paste0(rep(results2, times = 4), '_',
                        rep(c(50,100,200,300), each = length(results2))))
results <- c(results, results2)
# For every scenario label: print it, run generate_resali() with group
# variable "TT", and write the returned data frame to
# RESALI/Detection/<label>.csv, framed by two separator lines on the console.
for (r in results) {
  cat(r, "\n", sep = "")
  cat("-------------------------------------------\n")
  write.csv(
    generate_resali(r, "TT"),
    paste0("/home/corentin/Documents/These/Recherche/Simulations/Analysis/RESALI/Detection/", r, ".csv")
  )
  cat("-------------------------------------------\n")
}
##############################################################################
#----------------------------------------------------------------------------#
####################### AGGREGATION DIF MATRICES RESALI ######################
#----------------------------------------------------------------------------#
##############################################################################
#### Create data.frame
# Scenario labels "<number><type>_<N>": scenarios 1-4 come in types A-E,
# scenarios 5-20 in types A-G, each crossed with N = 50, 100, 200 and 300.
# Scenarios 1-9 and 10-20 are sorted separately and then concatenated.
results <- c(
  paste0(rep(1:4, each = 5), c('A','B','C','D','E')),
  paste0(rep(5:9, each = 7), c('A','B','C','D','E','F','G'))
)
results2 <- paste0(rep(10:20, each = 7), c('A','B','C','D','E','F','G'))
results <- sort(paste0(rep(results, times = 4), '_',
                       rep(c(50,100,200,300), each = length(results))))
results2 <- sort(paste0(rep(results2, times = 4), '_',
                        rep(c(50,100,200,300), each = length(results2))))
results <- c(results, results2)
#### Compiler function
# Summarise the per-replication RESALI results of one simulation scenario.
#
# Arguments:
#   scenario  Scenario label "<number><type>_<N>" (e.g. "12C_200") where <N>
#             is 50, 100, 200 or 300.
#
# Returns a one-row data.frame made of, in this order: the scenario
# descriptors, the mean of every item column (prefixed with "m.") and summary
# statistics of the `beta` / `se_beta` columns.
#
# Reads '<scenario>_original.xls' from the resali-DIF/N<N>/ directory and looks
# up `eff.size` and `dif.size` in the global data frame `res.dat`.
compile_simulation2_resali <- function(scenario) {
  name <- as.numeric(gsub("[^0-9.-]", "", substr(scenario, start = 0, stop = 2)))

  # Sample size is the label suffix: two characters for "50", three otherwise.
  len <- nchar(scenario)
  if (substr(scenario, start = len - 1, stop = len) == "50") {
    N <- "50"
  } else {
    N <- substr(scenario, start = len - 2, stop = len)
  }
  if (!(N %in% c("50", "100", "200", "300")) || !isTRUE(name > 0)) {
    stop("Unrecognised scenario label: ", scenario, call. = FALSE)
  }
  # NOTE(review): the directory is spelled lower-case 'resali-DIF' while the
  # detection files in this script are written under 'RESALI' - confirm this
  # path exists on a case-sensitive filesystem.
  s <- read_excel(paste0('/home/corentin/Documents/These/Recherche/Simulations/Analysis/resali-DIF/N', N, '/', scenario, '_original.xls'))

  # J: highest item index found in the columns (item<j> or item<j>_1).
  # M: 1 + number of item1_<m> columns present, with a floor of 2.
  cols <- colnames(s)
  J <- max(which(paste0('item', 1:7) %in% cols | paste0('item', 1:7, '_1') %in% cols))
  M <- 1 + sum(paste0('item1_', 1:3) %in% cols)
  if (M == 1) {
    M <- 2
  }
  nb.dif <- max(which(paste0('dif', 1:3) %in% cols | paste0('dif', 1:3, '_1') %in% cols))

  # One mean per item column: item<j> when M == 2, item<j>_1..3 otherwise.
  # Any J other than 4 is treated as the 7-item layout.
  item_ids <- if (J == 4) 1:4 else 1:7
  if (M == 2) {
    item_cols <- paste0('item', item_ids)
  } else {
    item_cols <- paste0('item', rep(item_ids, each = 3), '_', 1:3)
  }
  a <- as.data.frame(lapply(setNames(item_cols, paste0('m.', item_cols)),
                            function(col) mean(s[[col]])))

  # Scenario id = label without the "_<N>" suffix.
  zz <- substr(scenario, start = 0, stop = len - (if (N == "50") 3 else 4))
  eff.size <- unique(res.dat[res.dat$scenario == zz & res.dat$N == N, 'eff.size'])
  dif.size <- unique(res.dat[res.dat$scenario == zz & res.dat$N == N, 'dif.size'])
  b <- data.frame(scenario = zz,
                  scenario.type = substr(zz, start = nchar(zz), stop = nchar(zz)),
                  N = N,
                  J = J,
                  M = M,
                  eff.size = eff.size,
                  nb.dif = nb.dif,
                  dif.size = dif.size
  )

  # beta +/- 1.96 * se_beta interval of every replication.
  ci.low <- s$beta - 1.96 * s$se_beta
  ci.high <- s$beta + 1.96 * s$se_beta
  true.value.in.ci <- eff.size <= ci.high & eff.size >= ci.low
  # The interval excludes zero.
  h0.rejected <- ci.low > 0 | ci.high < 0
  # NA for every replication when eff.size is 0, so both means below are NA.
  beta.same.sign.truebeta.p <- ifelse(rep(eff.size, nrow(s)) == 0, NA,
                                      (rep(eff.size, nrow(s)) / s$beta) > 0)
  num.reject <- which(h0.rejected)
  z <- data.frame(m.beta = mean(s$beta),
                  se.empirical.beta = sd(s$beta),
                  se.analytical.beta = mean(s$se_beta),
                  m.low.ci.beta = mean(ci.low),
                  m.high.ci.beta = mean(ci.high),
                  true.value.in.ci.p = mean(true.value.in.ci),
                  h0.rejected.p = mean(h0.rejected),
                  beta.same.sign.truebeta.p = mean(beta.same.sign.truebeta.p),
                  beta.same.sign.truebeta.signif.p = mean(beta.same.sign.truebeta.p[num.reject])
  )
  cbind(b, a, z)
}
#### Compiled results
# Compile every scenario listed in `results` and stack the one-row summaries
# in one call. Iterating over the whole vector avoids assuming that the
# locale-dependent sort() placed '1A_100' first, and avoids growing the data
# frame one row at a time.
res.dat.dif.resali <- bind_rows(lapply(results, compile_simulation2_resali))
res.dat.dif.resali$bias <- res.dat.dif.resali$eff.size - res.dat.dif.resali$m.beta
############################################################################## ##############################################################################
#----------------------------------------------------------------------------# #----------------------------------------------------------------------------#
################################## RASCHPOWER ################################ ################################## RASCHPOWER ################################

@ -15,6 +15,9 @@ resali <- function(df=NULL,items=NULL,group=NULL,verbose=T) {
dat <- df dat <- df
dat$score <- rowSums(dat[,items_n]) dat$score <- rowSums(dat[,items_n])
nqt <- ifelse(length(unique(quantile(dat$score,seq(0,1,0.2))))==6,5,length(unique(quantile(dat$score,seq(0,1,0.2))))-1) nqt <- ifelse(length(unique(quantile(dat$score,seq(0,1,0.2))))==6,5,length(unique(quantile(dat$score,seq(0,1,0.2))))-1)
while (length(unique(quantile(dat$score,seq(0,1,1/nqt))))!=nqt+1) {
nqt <- nqt-1
}
dat$score_q5 <- cut(dat$score,unique(quantile(dat$score,seq(0,1,1/nqt))),labels=1:nqt,include.lowest=T) dat$score_q5 <- cut(dat$score,unique(quantile(dat$score,seq(0,1,1/nqt))),labels=1:nqt,include.lowest=T)
res.anova <- rep(NA,nbitems) res.anova <- rep(NA,nbitems)
pval <- rep(NA,nbitems) pval <- rep(NA,nbitems)

Loading…
Cancel
Save