You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

89 lines
3.4 KiB
R

##############################################################################
#----------------------------------------------------------------------------#
############################# DATA TRANSFORMATION ############################
#----------------------------------------------------------------------------#
##############################################################################
# Import the ROSALI and RESALI result tables.
# read_excel() is namespace-qualified because readxl is never attached in this
# script; a bare call only works when some earlier session already loaded it.
ros_mdc <- readxl::read_excel("/home/corentin/Documents/These/Recherche/Simulations/Analysis/ROSALI-DIF/N300/6A_300_original.xls")
res_mdc <- readxl::read_excel("/home/corentin/Documents/These/Recherche/Simulations/Analysis/RESALI/Results/N300/6A_300_original.xls")
# Perform MH (Mantel-Haenszel DIF detection, one difMH() call per replication)
library(difR)
dat_mh <- read.csv("/home/corentin/Documents/These/Recherche/Simulations/Data/DIF/N300/scenario_6A_300.csv")[
  , c("item1", "item2", "item3", "item4", "replication", "TT")
]
# det_mh holds 4 consecutive flags per replication (items 1 to 4, in order).
# It is preallocated rather than grown with c() on every iteration.
det_mh <- logical(4L * 1000L)
for (k in 1:1000) {
  # Progress message every 100 replications. The previous test
  # (k %% 1000 == 0) only fired on the very last iteration.
  if (k %% 100 == 0) {
    cat(paste0(k, "/1000\n"))
  }
  # Keep only the rows of replication k; difMH() receives the 4 items plus TT
  dat_mh_temp <- dat_mh[
    dat_mh$replication == k,
    c("item1", "item2", "item3", "item4", "TT")
  ]
  aa <- difMH(Data = dat_mh_temp, group = "TT", focal.name = 0, exact = FALSE)
  # TRUE for each item index (1 to 4) listed in aa$DIFitems
  det_mh[4L * (k - 1L) + 1:4] <- 1:4 %in% aa$DIFitems
}
# Build a long table with one row per (replication, item) pair.
library(tidyr)
# da / db / dc are 1000 x 4 data frames (columns V1..V4, one per item):
# cell [x, k] is TRUE when item k appears in row x of the listed columns.
da <- as.data.frame(vapply(
  1:4,
  function(item) {
    vapply(
      1:1000,
      function(repl) item %in% ros_mdc[repl, paste0("dif_detect_", 1:4)],
      logical(1)
    )
  },
  logical(1000)
))
db <- as.data.frame(vapply(
  1:4,
  function(item) {
    vapply(
      1:1000,
      function(repl) item %in% res_mdc[repl, paste0("dif_detect_", 1:4)],
      logical(1)
    )
  },
  logical(1000)
))
# Reference status is read from the single column real_dif_1 of res_mdc
dc <- as.data.frame(vapply(
  1:4,
  function(item) {
    vapply(
      1:1000,
      function(repl) item %in% res_mdc[repl, paste0("real_dif_", 1)],
      logical(1)
    )
  },
  logical(1000)
))
# Stack the 4 item columns of each table; rows come out replication by
# replication, items 1 to 4 within each replication.
data_mdca <- pivot_longer(data.frame(rosali = da), cols = 1:4)
data_mdcb <- pivot_longer(data.frame(resali = db), cols = 1:4)
data_mdcc <- pivot_longer(data.frame(real = dc), cols = 1:4)
# Keep only the stacked values, one column per source
data_mdc <- data.frame(
  rosali = data_mdca$value,
  resali = data_mdcb$value,
  real = data_mdcc$value
)
# Repeat every element of `kk` four times in place, e.g.
# make_repl(1:2) gives 1 1 1 1 2 2 2 2.
#
# kk: vector of values to repeat (the script passes replication numbers).
# Returns a vector of length 4 * length(kk), or NULL when `kk` is empty
# (the value the previous loop-based implementation returned in that case).
make_repl <- function(kk) {
  if (length(kk) == 0L) {
    return(NULL)
  }
  # rep(each = ) replaces the previous loop that grew the result with c()
  rep(kk, each = 4L)
}
# Add the MH detection flags and the replication index (each replication
# number repeated 4 times by make_repl) as new columns of data_mdc.
data_mdc[["mh"]] <- det_mh
data_mdc[["replication"]] <- make_repl(1:1000)
##############################################################################
#----------------------------------------------------------------------------#
########################### FIT DIF DETECTION MODEL ##########################
#----------------------------------------------------------------------------#
##############################################################################
# Fit a logistic model predicting the reference DIF status (`real`) from the
# ROSALI and RESALI detection flags.
# NOTE(review): the previous comments mentioned a "TAN model" and a fit
# "stratified on replication"; the code fits a single pooled glm() with no
# stratification term, so those comments were dropped.
# Training set: rows 1-2000, i.e. replications 1-500 (4 rows per replication).
mod_glm <- glm(
  formula = real ~ rosali + resali,
  data = data_mdc[1:2000, ],
  family = binomial()
)
# Validation set: rows 2001-4000. The previous range 2000:4000 also contained
# row 2000, which already belongs to the training set.
data_valid <- data_mdc[2001:4000, ]
# predict() is called with its default type, so scores are on the link scale
data_valid$predict <- predict(mod_glm, newdata = data_valid)
roc_c <- pROC::roc(response = data_valid$real, predictor = data_valid$predict)
# NOTE(review): -0.6275167 is a hard-coded cutoff on the link scale,
# presumably read off roc_c in an earlier run; confirm it is still the
# intended threshold.
data_mdc$logit_pred <- predict(mod_glm, newdata = data_mdc) >= -0.6275167
# perf_moreflex[k] is TRUE when every row of replication k with real == TRUE
# is also flagged by logit_pred (vacuously TRUE when no row has real == TRUE).
perf_moreflex <- logical(1000)
for (k in 1:1000) {
  # `:` binds tighter than `+`, so this selects the 4 rows of replication k
  dattt <- data_mdc[4 * (k - 1) + 1:4, ]
  perf_moreflex[k] <- all(dattt$logit_pred[dattt$real])
}
##############################################################################
#----------------------------------------------------------------------------#
######################## FIT UNIFORMITY DETECTION MODEL ######################
#----------------------------------------------------------------------------#
##############################################################################