From 0a8437fc480235c234f4621c2c20d404dc15b5d1 Mon Sep 17 00:00:00 2001 From: corentinchoisy Date: Mon, 26 May 2025 11:04:03 +0200 Subject: [PATCH] Added functions for the PCBM and PCBSM --- NAMESPACE | 3 + R/pcbm.R | 268 +++++++++++++++++++++++++++++++++++++++++ R/pcbsm.R | 278 +++++++++++++++++++++++++++++++++++++++++++ R/select_weight.R | 25 ++++ man/pcbm.Rd | 43 +++++++ man/pcbsm.Rd | 46 +++++++ man/select_weight.Rd | 23 ++++ 7 files changed, 686 insertions(+) create mode 100644 R/pcbm.R create mode 100644 R/pcbsm.R create mode 100644 R/select_weight.R create mode 100644 man/pcbm.Rd create mode 100644 man/pcbsm.Rd create mode 100644 man/select_weight.Rd diff --git a/NAMESPACE b/NAMESPACE index 4aa249a..145233e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,9 +1,12 @@ # Generated by roxygen2: do not edit by hand export(bpcm) +export(pcbm) +export(pcbsm) export(pcm) export(res_ij) export(residif) +export(select_weight) import(PP) import(dclone) import(rjags) diff --git a/R/pcbm.R b/R/pcbm.R new file mode 100644 index 0000000..c583350 --- /dev/null +++ b/R/pcbm.R @@ -0,0 +1,268 @@ +## File Name: pcbm.R +## File version: 1.0 + +#' Compute Partial Credit Behavioral Model (PCBSM) for polytomous and dichotomous items +#' +#' This function computes a frequentist PCBM, potentially accounting for DIF on specified items +#' +#' @param df data.frame containing the data +#' @param items vector containing the names of columns where item responses are stored in df +#' @param grp string containing the name of the column where the group membership variable is stored in df +#' @param X vector of strings containing the name of additional adjustment variables to be included in the model +#' @param dif.items vector containing the list of indexes in "items" corresponding to dif items +#' @param type.dif vector containing DIF form for each item specified in dif.items. 
1 is homogeneous DIF, 0 is heterogeneous DIF +#' @param verbose set to TRUE to print a detailed output, FALSE otherwise +#' @param fit string determining the optimization algorithm. Values "ucminf" or "nlminb" ar recommended +#' @param method.theta string determining the estimation method for individual latent variable values. Either "eap", "mle" or "wle" +#' @return A data.frame containing various model outputs +#' @import vcrpart +#' @import PP +#' @export + +pcbm <- function(df=NULL,items=NULL,grp=NULL,X=NULL,dif.items=NULL,type.dif=NULL,verbose=T,fit="ucminf",method.theta="eap") { + ##### Detecting errors + + if (any(!(items %in% colnames(df)))) { + stop("ERROR: provided item name does not exist in df") + } + if (any(!(grp %in% colnames(df)))) { + stop("ERROR: provided group variable name does not exist in df") + } + if (any(!is.null(grp))) { + if (any(!(grp%in%colnames(df)))) { + stop("ERROR: group name does not exist in df") + } + } + if (any(is.null(grp))) { + stop("ERROR: group variable required in PCSM. 
Please use PCM if no group variable is needed") + } + if (!is.null(dif.items) & length(dif.items)!=length(type.dif)) { + stop('ERROR: type.dif is not the same length as dif.items') + } + if (!is.null(dif.items) & is.null(type.dif)) { + warning("WARNING: no type.dif provided, assuming non-homogeneous DIF on all items") + } + if (!("id"%in%colnames(df))) { + stop('ERROR: no column named id provided') + } + if ( any(apply(df[df[,grp]==0,items],2,max)0),"+",""),ifelse(length(difvar.unif>0),paste0(difvar.unif,":grp",collapse="+"),""),")+ce(item",ifelse(length(difvar.nonunif>0),"+",""),ifelse(length(difvar.nonunif)>0,paste0(difvar.nonunif,":grp",collapse="+"),""),")+re(0|id)") + formudif <- as.formula(formudif) + mod <- olmm(formudif,data=df.long,family = adjacent(link = "logit"),control=olmm_control(fit=fit)) + comod <- coef(mod) + # output results + nbcoef <- nbitems+length(difvar.nonunif) + restab <- t(sapply(1:nbcoef,function(x) comod[seq(x,length(comod)-2-length(difvar.unif)-length(X),nbitems+length(difvar.nonunif))])) + difcoef.unif <- NULL + if (length(difvar.unif)>0) { + difcoef.unif <- comod[(length(comod)-length(difvar.unif)):(length(comod)-1)] + if (length(difvar.unif)!=1) { + difcoef.unif <- as.matrix(difcoef.unif) + } else { + difcoef.unif <- t(as.matrix(difcoef.unif)) + } + rname <- paste0("item",dif.items[type.dif==1]) + rownames(difcoef.unif) <- paste0("dif.",items_o[which(items%in%rname)]) + colnames(difcoef.unif) <- "gamma" + difcoef.unif <- as.data.frame(difcoef.unif) + for (k in 1:maxmod) { + difcoef.unif[,paste0("gamma_",k)] <- difcoef.unif[,"gamma"] + } + difcoef.unif <- as.matrix(difcoef.unif[,2:ncol(difcoef.unif)]) + } + difcoef.nonunif <- NULL + if (length(difvar.nonunif)>0) { + difcoef.nonunif <- restab[nbitems+c(1:length(difvar.nonunif)),] + if (length(difvar.nonunif)==1) { + difcoef.nonunif <- t(as.matrix(difcoef.nonunif)) + } else { + difcoef.nonunif <- as.matrix(difcoef.nonunif) + } + rname <- paste0("item",dif.items[type.dif==0]) + 
rownames(difcoef.nonunif) <- paste0("dif.",items_o[which(items%in%rname)]) + colnames(difcoef.nonunif) <- paste0("gamma_",1:maxmod) + } + restab <- restab[1:nbitems,] + rownames(restab) <- items_o + colnames(restab) <- paste0("delta_",1:maxmod) + restab.dif <- rbind(difcoef.nonunif,difcoef.unif) + restab.diftype <- matrix(ifelse(type.dif==1,"HOMOGENEOUS","NON-HOMOGENEOUS")) + restab.diftype <- noquote(restab.diftype) + rownames(restab.diftype) <- rownames(restab.dif) + colnames(restab.diftype) <- "dif.type" + beta <- comod["grp"] + se.beta <- (confint(mod)["grp",2]-beta)/1.96 + beta.ci <- confint(mod)["grp",] + beta.p <- 2*pnorm(-abs(beta/se.beta)) + beta <- as.numeric(beta) + se.beta <- as.numeric(se.beta) + beta.p <- as.numeric(beta.p) + beta <- -1*beta + beta.ci <- -1*c(beta.ci[2],beta.ci[1]) + + } else { + # If group no DIF + if (verbose) { + cat('\n') + cat("#################################################################################################\n") + cat("######################################### FITTING MODEL #########################################\n") + cat("#################################################################################################\n") + } + # prepare data + xx <- df[,X] + xx <- as.data.frame(xx) + colnames(xx) <- X + df <- df[,c('id',items,"grp")] + colnames(df)[2:(length(colnames(df))-1)] <- paste0("item",seq(1,length(colnames(df))-2)) + df.long <- reshape(df,v.names=c("item"),direction="long",varying=c(items)) + colnames(df.long) <- c("id","grp","item","resp") + nbitems <- length(2:(length(colnames(df))-1)) + maxmod <- max(df[,2:(length(colnames(df))-1)]) + df.long$item <- factor(df.long$item,levels=seq(1,length(colnames(df))-2),ordered = F) + df.long$resp <- factor(df.long$resp,0:maxmod,ordered=T) + df.long$id <- factor(df.long$id) + k <- 1 + formu <- "resp ~ 0 + ge(" + for (x in X) { + df.long[,x] <- rep(xx[,k],nbitems) + k <- k+1 + formu <- paste0(formu,x,"+") + } + formu <- paste0(formu,"grp) + ce(item) + re(0|id)") 
+ # fit pcm + mod <- olmm(formula = as.formula(formu),data=df.long,family = adjacent(link = "logit"),control=olmm_control(fit=fit)) + comod <- coef(mod) + # output results + restab <- t(sapply(1:nbitems,function(x) comod[seq(x,length(comod)-2-length(X),nbitems)])) + rownames(restab) <- items_o + colnames(restab) <- paste0("delta_",1:maxmod) + restab.dif <- NULL + beta <- comod[length(comod)-1] + se.beta <- (confint(mod)["grp",2]-beta)/1.96 + beta.ci <- confint(mod)["grp",] + beta.p <- 2*pnorm(-abs(beta/se.beta)) + beta <- as.numeric(beta) + se.beta <- as.numeric(se.beta) + beta.p <- as.numeric(beta.p) + beta <- -1*beta + beta.ci <- -1*c(beta.ci[2],beta.ci[1]) + } + + } + if (method.theta=="eap") { + theta <- c(-1*ranef(mod,norm=F)+ifelse(grp==1,beta,0)) + } else if (method.theta=="wle") { + theta <- PP::PP_gpcm(as.matrix(df[,items]),t(restab),rep(1,length(items)))$resPP$resPP[,1] + } else if (method.theta=="mle") { + theta <- PP::PP_gpcm(as.matrix(df[,items]),t(restab),rep(1,length(items)),type="mle")$resPP$resPP[,1] + } + resid <- apply(matrix(1:nbitems,ncol=length(nbitems)),1, function(k) sapply(1:nrow(df), function(j) res_ij(theta[j],restab[k,],df[j,items[k]],beta=0))) + colnames(resid) <- items_o + + ##### Output + if (verbose) { + cat(paste0('Number of individuals: ',nrow(df),"\n")) + cat(paste0('Number of items: ',length(items),"\n")) + cat(paste0('Item Thresholds and DIF parameters: ',"\n")) + } + + + out <- list( + beta=beta, + beta.se=se.beta, + beta.ci=beta.ci, + beta.p=beta.p, + dif.items=dif.items, + dif.type=restab.diftype, + thresholds=restab, + dif.param=restab.dif, + theta=theta, + residuals=resid + ) + return(out) +} + + + diff --git a/R/pcbsm.R b/R/pcbsm.R new file mode 100644 index 0000000..b2eee45 --- /dev/null +++ b/R/pcbsm.R @@ -0,0 +1,278 @@ +## File Name: pcbsm.R +## File version: 1.0 + +#' Compute Partial Credit Behavioral Selection Model (PCBSM) for polytomous and dichotomous items +#' +#' This function computes a frequentist PCBSM, 
potentially accounting for DIF on specified items +#' +#' @param df data.frame containing the data +#' @param items vector containing the names of columns where item responses are stored in df +#' @param grp string containing the name of the column where the group membership variable is stored in df +#' @param X vector of strings containing the name of additional adjustment variables to be included in the model +#' @param u vector of weights to be included in the model as a covariate to account for unobserved confounding. Can be obtained from the "select_weight" function extracting response residuals from a probit model with grp as dependent variable and confounders and instruments as independent variables. +#' @param dif.items vector containing the list of indexes in "items" corresponding to dif items +#' @param type.dif vector containing DIF form for each item specified in dif.items. 1 is homogeneous DIF, 0 is heterogeneous DIF +#' @param verbose set to TRUE to print a detailed output, FALSE otherwise +#' @param fit string determining the optimization algorithm. Values "ucminf" or "nlminb" ar recommended +#' @param method.theta string determining the estimation method for individual latent variable values. Either "eap", "mle" or "wle" +#' @return A data.frame containing various model outputs +#' @import vcrpart +#' @import PP +#' @export + +pcbsm <- function(df=NULL,items=NULL,grp=NULL,u=NULL,X=NULL,dif.items=NULL,type.dif=NULL,verbose=T,fit="ucminf",method.theta="eap") { + ##### Detecting errors + + if (any(!(items %in% colnames(df)))) { + stop("ERROR: provided item name does not exist in df") + } + if (any(!(grp %in% colnames(df)))) { + stop("ERROR: provided group variable name does not exist in df") + } + if (any(!is.null(grp))) { + if (any(!(grp%in%colnames(df)))) { + stop("ERROR: group name does not exist in df") + } + } + if (any(is.null(grp))) { + stop("ERROR: group variable required in PCSM. 
Please use PCM if no group variable is needed") + } + if (!is.null(dif.items) & length(dif.items)!=length(type.dif)) { + stop('ERROR: type.dif is not the same length as dif.items') + } + if (!is.null(dif.items) & is.null(type.dif)) { + warning("WARNING: no type.dif provided, assuming non-homogeneous DIF on all items") + } + if (!("id"%in%colnames(df))) { + stop('ERROR: no column named id provided') + } + if ( any(apply(df[df[,grp]==0,items],2,max)0),"+",""),ifelse(length(difvar.unif>0),paste0(difvar.unif,":grp",collapse="+"),""),")+ce(item",ifelse(length(difvar.nonunif>0),"+",""),ifelse(length(difvar.nonunif)>0,paste0(difvar.nonunif,":grp",collapse="+"),""),")+re(0|id)") + formudif <- as.formula(formudif) + mod <- olmm(formudif,data=df.long,family = adjacent(link = "logit"),control=olmm_control(fit=fit)) + comod <- coef(mod) + # output results + nbcoef <- nbitems+length(difvar.nonunif) + restab <- t(sapply(1:nbcoef,function(x) comod[seq(x,length(comod)-3-length(difvar.unif)-length(X),nbitems+length(difvar.nonunif))])) + difcoef.unif <- NULL + if (length(difvar.unif)>0) { + difcoef.unif <- comod[(length(comod)-length(difvar.unif)):(length(comod)-1)] + if (length(difvar.unif)!=1) { + difcoef.unif <- as.matrix(difcoef.unif) + } else { + difcoef.unif <- t(as.matrix(difcoef.unif)) + } + rname <- paste0("item",dif.items[type.dif==1]) + rownames(difcoef.unif) <- paste0("dif.",items_o[which(items%in%rname)]) + colnames(difcoef.unif) <- "gamma" + difcoef.unif <- as.data.frame(difcoef.unif) + for (k in 1:maxmod) { + difcoef.unif[,paste0("gamma_",k)] <- difcoef.unif[,"gamma"] + } + difcoef.unif <- as.matrix(difcoef.unif[,2:ncol(difcoef.unif)]) + } + difcoef.nonunif <- NULL + if (length(difvar.nonunif)>0) { + difcoef.nonunif <- restab[nbitems+c(1:length(difvar.nonunif)),] + if (length(difvar.nonunif)==1) { + difcoef.nonunif <- t(as.matrix(difcoef.nonunif)) + } else { + difcoef.nonunif <- as.matrix(difcoef.nonunif) + } + rname <- paste0("item",dif.items[type.dif==0]) + 
rownames(difcoef.nonunif) <- paste0("dif.",items_o[which(items%in%rname)]) + colnames(difcoef.nonunif) <- paste0("gamma_",1:maxmod) + } + restab <- restab[1:nbitems,] + rownames(restab) <- items_o + colnames(restab) <- paste0("delta_",1:maxmod) + restab.dif <- rbind(difcoef.nonunif,difcoef.unif) + restab.diftype <- matrix(ifelse(type.dif==1,"HOMOGENEOUS","NON-HOMOGENEOUS")) + restab.diftype <- noquote(restab.diftype) + rownames(restab.diftype) <- rownames(restab.dif) + colnames(restab.diftype) <- "dif.type" + lambda <- as.numeric(comod["u"]) + beta <- as.numeric(comod["grp"])#+lambda + beta <- -1*beta + se.beta <- sqrt(vcov(mod)["grp","grp"]) + beta.ci <- c("2.5%"=beta-1.96*se.beta,"97.5%"=beta+1.96*se.beta) + #se.beta <- sqrt(vcov(mod)["grp","grp"]+vcov(mod)["u","u"]+2*vcov(mod)["u","grp"]) + #beta.ci <- c("2.5%"=beta-1.96*se.beta,"97.5%"=beta+1.96*se.beta) + names(beta.ci) <- c("2.5%","97.5%") + beta.p <- 2*pnorm(-abs(beta/se.beta)) + beta <- as.numeric(beta) + se.beta <- as.numeric(se.beta) + beta.p <- as.numeric(beta.p) + + } else { + # If group no DIF + if (verbose) { + cat('\n') + cat("#################################################################################################\n") + cat("######################################### FITTING MODEL #########################################\n") + cat("#################################################################################################\n") + } + # prepare data + uu <- df[,u] + xx <- df[,X] + xx <- as.data.frame(xx) + colnames(xx) <- X + df <- df[,c('id',items,"grp")] + colnames(df)[2:(length(colnames(df))-1)] <- paste0("item",seq(1,length(colnames(df))-2)) + df.long <- reshape(df,v.names=c("item"),direction="long",varying=c(items)) + colnames(df.long) <- c("id","grp","item","resp") + nbitems <- length(2:(length(colnames(df))-1)) + maxmod <- max(df[,2:(length(colnames(df))-1)]) + df.long$item <- factor(df.long$item,levels=seq(1,length(colnames(df))-2),ordered = F) + df.long$resp <- 
factor(df.long$resp,0:maxmod,ordered=T) + df.long$id <- factor(df.long$id) + df$u <- uu + df.long$u <- rep(uu,nbitems) + k <- 1 + formu <- "resp ~ 0 + ge(u" + for (x in X) { + df.long[,x] <- rep(xx[,k],nbitems) + k <- k+1 + formu <- paste0(formu,"+",x) + } + formu <- paste0(formu,"+grp) + ce(item) + re(0|id)") + # fit pcm + mod <- olmm(formula = as.formula(formu),data=df.long,family = adjacent(link = "logit"),control=olmm_control(fit=fit)) + comod <- coef(mod) + # output results + restab <- t(sapply(1:nbitems,function(x) comod[seq(x,length(comod)-3-length(X),nbitems)])) + rownames(restab) <- items_o + colnames(restab) <- paste0("delta_",1:maxmod) + restab.dif <- NULL + lambda <- as.numeric(comod["u"]) + beta <- as.numeric(comod["grp"])#+lambda + beta <- -1*beta + se.beta <- sqrt(vcov(mod)["grp","grp"]) + beta.ci <- c("2.5%"=beta-1.96*se.beta,"97.5%"=beta+1.96*se.beta) + #se.beta <- sqrt(vcov(mod)["grp","grp"]+vcov(mod)["u","u"]+2*vcov(mod)["u","grp"]) + #beta.ci <- c("2.5%"=beta-1.96*se.beta,"97.5%"=beta+1.96*se.beta) + names(beta.ci) <- c("2.5%","97.5%") + beta.p <- 2*pnorm(-abs(beta/se.beta)) + se.beta <- as.numeric(se.beta) + beta.p <- as.numeric(beta.p) + } + + } + if (method.theta=="eap") { + theta <- c(-1*ranef(mod,norm=F)+ifelse(grp==1,beta,0)) + } else if (method.theta=="wle") { + theta <- PP::PP_gpcm(as.matrix(df[,items]),t(restab),rep(1,length(items)))$resPP$resPP[,1] + } else if (method.theta=="mle") { + theta <- PP::PP_gpcm(as.matrix(df[,items]),t(restab),rep(1,length(items)),type="mle")$resPP$resPP[,1] + } + resid <- apply(matrix(1:nbitems,ncol=length(nbitems)),1, function(k) sapply(1:nrow(df), function(j) res_ij(theta[j],restab[k,],df[j,items[k]],beta=0))) + colnames(resid) <- items_o + + ##### Output + if (verbose) { + cat(paste0('Number of individuals: ',nrow(df),"\n")) + cat(paste0('Number of items: ',length(items),"\n")) + cat(paste0('Item Thresholds and DIF parameters: ',"\n")) + } + + + out <- list( + beta=beta, + beta.se=se.beta, + 
beta.ci=beta.ci,
+ beta.p=beta.p,
+ lambda=as.numeric(lambda),
+ dif.items=dif.items,
+ dif.type=restab.diftype,
+ thresholds=restab,
+ dif.param=restab.dif,
+ theta=theta,
+ residuals=resid
+ )
+ return(out)
+}
diff --git a/R/select_weight.R b/R/select_weight.R
new file mode 100644
index 0000000..548a021
--- /dev/null
+++ b/R/select_weight.R
@@ -0,0 +1,32 @@
+## File Name: select_weight.R
+## File version: 1.0
+
+#' Compute confounding weights for the PCBSM.
+#'
+#' This function computes weights to be included in a PCBSM as a covariate accounting for unobserved confounding. Obtained by extracting response residuals from a probit model with grp as dependent variable and confounders and instruments as independent variables.
+#'
+#' @param df data.frame containing the data
+#' @param grp string containing the name of the column where the group membership variable is stored in df
+#' @param X vector of strings containing the name of confounders to be included in the model
+#' @param instr vector of strings containing the name of instrumental variables to be included in the model
+#' @return A vector of weights to be included in a PCBSM
+#' @export
+
+
+select_weight <- function(df=NULL,grp=NULL,X=NULL,instr=NULL) {
+  # Fail early with a clear message instead of an obscure formula error
+  if (is.null(df) || is.null(grp)) {
+    stop("ERROR: df and grp are required")
+  }
+  # c() drops NULL, so X or instr may be omitted without leaving a dangling
+  # "+" in the formula; with neither, fall back to an intercept-only model
+  predictors <- c(X, instr)
+  if (length(predictors) == 0) {
+    predictors <- "1"
+  }
+  formu <- reformulate(predictors, response = grp)
+  probit_mod <- glm(formula = formu, data = df, family = binomial(link = "probit"))
+  # residuals() on a glm fit defaults to deviance residuals; the weights are
+  # documented as response residuals (observed grp minus fitted probability)
+  residuals(probit_mod, type = "response")
+}
diff --git a/man/pcbm.Rd b/man/pcbm.Rd
new file mode 100644
index 0000000..1875e40
--- /dev/null
+++ b/man/pcbm.Rd
@@ -0,0 +1,43 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/pcbm.R
+\name{pcbm}
+\alias{pcbm}
+\title{Compute Partial Credit Behavioral Model (PCBSM) for polytomous and dichotomous items}
+\usage{
+pcbm(
+ df = NULL,
+ items = NULL,
+ grp = NULL,
+ X = NULL,
+ dif.items = NULL,
+ type.dif = NULL,
+
verbose = T, + fit = "ucminf", + method.theta = "eap" +) +} +\arguments{ +\item{df}{data.frame containing the data} + +\item{items}{vector containing the names of columns where item responses are stored in df} + +\item{grp}{string containing the name of the column where the group membership variable is stored in df} + +\item{X}{vector of strings containing the name of additional adjustment variables to be included in the model} + +\item{dif.items}{vector containing the list of indexes in "items" corresponding to dif items} + +\item{type.dif}{vector containing DIF form for each item specified in dif.items. 1 is homogeneous DIF, 0 is heterogeneous DIF} + +\item{verbose}{set to TRUE to print a detailed output, FALSE otherwise} + +\item{fit}{string determining the optimization algorithm. Values "ucminf" or "nlminb" are recommended} + +\item{method.theta}{string determining the estimation method for individual latent variable values. Either "eap", "mle" or "wle"} +} +\value{ +A list containing various model outputs +} +\description{ +This function computes a frequentist PCBM, potentially accounting for DIF on specified items +} diff --git a/man/pcbsm.Rd b/man/pcbsm.Rd new file mode 100644 index 0000000..c1b68f9 --- /dev/null +++ b/man/pcbsm.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/pcbsm.R +\name{pcbsm} +\alias{pcbsm} +\title{Compute Partial Credit Behavioral Selection Model (PCBSM) for polytomous and dichotomous items} +\usage{ +pcbsm( + df = NULL, + items = NULL, + grp = NULL, + u = NULL, + X = NULL, + dif.items = NULL, + type.dif = NULL, + verbose = T, + fit = "ucminf", + method.theta = "eap" +) +} +\arguments{ +\item{df}{data.frame containing the data} + +\item{items}{vector containing the names of columns where item responses are stored in df} + +\item{grp}{string containing the name of the column where the group membership variable is stored in df} + +\item{u}{vector of weights to be included in the
model as a covariate to account for unobserved confounding. Can be obtained from the "select_weight" function extracting response residuals from a probit model with grp as dependent variable and confounders and instruments as independent variables.} + +\item{X}{vector of strings containing the name of additional adjustment variables to be included in the model} + +\item{dif.items}{vector containing the list of indexes in "items" corresponding to dif items} + +\item{type.dif}{vector containing DIF form for each item specified in dif.items. 1 is homogeneous DIF, 0 is heterogeneous DIF} + +\item{verbose}{set to TRUE to print a detailed output, FALSE otherwise} + +\item{fit}{string determining the optimization algorithm. Values "ucminf" or "nlminb" are recommended} + +\item{method.theta}{string determining the estimation method for individual latent variable values. Either "eap", "mle" or "wle"} +} +\value{ +A list containing various model outputs +} +\description{ +This function computes a frequentist PCBSM, potentially accounting for DIF on specified items +} diff --git a/man/select_weight.Rd b/man/select_weight.Rd new file mode 100644 index 0000000..f553e12 --- /dev/null +++ b/man/select_weight.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/select_weight.R +\name{select_weight} +\alias{select_weight} +\title{Compute confounding weights for the PCBSM.} +\usage{ +select_weight(df = NULL, grp = NULL, X = NULL, instr = NULL) +} +\arguments{ +\item{df}{data.frame containing the data} + +\item{grp}{string containing the name of the column where the group membership variable is stored in df} + +\item{X}{vector of strings containing the name of confounders to be included in the model} + +\item{instr}{vector of strings containing the name of instrumental variables to be included in the model} +} +\value{ +A vector of weights to be included in a PCBSM +} +\description{ +This function computes weights to be included in
a PCBSM as a covariate accounting for unobserved confounding. Obtained by extracting response residuals from a probit model with grp as dependent variable and confounders and instruments as independent variables. +}