#' Weighting on propensity score
#'
#' Implements weighting on the propensity score with Matching Weights (MW)
#' or Inverse Probability of Treatment Weighting (IPTW) for all the
#' drug exposures of the input drug matrix \code{x} which have at
#' least a given number of co-occurrences with the outcome.
#' For each drug exposure retained after filtering, the binary outcome
#' is regressed on the drug exposure through a classical weighted
#' regression.
#' With this approach, a p-value is obtained for each drug, and variable
#' selection is performed on the p-values corrected for multiple
#' comparisons.
#'
#'
#' The MW are defined by
#' \deqn{mw_i = \min(PS_i, 1-PS_i) / [expo_i PS_i + (1-expo_i)(1-PS_i)]}
#' and the IPTW by
#' \deqn{iptw_i = expo_i/PS_i + (1-expo_i)/(1-PS_i)}
#' where \eqn{expo_i} is the drug exposure indicator and \eqn{PS_i} the
#' estimated propensity score of observation \eqn{i}.
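#' For illustration, given a vector of estimated scores and an exposure
#' indicator, both types of weights could be computed as in the following
#' sketch (\code{ps} and \code{expo} are hypothetical objects, not
#' arguments of this function):
#' \preformatted{
#' ps   <- c(0.2, 0.7, 0.4)  # hypothetical estimated propensity scores
#' expo <- c(0, 1, 1)        # hypothetical drug exposure indicator
#' mw   <- pmin(ps, 1 - ps) / (expo * ps + (1 - expo) * (1 - ps))
#' iptw <- expo / ps + (1 - expo) / (1 - ps)
#' }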
#' The PS can be estimated in different ways: with the lasso-bic approach,
#' the hdps algorithm or gradient tree boosting.
#' The scores are estimated using the default parameter values of the
#' \code{est_ps_bic}, \code{est_ps_hdps} and \code{est_ps_xgb} functions
#' (see their documentation for details).
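#' As a rough illustration of the lasso-bic principle only (this is not
#' the actual \code{est_ps_bic} implementation), the PS of a hypothetical
#' drug \code{k} could be obtained by regressing its exposure on the other
#' drug covariates with a lasso logistic model, selecting the penalty by BIC:
#' \preformatted{
#' fit <- glmnet::glmnet(x[, -k], x[, k], family = "binomial")
#' bic <- deviance(fit) + log(nrow(x)) * fit$df
#' ps  <- predict(fit, newx = x[, -k], s = fit$lambda[which.min(bic)],
#'                type = "response")
#' }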
#' We apply the same filter and the same multiple testing correction as in
#' the paper UPCOMING REFERENCE: first, PS are estimated only for drug covariates which have
#' at least \code{n_min} co-occurrences with the outcome \code{y}.
#' Adjustment on the PS is performed for these covariates and
#' one-sided or two-sided (depending on the \code{betaPos} parameter)
#' p-values are obtained.
#' The p-values of the covariates not retained after filtering are set to 1.
#' All these p-values are then adjusted for multiple comparisons with the
#' Benjamini-Yekutieli correction.
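#' For instance, with \code{raw_pvals} a hypothetical named vector of
#' p-values obtained for the retained covariates, this correction step
#' amounts to:
#' \preformatted{
#' pvals <- rep(1, ncol(x)) ; names(pvals) <- colnames(x)
#' pvals[names(raw_pvals)] <- raw_pvals
#' pvals <- p.adjust(pvals, method = "BY")
#' }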
#' This function COULD BE VERY LONG to run: since this approach (i) estimates
#' a score for several drug covariates and (ii) performs an adjustment on
#' each of these scores, parallelization is highly recommended.
#'
#'
#'
#' @param x Input matrix, of dimension nobs x nvars. Each row is an observation
#' vector. Can be in sparse matrix format (inheriting from class
#' \code{"sparseMatrix"} as in package \code{Matrix}).
#' @param y Binary response variable, numeric.
#' @param n_min Numeric, minimal number of co-occurrences between a drug covariate
#' and the outcome \code{y} required to estimate its score. See details below.
#' Default is 3.
#' @param betaPos Should the covariates selected by the procedure be
#' positively associated with the outcome? Default is \code{TRUE}.
#' @param weights_type Character, indicates which type of weighting
#' is implemented. Can be either \code{"mw"} or \code{"iptw"}.
#' @param truncation Logical, should weight truncation be performed?
#' Default is \code{FALSE}.
#' @param q If \code{truncation} is \code{TRUE}, quantile value for
#' weight truncation. Ignored if \code{truncation} is \code{FALSE}.
#' Default is 0.025 (2.5\%).
#' @param est_type Character, indicates which approach is used to estimate
#' the propensity score.
#' Can be either \code{"bic"}, \code{"hdps"} or \code{"xgb"}.
#' Default is \code{"bic"}.
#' @param threshold Threshold for the p-values. Default is 0.05.
#' @param ncore Number of cores used for parallel computing.
#' Default is 1, i.e. no parallelization.
#'
#' @return An object with S3 class \code{"ps", "*", "**"},
#' where \code{"*"} is \code{"mw"} or \code{"iptw"}, same as the
#' input parameter \code{weights_type}, and \code{"**"} is
#' \code{"bic"}, \code{"hdps"} or \code{"xgb"} according to how the
#' score was estimated.
#' \item{estimate}{Regression coefficients associated with
#' the drug covariates. Numeric, length equal to the number of variables
#' selected with this approach.
#' Some elements could be NA if
#' (i) the corresponding covariate was filtered out, or
#' (ii) the weighted regression did not converge. Estimating
#' the score in a different way could help, but it is not guaranteed.}
#' \item{corrected_pvals}{One sided p-values if \code{betaPos = TRUE},
#' two-sided p-values if \code{betaPos = FALSE} adjusted for multiple testing.
#' Numeric, length equal to nvars.}
#' \item{selected_variables}{Character vector, names of the variable(s)
#' selected with the weighting-on-PS approach.
#' If \code{betaPos = TRUE}, this set contains the covariates with a
#' corrected one-sided p-value lower than \code{threshold}.
#' Otherwise it contains the covariates with a
#' corrected two-sided p-value lower than \code{threshold}.
#' Covariates are ordered according to their corrected p-value.}
#' @examples
#'
#' set.seed(15)
#' drugs <- matrix(rbinom(100*20, 1, 0.2), nrow = 100, ncol = 20)
#' colnames(drugs) <- paste0("drugs",1:ncol(drugs))
#' ae <- rbinom(100, 1, 0.3)
#' pondps <- ps_pond(x = drugs, y = ae, n_min = 10, weights_type = "iptw")
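#' # Matching weights with weight truncation, on the same toy data
#' pondps_mw <- ps_pond(x = drugs, y = ae, n_min = 10, weights_type = "mw",
#'                      truncation = TRUE, q = 0.01)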
#'
#'
#' @author Emeline Courtois \cr Maintainer: Emeline Courtois
#' \email{emeline.courtois@@inserm.fr}
#'
#' @references Benjamini, Y., & Yekutieli, D. (2001). "The Control of the False Discovery Rate in Multiple Testing under Dependency".
#' \emph{The Annals of Statistics}. 29(4), 1165–1188, doi: \doi{10.1214/aos/1013699998}.
#'
#' @export ps_pond
ps_pond <- function(x, y, n_min = 3, betaPos = TRUE,
                    weights_type = c("mw", "iptw"), truncation = FALSE,
                    q = 0.025, est_type = "bic",
                    threshold = 0.05, ncore = 1){
  # Check args, same as ps_pond_one -----
  if(missing(weights_type)){
    stop("weighting type not specified")
  }
  if(missing(q) & truncation){ # truncation requested but no quantile specified
    warning("default quantile for weight truncation is 0.025")
  }else if(!missing(q) & !truncation){ # quantile specified but no truncation
    warning("truncation option is set to FALSE, quantile will be ignored")
  }
  ## Step 1 : filter -----
  co_occurrence <- t(t(y) %*% x)
  covar_ok_idx <- which(co_occurrence[,1] >= n_min)
  covar_nope_idx <- which(co_occurrence[,1] < n_min)
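  # Note: t(t(y) %*% x) returns, for each column of x, the number of
  # observations where both the outcome and the drug covariate equal 1,
  # i.e. the co-occurrence count used for filtering.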
  ## Step 2 : multiple estimation of scores -----
  # Scores are estimated only for the covariates retained by the filter;
  # all the variables of x are candidate covariates.
  i <- 0 # iterator used by foreach below
  if(est_type == "bic"){
    if(ncore == 1){ # no parallelization
      step2 <- lapply(covar_ok_idx, est_ps_bic, x = x)
    }else if(ncore > 1){
      # # parallelization, UNIX only
      # step2 <- mclapply(covar_ok_idx, est_ps_bic, x = x,
      #                   mc.preschedule = FALSE, mc.cores = ncore)
      # parallelization, all OS
      cl <- makeCluster(ncore)
      registerDoParallel(cl)
      step2 <- foreach(i = covar_ok_idx, .packages = c("glmnet", "speedglm"),
                       .export = c("est_ps_bic", "predict_speedglm.wfit")) %dopar% {
        est_ps_bic(i, x = x)
      }
      stopCluster(cl)
      names(step2) <- names(covar_ok_idx)
    }
}else if(est_type == "hdps"){
if(ncore == 1 ){ #no parallelisation
step2 <- lapply(covar_ok_idx, est_ps_hdps, x = x, y = y)
}else if(ncore >1){
# #parallelisation UNIX
# step2 <- mclapply(covar_ok_idx, est_ps_hdps, x = x, y = y,
# mc.preschedule = FALSE, mc.cores = ncore)
#parallelization all OX
cl <- makeCluster(ncore)
registerDoParallel(cl)
step2 = foreach(i = covar_ok_idx, .packages = c("speedglm"),
.export = c("est_ps_hdps", "hdps_pv", "predict_speedglm.wfit")) %dopar% {
est_ps_hdps(i, x= x, y = y)
}
stopCluster(cl)
names(step2) <- names(covar_ok_idx)
}
}else if(est_type == "xgb"){
if(ncore == 1 ){ #no parallelisation
step2 <- lapply(covar_ok_idx, est_ps_xgb, x = x)
}else if(ncore >1){
# #parallelisation UNIX
# step2 <- mclapply(covar_ok_idx, est_ps_xgb, x = x
# mc.preschedule = FALSE, mc.cores = ncore)
#parallelization all OX
cl <- makeCluster(ncore)
registerDoParallel(cl)
step2 = foreach(i = covar_ok_idx, .packages = c("xgboost", "speedglm"),
.export = "est_ps_xgb") %dopar% {
est_ps_xgb(i, x= x)
}
stopCluster(cl)
names(step2) <- names(covar_ok_idx)
}
}else{
stop("Invalide est_type argument")
}
  # Step 3 : weighting -----
  # Weighted regression for each variable whose score was estimated.
  if(ncore == 1){ # no parallelization
    step3 <- lapply(step2, ps_pond_one, y = y,
                    weights_type = weights_type, truncation = truncation,
                    q = q)
  }else if(ncore > 1){
    # # parallelization, UNIX only
    # step3 <- mclapply(step2, ps_pond_one, y = y,
    #                   weights_type = weights_type,
    #                   truncation = truncation,
    #                   q = q, mc.preschedule = FALSE, mc.cores = ncore)
    # parallelization, all OS
    cl <- makeCluster(ncore)
    registerDoParallel(cl)
    step3 <- foreach(i = seq_along(step2), .packages = c("speedglm"),
                     .export = "ps_pond_one") %dopar% {
      ps_pond_one(ps_est = step2[[i]], y = y,
                  weights_type = weights_type,
                  truncation = truncation, q = q)
    }
    stopCluster(cl)
    names(step3) <- names(step2)
  }
  # Step 4 : extraction of p-values -----
  # Depends on betaPos
  if(betaPos){
    step4 <- unlist(lapply(step3, function(x) try_bis(x$pval_1sided)))
  }else{
    step4 <- unlist(lapply(step3, function(x) try_bis(x$pval_2sided)))
  }
  # Step 5 : correction -----
  # Filtered covariates have their p-value set to 1, then BY correction
  # (applied when more than 20 non-missing p-values).
  step5 <- rep(1, ncol(x)) ; names(step5) <- colnames(x)
  step5[colnames(x)[covar_ok_idx]] <- step4[colnames(x)[covar_ok_idx]]
  if(sum(!is.na(step5)) > 20) step5 <- p.adjust(step5, method = "BY")
  # Step 6 : covariate selection + output -----
  variables <- names(sort(step5[which(step5 <= threshold)], decreasing = FALSE))
  final <- list()
  final$estimate <- unlist(lapply(step3, function(x) try_bis(x$estimate)))
  final$estimate <- final$estimate[variables]
  final$corrected_pvals <- step5
  final$selected_variables <- variables
  class(final) <- c("ps", weights_type, est_type)
  return(final)
}