HMA: High-Dimensional Mediation Analysis

Documented in qHIMA

# This is the main function for our proposed method for high-dimensional quantile mediation analysis
#' High-dimensional quantile mediation analysis
#' 
#' \code{qHIMA} is used to estimate and test high-dimensional quantile mediation effects.
#' 
#' @param X a vector of exposure. 
#' @param M a \code{data.frame} or \code{matrix} of high-dimensional mediators. Rows represent samples, columns 
#' represent mediator variables.
#' @param Y a vector of continuous outcome. Do not use data.frame or matrix.
#' @param COV a matrix of adjusting covariates. Rows represent samples, columns represent variables. Can be \code{NULL}.
#' @param penalty the penalty to be applied to the model (a parameter passed to function \code{conquer.cv.reg} in package \code{conquer}. 
#' Either 'MCP' (the default), 'SCAD', or 'lasso'.
#' @param topN an integer specifying the number of top markers from sure independent screening. 
#' Default = \code{NULL}. If \code{NULL}, \code{topN} will be \code{2*ceiling(n/log(n))}, where \code{n} is the sample size.
#' If the sample size is greater than topN (pre-specified or calculated), all mediators will be included in the test (i.e. low-dimensional scenario).
#' @param tau quantile level of outcome. Default = 0.5. A vector of tau is accepted.
#' @param scale logical. Should the function scale the data? Default = \code{TRUE}.
#' @param Bonfcut Bonferroni-corrected p value cutoff applied to select significant mediators. Default = \code{0.05}. 
#' @param verbose logical. Should the function be verbose? Default = \code{FALSE}.
#' @param ... reserved passing parameter.
#' 
#' @return A data.frame containing mediation testing results of selected mediators (Bonferroni-adjusted p value <\code{Bonfcut}). 
#' \describe{
#'     \item{Index: }{mediation name of selected significant mediator.}
#'     \item{alpha_hat: }{coefficient estimates of exposure (X) --> mediators (M) (adjusted for covariates).}
#'     \item{alpha_se: }{standard error for alpha.}
#'     \item{beta_hat: }{coefficient estimates of mediators (M) --> outcome (Y) (adjusted for covariates and exposure).}
#'     \item{beta_se: }{standard error for beta.}
#'     \item{IDE: }{mediation (indirect) effect, i.e., alpha*beta.}
#'     \item{rimp: }{relative importance of the mediator.}
#'     \item{pmax: }{joint raw p-value of selected significant mediator (based on Bonferroni method).}
#' }
#' 
#' @references Zhang H, Hong X, Zheng Y, Hou L, Zheng C, Wang X, Liu L. High-Dimensional Quantile Mediation Analysis with Application to a Birth 
#' Cohort Study of Mother–Newborn Pairs. Bioinformatics. 2024. DOI: 10.1093/bioinformatics/btae055. PMID: 38290773; PMCID: PMC10873903
#' 
#' @examples
#' \dontrun{
#' # Note: In the following example, M1, M2, and M3 are true mediators.
#' data(himaDat)
#' 
#' head(himaDat$Example5$PhenoData)
#' 
#' qHIMA.fit <- qHIMA(X = himaDat$Example5$PhenoData$Treatment,
#'                 M = himaDat$Example5$Mediator, 
#'                 Y = himaDat$Example5$PhenoData$Outcome, 
#'                 COV = himaDat$Example5$PhenoData[, c("Sex", "Age")], 
#'                 Bonfcut = 0.05,
#'                 tau = c(0.3, 0.5, 0.7),
#'                 scale = FALSE, # Disabled only for simulation data
#'                 verbose = TRUE)
#' qHIMA.fit
#' }
#' 
#' @export
qHIMA <- function(X, M, Y, COV = NULL, 
                  penalty = c('MCP', "SCAD", "lasso"), 
                  topN = NULL, 
                  tau = 0.5, 
                  scale = TRUE, 
                  Bonfcut = 0.05, 
                  verbose = FALSE,
                  ...)
{
  penalty <- match.arg(penalty)
  
  n <- nrow(M)
  p <- ncol(M)
  
  if(scale)
  {
    X <- scale(X)
    M <- scale(M)
    if(!is.null(COV)) COV <- scale(COV)
    if(verbose) message("Data scaling is completed.")
  } else {
    X <- as.matrix(X)
    M <- as.matrix(M)
    if(!is.null(COV)) COV <- as.matrix(COV)
  }
  
  if(is.null(COV)) XZ <- X else XZ <- cbind(X,COV)
  
  if(is.null(topN)) {
    d <- ceiling(2 * n/log(n)) 
  } else {
    d <- topN  # the number of top mediators that associated with exposure (X)
  }
  
  d <- min(p, d) # if d > p select all mediators
  
  M_ID_name <- colnames(M)
  if(is.null(M_ID_name)) M_ID_name <- seq_len(p)
  
  out_result <- NULL
  
  for (tau_temp in tau)
  {
    message("Running penalized quantile regression with tau = ", tau_temp, " ...", "     (", format(Sys.time(), "%X"), ")")
    
    #------------- Step 1: Mediator screening ---------------------------
    message("Step 1: Sure Independent Screening ...", "     (", format(Sys.time(), "%X"), ")")
    
    alpha_est <- matrix(0,1,p) # the OLS estimator of alpha
    alpha_SE  <- matrix(0,1,p) # the SE of alpha-OLS
    beta_SIS_est <- matrix(0,1,p) # the screening based estimator of beta
    beta_SIS_SE <- matrix(0,1,p) # the SE of beta_SIS_est
    
    for (k in 1:p){
      MXZ_k <- cbind(M[,k], XZ)
      fit_rq <- rq(Y ~ MXZ_k, tau = tau_temp, method="fn", model=TRUE) # screening in the path M-Y
      beta_SIS_est[k] <- fit_rq$coefficients[2]
      fit_M <- lsfit(XZ,M[,k],intercept = TRUE) # screening in the path x-M
      alpha_est[k] <- matrix(coef(fit_M))[2]
      alpha_SE[k] <- ls.diag(fit_M)$std.err[2]
    }
    
    T_sobel <- beta_SIS_est
    ID_SIS <- which(-abs(T_sobel) <= sort(-abs(T_sobel))[d]) # the index set in Step 1 after the screening
    
    alpha_est_hat <- alpha_est
    
    if(verbose) message("        Top ", length(ID_SIS), " mediators are selected: ", paste0(M_ID_name[ID_SIS], collapse = ", "))
    
    
    #----------- Step 2: Penalized estimate in Quantile Regression model
    message("Step 2: Penalized estimate (", penalty, ") ...", "     (", format(Sys.time(), "%X"), ")")
    
    if(verbose)
    {
      if(is.null(COV)) 
      {message("        No covariate was adjusted.")} 
      else
      {message("        Adjusting for covariate(s): ", paste0(colnames(COV), collapse = ", "))}
    }
    
    MXZ_ID_SIS <- cbind(M[,ID_SIS],XZ)
    
    fit.penalty= conquer::conquer.cv.reg(X=MXZ_ID_SIS, Y=Y, tau = tau_temp, penalty = tolower(penalty))
    beta.penalty = fit.penalty$coeff.min[2:(d+1)]
    
    #---------- Step 3: Mediator significance testing 
    message("Step 3: Joint significance test ...", "     (", format(Sys.time(), "%X"), ")")
    
    beta_fit_penalty <- matrix(0,1,p)
    ID_penalty <- ID_SIS[which(beta.penalty != 0)]     # the index of nonzero mediators
    MXZ_penalty <- cbind(M[,ID_penalty],XZ)            # cbind M[ID_penalty], X, and COV
    
    fit_rq_penalty <- quantreg::rq(Y ~ MXZ_penalty, tau = tau_temp, method="fn", model=TRUE)
    rq_est_penalty <- summary(fit_rq_penalty, covariance = TRUE, se = "boot", R = 10000)$coefficients
    beta_hat_penalty <- matrix(rq_est_penalty[2:(length(ID_penalty)+1),1])
    beta_SE_penalty  <- matrix(rq_est_penalty[2:(length(ID_penalty)+1),2])
    beta_fit_penalty[ID_penalty] <-  beta_hat_penalty
    
    #--- the p-values
    P_raw_2k <- 2*(1-pnorm(abs(alpha_est)/alpha_SE,0,1)) # the p-values of alpha
    P_raw_1k_penalty  <-  2*(1-pnorm(abs(beta_hat_penalty)/beta_SE_penalty,0,1))
    P_max_k_penalty <- apply(cbind(P_raw_1k_penalty, P_raw_2k[ID_penalty]),1,max)
    
    #-- the Pmax method
    P_penalty_Pmax  <- P_max_k_penalty*length(ID_penalty)
    sig_ind <- which(P_penalty_Pmax < Bonfcut)
    ID_Non_penalty_Pmax <- ID_penalty[sig_ind] # the ID of significant M by JS
    
    IDE <- alpha_est[ID_Non_penalty_Pmax] * beta_hat_penalty[sig_ind]
    
    if(length(ID_Non_penalty_Pmax) > 0)
    {
      out_result <- rbind(out_result, 
                          data.frame(Index = M_ID_name[ID_Non_penalty_Pmax], 
                                     alpha_hat = alpha_est[ID_Non_penalty_Pmax], 
                                     alpha_se = alpha_SE[ID_Non_penalty_Pmax], 
                                     beta_hat = beta_hat_penalty[sig_ind], 
                                     beta_se = beta_SE_penalty[sig_ind],
                                     IDE = IDE, 
                                     rimp = abs(IDE)/sum(abs(IDE)) * 100, 
                                     pmax = P_max_k_penalty[sig_ind],
                                     tau = tau_temp))
      if(verbose) message(paste0("        ", length(ID_Non_penalty_Pmax), " significant mediator(s) identified."))
    } else {
      if(verbose) message("        No significant mediator identified.")
    }
    message("\t")
  }
  
  message("Done!", "     (", format(Sys.time(), "%X"), ")")
  return(out_result)
}