R/SMAHP.R
In SMAHP: Survival Mediation Analysis of High-Dimensional Proteogenomic Data

Documented in SMAHP

#' Main function
#'
#' An approach for survival mediation analysis of high-dimensional proteogenomic data.
#' @param X An n by p matrix of exposures.
#' @param M An n by p matrix of mediators.
#' @param C An n by p matrix of covariates. If there are no covariates, set C = NULL.
#' @param time A vector of survival time of samples.
#' @param status A vector of status indicator: 0=alive, 1=dead.
#' @param model_option The penalization method used when selecting X for M. Options include MCP, elastic net and lasso. Default is MCP.
#' @param SIS_thres SIS thresholds. Default is "n/log(n)". Other options include "n-1", "n/2log(n)", "2n/log(n)", "3n/log(n)".
#' @param p_adjust_option The method for multiple correction. Option include q-value, holm, hochberg, hommel, bonferroni, BH, BY,
#' and fdr. Default is BH.
#' @param p_thres Threshold for determining significance.
#' @return A list with the following components:
#'   \item{p_final_matrix}{raw p-value matrix before adjustment}
#'   \item{p_adjusted_matrix}{adjusted p-value matrix}
#'   \item{p_med_matrix}{mediation-exposure matrix}
#'   \item{outcome_model}{coefficient estimates from outcome model}
#'   \item{med_results}{coefficient estimates from mediation model}
#' @import survival
#' @import fdrtool
#' @export
#' @examples
#' \donttest{
#' data(example_dat)
#' surv_dat <- example_dat$surv_dat
#' SMAHP(example_dat$X, example_dat$M, example_dat$C, time = surv_dat$time, status = surv_dat$status)
#' }

SMAHP <- function(X, M, C, time, status, model_option = "MCP", SIS_thres = "n/log(n)", p_adjust_option = "BH", p_thres = 0.05) {
  res_step1 <- StepOne(X = X, M = M, time = time, status = status, model_option = "MCP")
  if(length(res_step1$M_sel_Y_s1) == 0){
    warning("No mediator is associated with the survival outcome (i.e., no mediating effect found potentially in the causal pathway).")
    return(NULL)
  }
  M_X_sel_s2 <- StepTwo(X = X, M = M, time = time, status = status,
                        X_sel_Y_s1 = res_step1$X_sel_Y_s1,
                        M_X_s1 = res_step1$M_X_s1,
                        M_sel_Y_s1 = res_step1$M_sel_Y_s1,
                        SIS_thres = SIS_thres)
  res_step3 <- StepThree(X = X, M = M, C = C, time = time, status = status, X_sel_Y_s1 = res_step1$X_sel_Y_s1, M_X_sel_s2 = M_X_sel_s2)

  # generate final raw p-value
  p_beta_m <- res_step3$p_beta_m
  p_alpha_x <- res_step3$p_alpha_x
  d_x <- ncol(X)
  M_sel <- unique(M_X_sel_s2$M)
  p_final_matrix <- matrix(nrow = d_x, ncol = length(M_sel))
  for(i in 1:length(M_sel)){
    p_final_matrix[ ,i] <- ifelse(p_alpha_x[ ,i] >= p_beta_m[i], p_alpha_x[ ,i], p_beta_m[i])
  }
  rownames(p_final_matrix) <- colnames(X)
  colnames(p_final_matrix) <- M_sel

  # adjust p-value
  p_v <- as.vector(p_final_matrix)
  p_pos <- which(is.na(p_v) == FALSE)
  p_v_without_na <- as.vector(na.omit(p_v))
  if(p_adjust_option == "q-value"){
    p_adjusted_v <- fdrtool(p_v_without_na, statistic = "pvalue", plot=FALSE, verbose = FALSE, cutoff.method = "locfdr")$qval
  } else {
    p_adjusted_v <- p.adjust(p_v_without_na, method = p_adjust_option, n = length(p_v))
  }
  p_adjusted_v_na_filled <- rep(NA, length(p_v))
  p_adjusted_v_na_filled[p_pos] <- p_adjusted_v
  p_adjusted_matrix <- matrix(p_adjusted_v_na_filled, nrow = nrow(p_final_matrix), ncol = ncol(p_final_matrix))
  rownames(p_adjusted_matrix) <- rownames(p_final_matrix)
  colnames(p_adjusted_matrix) <- colnames(p_final_matrix)

  # generate selected M and X pairs
  p_med_matrix <- ifelse(p_adjusted_matrix < p_thres, 1, 0)
  p_med_matrix <- ifelse(is.na(p_med_matrix) == TRUE, 0, p_med_matrix)
  p_med_matrix_r <- matrix(0, nrow = ncol(X), ncol = ncol(M) - length(M_sel))
  rownames(p_med_matrix_r) <- colnames(X)
  colnames(p_med_matrix_r) <- setdiff(colnames(M), M_sel)
  p_med_matrix <- cbind(p_med_matrix, p_med_matrix_r)
  p_med_matrix <- p_med_matrix[ ,colnames(M)]

  outcome_model <- res_step3$outcome_model
  med_results <- res_step3$med_results

  return(list(p_final_matrix = p_final_matrix, p_adjusted_matrix = p_adjusted_matrix, p_med_matrix = p_med_matrix,
              outcome_model = outcome_model, med_results = med_results))

}