Nothing
# This is the function classic high-dimensional mediation analysis
#' Classic high-dimensional mediation analysis
#'
#' \code{hima_classic} is used to estimate and test classic high-dimensional mediation effects (linear & logistic regression).
#'
#' @param X a vector of exposure. Do not use \code{data.frame} or \code{matrix}.
#' @param M a \code{data.frame} or \code{matrix} of high-dimensional mediators. Rows represent samples, columns
#' represent variables.
#' @param Y a vector of outcome. Can be either continuous or binary (0-1). Do not use \code{data.frame} or \code{matrix}.
#' @param COV.XM a \code{data.frame} or \code{matrix} of covariates dataset for testing the association \code{M ~ X}.
#' Covariates specified here will not participate penalization. Default = \code{NULL}. If the covariates
#' contain mixed data types, please make sure all categorical variables are properly formatted as \code{factor} type.
#' @param COV.MY a \code{data.frame} or \code{matrix} of covariates dataset for testing the association \code{Y ~ M}.
#' Covariates specified here will not participate penalization. If not specified, the same set of covariates for
#' \code{M ~ X} will be applied (i.e., \code{COV.XM}. Using different sets of covariates is allowed but this needs to be
#' handled carefully.
#' @param Y.type data type of outcome (\code{Y}). Either \code{'continuous'} (default) or \code{'binary'}.
#' @param M.type data type of mediator (\code{M}). Either \code{'gaussian'} (default) or \code{'negbin'} (i.e., negative binomial).
#' @param penalty the penalty to be applied to the model. Either \code{'MCP'} (the default), \code{'SCAD'}, or
#' \code{'lasso'}.
#' @param topN an integer specifying the number of top markers from sure independent screening.
#' Default = \code{NULL}. If \code{NULL}, \code{topN} will be either \code{ceiling(n/log(n))} for continuous outcome,
#' or \code{ceiling(n/(2*log(n)))} for binary outcome, where \code{n} is the sample size. If the sample size is greater
#' than topN (pre-specified or calculated), all mediators will be included in the test (i.e. low-dimensional scenario).
#' @param parallel logical. Enable parallel computing feature? Default = \code{FALSE}.
#' @param ncore number of cores to run parallel computing Valid when \code{parallel = TRUE}.
#' By default max number of cores available in the machine will be utilized.
#' @param scale logical. Should the function scale the data? Default = \code{TRUE}.
#' @param Bonfcut Bonferroni-corrected p value cutoff applied to select significant mediators. Default = \code{0.05}.
#' @param verbose logical. Should the function be verbose? Default = \code{FALSE}.
#' @param ... other arguments passed to \code{ncvreg}.
#'
#' @return A data.frame containing mediation testing results of selected mediators.
#' \describe{
#' \item{Index: }{mediation name of selected significant mediator.}
#' \item{alpha_hat: }{coefficient estimates of exposure (X) --> mediators (M) (adjusted for covariates).}
#' \item{beta_hat: }{coefficient estimates of mediators (M) --> outcome (Y) (adjusted for covariates and exposure).}
#' \item{IDE: }{mediation (indirect) effect, i.e., alpha*beta.}
#' \item{rimp: }{relative importance of the mediator.}
#' \item{pmax: }{joint raw p-value of selected significant mediator (based on Bonferroni method).}
#' }
#'
#' @references Zhang H, Zheng Y, Zhang Z, Gao T, Joyce B, Yoon G, Zhang W, Schwartz J, Just A, Colicino E, Vokonas P, Zhao L,
#' Lv J, Baccarelli A, Hou L, Liu L. Estimating and Testing High-dimensional Mediation Effects in Epigenetic Studies.
#' Bioinformatics. 2016. DOI: 10.1093/bioinformatics/btw351. PMID: 27357171; PMCID: PMC5048064
#'
#' @examples
#' \dontrun{
#' # Note: In the following examples, M1, M2, and M3 are true mediators.
#'
#' # When Y is continuous and normally distributed
#' # Example 1 (continuous outcome):
#' head(ContinuousOutcome$PhenoData)
#'
#' hima.fit <- hima_classic(
#' X = ContinuousOutcome$PhenoData$Treatment,
#' Y = ContinuousOutcome$PhenoData$Outcome,
#' M = ContinuousOutcome$Mediator,
#' COV.XM = ContinuousOutcome$PhenoData[, c("Sex", "Age")],
#' Y.type = "continuous",
#' scale = FALSE, # Disabled only for simulation data
#' verbose = TRUE
#' )
#' hima.fit
#'
#' # When Y is binary
#' # Example 2 (binary outcome):
#' head(BinaryOutcome$PhenoData)
#'
#' hima.logistic.fit <- hima_classic(
#' X = BinaryOutcome$PhenoData$Treatment,
#' Y = BinaryOutcome$PhenoData$Disease,
#' M = BinaryOutcome$Mediator,
#' COV.XM = BinaryOutcome$PhenoData[, c("Sex", "Age")],
#' Y.type = "binary",
#' scale = FALSE, # Disabled only for simulation data
#' verbose = TRUE
#' )
#' hima.logistic.fit
#' }
#'
#' @export
#'
hima_classic <- function(X, M, Y, COV.XM = NULL, COV.MY = COV.XM,
Y.type = c("continuous", "binary"),
M.type = c("gaussian", "negbin"),
penalty = c("MCP", "SCAD", "lasso"),
topN = NULL,
parallel = FALSE,
ncore = 1,
scale = TRUE,
Bonfcut = 0.05,
verbose = FALSE,
...) {
if (!is.null(COV.XM) && !is.data.frame(COV.XM) && !is.matrix(COV.XM)) {
stop("COV.XM must be either a data.frame or a matrix when provided.")
}
if (!is.null(COV.MY) && !is.data.frame(COV.MY) && !is.matrix(COV.MY)) {
stop("COV.MY must be either a data.frame or a matrix when provided.")
}
Y.type <- match.arg(Y.type)
Y.family <- switch(Y.type,
continuous = "gaussian",
binary = "binomial",
stop("Invalid Y.type. Expected 'continuous' or 'binary'.")
)
M.type <- match.arg(M.type)
penalty <- match.arg(penalty)
if (Y.family == "gaussian") message("Running linear HIMA with ", penalty, " penalty...")
if (Y.family == "binomial") message("Running logistic HIMA with ", penalty, " penalty...")
if (parallel && (ncore == 1)) ncore <- parallel::detectCores()
if (!parallel && (ncore > 1)) parallel <- TRUE
n <- nrow(M)
p <- ncol(M)
# Process required variables
X <- process_var(X, scale)
M <- process_var(M, scale)
# Process optional covariates
COV.XM <- process_var(COV.XM, scale)
COV.MY <- process_var(COV.MY, scale)
if (scale && verbose) message("Data scaling is completed.")
if (is.null(topN)) {
if (Y.type == "binary") d <- ceiling(n / (2 * log(n))) else d <- ceiling(2 * n / log(n))
} else {
d <- topN # the number of top mediators that associated with exposure (X)
}
d <- min(p, d) # if d > p select all mediators
#########################################################################
################################ STEP 1 #################################
#########################################################################
message("Step 1: Sure Independent Screening ...", " (", format(Sys.time(), "%X"), ")")
if (Y.type == "binary") {
# Screen M using X given the limited information provided by Y (binary)
if (verbose) message(" Screening M using the association between X (independent variable) and \nM (dependent variable): ", appendLF = FALSE)
alpha <- SIS_Results <- himasis(NA, M, X, COV.XM,
glm.family = M.type, modelstatement = "Mone ~ X",
parallel = parallel, ncore = ncore, verbose, tag = paste0("Sure Independent Screening (M ~ X + COV.XM, family: ", M.type, ")")
)
SIS_Pvalue <- SIS_Results[2, ]
} else if (Y.type == "continuous") {
# Screen M using Y (continuous)
if (verbose) message(" Screening M using the association between M (independent variable) and \nY (dependent variable): ", appendLF = FALSE)
SIS_Results <- himasis(Y, M, X, COV.MY,
glm.family = "gaussian", modelstatement = "Y ~ Mone + X",
parallel = parallel, ncore = ncore, verbose, tag = paste0("Sure Independent Screening (Y ~ M + X + COV.MY, family: ", Y.type, ")")
)
SIS_Pvalue <- SIS_Results[2, ]
} else {
stop(paste0("A ", Y.type, " data type for outcome is not supported."))
}
# Note: ranking using p on un-standardized data is equivalent to ranking using beta on standardized data
SIS_Pvalue_sort <- sort(SIS_Pvalue)
ID <- which(SIS_Pvalue <= SIS_Pvalue_sort[d]) # the index of top mediators
if (verbose) message(" Top ", length(ID), " mediators are selected: ", paste0(names(SIS_Pvalue_sort[seq_len(d)]), collapse = ", "))
M_SIS <- M[, ID]
M_ID_name <- colnames(M)
XM <- cbind(M_SIS, X)
#########################################################################
################################ STEP 2 #################################
#########################################################################
message("Step 2: Penalized estimate (", penalty, ") ...", " (", format(Sys.time(), "%X"), ")")
## Based on the screening results in step 1. We will find the most influential M on Y.
if (is.null(COV.MY)) {
fit <- ncvreg(XM, Y,
family = Y.family,
penalty = penalty,
penalty.factor = c(rep(1, ncol(M_SIS)), 0), ...
)
} else {
COV.MY <- data.frame(COV.MY)
COV.MY <- data.frame(model.matrix(~., COV.MY))[, -1]
conf.names <- colnames(COV.MY)
if (verbose) message(" Adjusting for covariate(s): ", paste0(conf.names, collapse = ", "))
XM_COV <- cbind(XM, COV.MY)
fit <- ncvreg(XM_COV, Y,
family = Y.family,
penalty = penalty,
penalty.factor = c(rep(1, ncol(M_SIS)), rep(0, 1 + ncol(as.matrix(COV.MY)))), ...
)
}
lam <- fit$lambda[which.min(BIC(fit))]
if (verbose) message(" Tuning parameter lambda selected: ", lam)
Coefficients <- coef(fit, lambda = lam)
est <- Coefficients[2:(d + 1)]
ID_1_non <- which(est != 0)
if (length(ID_1_non) == 0) {
if (verbose) message(" All ", penalty, " beta estimates of the ", length(ID), " mediators are zero.")
results <- NULL
return(results)
} else {
if (verbose) message(" Non-zero ", penalty, " beta estimate(s) of mediator(s) found: ", paste0(names(ID_1_non), collapse = ","))
beta_est <- est[ID_1_non] # The non-zero MCP estimators of beta
ID_test <- ID[ID_1_non] # The index of the ID of non-zero beta in Y ~ M
##
if (Y.type == "binary") {
## This has been done in step 1 (when Y is binary, alpha is estimated in M ~ X)
alpha <- alpha[, ID_test, drop = FALSE]
message(" Using alpha estimated in Step 1 ... (", format(Sys.time(), "%X"), ")")
} else if (Y.type == "continuous") {
if (verbose) message(" Estimating alpha (effect of X on M): ", appendLF = FALSE)
alpha <- himasis(NA, M[, ID_test, drop = FALSE], X, COV.XM,
glm.family = M.type,
modelstatement = "Mone ~ X", parallel = FALSE, ncore = ncore,
verbose, tag = paste0("site-by-site alpha estimation (M ~ X + COV.XM, family: ", M.type, ")")
)
} else {
stop(paste0("A ", Y.type, " data type for outcome is not supported."))
}
#########################################################################
################################ STEP 3 #################################
#########################################################################
message("Step 3: Joint significance test ...", " (", format(Sys.time(), "%X"), ")")
alpha_est_ID_test <- as.numeric(alpha[1, ]) # the estimator for alpha
P_alpha <- alpha[2, ] # the raw p-value for alpha
alpha_est <- alpha_est_ID_test
## Post-test based on the oracle property of the MCP penalty
if (is.null(COV.MY)) {
YMX <- data.frame(Y = Y, M[, ID_test, drop = FALSE], X = X)
} else {
YMX <- data.frame(Y = Y, M[, ID_test, drop = FALSE], X = X, COV.MY)
}
if (Y.type == "continuous") res <- summary(glm(Y ~ ., family = "gaussian", data = YMX))$coefficients
if (Y.type == "binary") res <- summary(glm(Y ~ ., family = "binomial", data = YMX))$coefficients
est <- res[2:(length(ID_test) + 1), 1] # the estimator for beta
P_beta <- res[2:(length(ID_test) + 1), 4] # the raw p-value for beta
IDE <- alpha_est * beta_est # mediation(indirect) effect
## Use the maximum value as p value
Pmax <- apply(cbind(P_alpha, P_beta), 1, max)
## Bonferroni
Pmax_Bonf <- Pmax * length(ID_test)
sig_ind <- which(Pmax_Bonf < Bonfcut)
# FDRA <- rbind(P_fdr_beta, P_fdr_alpha)
# FDR <- apply(FDRA, 2, max)
# Total effect
# if(is.null(COV.MY)) {
# YX <- data.frame(Y = Y, X = X)
# } else {
# YX <- data.frame(Y = Y, X = X, COV.MY)
# }
#
# gamma_est <- coef(glm(Y ~ ., family = Y.type, data = YX))[2]
results <- data.frame(
Index = M_ID_name[ID_test][sig_ind],
alpha_hat = alpha_est[sig_ind],
beta_hat = beta_est[sig_ind],
IDE = IDE[sig_ind],
rimp = (abs(IDE) / sum(abs(IDE)))[sig_ind] * 100,
pmax = Pmax[sig_ind], check.names = FALSE
)
message("Done!", " (", format(Sys.time(), "%X"), ")")
doParallel::stopImplicitCluster()
return(results)
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.