# massGLM.R
# ::rtemis::
# 2021-2 E.D. Gennatas www.lambdamd.org
#' Mass-univariate GLM Analysis
#'
#' Run a mass-univariate analysis with either:
#' a) single outome (y) and multiple predictors (x), one at a time, with an
#' optional common set of covariates in each model - "massx"
#' b) multiple different outcomes (y) with a fixed set of predictors (x) - "massy"
#' Therefore, the term mass-univariate refers to looking at one variable of
#' interest (with potential covariates of no interest) at a time
#'
#' @param x Matrix / data frame of features
#' @param y Matrix / data frame of outcomes
#' @param scale.x Logical: If TRUE, scale and center `x`
#' @param scale.y Logical: If TRUE, scale and center `y`
#' @param type Character: "massx" or "massy". Default = NULL,
#' where if (NCOL(x) > NCOL(y)) "massx" else "massy"
#' @param xnames Character vector: names of `x` feature(s)
#' @param ynames Character vector: names of `y` feature(s)
#' @param coerce.y.numeric Logical: If `TRUE`, coerce `y` to numeric
#' @param save.mods Logical: If TRUE, save models.
# @param p.adjust.method Character: p-value adjustment method. See \code{p.adjust}.
#' @param print.plot Logical: If TRUE, print plot.
#' @param include_anova_pvals Logical: If TRUE, include ANOVA p-values,
#' (generated by `glm2table`)
#' @param trace Integer: If > 0, print more verbose output to console.
#' @param verbose Logical: If TRUE, print messages during run
#'
#' @author E.D. Gennatas
#' @export
#'
#' @examples
#' \dontrun{
#' # Common usage is "reversed":
#' # x: outcome of interest as first column, optional covariates
#' # in the other columns
#' # y: features whose association with x we want to study
#' set.seed(2022)
#' features <- rnormmat(500, 40)
#' outcome <- features[, 3] - features[, 5] + features[, 14] + rnorm(500)
#' massmod <- massGLM(outcome, features)
#' plot(massmod)
#' plot(massmod, what = "coef")
#' plot(massmod, what = "volcano")
#' }
#'
massGLM <- function(x, y,
scale.x = FALSE,
scale.y = FALSE,
type = NULL,
xnames = NULL,
ynames = NULL,
coerce.y.numeric = FALSE,
save.mods = FALSE,
print.plot = FALSE,
include_anova_pvals = NA,
verbose = TRUE,
trace = 0) {
# Intro ----
.call <- match.call()
.call[2] <- list(str2lang("dat"))
dependency_check("progressr")
start_time <- intro(verbose = verbose)
# Data ----
if (is.null(type)) type <- if (NCOL(x) > NCOL(y)) "massx" else "massy"
if (trace > 0) msg20('massGLM type is "', type, '"')
if (type == "massx") {
if (is.null(colnames(x))) colnames(x) <- paste0("Feature_", seq_len(NCOL(x)))
nmods <- NCOL(x)
} else {
if (is.null(colnames(y))) colnames(y) <- paste0("Outcome_", seq_len(NCOL(y)))
nmods <- NCOL(y)
}
if (is.null(xnames)) {
xnames <- if (is.null(colnames(x))) {
paste(deparse(substitute(x)), seq_len(NCOL(x)), sep = "_")
} else {
colnames(x)
}
}
if (trace > 0) msg2("Feature names:", paste(xnames, collapse = ", "))
if (is.null(ynames)) {
ynames <- if (is.null(colnames(y))) deparse(substitute(y)) else colnames(y)
}
if (trace > 0) msg2("Outcome names:", paste(ynames, collapse = ", "))
if (coerce.y.numeric) {
y <- sapply(y, as.numeric)
}
if (scale.x) {
x <- preprocess(x, scale = TRUE, verbose = verbose)
}
if (scale.y) {
y <- preprocess(y, scale = TRUE, verbose = verbose)
}
dat <- data.frame(x, y)
colnames(dat) <- c(xnames, ynames)
# fit1: Loop function ----
p <- progressr::progressor(along = seq_len(nmods))
fit1 <- function(index, dat, type) {
if (type == "massx") {
.formula <- as.formula(paste(ynames, "~", xnames[index]))
.family <- if (is.factor(dat[[ynames]])) "binomial" else "gaussian"
out <- glm(.formula, family = .family, data = dat)
p()
out
} else {
.formula <- as.formula(paste(ynames[index], "~", paste(xnames, collapse = " + ")))
.family <- if (is.factor(dat[[ynames[index]]])) "binomial" else "gaussian"
out <- glm(.formula, family = .family, data = dat)
p()
out
}
}
# Models ----
if (verbose) msg2("Training", nmods, "mass-GLM models...")
mods <- lapply(
seq_len(nmods),
fit1,
dat = dat,
type = type
)
names(mods) <- if (type == "massx") xnames else ynames
# Outro ----
out <- list(
mods = if (save.mods) mods else NULL,
summary = glm2table(mods, include_anova_pvals = include_anova_pvals),
xnames = xnames,
coefnames = names(coef(mods[[1]])),
ynames = ynames,
type = type,
call = .call
)
class(out) <- c("massGLM", "list")
if (print.plot) print(plot(out))
outro(start_time, verbose = verbose)
out
} # rtemis::massGLM
#' `print`[massGLM] object
#'
#' @method print massGLM
#' @param x [massGLM] object
#' @param ... Not used
#'
#' @author E.D. Gennatas
#' @export
print.massGLM <- function(x, ...) {
nx <- length(x$xnames)
ny <- length(x$ynames)
.text <- paste(
"Mass-univariate GLM analysis with", nx,
ngettext(nx, "predictor", "predictors"),
"and", ny, ngettext(ny, "outcome", "outcomes\n")
)
cat(.text)
invisible(.text)
}
#' `massGLM` object summary
#'
#' @param object An object created by [massGLM]
#' @param ... Not used
#'
#' @author E.D. Gennatas
#' @export
summary.massGLM <- function(object, ...) {
print(object$summary, row.names = FALSE, class = FALSE)
}
#' Plot `massGLM` object
#'
#' Plots a `massGLM` object using [dplot3_bar]
#'
#' @method plot massGLM
#' @param x `massGLM` object
#' @param what Character: "adjusted" or "raw" p-values to plot
#' @param xlab Character: x-axis label for volcano plot
#'
#' @author E.D. Gennatas
#' @export
plot.massGLM <- function(x,
predictor = NULL,
main = NULL,
what = c("volcano", "coefs", "pvals"),
# which_pvals = c("glm", "anova2", "anova3"),
p.adjust.method = "holm",
p.transform = \(x) -log10(x),
show = c("all", "signif"),
xnames = NULL,
pval.hline = c(.05, .001),
hline.col = "#ffffff",
hline.dash = "dash",
hline.annotate = as.character(pval.hline),
ylim = NULL,
xlab = NULL,
ylab = NULL,
group = NULL,
col.neg = "#43A4AC",
col.pos = "#FA9860",
col.ns = "#7f7f7f",
theme = rtTheme,
alpha = NULL,
volcano.annotate = TRUE,
volcano.annotate.n = 7,
volcano.hline = NULL,
volcano.hline.dash = "dot",
volcano.hline.annotate = NULL,
volcano.p.transform = \(x) -log10(x),
margin = NULL,
displayModeBar = TRUE,
trace = 0,
filename = NULL, ...) {
what <- match.arg(what)
# which_pvals <- match.arg(which_pvals)
which_pvals <- "glm"
show <- match.arg(show)
if (x$type == "massy") {
if (is.null(predictor)) {
if (which_pvals == "glm") {
predictor <- x$coefnames[2]
} else {
predictor <- x$xnames[1]
}
}
what <- match.arg(what)
# pval_idi <- switch(which_pvals,
# glm = which(names(x$summary) == paste("p_value", predictor)),
# anova2 = which(names(x$summary) == paste("p_value type II", predictor)),
# anova3 = which(names(x$summary) == paste("p_value type III", predictor))
# )
pval_idi <- which(names(x$summary) == paste("p_value", predictor))
if (what == "pvals") {
# p-values ----
if (is.null(main)) main <- "p-values"
pval_idi <- grep(paste("p_value", predictor), names(x$summary))[1]
.name <- gsub("p_value ", "", names(x$summary)[pval_idi])
.pvals <- p.adjust(x$summary[[pval_idi]], method = p.adjust.method)
.coefname <- getnames(x$summary, paste("Coefficient", .name))
.cols <- rep(col.ns, length(x$summary[[.coefname]]))
.cols[x$summary[[.coefname]] < 0 & .pvals < .05] <- col.neg
.cols[x$summary[[.coefname]] > 0 & .pvals < .05] <- col.pos
if (is.null(ylab)) {
ylab <- paste(
# print_transform(deparse(p.transform)[2]),
print_fn(p.transform),
what, .name, "p-value"
)
}
if (is.null(xnames)) {
xnames <- if (x$type == "massy") x$ynames else x$xnames
}
dplot3_bar(
p.transform(.pvals),
group.names = xnames,
main = main,
# ylim = c(0, 1),
ylim = ylim,
legend = FALSE,
ylab = ylab,
col = .cols,
hline = p.transform(pval.hline),
hline.col = hline.col,
hline.dash = hline.dash,
hline.annotate = hline.annotate,
theme = theme,
margin = margin,
displayModeBar = displayModeBar,
filename = filename, ...
)
} else if (what == "coefs") {
# Coefficients ----
if (is.null(main)) main <- "Coefficients"
coef_idi <- which(names(x$summary) == paste("Coefficient", predictor))
.name <- gsub("Coefficient ", "", names(x$summary)[coef_idi])
.pvals <- p.adjust(x$summary[[pval_idi]], method = p.adjust.method)
.coefname <- getnames(x$summary, paste("Coefficient", .name))
.cols <- rep(col.ns, length(x$summary[[.coefname]]))
.cols[x$summary[[.coefname]] < 0 & .pvals < .05] <- col.neg
.cols[x$summary[[.coefname]] > 0 & .pvals < .05] <- col.pos
if (is.null(xnames)) xnames <- x$ynames
dplot3_bar(
x$summary[[coef_idi]],
# group.names = if (x$type == "massy") x$ynames else x$xnames,
group.names = xnames,
main = main,
legend = FALSE,
ylab = paste(.name, "Coefficients"),
col = .cols,
theme = theme,
margin = margin,
displayModeBar = displayModeBar,
filename = filename, ...
)
} else {
# Volcano ----
if (is.null(xlab)) xlab <- paste(predictor, "Coefficient")
coef_idi <- which(names(x$summary) == paste("Coefficient", predictor))
if (is.null(xnames)) xnames <- x$ynames
dplot3_volcano(
x = x$summary[[coef_idi]],
pvals = x$summary[[pval_idi]],
group = group,
x.thresh = 0,
label.lo = "Neg",
label.hi = "Pos",
xnames = xnames,
xlab = xlab,
main = main,
p.adjust.method = p.adjust.method,
p.transform = volcano.p.transform,
annotate = volcano.annotate,
annotate.n = volcano.annotate.n,
hline = volcano.hline,
hline.annotate = volcano.hline.annotate,
hline.dash = volcano.hline.dash,
theme = theme,
alpha = alpha,
displayModeBar = displayModeBar,
verbose = trace > 0,
filename = filename, ...
)
}
} else {
cat('"massx" support not yet implemented')
}
} # rtemis::plot.massGLM
# print_transform <- function(x) gsub("[x()]", "", x)
print_fn <- function(x) {
fn <- deparse(x)[2]
fn <- gsub("\\(.*", "", fn)
if (fn == "I") fn <- ""
fn
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.