# massGLAM.R
# ::rtemis::
# 2021-2 E.D. Gennatas www.lambdamd.org
#' Mass-univariate GLM Analysis
#'
#' Run a mass-univariate analysis with either:
#' a) single outome (y) and multiple predictors (x), one at a time, with an
#' optional common set of covariates in each model - "massx"
#' b) multiple different outcomes (y) with a fixed set of predictors (x) - "massy"
#' Therefore, the term mass-univariate refers to looking at one variable of
#' interest (with potential covariates of no interest) at a time
#'
#' @param x Matrix / data frame of features
#' @param y Matrix / data frame of outcomes
#' @param scale.x Logical: If TRUE, scale and center `x`
#' @param scale.y Logical: If TRUE, scale and center `y`
#' @param mod Character: "glm" or "gam".
#' @param type Character: "massx" or "massy". Default = NULL,
#' where if (NCOL(x) > NCOL(y)) "massx" else "massy"
#' @param xnames Character vector: names of `x` feature(s)
#' @param ynames Character vector: names of `y` feature(s)
#' @param spline.index Integer vector: indices of features to fit splines for.
#' @param gam.k Integer: The dimension of the spline basis.
#' @param save.mods Logical: If TRUE, save models. Default = TRUE
# @param p.adjust.method Character: p-value adjustment method. See \code{p.adjust}.
# Default = "holm"
#' @param print.plot Logical: If TRUE, print plot. Default = FALSE (best to
#' choose which p-values you want to plot directly)
#' @param include_anova_pvals Logical: If TRUE, include ANOVA p-values,
#' generated by `glm2table` (internal function)
#' @param trace Integer: If > 0, print more verbose output to console.
#' @param verbose Logical: If TRUE, print messages during run
#' @param n.cores Integer: Number of cores to use. (Testing only, do not
#' change from 1)
#'
#' @author E.D. Gennatas
#' @export
#'
#' @examples
#' \dontrun{
#' # Common usage is "reversed":
#' # x: outcome of interest as first column, optional covariates
#' # in the other columns
#' # y: features whose association with x we want to study
#' set.seed(2022)
#' features <- rnormmat(500, 40)
#' outcome <- features[, 3] - features[, 5] + features[, 14] + rnorm(500)
#' massmod <- massGLAM(outcome, features)
#' plot(massmod)
#' plot(massmod, what = "coef")
#' plot(massmod, what = "volcano")
#' }
#'
massGLAM <- function(x, y,
scale.x = FALSE,
scale.y = FALSE,
mod = c("glm", "gam"),
type = NULL,
xnames = NULL,
ynames = NULL,
spline.index = NULL,
gam.k = 6,
save.mods = TRUE,
print.plot = FALSE,
include_anova_pvals = NA,
verbose = TRUE,
trace = 0,
n.cores = 1) {
# Intro ----
.call <- match.call()
.call[2] <- list(str2lang("dat"))
start.time <- intro(verbose = verbose)
mod <- match.arg(mod)
if (mod == "gam") {
if (is.null(spline.index)) spline.index <- which(sapply(x, is.numeric))
}
# Data ----
if (is.null(type)) type <- if (NCOL(x) > NCOL(y)) "massx" else "massy"
if (trace > 0) msg20('massGLAM type is "', type, '"')
if (type == "massx") {
if (is.null(colnames(x))) colnames(x) <- paste0("Feature_", seq_len(NCOL(x)))
nmods <- NCOL(x)
} else {
if (is.null(colnames(y))) colnames(y) <- paste0("Outcome_", seq_len(NCOL(y)))
nmods <- NCOL(y)
}
if (is.null(xnames)) {
xnames <- if (is.null(colnames(x))) {
paste(deparse(substitute(x)), seq_len(NCOL(x)), sep = "_")
} else {
colnames(x)
}
}
if (trace > 0) msg2("Feature names:", paste(xnames, collapse = ", "))
if (is.null(ynames)) {
ynames <- if (is.null(colnames(y))) deparse(substitute(y)) else colnames(y)
}
if (trace > 0) msg2("Outcome names:", paste(ynames, collapse = ", "))
if (scale.x) x <- scale(x)
if (scale.y) y <- scale(y)
dat <- data.frame(x, y)
colnames(dat) <- c(xnames, ynames)
# mod1: Loop function ----
mod1_glm <- function(index, dat, type) {
if (type == "massx") {
.formula <- as.formula(paste(ynames, "~", xnames[index]))
.family <- if (is.factor(dat[[ynames]])) "binomial" else "gaussian"
glm(.formula, family = .family, data = dat)
} else {
.formula <- as.formula(paste(ynames[index], "~", paste(xnames, collapse = " + ")))
.family <- if (is.factor(dat[[ynames[index]]])) "binomial" else "gaussian"
glm(.formula, family = .family, data = dat)
}
}
mod1_gam <- function(index, dat, type) {
if (type == "massx") {
.formula <- gam_formula(
xnames[index], ynames,
spline.index = spline.index, k = gam.k
)
.family <- if (is.factor(dat[[ynames]])) "binomial" else "gaussian"
glm(.formula, family = .family, data = dat)
} else {
.formula <- gam_formula(
xnames, ynames[index],
spline.index = spline.index, k = gam.k
)
.family <- if (is.factor(dat[[ynames[index]]])) "binomial" else "gaussian"
mgcv::gam(.formula, family = .family, data = dat)
}
}
# Models ----
if (verbose) msg2("Training", nmods, "mass-GL/AM models...")
if (verbose) {
pbapply::pboptions(type = "timer")
} else {
pbapply::pboptions(type = "none")
}
mods <- pbapply::pblapply(
seq_len(nmods),
if (mod == "glm") mod1_glm else mod1_gam,
dat = dat,
type = type,
cl = n.cores
)
names(mods) <- if (type == "massx") xnames else ynames
# Outro ----
# summary: a data.table with columns:
# "Variable", ["Coefficient x", "SE x", "t_value x", "p_value x" for x in xnames]
out <- list(
mods = if (save.mods) mods else NULL,
summary = if (mod == "glm") {
glm2table(mods, include_anova_pvals = include_anova_pvals)
} else {
gam2table(mods)
},
xnames = xnames,
coefnames = names(coef(mods[[1]])),
ynames = ynames,
type = type,
call = .call
)
class(out) <- if (mod == "glm") c("massGLM", "list") else c("massGAM", "list")
if (print.plot) print(plot(out))
outro(start.time, verbose = verbose)
out
} # rtemis::massGLAM
#' `print`[massGAM] object
#'
#' @method print massGAM
#' @param x [massGAM] object
#' @param ... Not used
#'
#' @author E.D. Gennatas
#' @export
print.massGAM <- function(x, ...) {
nx <- length(x$xnames)
ny <- length(x$ynames)
.text <- paste(
"Mass-univariate GAM analysis with", nx,
ngettext(nx, "predictor", "predictors"),
"and", ny, ngettext(ny, "outcome", "outcomes\n")
)
cat(.text)
invisible(x)
}
#' `massGAM` object summary
#'
#' @param object A `massGAM` object created by [massGLAM]
#' @param ... Not used
#'
#' @author E.D. Gennatas
#' @export
summary.massGAM <- function(object, ...) {
print(object$summary, row.names = FALSE, class = FALSE)
}
#' Plot `massGAM` object
#'
#' Plots a `massGAM` object using [dplot3_bar]
#'
#' @method plot massGAM
#' @param x `massGAM` object
# @param what Character: "adjusted" or "raw" p-values to plot
#' @param xlab Character: x-axis label for volcano plot
#'
#' @author E.D. Gennatas
#' @export
plot.massGAM <- function(x,
predictor = NULL,
main = NULL,
# what = c("coefs", "pvals", "volcano"),
what = "pvals",
p.adjust.method = "none",
p.transform = \(x) -log10(x),
show = c("all", "signif"),
pval.hline = c(.05, .001),
hline.col = NULL,
hline.dash = "dash",
hline.annotate = as.character(pval.hline),
ylim = NULL,
xlab = NULL,
ylab = NULL,
group = NULL,
grouped.nonsig.alpha = .5,
order.by.group = TRUE,
palette = rtPalette,
col.sig = "#43A4AC",
# col.neg = "#43A4AC",
# col.pos = "#FA9860",
col.ns = "#7f7f7f",
theme = rtTheme,
alpha = NULL,
# volcano.annotate = TRUE,
# volcano.annotate.n = 7,
# volcano.hline = NULL,
# volcano.hline.dash = "dot",
# volcano.hline.annotate = NULL,
# volcano.p.transform = \(x) -log10(x),
margin = NULL,
displayModeBar = FALSE,
trace = 0,
filename = NULL, ...) {
# Arguments ----
what <- match.arg(what)
show <- match.arg(show)
if (is.character(palette)) palette <- unlist(rtpalette(palette))
if (!is.null(group)) {
group <- factor(group)
col <- palette[as.integer(group)]
}
if (x$type == "massy") {
if (is.null(predictor)) {
predictor <- x$xnames[1]
}
what <- match.arg(what)
# pval_idi <- which(names(x$summary) == paste("p_value", predictor))
pval_name <- grep(predictor, names(x$summary), value = TRUE)
if (length(pval_name) > 1) {
warning(
"Search for ", predictor, "; returned", length(pval_name), "matches:",
pval_name, "; using", pval_name[1]
)
pval_name <- pval_name[1]
}
if (what == "pvals") {
# p-values ----
if (is.null(main)) main <- "p-values"
# pval_idi <- grep(paste("p_value", predictor), names(x$summary))[1]
.name <- gsub("p_value ", "", pval_name)
.pvals <- p.adjust(x$summary[[pval_name]], method = p.adjust.method)
if (is.null(group)) {
.cols <- rep(col.ns, length(.pvals))
.cols[.pvals < .05] <- col.sig
} else {
.cols <- col
.cols[.pvals >= .05] <- adjustcolor(
.cols[.pvals >= .05],
alpha.f = grouped.nonsig.alpha
)
}
if (is.null(ylab)) {
ylab <- paste(
# print_transform(deparse(p.transform)[2]),
print_fn(p.transform),
what, .name, "p-value"
)
}
if (!is.null(group) && order.by.group) {
group_order <- order(group)
} else {
group_order <- seq_along(.pvals)
}
dplot3_bar(
p.transform(.pvals)[group_order],
group.names = x$ynames[group_order],
main = main,
ylim = ylim,
legend = FALSE,
ylab = ylab,
col = .cols[group_order],
hline = p.transform(pval.hline),
hline.col = hline.col,
hline.dash = hline.dash,
hline.annotate = hline.annotate,
theme = theme,
margin = margin,
displayModeBar = displayModeBar,
filename = filename, ...
)
} else if (what == "coefs") {
# # Coefficients ----
# if (is.null(main)) main <- "Coefficients"
# coef_idi <- which(names(x$summary) == paste("Coefficient", predictor))
# .name <- gsub("Coefficient ", "", names(x$summary)[coef_idi])
# .pvals <- p.adjust(x$summary[[pval_idi]], method = p.adjust.method)
# .coefname <- getnames(x$summary, paste("Coefficient", .name))
# .cols <- rep(col.ns, length(x$summary[[.coefname]]))
# .cols[x$summary[[.coefname]] < 0 & .pvals < .05] <- col.neg
# .cols[x$summary[[.coefname]] > 0 & .pvals < .05] <- col.pos
# dplot3_bar(x$summary[[coef_idi]],
# # group.names = if (x$type == "massy") x$ynames else x$xnames,
# group.names = x$ynames,
# main = main,
# legend = FALSE,
# ylab = paste(.name, "Coefficients"),
# col = .cols,
# theme = theme,
# margin = margin,
# displayModeBar = displayModeBar,
# filename = filename, ...
# )
} else {
# Volcano ----
# if (is.null(xlab)) xlab <- paste(predictor, "Coefficient")
# coef_idi <- which(names(x$summary) == paste("Coefficient", predictor))
# dplot3_volcano(
# x = x$summary[[coef_idi]],
# pvals = x$summary[[pval_idi]],
# group = group,
# x.thresh = 0,
# label.lo = "Neg",
# label.hi = "Pos",
# xnames = x$ynames,
# xlab = xlab,
# main = main,
# p.adjust.method = p.adjust.method,
# p.transform = volcano.p.transform,
# annotate = volcano.annotate,
# annotate.n = volcano.annotate.n,
# hline = volcano.hline,
# hline.annotate = volcano.hline.annotate,
# hline.dash = volcano.hline.dash,
# theme = theme,
# alpha = alpha,
# displayModeBar = displayModeBar,
# verbose = trace > 0,
# filename = filename, ...
# )
}
} else {
cat('"massx" support not yet implemented')
}
} # rtemis::plot.massGAM
# print_transform <- function(x) gsub("[x()]", "", x)
print_fn <- function(x) {
fn <- deparse(x)[2]
fn <- gsub("\\(.*", "", fn)
if (fn == "I") fn <- ""
fn
}
gam_formula <- function(xnames, yname, spline.index, k = 6) {
# if (is.null(spline.index)) spline.index <- which(sapply(x, is.numeric))
lin.index <- which(!(seq_along(xnames) %in% spline.index))
spline.features <- paste0(
"s(", xnames[spline.index], ", k = ", k, ")",
collapse = " + "
)
lin.features <- if (length(lin.index) > 0) {
paste0(xnames[lin.index], collapse = " + ")
} else {
NULL
}
features <- if (is.null(lin.features)) {
spline.features
} else {
paste(spline.features, lin.features, sep = " + ")
}
as.formula(paste(yname, "~", features))
} # rtemis::gam_formula
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.