Nothing
#' @title RestrictedPCA.
#' @description \code{RestrictedPCA} combines an ANOVA based on 'fmod' and
#' restricts a PCA using the ANOVA result as a filter.
#' @details `fmod` should be something like 'GT*TR+Batch' to perform
#' an ANOVA with these factors defined as columns in sam.
#' @param dat Metabolite matrix (samples x metabolites).
#' @param sam Sample definition dataframe.
#' @param use.sam Numeric index vector (or logical) to select specific samples to be included in the analysis or NULL to include all.
#' @param fmod ANOVA model to calculate before PCA.
#' @param sign.col Which column(s) of the ANOVA result shall be used for P-value filtering (specify column names or leave on NULL to filter on all).
#' @param p.adjust.method Method use to adjust P-values (e.g. none, BH or bonferroni).
#' @param P P-value threshold used as a cutoff after P-value adjustment.
#' @param pcaMethods.scale pcaMethods scale parameter (usually pareto for metabolite data).
#' @param n.metab.min Minimum number of metabolites kept for PCA calculation (even if they exceed P).
#' @param group.col Column used for legend creation (column name from sam).
#' @param text.col Column used for text annotation of data points (column name from sam).
#' @param ... Handed through to \code{\link{PlotMetabolitePCA}}.
#' @return Will generate a PCA plot (generated by \link{PlotMetabolitePCA} internally)
#' restricted based on an ANOVA result based on \link{MetaboliteANOVA}.
#' @examples
#' # load raw data and sample description
#' raw <- MetabolomicsBasics::raw
#' sam <- MetabolomicsBasics::sam
#' # standard behavior
#' RestrictedPCA(dat = raw, sam = sam, group.col = "GT")
#' \dontrun{
#' # apply multiple testing using a strict P-value cutoff,
#' # dont show a legend but plot group mean values and sd's as overlay
#' RestrictedPCA(
#' dat = raw, sam = sam, group.col = "GT", p.adjust.method = "BH", P = 10^-10,
#' fmod = "GT+Batch+Order", sign.col = "GT", medsd = T, legend.x = NULL
#' )
#' # limit to a subset of samples, switching the ANOVA selection of by setting P=1
#' # and adding text (from \code{sam}) to each data point
#' RestrictedPCA(
#' dat = raw, sam = sam, use.sam = which(sam$GT %in% c("Mo17", "B73")), group.col = "GT",
#' fmod = "GT+Batch+Order", P = 1, sign.col = "GT", legend.x = NULL, text.col = "Batch"
#' )
#' }
#' @export
#' @importFrom pcaMethods pca
RestrictedPCA <- function(dat = NULL, sam = NULL, use.sam = NULL, group.col = NULL, text.col = NULL, fmod = NULL, sign.col = NULL, p.adjust.method = "none", P = 0.01, pcaMethods.scale = "pareto", n.metab.min = 20, ...) {
# helper function to identify cols/rows with all NA data
NAcrs <- function(x) {
list("r" = unlist(apply(x, 1, function(y) {
!all(is.na(y))
})), "c" = unlist(apply(x, 2, function(y) {
!all(is.na(y))
})))
}
stopifnot(length(group.col %in% colnames(sam)) == 1)
if (is.null(use.sam)) use.sam <- 1:nrow(dat)
if (is.factor(use.sam)) use.sam <- which(use.sam)
stopifnot(length(use.sam) >= 2)
filt <- NAcrs(dat[use.sam, ])
dat <- dat[use.sam, ][filt$r, filt$c]
sam <- as.data.frame(sam[use.sam, ][filt$r, , drop = FALSE]) # convert potential tibble to df
if (!is.null(text.col) && !is.character(sam[, text.col])) sam[, text.col] <- as.character(sam[, text.col])
if (!is.null(fmod)) {
colnames(dat) <- 1:ncol(dat)
out <- MetaboliteANOVA(dat = dat, sam = sam, model = fmod)
} else {
out <- data.frame("none" = rep(0, ncol(dat)))
fmod <- "none"
}
if (is.null(sign.col)) sign.col <- colnames(out)
if (length(sign.col) >= 2) {
p_adj <- apply(apply(out[, sign.col], 2, p.adjust, p.adjust.method), 1, function(x) {
ifelse(all(is.na(x)), NA, min(x, na.rm = T))
})
} else {
p_adj <- p.adjust(out[, sign.col], p.adjust.method)
}
if (sum(is.finite(p_adj)) < n.metab.min + 1) {
P <- 1
} else {
P <- ifelse(sort(p_adj)[n.metab.min + 1] >= P, sort(p_adj)[n.metab.min + 1], P)
}
use.met <- which(p_adj <= P)
filt <- NAcrs(dat[, use.met])
PlotMetabolitePCA(
pca_res = pcaMethods::pca(dat[, use.met][filt$r, filt$c], scale = pcaMethods.scale, method = "rnipals"),
sam = sam[, c("cols", "pchs", text.col)][filt$r, , drop = FALSE], g = factor(sam[, group.col][filt$r]),
comm = c(paste("m=", length(use.met), sep = ""), p.adjust.method, paste("P<=", ifelse(P < 0.01, formatC(P, digits = 2, format = "E"), round(P, 2)), sep = "")),
text.col = text.col, ...
)
invisible(data.frame(out, "selected" = p_adj <= P))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.