R/RestrictedPCA.R

Defines functions RestrictedPCA

Documented in RestrictedPCA

#'@title RestrictedPCA.
#'
#'@description
#'\code{RestrictedPCA} Combines an ANOVA based on 'fmod' and restricts a PCA using the ANOVA result as a filter.
#'
#'@details
#'\code{fmod} should be something like 'GT*TR+Batch' to perform an ANOVA with these factors defined as columns in sam.
#'
#'@param dat Metabolite matrix (samples x metabolites).
#'@param sam Sample definition dataframe.
#'@param use.sam Numeric index vector (or logical) to select specific samples to be included in the analysis or NULL to include all.
#'@param fmod ANOVA model to calculate before PCA.
#'@param sign.col Which column(s) of the ANOVA result shall be used for P-value filtering (specify column names or leave on NULL to filter on all).
#'@param p.adjust.method Method use to adjust P-values (e.g. none, BH or bonferroni).
#'@param P P-value threshold used as a cutoff after P-value adjustment.
#'@param pcaMethods.scale pcaMethods scale parameter (usually pareto for metabolite data).
#'@param n.metab.min Minimum number of metabolites kept for PCA calculation (even if they exceed P).
#'@param group.col Column used for legend creation (column name from sam).
#'@param text.col Column used for text annotation of data points (column name from sam).
#'@param ... Handed through to \code{\link{PlotMetabolitePCA}}.
#'
#'@return
#'Will generate a PCA plot (generated by \link{PlotMetabolitePCA} internally) restricted based on an ANOVA result based on \link{MetaboliteANOVA}.
#'
#'@examples
#'# load raw data and sample description
#'utils::data(raw, package = "MetabolomicsBasics")
#'utils::data(sam, package = "MetabolomicsBasics")
#'# standard behavior
#'RestrictedPCA(dat=raw, sam=sam, group.col="GT")
#'\dontrun{
#'# apply multiple testing using a strict P-value cutoff,
#'# dont show a legend but plot group mean values and sd's as overlay
#'RestrictedPCA(dat=raw, sam=sam, group.col="GT", p.adjust.method = "BH", P=10^-10,
#'              fmod="GT+Batch+Order", sign.col="GT", medsd=T, legend.x=NULL)
#'# limit to a subset of samples, switching the ANOVA selection of by setting P=1
#'# and adding text (from \code{sam}) to each data point
#'RestrictedPCA(dat=raw, sam=sam, use.sam=which(sam$GT%in%c("Mo17","B73")), group.col="GT",
#'              fmod="GT+Batch+Order", P=1, sign.col="GT", legend.x=NULL, text.col="Batch")
#'}
#'
#'@export
#'
#'@importFrom pcaMethods pca
#'
RestrictedPCA <- function(dat=NULL, sam=NULL,  use.sam=NULL, group.col=NULL, text.col=NULL, fmod=NULL, sign.col=NULL, p.adjust.method="none", P=0.01, pcaMethods.scale="pareto", n.metab.min=20, ...) {
  # helper function to identify cols/rows with all NA data
  NAcrs <- function(x) {list("r"=unlist(apply(x,1,function(y){!all(is.na(y))})),"c"=unlist(apply(x,2,function(y){!all(is.na(y))})))}
  stopifnot(length(group.col %in% colnames(sam))==1)
  if (is.null(use.sam)) use.sam <- 1:nrow(dat)
  if (is.factor(use.sam)) use.sam <- which(use.sam)
  stopifnot(length(use.sam)>=2)
  filt <- NAcrs(dat[use.sam,])
  dat <- dat[use.sam,][filt$r,filt$c]
  sam <- sam[use.sam,][filt$r,,drop=FALSE]
  if (!is.null(text.col) && !is.character(sam[,text.col])) sam[,text.col] <- as.character(sam[,text.col])
  if (!is.null(fmod)) {
    out <- MetaboliteANOVA(dat = dat, sam = sam, model = fmod)
  } else {
    out <- data.frame("none"=rep(0,ncol(dat)))
    fmod <- "none"
  }
  if (is.null(sign.col)) sign.col <- colnames(out)
  if (length(sign.col)>=2) {
    p_adj <- apply(apply(out[,sign.col],2,p.adjust,p.adjust.method), 1, function(x) {ifelse(all(is.na(x)), NA, min(x,na.rm=T))})
  } else {
    p_adj <- p.adjust(out[,sign.col],p.adjust.method)
  }
  if (sum(is.finite(p_adj))<n.metab.min+1) {
    P <- 1
  } else {
    P <- ifelse(sort(p_adj)[n.metab.min+1]>=P, sort(p_adj)[n.metab.min+1], P)
  }
  use.met <- which(p_adj <= P)
  filt <- NAcrs(dat[,use.met])
  PlotMetabolitePCA(pca_res=pcaMethods::pca(dat[,use.met][filt$r,filt$c], scale=pcaMethods.scale, method="rnipals"),
                    sam=sam[,c("cols","pchs",text.col)][filt$r,,drop=FALSE], g=factor(sam[,group.col][filt$r]),
                    comm=c(paste("m=",length(use.met),sep=""), p.adjust.method, paste("P<=", ifelse(P<0.01, formatC(P,digits=2,format="E"), round(P,2)),sep="")),
                    text.col=text.col, ...)
  invisible(data.frame(out,"selected"=p_adj <= P))
}

Try the MetabolomicsBasics package in your browser

Any scripts or data that you put into this service are public.

MetabolomicsBasics documentation built on May 2, 2019, 7:25 a.m.