R/bootMoa.R

Defines functions bootMoa

Documented in bootMoa

#' Significant components in "moa" returned by function "moa".
#' 
#' Using bootstrap method to extract the components representing significant
#' concordance structures between datasets from "moa" (returned by function
#' "moa").
#' 
#' set plot=TRUE to help selecting significant components.
#' 
#' @param moa An object of \code{\link{moa}} returned by \code{\link{moa}}.
#' @param proc.row Preprocessing of rows of datasets, should be one of
#' \code{none} - no preprocessing, \code{center} - center only,
#' \code{center_ssq1} - center and scale (sum of squred values equals 1),
#' \code{center_ssqN} - center and scale (sum of squred values equals the
#' number of columns), \code{center_ssqNm1} - center and scale (sum of squred
#' values equals the number of columns - 1) MFA corresponds to
#' "proc.row=center_ssq1" and 'w.data="lambda1"'
#' @param w.data The weights of each separate dataset, should be one of
#' 
#' \code{uniform} - no weighting,
#' 
#' \code{lambda1} - weighted by the reverse of the first eigenvalue of each
#' individual dataset
#' 
#' or \code{inertia} - weighted by the reverse of the total inertia.  See
#' detail.
#' @param w.row If it is not null, it should be a list of positive numerical
#' vectors, the length of which should be the same with the number of rows of
#' each dataset to indicated the weight of rows of datasets.
#' @param statis A logical indicates whether STATIS method should be used. See
#' details.
#' @param mc.cores Integer; number of cores used in bootstrap. This value is
#' passed to function \code{mclapply}
#' @param B Integer; number of bootstrap
#' @param replace Logical; sampling with or without replacement
#' @param resample Could be one of "sample", "gene" or "total". "sample" and
#' "gene" means sample-wise and variable-wise resampling, repectively. "total"
#' means total resampling.
#' @param plot Logical; whether the result should be plotted.
#' @param log Could be "x", "y" or "xy" for plot log axis.
#' @param tol The minimum eigenvalues shown in the plot.
#' @return A matrix where columns are components and rows are variance of PCs
#' from bootstrap samples.
#' @author Chen Meng
#' @seealso \code{\link{moa}}, \code{\link{sup.moa}}, \code{\link{mogsa}}. More
#' about plot see \code{\link{moa-class}}.
#' @references Herve Abdi, Lynne J. Williams, Domininique Valentin and Mohammed
#' Bennani-Dosse. STATIS and DISTATIS: optimum multitable principal component
#' analysis and three way metric multidimensional scaling. WIREs Comput Stat
#' 2012. Volume 4, Issue 2, pages 124-167 Herve Abdi, Lynne J. Williams,
#' Domininique Valentin. Multiple factor analysis: principal component analysis
#' for multitable and multiblock data sets. WIREs Comput Stat 2013
#' @export
#' @examples
#' 
#'   # see function moa
#' 
bootMoa <- function(
  moa, proc.row="center_ssq1", w.data="inertia", w.row=NULL, statis=FALSE,
  mc.cores=1, B = 100, replace=TRUE, resample=c("sample", "gene", "total"),
  plot=TRUE, log="y", tol = 1e-7
) {
  if (plot) {
    if (max(moa@eig) < tol)
      stop("tolerance is greater than highest eigenvalue, decrease tol!")
  }
  data <- lapply(moa@data, as.matrix)
  call <- moa@call
  
  checkParam <- function(call, x, input, opt) {
    if (is.null(call[[x]]))
      return()
    
    if (is.character(call[[x]])){
      v <- try(
        match.arg(call[[x]], opt), 
        silent = TRUE
      )
      if (class(v) == "try-error" || v != input)
        warning(paste0("Double check parameter '", x, "', it may not consistant with moa call!"))
    } else if (is.logical(call[[x]]))
      if (call[[x]] != input)
        warning(paste0("Double check parameter '", x, "', it may not consistant with moa call!"))
  }
  
  checkParam(call, "proc.row", input = proc.row, opt = c("none", "center", "center_ssq1", "center_ssqN", "center_ssqNm1"))
  checkParam(call, "w.data", input = w.data, opt = c("uniform", "lambda1", "inertia"))
  checkParam(call, "statis", input = statis, opt = c(TRUE, FALSE))
  checkParam(call, "w.row", input = w.row)
  
  resample <- match.arg(resample, c("sample", "gene", "total"))
  resampleMoa <- function(d, replace, resample, proc.row, w.data, w.row, statis) {
    rsd <- switch(
      resample,
      "sample" = lapply(d, function(x) structure(
        x[, sample(1:ncol(x), replace = replace)], 
        dimnames=list(rownames(x), colnames(x)))),
      "gene" = lapply(d, function(x) structure(
        t(apply(x, 1, sample, replace=replace)), 
        dimnames=list(rownames(x), colnames(x)))),
      "total" = lapply(d, function(x) structure(
        apply(x, 2, sample, replace=replace), 
        dimnames=list(rownames(x), colnames(x))))
      )
    
    res <- moa(data = rsd, proc.row=proc.row, w.data=w.data, w.row=w.row, statis=statis, moa=FALSE)
    res$d^2
  }
  r <- mclapply(1:B, mc.cores = mc.cores, function(x) 
    resampleMoa(data, replace = replace, resample = resample, 
                proc.row=proc.row, w.data=w.data, w.row=w.row, statis=statis)
  )
  btk <- do.call("rbind", r)
  if (plot) {
    sc <- min(ncol(btk), length(moa@eig))
    isc <- intersect(which(moa@eig > tol), 1:sc)
    boxplot(rbind(moa@eig[isc], btk[, isc]), col=NA, border = NA, log=log)
    sds <- apply(btk[, isc], 2, sd)
    means <- apply(btk[, isc], 2, mean)
    points(isc,  means, pch=15, col="red")
    points(isc,  means+1.96*sds, col="red", pch="_")
    points(isc,  means-1.96*sds, col="red", pch="_")
    segments(isc, means+1.96*sds, isc, means-1.96*sds, col="red")
    lines(isc, moa@eig[isc], pch=20)
    points(isc, moa@eig[isc], pch=20)
  }
  colnames(btk) <- paste("PC", 1:ncol(btk), sep="")
  rownames(btk) <- paste("sample", 1:nrow(btk), sep="")
  btk
}
mengchen18/mogsa documentation built on June 7, 2020, 6:05 p.m.