R/functions-svd.R

Defines functions lean_esetSpectralMap flexible_mpm createSpectralMap

Documented in createSpectralMap flexible_mpm lean_esetSpectralMap

# dependencies
# library(esetVis)
# library(mpm) # for mpm()
# library(corpcor) # for fast.svd()
# library(svd) # for propack.svd()

#' lean_esetSpectralMap
#'
#' @title plot spectral map biplot of an eSet object
#' @description This function is a modified version of function esetSpectralMap(). This function 
#'              plots the biplot of the specified dimensions of an input mpm object (mpm object 
#'              contains the singular value decomposition / PCA of a matrix).
lean_esetSpectralMap <- function(
  
  # passed via input parameters of createSpectralMap
  color, topSamples, topGenes,  cloudGenes, 
  
  # created (computed or declared) in createSpectralMap and passed to lean_esetSpectralMap in 
  # createSpectralMap
  mpm_result, esetUsed, colorVar, psids, mpm.args, symmetryAxes, packageTextLabel, 
  packageInteractivity,
  
  # declared while calling lean_esetSpectralMap in createSpectralMap
  topGenesVar, cloudGenesIncludeLegend, cloudGenesColor, title, dim, returnAnalysis,
  typePlot = c("static", "interactive")
  
) {
  require(esetVis)
  
  typePlot = match.arg(typePlot)
  
  ## Default parameters of function esetSpectralMap which are not changed in createSpectralMap.
  # Hence these are declared as local variables of lean_esetSpectralMap.
  plot.mpm.args=list(scale="uvc")
  cloudGenesNBins = sqrt(length(psids)); cloudGenesTitleLegend = "nGenes"
  topGenesCex = 2.5; topGenesJust = c(0.5, 0.5); topGenesColor = "black"; topSamplesCex = 2.5
  topSamplesVar = character(); topSamplesJust = c(0.5, 0.5); topSamplesColor = "black"
  geneSets = list(); geneSetsVar = character(); geneSetsMaxNChar = numeric(); topGeneSets = 10
  topGeneSetsCex = 2.5; topGeneSetsJust = c(0.5, 0.5); topGeneSetsColor = "black"
  includeLegend = TRUE; includeLineOrigin = TRUE; figInteractiveSize = c(600, 400); 
  ggvisAdjustLegend = TRUE; interactiveTooltip = TRUE; interactiveTooltipExtraVars = character()
  
  is_R_3p5_or_higher = (R.version$major == "3" & floor(as.numeric(R.version$minor)) >= 5)
  is_R_3p4_or_lesser = (R.version$major == "3" & floor(as.numeric(R.version$minor)) <= 4)
  if (is_R_3p5_or_higher) { # R 3.5 and higher
    shapeVar = character(); shape = if (length(shapeVar) == 0) 15 else numeric()
    sizeVar = character(); sizeRange = numeric()
    size = if (length(sizeVar) == 0) {
      ifelse(typePlot == "interactive" && length(packageInteractivity) == 1 &&
               packageInteractivity == "rbokeh", 5, 2.5)
    } else { numeric() }
    alphaVar=character(); alpha = if (length(alphaVar) == 0) 1 else numeric(); alphaRange=numeric()
    returnEsetPlot = FALSE
  } else if (is_R_3p4_or_lesser) { # R 3.4 and lesser
    shapeVar = NULL; shape = if (is.null(shapeVar)) 15 else NULL
    sizeVar = NULL; sizeRange = NULL
    size = if (is.null(sizeVar)) 2.5 else NULL
    alphaVar = NULL; alpha = if (is.null(alphaVar)) 1 else NULL; alphaRange = NULL 
  }
  
  ## plot PCA via mpm
  argsPlotMpm <- c(plot.mpm.args, list(x = mpm_result, do.plot = FALSE, dim = dim))
  mpmCoords <- do.call("plot.mpm", argsPlotMpm)
  
  ## create plot data for esetVis
  dataPlotSamples <- data.frame(mpmCoords$Columns[, c("X", "Y")], 
                                sampleName = rownames(mpmCoords$Columns))
  dataPlotGenes <- data.frame(mpmCoords$Rows[, c("X", "Y")], 
                              geneName = rownames(mpmCoords$Rows))
  pctVar <- round(mpm_result$contrib * 100)
  axisLabels <- paste0("PC", dim, ": ", pctVar[dim], "%")
  
  ## Declare arguments of esetPlotWrapper
  esetPlotWrapper.args <- list(
    dataPlotSamples = dataPlotSamples, dataPlotGenes = dataPlotGenes, esetUsed = esetUsed, 
    xlab = axisLabels[1], ylab = axisLabels[2], title = title, symmetryAxes = symmetryAxes, 
    colorVar = colorVar, color = color, shapeVar = shapeVar, shape = shape, 
    sizeVar = sizeVar, size = size, sizeRange = sizeRange, 
    alphaVar = alphaVar, alpha = alpha, alphaRange = alphaRange, 
    cloudGenes = cloudGenes, cloudGenesColor = cloudGenesColor, cloudGenesNBins=cloudGenesNBins,
    cloudGenesIncludeLegend=cloudGenesIncludeLegend,cloudGenesTitleLegend=cloudGenesTitleLegend,
    topGenes = topGenes, topGenesCex = topGenesCex, topGenesVar = topGenesVar,
    topGenesJust = topGenesJust, topGenesColor = topGenesColor, topSamples = topSamples, 
    topSamplesCex = topSamplesCex, topSamplesVar = topSamplesVar, topSamplesJust=topSamplesJust,
    topSamplesColor = topSamplesColor, geneSets = geneSets, geneSetsVar = geneSetsVar, 
    geneSetsMaxNChar = geneSetsMaxNChar, topGeneSets=topGeneSets, topGeneSetsCex=topGeneSetsCex,
    topGeneSetsJust = topGeneSetsJust, topGeneSetsColor = topGeneSetsColor, 
    includeLegend = includeLegend, includeLineOrigin = includeLineOrigin, typePlot = typePlot,
    figInteractiveSize = figInteractiveSize,ggvisAdjustLegend = ggvisAdjustLegend, 
    interactiveTooltip=interactiveTooltip,interactiveTooltipExtraVars=interactiveTooltipExtraVars,
    returnTopElements = returnAnalysis)
  
  ## Plot using estVis
  if (is_R_3p5_or_higher) { # only difference is in parameter returnEsetPlot
    plot <- do.call("esetPlotWrapper", # esetVis::esetPlotWrapper
                    c(returnEsetPlot = returnEsetPlot, esetPlotWrapper.args))
  } else {
    plot <- do.call("esetPlotWrapper", esetPlotWrapper.args) # esetVis::esetPlotWrapper
  }
  
  ## create return object
  res <- if (!returnAnalysis) {
    plot
  } else {
    c(
      list(analysis = list(dataPlotSamples = dataPlotSamples, dataPlotGenes = dataPlotGenes,
                           esetUsed = esetUsed, axisLabels = axisLabels, 
                           axesContributionsPercentages = mpm_result$contrib * 100)),
      if (!is.null(plot$topElements)) list(topElements = plot$topElements), 
      list(plot = if (inherits(plot, "ggplot")) plot else plot$plot)
    )
  }
  
  return(res)
}

#' flexible_mpm
#'
#' @title Principal component analysis of a matrix
#' @description This function is a modified version of function mpm(). Depending upon requirement 
#'              one can use the default PCA computation method of mpm() for analyzing all PCs or 
#'              use a truncated PCA.
#'              
#' @param svd.method character; character indicating the algorithm to be used for PCA. Valid inputs
#'        are 'default_svd' and 'truncated_svd'. For 'truncated_svd' the number of principal 
#'        components computed is equivalent to the value of the parameter max.PC.
#' @param max.PC numeric; number of principal components to compute in case of truncated PCA. 
#'        Default: 20
#' @note All other parameters of this function are same as that of function mpm().
flexible_mpm <- function(
  data, logtrans = TRUE, logrepl = 1e-09, 
  center=c("double","row","column","global","none"), normal=c("global","row","column","none"),
  closure = c("none","row","column","global","double"),
  row.weight = c("constant", "mean", "median", "max", "logmean", "RW"), 
  col.weight = c("constant", "mean", "median", "max", "logmean", "CW"), 
  CW=rep(1, ncol(data) - 1), RW = rep(1, nrow(data)), 
  pos.row = rep(FALSE, nrow(data)), pos.column=rep(FALSE, ncol(data) - 1),
  svd.method = 'default_svd', max.PC = 20
) {
  
  ## Check input data
  if (missing(data)) 
    stop("Argument \"data\" is missing, with no default")
  NData <- as.matrix(data[, -1])
  if (any(is.na(NData)) || !is.numeric(NData)) 
    stop("Data must be numeric without NA's")
  if (length(pos.row) != dim(NData)[1]) 
    stop("Length of pos.row argument not equal to number of rows in table")
  if (length(pos.column) != dim(NData)[2]) 
    stop("Length of pos.column argument not equal to number of columns in table")
  if (length(RW) != nrow(NData)) 
    stop("Length of RW not equal to number of rows in table")
  if (length(CW) != ncol(NData)) 
    stop("Length of CW not equal to number of columns in table")
  
  ## Get / create global variables
  center <- match.arg(center)
  normal <- match.arg(normal)
  closure <- match.arg(closure)
  row.weight <- match.arg(row.weight)
  col.weight <- match.arg(col.weight)
  logrepl <- max(1e-09, logrepl)
  Row.Names <- as.character(data[, 1])
  RData <- NData
  RData[pos.row, ] <- NA
  RData[, pos.column] <- NA
  
  ## Utility function for computing log
  logtransf <- function(x, logrepl, comp) {
    if (any(x <= 0, na.rm = TRUE)) {
      warning(paste("Non-positive data replaced by",logrepl,"in computing",comp,"in: spectralmap.\n"), 
              call. = FALSE)
      x[x <= 0] <- logrepl
    }
    return(log(x))
  }
  
  ## Pre svd and La.svd steps
  LData <- if (logtrans) logtransf(NData, logrepl, comp = "logarithms") else NData
  RM <- rowMeans(NData[, !pos.column])
  CM <- colMeans(NData[!pos.row, ])
  Wn <- pmax(0, switch(row.weight, constant = rep(1, length(RM)), 
                       mean = apply(RData, 1, mean, na.rm = TRUE), 
                       median = apply(RData, 1, median, na.rm = TRUE), 
                       max = apply(RData, 1, max, na.rm = TRUE), 
                       logmean = apply(logtransf(RData, logrepl, comp = "logmean weights"), 1, 
                                       mean, na.rm = TRUE), 
                       RW = RW))
  Wp <- pmax(0, switch(col.weight, constant = rep(1, length(CM)), 
                       mean = apply(RData, 2, mean, na.rm = TRUE), 
                       median = apply(RData, 2, median, na.rm = TRUE), 
                       max = apply(RData, 2, max, na.rm = TRUE), 
                       logmean = apply(logtransf(RData, logrepl, comp = "logmean weights"), 2, 
                                       mean, na.rm = TRUE), 
                       CW = CW))
  Wn[pos.row] <- 0
  Wp[pos.column] <- 0
  Wn <- Wn/sum(Wn)
  Wp <- Wp/sum(Wp)
  if (closure != "none" && any(LData < 0)) warning("Closure operation with non-positive data")
  Tn <- rowSums(LData[, !pos.column], na.rm = TRUE)
  Tp <- colSums(LData[!pos.row, ], na.rm = TRUE)
  Tt <- sum(LData[!pos.row, !pos.column], na.rm = TRUE)
  ClData <- switch(closure, none = LData, 
                   row = sweep(LData, 1, Tn, "/"), column = sweep(LData, 2, Tp, "/"), 
                   global = LData/Tt, double = Tt * sweep(sweep(LData, 1, Tn, "/"), 2, Tp, "/"))
  if (any(!is.finite(ClData))) stop("Division by 0 in closure operation")
  Mp <- colSums(sweep(ClData, 1, Wn, "*"))
  Mn <- rowSums(sweep(ClData, 2, Wp, "*"))
  Mg <- sum(Mp * Wp)
  CData <- switch(center, 
                  double = Mg + sweep(sweep(ClData, 2, Mp), 1, Mn), 
                  row = sweep(ClData, 1, Mn), column = sweep(ClData, 2, Mp), global = ClData - Mg,
                  none = ClData)
  Vp <- colSums(sweep(CData^2, 1, Wn, "*"))
  Vn <- rowSums(sweep(CData^2, 2, Wp, "*"))
  Vg <- sum(Vp * Wp)
  SData <- switch(normal, global = CData/sqrt(Vg), 
                  row = sweep(CData, 1, sqrt(Vn), "/"), column = sweep(CData, 2, sqrt(Vp), "/"), 
                  none = CData)
  WData <- sweep(sweep(SData, 1, sqrt(Wn), "*"), 2, sqrt(Wp), "*")
  
  ## PCA
  if(svd.method == 'truncated_svd') {
    require(svd)
    svd.res <- svd::propack.svd(WData, neig = max.PC) # truncated SVD
    svd.res$vt <- t(svd.res$v); svd.res$v <- NULL
  } else if(svd.method == 'fast_svd') {
    require(corpcor)
    svd.res <- corpcor::fast.svd(WData) # faster than mpm's default La.svd
    svd.res$vt <- t(svd.res$v); svd.res$v <- NULL
  } else if(svd.method == 'default_svd') {
    svd.res <- La.svd(WData) # This is the default method of function mpm to compute svd    
  }
  eigen <- svd.res$d^2
  
  ## create return object
  contrib <- eigen/sum(eigen)
  r <- list(TData = SData, row.names = Row.Names, col.names = names(data)[-1], 
            closure = closure, center = center, normal = normal, 
            row.weight = row.weight, col.weight = col.weight, eigen = eigen, 
            contrib = contrib, Rm = RM, Cm = CM, Wn = Wn, Wp = Wp, 
            SVD = svd.res, pos.column = pos.column, pos.row = pos.row, 
            call = match.call())
  class(r) <- "mpm"
  
  return(r) 
}

#' createSpectralMap
#'
#' @title Generate SpectralMaps using the first 3 PCs
#' @description This function is a wrapper for esetVis::esetSpectralMap() and will generate 4 
#'              images (2x2). The first three images represent SpectralMaps where the largest PCs 
#'              are plotted against each other. The final image is a barplot representing the 20
#'              largest PCs. Also, this funciton separates esetVis::esetSpectralMap()'s SVD 
#'              computation and plotting step resulting in a single SVD computation and thereafter
#'              plotting the required biplots.
#'
#' @param eSetobj ExpressionSet object. One requirement: the featureData(eSetObj) must contain a 
#'        column called 'SYMBOL'.
#' @param mainTitle character; main title used on each plot.
#' @param col2use vector or list; colors to use for the different factorlevels defined further
#'        below (group1). If this is a "colorKey" class, the proper colors will assigned to the
#'        corresponding factor levels.
#' @param group1 character; (part of the) column header of pData(eSetObj) to group the samples on.
#'        Different colors will be assigned to different levels in this group (see also col2use). 
#'        Use in conjunction with exact1 parameter.
#' @param group2 character; (part of the) column header of pData(eSetObj) to group the samples on. 
#'        Different shapes will be given to different factor levels. Use in conjunction with exact2
#'        parameter.
#' @param spec.width numeric; width (in pixels) of the created image (default: 1600)
#' @param spec.height numeric; height (in pixels) of the created image (default: 1200)
#' @param spec.res numeric; resolution of the created image (default: 100)
#' @param output character; output type. Can be either to a PNG file ("png"), the screen ("x11") or
#'        to an RMarkdown document ("rmd") or prepare for Shiny (RShiny)
#' @param outputDir character; Directory to store the output files in.
#' @param studyName character; Description of the study, will be used in generating the output 
#'        filename (if output was set to "png") and is also shown in each image title.
#' @param returnAnalysis boolean; Should the function return the values used for generating the 
#'        ggplot?
#' @param max.PC numeric; number of principal components to include in the barplot. Default: 20
#' @param seperate.plots  boolean; Do you want to plot individual spectralmap plots?
#' @param factorReference vector; Order of your factor levels. Each value should be uniquely found
#'        (but is not case sensitive)
#' @param showText boolean; Print text to the screen (intended for Rmarkdown, but probably obsolete
#'        due to using message() now)
#' @param TSNE boolean; Do we want to include a TSNE plot as well? (default: FALSE)
#' @param individualscale numeric; scaling of the dimensions (spec.width and spec.height) for 
#'        generating individual plots. Default is 0.5 to ensure the individual images are as large
#'        as the ones in the 2x2 image.
#' @param exact1; boolean; Did you specify an exact column header name match for group1?
#' @param exact2; boolean; Did you specify an exact column header name match for group2?
#' @param topSamples; integer; numeric indicating which percentile (if <=1) or number (if >1) of
#'        samples most distant to the origin of the plot to annotate, by default: 5 samples are 
#'        selected If no samples should be annotated, set this parameter to 0.
#' @param topGenes; integer; numeric indicating which percentile (if <=1) or number (if >1) of 
#'        genes most distant to the origin of the plot to annotate, by default: 5 genes are 
#'        selected If no genes should be annotated, set this parameter to 0
#' @param removeControlProbesets; boolean; Remove the AFFX control probesets prior to calculating
#'        the spectralMaps? Default: TRUE.
#' @param cloudGenes; boolean; if TRUE (by default), include the cloud of genes in the plot.
#' @param svd.method character; Character indicating the algorithm to be used for singular value 
#'        decomposition (SVD). Valid inputs are 'default_svd', 'fast_svd' and 'truncated_svd'. 
#'        Default value is 'default_svd'. 'default_svd' calls base::La.svd for SVD computation. 
#'        'fast_svd' computes SVD faster than 'default_svd' as it uses corpcor::fast.svd rather 
#'        than base::La.svd. However, as the tolerance level of base::La.svd and corpcor::fast.svd
#'        are different the number of principal components computed might be different. For 
#'        'truncated_svd' the number of principal components computed is equivalent to the value of
#'        max.PC.
#' @param pcaResult mpm class object; Parameter to provide a pre-computed PCA object as returned by
#'        function flexible_mpm() OR provide a PCA object with signature identical to an object 
#'        returned by function flexible_mpm(). Default value is NULL. If an object is provided then
#'        the function will skip the PCA computation step and directly proceed to plotting biplots
#'        and histogram.
#' @param showRShinyProgress show progress bar in R Shiny apps       
#' @param ...; Additional parameters for eSetSpectralMap. See also ?eSetSpectralMap
#' @note A requirement (for now) is that the featureData(eSetObj) MUSt contain a column named 
#'       'SYMBOL' (case sensitive!)
createSpectralMap <- function(
  eSetObj,
  mainTitle = NULL, col2use = NULL, group1 = NULL, group2 = NULL, outputDir=NULL, studyName=NULL,
  spec.width = 1600, spec.height=1200, spec.res=100, output="RShiny", returnAnalysis = FALSE, 
  max.PC = 20, seperate.plots=FALSE, factorReference = NULL, showText = TRUE, TSNE = F,
  individualScale = 0.75, exact1 = FALSE, exact2=FALSE, topSamples = 5, topGenes = 5, 
  removeControlProbesets=FALSE, cloudGenes = TRUE,
  
  # parameter for providing pre-computed PCA object as returned by function mpm()
  pcaResult = NULL,
  
  # parameter required to specify the SVD computation method
  svd.method = c('default_svd', 'fast_svd', 'truncated_svd'),
  
  showRShinyProgress = TRUE
  
) {
  svd.method = match.arg(svd.method)
  require(esetVis)
  require(grid)
  require(gridExtra)
  require(ggplot2)
  require(mpm)
  
  error <- NULL
  if(class(eSetObj) != "ExpressionSet") { error <- c(error, "- eSetObj is not of ExpressionSet class.\n") }
  if(is.null(group1)) { error <- c(error, "- group1 is not defined\n") } ## group2 is optional
  if(is.null(studyName)) { error <- c(error, "- studyName is not defined\n") }
  if(!is.null(error)) { warning("[WARNING] The following errors occurred:\n"); stop(error) }
  
  if(showRShinyProgress == TRUE) { 
    progress <- shiny::Progress$new(min = 0, max = 1)
    on.exit(progress$close())
  }
  
  ## Checking output variable. Should be either X11 or png
  if(is.null(output)) { output = "x11" }
  output <- match.arg(output, c("png", "x11", "rmd", "RShiny")) ## rmd not implemented yet.
  
  current.dir <- getwd()
  if(is.null(outputDir)) { outputDir = "." } else { dir.create(outputDir, showWarnings=FALSE) }
  setwd(outputDir)
  if(showRShinyProgress == TRUE) { progress$set(message="Applying internal checks...", value = 0.1) }
  
  ## Replace spaces into underscores in the pData object first, before storing it in pheno.
  colnames(Biobase::pData(eSetObj)) <- gsub(" ", "_", colnames(Biobase::pData(eSetObj)), fixed=TRUE)
  pheno <- Biobase::pData(eSetObj)
  ## As a consequence of changing spaces into underscores in the phenodata, we have to replace spaces in the group1 and group2 variables as well.
  if(group2 == "") { group2 <- NULL }
  if(!is.null(group1)) { group1 <- gsub(" ", "_", group1, fixed=TRUE)}
  if(!is.null(group2)) { group2 <- gsub(" ", "_", group2, fixed=TRUE)}
  
  ## group1 is required (leads to stop), group2 is optional.
  ## Have to work on the error warning in a future version.
  attr.group1 <- colnames(pheno)[grep(tolower(group1), tolower(colnames(pheno)), perl=TRUE)]
  if(length(attr.group1) > 1) {
    attr.match <- which(tolower(attr.group1)[] == tolower(group1)) ## Find an exact match prior to reporting an error.
    if(exact1 == 1) { ## Check if this parameter has been set, if not go directly to ERROR
      if(length(attr.match) == 1) { ## Check if an exact match occurred. Set attr.group1 to proper value.
        attr.group1 <- attr.group1[attr.match]
        rm(attr.match)
      } else {
        wText <- paste0("\n!! No exact match found for group1 (", group1, "):\n  [ ] ", paste0(attr.group1, collapse="\n  [ ] "))
        warning(wText, immediate. = TRUE)
        stop("Please refine your search term or set exact.match to TRUE to keep the exact match\n")
      }
    } else {
      wText <- paste0("\n!! Multiple columns found for group1 (", group1, "):\n  [ ] ", paste0(attr.group1, collapse="\n  [ ] "))
      warning(wText, immediate. = TRUE)
      stop("Please refine your search term or set exact.match to TRUE to keep the exact match\n")
    }
  }
  if(length(attr.group1) == 0) {
    warning(paste0("\n!! No matching columns found for group1 (", group1, "):"))
    stop("Please refine your search term\n")
  }
  if(!is.null(group2)) {
    attr.group2 <- colnames(pheno)[grep(tolower(group2), tolower(colnames(pheno)), perl=TRUE)]
    if(length(attr.group2) > 1) {
      attr.match <- which(tolower(attr.group2)[] == tolower(group2)) ## Find an exact match prior to reporting an error.
      if(exact2 == 1) { ## Check if this parameter has been set, if not go directly to ERROR
        if(length(attr.match) == 1) { ## Check if an exact match occurred. Set attr.group1 to proper value.
          attr.group2 <- attr.group2[attr.match]
          rm(attr.match)
        } else {
          wText <- paste0("\n!! No exact match found for group2 (", group2, "):\n  [ ] ", paste0(attr.group2, collapse="\n  [ ] "))
          warning(wText, immediate. = TRUE)
          attr.group2 <- NULL
          message(" --> Skipping group 2 category")
        }
      } else {
        wText <- paste0("\n!! Multiple columns found for group2 (", group2, "):\n  [ ] ", paste0(attr.group2, collapse="\n  [ ] "))
        warning(wText, immediate. = TRUE)
        attr.group2 <- NULL
        message(" --> Skipping group 2 category")
      }
    }
    if(length(attr.group2) == 0) {
      warning(paste0("\n!! No matching columns found for group2(", group2, ")"), immediate. = TRUE)
      attr.group2 <- NULL
      message(" --> Skipping group 2 category")
    }
  } else {
    attr.group2 <- NULL ## overbodig, realizeer ik me net (:
  }
  
  ## Changing the order of samples is at the moment the only solution to change the order of the factors in the plots below.
  ## Will need to ask Laure if this is indeed the case.
  # 1) Identify the column containing the factor values (attr.group1)
  # 2) store the locations of these ordered factor levels in an object
  # 3) Create a new category, storing the attributes in the proper order (by adjusting the levels)
  if(!is.null(factorReference)) {
    factors1 <- unique(pheno[,attr.group1])
    factorsOrdered <- .changeFactorOrder(factors1, reference=factorReference, showText=showText) ## NEed to check how this can be changed in these plots
    eSetObj$sampleCategory <- factor(pheno[,attr.group1], levels=factorsOrdered)
    attr.group1 <- "sampleCategory"
  }
  ## If no colors are given, generate own colors using a specific palette and dependant on the unique values.
  if(is.null(col2use)) {
    if(class(pheno[,attr.group1]) == "list") {
      col2use <- gg_color_hue(length(unique(unlist(pheno[,attr.group1]))))
    } else {
      col2use <- gg_color_hue(length(unique(pheno[,attr.group1])))
    }
  }
  ## If a colorKey is used, ensure that the proper some additional changes.
  if(class(col2use) == "colorKey") {
    temp <- pheno[,attr.group1]
    #print(temp)
    
    if(is.null(levels(temp))) {
      warning("* Colorkey: No levels found in object! Colouring done based on unique values encountered!\n")
      col2use <- col2use[unique(temp),"color"]
    } else {
      col2use <- as.vector(col2use[levels(temp),"color"])
      #print(col2use)
    }
  }
  ## Remove Affymetrix control probesets from the spectralMaps or not. These reporters have no annotation in the SYMBOL column.
  ## If removeControlProbesets is set to FALSE, the SYMBOL column will then store their featurenames. (issue fixed in esetVis-0.0-16, but no labels will eb plotted then).
  features <- Biobase::fData(eSetObj)
  x.pos <- grep("affx", tolower(rownames(features)))
  if(length(x.pos) > 0) {
    if(removeControlProbesets == 1) {
      features <- features[-c(x.pos),]
      Biobase::fData(eSetObj) <- features
      Biobase::exprs(eSetObj) <- exprs(eSetObj)[-c(x.pos), ]
    } else {
      Biobase::fData(eSetObj)[x.pos,"SYMBOL"] <- gsub("_at", "", Biobase::featureNames(eSetObj)[x.pos])
    }
  }
  rm(features, x.pos)
  
  ## Defining fileName and output settings
  fileName.extension <- NULL
  switch(output, "png" = { fileName.extension = ".png" })
  fileName.suffix <- gsub("*", "", gsub(" ", "_", paste0(studyName, "__ColoredBy_", attr.group1, fileName.extension)), fixed=TRUE) ## Substitute spaces to underscores! And remove asterisks!
  #print(fileName.suffix)
  #stop("debugging!")
  if(showText == 1) { message("* Calculating spectralMap objects...") }
  #print("YES!")
  
  # ============== BEGIN: code snippet for PCA calculation, spectral plot creation ============== #

  ## Arguments required by for computing SVD
  ## (these are arguments of esetSpectralMap which are hidden from users of createSpectralMap)
  psids = 1:nrow(eSetObj)
  mpm.args = list(closure = "none", center = "double", normal = "global",
                  row.weight = "mean", col.weight = "constant", logtrans = FALSE)
  
  ## Check input parameters required for computing SVD
  if (length(psids) <= 1)
    stop(paste("At least two genes should be used for the visualization.",
               "Please change the 'psids' argument accordingly."))
  if(!(svd.method %in% c('default_svd', 'fast_svd', 'truncated_svd'))) {
    stop(paste('[ERROR] svd.method - ', svd.method, ' is an invalid input. ', 
                  'Valid inputs for svd.method are - default_svd, fast_svd, truncated_svd.', 
                  sep = ''))
  }
  
  ## create data for PCA
  esetMethods <- esetVis:::getMethodsInputObjectEsetVis(eSetObj)
  esetUsed <- eSetObj[psids,]
  exprsMat <- esetMethods$exprs(esetUsed)
  dataMPM <- data.frame(rownames(exprsMat), exprsMat, stringsAsFactors = FALSE)
  colnames(dataMPM) <- c("featureID", colnames(exprsMat))
  dataMPMWthtNA <- dataMPM[rowSums(is.na(dataMPM)) == 0,]
  
  ## compute PCA
  if(is.null(pcaResult)) {
    argsMpm <- c(mpm.args, list(data = dataMPMWthtNA, svd.method = svd.method, max.PC = max.PC))
    mpmResults <- do.call("flexible_mpm", argsMpm) 
  } else if (class(pcaResult) == 'mpm') {
    mpmResults <- pcaResult
  }
  
  ## create output data container
  x <- list()
  
  ## Return PCA computation object
  x$pca <- mpmResults

  ## Arguments required by for plotting SVD result
  ## (these are arguments of esetSpectralMap which are hidden from users of createSpectralMap)
  symmetryAxes = "combine"
  packageTextLabel = c("ggrepel", "ggplot2")
  packageInteractivity = c("rbokeh", "ggvis")
    
  ## Declare arguments for plotting SVD
  args_lean_esetSpectralMap <- list(    
    # arguments which have been locally computed or declared:
    mpm_result = mpmResults, esetUsed = esetUsed, colorVar = attr.group1, psids = psids, 
    mpm.args = mpm.args, symmetryAxes = symmetryAxes, packageTextLabel = packageTextLabel,
    packageInteractivity = packageInteractivity,
    # arguments passed during function call:
    topGenesVar='SYMBOL', cloudGenesIncludeLegend=T, cloudGenesColor='purple', returnAnalysis=T,
    typePlot = 'static',
    # passed via parameters of createSpectralMap:
    topGenes = topGenes, cloudGenes = cloudGenes, color = col2use, topSamples = topSamples)
  
  ## PC1 vs PC2
  message("PC1 vs PC2...")
  x$spectral12 <- do.call("lean_esetSpectralMap", 
                          c(list(title = "PC1 vs PC2", dim=c(1, 2)), args_lean_esetSpectralMap))

  ## PC1 vs PC3
  message("PC1 vs PC3...")
  x$spectral13 <- do.call("lean_esetSpectralMap", 
                          c(list(title = "PC1 vs PC3", dim=c(1, 3)), args_lean_esetSpectralMap))
  
  ## PC2 vs PC3
  message("PC2 vs PC3...")
  x$spectral23 <- do.call("lean_esetSpectralMap", 
                          c(list(title = "PC2 vs PC3", dim=c(2, 3)), args_lean_esetSpectralMap))
  
  # ================ END: code snippet for PCA calculation, spectral plot creation ================ #
  
  ## Create barplot of 20 largest PCs (spectralMaps). Adjust if there are less than 20 PCs.
  if(length(x$spectral12$analysis$axesContributionsPercentages) < max.PC) {
    max.PC <- length(x$spectral12$analysis$axesContributionsPercentages)
  }
  
  if(showRShinyProgress == TRUE) { progress$set(message="Generating histogram...", value = 0.8) }
  
  if(showText == 1) { message("* Calculating Spectral PC histogram...") }
  x.spectral <- data.frame( PC = c(1:max.PC), levels="PC", values=x$spectral12$analysis$axesContributionsPercentages[1:max.PC])
  x$spectralHist <- ggplot(data=x.spectral, aes(x=PC, y=values), environment = environment()) +
    geom_bar(stat="identity", col="#C09B00", fill="#00BB4E") + ## Fixed colors (nice green vs brown contrast!)
    ggtitle(paste("PC Distribution (top ", max.PC, ")", sep="")) +
    labs(x="Principal Component #", y="% of total variance explained") +
    #annotate("text", label=paste0("Generated on ", .currentDate()), x=max.PC-1, y=max(x.spectral$values, na.rm=TRUE)-1, cex=2.5) +
    scale_x_discrete(breaks=c(1:length(x.spectral$PC)), labels=paste("PC_", as.vector(x.spectral$PC), sep="")) +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, colour = "black"), axis.text.y = element_text(colour="black"),
          panel.background = element_rect(fill = "white"),
          panel.grid.major = element_line(colour = "lightgrey", linetype = "dotted"))
  
  if(TSNE) {
    if(showText == 1) { message("* Calculating TSNE object...") }
    if(showRShinyProgress == TRUE) { progress$set(message="Generating TSNE ...", value = 0.9) }
    
    x$tsnePlot <- esetVis::esetTsne(eSetObj, shapeVar=attr.group2, colorVar=attr.group1, color=col2use, topSamples=5, title=paste(mainTitle, "\nTsne Plot", sep=""), returnAnalysis = TRUE)
  }
  if(showText == 1) { message("* Generating output image...") }
  bottomText <- paste0("Generated on ", Sys.Date(), " using RShiny Informe")
  ## Plotting SpectralMap
  switch(output, "png" = {
    if(showText == 1) { message(paste0("\n --> SpectralMaps_", fileName.suffix))}
    png(filename=paste0("SpectralMaps_", fileName.suffix), width=spec.width, height=spec.height, res=spec.res)
  }, "x11" = { x11(width=spec.width, height=spec.height) })
  
  if(output == "RShiny") {
    y <- x
  } else {
    if(showRShinyProgress == TRUE) { progress$set(message="Done!", value = 1) }
    
    gridExtra::grid.arrange(x$spectral12$plot, x$spectral13$plot, x$spectral23$plot, x$spectralHist, ncol=2,
                            bottom= grid::textGrob(bottomText, x=1, hjust=1, vjust=0, gp=gpar(fontsize=8)))
  }
  if(output == "png") { dev.off() }
  
  if(TSNE) { ## Plotting TSNE plot
    switch(output, "png" = {
      if(showText == 1) { message(paste0("\n --> TSNE_", fileName.suffix)) }
      png(filename=paste("TSNE_", fileName.suffix, sep=""), width=spec.width, height=spec.height, res=spec.res)
    }, "x11" = { dev.new(); x11(width=spec.width, height=spec.height) })
    
    grid.arrange(x$tsnePlot$plot, bottom=textGrob(bottomText, x=1, hjust=1, vjust=0, gp=gpar(fontsize=8)))
    
    if(output == "png") { dev.off() }
    if(output == "x11") { if(showText == 1) { message(" ok.\n") }}
  }
  if(output == "png" && seperate.plots == 1) {
    ## Generates individual images of the spectralMaps (+ PC Histogram)
    if(showText == 1) { message("\n  * Individual plots:\n") }
    for(i in 1:4) {
      #fileName <- paste0("SpectralMaps_", studyName, "_INDIVIDUAL_", names(x)[i], fileName.extension)
      #fileName.suffix <- paste0(studyName, "__ColoredBy_", attr.group1, fileName.extension)
      tempName <- strsplit(fileName.suffix, "__")[[1]]
      fileName <- paste0("SpectralMaps_", tempName[1], "_INDIVIDUAL_", names(x)[i], "__", tempName[2])#, fileName.extension)
      #fileName <- paste0("SpectralMaps_INDIVIDUAL_", names(x)[i], "_", fileName.suffix)
      if(showText == 1) { message(paste0("  ==> ", fileName, "\n")) }
      png(filename=fileName, width=spec.width/2, height=spec.height/2, res=spec.res)
      if(i == 4) { print(x[[i]]) } else { print(x[[i]]$plot) } ## 4th is the barplot, which doesn't have a $plot!
      dev.off()
      rm(fileName, tempName)
    }
  }
  
  setwd(current.dir)
  
  if(returnAnalysis == 1) {
    if(output == "RShiny") return(y) else return(x)
  }
}
Paradigm4/revealgenomics documentation built on April 7, 2020, 2:01 a.m.