R/plotStratifyBy.R

Defines functions plotStratifyBy plotStratify

Documented in plotStratify plotStratifyBy

#' plotStratify
#'
#' Plot gene expression stratified by another variable
#'
#' @param formula specify variables shown in the x- and y-axes.  Y-axis should be continuous variable, x-axis should be discrete.
#' @param data data.frame storing continuous and discrete variables specified in formula
#' @param xlab label x-asis. Defaults to value of xval
#' @param ylab label y-asis. Defaults to value of yval
#' @param main main label
#' @param sortBy name of column in geneExpr to sort samples by.  Defaults to xval
#' @param colorBy name of column in geneExpr to color box plots.  Defaults to xval
#' @param sort if TRUE, sort boxplots by median value, else use default ordering
#' @param text plot text on the top left of the plot
#' @param text.y indicate position of the text on the y-axis as a fraction of the y-axis range
#' @param text.size size of text
#' @param pts.cex size of points
#' @param ylim specify range of y-axis
#' @param legend show legend
#' @param x.labels show x axis labels
#'
#' @return
#' ggplot2 object
#'
#' @examples
#'
#' # Note: This is a newer, more convient interface to plotStratifyBy()
#'
#' # load library
#' # library(variancePartition)
#'
#' # load simulated data:
#' data(varPartData)
#'
#' # Create data.frame with expression and Tissue information for each sample
#' GE <- data.frame(Expression = geneExpr[1, ], Tissue = info$Tissue)
#'
#' # Plot expression stratified by Tissue
#' plotStratify(Expression ~ Tissue, GE)
#'
#' # Omit legend and color boxes grey
#' plotStratify(Expression ~ Tissue, GE, colorBy = NULL)
#'
#' # Specify colors
#' col <- c(B = "green", A = "red", C = "yellow")
#' plotStratify(Expression ~ Tissue, GE, colorBy = col, sort = FALSE)
#'
#' @export
plotStratify <- function(formula, data, xlab, ylab, main, sortBy, colorBy, sort = TRUE, text = NULL, text.y = 1, text.size = 5, pts.cex = 1, ylim = NULL, legend = TRUE, x.labels = FALSE) {
  mc <- match.call()
  m <- match(c("formula", "data"), names(mc), 0L)
  mf <- mc[c(1L, m)]
  mf[[1L]] <- as.name("model.frame")
  mf <- eval(mf, parent.frame())
  data.st <- data.frame(mf)

  if (ncol(data.st) != 2) {
    stop("formula must have exactly 2 entries")
  }

  xval <- colnames(data.st)[attr(attr(mf, "terms"), "response") + 1]
  yval <- colnames(data.st)[attr(attr(mf, "terms"), "response")]

  if (missing(xlab)) {
    xlab <- xval
  }
  if (missing(sortBy) || is.null(sortBy)) {
    sortBy <- xval
  }
  if (missing(colorBy)) {
    colorBy <- xval
  }
  if (missing(ylab)) {
    ylab <- yval
  }

  # check that sortBy exist in data.st
  if (!(sortBy %in% colnames(data.st))) {
    stop(paste("sortBy is not found in colnames(data): sortBy =", sortBy))
  }

  data.st[[yval]] <- as.numeric(data.st[[yval]])

  xpos <- 0.5 # text.x * nlevels(data.st[[xval]])
  ypos <- text.y * (max(data.st[[yval]]) - min(data.st[[yval]])) + min(data.st[[yval]])

  if (sort) {
    # sort categories by median expression
    data.st[["reorder"]] <- reorder(data.st[[sortBy]], data.st[[yval]], FUN = median)
    ord <- "reorder"
  } else {
    ord <- xval
  }

  pOut <- ggplot(data.st, aes_string(x = ord, y = yval)) +
    theme_bw() +
    theme(plot.background = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank()) +
    ylab(ylab) +
    xlab(xlab) +
    theme(plot.title = element_text(hjust = 0.5))

  if (any(is.null(colorBy)) || any(is.na(colorBy))) {
    pOut <- pOut + geom_boxplot(color = "grey", fill = "grey", outlier.colour = "black", outlier.shape = 20)
  } else {
    # if colors are specified and all levels of xval are represented
    if (sum(levels(data.st[[xval]]) %in% names(colorBy)) == nlevels(data.st[[xval]]) && nlevels(data.st[[xval]]) > 0) {
      # 		i = match(levels(data.st[[ord]]), levels(data.st[[xval]]) )
      # pOut = pOut + geom_boxplot(aes_string(fill=xval), color=colorBy[i], outlier.colour='black',outlier.shape = 20) + scale_fill_manual( values=array(colorBy))

      i <- match(levels(data.st[[xval]]), names(colorBy))

      pOut <- pOut + geom_boxplot(aes_string(fill = xval), color = colorBy[i], outlier.colour = "black", outlier.shape = 20) + scale_fill_manual(values = array(colorBy)[i])
    } else {
      # color boxes by colorBy variable in data.st
      pOut <- pOut + geom_boxplot(aes_string(color = colorBy, fill = colorBy), outlier.colour = "black", outlier.shape = 20)
    }

    # add legend
    if (legend) {
      pOut <- pOut + theme(legend.justification = c(1, 0), legend.position = c(1, 0), legend.key = element_rect(fill = "transparent"), axis.text.x = element_text(angle = 30), legend.background = element_rect(fill = "transparent"))
    } else {
      pOut <- pOut + theme(legend.position = "none", axis.text.x = element_text(angle = 30))
    }
  }

  if (!x.labels) {
    pOut <- pOut + theme(axis.ticks.x = element_blank(), axis.text.x = element_blank())
  }

  # add median bar
  pOut <- pOut + stat_summary(geom = "crossbar", width = 0.65, fatten = 0, color = "black", fun.data = function(x) {
    return(c(y = median(x), ymin = median(x), ymax = median(x)))
  })

  if (!missing(ylim)) {
    pOut <- pOut + ylim(ylim)
  }

  if (!missing(main)) {
    pOut <- pOut + ggtitle(main)
  }

  if (!missing(text)) {
    pOut <- pOut + annotate("text", label = text, x = xpos, y = ypos, size = text.size, hjust = 0)
  }

  # pOut = pOut + geom_jitter(size=pts.cex,height=0, width=0, col="black")

  return(pOut)
}


#' plotStratifyBy
#'
#' Plot gene expression stratified by another variable
#'
#' @param geneExpr data.frame of gene expression values and another variable for each sample.  If there are multiple columns, the user can specify which one to use
#' @param xval name of column in geneExpr to be used along x-axis to stratify gene expression
#' @param yval name of column in geneExpr indicating gene expression
#' @param xlab label x-asis. Defaults to value of xval
#' @param ylab label y-asis. Defaults to value of yval
#' @param main main label
#' @param sortBy name of column in geneExpr to sort samples by.  Defaults to xval
#' @param colorBy name of column in geneExpr to color box plots.  Defaults to xval
#' @param sort if TRUE, sort boxplots by median value, else use default ordering
#' @param text plot text on the top left of the plot
#' @param text.y indicate position of the text on the y-axis as a fraction of the y-axis range
#' @param text.size size of text
#' @param pts.cex size of points
#' @param ylim specify range of y-axis
#' @param legend show legend
#' @param x.labels show x axis labels
#'
#' @return
#' ggplot2 object
#'
#' @examples
#'
#' # load library
#' # library(variancePartition)
#'
#' # load simulated data:
#' data(varPartData)
#'
#' # Create data.frame with expression and Tissue information for each sample
#' GE <- data.frame(Expression = geneExpr[1, ], Tissue = info$Tissue)
#'
#' # Plot expression stratified by Tissue
#' plotStratifyBy(GE, "Tissue", "Expression")
#'
#' # Omit legend and color boxes grey
#' plotStratifyBy(GE, "Tissue", "Expression", colorBy = NULL)
#'
#' # Specify colors
#' col <- c(B = "green", A = "red", C = "yellow")
#' plotStratifyBy(GE, "Tissue", "Expression", colorBy = col, sort = FALSE)
#'
#' @export
plotStratifyBy <- function(geneExpr, xval, yval, xlab = xval, ylab = yval, main = NULL, sortBy = xval, colorBy = xval, sort = TRUE, text = NULL, text.y = 1, text.size = 5, pts.cex = 1, ylim = NULL, legend = TRUE, x.labels = FALSE) {
  geneExpr <- data.frame(geneExpr)
  geneExpr <- droplevels(geneExpr)

  sortBy <- xval

  # check that xval and yval exist in geneExpr
  if (!(xval %in% colnames(geneExpr))) {
    stop(paste("xval is not found in colnames(geneExpr): xval =", xval))
  }
  if (!(yval %in% colnames(geneExpr))) {
    stop(paste("yval is not found in colnames(geneExpr): yval =", yval))
  }

  # check that sortBy exist in geneExpr
  if (!(sortBy %in% colnames(geneExpr))) {
    stop(paste("sortBy is not found in colnames(geneExpr): sortBy =", sortBy))
  }

  geneExpr[[yval]] <- as.numeric(geneExpr[[yval]])

  xpos <- 0.5 # text.x * nlevels(geneExpr[[xval]])
  ypos <- text.y * (max(geneExpr[[yval]]) - min(geneExpr[[yval]])) + min(geneExpr[[yval]])

  if (sort) {
    # sort categories by median expression
    geneExpr[["reorder"]] <- reorder(geneExpr[[sortBy]], geneExpr[[yval]], FUN = median)
    ord <- "reorder"
  } else {
    ord <- xval
  }

  pOut <- ggplot(geneExpr, aes_string(x = ord, y = yval)) +
    theme_bw() +
    theme(plot.background = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank()) +
    ylab(ylab) +
    xlab(xlab) +
    theme(plot.title = element_text(hjust = 0.5))

  if (any(is.null(colorBy)) || any(is.na(colorBy))) {
    pOut <- pOut + geom_boxplot(color = "grey", fill = "grey", outlier.colour = "black", outlier.shape = 20)
  } else {
    # if colors are specified and all levels of xval are represented
    if (sum(levels(geneExpr[[xval]]) %in% names(colorBy)) == nlevels(geneExpr[[xval]])) {
      # 		i = match(levels(geneExpr[[ord]]), levels(geneExpr[[xval]]) )
      # pOut = pOut + geom_boxplot(aes_string(fill=xval), color=colorBy[i], outlier.colour='black',outlier.shape = 20) + scale_fill_manual( values=array(colorBy))
      i <- match(levels(geneExpr[[xval]]), names(colorBy))
      pOut <- pOut + geom_boxplot(aes_string(fill = xval), color = colorBy[i], outlier.colour = "black", outlier.shape = 20) + scale_fill_manual(values = array(colorBy)[i])
    } else {
      # color boxes by colorBy variable in geneExpr
      pOut <- pOut + geom_boxplot(aes_string(color = colorBy, fill = colorBy), outlier.colour = "black", outlier.shape = 20)
    }

    # add legend
    if (legend) {
      pOut <- pOut + theme(legend.justification = c(1, 0), legend.position = c(1, 0), legend.key = element_rect(fill = "transparent"), axis.text.x = element_text(angle = 30), legend.background = element_rect(fill = "transparent"))
    } else {
      pOut <- pOut + theme(legend.position = "none", axis.text.x = element_text(angle = 30))
    }
  }

  if (!x.labels) {
    pOut <- pOut + theme(axis.ticks.x = element_blank(), axis.text.x = element_blank())
  }

  # add median bar
  pOut <- pOut + stat_summary(geom = "crossbar", width = 0.65, fatten = 0, color = "black", fun.data = function(x) {
    return(c(y = median(x), ymin = median(x), ymax = median(x)))
  })

  if (!is.null(ylim)) {
    pOut <- pOut + ylim(ylim)
  }

  if (!is.null(main)) {
    pOut <- pOut + ggtitle(main)
  }

  if (!is.null(text)) {
    pOut <- pOut + annotate("text", label = text, x = xpos, y = ypos, size = text.size, hjust = 0)
  }

  # pOut = pOut + geom_jitter(size=pts.cex,height=0, width=0, col="black")

  return(pOut)
}
GabrielHoffman/variancePartition documentation built on April 20, 2024, 7:29 p.m.