R/exposure_related_functions.R

Defines functions plot_exposure_to_pdf plot_exposure plot_exposure_internal sort_exposure write_exposure read_exposure

Documented in plot_exposure plot_exposure_internal plot_exposure_to_pdf read_exposure sort_exposure write_exposure

#' @title Read an exposure matrix from a file.
#'
#' @param file File path to a CSV file containing an exposure matrix, i.e.
#'  the numbers of mutations due to each mutational signature.
#'  Each row corresponds to a mutational signature an each
#'  column corresponds to a tumor or other biological sample.
#'
#' @param check.names Passed to \code{read.csv}.
#' \strong{IMPORTANT}: If \code{TRUE} this will replace the double
#' colon in identifiers of the form <tumor_type>::<sample_id>
#' with two periods (i.e. <tumor_type>..<sample_id>.
#' If \code{check.names} is true, generate a warning
#' if double colons were present.
#'
#' @return Numerical matrix of exposures, with the same
#'  shape as the contents of \code{file}.
#'
#' @importFrom utils read.csv
#'
#' @export
#'
#' @examples
#' file <- system.file("extdata",
#'   "Liver-HCC.exposure.csv",
#'   package = "mSigTools"
#' )
#' exposure <- read_exposure(file)
read_exposure <- function(file, check.names = FALSE) {
  if (check.names) {
    headers <- read.csv(file, nrow = 1, header = FALSE, stringsAsFactors = FALSE)
    double.colon <- grep("::", unlist(headers)[-1], fixed = TRUE)
    if (length(double.colon) > 0) {
      warning(":: in sample ID replaced by ..; suggest calling with check.names = FALSE")
    }
  }
  retval <- read.csv(file, row.names = 1, check.names = check.names)
  if (any(duplicated(colnames(retval)))) {
    stop("There are duplicated column names in ", file)
  }
  return(data.matrix(retval))
}

#' @title Write an exposure matrix to a file.
#'
#' @param exposure Exposures as a numerical matrix (or data.frame) with
#'   signatures in rows and samples in columns. Rownames are taken as the
#'   signature names and column names are taken as the sample IDs.
#'
#' @param file File to which to write the exposure matrix (as a CSV file).
#'
#' @param row.names Either a logical value indicating whether the row names of
#'   \code{exposure} are to be written along with \code{exposure}, or a
#'   character vector of row names to be written.
#'
#' @return No return value, called for side effects.
#'
#' @importFrom utils write.csv
#'
#' @export
#'
#' @examples
#' file <- system.file("extdata",
#'   "Liver-HCC.exposure.csv",
#'   package = "mSigTools"
#' )
#' exposure <- read_exposure(file)
#' write_exposure(exposure, file = file.path(tempdir(), "Liver-HCC.exposure.csv"))
write_exposure <- function(exposure, file, row.names = TRUE) {
  old.digits <- getOption("digits")
  options(digits = 22)
  write.csv(exposure, file, row.names = row.names)
  on.exit(options(digits = old.digits))
}

#' Sort columns of an exposure matrix based on the number of mutations in each sample (column).
#'
#' @param exposure Exposures as a numerical matrix (or data.frame) with
#'   signatures in rows and samples in columns. Rownames are taken as the
#'   signature names and column names are taken as the sample IDs.
#'
#' @param decreasing If \code{TRUE}, sort from largest to smallest.
#'
#' @return The original \code{exposure} with columns sorted.
#'
#' @export
#'
#' @examples
#' file <- system.file("extdata",
#'   "Liver-HCC.exposure.csv",
#'   package = "mSigTools"
#' )
#' exposure <- read_exposure(file)
#' exposure.sorted <- sort_exposure(exposure)
sort_exposure <- function(exposure, decreasing = TRUE) {
  retval <- exposure[, order(colSums(exposure), decreasing = decreasing),
    drop = FALSE
  ]
  return(retval)
}

#' Plot a matrix of exposures in a single plot.
#'
#' @param exposure Exposures as a numerical \code{matrix} (or \code{data.frame})
#'   with signatures in rows and samples in columns. Rownames are taken as the
#'   signature names and column names are taken as the sample IDs. If you want
#'   \code{exposure} sorted from largest to smallest, use
#'   \code{\link{sort_exposure}}. Do not use column names that start with
#'   multiple underscores. The exposures will often be mutation counts, but
#'   could also be e.g. mutations per megabase.
#'
#' @param plot.proportion Plot exposure proportions rather than counts.
#'
#' @param xlim,ylim Limits for the x and y axis. If \code{NULL}(default), the
#'   function tries to do something reasonable.
#'
#' @param legend.x,legend.y The x and y co-ordinates to be used to position the
#'   legend.
#'
#' @param cex.legend A numerical value giving the amount by which legend
#'   plotting text and symbols should be magnified relative to the default.
#'
#' @param cex.yaxis A numerical value giving the amount by which y axis values
#'   should be magnified relative to the default.
#'
#' @param cex.xaxis A numerical value giving the amount by which x axis values
#'   should be magnified relative to the default. If
#'   \code{NULL}(default), the function tries to do something reasonable.
#'
#' @param plot.sample.names Whether to plot sample names below the x axis.
#'   Default is TRUE. Ignored if there are no column names on
#'   \code{exposure}.
#'
#' @param yaxis.labels User defined y axis labels to be plotted. If
#'   \code{NULL}(default), the function tries to do something reasonable.
#'
#' @param ... Other arguments passed to \code{\link[graphics]{barplot}}. If
#'   \code{ylab} is not included, it defaults to a value depending on
#'   \code{plot.proportion}. If \code{col} is not supplied the function tries to
#'   do something reasonable.
#'
#' @import graphics
#'
#' @return An \strong{invisible} list.
#'   The first element
#'   is a logical value
#'   indicating whether the plot was successful.
#'   The second element is a numeric
#'   vector giving the coordinates of
#'   the bar x-axis midpoints drawn, useful for
#'   adding to the graph.
#'
#' @keywords internal
plot_exposure_internal <-
  function(exposure, # This is actually the exposure "counts"
           plot.proportion = FALSE,
           xlim = NULL,
           ylim = NULL,
           legend.x = NULL,
           legend.y = NULL,
           cex.legend = 0.9,
           cex.yaxis = 1,
           cex.xaxis = NULL,
           plot.sample.names = TRUE,
           yaxis.labels = NULL,
           ...) {
    exposure <- as.matrix(exposure) # In case it is a data frame
    num.sigs <- nrow(exposure)
    num.samples <- ncol(exposure)
    args <- list(...)

    if (is.null(args$col)) {
      if (num.sigs <= 8) {
        args$col <-
          c("red", "black", "grey", "yellow", "blue", "brown", "green4", "skyblue")
      } else {
        # Lots of signatures; use shading lines to differentiate
        args$col <- grDevices::rainbow(num.sigs, alpha = 1)
      }
    }

    barplot.col <- character(num.sigs)
    barplot.col <-
      suppressWarnings(replace(barplot.col, 1:num.sigs, args$col))

    barplot.density <- barplot.angle <- numeric(num.sigs)

    if (num.sigs <= 12) {
      # Specify the density of shading lines, in lines per inch, for the bars or
      # bar components. Will repeat as needed, non-positive values of density
      # inhibit the drawing of shading lines.
      barplot.density <- rep(-1, num.sigs)

      # Sepcify the slope of shading lines, given as an angle in degrees
      # (counter-clockwise), for the bars or bar components. Will repeat as
      # needed.
      barplot.angle <- rep(0, num.sigs)
    } else {
      # Lots of signatures; use shading lines to differentiate
      barplot.density <-
        suppressWarnings(replace(
          barplot.density, 1:num.sigs,
          c(-1, 50, 50, 50, 50)
        ))
      barplot.angle <-
        suppressWarnings(replace(
          barplot.angle, 1:num.sigs,
          c(0, 45, 135, 45, 135)
        ))
    }
    # For legend, we need to put colour, density and angle in reversed order to make
    # sure this matches order used in barplot.
    legend.col <- rev(barplot.col)
    legend.density <- rev(barplot.density)
    legend.angle <- rev(barplot.angle)

    if (plot.proportion) {
      # Matrix divided by vector goes column-wise, not row-wise, so transpose twice
      plot.what <- t(t(exposure) / colSums(exposure))

      # If some columns in exposure matrix are "padded", i.e. all the values
      # are 0, then the division above will make NaNs. We try to replace all
      # NaN with 0
      plot.what[is.na(plot.what)] <- 0
    } else {
      plot.what <- exposure
    }

    # Set the limits for x axis, leave extra space for plotting legend
    if (is.null(xlim)) {
      xmax <- round(ncol(plot.what) * 1.7)
      xlim <- c(0, xmax)
    } else {
      xmax <- xlim[2]
    }

    if (is.null(ylim)) {
      ymax <- round(max(colSums(plot.what)))
      ylim <- c(0, ymax)
    } else {
      ymax <- ylim[2]
    }

    # Check whether user specifies argument for barplot
    ylab <- args$ylab
    args$ylab <- NULL
    if (is.null(ylab)) {
      ylab <- ifelse(plot.proportion,
        "Proportion of mutations",
        "Number of mutations"
      )
    }

    space <- args$space
    args$space <- NULL
    if (is.null(space)) {
      space <- xmax * 0.01
    }

    density <- args$density
    args$density <- NULL
    if (is.null(density)) {
      density <- barplot.density
    }

    # Ignore column names, we'll plot them separately to make them fit.
    mp <- do.call(
      barplot,
      args = c(
        list(
          height = plot.what,
          ylab = ylab,
          # The amount of space left before each bar
          space = space,
          xaxs = "i", # No extra spacing at each end of x axis
          xaxt = "n", # Do not plot the X axis
          yaxt = "n", # Do not plot the Y axis
          density = density,
          angle = barplot.angle,
          border = "white",
          xlim = xlim,
          ylim = ylim
        ),
        args
      )
    )

    # Get locations for y axis annotations
    y.axis.values <- seq(0, ymax, ymax / 4)

    if (is.null(yaxis.labels)) {
      if (plot.proportion) {
        y.axis.labels <- format(round(y.axis.values, 2), nsmall = 2)
      } else {
        y.axis.labels <- round(y.axis.values)
      }
    } else {
      y.axis.labels <- yaxis.labels
    }

    # Draw y axis and labels
    Axis(side = 2, at = y.axis.values, las = 1, labels = FALSE)
    text(
      x = -xmax * 0.03, y = y.axis.values, labels = y.axis.labels,
      las = 1, adj = 1, xpd = NA, cex = cex.yaxis
    )

    # Setting the default parameters for legend plotting
    if (is.null(legend.x)) {
      legend.x <- xmax * 0.9
    }
    if (is.null(legend.y)) {
      legend.y <- ymax
    }

    legend(
      x = legend.x,
      y = legend.y,
      legend = rev(row.names(exposure)),
      density = legend.density,
      angle = legend.angle,
      xpd = NA,
      fill = legend.col,
      x.intersp = 0.3,
      y.intersp = 1,
      bty = "n",
      border = "white",
      cex = cex.legend,
      title = "Signature"
    )

    # Now add sample names, rotated to hopefully fit,
    if (isTRUE(plot.sample.names)) {
      if (length(mp) < 50) {
        size.adj <- 0.75
      } else if (length(mp) < 80) {
        size.adj <- 0.65
      } else if (length(mp) < 100) {
        size.adj <- 0.4
      } else if (length(mp) < 120) {
        size.adj <- 0.4
      } else if (length(mp) < 150) {
        size.adj <- 0.3
      } else {
        size.adj <- 0.3
      }
      cnames <- colnames(exposure)
      if (!is.null(cnames)) {
        cnames <- sub("_____.*", "", cnames)
        if (is.null(cex.xaxis)) {
          mtext(cnames, side = 1, line = 0.38, at = mp, las = 2, cex = size.adj)
        } else {
          mtext(cnames, side = 1, line = 0.38, at = mp, las = 2, cex = cex.xaxis)
        }
      }
    }

    invisible(list(plot.success = TRUE, bp.coordinates = mp))
  }

#' Plot exposures in multiple plots, with each plot showing exposures for a manageable number of samples.
#'
#' @inheritParams plot_exposure_internal
#'
#' @param samples.per.line Number of samples to show in each plot.
#'
#' @param xlim,ylim Limits for the x and y axis. If \code{NULL}(default), the
#'   function tries to do something reasonable.
#'
#' @param legend.x,legend.y The x and y co-ordinates to be used to position the
#'   legend.
#'
#' @return An \strong{invisible} list.
#'   The first element is a logical value
#'   indicating whether the plot is successful.
#'   The second element is a numeric
#'   vector giving the coordinates of
#'   the bar x-axis midpoints drawn, useful for
#'   adding to the graph.
#'
#' @export
#'
#' @examples
#' file <- system.file("extdata",
#'   "Liver-HCC.exposure.csv",
#'   package = "mSigTools"
#' )
#' exposure <- read_exposure(file)
#' old.par <- par(mar = c(8, 5, 1, 1))
#' plot_exposure(exposure[, 1:30],
#'   main = "Liver-HCC exposure", cex.yaxis = 0.8,
#'   plot.proportion = TRUE
#' )
#' par(old.par)
plot_exposure <- function(exposure,
                          samples.per.line = 30,
                          plot.proportion = FALSE,
                          xlim = NULL,
                          ylim = NULL,
                          legend.x = NULL,
                          legend.y = NULL,
                          cex.legend = 0.9,
                          cex.yaxis = 1,
                          cex.xaxis = NULL,
                          plot.sample.names = TRUE,
                          yaxis.labels = NULL,
                          ...) {
  n.sample <- ncol(exposure)
  exposure <- exposure[rowSums(exposure) > 0, , drop = FALSE]
  num.ranges <- n.sample %/% samples.per.line
  size.of.last.range <- n.sample %% samples.per.line

  if (num.ranges > 0) {
    if (size.of.last.range > 0) {
      padding.len <- samples.per.line - size.of.last.range
      padding <- matrix(0, nrow = nrow(exposure), ncol = padding.len)
      # The column names starting with lots of underscore
      # will not be plotted in the final output.
      colnames(padding) <- paste("_____", 1:ncol(padding), sep = "_")
      exposure <- cbind(exposure, padding)
      starts <- 0:num.ranges * samples.per.line + 1
    } else {
      starts <- 0:(num.ranges - 1) * samples.per.line + 1
    }
    ends <- starts + samples.per.line - 1
  } else {
    starts <- 1
    ends <- n.sample
  }

  for (i in 1:length(starts)) {
    list <- plot_exposure_internal(exposure[, starts[i]:ends[i], drop = FALSE],
      plot.proportion   = plot.proportion,
      xlim              = xlim,
      ylim              = ylim,
      legend.x          = legend.x,
      legend.y          = legend.y,
      cex.legend        = cex.legend,
      cex.yaxis         = cex.yaxis,
      cex.xaxis         = cex.xaxis,
      plot.sample.names = plot.sample.names,
      yaxis.labels      = yaxis.labels,
      ...               = ...
    )
  }
  invisible(list(plot.success = TRUE, bp.coordinates = list$bp.coordinates))
}

#' Plot exposures in multiple plots to a single PDF file, with each plot showing
#' exposures for a manageable number of samples.
#'
#'
#' @inheritParams plot_exposure_internal
#'
#' @param file The name of the PDF file to be produced.
#'
#' @param mfrow A vector of the form \code{c(nr, nc)}.
#' Subsequent figures will be drawn in an \code{nr}-by-\code{nc}
#' array on the device by rows.
#'
#' @param mar A numerical vector of the form \code{c(bottom,
#' left, top, right)} which gives the number of lines of margin to be
#' specified on the four sides of the plot.
#'
#' @param oma A vector of the form \code{c(bottom, left, top,
#' right)} giving the size of the outer margins in lines of text.
#'
#' @param samples.per.line Number of samples to show in each plot.
#'
#' @param xlim,ylim Limits for the x and y axis. If \code{NULL}(default), the
#'   function tries to do something reasonable.
#'
#' @param legend.x,legend.y The x and y co-ordinates to be used to position the
#'   legend.
#'
#' @param width,height The width and height of the graphics region in inches.
#'
#' @return An \strong{invisible} list.
#'   The first element is a logical value
#'   indicating whether the plot is successful.
#'   The second element is a numeric
#'   vector giving the coordinates of
#'   the bar x-axis midpoints drawn, useful for
#'   adding to the graph.
#'
#' @export
#'
#' @examples
#' file <- system.file("extdata",
#'   "Liver-HCC.exposure.csv",
#'   package = "mSigTools"
#' )
#' exposure <- read_exposure(file)
#' plot_exposure_to_pdf(exposure,
#'   file = file.path(tempdir(), "Liver-HCC.exposure.pdf"),
#'   cex.yaxis = 0.8, plot.proportion = TRUE
#' )
plot_exposure_to_pdf <- function(exposure,
                                 file,
                                 mfrow = c(2, 1),
                                 mar = c(6, 4, 3, 2),
                                 oma = c(3, 2, 0, 2),
                                 samples.per.line = 30,
                                 plot.proportion = FALSE,
                                 xlim = NULL,
                                 ylim = NULL,
                                 legend.x = NULL,
                                 legend.y = NULL,
                                 cex.legend = 0.9,
                                 cex.yaxis = 1,
                                 cex.xaxis = NULL,
                                 plot.sample.names = TRUE,
                                 yaxis.labels = NULL,
                                 width = 8.2677,
                                 height = 11.6929,
                                 ...) {
  # Setting the width and length for A4 size plotting
  grDevices::pdf(file, width = width, height = height, onefile = TRUE)

  opar <- par(mfrow = mfrow, mar = mar, oma = oma)
  on.exit(par(opar))

  list <- plot_exposure(
    exposure = exposure,
    samples.per.line = samples.per.line,
    plot.proportion = plot.proportion,
    xlim = xlim,
    ylim = ylim,
    legend.x = legend.x,
    legend.y = legend.y,
    cex.legend = cex.legend,
    cex.yaxis = cex.yaxis,
    cex.xaxis = cex.xaxis,
    plot.sample.names = plot.sample.names,
    yaxis.labels = yaxis.labels,
    ... = ...
  )

  grDevices::dev.off()
  invisible(list(plot.success = TRUE, bp.coordinates = list$bp.coordinates))
}

Try the mSigTools package in your browser

Any scripts or data that you put into this service are public.

mSigTools documentation built on Jan. 13, 2023, 5:11 p.m.