R/plot_functions_explore.R
In DEP: Differential Enrichment analysis of Proteomics data

Documented in plot_cor plot_dist plot_pca

#' Plot PCA
#'
#' \code{plot_pca} generates a PCA plot using the top variable proteins.
#'
#' @param dep SummarizedExperiment,
#' Data object for which differentially enriched proteins are annotated
#' (output from \code{\link{test_diff}()} and \code{\link{add_rejections}()}).
#' @param x Integer(1),
#' Sets the principle component to plot on the x-axis.
#' @param y Integer(1),
#' Sets the principle component to plot on the y-axis.
#' @param indicate Character,
#' Sets the color, shape and facet_wrap of the plot
#' based on columns from the experimental design (colData).
#' @param label Logical,
#' Whether or not to add sample labels.
#' @param n Integer(1),
#' Sets the number of top variable proteins to consider.
#' @param point_size Integer(1),
#' Sets the size of the points.
#' @param label_size Integer(1),
#' Sets the size of the labels.
#' @param plot Logical(1),
#' If \code{TRUE} (default) the PCA plot is produced.
#' Otherwise (if \code{FALSE}), the data which the
#' PCA plot is based on are returned.
#' @return A scatter plot (generated by \code{\link[ggplot2]{ggplot}}).
#' @examples
#' # Load example
#' data <- UbiLength
#' data <- data[data$Reverse != "+" & data$Potential.contaminant != "+",]
#' data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
#'
#' # Make SummarizedExperiment
#' columns <- grep("LFQ.", colnames(data_unique))
#' exp_design <- UbiLength_ExpDesign
#' se <- make_se(data_unique, columns, exp_design)
#'
#' # Filter, normalize and impute missing values
#' filt <- filter_missval(se, thr = 0)
#' norm <- normalize_vsn(filt)
#' imputed <- impute(norm, fun = "MinProb", q = 0.01)
#'
#' # Test for differentially expressed proteins
#' diff <- test_diff(imputed, "control", "Ctrl")
#' dep <- add_rejections(diff, alpha = 0.05, lfc = 1)
#'
#' # Plot PCA
#' plot_pca(dep)
#' plot_pca(dep, indicate = "condition")
#' @export
plot_pca <- function(dep, x = 1, y = 2, indicate = c("condition", "replicate"),
  label = FALSE, n = 500, point_size = 4, label_size = 3, plot = TRUE) {
  if(is.integer(x)) x <- as.numeric(x)
  if(is.integer(y)) y <- as.numeric(y)
  if(is.integer(n)) n <- as.numeric(n)
  if(is.integer(point_size)) point_size <- as.numeric(point_size)
  if(is.integer(label_size)) label_size <- as.numeric(label_size)
  # Show error if inputs are not the required classes
  assertthat::assert_that(inherits(dep, "SummarizedExperiment"),
    is.numeric(x),
    length(x) == 1,
    is.numeric(y),
    length(y) == 1,
    is.numeric(n),
    length(n) == 1,
    is.character(indicate),
    is.logical(label),
    is.numeric(point_size),
    length(point_size) == 1,
    is.numeric(label_size),
    length(label_size) == 1,
    is.logical(plot),
    length(plot) == 1)

  # Check for valid x and y values
  if(x > ncol(dep) | y > ncol(dep)) {
    stop(paste0("'x' and/or 'y' arguments are not valid\n",
                "Run plot_pca() with 'x' and 'y' <= ",
                ncol(dep), "."),
         call. = FALSE)
  }

  # Check for valid 'n' value
  if(n > nrow(dep)) {
    stop(paste0("'n' argument is not valid.\n",
                "Run plot_pca() with 'n' <= ",
                nrow(dep),
                "."),
         call. = FALSE)
  }

  # Check for valid 'indicate'
  columns <- colnames(colData(dep))
  if(!is.null(indicate)) {
    if(length(indicate) > 3) {
      stop("Too many features in 'indicate'
        Run plot_pca() with a maximum of 3 indicate features")
    }
    if(any(!indicate %in% columns)) {
      stop(paste0("'",
        paste0(indicate, collapse = "' and/or '"),
        "' column(s) is/are not present in ",
        deparse(substitute(dep)),
        ".\nValid columns are: '",
        paste(columns, collapse = "', '"),
        "'."),
        call. = FALSE)
    }
  }

  # Get the variance per protein and take the top n variable proteins
  var <- apply(assay(dep), 1, sd)
  df <- assay(dep)[order(var, decreasing = TRUE)[seq_len(n)],]

  # Calculate PCA
  pca <- prcomp(t(df), scale = FALSE)
  pca_df <- pca$x %>%
    data.frame() %>%
    rownames_to_column() %>%
    left_join(., data.frame(colData(dep)), by = c("rowname" = "ID"))

  # Calculate the percentage of variance explained
  percent <- round(100 * pca$sdev^2 / sum(pca$sdev^2), 1)

  # Make factors of indicate features
  for(feat in indicate) {
    pca_df[[feat]] <- as.factor(pca_df[[feat]])
  }

  # Plot the PCA plot
  p <- ggplot(pca_df, aes(get(paste0("PC", x)), get(paste0("PC", y)))) +
    labs(title = paste0("PCA plot - top ", n, " variable proteins"),
      x = paste0("PC", x, ": ", percent[x], "%"),
      y = paste0("PC", y, ": ", percent[y], "%")) +
    coord_fixed() +
    theme_DEP1()

  if(length(indicate) == 0) {
    p <- p + geom_point(size = point_size)
  }
  if(length(indicate) == 1) {
    p <- p + geom_point(aes(col = pca_df[[indicate[1]]]),
                        size = point_size) +
      labs(col = indicate[1])
  }
  if(length(indicate) == 2) {
    p <- p + geom_point(aes(col = pca_df[[indicate[1]]],
                            shape = pca_df[[indicate[2]]]),
                        size = point_size) +
      labs(col = indicate[1],
           shape = indicate[2])
  }
  if(length(indicate) == 3) {
    p <- p + geom_point(aes(col = pca_df[[indicate[1]]],
                            shape = pca_df[[indicate[2]]]),
                        size = point_size) +
      facet_wrap(~pca_df[[indicate[3]]])
    labs(col = indicate[1],
         shape = indicate[2])
  }
  if(label) {
    p <- p + geom_text(aes(label = rowname), size = label_size)
  }
  if(plot) {
    return(p)
  } else {
    df <- pca_df %>%
      select(rowname, paste0("PC", c(x, y)), match(indicate, colnames(pca_df)))
    colnames(df)[1] <- "sample"
    return(df)
  }
}

#' Plot correlation matrix
#'
#' \code{plot_cor} generates a Pearson correlation matrix.
#'
#' @param dep SummarizedExperiment,
#' Data object for which differentially enriched proteins are annotated
#' (output from \code{\link{test_diff}()} and \code{\link{add_rejections}()}).
#' @param significant Logical(1),
#' Whether or not to filter for significant proteins.
#' @param lower Integer(1),
#' Sets the lower limit of the color scale.
#' @param upper Integer(1),
#' Sets the upper limit of the color scale.
#' @param pal Character(1),
#' Sets the color panel (from \pkg{RColorBrewer}).
#' @param pal_rev Logical(1),
#' Whether or not to invert the color palette.
#' @param indicate Character,
#' Sets additional annotation on the top of the heatmap
#' based on columns from the experimental design (colData).
#' @param font_size Integer(1),
#' Sets the size of the labels.
#' @param plot Logical(1),
#' If \code{TRUE} (default) the correlation matrix plot is produced.
#' Otherwise (if \code{FALSE}), the data which the
#' correlation matrix plot is based on are returned.
#' @param ... Additional arguments for Heatmap function as depicted in
#' \code{\link[ComplexHeatmap]{Heatmap}}
#' @return A heatmap plot (generated by \code{\link[ComplexHeatmap]{Heatmap}})
#' @examples
#' # Load example
#' data <- UbiLength
#' data <- data[data$Reverse != "+" & data$Potential.contaminant != "+",]
#' data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
#'
#' # Make SummarizedExperiment
#' columns <- grep("LFQ.", colnames(data_unique))
#' exp_design <- UbiLength_ExpDesign
#' se <- make_se(data_unique, columns, exp_design)
#'
#' # Filter, normalize and impute missing values
#' filt <- filter_missval(se, thr = 0)
#' norm <- normalize_vsn(filt)
#' imputed <- impute(norm, fun = "MinProb", q = 0.01)
#'
#' # Test for differentially expressed proteins
#' diff <- test_diff(imputed, "control", "Ctrl")
#' dep <- add_rejections(diff, alpha = 0.05, lfc = 1)
#'
#' # Plot correlation matrix
#' plot_cor(dep)
#' @export
plot_cor <- function(dep, significant = TRUE, lower = -1, upper = 1,
  pal = "PRGn", pal_rev = FALSE, indicate = NULL,
  font_size = 12, plot = TRUE, ...) {
  # Show error if inputs are not the required classes
  assertthat::assert_that(inherits(dep, "SummarizedExperiment"),
    is.logical(significant),
    length(significant) == 1,
    is.numeric(lower),
    length(lower) == 1,
    is.numeric(upper),
    length(upper) == 1,
    is.character(pal),
    length(pal) == 1,
    is.logical(pal_rev),
    length(pal_rev) == 1,
    is.numeric(font_size),
    length(font_size) == 1,
    is.logical(plot),
    length(plot) == 1)

  # Check for valid lower and upper values
  if(!(lower >= -1  & upper >= -1 & lower <= 1 & upper <= 1)) {
    stop("'lower' and/or 'upper' arguments are not valid
         Run plot_pca() with 'lower' and 'upper' between -1 and 1",
         call. = FALSE)
  }

  # Check for valid pal
  pals <- RColorBrewer::brewer.pal.info %>%
    rownames_to_column() %>%
    filter(category != "qual")
  if(!pal %in% pals$rowname) {
    stop("'", pal,"' is not a valid color panel",
         " (qualitative panels also not allowed)\n",
         "Run plot_pca() with one of the following 'pal' options: ",
         paste(pals$rowname, collapse = "', '"), "'",
         call. = FALSE)
  }
  if(any(is.na(assay(dep)))) {
    stop("Missing values in '", deparse(substitute(dep)), "'. Use plot_dist() instead")
  }

  # Heatmap annotation
  if(!is.null(indicate)) {
    assertthat::assert_that(is.character(indicate))

    col_data <- colData(dep) %>%
      as.data.frame()
    columns <- colnames(col_data)
    if(any(!indicate %in% columns)) {
      stop("'",
           paste0(indicate, collapse = "' and/or '"),
           "' column(s) is/are not present in ",
           deparse(substitute(dep)),
           ".\nValid columns are: '",
           paste(columns, collapse = "', '"),
           "'.",
           call. = FALSE)
    }

    # Get annotation
    anno <- colData(dep) %>%
      data.frame() %>%
      select(indicate)

    # Annotation color
    names <- colnames(anno)
    anno_col <- vector(mode="list", length=length(names))
    names(anno_col) <- names
    for(i in names) {
      var = anno[[i]] %>% unique() %>% sort()
      if(length(var) == 1)
        cols <- c("black")
      if(length(var) == 2)
        cols <- c("orangered", "cornflowerblue")
      if(length(var) < 7 & length(var) > 2)
        cols <- RColorBrewer::brewer.pal(length(var), "Pastel1")
      if(length(var) >= 7)
        cols <- RColorBrewer::brewer.pal(length(var), "Set3")
      names(cols) <- var
      anno_col[[i]] <-  cols
    }

    # HeatmapAnnotation object
    ha1 = HeatmapAnnotation(df = anno,
                            col = anno_col,
                            show_annotation_name = TRUE)
  } else {
    ha1 <- NULL
  }

  # Filter for significant proteins
  if(significant) {

    # Check for significant column
    if(!"significant" %in% colnames(rowData(dep, use.names = FALSE))) {
      stop("'significant' column is not present in '",
           deparse(substitute(dep)),
           "'\nRun add_rejections() to obtain the required column",
           call. = FALSE)
    }

    dep <- dep[rowData(dep, use.names = FALSE)$significant, ]
  }

  # Calculate correlation matrix
  cor_mat <- cor(assay(dep))

  # Plot heatmap
  ht1 = Heatmap(cor_mat,
    col = circlize::colorRamp2(
      seq(lower, upper, ((upper-lower)/7)),
      if(pal_rev) {
        rev(RColorBrewer::brewer.pal(8, pal))
      } else {
        RColorBrewer::brewer.pal(8, pal)
      }),
    heatmap_legend_param = list(
      color_bar = "continuous",
      legend_direction = "horizontal",
      legend_width = unit(5, "cm"),
      title_position = "topcenter"),
    name = "Pearson correlation",
    column_names_gp = gpar(fontsize = font_size),
    row_names_gp = gpar(fontsize = font_size),
    top_annotation = ha1,
    ...)
  if(plot) {
    draw(ht1, heatmap_legend_side = "top")
  } else {
    df <- as.data.frame(cor_mat)
    return(df)
  }
}

#' Plot Gower's distance matrix
#'
#' \code{plot_dist} generates a distance matrix heatmap using the Gower's distance.
#'
#' @param dep SummarizedExperiment,
#' Data object for which differentially enriched proteins are annotated
#' (output from \code{\link{test_diff}()} and \code{\link{add_rejections}()}).
#' @param significant Logical(1),
#' Whether or not to filter for significant proteins.
#' @param pal Character(1),
#' Sets the color panel (from \pkg{RColorBrewer}).
#' @param pal_rev Logical(1),
#' Whether or not to invert the color palette.
#' @param indicate Character,
#' Sets additional annotation on the top of the heatmap
#' based on columns from the experimental design (colData).
#' @param font_size Integer(1),
#' Sets the size of the labels.
#' @param plot Logical(1),
#' If \code{TRUE} (default) the distance matrix plot is produced.
#' Otherwise (if \code{FALSE}), the data which the
#' distance matrix plot is based on are returned.
#' @param ... Additional arguments for Heatmap function as depicted in
#' \code{\link[ComplexHeatmap]{Heatmap}}
#' @return A heatmap plot (generated by \code{\link[ComplexHeatmap]{Heatmap}})
#' @examples
#' # Load example
#' data <- UbiLength
#' data <- data[data$Reverse != "+" & data$Potential.contaminant != "+",]
#' data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
#'
#' # Make SummarizedExperiment
#' columns <- grep("LFQ.", colnames(data_unique))
#' exp_design <- UbiLength_ExpDesign
#' se <- make_se(data_unique, columns, exp_design)
#'
#' # Filter, normalize and impute missing values
#' filt <- filter_missval(se, thr = 0)
#' norm <- normalize_vsn(filt)
#' imputed <- impute(norm, fun = "MinProb", q = 0.01)
#'
#' # Test for differentially expressed proteins
#' diff <- test_diff(imputed, "control", "Ctrl")
#' dep <- add_rejections(diff, alpha = 0.05, lfc = 1)
#'
#' # Plot correlation matrix
#' plot_dist(dep)
#' @export
plot_dist <- function(
  dep, significant = TRUE, pal = "YlOrRd",
  pal_rev = TRUE, indicate = NULL,
  font_size = 12, plot = TRUE, ...) {
  # Show error if inputs are not the required classes
  assertthat::assert_that(inherits(dep, "SummarizedExperiment"),
    is.logical(significant),
    length(significant) == 1,
    is.character(pal),
    length(pal) == 1,
    is.logical(pal_rev),
    length(pal_rev) == 1,
    is.numeric(font_size),
    length(font_size) == 1,
    is.logical(plot),
    length(plot) == 1)

  # Check for valid pal
  pals <- RColorBrewer::brewer.pal.info %>%
    rownames_to_column() %>%
    filter(category != "qual")
  if(!pal %in% pals$rowname) {
    stop("'", pal,"' is not a valid color panel",
      " (qualitative panels also not allowed)\n",
      "Run plot_pca() with one of the following 'pal' options: ",
      paste(pals$rowname, collapse = "', '"), "'",
      call. = FALSE)
  }

  # Heatmap annotation
  if(!is.null(indicate)) {
    assertthat::assert_that(is.character(indicate))

    col_data <- colData(dep) %>%
      as.data.frame()
    columns <- colnames(col_data)
    if(any(!indicate %in% columns)) {
      stop("'",
        paste0(indicate, collapse = "' and/or '"),
        "' column(s) is/are not present in ",
        deparse(substitute(dep)),
        ".\nValid columns are: '",
        paste(columns, collapse = "', '"),
        "'.",
        call. = FALSE)
    }

    # Get annotation
    anno <- colData(dep) %>%
      data.frame() %>%
      select(indicate)

    # Annotation color
    names <- colnames(anno)
    anno_col <- vector(mode="list", length=length(names))
    names(anno_col) <- names
    for(i in names) {
      var = anno[[i]] %>% unique() %>% sort()
      if(length(var) == 1)
        cols <- c("black")
      if(length(var) == 2)
        cols <- c("orangered", "cornflowerblue")
      if(length(var) < 7 & length(var) > 2)
        cols <- RColorBrewer::brewer.pal(length(var), "Pastel1")
      if(length(var) >= 7)
        cols <- RColorBrewer::brewer.pal(length(var), "Set3")
      names(cols) <- var
      anno_col[[i]] <-  cols
    }

    # HeatmapAnnotation object
    ha1 = HeatmapAnnotation(df = anno,
      col = anno_col,
      show_annotation_name = TRUE)
  } else {
    ha1 <- NULL
  }

  # Filter for significant proteins
  if(significant) {

    # Check for significant column
    if(!"significant" %in% colnames(rowData(dep, use.names = FALSE))) {
      stop("'significant' column is not present in '",
        deparse(substitute(dep)),
        "'\nRun add_rejections() to obtain the required column",
        call. = FALSE)
    }

    dep <- dep[rowData(dep, use.names = FALSE)$significant, ]
  }

  # Calculate distance matrix
  dist_mat <- cluster::daisy(t(assay(dep)), metric = "gower") %>%
    as.matrix()
  max <- max(dist_mat)

  # Plot heatmap
  ht1 = Heatmap(dist_mat,
    col = circlize::colorRamp2(
      seq(0, max, ((max)/7)),
      if(pal_rev) {
        rev(RColorBrewer::brewer.pal(8, pal))
      } else {
        RColorBrewer::brewer.pal(8, pal)
      }),
    heatmap_legend_param = list(color_bar = "continuous",
      legend_direction = "horizontal",
      legend_width = unit(5, "cm"),
      title_position = "topcenter"),
    name = "Gower's distance",
    column_names_gp = gpar(fontsize = font_size),
    row_names_gp = gpar(fontsize = font_size),
    top_annotation = ha1,
    ...)
  if(plot) {
    draw(ht1, heatmap_legend_side = "top")
  } else {
    df <- as.data.frame(dist_mat)
    return(df)
  }
}