R/plot_functions_QC.R

Defines functions meanSdPlot plot_normalization plot_imputation plot_detect plot_missval

Documented in meanSdPlot plot_detect plot_imputation plot_missval plot_normalization

#' Plot row standard deviations versus row means
#'
#' \code{meanSdPlot} generates a hexagonal heatmap
#' of the row standard deviations versus row means
#' from SummarizedExperiment objects.
#' See \code{\link[vsn]{meanSdPlot}}.
#'
#' @param x SummarizedExperiment,
#' Data object.
#' @param ranks Logical,
#' Whether or not to plot the row means on the rank scale.
#' @param xlab Character,
#' x-axis label.
#' @param ylab Character,
#' y-axis label.
#' @param pch Ignored -
#' exists for backward compatibility.
#' @param plot Logical,
#' Whether or not to produce the plot.
#' @param bins Numeric vector,
#' Data object before normalization.
#' @param ... Other arguments,
#' Passed to \code{\link[ggplot2:geom_hex]{stat_binhex}}.
#' @return A scatter plot of row standard deviations
#' versus row means(generated by \code{\link[ggplot2:geom_hex]{stat_binhex}})
#' @examples
#' # Load example
#' data <- UbiLength
#' data <- data[data$Reverse != "+" & data$Potential.contaminant != "+",]
#' data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
#'
#' # Make SummarizedExperiment
#' columns <- grep("LFQ.", colnames(data_unique))
#' exp_design <- UbiLength_ExpDesign
#' se <- make_se(data_unique, columns, exp_design)
#'
#' # Filter and normalize
#' filt <- filter_missval(se, thr = 0)
#' norm <- normalize_vsn(filt)
#'
#' # Plot meanSdPlot
#' meanSdPlot(norm)
#' @export
meanSdPlot <- function(x,
  ranks = TRUE,
  xlab = ifelse(ranks, "rank(mean)", "mean"),
  ylab = "sd",
  pch,
  plot = TRUE,
  bins = 50,
  ...) {
  vsn::meanSdPlot(assay(x),
    ranks = ranks,
    xlab = xlab,
    ylab = ylab,
    pch = pch,
    plot = plot,
    ...)
}

#' Visualize normalization
#'
#' \code{plot_normalization} generates boxplots
#' of all conditions for input objects, e.g. before and after normalization.
#'
#' @param se SummarizedExperiment,
#' Data object, e.g. before normalization (output from \code{\link{make_se}()}
#' or \code{\link{make_se_parse}()}).
#' @param ... Additional SummarizedExperiment object(s),
#' E.g. data object after normalization
#' (output from \code{\link{normalize_vsn}}).
#' @return Boxplots of all conditions
#' for input objects, e.g. before and after normalization
#'   (generated by \code{\link[ggplot2]{ggplot}}).
#' Adding components and other plot adjustments can be easily done
#' using the ggplot2 syntax (i.e. using '+')
#' @examples
#' # Load example
#' data <- UbiLength
#' data <- data[data$Reverse != "+" & data$Potential.contaminant != "+",]
#' data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
#'
#' # Make SummarizedExperiment
#' columns <- grep("LFQ.", colnames(data_unique))
#' exp_design <- UbiLength_ExpDesign
#' se <- make_se(data_unique, columns, exp_design)
#'
#' # Filter and normalize
#' filt <- filter_missval(se, thr = 0)
#' norm <- normalize_vsn(filt)
#'
#' # Plot normalization
#' plot_normalization(se, filt, norm)
#' @export
plot_normalization <- function(se, ...) {
  # Get arguments from call
  call <- match.call()
  arglist <- lapply(call[-1], function(x) x)
  var.names <- vapply(arglist, deparse, character(1))
  arglist <- lapply(arglist, eval.parent, n = 2)
  names(arglist) <- var.names

  # Show error if inputs are not the required classes
  lapply(arglist, function(x) {
    assertthat::assert_that(inherits(x,
      "SummarizedExperiment"),
      msg = "input objects need to be of class 'SummarizedExperiment'")
    if (any(!c("label", "ID", "condition", "replicate") %in% colnames(colData(x)))) {
      stop("'label', 'ID', 'condition' and/or 'replicate' ",
        "columns are not present in (one of) the input object(s)",
        "\nRun make_se() or make_se_parse() to obtain the required columns",
        call. = FALSE)
    }
  })

  # Function to get a long data.frame of the assay data
  # annotated with sample info
  gather_join <- function(se) {
    assay(se) %>%
      data.frame() %>%
      gather(ID, val) %>%
      left_join(., data.frame(colData(se)), by = "ID")
  }

  df <- map_df(arglist, gather_join, .id = "var") %>%
    mutate(var = factor(var, levels = names(arglist)))

  # Boxplots for conditions with facet_wrap
  # for the original and normalized values
  ggplot(df, aes(x = ID, y = val, fill = condition)) +
    geom_boxplot(notch = TRUE, na.rm = TRUE) +
    coord_flip() +
    facet_wrap(~var, ncol = 1) +
    labs(x = "", y = expression(log[2]~"Intensity")) +
    theme_DEP1()
}

#' Visualize imputation
#'
#' \code{plot_imputation} generates density plots
#' of all conditions for input objects, e.g. before and after imputation.
#'
#' @param se SummarizedExperiment,
#' Data object, e.g. before imputation
#' (output from \code{\link{normalize_vsn}()}).
#' @param ... Other SummarizedExperiment object(s),
#' E.g. data object after imputation
#' (output from \code{\link{impute}()}).
#' @return Density plots of all conditions
#' of all conditions for input objects, e.g. before and
#' after imputation (generated by \code{\link[ggplot2]{ggplot}}).
#' @examples
#' # Load example
#' data <- UbiLength
#' data <- data[data$Reverse != "+" & data$Potential.contaminant != "+",]
#' data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
#'
#' # Make SummarizedExperiment
#' columns <- grep("LFQ.", colnames(data_unique))
#' exp_design <- UbiLength_ExpDesign
#' se <- make_se(data_unique, columns, exp_design)
#'
#' # Filter, normalize and impute missing values
#' filt <- filter_missval(se, thr = 0)
#' norm <- normalize_vsn(filt)
#' imputed <- impute(norm, fun = "MinProb", q = 0.01)
#'
#' # Plot imputation
#' plot_imputation(filt, norm, imputed)
#' @export
plot_imputation <- function(se, ...) {
  # Get arguments from call
  call <- match.call()
  arglist <- lapply(call[-1], function(x) x)
  var.names <- vapply(arglist, deparse, character(1))
  arglist <- lapply(arglist, eval.parent, n = 2)
  names(arglist) <- var.names

  # Show error if inputs are not the required classes
  lapply(arglist, function(x) {
    assertthat::assert_that(inherits(x,
      "SummarizedExperiment"),
      msg = "input objects need to be of class 'SummarizedExperiment'")
    if (any(!c("label", "ID", "condition", "replicate") %in% colnames(colData(x)))) {
      stop("'label', 'ID', 'condition' and/or 'replicate' ",
        "columns are not present in (one of) the input object(s)",
        "\nRun make_se() or make_se_parse() to obtain the required columns",
        call. = FALSE)
    }
  })

  # Function to get a long data.frame of the assay data
  # annotated with sample info
  gather_join <- function(se) {
    assay(se) %>%
      data.frame() %>%
      gather(ID, val) %>%
      left_join(., data.frame(colData(se)), by = "ID")
  }

  df <- map_df(arglist, gather_join, .id = "var") %>%
    mutate(var = factor(var, levels = names(arglist)))

  # Density plots for different conditions with facet_wrap
  # for original and imputed samles
  ggplot(df, aes(val, col = condition)) +
    geom_density(na.rm = TRUE) +
    facet_wrap(~var, ncol = 1) +
    labs(x = expression(log[2]~"Intensity"), y = "Density") +
    theme_DEP1()
}

#' Visualize intensities of proteins with missing values
#'
#' \code{plot_detect} generates density and CumSum plots
#' of protein intensities with and without missing values
#'
#' @param se SummarizedExperiment,
#' Data object with missing values.
#' @return Density and CumSum plots of intensities of
#' proteins with and without missing values
#' (generated by \code{\link[ggplot2]{ggplot}}).
#' @examples
#' # Load example
#' data <- UbiLength
#' data <- data[data$Reverse != "+" & data$Potential.contaminant != "+",]
#' data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
#'
#' # Make SummarizedExperiment
#' columns <- grep("LFQ.", colnames(data_unique))
#' exp_design <- UbiLength_ExpDesign
#' se <- make_se(data_unique, columns, exp_design)
#'
#' # Filter
#' filt <- filter_missval(se, thr = 0)
#'
#' # Plot intensities of proteins with missing values
#' plot_detect(filt)
#' @export
plot_detect <- function(se) {
    # Show error if inputs are not the required classes
    assertthat::assert_that(inherits(se, "SummarizedExperiment"))

    se_assay <- assay(se)
    # Show error if there are no missing values
    if(!any(is.na(se_assay))) {
      stop("No missing values in '", deparse(substitute(se)), "'",
           call. = FALSE)
    }

    # Get a long data.frame of the assay data annotated with sample info
    df <- se_assay %>%
      data.frame() %>%
      rownames_to_column() %>%
      gather(ID, val, -rowname)

    # Get a summarized table with mean protein intensities and
    # indication whether the protein has missing values
    stat <- df %>%
      group_by(rowname) %>%
      summarize(mean = mean(val, na.rm = TRUE), missval = any(is.na(val)))

    # Calculate cumulative fraction
    cumsum <- stat %>%
      group_by(missval) %>%
      arrange(mean) %>%
      mutate(num = 1, cs = cumsum(num), cs_frac = cs/n())

    # Plot the densities and cumalitive fractions for
    # proteins with and without missing values
    p1 <- ggplot(stat, aes(mean, col = missval)) +
      geom_density(na.rm = TRUE) +
      labs(x = expression(log[2]~"Intensity"), y = "Density") +
      guides(col = guide_legend(title = "Missing values")) +
      theme_DEP1()
    p2 <- ggplot(cumsum, aes(mean, cs_frac, col = missval)) +
      geom_line() +
      labs(x = expression(log[2]~"Intensity"), y = "Cumulative fraction") +
      guides(col = guide_legend(title = "Missing values")) +
      theme_DEP1()
    gridExtra::grid.arrange(p1, p2, ncol = 1)
}

#' Plot a heatmap of proteins with missing values
#'
#' \code{plot_missval} generates a heatmap of proteins
#' with missing values to discover whether values are missing by random or not.
#'
#' @param se SummarizedExperiment,
#' Data object with missing values.
#' @return A heatmap indicating whether values are missing (0) or not (1)
#' (generated by \code{\link[ComplexHeatmap]{Heatmap}}).
#' @examples
#' # Load example
#' data <- UbiLength
#' data <- data[data$Reverse != "+" & data$Potential.contaminant != "+",]
#' data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
#'
#' # Make SummarizedExperiment
#' columns <- grep("LFQ.", colnames(data_unique))
#' exp_design <- UbiLength_ExpDesign
#' se <- make_se(data_unique, columns, exp_design)
#'
#' # Filter, normalize and impute missing values
#' filt <- filter_missval(se, thr = 0)
#'
#' # Plot missing values heatmap
#' plot_missval(filt)
#' @export
plot_missval <- function(se) {
    # Show error if input is not the required classes
    assertthat::assert_that(inherits(se, "SummarizedExperiment"))

    se_assay <- assay(se)
    # Show error if there are no missing values
    if(!any(is.na(se_assay))) {
      stop("No missing values in '", deparse(substitute(se)), "'",
           call. = FALSE)
    }

    # Make assay data binary (1 = valid value, 0 = missing value)
    df <- se_assay %>% data.frame(.)
    missval <- df[apply(df, 1, function(x) any(is.na(x))), ]
    missval <- ifelse(is.na(missval), 0, 1)
    # Plot binary heatmap
    ht2 = Heatmap(missval,
      col = c("white", "black"),
      column_names_side = "top",
      show_row_names = FALSE,
      show_column_names = TRUE,
      name = "Missing values pattern",
      column_names_gp = gpar(fontsize = 16),
      heatmap_legend_param = list(at = c(0, 1),
        labels = c("Missing value", "Valid value")))
    draw(ht2, heatmap_legend_side = "top")
}

#' Plot a P value histogram
#'
#' \code{plot_p_hist} generates a p value histogram.
#'
#' @param dep SummarizedExperiment,
#' Data object for which differentially enriched proteins are annotated
#' (output from \code{\link{test_diff}()} and \code{\link{add_rejections}()}).
#' @param adjusted Logical(1),
#' Whether or not to use adjusted p values.
#' @param wrap Logical(1),
#' Whether or not to display different histograms for the different contrasts.
#' @return A histogram (generated by \code{\link[ggplot2]{ggplot}}).
#' @examples
#' # Load example
#' data <- UbiLength
#' data <- data[data$Reverse != "+" & data$Potential.contaminant != "+",]
#' data_unique <- make_unique(data, "Gene.names", "Protein.IDs", delim = ";")
#'
#' # Make SummarizedExperiment
#' columns <- grep("LFQ.", colnames(data_unique))
#' exp_design <- UbiLength_ExpDesign
#' se <- make_se(data_unique, columns, exp_design)
#'
#' # Filter, normalize and impute missing values
#' filt <- filter_missval(se, thr = 0)
#' norm <- normalize_vsn(filt)
#' imputed <- impute(norm, fun = "MinProb", q = 0.01)
#'
#' # Test for differentially expressed proteins
#' diff <- test_diff(imputed, "control", "Ctrl")
#' dep <- add_rejections(diff, alpha = 0.05, lfc = 1)
#'
#' # Plot p value histogram
#' plot_p_hist(dep)
#' plot_p_hist(dep, wrap = TRUE)
#' @export
plot_p_hist <- function(dep, adjusted = FALSE, wrap = FALSE) {
  assert_that(inherits(dep, "SummarizedExperiment"),
    is.logical(adjusted),
    length(adjusted) == 1,
    is.logical(wrap),
    length(wrap) == 1)

  # Get rowData
  row_data <- rowData(dep, use.names = FALSE) %>%
    data.frame()

  if(length(grep("_p.adj", colnames(row_data))) < 1) {
    stop("'[contrast]_p.adj' columns are not present in '",
         deparse(substitute(dep)),
         "'\nRun test_diff() to obtain the required columns",
         call. = FALSE)
  }
  if(length(grep("_p.val", colnames(row_data))) < 1) {
    stop("'[contrast]_p.val' columns are not present in '",
         deparse(substitute(dep)),
         "'\nRun test_diff() to obtain the required columns",
         call. = FALSE)
  }
  if(!"name" %in% colnames(row_data)) {
    stop("'name' column not present in '",
         deparse(substitute(dep)),
         "'\nRun make_se() or make_se_parse() to obtain the required columns",
         call. = FALSE)
  }

  # Grep P value columns
  if(adjusted) {
    p_val <- row_data %>%
      select(name, ends_with("_p.adj")) %>%
      gather(var, val, -name)
  } else {
    p_val <- row_data %>%
      select(name, ends_with("_p.val")) %>%
      gather(var, val, -name)
  }

  # Plot histogram
  p <- ggplot(p_val, aes(val)) +
    geom_histogram(bins = 100, center = 0.005) +
    labs(title = "P value histogram",
         x = "P-value") +
    theme_DEP1()
  if(wrap) {
    p <- p + facet_wrap(~ var)
  }
  p
}
arnesmits/DEP documentation built on Aug. 7, 2019, 10:44 a.m.