R/plot_filter_diagnostic.R

Defines functions plot_filter_diagnostic

Documented in plot_filter_diagnostic

#' Diagnostic plots for read filtering
#' @param x matrix with features in rows, samples in columns
#' @param method method for analysis (counts / cpm)
#' @param threshold value for threshold
#' @param group grouping factor
#' @param as_boxplot generate boxplots?
#' @param facet facet variable
#' @param non_zero delete variables without counts for any observation?
#' @export

plot_filter_diagnostic <- function(x, method = c("counts", "cpm"),
                                   threshold, group = NULL,
                                   as_boxplot = TRUE,
                                   facet = c("group", "value"),
                                   not_zero = TRUE,
                                   groups_total = FALSE) {

  method <- match.arg(method)
  facet <- match.arg(facet)

  if(groups_total) {
    x <- t(apply(x, 1, function(y) tapply(y, group, sum, na.rm = TRUE)))
  }
  if(not_zero) {
    message("Removing features with zero counts for all the groups from this analysis...\n")
    x <- x[-which(rowSums(x) == 0), ]
  }

  if(method == "cpm") {
    library_size <- colSums(x)
    x <- t(apply(x, 1, function(x) 1E6 * x / library_size))
  }

  total_features <- nrow(x)
  out <- list()

  i <- 1
  for(th in threshold) {
    out[[i]] <-  100 * apply(x, 2, function(y) sum(y >= th) / total_features)
    i <- i + 1
  }
  out <- round(do.call("cbind", out), 1)
  out <- data.frame(out)
  colnames(out) <- threshold

  result <- ggplot2::ggplot(data = out)

  if(!is.null(group) && !groups_total) {
    out <- cbind(rownames(out), group, out)
    out <- reshape2::melt(out)
    colnames(out) <- c("Sample", "Treatment", "Value",  "Proportion")

    if(!as_boxplot) {
      result <-  result + ggplot2::geom_line(data = out,
                                             ggplot2::aes(x = Value,
                                                          y = Proportion,
                                                          group = Sample,
                                                          col = factor(Treatment)))
    } else {
      if(facet == "group") {
        result <-   ggplot2::ggplot(data = out)+
          ggplot2::geom_boxplot(data = out,
                                ggplot2::aes(x = Value, y = Proportion,
                                             fill = Treatment),
                                show.legend = FALSE) +
          ggplot2::facet_wrap(~Treatment)
      } else {

        result <-   ggplot2::ggplot(data = out)+
          ggplot2::geom_boxplot(data = out,
                                ggplot2::aes(x = Treatment, y = Proportion,
                                             fill = Treatment),
                                show.legend = FALSE) +
          ggplot2::facet_wrap(~Value)
      }
    }

  } else {

    out <- cbind(rownames(out), out)
    out[, 1] <- as.factor(out[, 1])
    out <- reshape2::melt(out)
    colnames(out) <- c("Sample", "Value",  "Proportion")
    result <-  result + ggplot2::geom_line(data = out,
                                           ggplot2::aes(x = Value,
                                                        y = Proportion,
                                                        group = Sample,
                                                        col = Sample)
    )
  }

  x_label <- ifelse(method == "cpm", "CPM", "Counts")
  result <- result +
    ggplot2::xlab(x_label) +
    ggplot2::ylab("Proportion (%)") +
    ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, hjust = 1))

  result
}
leandroroser/RNASeqFunctions documentation built on May 17, 2019, 7:31 p.m.