R/pre-analysis.R

Defines functions DESeq_PCA raw_counts_PCA run_NOISeq

Documented in DESeq_PCA raw_counts_PCA run_NOISeq

#' @title NOISeq wrapper
#'
#' @description NOISeq wrapper
#' @param ns_dat an object of eSet class to be used by NOISeq functions, produced by the NOISeq readData function
#' @param min_count an integer specifying the cutoff for a feature to be considered undetected (feature count > min_count to be detected)
#' @import NOISeq
#' @import ggplot2
#' @export
#' @examples
#' run_NOISeq(ns_dat, min_count)

run_NOISeq <- function(ns_dat, min_count) {
  samples <- sampleNames(ns_dat)

  # generate a saturation plot ------------------------------------------------
    saturation_dat <- dat(
      ns_dat, k = min_count, ndepth = 6, type = "saturation"
    )

    png(
      filename = ("Saturation Plot.png"),
      width = 2500, height = 2000, units = "px", res = 288
    )
    explo.plot(saturation_dat, samples = 1:length(samples), toplot = "global")
    dev.off()

  # generate count distribution and sensitivity plots -------------------------
    countsBio_dat <- dat(ns_dat, type = "countsbio")

    png(
      filename = ("Count Distribution.png"),
      width = 3000, height = 3000, units = "px", res = 288
    )
    explo.plot(
      countsBio_dat, samples = NULL, toplot = "global", plottype = "boxplot"
    )
    dev.off()

    png(
      filename = ("Sensitivity Plot.png"),
      width = 3000, height = 3000, units = "px", res = 288
    )
    explo.plot(countsBio_dat, samples = NULL, toplot = 1, plottype = "barplot")
    title(ylab = "% of features")
    dev.off()

  # generate length bias plots ------------------------------------------------
    lengthBias_dat = dat(ns_dat, factor = "timepoint", type = "lengthbias")

    for (n in levels(ns_dat$timepoint)) {
      png(
        filename = (stri_join(n, " Length Bias.png")),
        width = 2000, height = 2000, units = "px", res = 288
      )
      explo.plot(lengthBias_dat, samples = n, toplot = "global")
      dev.off()
    }
}



#' @title Wrapper for PCA Plots
#'
#' @description Wrapper for creating PCA plots using raw count data
#' @param raw_counts data.frame with raw gene/transcript counts
#' @param timepoint factor specifying timepoints
#' @param animal factor specifying animal number
#' @export
#' @examples
#' raw_counts_PCA(raw_counts, timepoint, animal)

raw_counts_PCA <- function(raw_counts, timepoint, animal) {
  raw_counts_mat <- data.matrix(t(raw_counts))
  pca_counts <- prcomp(raw_counts_mat, scale. = TRUE)
  df_counts <- data.frame(timepoint, animal, raw_counts_mat)

  autoplot(
    pca_counts, data = df_counts,
    colour = 'timepoint', shape = 'animal', size = 3
  )
}



#' @title DESeq2 Wrapper for PCA Plots
#'
#' @description wrapper for creating PCA plots from VST normalized counts using the DESeq2 package
#' @param raw_counts data.frame with raw gene/transcript counts
#' @param timepoint factor specifying timepoints
#' @param animal factor specifying animal number
#' @import DESeq2
#' @export
#' @examples
#' DESeq_PCA(raw_counts, timepoint, animal)

DESeq_PCA <- function(raw_counts, timepoint, animal) {
  counts_int <- data.matrix(round(raw_counts))
  dds <- DESeqDataSetFromMatrix(
    countData = counts_int,
    colData = data.frame(timepoint, animal),
    design = ~ animal + timepoint
  )
  vsd <- vst(dds, blind = FALSE)
  pca <- plotPCA(vsd, intgroup = c("timepoint"), returnData = TRUE)
  percentVar <- round(100 * attr(pca, "percentVar"))

  ggplot(pca, aes(PC1, PC2, color = timepoint, shape = animal)) +
    geom_point(size=3) +
    xlab(paste0("PC1: ",percentVar[1],"% variance")) +
    ylab(paste0("PC2: ",percentVar[2],"% variance")) +
    coord_fixed()
}
Emask7/ZIKVtranscriptomics documentation built on Jan. 25, 2022, 12:32 a.m.