R/dada2_upto_qualPlots.R

Defines functions dada2.upto.qualPlots

Documented in dada2.upto.qualPlots

#' Dada2 up to Quality Plots
#'
#' This function runs the first part of the pipeline, up through making quality plots, allowing the user to manually choose the truncation lengths based on those results.
#' @param fastq.path Path to raw FASTQ files.
#' @param file.split.pattern A character string containg the pattern you want to use to separate the sample name and R1/R2 in the symlink names. Default is '--'.
#' @param maxCores The maximum number of cores to utilize in steps that are parallelizabe. Default is 4.
#' @param random.seed A integer utilized by `set.seed` to make pipeline reproducible
#' @param user.out.path Path to a directory if you want to use a specific output directory instead of the one programmatically generated by the pipeline. Primarily used if you are re-running a pipeline and want to save to previously generated output path. Default is NULL (which will autogenerate the output path).
#' @param force Logical. By default the pipeline will skip steps if their corresponding output objects already exist. Set to TRUE to force the pipeline to run through all steps. Default is FALSE.
#' @param ask Logical. Whether to prompt user to verify files in `fastq.path`. FALSE skips this verification. Default TRUE.
#' @seealso \code{\link{dada2}}, \code{\link{plotQualityProfile}}
#' @export


dada2.upto.qualPlots <- function(
    fastq.path,
    file.split.pattern = "--",
    maxCores = 4,
    random.seed = 42,
    user.output.path = NULL,
    force = FALSE,
    ask = TRUE
) {
  if (ask) {
    print(head(list.files(fastq.path), 20))
    cat("Above are the first 20 (or fewer) files in the provided path,", sep = "\n")
    proceed <- readline(
      prompt = "\tdo you want to proceed? [y/n]: "
    )
    while (!(proceed %in% c("y", "n"))) {
      proceed <- readline(prompt = "Please answer y or n: ")
    }
  } else {
    proceed <- "y"
  }
  if (proceed == "n") {
    my.cat("Terminated")
  } else {
    theme_set(theme_cowplot())
    if (is.null(user.output.path)) {
      output <- run.env$output.path
    } else {
      output <- user.output.path
    }
    run.env$fnFs <- list.files(
      fastq.path,
      pattern = paste0(file.split.pattern, "R1"),
      full.names = TRUE
    ) %>% sort()
    run.env$fnRs <- list.files(
      fastq.path,
      pattern = paste0(file.split.pattern, "R2"),
      full.names = TRUE
    ) %>% sort()
    run.env$sample.names <- strsplit(
      basename(run.env$fnFs),
      file.split.pattern
    ) %>% sapply(`[`, 1)
    if (length(run.env$sample.names) > 20) {
      set.seed(random.seed)
      plot.samples <- sort(sample(run.env$sample.names, 20, replace = F))
      plot.fnFs <- run.env$fnFs[
        str_detect(run.env$fnFs, paste(plot.samples, collapse = "|"))
      ]
      plot.fnRs <- run.env$fnRs[
        str_detect(run.env$fnRs, paste(plot.samples, collapse = "|"))
      ]
      plot.fnFs.file <- "fwdReads_subset20_qualPlot.pdf"
      plot.fnRs.file <- "revReads_subset20_qualPlot.pdf"
    } else {
      plot.fnFs <- run.env$fnFs
      plot.fnRs <- run.env$fnRs
      plot.fnFs.file <- "fwdReads_qualPlot.pdf"
      plot.fnRs.file <- "revReads_qualPlot.pdf"
    }

    plot.files <- c(file.path(output, plot.fnFs.file), file.path(output, plot.fnRs.file))
    if (all(file.exists(plot.files)) & !force) {
      my.cat("Quality plots already exist, skipping...")
    } else {
      my.cat("Making quality plots...")
      fnFs.qualPlot0 <- plotQualityProfile(plot.fnFs)
      fnFs.qualPlot <- fnFs.qualPlot0 +
        scale_x_continuous(breaks = seq(0, 300, 25)) +
        background_grid(major = "x", minor = "x") +
        theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
      ggsave(
        fnFs.qualPlot,
        file = plot.files[1]
      )

      fnRs.qualPlot0 <- plotQualityProfile(plot.fnRs)
      fnRs.qualPlot <- fnRs.qualPlot0 +
        scale_x_continuous(breaks = seq(0, 300, 25)) +
        background_grid(major = "x", minor = "x") +
        theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
      ggsave(
        fnRs.qualPlot,
        file = plot.files[2]
      )
    }
    my.cat("\tDONE")
  }
}
kstagaman/sharpton-lab-dada2-pipeline documentation built on Sept. 24, 2022, 11:41 a.m.