R/quality_collapse.r

Defines functions quality_collapse

Documented in quality_collapse

#' Quality Control Stats Collapser
#'
#' Reads \code{AllSamples_allReadStats.txt} (space-delimited, no header row)
#' from the data directory, derives the number of reads that failed quality
#' filtering, reshapes the counts to long format, flags extreme
#' \code{QPassReads} values and saves the result as
#' \code{SequencingThroughput.rds} in the same directory. The session's
#' working directory is not modified.
#'
#' @param workdir Data directory path. If \code{NA} (the default) or
#'   \code{NULL}, the directory containing the script currently open in the
#'   RStudio source editor is used.
#'
#' @return Sequencing Throughput as tibble, in long format with the columns
#'   \code{SampleID}, \code{Step}, \code{value} and \code{included}
#'   (\code{"Yes"} or \code{"No"}).
#' @export
#'
#' @importFrom readr read_delim
#' @importFrom rstudioapi getSourceEditorContext
#' @importFrom tidyr gather
#' @importFrom stats quantile
#' @importFrom grDevices boxplot.stats
#' @import dplyr
#' @examples
#' \dontrun{
#' throughput <- quality_collapse("path/to/data")
#' }
quality_collapse <- function(workdir = NA) {

  # Resolve the data directory. Paths are built explicitly with file.path()
  # instead of calling setwd(), so the caller's working directory is left
  # exactly as it was found.
  if (is.null(workdir) || (length(workdir) == 1L && is.na(workdir))) {
    message(' Using RStudio hardcoded WD variable')
    script_path <- rstudioapi::getSourceEditorContext()$path
    if (is.null(script_path) || !nzchar(script_path)) {
      stop("Cannot infer the data directory: the active RStudio document ",
           "has no path. Please supply `workdir`.", call. = FALSE)
    }
    # `$path` points at the script file itself; the data directory is its
    # parent folder.
    workdir <- dirname(script_path)
  }
  if (!is.character(workdir) || length(workdir) != 1L) {
    stop("`workdir` must be a single directory path.", call. = FALSE)
  }

  # Load and process read stats
  read_dat <- read_delim(file.path(workdir, "AllSamples_allReadStats.txt"),
                         delim = ' ',
                         col_names = c('SampleID', 'RawData', 'FilteredData',
                                       'HumanFiltered', 'NonHumanFiltered')) %>%
    mutate(QFailReads = RawData - FilteredData,
           # Levels are fixed here, before sorting, so they follow the order
           # of the samples in the input file.
           SampleID = factor(SampleID, SampleID, ordered = TRUE)) %>%
    select(SampleID, QFailReads, Human = HumanFiltered,
           QPassReads = NonHumanFiltered) %>%
    arrange(QPassReads) %>%
    # Long format: one row per (SampleID, Step) pair.
    tidyr::gather('Step', 'value', -SampleID)

  # Reads passing all filters; reused for the report and outlier detection.
  qpass_reads <- read_dat %>%
    filter(Step == "QPassReads") %>%
    pull(value)

  message( ' QPassReads quantile at 2% = ',
           quantile(qpass_reads, probs = 0.02))

  # Mark outliers from QPassReads and export. coef = 5 stretches the boxplot
  # whiskers to 5 x IQR, so only values beyond that are reported as outliers.
  excluded_vals <- boxplot.stats(qpass_reads, coef = 5)[['out']]

  # NOTE(review): the comparison is applied to the values of every Step, not
  # only QPassReads, so an equal count in another Step is also marked 'No'.
  # Confirm whether this is intended.
  read_dat <- read_dat %>%
    mutate(included = if_else(! value %in% excluded_vals, 'Yes', 'No'))

  saveRDS(read_dat, file.path(workdir, 'SequencingThroughput.rds'))
  read_dat
}
xec-cm/metar documentation built on Oct. 13, 2020, 8:40 p.m.