# R/read_fastq.R
#
# Defines functions: read_fastq
#
# Documented in: read_fastq

#' Parse a fastq format sequence file into a tibble
#'
#' Reads a fastq file laid out as four lines per record (header, base calls,
#' separator, qualities), validates it and returns one row per read. An error
#' is raised if the number of non-blank lines is not a multiple of four, if
#' any header line does not start with \code{@}, if any read has a different
#' number of base calls and quality characters, or if any ID occurs more than
#' once.
#'
#' @param file Path to a readable file in fastq format. The file name must
#'   have the extension \code{fq}.
#'
#' @return A tibble with one row per read and the columns \code{ID} (the
#'   header line without its leading \code{@}), \code{Bases} (the base
#'   calls), \code{Qualities} (the quality string) and \code{GC} (the result
#'   of calling \code{gc_content()} on the base calls).
#' @export
#'
#' @examples
#' read_fastq(system.file("good.fq",package="packagetest"))
read_fastq <- function(file) {
  assertthat::assert_that(assertthat::is.readable(file))
  assertthat::assert_that(assertthat::has_extension(file, "fq"))

  # Read exactly one element per (non-blank) line:
  #  * sep = "\n" stops header lines that carry a description after the ID
  #    from being split on the embedded whitespace;
  #  * quote = "" stops quality strings containing ' or " from being treated
  #    as quoted fields;
  #  * na.strings = character(0) keeps a literal "NA" line (a valid two-base
  #    read) as text rather than a missing value;
  #  * quiet = TRUE suppresses the "Read N items" console message.
  file_lines <- base::scan(
    file,
    what = character(),
    sep = "\n",
    quote = "",
    na.strings = character(0),
    quiet = TRUE
  )

  # Each record occupies four lines, so anything else means the file is
  # truncated or malformed; fail here rather than on a recycling error later.
  if (length(file_lines) %% 4 != 0) {
    base::stop("The number of lines in the file is not a multiple of 4")
  }

  # The logical masks are recycled along the vector, selecting lines 1, 2
  # and 4 of every four-line record respectively.
  ids <- file_lines[c(TRUE, FALSE, FALSE, FALSE)]
  sequences <- file_lines[c(FALSE, TRUE, FALSE, FALSE)]
  qualities <- file_lines[c(FALSE, FALSE, FALSE, TRUE)]

  if (!all(base::startsWith(ids, "@"))) {
    base::stop("Some ID lines didn't start with @")
  }

  # Drop the leading "@" marker from every header line.
  ids <- stringr::str_sub(ids, 2)

  if (!all(base::nchar(sequences) == base::nchar(qualities))) {
    base::stop("Some sequences were a different length to the qualities")
  }

  if (any(base::duplicated(ids))) {
    base::stop("Some IDs are duplicated")
  }

  tibble::tibble(
    ID = ids,
    Bases = sequences,
    Qualities = qualities,
    GC = gc_content(sequences)
  )
}
# s-andrews/packagetest documentation built on Dec. 22, 2021, 8:21 p.m.