R/RcppExports.R

Defines functions calc_over_rep_seq gc_per_read qual_score_per_read calc_format_score find_format calc_adapter_content

Documented in calc_adapter_content calc_format_score calc_over_rep_seq find_format gc_per_read qual_score_per_read

# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

#' Compute adapter content in reads
#'
#' Counts how often each adapter appears in the reads of a FASTQ file.
#' This function is only available for macOS/Linux.
#'
#' @param infile filepath to the fastq sequence file
#' @param adapters filepath to the adapters file
#' @return map object keyed by adapter name, where each value is the number of
#'   times that adapter appears in the reads
#' @examples
#' if(.Platform$OS.type != "windows") {
#' adapter_file <- system.file("extdata", "adapters.txt", package = "qckitfastq")
#' infile <- system.file("extdata", "test.fq.gz", package = "qckitfastq")
#' content <- calc_adapter_content(infile, adapter_file)
#' }
#' @export
calc_adapter_content <- function(infile, adapters) {
  # Thin wrapper: both paths are forwarded unchanged to the native routine.
  .Call(
    "_qckitfastq_calc_adapter_content",
    infile,
    adapters,
    PACKAGE = "qckitfastq"
  )
}

#' Get the quality score encoding format of a FASTQ file
#'
#' Return possibilities are Sanger(/Illumina1.8), Solexa(/Illumina1.0),
#' Illumina1.3, and Illumina1.5. The detection is heuristic based and may not
#' be 100% accurate because the encodings overlap, so it is best if you
#' already know the format.
#'
#' @param infile A string giving the path for the fastq file
#' @param reads_used int, the number of reads to use to determine the
#'   encoding format.
#' @return A string denoting the read format. Possibilities are Sanger,
#'   Solexa, Illumina1.3, and Illumina1.5.
#' @examples
#' infile <- system.file("extdata", "10^5_reads_test.fq.gz", package = "qckitfastq")
#' find_format(infile,100)
#' @export
find_format <- function(infile, reads_used) {
  # Thin wrapper: arguments are forwarded unchanged to the native routine.
  .Call(
    "_qckitfastq_find_format",
    infile,
    reads_used,
    PACKAGE = "qckitfastq"
  )
}

#' Calculate score based on Illumina format
#'
#' @param score An ASCII quality score character from the fastq
#' @param score_format The Illumina format used to interpret \code{score}
#' @return a string with the best guess as to the Illumina format
#' @examples
#' calc_format_score("A","Sanger")
#' @export
calc_format_score <- function(score, score_format) {
  # NOTE(review): the documented return value ("best guess as to the Illumina
  # format") reads like it was copied from find_format(); a function that
  # calculates a score presumably returns that score. Confirm against the
  # C++ implementation before relying on the @return text.
  .Call(
    "_qckitfastq_calc_format_score",
    score,
    score_format,
    PACKAGE = "qckitfastq"
  )
}

#' Calculate the mean quality score per read of the FASTQ gzipped file
#'
#' @param infile A string giving the path for the fastq file
#' @return mean quality per read
#' @examples
#' infile <- system.file("extdata", "10^5_reads_test.fq.gz", package = "qckitfastq")
#' qual_score_per_read(infile)$q50_per_position[1:10]
#' @export
qual_score_per_read <- function(infile) {
  # NOTE(review): the example indexes `$q50_per_position`, which suggests the
  # result is a named list with more than per-read means -- confirm against
  # the C++ implementation and extend @return accordingly.
  .Call(
    "_qckitfastq_qual_score_per_read",
    infile,
    PACKAGE = "qckitfastq"
  )
}

#' Calculate GC nucleotide sequence content per read of the FASTQ gzipped file
#'
#' @param infile A string giving the path for the fastq file
#' @return GC content percentage per read
#' @examples
#' infile <- system.file("extdata", "10^5_reads_test.fq.gz", package = "qckitfastq")
#' gc_per_read(infile)[1:10]
#' @export
gc_per_read <- function(infile) {
  # Thin wrapper: the path is forwarded unchanged to the native routine.
  .Call(
    "_qckitfastq_gc_per_read",
    infile,
    PACKAGE = "qckitfastq"
  )
}

#' Calculate sequence counts for each unique sequence
#'
#' Creates a table with unique sequences and their corresponding counts.
#'
#' @param infile A string giving the path for the fastq file
#' @param min_size An int for thresholding over representation
#' @param buffer_size An int for the number of lines to keep in memory
#' @return calculated overrepresented sequence counts
#' @examples
#' infile <- system.file("extdata", "10^5_reads_test.fq.gz", package = "qckitfastq")
#' calc_over_rep_seq(infile)[seq_len(5)]
#' @export
calc_over_rep_seq <- function(infile, min_size = 5L, buffer_size = 1000000L) {
  # Thin wrapper: all three arguments are forwarded unchanged, in order, to
  # the native routine.
  .Call(
    "_qckitfastq_calc_over_rep_seq",
    infile,
    min_size,
    buffer_size,
    PACKAGE = "qckitfastq"
  )
}

Try the qckitfastq package in your browser

Any scripts or data that you put into this service are public.

qckitfastq documentation built on Nov. 8, 2020, 5:24 p.m.