# R/RcppExports.R

# This file was generated by Rcpp::compileAttributes
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

# Internal wrapper: forwards its arguments, unchanged and in order, to the
# compiled routine registered as "coveRage_baf_stats_v1" in package coveRage.
# `minq` defaults to 0L.
baf_stats_v1 <- function(calls, quals, ref, minq = 0L) {
  .Call(
    "coveRage_baf_stats_v1",
    calls, quals, ref, minq,
    PACKAGE = "coveRage"
  )
}

#' @name baf_stats
#' @title baf_stats
#' @rdname baf_stats
#'
#' @description Convert mpileup output to count tables.
#'
#' @param inMatrix input matrix (see Details for the expected layout)
#' @param minq minimum quality required for a call to be retained
#'
#' @details The character matrix \strong{inMatrix} should consist of five columns:
#' the chromosome name (not presently used), the chromosomal position,
#' the reference allele, a string of calls, and a string of qualities,
#' in that order.
#' These data are expected to come from mpileup output.
#' Although mpileup can include data for multiple samples, each sample needs to be processed separately here.
#'
#' The reference alleles must be entirely upper case.
#' If they are not, see \code{toupper}.
#'
#' @export
baf_stats <- function(inMatrix, minq = 0L) {
  .Call(
    "coveRage_baf_stats",
    inMatrix, minq,
    PACKAGE = "coveRage"
  )
}

#' @title Parse data by a bed file
#' @rdname bedify
#' @name bedify
#'
#' @description Separate a data matrix into list elements based on coordinates from bed format data.
#'
#' @param myBed matrix of bed format data
#' @param myData StringMatrix or IntegerMatrix to be sorted
#' @param fill_missing include records for when there is no data (0, 1).  By default these records are omitted.
#' @param verbose should verbose output be generated (0, 1)
#'
#' @details
#'
#' \strong{Bed format} data have a minimum of three columns.
#' The chromosome (i.e., supercontig, scaffold, contig, etc.) is given in the first column.
#' The second column contains the starting positions and the third column the ending positions.
#' Columns four through nine are optional; the fourth column may, for example, hold feature names.
#' All subsequent columns are ignored here.
#' To help performance, the data are expected to be supplied as a character matrix.
#' The start and end positions are converted to numerics internally.
#'
#' The \strong{matrix format} used here is modelled on vcf type data.
#' Such data typically carry the chromosome in the first column.
#' Every chromosome has its own coordinate system, beginning at one,
#' so working with several chromosomes at once would require some adjustment of the coordinates.
#' The perspective taken here is that you should simply work on one chromosome at a time, so the chromosome information is ignored.
#' The first column is the chromosome and is ignored.
#' The second column is the position and is used for sorting.
#' Any further columns are not treated but are carried along with the subset.
#'
#'
#' When the matrix is numeric, the first column, which holds the chromosome identifier (CHROM), must be numeric as well.
#' This is because all elements of a matrix must be of the same type.
#'
#'
#' \href{https://genome.ucsc.edu/FAQ/FAQformat.html#format1}{Bed format} at UCSC
#'
#'
#' @examples
#' 
#' bed <- structure(c("chr_290", "chr_4176", "chr_126921", "chr_126921", 
#' "chr_125157", "chr_125157", "chr_125157", "chr_125157", "chr_126888", 
#' "chr_126888", "47", "400", "4344", "1", "3712", "6025", "2269", 
#' "1779", "7930", "4637", "80", "500", "4967", "9066", "6566", 
#' "6450", "2933", "2226", "11939", "7913", "gene_1", "gene_2", 
#' "gene_3", "gene_4", "gene_5", "gene_6", "gene_7", "gene_8", "gene_9", 
#' "gene_10"), .Dim = c(10L, 4L), .Dimnames = list(NULL, c("chrom", 
#' "chromStart", "chromEnd", "name")))
#' 
#' 
#' vcf.matrix <- structure(c("chr_290", "chr_290", "chr_4176", "chr_4176", "chr_50514", 
#' "chr_64513", "chr_107521", "chr_121987", "chr_122006", "chr_122006", 
#' "78", "96", "406", "425", "863", "2853", "77", "103", "243", 
#' "636", "0/1:5,4:9:99:117,0,153", "0/0:9,0:9:99:0,27,255", "0/1:10,11:21:99:255,0,255", 
#' "0/1:10,11:21:99:255,0,255", "0/1:14,14:28:99:255,0,255", "0/1:29,13:42:99:255,0,255", 
#' "0/1:26,11:37:99:255,0,255", "0/1:21,14:35:99:255,0,255", "0/0:12,1:13:67:0,4,255", 
#' "0/1:55,8:63:99:99,0,255", "0/1:10,8:18:99:234,0,255", "0/0:17,0:17:99:0,51,255", 
#' "0/1:16,13:29:99:255,0,255", "0/1:16,13:29:99:255,0,255", "0/1:26,19:45:99:255,0,255", 
#' "0/1:50,19:69:99:255,0,255", "0/1:62,17:79:99:255,0,255", "0/1:95,22:117:99:255,0,255", 
#' "0/1:32,5:37:99:68,0,255", "0/1:69,21:90:99:255,0,255"), .Dim = c(10L, 
#' 4L), .Dimnames = list(NULL, c("CHROM", "POS", "sample_1", "sample_2"
#' )))
#' 
#' 
#' class(bed)
#' is.character(bed)
#' class(vcf.matrix)
#' is.character(vcf.matrix)
#' 
#' var.list <- bedify(bed, vcf.matrix)
#' table(unlist(lapply(var.list, nrow)))
#' 
#' @export
bedify <- function(myBed, myData, fill_missing = 0L, verbose = 0L) {
  .Call(
    "coveRage_bedify",
    myBed, myData, fill_missing, verbose,
    PACKAGE = "coveRage"
  )
}

# Internal wrapper: takes no arguments and calls the compiled routine
# registered as "coveRage_rcpp_hello_world" in package coveRage.
rcpp_hello_world <- function() {
  .Call(
    "coveRage_rcpp_hello_world",
    PACKAGE = "coveRage"
  )
}

#' @title File input
#' @name File input
#'
#' @description Fast but featureless input of tabular data in either *.txt or *.gz format.
#'
#'
#' @rdname read_matrix
#' @aliases file_stats
#'
#' @param filename name of a file
#' @param sep character which delimits columns
#' @param nrows number of rows to read
#' @param skip number of rows to skip
#' @param verbose should verbose output be generated
#'
#' @details \strong{file_stats} summarizes a file's contents as a three element vector.
#' 'Total_rows' is the total number of rows read:
#'  either the number of rows in the file, or the number of skipped rows plus the number of rows read in.
#'  'Rows' is the number of rows read in:
#'  either nrows, or however many rows were read after skip and before the end of the file (when that is less than nrows).
#'  'Columns' is the number of columns obtained after delimiting with sep.
#'  This information is intended to be used with read_matrix.
#'
#' @return \strong{file_stats} returns a three element vector.
#'
#'
#' @export
file_stats <- function(filename, sep = "\t", nrows = -1L, skip = 0L, verbose = 1L) {
  .Call(
    "coveRage_file_stats",
    filename, sep, nrows, skip, verbose,
    PACKAGE = "coveRage"
  )
}

#' @rdname read_matrix
#' @aliases read_matrix
#'
#' @param cols vector of column numbers to include in the matrix
#'
#' @details \strong{read_matrix} returns a matrix of strings whose dimensions are specified by nrows and cols.
#' The cols parameter is an integer vector naming the columns to be read in.
#'
#' @return \strong{read_matrix} returns a matrix of strings
#'
#'
#' @seealso
#' \href{http://cran.r-project.org/package=readr}{readr}
#' \href{http://cran.r-project.org/package=data.table}{data.table::fread}
#'
#' @export
read_matrix <- function(filename, sep = "\t", nrows = 1L, cols = 0L, skip = 0L, verbose = 1L) {
  .Call(
    "coveRage_read_matrix",
    filename, sep, nrows, cols, skip, verbose,
    PACKAGE = "coveRage"
  )
}

# Internal wrapper: hands `x` to the compiled routine registered as
# "coveRage_parallelVectorSum" in package coveRage and returns its result.
parallelVectorSum <- function(x) {
  .Call(
    "coveRage_parallelVectorSum",
    x,
    PACKAGE = "coveRage"
  )
}

#' @name write_matrix
#' @title write_matrix
#' @rdname write_matrix
#'
#' @description Write matrix data to a gzipped file.
#'
#' @param filename filename for output
#' @param mymatrix matrix to be written to file
#' @param sep delimiting character
#' @param verbose should verbose output be generated?
#'
#' @details
#' The matrix is written to a gzipped file, with columns delimited by sep.
#' Data are appended to the file.
#' Appending is intended to let other calls add header information first,
#' and should also allow a file to grow through incremental processes.
#'
#' @export
write_matrix <- function(filename, mymatrix, sep = "\t", verbose = 1L) {
  # The compiled routine's result is returned invisibly.
  status <- .Call(
    "coveRage_write_matrix",
    filename, mymatrix, sep, verbose,
    PACKAGE = "coveRage"
  )
  invisible(status)
}
# knausb/coveRage documentation built on May 20, 2019, 12:52 p.m.