R/read-snp-matrix.R

Defines functions read_snp_matrix

Documented in read_snp_matrix

#' Read counts file
#'
#' Reads a gzipped output file from \code{snp-pileup}.
#'
#' @param input_file Path to input file.
#' @param err.thresh Threshold for errors at locus.
#' @param del.thresh Threshold for deletions at locus.
#' 
#' @source \code{snp-pileup} is part of \href{www.github.com/mskcc/facets}{FACETS}.
#'
#' @return A SNP count matrix, with the following columns:
#' \itemize{
#'       \item{\code{Chromosome} and \code{Position}:} {Chromosomal position of SNP.}
#'       \item{\code{NOR.DP}:} {Total read depth in normal sample.}
#'       \item{\code{TUM.DP}:} {Total read depth in tumor sample.}
#'       \item{\code{NOR.RD}:} {Reference allele read depth in normal sample.}
#'       \item{\code{TUM.RD}:} {Reference allele read depth in tumor sample.}
#' }
#' 
#' @import data.table

#' @export
read_snp_matrix = function(input_file,
                           err.thresh = 10,
                           del.thresh = 10) {
    
    read_counts = data.table::fread(cmd = paste('gunzip -c', input_file), key = c('Chromosome', 'Position'))
    
    if (nrow(read_counts) == 0) { # necessary since fread command doesn't throw errors for certain cases
        stop(paste(input_file, 'does not exist or cannot be read properly.'), call. = F)
    }
    
    read_counts = read_counts[File1E <= err.thresh & File2E <= err.thresh &
                              File1D <= del.thresh & File2D <= del.thresh &
                              !Chromosome %in% c('MT', 'chrM', 'Y', 'chrY')]
    
    read_counts[, `:=`(
        NOR.DP = File1R + File1A,
        TUM.DP = File2R + File2A,
        NOR.RD = File1R,
        TUM.RD = File2R,
        Chromosome = gsub('chr', '', Chromosome)
        )][, ('Chromosome') := factor(get('Chromosome'), levels = c(1:22, 'X'))]

    read_counts[order(Chromosome, Position)][, list(Chromosome, Position, NOR.DP, TUM.DP, NOR.RD, TUM.RD)]
}
mskcc/facets-suite documentation built on Sept. 13, 2022, 4:14 a.m.