badger: Bayesian approach for detecting copy number alterations from single cell RNA-seq data

Documented in calcErrorRate

# Calculate various errors and bias metrics


# Helper function Peter likes
sn <- function(x) { names(x) <- x; x }

# Helper to convert from list of "chr:start:end" to data frame
snp2df <- function(pos, delim=':') {
    posp <- as.data.frame(do.call(rbind, strsplit(pos, delim)))
    posp[,1] <- as.character(posp[,1])
    posp[,2:ncol(posp)] <- as.numeric(as.character(posp[,2:ncol(posp)]))
    return(posp)
}

#' Calculate effective error rate
#'
#' Calculate effective error rate from sequencing, alignment, and other potential error sources
#'
#' @param type Type of SNPs provided in \code{snpsCov}, \code{snpsRef}, and \code{snpsAlt} used to estimate effective error. This should be (an unambiguous abbreviation of) one of 'homo_ref', 'homo_alt', or 'het'.
#' @param snpsCov A matrix of snp positions as rows and cells as columns with each value corresponding to the total read count at the corresponding snp position in the corresponding cell
#' @param snpsRef A matrix of snp positions as rows and cells as columns with each value corresponding to the reference read count at the corresponding snp position in the corresponding cell. Should have the same snp positions and cells as in \code{snpsCov}
#' @param snpsAlt A matrix of snp positions as rows and cells as columns with each value corresponding to the alternative read count at the corresponding snp position in the corresponding cell. Should have the same snp positions and cells as in \code{snpsCov}
#' @param cnvs A vector of copy number variation regions or other regions with entry as a string in format 'chromosome:start:end'. Optional. Default = NULL
#' @param n Minimum fraction of cell in which we must have coverage at a snp position. Useful for eliminating false positives. Default = 0.1
#'
#' @return Effective error rate
#'
#' @examples
#' \dontrun{
#' deletion.region <- data.frame('chr'=2, start=0, end=1e9)
#' calcErrorRate('het', snpsCov, snpsRef, snpsAlt, cnvs=deletion.region) # ignore regions imapcted by deletion
#' calcErrorRate('het', snpsCov, snpsRef, snpsAlt) # use all regions
#' }
#'
#' @export
calcErrorRate <- function(type, snpsCov, snpsRef, snpsAlt, cnvs=NULL, n=0.1) {

    snpsCovName <- snp2df(rownames(snpsCov))

    calcErrorRateHomoRef <- function(snpsCov, snpsRef, cnvs, n) {
        # Filter for SNPs that are not inside CNV regions and have coverage in more than n*100% of cells
        if (!is.null(cnvs)) {
            vi <- !(insideCnvs(snpsCovName, cnvs)) & rowSums(snpsCov > 0) > round(ncol(snpsCov)*n)
        } else {
            vi <- rowSums(snpsCov > 0) > round(ncol(snpsCov)*n)
        }
        # Exclude positions that never showed any ref coverage (mistake? somatic?)
        vi2 <- vi & rowSums(snpsRef) > 0
        # Calculate error as frequency of observing non-reference allele
        er <- sum(rowSums(snpsCov[vi2,]) - rowSums(snpsRef[vi2,]))/ sum(rowSums(snpsCov[vi2,]))
        return(er)
    }

    calcErrorRateHomoAlt <- function(snpsCov, snpsAlt, cnvs, n) {
        # Filter for SNPs that are not inside CNV regions and have coverage in more than n*100% of cells
        if (!is.null(cnvs)) {
            vi <- !(insideCnvs(snpsCovName, cnvs)) & rowSums(snpsCov > 0) > round(ncol(snpsCov)*n)
        } else {
            vi <- rowSums(snpsCov > 0) > round(ncol(snpsCov)*n)
        }
        # Exclude positions that never showed any alt coverage (mistake? somatic?)
        vi2 <- vi & rowSums(snpsAlt) > 0
        # Calculate error as frequency of observing non-alternative allele
        er <- sum(rowSums(snpsCov[vi2,]) - rowSums(snpsAlt[vi2,]))/ sum(rowSums(snpsCov[vi2,]))
        return(er)
    }

    calcErrorRateHet <- function(snpsCov, snpsRef, snpsAlt, cnvs, n) {
        # Filter for SNPs that are not inside CNV regions and have coverage in more than n*100% of cells
        if (!is.null(cnvs)) {
            vi <- !(insideCnvs(snpsCovName, cnvs)) & rowSums(snpsCov > 0) > round(ncol(snpsCov)*n)
        } else {
            vi <- rowSums(snpsCov > 0) > round(ncol(snpsCov)*n)
        }
        # Calculate error as frequency of observing neither known alleles
        er <- sum(rowSums(snpsCov[vi,]) - rowSums(snpsAlt[vi,]) - rowSums(snpsRef[vi,]))/ sum(rowSums(snpsCov[vi,]))
        er*2
    }

    switch(type,
           homo_ref = calcErrorRateHomoRef(snpsCov, snpsRef, cnvs, n),
           homo_alt = calcErrorRateHomoAlt(snpsCov, snpsAlt, cnvs, n),
           het = calcErrorRateHet(snpsCov, snpsRef, snpsAlt, cnvs, n)
    )
}

JEFworks/badger documentation built on May 7, 2019, 7:40 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

JEFworks/badger
Bayesian approach for detecting copy number alterations from single cell RNA-seq data

R/calculate.R
In JEFworks/badger: Bayesian approach for detecting copy number alterations from single cell RNA-seq data

Defines functions sn snp2df calcErrorRate

Documented in calcErrorRate

R Package Documentation

Browse R Packages

We want your feedback!

JEFworks/badger Bayesian approach for detecting copy number alterations from single cell RNA-seq data

R/calculate.R In JEFworks/badger: Bayesian approach for detecting copy number alterations from single cell RNA-seq data

Defines functions sn snp2df calcErrorRate

Documented in calcErrorRate

R Package Documentation

Browse R Packages

We want your feedback!

JEFworks/badger
Bayesian approach for detecting copy number alterations from single cell RNA-seq data

R/calculate.R
In JEFworks/badger: Bayesian approach for detecting copy number alterations from single cell RNA-seq data