R/stabiliseVariance.R
In BUMHMM: Computational pipeline for computing probability of modification from structure probing experiment data

Documented in stabiliseVariance

# BUM-HMM
# Copyright (C) 2016 Alina Selega, Sander Granneman, Guido Sanguinetti

.stabiliseVariance <- function(covFile, dorFile, analysedC, analysedCT, Nc, Nt)
{

    if ((Nc < 2) | (Nt < 2)) {
        stop('Number of control and treatment replicates must be at least 2.')
    }
    else if ((length(analysedC) == 0) | (length(analysedCT) == 0)) {
        stop('All lists of positions selected for pair-wise comparisons should
             be non-empty.')
    }
    else if (any(sapply(analysedC, function(x) length(x) == 0))) {
        stop('All lists of positions selected for pair-wise comparisons should
             be non-empty.')
    }
    else if (any(sapply(analysedCT, function(x) length(x) == 0))) {
        stop('All lists of positions selected for pair-wise comparisons should
             be non-empty.')
    }
    else if (any(is.na(covFile)) | any(is.na(dorFile))) {
        stop('The coverage and drop-off count matrices should not have NA
             entries.')
    }
    else {
        ## Number of nucleotides
        nNucl <- dim(covFile)[1]
        ## Number of comparisons between controls
        nCompC <- length(analysedC)

        covVector <- array()
        ldRatVector <- array()

        ## Enumerate all pairs of control replicates
        index <- t(combn(Nc, 2))

        for (i in 1:length(analysedC)) {

            ## Extract coverage and drop-off rates of nucleotides in
            ## control-control comparisons
            coverageC <- covFile[analysedC[[i]], index[i, ]]
            dorC <- dorFile[analysedC[[i]], index[i, ]]

            ## Compute average coverage between the pair of control replicates
            ## at the selected nucleotides
            avgCov <- rowMeans(coverageC)
            dim(avgCov) <- c(length(analysedC[[i]]), 1)

            ## Put them in a vector
            covVector <- c(avgCov, covVector)
            if (i == 1) {
                covVector <- covVector[1:(length(covVector) - 1)]
            }

            ## Compute LDRs for the pair of replicates
            logRat <- log(dorC[, 1]) - log(dorC[, 2])
            dim(logRat) <- c(length(analysedC[[i]]), 1)

            ## Put them in a vector
            ldRatVector <- c(logRat, ldRatVector)
            if (i == 1) {
                ldRatVector <- ldRatVector[1:(length(ldRatVector) - 1)]
            }
        }

        ## Sort by average coverage and order coverages and log-ratios
        orderedInd <- sort(covVector, index.return = TRUE)$ix
        orderedCov <- covVector[orderedInd]
        orderedLd <- ldRatVector[orderedInd]

        ## Split all nucleotides in bins of spanning average coverage in
        ## increments of 100
        bins <- list()

        ## The first nucleotide (sorted by coverage) is placed in the first bin
        c <- 1
        bins[c] <- 1

        ## Store the current average coverage
        current_avgCov <- orderedCov[1]

        ## If the greatest average coverage is not 100 greater than the first,
        ## set the increment to be their difference divided by 10
        if ((orderedCov[length(orderedCov)] - orderedCov[1]) > 100) {
            cov_diff <- 100
        } else {
            cov_diff <- (orderedCov[length(orderedCov)] - orderedCov[1]) / 10
        }

        ## Once a nucleotide is found with coverage greater than the current
        ## coverage + increment, place it in the next bin
        for (i in 2:length(orderedCov)) {
            if (orderedCov[i] >= current_avgCov + cov_diff) {
                c <- c + 1
                bins[c] <- i
                current_avgCov <- orderedCov[i]
            }
        }

        bins <- unlist(bins)

        quantiles <- matrix(, length(bins), 1)
        binCoverage <- matrix(, length(bins), 1)

        ## For each bin, compute the 95th quantile of LDRs with subtracted mean,
        ## and find the average coverage in that bin
        for (i in 1:length(bins)) {
            if (i < length(bins)) {
                binnedLd <- orderedLd[bins[i] : (bins[i+1] - 1)]
                q <- stats::quantile(binnedLd, c(0.95))
                quantiles[i] <- as.numeric(q) - mean(binnedLd)
                binCoverage[i] <- mean(orderedCov[bins[i] : (bins[i+1] - 1)])
            } else {
                binnedLd <- orderedLd[bins[length(bins)] : length(orderedCov)]
                q <- stats::quantile(binnedLd, c(0.95))
                quantiles[i] <- as.numeric(q) - mean(binnedLd)
                binCoverage[i] <- mean(orderedCov[bins[length(bins)] :
                                                    length(orderedCov)])
            }
        }

        ## Find non-linear least-squares estimates for the parameters of a model
        ## for the distribution of LDRs

        if (all(quantiles == 0)) {
            stop("Unable to fit the model for correcting the coverage bias.")
        } else {
            parameters <- stats::nls(quantiles ~ (1/sqrt(binCoverage)) * k + b,
                              start=c(k = 1, b = 0))
            k <- as.numeric(parameters$m$getPars()[1])
            b <- as.numeric(parameters$m$getPars()[2])

            quantileModel <- (1 / sqrt(covVector)) * k + b

            ## Scale LDRs of observed nucleotides according to the model
            ldRatVector <- ldRatVector / quantileModel

            LDR_C <- matrix(, nrow=nNucl, ncol=length(analysedC))

            start <- 0

            ## Arrange these back into a matrix with columns for each comparison
            for (i in 1:length(analysedC)) {
                LDR_C[analysedC[[i]], i] <- ldRatVector[(start + 1):
                                            (start + length(analysedC[[i]]))]
                start <- start + length(analysedC[[i]])
            }

            ## Enumerate all pairs of control replicates
            indexT <- t(matrix(c(rep((Nc+1):(Nc+Nt), each=Nc), rep(1:Nc, Nt)),
                               2, byrow=TRUE))

            ## Number of treatment-control comparisons
            nCompCT <- length(analysedCT)

            LDR_CT <- matrix(, nrow=nNucl, ncol=nCompCT)

            for (i in 1:length(analysedCT)) {

                ## Extract coverage and drop-off rates of nucleotides in
                ## treatment-control comparisons
                coverageCT <- covFile[analysedCT[[i]], indexT[i, ]]
                dorCT <- dorFile[analysedCT[[i]], indexT[i, ]]

                ## Compute average coverage between the pair of replicates at
                ## the selected nucleotides
                avgCov <- rowMeans(coverageCT)
                dim(avgCov) <- c(length(analysedCT[[i]]), 1)

                ## Compute LDRs for the pair of replicates
                logRat <- log(dorCT[, 1]) - log(dorCT[, 2])
                dim(logRat) <- c(length(analysedCT[[i]]), 1)

                # Scale LDRs by the same transformation
                f <- (1 / sqrt(avgCov)) * k + b
                logRat <- logRat / f
                LDR_CT[analysedCT[[i]], i] <- logRat
            }

            return(list("LDR_C" = LDR_C, "LDR_CT" = LDR_CT))

        }
    }
}

stabiliseVariance <- function(se, nuclSelection, Nc, Nt) {
    .stabiliseVariance(assay(se, "coverage"), assay(se, "dropoff_rate"),
                       nuclSelection$analysedC, nuclSelection$analysedCT,
                       Nc, Nt)
}

Any scripts or data that you put into this service are public.

BUMHMM documentation built on Nov. 8, 2020, 5:13 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

BUMHMM
Computational pipeline for computing probability of modification from structure probing experiment data

R/stabiliseVariance.R
In BUMHMM: Computational pipeline for computing probability of modification from structure probing experiment data

Defines functions stabiliseVariance .stabiliseVariance

Documented in stabiliseVariance

Try the BUMHMM package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

BUMHMM Computational pipeline for computing probability of modification from structure probing experiment data

R/stabiliseVariance.R In BUMHMM: Computational pipeline for computing probability of modification from structure probing experiment data

Defines functions stabiliseVariance .stabiliseVariance

Documented in stabiliseVariance

Try the BUMHMM package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

BUMHMM
Computational pipeline for computing probability of modification from structure probing experiment data

R/stabiliseVariance.R
In BUMHMM: Computational pipeline for computing probability of modification from structure probing experiment data