#===============================================================================
# nulldistrib.R
#===============================================================================
# Imports ======================================================================
#' @import VGAM
# Functions ====================================================================
#' @title Change "counts" into density
#'
#' @param d.combined.sorted.binned The combined, sorted, and binned
#' pseudodensity
#' @return The combined, sorted, and binned density
#' @export
#' @seealso \code{\link{bin_according_to_empirical_distribution}}
change_counts_into_density <- function(d.combined.sorted.binned) {
d.combined.sorted.binned[,2] <- d.combined.sorted.binned[,2] / sum(
d.combined.sorted.binned[,2]
)
d.combined.sorted.binned
}
#' Bin the combined and sorted pseudodistribution
#'
#' Bin it according to the empirical distribution
#'
#' empirical right closed, left open ?hist; right=TRUE
#' (range] so no double counts
#' but zero gets excluded!!
#'
#' @param d.combined.sorted The combined and sorted pseudodistribution
#' @param binSize Integer. The approximate number of bins.
#' @return The binned density
#' @export
#' @seealso \code{\link{nulldistrib}}
bin_according_to_empirical_distribution <- function(
d.combined.sorted,
binSize = 40
) {
bins <- pretty(0:1, binSize)
start <- 0
end <- 0
d.combined.sorted.binned <- matrix(0, length(bins) - 1, 2)
for (z in 2:length(bins)) { ##skip 0
start <- bins[z - 1]
end <- bins[z]
row <- z - 1
d.combined.sorted.binned[row, 1] <- mean(c(end, start))
d.combined.sorted.binned[row, 2] <- sum(
d.combined.sorted[
(d.combined.sorted[,1] <= end & d.combined.sorted[,1] > start),
2
]
)
if (row == 1) {
d.combined.sorted.binned[row, 2] = sum(
d.combined.sorted[
(d.combined.sorted[,1] <= end & d.combined.sorted[,1] >= start),
2
]
)
}
}
change_counts_into_density(d.combined.sorted.binned)
}
#' Weight each coverage level with actual counts in empirical
#'
#' @param d.combined The combined pseudodistribution
#' @param d The parametric density value
#' @param w w
#' @param i i
#' @param k k
#' @param ptr ptr
#' @param minN Integer. The minimum coverage level.
#' @return The combined pseudodistribution weighted by the empirical
#' distribution
#' @export
#' @seealso \code{\link{nulldistrib}}
weight_with_actual_counts_in_empirical <- function(
d.combined,
d,
w,
i,
k,
ptr,
minN = 6
) {
d.w <- d * w[i, 1]
if (i == minN) {
d.combined[ptr:length(k), 1] <- k / i
d.combined[ptr:length(k), 2] <- d.w
colnames(d.combined) = c('allelicRatio', 'wBinDist')
} else {
d.combined[ptr:(ptr + length(k) - 1), 1] <- k / i
d.combined[ptr:(ptr + length(k) - 1), 2] <- d.w
}
d.combined
}
#' @title A parametric probability mass value
#'
#' @description From a binomial or beta-binomial distribution
#'
#' @param k k
#' @param i i
#' @param distrib Character string. The distribution to draw from, either
#' "binomial" or "betabinomial"
#' @param p Numeric. The binomial parameter.
#' @param b Numeric. The beta parameter.
#' @export
#' @seealso \code{\link{nulldistrib}}
parametric_probability_mass <- function(
k,
i,
distrib = "binomial",
p = 0.5,
b = 0
) {
if (distrib == "binomial") {
dbinom(k, i, p)
} else if (distrib == "betabinomial") {
dbetabinom(k, i, p, b)
}
}
#' @title Weighted beta/binomial distribution
#'
#' @details
#' \code{d.combined} collects all results and correspond it to an allelic ratio:
#' col1=allelicRatio (based on binomial n=6, ar=0,1/6,2/6...)
#' col2=corresponding weighted value in binomial distribution, i.e.
#' pdf(n, k, p) * (num of empirical SNPs at n counts)
#'
#' @param minN Minimum coverage (min total num of reads (since it's left open right closed))
#' @param p Binomial parameter (null probability)
#' @param w Data frame or matrix giving
#' @return The weighted null beta/binomial probability mass function
#' @export
nulldistrib <- function(
w,
minN = 6,
p = 0.5,
binSize = 40,
distrib = "binomial",
b = 0
) {
maxN <- min(2500, nrow(w))
d.combined <- matrix(0, sum(seq(minN + 1, maxN + 1)), 2)
ptr <- 1
for (i in minN:maxN) {
k <- seq(0, i)
d <- parametric_probability_mass(k, i, distrib = distrib, p = p, b = b)
d.combined <- weight_with_actual_counts_in_empirical(
d.combined,
d,
w,
i,
k,
ptr,
minN = minN
)
ptr <- ptr + length(k)
}
d.combined.sorted <- d.combined[order(d.combined[,1], d.combined[,2]),]
bin_according_to_empirical_distribution(d.combined.sorted, binSize = binSize)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.