knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
This vignette requires the npbin package.
library(chenimbalance) library(npbin) total_reads <- dnase[["m"]] data <- data.frame( total = total_reads, allelicRatio = dnase[["xm"]] / dnase[["m"]] ) data <- data[total_reads >= 5,] data <- data[1:2000,] head(data)
Compute the empirical allelic ratio distribution
binSize <- 40 bins <- pretty(0:1, binSize) minN <- 6 maxN <- min(2500, max(data[["total"]])) empirical <- empirical_allelic_ratio( data, bins, maxN = maxN, minN = minN, plot = TRUE )
Compute the weighted expected binomial distribution
w <- weight_by_empirical_counts(data[["total"]]) d_combined_sorted_binned <- nulldistrib( w, minN = minN, binSize = binSize )
Compute the sum of squared errors for the empirical distribution vs the weighted expected binomial distribution.
sse <- sum((empirical - d_combined_sorted_binned[,2])^2) sse
Choose the overdispersion parameter for the beta-binomial distribution
w_grad <- graded_weights_for_sse_calculation(r_min = 0, r_max = 1, bins = bins) overdispersion_details <- choose_overdispersion_parameter( w_grad, w, empirical, sse ) head(overdispersion_details[["b_and_sse"]])
Generate a plot of the weighted expected binomial and weighted expected beta-binomial distributions overlaid on the empirical distribution
plot_distributions( minN, maxN, bins, empirical, d_combined_sorted_binned, overdispersion_details[["e_combined_sorted_binned"]], yuplimit = 0.15 )
overdispersion_details
is a list whose elements include the chosen value of
b
and the sum of squared errors.
paste( "b_chosen =", overdispersion_details[["b_choice"]], ", SSE_chosen =", overdispersion_details[["sse"]] )
Optimize the overdispersion parameter
optimized_overdispersion_details <- optimize_overdispersion_parameter( w_grad, w overdispersion_details[["b_and_sse"]], overdispersion_details[["b_choice"]], overdispersion_details[["sse"]], empirical, overdispersion_details[["counter"]], minN = minN, binSize = binSize ) plot_distributions( minN, maxN, bins, empirical, d_combined_sorted_binned, optimized_overdispersion_details[["e_combined_sorted_binned"]], yuplimit = 0.15 )
Check the optimized value
list( b = optimized_overdispersion_details[["b_choice"]], sse = optimized_overdispersion_details[["sse"]] )
Plot the parameter search space
b_and_sse <- ( optimized_overdispersion_details[["b_and_sse"]] [1:(optimized_overdispersion_details[["counter"]] + 2),] ) plot( b_and_sse[order(b_and_sse[,1]),], type = "b", pch = 16, xlim = c(min(b_and_sse[,1]), max(b_and_sse[,1])), ylim = c(min(b_and_sse[,2]), max(b_and_sse[,2])) ) par(new = TRUE) plot( optimized_overdispersion_details[["b_choice"]], optimized_overdispersion_details[["sse"]], bty = "n", ylab = "", xlab = "", yaxt = "n", xaxt = "n", col = "red", pch = 8, xlim = c(min(b_and_sse[,1]), max(b_and_sse[,1])), ylim = c(min(b_and_sse[,2]), max(b_and_sse[,2])) )
Compute the symmetric shape parameter and plot the estimated null beta (gold) superimposed with the null beta estimated from NPBin (blue).
shape = 1 / (2 * optimized_overdispersion_details[["b_choice"]]) - 1 / 2 plot_estimated_null( data[["allelicRatio"]], shape1_shape2 = c(15.30666, 15.11449), shape3_shape4 = c(shape, shape) )
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.