#' @title Train integrated CASTLE model
#'
#' @description Estimates the parameters of the statistical model for the
#' integrated version the CASTLE algorithm based on a set of training samples,
#' assumed to have no mutant DNA present.
#'
#' @details In the test an integrated version of the Likelihood Ratio test
#' statistic is used, and this is approximated using a Riemann sum on a
#' 3D-grid. The parameter \code{abc_grid_resolution} controls this
#' approximation as the sum is calculated on \code{abc_grid_resolution^3}
#' points. Higher values give better approximation but longer computation
#' times.
#'
#' @param background_samples \code{data.frame} with cancer negative training
#' samples as rows. These are the samples used to train/estimate the
#' parameters of the model. At least the following columns should be present:
#' \itemize{
#' \item{\code{WildtypeOnlyDroplets}}
#' \item{\code{MutantOnlyDroplets}}
#' \item{\code{DoubleNegativeDroplets}}
#' \item{\code{DoublePositiveDroplets}} } If data from QuataSoft is available,
#' these can be imported using \code{\link{import_QS_files}}.
#' @param abc_grid_resolution The resolution of the 3D-grid on which the
#' integrated LR-test statistic is approximated. Default is 25 (equal to a
#' grid of 25^3 = 15,625 points).
#'
#' @return List of parameter estimates for the integrated CASLTE model. This
#' should be used as input for \code{\link{test_tumor_sample_integrated}}.
#'
#' @seealso \code{\link{import_QS_files}, \link{test_tumor_sample_integrated}}
#'
#' @export
train_integrated_ddpcr_model <- function(background_samples,
abc_grid_resolution = 25) {
# Train parameters (get MLE on training data)
# NOTE: This checks validity of the input!
simple_model <- train_simple_ddpcr_model(
background_samples = background_samples
)
# Unpack data
N_WT_only_vec <- background_samples$WildtypeOnlyDroplets
N_M_only_vec <- background_samples$MutantOnlyDroplets
N_d_neg_vec <- background_samples$DoubleNegativeDroplets
N_d_pos_vec <- background_samples$DoublePositiveDroplets
# Get parameters from model
l_est_vec <- simple_model$l_est_vec
a_est <- simple_model$a_est
b_est <- simple_model$b_est
c_est <- simple_model$c_est
# Get bounds of a, b and c to make a grid where training lik function has mass
bounds <- find_abd_confidence_intervals(
N_WT_only_vec = N_WT_only_vec,
N_M_only_vec = N_M_only_vec,
N_d_neg_vec = N_d_neg_vec,
N_d_pos_vec = N_d_pos_vec,
l_est_vec = l_est_vec,
a_est = a_est,
b_est = b_est,
c_est = c_est,
alpha = 1e-5
)
# Get symmetric buffer around estimate
a_buffer <- max(a_est - bounds$a_CI_lower, bounds$a_CI_upper - a_est)
b_buffer <- max(b_est - bounds$b_CI_lower, bounds$b_CI_upper - b_est)
c_buffer <- max(c_est - bounds$c_CI_lower, bounds$c_CI_upper - c_est)
# Get min and max values for error parameters
a_min <- max(TOL_0, a_est - a_buffer)
a_max <- a_est + a_buffer
b_min <- max(TOL_0, b_est - b_buffer)
b_max <- b_est + b_buffer
c_min <- max(TOL_0, c_est - c_buffer)
c_max <- c_est + c_buffer
# Make intervals
a_vec <- seq(from = a_min, to = a_max, length.out = abc_grid_resolution)
b_vec <- seq(from = b_min, to = b_max, length.out = abc_grid_resolution)
c_vec <- seq(from = c_min, to = c_max, length.out = abc_grid_resolution)
# Grid of a, b and c
abc_grid <- expand.grid(a_vec, b_vec, c_vec)
# Get training log lik
abc_grid_train_log_lik <- get_abc_grid_log_lik(
N_WT_only_vec = N_WT_only_vec,
N_M_only_vec = N_M_only_vec,
N_d_neg_vec = N_d_neg_vec,
N_d_pos_vec = N_d_pos_vec,
l_vec = l_est_vec,
abc_grid = abc_grid
)
# Gather results
integrated_model <- list(
a_est = a_est,
b_est = b_est,
c_est = c_est,
l_est_vec = l_est_vec,
abc_grid = abc_grid,
abc_grid_train_log_lik = abc_grid_train_log_lik
)
return(integrated_model)
}
#' @title Test sample using integrated CASTLE model
#'
#' @description Tests samples for the presence of mutant DNA using the
#' integrated version of the CASTLE algorithm.
#'
#' @param test_samples \code{data.frame} with samples to be tested as rows. At
#' least the following columns should be present:
#' \itemize{
#' \item{\code{WildtypeOnlyDroplets}}
#' \item{\code{MutantOnlyDroplets}}
#' \item{\code{DoubleNegativeDroplets}}
#' \item{\code{DoublePositiveDroplets}}
#' }
#' If data from QuataSoft is available, these can be imported using
#' \code{\link{import_QS_files}}.
#' @param integrated_model A fitted model as generated by
#' \code{\link{train_integrated_ddpcr_model}}.
#' @param include_mutant_CI Logical. Should confidence intervals for mutant
#' DNA statistics be included? Default is \code{TRUE}.
#' @param include_wildtype_CI Logical. Should confidence intervals for wild type
#' DNA statistics be included? Default is \code{TRUE}.
#' @param alpha The alpha used for the statistical test and confidence
#' intervals. Default is 0.01.
#'
#' @return A \code{data.frame} with results from the CASTLE algorithm (parameter
#' estimates, CIs, p-values etc.) joined to the right of \code{test_samples}.
#' Following is the full list of information added:
#' \describe{ \item{\code{test_statistic}/\code{p_val}}{Log-likelihood-ratio
#' test statistic/p-value from the statistical test for the presence of mutant
#' DNA (H0: No mutant DNA, HA: Mutant DNA present).}
#' \item{\code{mutation_detected}}{Logical. TRUE if the mutation is detected
#' i.e. if the p-value < \code{alpha}.}
#' \item{\code{allele_frequency}}{Estimated allele frequency of the mutant allele (error corrected).}
#' \item{\code{allele_frequency_CI_[lower|upper]}}{Confidence interval for
#' \code{allele_frequency}.}
#' \item{\code{mutant_molecules_per_droplet}}{Estimated number of mutant
#' molecules per droplet (error corrected).}
#' \item{\code{mutant_molecules_per_droplet_CI_[lower|upper]}}{Confidence
#' interval for \code{mutant_molecules_per_droplet}.}
#' \item{\code{wildtype_molecules_per_droplet}}{Estimated number of wildtype
#' molecules per droplet.}
#' \item{\code{wildtype_molecules_per_droplet_CI_[lower|upper]}}{Confidence
#' interval for \code{wildtype_molecules_per_droplet}.}
#' \item{\code{total_mutant_molecules}}{Estimated total number of mutant
#' molecules in sample (error corrected).}
#' \item{\code{total_mutant_molecules_CI_[lower|upper]}}{Confidence interval
#' for \code{total_mutant_molecules}.}
#' \item{\code{total_wildtype_molecules}}{Estimated number of wildtype
#' molecules per droplet.}
#' }
#'
#' @seealso \code{\link{train_integrated_ddpcr_model}}
#' @export
#'
#' @import dplyr
test_tumor_sample_integrated <- function(test_samples,
integrated_model,
alpha = 0.01,
include_mutant_CI = TRUE,
include_wildtype_CI = TRUE) {
# Check input
check_input_samples(test_samples)
# Test samples
res_df <- NULL
for (i in 1:nrow(test_samples)) {
# Unpack input for sample i
test_sample <- test_samples[i, ]
# Data
N_WT_only <- test_sample$WildtypeOnlyDroplets
N_M_only <- test_sample$MutantOnlyDroplets
N_d_neg <- test_sample$DoubleNegativeDroplets
N_d_pos <- test_sample$DoublePositiveDroplets
# Model parameters
abc_grid <- integrated_model$abc_grid
abc_grid_train_log_lik <- integrated_model$abc_grid_train_log_lik
# Estimate l and r for test sample
# Get starting value from simple method
l_est <- estimate_l(
N_WT_only = N_WT_only,
N_M_only = N_M_only,
N_d_neg = N_d_neg,
N_d_pos = N_d_pos
)
r_est <- estimate_r_integrated(
N_WT_only = N_WT_only,
N_M_only = N_M_only,
N_d_neg = N_d_neg,
N_d_pos = N_d_pos,
l_est = l_est, integrated_model = integrated_model
)
res <- test_r_equal_0_integrated(
l_est = l_est,
r_est = r_est,
N_WT_only = N_WT_only,
N_M_only = N_M_only,
N_d_neg = N_d_neg,
N_d_pos = N_d_pos,
abc_grid_train_log_lik = abc_grid_train_log_lik,
abc_grid = abc_grid,
include_mutant_CI = include_mutant_CI,
alpha = alpha
)
if (include_wildtype_CI) {
l_CI <- get_l_CI(
N_M_only = N_M_only,
N_d_neg = N_d_neg,
N_WT_only = N_WT_only,
N_d_pos = N_d_pos,
alpha = alpha
)
# Add CI's to results
res <- append(res, list(
# Estimated values for l:
wildtype_molecules_per_droplet_CI_lower = l_CI$lower,
wildtype_molecules_per_droplet_CI_upper = l_CI$upper
))
}
# Collect results
res_df <- bind_rows(res_df, bind_cols(test_sample, res))
}
return(res_df)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.