#' @title Train simple CASTLE model
#'
#' @description Estimates the parameters of the statistical model for the simple
#' version the CASTLE algorithm based on a set of training samples, assumed to
#' have no mutant DNA present.
#'
#' @param background_samples \code{data.frame} with cancer negative training
#' samples as rows. These are the samples used to train/estimate the
#' parameters of the model. At least the following columns should be present:
#' \itemize{
#' \item{\code{WildtypeOnlyDroplets}}
#' \item{\code{MutantOnlyDroplets}}
#' \item{\code{DoubleNegativeDroplets}}
#' \item{\code{DoublePositiveDroplets}} } If data from QuantaSoft is available,
#' these can be imported using \code{\link{import_QS_files}}.
#'
#' @return List of parameter estimates for the simple CASLTE model. This should
#' be used as input for \code{\link{test_tumor_sample_simple}}.
#' @seealso \code{\link{import_QS_files}},
#' \code{\link{test_tumor_sample_simple}}
#' @export
train_simple_ddpcr_model <- function(background_samples) {
# Check input
check_input_samples(background_samples)
# Check if samples are appropriate for training
check_background_samples(background_samples)
# Unpack data
N_WT_only_vec <- background_samples$WildtypeOnlyDroplets
N_M_only_vec <- background_samples$MutantOnlyDroplets
N_d_neg_vec <- background_samples$DoubleNegativeDroplets
N_d_pos_vec <- background_samples$DoublePositiveDroplets
# Estimate lambdas
l_est_vec <- log((N_WT_only_vec + N_d_pos_vec) / (N_d_neg_vec + N_M_only_vec) + 1)
# Get starting values (rough estimates) of a, b and c
start_values <- get_start_values_abc(
N_WT_only_vec = N_WT_only_vec,
N_M_only_vec = N_M_only_vec,
N_d_neg_vec = N_d_neg_vec,
N_d_pos_vec = N_d_pos_vec,
l_est_vec = l_est_vec
)
# Set start at mean of individual samples
start_vec <- log(c(
start_values$a_start,
start_values$b_start,
start_values$c_start
))
# Estimate alpha, beta and gamma by maximizing log-likelihood
optim_res <- stats::optim(
par = start_vec,
method = "BFGS",
fn = full_log_lik_train,
gr = grad_full_log_lik_train,
l_est_vec = l_est_vec,
N_WT_only_vec = N_WT_only_vec,
N_M_only_vec = N_M_only_vec,
N_d_neg_vec = N_d_neg_vec,
N_d_pos_vec = N_d_pos_vec,
control = list(
fnscale = -1,
reltol = RELTOL
)
)
par_est <- exp(optim_res$par)
res <- list(
a_est = par_est[1],
b_est = par_est[2],
c_est = par_est[3],
l_est_vec = l_est_vec
)
return(res)
}
#' @title Test samples using simple CASTLE model
#'
#' @description Tests samples for the presence of mutant DNA using the
#' simple version of the CASTLE algorithm.
#'
#' @param test_samples \code{data.frame} with samples to be tested as rows. At
#' least the following columns should be present:
#' \itemize{
#' \item{\code{WildtypeOnlyDroplets}}
#' \item{\code{MutantOnlyDroplets}}
#' \item{\code{DoubleNegativeDroplets}}
#' \item{\code{DoublePositiveDroplets}}
#' }
#' If data from QuataSoft is available, these can be imported using
#' \code{\link{import_QS_files}}.
#' @param simple_model A fitted model as generated by
#' \code{\link{train_simple_ddpcr_model}}.
#' @param include_mutant_CI Logical. Should confidence intervals for mutant
#' DNA statistics be included? Default is \code{TRUE}.
#' @param include_wildtype_CI Logical. Should confidence intervals for wild type
#' DNA statistics be included? Default is \code{TRUE}.
#' @param alpha The alpha used for the statistical test and confidence
#' intervals. Default is 0.01.
#'
#' @return A \code{data.frame} with results from the CASTLE algorithm (parameter
#' estimates, CIs, p-values etc.) joined to the right of \code{test_samples}.
#' Following is the full list of information added:
#' \describe{ \item{\code{test_statistic}/\code{p_val}}{Log-likelihood-ratio
#' test statistic/p-value from the statistical test for the presence of mutant
#' DNA (H0: No mutant DNA, HA: Mutant DNA present).}
#' \item{\code{mutation_detected}}{Logical. TRUE if the mutation is detected
#' i.e. if the p-value < \code{alpha}.}
#' \item{\code{allele_frequency}}{Estimated allele frequency of the mutant allele (error corrected).}
#' \item{\code{allele_frequency_CI_[lower|upper]}}{Confidence interval for
#' \code{allele_frequency}.}
#' \item{\code{mutant_molecules_per_droplet}}{Estimated number of mutant
#' molecules per droplet (error corrected).}
#' \item{\code{mutant_molecules_per_droplet_CI_[lower|upper]}}{Confidence
#' interval for \code{mutant_molecules_per_droplet}.}
#' \item{\code{wildtype_molecules_per_droplet}}{Estimated number of wildtype
#' molecules per droplet.}
#' \item{\code{wildtype_molecules_per_droplet_CI_[lower|upper]}}{Confidence
#' interval for \code{wildtype_molecules_per_droplet}.}
#' \item{\code{total_mutant_molecules}}{Estimated total number of mutant
#' molecules in sample (error corrected).}
#' \item{\code{total_mutant_molecules_CI_[lower|upper]}}{Confidence interval
#' for \code{total_mutant_molecules}.}
#' \item{\code{total_wildtype_molecules}}{Estimated number of wildtype
#' molecules per droplet.} }
#'
#' @seealso \code{\link{train_simple_ddpcr_model}}
#' @export
test_tumor_sample_simple <- function(test_samples,
simple_model,
include_wildtype_CI = TRUE,
include_mutant_CI = TRUE,
alpha = 0.01) {
# Check input
check_input_samples(test_samples)
res_df <- NULL
for (i in 1:nrow(test_samples)) {
test_sample <- test_samples[i, ]
# Unpack parameters
N_WT_only <- test_sample$WildtypeOnlyDroplets
N_M_only <- test_sample$MutantOnlyDroplets
N_d_neg <- test_sample$DoubleNegativeDroplets
N_d_pos <- test_sample$DoublePositiveDroplets
a_est <- simple_model$a_est
b_est <- simple_model$b_est
c_est <- simple_model$c_est
# Estimate parameters
l_est <- estimate_l(
N_WT_only = N_WT_only,
N_M_only = N_M_only,
N_d_neg = N_d_neg,
N_d_pos = N_d_pos
)
r_est <- estimate_r(
N_WT_only = N_WT_only,
N_M_only = N_M_only,
N_d_neg = N_d_neg,
N_d_pos = N_d_pos,
l_est = l_est,
a_est = a_est,
b_est = b_est,
c_est = c_est
)
res <- test_r_equal_0_simple(
N_WT_only = N_WT_only,
N_M_only = N_M_only,
N_d_neg = N_d_neg,
N_d_pos = N_d_pos,
l_est = l_est,
a_est = a_est,
b_est = b_est,
c_est = c_est,
r_est = r_est,
include_mutant_CI = include_mutant_CI,
alpha = alpha
)
if (include_wildtype_CI) {
l_CI <- get_l_CI(
N_M_only = N_M_only,
N_d_neg = N_d_neg,
N_WT_only = N_WT_only,
N_d_pos = N_d_pos,
alpha = alpha
)
# Add CI's to results
res <- append(res, list(
# Estimated values for l:
wildtype_molecules_per_droplet_CI_lower = l_CI$lower,
wildtype_molecules_per_droplet_CI_upper = l_CI$upper
))
}
# Collect results
res_df <- dplyr::bind_rows(res_df, dplyr::bind_cols(test_sample, res))
}
return(res_df)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.