#' dimsum__fit_error_model_bootstrap
#'
#' Fit error model on random subsample of the input data.
#'
#' @param m bootstrap number; dummy variable (required)
#' @param input_dt input data.table (required)
#' @param all_reps list of replicates to retain (required)
#' @param idx vector of replicate combinations (required)
#' @param maxN maximum size of subsample (required)
#' @param max_tries_per_fit maximum number of fitting attempts (required)
#' @param lower_rep replicate error lower bound (required)
#' @param Fcorr vector of fitness normalization factors (default:NULL)
#'
#' @return Nothing
#' @export
dimsum__fit_error_model_bootstrap <- function(
m,
input_dt,
all_reps,
idx,
maxN,
max_tries_per_fit,
lower_rep,
Fcorr = NULL
){
require(data.table)
Nreps <- length(all_reps)
y <- NA
counter <- 0
while(!is.list(y) & counter < max_tries_per_fit){ #this is a routine to catch failed fitting attempts
tryCatch({
#Create data concatenation for all combinations of replicates (can be improved?!)
bs_data <- input_dt[input_above_threshold == T & all_reads == T & is.na(WT)][sample(.N,min(.N,maxN),replace = T)]
F_data_list <- list() #fitness data
E_data_list <- list() #count based error data (for weighting of datapoints)
C_data_list <- list() #inverse count data
NR <- list() # simple counter for how many replicates are in each data row
for(i in 1:length(idx)){
F_data_list[[i]] <- bs_data[,.SD,.SDcols = grep(paste0("^fitness(", paste0(all_reps[idx[[i]]], collapse = "|"), ")$"), names(input_dt))]
E_data_list[[i]] <- bs_data[,.SD,.SDcols = grep(paste0("^cbe(", paste0(all_reps[idx[[i]]], collapse = "|"), ")$"), names(input_dt))]
C_data_list[[i]] <- bs_data[,1/.SD,.SDcols = grep(paste0("count_e(", paste0(all_reps[idx[[i]]], collapse = "|"), ")_s"), names(input_dt))]
NR[[i]] <- data.table(rep(length(idx[[i]]),nrow(bs_data)))
}
#make matrices with #replicates = #columns
F_data <- as.matrix(rbindlist(F_data_list, fill = T))
#Calculate variance from fitness data
V_data <- apply(F_data,1,var,na.rm = T)
#count based error weighting of data points (needed because deviation is proportional to expcectation)
E_data <- as.matrix(rbindlist(E_data_list, fill = T))
Ew <- rowMeans(E_data, na.rm = T)^2
C_data <- as.matrix(rbindlist(C_data_list, fill = T))
#Avoid NA for fitting
C_data[is.na(C_data)] <- 0
#Correct count terms for fitness normalization factors
if(!is.null(Fcorr)){
C_data = C_data * matrix(rep(Fcorr, 2), nrow = nrow(C_data), ncol = Nreps*2, byrow = T)
}
#weighting according to #variants with same mutation
Dw <- bs_data$error_model_weighting
#how many replicates are in the replicate combinations
NRT <- as.matrix(rbindlist(NR))
#Binary variable to avoid replicate error fitting for replicates that are not present in a data row
BV <- F_data
BV[!is.na(BV)] <- 1
BV[is.na(BV)] <- 0
#fit formula
fit_formula <- as.formula(paste0(
"V_data ~ (",
paste0("BV[,",
1:Nreps,
"] * (p[",
1:Nreps,
"] * C_data[,",
1:Nreps,
"] + p[",
Nreps + 1:Nreps,
"] * C_data[,",
Nreps + 1:Nreps,
"] + p[",
2*Nreps + 1:Nreps,
"])",collapse = " + "),
") / NRT"))
#Fitting
y <- nls(formula = fit_formula,
start = list(p = c(
rep(1, each = Nreps*2) + 10^rnorm(2*Nreps),
rep(0, each = Nreps) + 0.01*10^rnorm(1*Nreps))),
lower = c(rep(1, each = Nreps*2), rep(lower_rep, each = Nreps)),
weights = 1 / Ew / Dw,
algorithm = "port")
}, error = function(cond){})
counter <- counter + 1}
#If fit was succesful after less than max_tries_per_fit attempts, transfer parameters to p
if(counter < max_tries_per_fit){
p <- summary(y)[["parameters"]][,1]
}else{ #otherwise move on to next fit
p <- rep(NA, 3*Nreps)
}
return(p)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.