#' Infer growth rates for each clone and subclone.
#'
#' @param x a mvnmm object.
#' @param steps maximum number of inference steps.
#' @param warmup_steps number of warmup steps (used when the models are fitted via MCMC sampling).
#' @param highlight set of clusters to run the inference for.
#' If not specified, the inference is run on all the clusters.
#' @param timepoints_to_int if the provided timepoints are not integers and no timepoints-to-int
#' mapping is stored in \code{x}, a list mapping their values to integers is required.
#' @param timepoints timepoints of the experiment, defaulting to the ones stored in \code{x}.
#' @param growth_model string specifying the growth model to fit: \code{exp} for exponential,
#' \code{log} for logistic, or \code{exp.log} (default) to fit both and select, for each lineage,
#' the model with the highest log-likelihood.
#' @param force if the models have already been fitted, setting \code{force} to \code{FALSE} will keep the
#' computed rates, while setting \code{force} to \code{TRUE} will fit the models again for the specified clusters.
#' @param tree_score index of the clone tree (ranked by score) used to retrieve the clone populations.
#' @param py_pkg optional \code{reticulate} reference to the \code{pylineaGT} Python package;
#' if \code{NULL}, it is imported internally.
#' @param mutations logical; if \code{TRUE}, the inference is run also on the subclones identified
#' from mutation data. It is set to \code{TRUE} automatically when a mutations fit is stored in \code{x}.
#'
#' @return A \code{mvnmm} object with the additional tibble \code{growth.rates} containing the estimated population genetics parameters.
#'
#' @importFrom purrr is_empty
#' @importFrom dplyr select filter add_row mutate pull contains arrange
#' @importFrom tidyr pivot_wider
#'
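#' @examples
#' # Illustrative usage (not run): assumes `x` is a mvnmm object previously
#' # fitted with the package functions.
#' \dontrun{
#' x = fit_growth_rates(x, growth_model="exp.log", force=TRUE)
#' x %>% get_growth_rates()
#' }
#'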
#' @export
fit_growth_rates = function(x,
                            steps=500,
                            warmup_steps=500,
                            highlight=c(),
                            timepoints_to_int=c(),
                            timepoints=get_timepoints(x),
                            growth_model="exp.log",
                            force=TRUE,
                            tree_score=1,
                            py_pkg=NULL,
                            mutations=FALSE) {
  highlight.cov = get_highlight(x, highlight=highlight, mutations=FALSE)
  highlight.muts = get_highlight(x, highlight=highlight, mutations=TRUE)
  timepoints_to_int = map_timepoints_int(x, timepoints_to_int)

  if (!mutations && have_muts_fit(x)) mutations = TRUE

  pop_df = x %>%
    get_muller_pop(mutations=mutations, timepoints_to_int=timepoints_to_int,
                   tree_score=tree_score) %>%
    dplyr::select(-dplyr::contains("Pop.subcl"), -dplyr::contains("Pop.plot"), -dplyr::contains("theta")) %>%
    dplyr::filter(Identity %in% highlight.muts)

  rates.df = data.frame()
  evaluated = list()

  # if force is TRUE, the rates of the highlighted clusters are discarded and fitted again
  if (have_growth_rates(x)) {
    rates.df = x %>% get_growth_rates()
    if (force) rates.df = x %>% get_growth_rates() %>% dplyr::filter(!Identity %in% highlight.muts)
    evaluated = rates.df$Identity %>% unique()
  }

  # edges = get_muller_edges(x, mutations=mutations, tree_score=tree_score)

  for (cluster in highlight.cov) {
    if (cluster %in% evaluated) next
    cli::cli_process_start(paste0("Starting growth models inference of clone ", cluster))

    # filter the dataset to keep only the clone and its subclones
    pop_df.cl = pop_df %>%
      dplyr::filter(Identity==cluster |
                      grepl(paste0(cluster, "."), Identity, fixed=TRUE))

    # get the identity-parents dataframe of clone "cluster"
    # parents = get_parents(x, highlight=cluster, tree_score=tree_score)
    parents = pop_df.cl %>%
      dplyr::select(Parent, Identity) %>% unique()

    rates.df = fit_growth_utils(rates.df=rates.df,
                                cluster=cluster,
                                pop_df.cl=pop_df.cl,
                                parents=parents,
                                growth_model=growth_model,
                                steps=steps,
                                warmup_steps=warmup_steps,
                                timepoints_to_int=timepoints_to_int,
                                py_pkg=py_pkg)

    evaluated = c(cluster, evaluated)
    cli::cli_process_done()
  }

  x = add_growth_rates(x, rates.df)
  x = add_tp_int(x, timepoints_to_int)
  return(x)
}
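
# Helper: fits the growth models of a clone and its subclones, fitting first the
# clonal cluster and then the subclones sorted according to the tree hierarchy.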
fit_growth_utils = function(rates.df,
                            cluster,
                            pop_df.cl,
                            parents,
                            growth_model,
                            steps,
                            warmup_steps,
                            timepoints_to_int,
                            py_pkg) {

  # first, fit the clonal cluster of ISs
  rates.df = rates.df %>%
    fit_growth_clones(clusters=cluster,
                      pop_df.cl=pop_df.cl,
                      parents=parents,
                      growth_model=growth_model,
                      steps=steps,
                      warmup_steps=warmup_steps,
                      clonal=TRUE,
                      timepoints_to_int=timepoints_to_int,
                      py_pkg=py_pkg)

  # get the clusters sorted by tree hierarchy
  subclones = c(parents$Identity, parents$Parent) %>%
    sort_clusters_edges(edges=parents)

  # then fit the growth rates of all the subclones
  rates.df = rates.df %>%
    fit_growth_clones(clusters=subclones,
                      pop_df.cl=pop_df.cl,
                      parents=parents,
                      growth_model=growth_model,
                      steps=steps,
                      warmup_steps=warmup_steps,
                      clonal=FALSE,
                      timepoints_to_int=timepoints_to_int,
                      py_pkg=py_pkg)

  return(rates.df)
}
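
# Helper: loops over the input clusters and runs the growth model inference for
# each cluster with observations in "pop_df.cl" and no rates in "rates.df" yet.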
fit_growth_clones = function(rates.df,
                             clusters,
                             pop_df.cl,
                             parents,
                             growth_model,
                             steps,
                             warmup_steps,
                             clonal,
                             timepoints_to_int,
                             py_pkg=NULL) {

  if (!clonal && nrow(parents)==0) return(rates.df)
  # if (!clonal) clusters = sort_clusters_edges(clusters, parents)

  for (subcl in clusters) {
    # par = pop_df.cl %>% dplyr::filter(Identity==subcl) %>% dplyr::pull(Parent) %>% unique()
    if ( (!subcl %in% rates.df$Identity) && (subcl %in% pop_df.cl$Identity) && (subcl != "P") )
      rates.df = run_py_growth(rates.df,
                               pop_df.cl=pop_df.cl,
                               cluster=subcl,
                               timepoints_to_int=timepoints_to_int,
                               # p.rates=get_parent_rate(par, rates.df, subcl),
                               clonal=clonal,
                               growth_model=growth_model,
                               steps=steps,
                               warmup_steps=warmup_steps,
                               py_pkg=py_pkg)
  }

  return(rates.df)
}
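
# Helper: fits the exponential and/or logistic growth model of a single cluster
# through the "pylineaGT" Python package (imported via reticulate), selects the
# best model per lineage by log-likelihood and appends the estimates to "rates.df".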
run_py_growth = function(rates.df,
                         pop_df.cl,
                         cluster,
                         timepoints_to_int,
                         steps=500,
                         warmup_steps=500,
                         # p.rates=list("exp"=NULL, "log"=NULL),
                         clonal=FALSE,
                         growth_model="exp.log",
                         random_state=25,
                         py_pkg=NULL) {

  # retrieve the parent of the cluster and its estimated rates, passed to the fit
  par = pop_df.cl %>% dplyr::filter(Identity==cluster) %>% dplyr::pull(Parent) %>% unique()
  p.rates = get_parent_rate(par, rates.df, cluster)

  torch = reticulate::import("torch")
  if (is.null(py_pkg)) py_pkg = reticulate::import("pylineaGT")

  input.clone = pop_df.cl %>%
    dplyr::filter(Identity == cluster) %>%
    ## Run with population size
    # dplyr::select(-Frequency) %>%
    # dplyr::mutate(Population=ifelse((Generation == 0 & clonal), 1, Population)) %>%
    # tidyr::pivot_wider(id_cols="Generation", names_from="Lineage", values_from="Population") %>%
    ## Run with re-scaled population size
    dplyr::mutate(Frequency=Frequency * 1000) %>%
    dplyr::select(-Population) %>%
    dplyr::mutate(Frequency=ifelse((Generation == 0 & clonal), 1, Frequency)) %>%
    tidyr::pivot_wider(id_cols="Generation", names_from="Lineage", values_from="Frequency") %>%
    dplyr::arrange(Generation)

  times = input.clone$Generation
  y = input.clone[2:ncol(input.clone)]
  lineages = colnames(y)

  times = torch$tensor(times)$int()$unsqueeze(as.integer(1))
  y = torch$tensor(y %>% as.matrix())

  p.rate.exp = p.rate.log = NULL
  posterior_samples.exp = posterior_samples.log = NULL
  p.exp = p.log = NULL  # learned parameters, NULL if the model is not fitted
  ll.exp = ll.log = NULL  # per-lineage log-likelihoods, NULL if the model is not fitted

  x.reg = py_pkg$explogreg$Regression(times, y)

  if (grepl("exp", growth_model)) {  # exponential model training
    if (!is.null(p.rates[["exp"]])) p.rate.exp = torch$tensor(p.rates[["exp"]])$float()
    losses.exp = x.reg$train(regr="exp", p_rate=p.rate.exp, steps=as.integer(steps), random_state=as.integer(random_state))

    # posterior_samples.exp = x.reg$train_mcmc(regr="exp",
    #                                          p_rate=p.rate.exp,
    #                                          num_samples=as.integer(steps),
    #                                          warmup_steps=as.integer(warmup_steps), num_chains=as.integer(1),
    #                                          random_state=as.integer(random_state))

    p.exp = x.reg$get_learned_params()
    ll.exp = x.reg$compute_log_likelihood() %>% setNames(nm=lineages)
  }

  if (grepl("log", growth_model)) {  # logistic model training
    if (!is.null(p.rates[["log"]])) p.rate.log = torch$tensor(p.rates[["log"]])$float()
    losses.log = x.reg$train(regr="log", p_rate=p.rate.log, steps=as.integer(steps), random_state=as.integer(random_state))

    # posterior_samples.log = x.reg$train_mcmc(regr="log",
    #                                          p_rate=p.rate.log,
    #                                          num_samples=as.integer(steps),
    #                                          warmup_steps=as.integer(warmup_steps), num_chains=as.integer(1),
    #                                          random_state=as.integer(random_state))

    p.log = x.reg$get_learned_params()
    ll.log = x.reg$compute_log_likelihood() %>% setNames(nm=lineages)
  }

  params = get_growth_params(timepoints_to_int,
                             rates.exp=p.exp,
                             rates.log=p.log,
                             lineages=pop_df.cl$Lineage %>% unique(),
                             cluster=cluster,
                             posterior_samples.exp=posterior_samples.exp,
                             posterior_samples.log=posterior_samples.log)

  # for each lineage, keep the model with the higher log-likelihood;
  # if only one model has been fitted, that one is kept
  best = c()
  for (ll in lineages) {
    if (is.null(ll.log) || (!is.null(ll.exp) && ll.exp[ll] > ll.log[ll]))
      best = c(best, "exp") %>% setNames(nm=c(names(best), ll))
    else
      best = c(best, "log") %>% setNames(nm=c(names(best), ll))
  }

  if (purrr::is_empty(rates.df))
    return(
      params %>%
        dplyr::mutate(best_model=best[Lineage],
                      ll.log=ll.log[Lineage],
                      ll.exp=ll.exp[Lineage])
    )

  return(
    rates.df %>%
      dplyr::add_row(
        params %>%
          dplyr::mutate(best_model=best[Lineage],
                        ll.log=ll.log[Lineage],
                        ll.exp=ll.exp[Lineage])
      )
  )
}