## Get muller edges dataframe ####
get_muller_edges = function(x,
mutations=FALSE,
tree_score=1,
highlight=c()) {
highlight = get_highlight(x, highlight=highlight, mutations=F)
edges = data.frame("Parent"="P", "Identity"=get_unique_labels(x)) %>%
dplyr::filter(Identity %in% highlight)
if (!mutations) return(edges)
edges_mut = x %>% get_edges_muts(tree_score=tree_score, highlight=highlight)
missed = setdiff(get_unique_muts_labels(x), edges_mut %>% dplyr::pull(Identity))
return(edges_mut %>% dplyr::add_row(edges))
}
get_edges_muts = function(x, highlight=c(), tree_score=1) {
highlight = get_highlight(x, highlight=highlight, min_frac=0, mutations=F)
# create an empty dataset with colnames
edges = setNames(data.frame(matrix(ncol=2, nrow=0)), c("Parent", "Identity")) %>%
tibble::as_tibble() %>%
mutate(Parent=as.character(Parent), Identity=as.character(Identity))
trees = get_trees(x, verbose=F)
for (cluster in highlight) {
tree = trees[[cluster]]
muts = x %>% get_unique_muts_labels(cluster=cluster)
if (!is.null(tree))
edges = edges %>%
dplyr::add_row(
tree[[tree_score]] %>%
get_adj() %>%
as.data.frame() %>%
tibble::rownames_to_column(var="Label") %>%
reshape2::melt(id="Label", variable.name="Identity") %>%
dplyr::filter(value==1) %>%
dplyr::filter(!Label %in% c("GL"), !Identity %in% c("GL", "P", cluster)) %>%
dplyr::select(-value) %>%
dplyr::rename(Parent=Label) %>%
dplyr::mutate(Parent=ifelse(Parent==cluster, Parent, paste(cluster, Parent, sep="."))) %>%
dplyr::mutate(Identity=paste(cluster,Identity,sep=".")) %>%
tibble::as_tibble()
)
else if (length(muts) > 0)
edges = edges %>%
dplyr::add_row(
data.frame(Parent=cluster, Identity=muts)
)
}
return(edges)
}
## Get muller population dataframe ####
# means format must be a dataframe with columns: labels, timepoints, lineage, mean_cov
get_muller_pop = function(x,
mutations=F,
timepoints_to_int=c(),
highlight=c(),
add_t0=T,
single_clone=T,
tree_score=1,
estimate_npops=FALSE,
vcn=NULL,
edges=NULL) {
if ((purrr::is_empty(highlight) | !single_clone) && mutations && have_pop_df_muts(x) && have_corrected_pops(x, estimate_npops))
return(
get_pop_df(x) %>%
dplyr::filter(Identity %in% c("P", get_highlight(x, mutations=mutations, highlight=c())))
)
if ((purrr::is_empty(highlight) | !single_clone) && !mutations && have_pop_df(x) && have_corrected_pops(x, estimate_npops))
return(
get_pop_df(x) %>%
dplyr::filter(Identity %in% c("P", get_highlight(x, mutations=mutations, highlight=c())))
)
if (single_clone)
highlight = get_highlight(x, mutations=mutations, highlight=highlight)
else
highlight = get_highlight(x, mutations=mutations, highlight=c())
# if highlight is specified -> we want to observe the frequencies of them related one to the other
timepoints_to_int = map_timepoints_int(x, timepoints_to_int=timepoints_to_int)
if(is.integer(timepoints_to_int)) value = 0 else value = "init"
if (!0 %in% timepoints_to_int && add_t0)
timepoints_to_int = c(0, timepoints_to_int) %>% setNames(nm=c(value, names(timepoints_to_int))) else
value = which(timepoints_to_int==0) %>% names()
if (is.null(edges))
edges = get_muller_edges(x, mutations=mutations, highlight=highlight) %>%
dplyr::arrange(Parent)
# tibble with columns Identity, Generation, Lineage, Population, theta_binom, theta.par, Parent
# Population is the mean coverage of the cluster
# theta_binom is the Binomial theta
# theta.par is the Binomial theta of the parent, for clonal clusters it is NA
# Parent is the cluster parent
means.clonal = x %>%
get_mean_long() %>%
dplyr::filter(labels %in% highlight) %>%
dplyr::rename(Identity=labels, Generation=timepoints, Lineage=lineage, Population=mean_cov) %>%
dplyr::mutate(Identity=as.character(Identity)) %>%
dplyr::mutate(Population=ifelse(Population==0, 0.001, Population)) %>%
dplyr::arrange(Identity, Lineage, Generation) %>%
dplyr::mutate(theta_binom=1, theta.par=NA, Parent="P")
if (estimate_npops) {
n_pops = data.frame(estimate_n_pops(x, vcn=vcn)) %>% setNames("n_pops") %>% tibble::rownames_to_column(var="Identity")
means.clonal = dplyr::inner_join(means.clonal, n_pops, by="Identity") %>%
dplyr::rename(Population.orig=Population) %>%
dplyr::mutate(Population=Population.orig*n_pops)
}
pop_df = x %>%
get_pop_muts(means.clonal=means.clonal, edges=edges, mutations=mutations) %>%
dplyr::select(-Parent) %>%
add_parent(x=x) %>%
add_time_0(x=x, force=add_t0, value=value) %>%
convert_tp(timepoints_to_int=timepoints_to_int) %>%
dplyr::full_join(edges, by="Identity")
if (estimate_npops)
pop_df = pop_df %>%
dplyr::rename(Population.corr=Population) %>%
dplyr::rename(Population=Population.orig)
return(pop_df)
}
get_pop_muts = function(x, means.clonal, edges, mutations=F) {
if (!mutations)
return(
means.clonal %>%
dplyr::select(-theta.par) %>%
dplyr::group_by(Generation, Lineage) %>%
dplyr::mutate(Frequency=Population/sum(Population)) %>%
dplyr::ungroup() %>%
dplyr::mutate(Pop.plot=Population)
)
# obtain the correct theta for each parent
# columns Identity, Generation, Lineage, theta_binom, theta.par, Parent
pop = means.clonal %>%
correct_theta(edges=edges, x=x) %>%
# add_population(means.clonal=means.clonal, x=x, edges=edges) %>%
dplyr::inner_join(edges, by=c("Identity","Parent")) %>%
dplyr::group_by(Generation, Lineage) %>%
dplyr::mutate(Frequency=dplyr::case_when(
sum(Population)>0 ~ Population / sum(Population),
sum(Population)==0 ~ 0)
) %>%
dplyr::ungroup()
pop.subcl = pop %>%
dplyr::group_by(Parent, Generation, Lineage) %>%
dplyr::summarise(Pop.subcl=sum(Population), .groups="keep") %>%
dplyr::ungroup() %>%
dplyr::filter(Parent != "P") %>%
dplyr::rename(Identity=Parent)
return(
pop %>%
dplyr::full_join(pop.subcl, by=c("Identity", "Generation", "Lineage")) %>%
replace(is.na(.), 0) %>%
dplyr::mutate(Pop.plot=Population - Pop.subcl) %>%
dplyr::mutate(Pop.plot=replace(Pop.plot, Pop.plot < 0, 0))
)
# return(pop)
}
correct_theta = function(x, means.clonal, edges) {
clonal = get_parents(edges, clonal=T) # parents clonal
not.clonal = lapply(clonal, sort_clusters_edges, # sorted based on phylogeny
clusters=get_parents(edges, clonal=F), # not clonal parents
edges=edges) %>%
unlist()
# theta of all clonal clusters of IS, with theta=1 and parent="P"
theta.df = means.clonal %>% dplyr::mutate(Pop.par=NA)
for (node in clonal)
# correct the theta values of direct children of the clonal cluster
theta.df = correct_theta_node(x, theta.df, edges, node)
for (node in not.clonal)
theta.df = correct_theta_node(x, theta.df, edges, node)
return(
theta.df %>%
dplyr::select(-theta.par, -Pop.par) %>%
dplyr::arrange(Parent, Generation, Lineage)
)
}
# given "node", it checks if its children thetas sum is
# lower than or equal to its own theta
correct_theta_node = function(x, theta.df, edges, node) {
# children of "node", might be one or more than one node
node.c = edges %>%
dplyr::filter(Parent==node) %>%
dplyr::pull(Identity) %>% unique()
# if FALSE, it means "node" has been evaluated already
# and we can correct theta of its children
if (!node %in% theta.df$Identity)
cli::cli_alert_warning("Node {node} not present in the dataframe")
# theta.df = correct_theta_node(x, theta.df, edges, get_parent(edges, node))
# if also the children have been evaluated already
if (all(node.c %in% theta.df$Identity)) return(theta.df)
# get theta of "node" from the dataframe
theta.node = get_theta_node(x, theta.df, node, edges) %>%
dplyr::rename(theta.par=theta_binom, Parent=Identity, Pop.par=Population)
theta.tmp = x %>%
# get uncorrected theta of "node" children from vaf dataframe
get_vaf_dataframe() %>%
dplyr::rename(Identity=labels_mut, Generation=timepoints, Lineage=lineage) %>%
dplyr::select(Identity, Generation, Lineage, theta_binom) %>%
dplyr::inner_join(edges, by="Identity") %>%
dplyr::filter(Parent==node) %>%
unique() %>%
# add the corrected theta of the parent
dplyr::inner_join(theta.node, by=c("Parent","Generation","Lineage")) %>%
# correct children theta
dplyr::group_by(Generation, Lineage) %>%
dplyr::mutate(theta_binom=replace(theta_binom,
sum(theta_binom) > theta.par,
theta_binom / sum(theta_binom) * theta.par)) %>%
dplyr::ungroup() %>%
dplyr::mutate(Population=theta_binom*Pop.par)
return(
theta.df %>%
dplyr::add_row(
theta.tmp
)
)
}
get_theta_node = function(x, theta.df, node, edges) {
if (node %in% theta.df$Identity)
return(
theta.df %>%
dplyr::filter(Identity==node) %>%
dplyr::select(theta_binom, Identity, Generation, Lineage, Population)
)
return(
x %>%
get_vaf_dataframe() %>%
dplyr::inner_join(get_mean_long(x), by=c("labels", "timepoints", "lineage")) %>%
dplyr::filter(labels_mut==node) %>%
dplyr::select(theta_binom, labels_mut, timepoints, lineage, mean_cov) %>%
unique() %>%
dplyr::rename(Identity=labels_mut, Generation=timepoints, Lineage=lineage, Population=mean_cov) %>%
dplyr::mutate(Population=NA)
)
}
get_parents = function(edges, clonal) {
if (clonal)
return(
edges %>%
dplyr::filter(Parent=="P") %>%
dplyr::pull(Identity) %>%
unique()
)
return(
edges %>%
dplyr::filter(Parent!="P") %>%
dplyr::filter(Parent %in% Identity) %>%
dplyr::pull(Parent) %>%
unique()
)
}
# add_population = function(x, means.clonal, theta.df, edges) {
# parents = theta.df$Parent %>% unique()
#
# means.clonal = means.clonal %>%
# dplyr::select(-Parent, -theta.par)
#
# means.clonal.par = means.clonal %>%
# dplyr::select(-theta_binom) %>%
# dplyr::rename(Parent=Identity, Pop.par=Population)
#
# # first compute the population size of the parents
# pop.parents = theta.df %>%
# dplyr::filter(Identity %in% parents) %>%
# dplyr::full_join(means.clonal, by=c("Identity", "Generation", "Lineage", "theta_binom")) %>%
# dplyr::left_join(means.clonal.par, by=c("Parent", "Generation", "Lineage")) %>%
#
# dplyr::rowwise() %>%
# dplyr::mutate(Population=replace(Population, is.na(Population), theta_binom * Pop.par)) %>%
# dplyr::ungroup() %>%
#
# dplyr::select(Identity, Generation, Lineage, theta_binom, Population) %>%
# dplyr::rename(Pop.par=Population, Parent=Identity, theta.par=theta_binom)
#
# pop = theta.df %>%
# # add to each clonal Identity its population
# dplyr::full_join(means.clonal, by=c("Identity", "Generation", "Lineage", "theta_binom")) %>%
#
# # add to each subclonal parent its parent population and compute its own pop
# dplyr::left_join(pop.parents, by=c("Parent", "Generation", "Lineage")) %>%
#
# # compute the population size
# dplyr::rowwise() %>%
# dplyr::mutate(Population=replace(Population, is.na(Population), theta_binom * Pop.par)) %>%
# dplyr::ungroup() %>%
#
# dplyr::select(Identity, Generation, Lineage, Population, theta_binom)
#
# return( pop )
# }
# Checks the fraction of non-clonal parents
# -> their pop and freqs are computed as pop_s = theta_s*pop_p (same for freq)
# -> the pop and freqs of their children are computed as theta_c*pop_s
# check_fracs = function(means, edges, x) {
# # not clonal parents
# not_clonal = edges %>% dplyr::filter(Parent!="P") %>%
# dplyr::filter(Parent %in% Identity) %>% dplyr::pull(Parent) %>%
# unique()
# if (length(not_clonal) == 0) return(means)
#
# frac.par = lapply(not_clonal, get_frac_parent, means=means, edges=edges, x=x) %>%
# setNames(nm=not_clonal) %>%
# tibble::as_tibble_col() %>% # tibble of tibbles
# tidyr::unnest(cols=c(value)) # unnest the values
#
# return(
# means %>%
# dplyr::add_row( frac.par )
# )
# }
# Function to retrieve the pops and fracs of the parent of "node"
# recursively retrieves/computes the pops/fracs of the parents
# get_frac_parent = function(node, means, edges, x) {
# n.par = get_parent(edges, node) # parent of "node"
#
# # case 1 -> node is clonal, just return the fracs from "means"
# if (n.par == "P")
# return( means %>% dplyr::filter(Identity==node) )
#
# # case 2 -> node is a subclone, compute the fracs as fracs_par * theta_s
# # recursively retrieve the parent fractions
# p.frac = get_frac_parent(n.par, means, edges, x) %>% # get the frac of the parent
# dplyr::rename(Parent=Identity, Pop.par=Population, Freq.par=Frequency, theta.par=theta_binom)
#
# n.frac = x %>%
# get_vaf_dataframe() %>%
# dplyr::select(labels_mut, timepoints, lineage, theta_binom) %>%
# dplyr::filter(labels_mut==node) %>%
# dplyr::rename(Identity=labels_mut, Generation=timepoints, Lineage=lineage) %>%
# unique() %>%
#
# dplyr::inner_join(p.frac, by=c("Generation","Lineage")) %>%
#
# # correct for the thetas > than the theta of the parent
# dplyr::mutate(theta_binom=replace(theta_binom, theta_binom > theta.par, theta.par)) %>%
#
# # dplyr::mutate(Frequency=Freq.par*theta_binom, Population=Pop.par*theta_binom) %>%
# dplyr::mutate(Population=Pop.par*theta_binom) %>%
# dplyr::select(Identity, Generation, Lineage, Frequency, Population, theta_binom)
#
# return(n.frac)
# }
# check_thetas = function(means) {
# tmp = means %>%
# dplyr::mutate(cond=theta_binom>theta.par)
#
# if (any(tmp$cond))
# cli::cli_alert_warning("Subclone frequency > parent frequency. Resetting it to the parent frequency.")
# }
# substract_subclonal_fracs = function(pop, edges) {
# # get the list of parents
# parents = edges %>% dplyr::filter(Parent!="P") %>%
# dplyr::pull(Parent) %>% unique()
#
# # for each parent, we have two columns Pop.sum and Freq.sum
# # -> sum of fracs and pops of all descendants
# subcl.fracs = lapply(parents, get_frac_desc, pop=pop) %>%
# setNames(nm=parents) %>%
# tibble::as_tibble_col() %>% # tibble of tibbles
# tidyr::unnest(cols=c(value)) # unnest the values
#
# return(
# pop %>%
# # it will be NA if Identity is not a parent
# dplyr::full_join(subcl.fracs, by=c("Generation","Lineage","Identity")) %>%
# dplyr::rowwise() %>%
# dplyr::mutate(Freq.plot=replace(Frequency, !is.na(Freq.sum) & Frequency>=Freq.sum, Freq.plot - Freq.sum),
# Pop.plot=replace(Population, !is.na(Pop.sum) & Population>=Pop.sum, Pop.plot - Pop.sum)) %>%
#
# dplyr::ungroup()
# )
# }
# get_frac_desc = function(node, pop) {
#
# subcl.fracs = pop %>%
# dplyr::filter(Parent==node) %>%
# dplyr::group_by(Lineage, Generation) %>%
# dplyr::summarise(Freq.sum=sum(Frequency), Pop.sum=sum(Population),
# Identity=Parent, .groups="keep") %>% unique() %>%
# dplyr::ungroup()
#
# return(subcl.fracs)
# }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.