#' Construct a `ctree` clone tree computing its structure.
#'
#' @description
#'
#' This constructor creates a list of objects of class `'ctree'`, after using a
#' sampling strategy to determine possible trees that fit the data. The strategy
#' to sample trees can be controlled, a maximum number of trees can be sampled
#' with a Monte Carlo procedure and the actual process can be exhausted if there
#' are less than a number of available trees to fit the data.
#'
#' Note that the parameters of this function includes the same parmeters of
#' function \code{\link{ctree}}, plus the parameters of the sampler. See
#' \code{\link{ctree}} for an explanation of the parameters.
#'
#' @param CCF_clusters Clusters of Cancer Cell Fractions available in the data of
#' this patient. See the package vignette to see the format in which this should
#' be specified.
#' @param drivers A list of driver events that should be annotated to each one
#' of the input clusters contained in the `CCF_clusters` parameter. See the package
#' vignette to see the format in which this should be specified.
#' @param samples A vector of samples names (e.g., the biopsies sequenced for
#' this patient).
#' @param patient A string id that represent this patient.
# @param M The adjacency matrix defined to connect all the nodes of this tree.
# @param score A scalar score that can be associated to this tree.
# @param annotation Any string annotation that one wants to add to this `ctree`.
# This will be used by some of the plotting functions that display `ctree` objects.
#' @param sspace.cutoff If there are less than this number of tree available, all the
#' structures are examined in an exhaustive fashion. Otherwise, if there are more than
#' this, a Monte Carlo sampler is used.
#' @param n.sampling If a Monte Carlo sampler is used, \code{n.sampling} distinct
#' trees are sampled and scored.
#' @param store.max When a number of trees are generated, scored and ranked, a maximum
#' of \code{store.max} are returned to the user (these are selected following the
#' ranking).
#'
#' @return An list of objects of class \code{"ctree"} that represent the trees that
#' can be fit to the data of this patient..
#'
#' @export
#'
#' @import tidyverse
#' @import tidygraph
#' @import clisymbols
#' @import entropy
#' @import reshape2
#' @import clisymbols
#' @import easypar
#' @import entropy
#' @importFrom matrixcalc hadamard.prod
#'
#' @examples
#'
#' data('ctree_input')
#'
#' x = ctrees(
#' ctree_input$CCF_clusters,
#' ctree_input$drivers,
#' ctree_input$samples,
#' ctree_input$patient,
#' ctree_input$sspace.cutoff,
#' ctree_input$n.sampling,
#' ctree_input$store.max
#' )
#'
#' print(x[[1]])
#' plot(x[[1]])
ctrees = function(CCF_clusters,
drivers,
samples,
patient,
sspace.cutoff = 10000,
n.sampling = 5000,
store.max = 100)
{
# TODO - check input formats
pio::pioHdr(paste("ctree ~ clone trees generator for", patient))
cat("\n")
# cli::cli_alert_info(paste(
# "Sampling ",
# ifelse(
# options$ONLY.DRIVER == 0,
# 'only driver' %>% crayon::bold(),
# 'all annotated' %>% crayon::bold()
# ),
# 'mutations. '
# ))
# pioStr('Sampler : ',
# sspace.cutoff, '(cutoff), ',
# n.sampling, '(sampling), ',
# store.max, '(max store)',
# suffix = '\n'
# )
# Sample structure for all trees
structures = trees_sampler(
CCF_clusters = CCF_clusters,
drivers = drivers,
samples = samples,
patient = patient,
sspace.cutoff = sspace.cutoff,
n.sampling = n.sampling,
store.max = store.max
)
TREES = structures[[1]]
SCORES = structures[[2]]
# Trees assembly
b = paste(min(length(TREES), store.max)) %>% crayon::bold()
cli::cli_alert_success(
"{.field {length(TREES)}} trees with non-zero score, storing {.count {b}}"
)
# pio::pioStr(
# " Trees with non-zero sscore",
# length(TREES), 'storing',
# min(length(TREES), store.max),
# prefix = crayon::green(clisymbols::symbol$tick),
# suffix = '\n')
if(length(TREES) > store.max)
{
TREES = TREES[1:store.max]
SCORES = SCORES[1:store.max]
}
LSCORES = as.data.frame(SCORES)
LSCORES = split(LSCORES, f = LSCORES[,1])
LSCORES = lapply(LSCORES, function(w) w[sample(1:nrow(w), replace = FALSE), , drop = FALSE])
# Shuffle indexes of equal-scoring fits to avoid a sistematic bias in the way I coded this
permuted.indexes = as.integer(rev(unlist(lapply(LSCORES, rownames))))
names(permuted.indexes) = NULL
TREES = TREES[permuted.indexes]
SCORES = SCORES[permuted.indexes]
#####################################################################
easypar::run(
FUN = function(i)
{
ctree(
CCF_clusters,
drivers,
samples,
patient,
M = TREES[[i]],
score = SCORES[i],
annotation = paste('ctree rank ', i, '/', length(TREES), ' for ', patient, sep ='')
)
},
PARAMS = lapply(seq_along(TREES), list),
parallel = FALSE
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.