#' Construct a list of `mtree` mutation trees, sampling their structures.
#'
#' @description
#'
#' This constructor creates a list of objects of class `'mtree'`, after using a
#' sampling strategy to determine possible trees that fit the data. The strategy
#' to sample trees can be controlled: a maximum number of trees can be sampled
#' with a Monte Carlo procedure, and the search can be made exhaustive if there
#' are fewer than a given number of trees available to fit the data.
#'
#' Note that the parameters of this function include the data parameters of
#' function \code{\link{mtree}} (`binary_clusters`, `drivers`, `samples` and
#' `patient`), plus the parameters of the sampler. See \code{\link{mtree}} for
#' an explanation of the shared parameters.
#'
#' @details
#'
#' The remaining arguments of \code{\link{mtree}} are not parameters of this
#' function; they are filled in for every returned tree:
#' \itemize{
#'   \item \code{M}, the adjacency matrix that connects all the nodes of the
#'   tree, is the structure returned by the sampler;
#'   \item \code{score}, the scalar score associated to the tree, is the score
#'   returned by the sampler for that structure;
#'   \item \code{annotation}, the string annotation used by some of the plotting
#'   functions that display `mtree` objects, is set to
#'   \code{"mtree rank i/N for <patient>"}.
#' }
#'
#' Trees are grouped by score, the groups are returned from the largest to the
#' smallest score, and trees with the same score are shuffled at random so that
#' their relative order carries no systematic bias.
#'
#' @param binary_clusters Clusters of binary annotations in the data of
#' this patient. See the package vignette to see the format in which this should
#' be specified.
#' @param drivers A list of driver events that should be annotated to each one
#' of the input clusters contained in the `binary_clusters` parameter. See the
#' package vignette to see the format in which this should be specified.
#' @param samples A vector of samples names (e.g., the biopsies sequenced for
#' this patient).
#' @param patient A string id that represents this patient.
#' @param sspace.cutoff If there are fewer than this number of trees available,
#' all the structures are examined in an exhaustive fashion. Otherwise, if there
#' are more than this, a Monte Carlo sampler is used.
#' @param n.sampling If a Monte Carlo sampler is used, \code{n.sampling} distinct
#' trees are sampled and scored.
#' @param store.max When a number of trees are generated, scored and ranked, a
#' maximum of \code{store.max} are returned to the user (these are selected
#' following the ranking).
#' @param evaluation How Suppes conditions should be evaluated (`>=` or `>`).
#' Within this function the value is only reported on the console; it is not
#' forwarded to the sampler nor to \code{\link{mtree}}.
#'
#' @return A list of objects of class \code{"mtree"} that represent the trees
#' that can be fit to the data of this patient. The list is empty if no tree is
#' available.
#'
#' @export
#'
#' @import tidyverse
#' @import tidygraph
#' @import crayon
#' @import clisymbols
#' @import entropy
#' @import matrixcalc
#' @import reshape2
#' @import easypar
#'
#' @examples
#' data(mtree_input)
#'
#' x = mtrees(
#'   mtree_input$binary_clusters,
#'   mtree_input$drivers,
#'   mtree_input$samples,
#'   mtree_input$patient,
#'   mtree_input$sspace.cutoff,
#'   mtree_input$n.sampling,
#'   mtree_input$store.max
#' )
#'
#' print(x[[1]])
#' plot(x[[1]])
mtrees = function(binary_clusters,
                  drivers,
                  samples,
                  patient,
                  sspace.cutoff = 10000,
                  n.sampling = 5000,
                  store.max = 100,
                  evaluation = '>=')
{
  # TODO - check input formats

  # Console header: report the sampler configuration. All pio calls are
  # namespace-qualified so they do not depend on pio being attached.
  pio::pioHdr(paste("mtree ~ generate mutation trees for", patient))

  pio::pioStr(
    'Sampler : ',
    sspace.cutoff,
    '(cutoff), ',
    n.sampling,
    '(sampling), ',
    store.max,
    '(max store)',
    suffix = '\n'
  )

  # NOTE(review): `evaluation` is only printed here; it is not passed to
  # trees_sampler() or mtree() below - confirm whether it should be.
  pio::pioStr(
    'Suppes\' conditions : ', evaluation,
    suffix = '\n'
  )

  # Sample structure for all trees: the first element of the result holds the
  # structures, the second one their scores.
  structures = trees_sampler(binary_clusters,
                             drivers,
                             samples,
                             patient,
                             sspace.cutoff,
                             n.sampling,
                             store.max)

  TREES = structures[[1]]
  SCORES = structures[[2]]

  # Trees assembly
  pio::pioStr(
    " Trees with non-zero sscore",
    length(TREES),
    'storing',
    min(length(TREES), store.max),
    prefix = crayon::green(clisymbols::symbol$tick),
    suffix = '\n'
  )

  # Keep at most `store.max` trees, in the order returned by the sampler.
  # seq_len() is used instead of 1:store.max so that store.max = 0 keeps no
  # tree at all (1:0 would select the first one).
  # NOTE(review): presumably the sampler output is already ranked - confirm.
  if (length(TREES) > store.max)
  {
    keep = seq_len(store.max)

    TREES = TREES[keep]
    SCORES = SCORES[keep]
  }

  # Nothing to assemble: return an empty list rather than running the
  # ranking and assembly steps on empty inputs.
  if (length(TREES) == 0) return(list())

  # Shuffle indexes of equal-scoring fits to avoid a systematic bias in the
  # order in which ties are reported. Positions are grouped by score (split()
  # orders the groups by increasing score), each group is permuted, and the
  # concatenation is reversed so that the largest scores come first. Working
  # on positions keeps this correct even when `SCORES` carries names.
  tied = split(seq_along(SCORES), f = SCORES)
  tied = lapply(tied, function(idx) idx[sample.int(length(idx))])

  permuted.indexes = rev(unlist(tied, use.names = FALSE))

  TREES = TREES[permuted.indexes]
  SCORES = SCORES[permuted.indexes]

  # Build one `mtree` object per ranked structure; the position in the ranking
  # is recorded in the annotation of each tree.
  easypar::run(
    FUN = function(i)
    {
      mtree(
        binary_clusters,
        drivers,
        samples,
        patient,
        M = TREES[[i]],
        score = SCORES[i],
        annotation = paste0('mtree rank ', i, '/', length(TREES), ' for ', patient)
      )
    },
    PARAMS = lapply(seq_along(TREES), list),
    parallel = FALSE
  )
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.