Nothing
#' summarize_kmer
#'
#' Summarize kmer distributions with input sequences
#'
#' @param x input data as a phyDat object
#' @param division number of cell division
#' @param k k-mer (default = 2)
#' @param reps number of simulated trees
#' @param n_samples number of samples to simulate
#' @param n_nodes number of nodes to sample (including both leaves and internval nodes)
#' @param n_targets number of targets
#' @param n_targets sequence length. If this argument is missing, the length of the input sequences will be used.
#'
#' @return a kmer_summary object
#'
#' @author Wuming Gong (gongx030@umn.edu)
#'
#' @export
#'
setMethod(
'summarize_kmer',
signature(
x = 'phyDat'
),
function(
x,
division = 16L,
k = 2,
reps = 20L,
n_samples = 200L,
n_nodes = 100L,
n_targets
){
if (k > 2)
stop(sprintf('k should be 1 or 2'))
config <- process_sequence(x, division = division)
if (!missing(n_targets))
config@n_targets <- n_targets
summarize_kmer_core(k, reps, n_samples, n_nodes, config)
}
)
#' summarize_kmer_core
#'
#' Summarize kmer distributions (core function)
#'
#' @param k k-mer (default = 2)
#' @param reps number of simulated trees
#' @param n_samples number of samples to simulate
#' @param n_nodes number of nodes to sample (including both leaves and internval nodes)
#' @param config lineage tree configuration (a lineage_tree_config object)
#'
#' @return a kmer_summary object
#'
#' @author Wuming Gong (gongx030@umn.edu)
#'
summarize_kmer_core <- function(
k = 2,
reps = 20L,
n_samples = 200L,
n_nodes = 100L,
config = NULL
){
alphabets <- config@alphabets[config@frequency > 0]
max_distance <- (config@division - 1) * 2
kmers <- do.call('paste', do.call('expand.grid', lapply(1:k, function(j) alphabets)))
sprintf('simulating | k=%d | alphabets=%d | mutation=%.3f | division=%d | sample=%d | n_targets=%d | dropout_prob=%.3f', k, length(alphabets), config@mutation_prob, config@division, reps, config@n_targets, config@dropout_prob) %>%
message()
p <- expand.grid(
from = 1:n_nodes,
to = 1:n_nodes,
start = 1:(config@n_targets - k + 1)
) %>%
filter(.data$from < .data$to)
# df <- do.call('rbind', bplapply(
df <- do.call('rbind', lapply(
seq_len(reps),
function(i){
sim <- simulate(n_samples = n_samples, config)
s <- sample(length(sim@x), n_nodes)
X <- sim@x[s] %>% as.character()
D <- sim@graph %>% distances(
v = names(sim@x)[s],
to = names(sim@x)[s]
)
d <- D[cbind(p[, 'from'], p[, 'to'])]
str_from <- do.call('paste', lapply(1:k, function(j) X[cbind(p[, 'from'], p[, 'start'] + j - 1)]))
str_to <- do.call('paste', lapply(1:k, function(j) X[cbind(p[, 'to'], p[, 'start'] + j - 1)]))
data.frame(
from = str_from,
to = str_to,
distance = d
) %>%
group_by(.data$from, .data$to, .data$distance) %>%
tally()
}
)) %>%
group_by(.data$from, .data$to, .data$distance) %>%
summarize(n = sum(n))
new(
'kmer_summary',
df = df,
k = k,
reps = reps,
alphabets = alphabets,
max_distance = max_distance,
kmers = kmers,
config = config
)
} # summarize_kmer_core
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.