Nothing
#'
#' sim_seqdata
#'
#' Generate singe cell barcode data set with tree shaped lineage information
#'
#' @param sim_n Number of cell samples to simulate.
#'
#' @param m Number of targets.
#'
#' @param mu_d Mutation rate. (a scalar or a vector)
#'
#' @param d Number of cell divisions.
#'
#' @param n_s Number of possible outcome states
#'
#' @param outcome_prob Outcome probability vector (default is NULL)
#'
#' @param p_d Dropout probability
#'
#' @return The result is a list containing two objects, 'seqs' and 'tree'. The 'seqs' is 'phyDat' object of 'sim_n' number of simulated barcodes corresponding to each cell, and The 'tree' is a 'phylo' object, a ground truth tree structure for the simulated data.
#'
#' @author Il-Youp Kwak
#'
#' @examples
#'
#' library(DCLEAR)
#' library(phangorn)
#' library(ape)
#'
#' set.seed(1)
#' mu_d1 = c( 30, 20, 10, 5, 5, 1, 0.01, 0.001)
#' mu_d1 = mu_d1/sum(mu_d1)
#' simn = 10 # number of cell samples
#' m = 10 ## number of targets
#' sD = sim_seqdata(sim_n = simn, m = m, mu_d = 0.03,
#' d = 12, n_s = length(mu_d1), outcome_prob = mu_d1, p_d = 0.005 )
#' ## RF score with hamming distance
#' D_hm = dist.hamming(sD$seqs)
#' tree_hm = NJ(D_hm)
#' RF.dist(tree_hm, sD$tree, normalize = TRUE)
#'
#' ## RF score with weighted hamming
#' InfoW = -log(mu_d1)
#' InfoW[1:2] = 1
#' InfoW[3:7] = 4.5
#' D_wh = dist_weighted_hamming(sD$seqs, InfoW, dropout=FALSE)
#' tree_wh = NJ(D_wh)
#' RF.dist(tree_wh, sD$tree, normalize = TRUE)
#'
#' ## RF score with weighted hamming, cosidering dropout situation
#' nfoW = -log(mu_d1)
#' InfoW[1] = 1
#' InfoW[2] = 12
#' InfoW[3:7] = 3
#' D_wh2 = dist_weighted_hamming(sD$seqs, InfoW, dropout = TRUE)
#' tree_wh2= NJ(D_wh2)
#' RF.dist(tree_wh2, sD$tree, normalize = TRUE)
#'
#' @export
sim_seqdata <- function(sim_n = 200, # number of samples to simulate
m = 200, # number of targets
mu_d = 0.03, # mutation rate (a scalar or a vector)
d = 15, # number of cell divisons
n_s = 23, # number of possible outcome states
outcome_prob = NULL, # outcome probability vector
p_d = 0.003 # dropout prob
){
if (length(mu_d) != m && length(mu_d) != 1 ) {
stop("length of mu_d must be the number of targets or 1.")
}
if (!is.null(outcome_prob) ) {
if (length(outcome_prob) != n_s ) {
stop("length of outcome_prob must be the number of possible mutational outcomes.")
}
}
acell = as.integer(rep(1,m))
nmstrings = c( '0', '-', LETTERS[1:n_s] )
sim_tree = list()
sim_tree[[1]] = acell
k = 2
while(k < 2^d) {
## codes for point mutation
mother_cell = sim_tree[[k%/%2]]
mu_loc = runif(m) < mu_d
mutation_cites = (mother_cell == 1) & mu_loc
n_mut = sum(mutation_cites)
if (n_mut > 0) {
if (is.null(outcome_prob)) {
mother_cell[mutation_cites] = as.integer(sample(n_s, n_mut, replace = T)+2)
} else {
mother_cell[mutation_cites] = as.integer(sample(n_s, n_mut, replace = T, prob = outcome_prob)+2)
}
}
## codes for dropout
dropout_cites = runif(m) < p_d
if (sum(dropout_cites) > 2 ) {
dropout_between = sample(which(dropout_cites), 2 )
mother_cell[dropout_between[1]:dropout_between[2]] = as.integer(2)
}
sim_tree[[k]] = mother_cell
k = k+1
}
sampled_leaves = sample(2^(d-1):(2^d -1) , sim_n)
names(sampled_leaves) = sprintf('cell_%s', str_pad(1:sim_n, 4, pad = "0"))
edges = matrix(0,2^d,2)
indexes = sampled_leaves
idx = sim_n
iterk = sim_n
edge_i = 1
names(indexes) = 1:length(indexes)
while( sum(indexes) > 1 ) {
indexes = indexes %/% 2
tb1 = table(indexes)
info_tmp = which(tb1 == 2)
if(sum(tb1 == 2) != 0) {
for (i in 1:length(info_tmp)) {
node_i = names(info_tmp)[i]
if (node_i != 0) {
edges[edge_i:(edge_i+1),1] = as.integer(idx + 1)
edges[edge_i:(edge_i+1),2] = as.integer(names(indexes[indexes == node_i]))
edge_i = edge_i + 2
indexes[indexes == node_i] = 0
indexes[idx+1] = as.integer(node_i)
idx = idx + 1
}
}
names(indexes) = 1:length(indexes)
}
}
tree_ex = list()
tree_ex$edge = edges[1:(edge_i-1),]
tree_ex$tip.label = names(sampled_leaves)
tree_ex$Nnode = length(unique(tree_ex$edge[,1]))
attr(tree_ex,'class') = 'phylo'
fixtree <- function(tree) {
tmp_tree_nw = write.tree(tree, "")
newtree2 = read.tree(text = tmp_tree_nw)
return(newtree2)
}
tree_ex2 = fixtree(tree_ex)
seqs <- sampled_leaves %>% map(~nmstrings[sim_tree[[.]]])
seqs <- do.call('rbind', seqs)
seqs <- seqs %>% phyDat(type = 'USER', levels = nmstrings)
return(list(seqs = seqs, tree = tree_ex2))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.