#' Simulate different scenarios of abundance change in entities
#'
#' Simulate a data set with different abundance patterns for entities under
#' different conditions. These entities have their corresponding nodes on a
#' tree.
#'
#' @author Ruizhu Huang, Charlotte Soneson
#' @export
#'
#' @param tree A \code{phylo} object. Only used when \code{obj} is \code{NULL}.
#' @param data A count matrix with entities corresponding to tree leaves
#' in the rows and samples in the columns. Only used when \code{obj} is
#' \code{NULL}.
#' @param obj A \code{TreeSummarizedExperiment} object with observed data to
#' use as the input for the simulation. If \code{NULL}, \code{data} and \
#' \code{tree} must be provided instead.
#' @param assay If \code{obj} is not \code{NULL}, a numeric index or
#' character scalar indicating which assay of the object to use as the
#' basis for simulation. If \code{assay} is \code{NULL}, the first
#' assay in the object is used.
#' @param scenario The simulation scenario, either \dQuote{BS}, \dQuote{US},
#' or \dQuote{SS} (see \bold{Details}).
#' @param from.A,from.B The branch node labels of branches A and B for which the
#' signal will be swapped. By default, both are \code{NULL}, in which case
#' they will be chosen based on the restrictions provided (\code{minTip.A},
#' \code{maxTip.A}, \code{minTip.B}, \code{maxTip.B}, \code{minPr.A},
#' \code{maxPr.A}, \code{ratio}). Note: If \code{from.A} is \code{NULL},
#' \code{from.B} is also set to \code{NULL}.
#' @param minTip.A The minimum number of leaves allowed in branch A.
#' @param maxTip.A The maximum number of leaves allowed in branch A.
#' @param minTip.B The minimum number of leaves allowed in branch B.
#' @param maxTip.B The maximum number of leaves allowed in branch B.
#' @param minPr.A A numeric value in [0, 1]. The minimum abundance
#' proportion of leaves in branch A.
#' @param maxPr.A A numeric value in [0, 1]. The maximum abundance
#' proportion of leaves in branch A.
#' @param ratio A numeric value. The proportion ratio of branch B to branch A.
#' This value is used to select branches(see \bold{Details}). If there are
#' no branches having exactly this ratio, the pair with the value closest to
#' \code{ratio} will be selected.
#' @param adjB A numeric value in [0, 1] (only for \code{scenario}
#' \dQuote{SS}), or \code{NULL}. If \code{NULL}, branch A and the selected
#' part of branch B swap their proportions. If a numeric value, e.g. 0.1,
#' then the counts for the selected part of branch B decreases to 10% of
#' the original value, and this decrease is added to branch A. For example,
#' assume there are two experimental conditions (C1 & C2), branch A has
#' a count of 10 and branch B has a count of 40 in C1. If adjB is set to
#' 0.1, then in C2 branch B becomes 4 and branch A 46
#' so that the total count of the two branches stays the same.
#' @param pct The percentage of leaves in branch B that have differential
#' abundance under different conditions (only for scenario \dQuote{SS}).
#' @param nSam A numeric vector of length 2, indicating the sample size for each
#' of the two simulated conditions.
#' @param mu,size The parameters of the Negative Binomial distribution. (see mu
#' and size in \code{\link[stats:NegBinomial]{rnbinom}}). These parameters
#' are used to generate the library size for each simulated sample. If
#' \code{size} is not specified, \code{mu} should be a vector of numbers
#' from which the library size is sampled with replacement.
#' @param n A numeric value to specify how many count tables would be generated
#' with the same settings. The default is 1, i.e., one count table would be
#' obtained at the end. If greater than 1, the output is a list of matrices.
#' @param FUN A function to calculate the aggregated count at each internal
#' node based on its descendant leaves (e.g., \code{sum}, \code{mean}).
#' The argument of the function should be a numeric vector with the counts
#' of an internal node's descendant leaves.
#' @param message A logical scalar, indicating whether progress messages
#' should be printed to the console.
#'
#' @returns a TreeSummarizedExperiment object.
#' \itemize{
#' \item \strong{assays} A list of count matrices, with entities in rows and
#' samples in columns. Each row can be mapped to a node of the tree.
#' \item \strong{rowData} Annotation data for the rows.
#' \item \strong{colData} Annotation data for the columns.
#' \item \strong{rowTree} The tree structure of entities.
#' \item \strong{rowLinks} The link between rows and nodes on the tree.
#' \item \strong{metadata} More details about the simulation.
#' \itemize{
#' \item \strong{FC} the fold change of entities corresponding to the
#' tree leaves.
#' \item \strong{Branch} the information about two selected branches.
#' \itemize{
#' \item \strong{A} The branch node label (or number) of branch A.
#' \item \strong{B} The branch node label (or number) of branch B.
#' \item \strong{ratio} The count proportion ratio of branch B to
#' branch A.
#' \item \strong{A_tips} The number of leaves on branch A.
#' \item \strong{B_tips} The number of leaves on branch B.
#' \item \strong{A_prop} The count proportion of branch A.
#' \item \strong{B_prop} The count proportion of branch B.
#' }
#' }
#' }
#'
#' @details Simulate a count table for entities which are corresponding to the
#' nodes of a tree. The entities are in rows and the samples from different
#' groups or conditions are in columns. The library size of each sample is
#' sampled from a Negative Binomial distribution with mean and size
#' specified by the arguments \code{mu} and \code{size}. The counts of
#' entities, that are mapped to the leaf nodes, in a sample are assumed
#' to follow a Dirichlet-Multinomial distribution. The parameters for
#' the Dirichlet-Multinomial distribution are estimated from a real data set
#' specified by \code{data} via the function \code{dirmult} (see
#' \code{\link[dirmult]{dirmult}}). To generate different abundance patterns
#' under different conditions, we provide three different scenarios,
#' \dQuote{BS}, \dQuote{US}, and \dQuote{SS} (specified via
#' \code{scenario}).
#' \itemize{ \item BS: two branches are selected to swap their proportions,
#' and leaves on the same branch have the same fold change.
#' \item US: two branches are selected to swap their proportions. Leaves
#' in the same branch have different fold changes but same
#' direction (either increase or decrease).
#' \item SS: two branches are selected. One branch has its proportion
#' swapped with the proportion of some leaves from the other branch.}
#'
#' @importFrom S4Vectors metadata
#' @importFrom SummarizedExperiment assayNames
#' @importFrom TreeSummarizedExperiment rowTree
#'
#' @examples
#' suppressPackageStartupMessages({
#' library(TreeSummarizedExperiment)
#' })
#' ## Generate data to use as the starting point (this would usually be a
#' ## real data set)
#' set.seed(1L)
#' y <- matrix(rnbinom(120, size = 1, mu = 10), nrow = 10)
#' colnames(y) <- paste("S", seq_len(12), sep = "")
#' rownames(y) <- tinyTree$tip.label
#'
#' toy_lse <- TreeSummarizedExperiment(rowTree = tinyTree,
#' assays = list(counts = y))
#' simData(obj = toy_lse, ratio = 2, scenario = "BS", pct = 0.5)
#'
simData <- function(tree = NULL, data = NULL, obj = NULL, assay = NULL,
scenario = "BS", from.A = NULL, from.B = NULL,
minTip.A = 0, maxTip.A = Inf, minTip.B = 0, maxTip.B = Inf,
minPr.A = 0, maxPr.A = 1, ratio = 4, adjB = NULL,
pct = 0.6, nSam = c(50, 50), mu = 10000, size = NULL,
n = 1, FUN = sum, message = FALSE) {
## Check arguments
## -------------------------------------------------------------------------
.assertVector(x = tree, type = "phylo", allowNULL = TRUE)
.assertVector(x = obj, type = "TreeSummarizedExperiment", allowNULL = TRUE)
stopifnot((!is.null(tree) && !is.null(data)) || !is.null(obj))
stopifnot(is.null(assay) || length(assay) == 1)
if (!is.null(obj) && is.character(assay)) {
stopifnot(assay %in% SummarizedExperiment::assayNames(obj))
}
.assertScalar(x = scenario, type = "character",
validValues = c("BS", "SS", "US"))
.assertScalar(x = minTip.A, type = "numeric")
.assertScalar(x = maxTip.A, type = "numeric")
.assertScalar(x = minTip.B, type = "numeric")
.assertScalar(x = maxTip.B, type = "numeric")
.assertScalar(x = minPr.A, type = "numeric", rngIncl = c(0, 1))
.assertScalar(x = maxPr.A, type = "numeric", rngIncl = c(0, 1))
.assertScalar(x = ratio, type = "numeric")
.assertScalar(x = adjB, type = "numeric", rngIncl = c(0, 1),
allowNULL = TRUE)
.assertScalar(x = pct, type = "numeric", rngIncl = c(0, 1),
allowNULL = TRUE)
.assertVector(x = nSam, type = "numeric")
.assertVector(x = mu, type = "numeric")
.assertScalar(x = size, type = "numeric", allowNULL = TRUE)
.assertScalar(x = n, type = "numeric")
.assertVector(x = FUN, type = "function")
.assertScalar(x = message, type = "logical")
if (is.null(assay)) {
assay <- 1
}
## Simulate data
## -------------------------------------------------------------------------
if (is.null(obj)) {
## Tree and data provided
out <- .doData(tree = tree, data = data,
scenario = scenario,
from.A = from.A, from.B = from.B,
minTip.A = minTip.A, maxTip.A = maxTip.A,
minTip.B = minTip.B, maxTip.B = maxTip.B,
minPr.A = minPr.A, maxPr.A = maxPr.A,
ratio = ratio, adjB = adjB, pct = pct,
nSam = nSam, mu = mu, size = size,
n = n, FUN = FUN)
} else {
## TreeSummarizedExperiment provided
pars <- obj$pars
if (is.null(pars)) {
obj <- parEstimate(obj = obj, assay = assay)
pars <- S4Vectors::metadata(obj)$assays.par
tree <- TreeSummarizedExperiment::rowTree(obj)
}
out <- .doData(tree = tree, data = pars,
scenario = scenario,
from.A = from.A, from.B = from.B,
minTip.A = minTip.A, maxTip.A = maxTip.A,
minTip.B = minTip.B, maxTip.B = maxTip.B,
minPr.A = minPr.A, maxPr.A = maxPr.A,
ratio = ratio, adjB = adjB, pct = pct,
nSam = nSam, mu = mu, size = size,
n = n, FUN = FUN)
}
return(out)
}
#' Simulate a count table
#'
#' @author Ruizhu Huang
#' @keywords internal
#' @noRd
#'
#' @returns A list of objects:
#' \itemize{
#' \item{FC}{The fold change of entities corresponding to the tree leaves.}
#' \item{Count}{A list of count matrices or a count matrix.
#' Entities on the row and samples in the column. Each count matrix includes
#' entities corresponding to all nodes on the tree structure.}
#' \item{Branch}{Information about two selected branches.}
#' \describe{
#' \item{A}{The branch node label of branch A}
#' \item{B}{The branch node label of branch B}
#' \item{ratio}{The count proportion ratio of branch B to branch A}
#' \item{A_tips}{The number of leaves on branch A}
#' \item{B_tips}{The number of leaves on branch B}
#' \item{A_prop}{The count proportion of branch A (not above 1)}
#' \item{B_prop}{The count proportion of branch B (not above 1)}
#' }
#' }
#'
#' @importFrom dirmult dirmult
#' @importFrom TreeSummarizedExperiment showNode convertNode
#' TreeSummarizedExperiment
#'
.doData <- function(tree = NULL, data = NULL, scenario = "BS",
from.A = NULL, from.B = NULL, minTip.A = 0, maxTip.A = Inf,
minTip.B = 0, maxTip.B = Inf, minPr.A = 0, maxPr.A = 1,
ratio = 2, adjB = NULL, pct = 0.6, nSam = c(50, 50),
mu = 50, size = 10000, n = 1, FUN = sum) {
## Check arguments
## -------------------------------------------------------------------------
.assertVector(x = tree, type = "phylo")
## data should be either a list (from parEstimate) or a count matrix
if (!is.list(data)) {
if (!is.matrix(data)) {
stop("data should be a matrix.")
} else {
if (!setequal(rownames(data), tree$tip.label)) {
stop("The rownames of data do not match the leaf labels.")
}
}
}
## Check validity of from.A and from.B
sn <- TreeSummarizedExperiment::showNode(tree, use.alias = TRUE)
if (!is.null(from.A)) {
if (is.character(from.A)) {
is_alias <- from.A %in% names(sn)
if (!is_alias) {
## Check if from.A is a node label and convert to alias
is_label <- from.A %in% tree$node.label
if (!is_label) {
stop("The provided from.A is not a node in the tree")
} else {
## Convert to alias
tmp <- TreeSummarizedExperiment::convertNode(
tree, node = from.A)
from.A <- names(sn)[match(tmp, sn)]
}
}
} else if (is.numeric(from.A)) {
## Node number - check that it's in the tree
if (!from.A %in% sn) {
stop("The provided from.A is not a node in the tree")
}
} else {
stop("from.A must be a character or numeric value")
}
}
if (!is.null(from.B)) {
if (is.character(from.B)) {
is_alias <- from.B %in% names(sn)
if (!is_alias) {
## Check if from.B is a node label and convert to alias
is_label <- from.B %in% tree$node.label
if (!is_label) {
stop("The provided from.B is not a node in the tree")
} else {
## Convert to alias
tmp <- TreeSummarizedExperiment::convertNode(
tree, node = from.B)
from.B <- names(sn)[match(tmp, sn)]
}
}
} else if (is.numeric(from.B)) {
## Node number - check that it's in the tree
if (!from.B %in% sn) {
stop("The provided from.B is not a node in the tree")
}
} else {
stop("from.B must be a character or numeric value")
}
}
## Estimate parameters for DM distribution
## -------------------------------------------------------------------------
data <- parEstimate(obj = data)
## Pick branches
## -------------------------------------------------------------------------
if (!is.null(from.A) && !is.null(from.B)) {
pk <- .infLoc(tree = tree, data = data,
from.A = from.A, from.B = from.B)
} else {
pk <- .pickLoc(tree = tree, data = data,
from.A = from.A, minTip.A = minTip.A,
maxTip.A = maxTip.A, minTip.B = minTip.B,
maxTip.B = maxTip.B, minPr.A = minPr.A,
maxPr.A = maxPr.A, ratio = ratio)
}
## Estimate fold changes
## -------------------------------------------------------------------------
beta <- .doFC(tree = tree, data = data, scenario = scenario,
branchA = pk$A, branchB = pk$B, ratio = pk$ratio,
adjB = adjB, pct = pct)
## Generate count matrix/matrices and TreeSummarizedExperiment objects
## -------------------------------------------------------------------------
count <- .doCount(data = data, FC = beta, nSam = nSam, mu = mu,
size = size, n = n)
if (is.list(count)) {
grpDat <- data.frame(group = substr(colnames(count[[1]]), 1, 2))
lse <- TreeSummarizedExperiment::TreeSummarizedExperiment(
assays = count,
metadata = list(FC = beta, branch = pk, scenario = scenario),
colData = grpDat,
rowTree = tree,
rowNodeLab = rownames(count[[1]]))
} else if (is.matrix(count)) {
grpDat <- data.frame(group = substr(colnames(count), 1, 2))
lse <- TreeSummarizedExperiment::TreeSummarizedExperiment(
assays = list(counts = count),
metadata = list(FC = beta, branch = pk, scenario = scenario),
colData = grpDat,
rowTree = tree,
rowNodeLab = rownames(count))
}
return(lse)
}
#' @keywords internal
#' @noRd
#' @author Ruizhu Huang
#'
#' @importFrom TreeSummarizedExperiment convertNode
#'
.getParsFromData <- function(data, tree) {
pars <- parEstimate(obj = data)$pi
nam1 <- names(pars)
val1 <- TreeSummarizedExperiment::convertNode(
tree = tree, node = nam1, message = FALSE)
nam2 <- TreeSummarizedExperiment::convertNode(
tree = tree, node = val1, use.alias = TRUE, message = FALSE)
names(pars) <- nam2
pars
}
#' @keywords internal
#' @noRd
#' @author Ruizhu Huang
#'
#' @importFrom TreeSummarizedExperiment convertNode findDescendant
#'
.getInternalProps <- function(pars, tree) {
leaf <- setdiff(tree$edge[, 2], tree$edge[, 1])
leaf <- sort(leaf)
nodI <- setdiff(tree$edge[, 1], leaf)
nodI <- sort(nodI)
nodA <- c(leaf, nodI)
desI <- TreeSummarizedExperiment::findDescendant(
tree = tree, node = nodI, only.leaf = TRUE, self.include = TRUE,
use.alias = TRUE)
desI <- lapply(desI, FUN = function(x) {
TreeSummarizedExperiment::convertNode(
tree = tree, node = x, use.alias = TRUE, message = FALSE)
})
names(desI) <- TreeSummarizedExperiment::convertNode(
tree = tree, node = nodI, use.alias = TRUE, message = FALSE)
if (!is.null(pars)) {
nodP <- mapply(function(x, y) {
sum(x[y])
}, x = list(pars), y = desI)
} else {
nodP <- NULL
}
list(nodP = nodP, desI = desI, nodI = nodI, leaf = leaf)
}
#' Select branches to swap
#'
#' @author Ruizhu Huang
#' @keywords internal
#' @noRd
#'
#' @returns A \code{data.frame} with one row
#'
#' @importFrom TreeSummarizedExperiment convertNode
#'
.pickLoc <- function(tree = NULL, data = NULL, from.A = NULL,
minTip.A = 0, maxTip.A = Inf, minTip.B = 0, maxTip.B = Inf,
minPr.A = 0, maxPr.A = 1, ratio = 1) {
## Estimate tip proportions, rename using the node label alias
## -------------------------------------------------------------------------
pars <- .getParsFromData(data = data, tree = tree)
## Proportions for internal nodes
## -------------------------------------------------------------------------
propRes <- .getInternalProps(pars = pars, tree = tree)
## Abundance proportions and number of descendant leaves
## -------------------------------------------------------------------------
lenI <- unlist(lapply(propRes$desI, length))
tt <- cbind(propRes$nodP, lenI)
rownames(tt) <- TreeSummarizedExperiment::convertNode(
tree = tree, node = propRes$nodI, use.alias = TRUE, message = FALSE)
if (maxPr.A < min(tt[, 1])) {
stop("maxPr.A is lower than the minimum value of
node proportion", signif(min(tt[, 1]), 2), "\n")
}
if (minPr.A * ratio > max(tt[, 1])) {
stop("minPr.A*ratio is above the maximum value of
node proportion; try lower ratio", signif(min(tt[, 1]), 2), "\n")
}
## Only consider nodes with enough tips and desired proportions
## -------------------------------------------------------------------------
if (is.null(from.A)) {
tt.sel <- tt
} else {
## If node numbers, change them to node labels
if (!is.character(from.A)) {
from.A <- TreeSummarizedExperiment::convertNode(
tree = tree, node = from.A, use.alias = TRUE, message = FALSE)
}
tt.sel <- tt[match(from.A, rownames(tt)), , drop = FALSE]
}
st <- tt.sel[tt.sel[, 2] >= minTip.A & tt.sel[, 2] <= maxTip.A &
tt.sel[, 1] >= minPr.A & tt.sel[, 1] <= maxPr.A, ,
drop = FALSE]
if (nrow(st) == 0) {
stop("No nodes fulfill the requirements; try other values for ",
"minTip.A, maxTip.A, minPr.A, or maxPr.A")
}
st2 <- tt[tt[, 2] >= minTip.B & tt[, 2] <= maxTip.B, , drop = FALSE]
## Fold changes between any two nodes
## -------------------------------------------------------------------------
mm <- (1/st[, 1]) %o% st2[, 1]
rownames(mm) <- rownames(st)
maxM <- max(mm, na.rm = TRUE)
minM <- min(mm, na.rm = TRUE)
if (ratio > 1 & ratio > maxM) {
stop("Could not find two branches which fulfill the requirement; ",
"try lower ratio, lower minTip.A, or higher maxTip.B\n")
}
if (ratio < 1 & ratio < minM) {
stop("Could not find two branches which fulfill the requirement; ",
"try higher ratio or lower minTip.B\n")
}
nm <- mm
nm[] <- vapply(
seq_len(ncol(mm)),
FUN = function(x) {
## each column
cn <- colnames(mm)
cx <- cn[x]
## all rows
rn <- rownames(mm)
tx <- propRes$desI[rn]
cs <- lapply(
tx,
FUN = function(x) {
length(intersect(x, propRes$desI[[cx]])) > 0
})
cv <- unlist(cs)
fm <- mm[, x]
fm[cv] <- NA
fm
},
FUN.VALUE = numeric(nrow(mm))
)
colnames(nm) <- colnames(mm)
rownames(nm) <- rownames(mm)
dif <- abs(nm - ratio)
wi <- which(dif == min(dif, na.rm = TRUE), arr.ind = TRUE)
si <- sample(seq_len(nrow(wi)), 1)
an <- rownames(nm)[wi[si, 1]]
bn <- colnames(nm)[wi[si, 2]]
## Assemble information about selected branches
## -------------------------------------------------------------------------
du <- cbind.data.frame(
"A" = TreeSummarizedExperiment::convertNode(
tree = tree, node = an, use.alias = FALSE, message = FALSE),
"B" = TreeSummarizedExperiment::convertNode(
tree = tree, node = bn, use.alias = FALSE, message = FALSE),
"ratio" = unique(nm[wi]),
"A_tips" = tt[an, 2],
"B_tips" = tt[bn, 2],
"A_prop" = round(tt[an, 1], digits = 4),
"B_prop" = round(tt[bn, 1], digits = 4)
)
rownames(du) <- NULL
return(du)
}
#' Provide information about proportions and number of leaves in two branches
#'
#' @keywords internal
#' @author Ruizhu Huang
#' @noRd
#'
#' @returns A \code{data.frame} with one row
#'
#' @importFrom TreeSummarizedExperiment convertNode
#'
.infLoc <- function(tree = NULL, data = NULL,
from.A = NULL, from.B = NULL) {
## Estimate tip proportions, rename using the node label alias
## -------------------------------------------------------------------------
pars <- .getParsFromData(data = data, tree = tree)
## Proportions for internal nodes
## -------------------------------------------------------------------------
propRes <- .getInternalProps(pars = pars, tree = tree)
## Abundance proportions and number of descendant leaves
## -------------------------------------------------------------------------
lenI <- unlist(lapply(propRes$desI, length))
tt <- cbind(propRes$nodP, lenI)
rownames(tt) <- TreeSummarizedExperiment::convertNode(
tree = tree, node = propRes$nodI, use.alias = TRUE, message = FALSE)
## Get branch names
## -------------------------------------------------------------------------
labA <- ifelse(is.character(from.A), from.A,
TreeSummarizedExperiment::convertNode(
tree = tree, node = from.A, use.alias = TRUE,
message = FALSE))
labB <- ifelse(is.character(from.B), from.B,
TreeSummarizedExperiment::convertNode(
tree = tree, node = from.B, use.alias = TRUE,
message = FALSE))
## Assemble summary information about nodes
## -------------------------------------------------------------------------
rAB <- tt[labB, 1] / tt[labA, 1]
du <- cbind.data.frame(
"A" = from.A,
"B" = from.B,
"ratio" = rAB,
"A_tips" = tt[labA, 2],
"B_tips" = tt[labB, 2],
"A_prop" = round(tt[labA, 1], digits = 4),
"B_prop" = round(tt[labB, 1], digits = 4)
)
rownames(du) <- NULL
return(du)
}
#' Generate fold changes
#'
#' @author Ruizhu Huang
#' @keywords internal
#' @noRd
#'
#' @returns a numeric vector with fold changes
#'
#' @importFrom stats runif
#' @importFrom TreeSummarizedExperiment convertNode findDescendant
#'
.doFC <- function(tree = NULL, data = NULL, scenario = "BS",
branchA = NULL, branchB = NULL,
ratio = 1, adjB = NULL, pct = 1) {
## Find nodes
## -------------------------------------------------------------------------
propRes <- .getInternalProps(pars = NULL, tree = tree)
## Initialize beta
## -------------------------------------------------------------------------
beta <- rep(1, length(propRes$leaf))
names(beta) <- TreeSummarizedExperiment::convertNode(
tree = tree, node = propRes$leaf, use.alias = TRUE)
## Node labels on branch A
## -------------------------------------------------------------------------
## leaves
tip.A <- TreeSummarizedExperiment::findDescendant(
tree = tree, node = branchA, only.leaf = TRUE, self.include = TRUE,
use.alias = TRUE)
tip.A <- TreeSummarizedExperiment::convertNode(
tree = tree, node = unlist(tip.A), use.alias = TRUE, message = FALSE)
## nodes
nodA.A <- TreeSummarizedExperiment::findDescendant(
tree = tree, node = branchA, only.leaf = FALSE, self.include = TRUE,
use.alias = TRUE)
nodA.A <- TreeSummarizedExperiment::convertNode(
tree = tree, node = unlist(nodA.A), use.alias = TRUE, message = FALSE)
## internal nodes
nodI.A <- setdiff(nodA.A, tip.A)
## descendants of internal nodes
des.IA <- TreeSummarizedExperiment::findDescendant(
tree = tree, node = nodI.A, only.leaf = TRUE, self.include = TRUE,
use.alias = TRUE)
des.IA <- lapply(des.IA, FUN = function(x) {
TreeSummarizedExperiment::convertNode(
tree = tree, node = x, use.alias = TRUE, message = FALSE)
})
## Tip proportions from real data, rename using the alias of node label
## -------------------------------------------------------------------------
pars <- .getParsFromData(data = data, tree = tree)
## Swap proportions of two branches
## -------------------------------------------------------------------------
if (scenario == "BS") {
## Tips in the same branch have the same fold change
## leaves on branch B
tip.B <- TreeSummarizedExperiment::findDescendant(
tree = tree, node = branchB, only.leaf = TRUE, self.include = TRUE,
use.alias = TRUE)
tip.B <- TreeSummarizedExperiment::convertNode(
tree = tree, node = unlist(tip.B), use.alias = TRUE,
message = FALSE)
beta[tip.A] <- ratio
beta[tip.B] <- 1/ratio
} else if (scenario == "US") {
## Tips in the same branch have different fold changes but same
## direction (either increase or decrease)
tip.B <- TreeSummarizedExperiment::findDescendant(
tree = tree, node = branchB, only.leaf = TRUE, self.include = TRUE,
use.alias = TRUE)
tip.B <- TreeSummarizedExperiment::convertNode(
tree = tree, node = unlist(tip.B), use.alias = TRUE,
message = FALSE)
## Proportion in the two branches
propA <- sum(pars[tip.A])
propB <- sum(pars[tip.B])
## Make sure the ratio is above zero
if (propB < propA) {
rA <- 1 - propB / propA
rB <- 0
} else {
rA <- 0
rB <- 1 - propA / propB
}
a1 <- stats::runif(length(tip.A), rA, 1)
sa <- sum(a1*pars[tip.A])
a2 <- (propB - propA)/sa
a3 <- a1 * a2 + 1
beta[tip.A] <- a3
b1 <- stats::runif(length(tip.B), rB, 1)
sb <- sum(b1 * pars[tip.B])
b2 <- (propA - propB)/sb
b3 <- b1 * b2 + 1
beta[tip.B] <- b3
} else if (scenario == "SS") {
## Distribute signal randomly in one branch and evenly in
## another branch
iter <- 1
while (iter <= 200) {
## Select only some tips
selA <- sample(tip.A, ceiling(length(tip.A) * pct))
## Make the selected tips disperse evenly in the branch
subA <- lapply(des.IA, FUN = function(x) {
ix <- intersect(x, selA)
length(ix)/length(x)
})
subA <- unlist(subA)
sumA <- sum(pars[selA])
## The abundance proportions of the selected tips
## are roughly equal to its proportion in the branch.
## Avoid selecting all low or high abundance tips in the branch.
spr <- sumA / sum(pars[tip.A])
ind.pr <- spr <= (pct + 0.05) && spr >= (pct - 0.05)
if (all(subA <= 0.6) && ind.pr) {
break
}
iter <- iter + 1
}
tip.B <- TreeSummarizedExperiment::findDescendant(
tree = tree, node = branchB, only.leaf = TRUE, self.include = TRUE,
use.alias = TRUE)
tip.B <- TreeSummarizedExperiment::convertNode(
tree = tree, node = unlist(tip.B), use.alias = TRUE,
message = FALSE)
sumB <- sum(pars[tip.B])
if (is.null(adjB)) {
beta[selA] <- sumB/sumA
beta[tip.B] <- sumA/sumB
} else {
beta[tip.B] <- adjB
beta[selA] <- (sumB * (1 - adjB) + sumA) / sumA
}
}
## Rename beta with the node label instead of the alias of node label
## -------------------------------------------------------------------------
names(beta) <- TreeSummarizedExperiment::convertNode(
tree = tree, node = propRes$leaf, use.alias = FALSE)
return(beta)
}
#' Generate a count table given simulation details
#'
#' @author Ruizhu Huang
#' @keywords internal
#' @noRd
#'
#' @returns A matrix or a list of matrices.
#'
#' @importFrom dirmult rdirichlet
#' @importFrom stats rmultinom rnbinom
#'
.doCount <- function(data, FC, nSam, mu, size, n) {
## Parameters
## -------------------------------------------------------------------------
pars <- parEstimate(obj = data)
theta <- pars$theta
gplus <- (1 - theta) / theta
## Tip proportions
## -------------------------------------------------------------------------
pr <- pars$pi
p.c1 <- pr
p.c2 <- pr * FC[names(p.c1)]
## Parameters for Dirichlet distribution
## -------------------------------------------------------------------------
g.c1 <- p.c1 * gplus
g.c2 <- p.c2 * gplus
## Generate count matrices
## -------------------------------------------------------------------------
resList <- lapply(seq_len(n), FUN = function(j) {
## condition 1
n1 <- nSam[1]
Mp.c1 <- matrix(0, nrow = n1, ncol = length(g.c1))
rownames(Mp.c1) <- paste("C1_", seq_len(n1), sep = "")
colnames(Mp.c1) <- names(p.c1)
Mobs.c1 <- Mp.c1
if (length(mu) == 1 && !length(size)) {
## Need to treat this case separately since otherwise it will
## sample from seq_len(mu) below
nSeq1 <- rep(mu, n1)
} else if (length(mu) && !length(size)) {
nSeq1 <- sample(x = mu, size = n1, replace = TRUE)
} else {
nSeq1 <- stats::rnbinom(n = n1, mu = mu, size = size)
}
for (i in seq_len(n1)) {
Mp.c1[i, ] <- dirmult::rdirichlet(1, g.c1)[1, ]
Mobs.c1[i, ] <- stats::rmultinom(1, nSeq1[i],
prob = Mp.c1[i, ])[, 1]
}
## condition 2
n2 <- nSam[2]
Mp.c2 <- matrix(0, nrow = n2, ncol = length(g.c2))
rownames(Mp.c2) <- paste("C2_", seq_len(n2), sep = "")
colnames(Mp.c2) <- names(p.c2)
Mobs.c2 <- Mp.c2
if (length(mu) == 1 && !length(size)) {
## Need to treat this case separately since otherwise it will
## sample from seq_len(mu) below
nSeq2 <- rep(mu, n2)
} else if (length(mu) && !length(size)) {
nSeq2 <- sample(x = mu, size = n2, replace = TRUE)
} else {
nSeq2 <- stats::rnbinom(n = n2, mu = mu, size = size)
}
for (i in seq_len(n2)) {
Mp.c2[i, ] <- dirmult::rdirichlet(1, g.c2)[1, ]
Mobs.c2[i, ] <- stats::rmultinom(1, nSeq2[i],
prob = Mp.c2[i, ])[, 1]
}
cb <- t(rbind(Mobs.c1, Mobs.c2))
return(cb)
})
if (n == 1) {
count <- do.call(rbind, resList)
} else {
count <- resList
}
return(count)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.