#' LUniFrac
#'
#' Longitudinal UniFrac distances for comparing changes in
#' microbial communities across 2 time points.
#'
#' Based in part on Jun Chen & Hongzhe Li (2012), GUniFrac.
#'
#' Computes difference between time points and then calculates
#' difference of these differences, resulting in a dissimilarity
#' matrix that can be used in a variety of downstream
#' distance-based analyses.
#'
#' @param otu.tab OTU count table, containing 2*n rows (samples) and q columns (OTUs)
#' @param metadata Data frame with three columns: subject identifiers (n unique values),
#' sample identifiers (must match row names of otu.tab),
#' and time or group indicator (numeric variable, or factor with levels such that as.numeric returns
#' the desired ordering). Column names should be subjID, sampID, time.
#' @param tree Rooted phylogenetic tree of R class "phylo"
#' @param gam Parameter controlling weight on abundant lineages. The same weight is
#' used within a subjects as between subjects.
#' @param paired Logical indicating whether to use the paired (TRUE) or longitudinal (FALSE) transformation.
#' @param check.input Logical indicating whether to check the function input values for formatting or
#' entry errors (default TRUE).
#' @return Returns a (K+1) dimensional array containing the longitudinal UniFrac dissimilarities
#' with the K specified gamma values plus the unweighted distance. The unweighted dissimilarity
#' matrix may be accessed by result[,,"d_UW"], and the generalized dissimilarities by result[,,"d_G"]
#' where G is the particular choice of gamma.
#'
#' @examples
#' data("bal.long.otus")
#' data("bal.long.meta")
#' data("sim.tree")
#' D2.unifrac <- LUniFrac(otu.tab = bal.long.otus, metadata = bal.long.meta,
#' tree = sim.tree, gam = c(0, 0.5, 1), paired = FALSE, check.input = TRUE)
#' D2.unifrac[, , "d_1"] # gamma = 1 (quantitative longitudinal transformation)
#' D2.unifrac[, , "d_UW"] # unweighted LUniFrac (qualitative/binary longitudinal transf.)
#'
#' @export
#'
LUniFrac <- function(otu.tab, metadata, tree, gam = c(0, 0.5, 1), paired, check.input = TRUE) {
n <- nrow(otu.tab)
# Check input data
if (check.input) {
okdat <- data_prep(otu.tab, metadata, paired)
otu.tab <- okdat$otu.props
metadata <- okdat$metadata
remove(okdat)
}
# Check OTU name consistency
if (sum(!(colnames(otu.tab) %in% tree$tip.label)) != 0) {
stop("The OTU table contains unknown OTUs! OTU names
in the OTU table and the tree should match." )
}
# Get the subtree if tree contains more OTUs
absent <- tree$tip.label[!(tree$tip.label %in% colnames(otu.tab))]
if (length(absent) != 0) {
tree <- drop.tip(tree, absent)
warning("The tree has more OTU than the OTU table!")
}
# Reorder the otu.tab matrix if the OTU orders are different
tip.label <- tree$tip.label
otu.tab <- otu.tab[, tip.label]
ntip <- length(tip.label)
nbr <- nrow(tree$edge) # number of branches = 2*(ntip - 1)
edge <- tree$edge # edges entering a node (1 through (ntip - 1))
edge2 <- edge[, 2] # edges leaving a node (1 through 2*(ntip -1))
br.len <- tree$edge.length # branch lengths, corresponds to edge2
## Accumulate OTU proportions up the tree
## Note: columns are samples, rows are branches
cum <- matrix(0, nbr, n) # Branch abundance matrix (n = nsamp = nsubj * ntimes in this usage)
for (i in 1:ntip) {
tip.loc <- which(edge2 == i)
cum[tip.loc, ] <- cum[tip.loc, ] + otu.tab[, i]
node <- edge[tip.loc, 1] # Assume the direction of edge
node.loc <- which(edge2 == node)
while (length(node.loc)) {
cum[node.loc, ] <- cum[node.loc, ] + otu.tab[, i]
node <- edge[node.loc, 1]
node.loc <- which(edge2 == node)
}
}
colnames(cum) = rownames(otu.tab)
### Step 1: calculate within-subject distance data
if (paired) {
tsf.dat <- pltransform(otu.data = list(otu.props = t(cum), otu.clr = t(cum), metadata = metadata), paired = TRUE)
} else {
tsf.dat <- pltransform(otu.data = list(otu.props = t(cum), otu.clr = t(cum), metadata = metadata), paired = FALSE)
}
cum.avg <- t(tsf.dat$avg.prop)
cum.unw <- t(tsf.dat$dat.binary)
cum.gen <- t(tsf.dat$dat.quant.prop)
## Construct the returning array
# d_UW: unweighted
dimname3 <- c(paste("d", gam, sep="_"), "d_UW")
lunifracs <- array(NA, c(ncol(cum.avg), ncol(cum.avg), length(gam) + 1),
dimnames=list(colnames(cum.avg), colnames(cum.avg), dimname3))
for (i in 1:(length(gam)+1)){
for (j in 1:ncol(cum.avg)){
lunifracs[j, j, i] <- 0
}
}
### Step 2: calculate distances based on within-subject summaries
for (i in 2:ncol(cum.avg)) {
for (j in 1:(i-1)) {
d1 <- cum.gen[, i]
d2 <- cum.gen[, j]
avg1 <- cum.avg[, i]
avg2 <- cum.avg[, j]
ind <- which((abs(d1) + abs(d2)) != 0)
d1 <- d1[ind]
d2 <- d2[ind]
diff <- abs(d2 - d1)
br.len2 <- br.len[ind]
## Generalized LUniFrac dissimilarity
for(k in 1:length(gam)){
w <- br.len * (avg1 + avg2)^gam[k]
lunifracs[i, j, k] = lunifracs[j, i, k] = sum(diff * w[ind]) / sum(w)
}
## Unweighted LUniFrac Distance
d1 <- cum.unw[, i]
d2 <- cum.unw[, j]
diff <- abs(d2 - d1)
# only branches with some change contribute
ind <- which((abs(cum.unw[, i]) + abs(cum.unw[,j])) != 0)
if (length(ind) > 0) {
diff <- diff[ind]
br.len2 <- br.len[ind]
lunifracs[i, j, (k + 1)] = lunifracs[j, i, (k + 1)] = sum(br.len2*diff) / sum(br.len)
} else {
lunifracs[i, j, (k + 1)] = lunifracs[j, i, (k + 1)] = 0
}
}
}
return(lunifracs)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.