Nothing
#' Data transformation and driver of PhILR.
#'
#' This is the main function for building the phylogenetic ILR basis, calculating the
#' weightings (of the parts and the ILR coordinates) and then transforming the data.
#' @aliases build.phylo.ilr
#' @param df \strong{matrix} of data to be transformed (samples are rows,
#' compositional parts are columns) - zero must be dealt with either with pseudocount,
#' multiplicative replacement, or another method.
#' @param sbp (Optional) give a precomputed sbp matrix \code{\link{phylo2sbp}}
#' if you are going to build multiple ILR bases (e.g., with different weightings).
#' @param part.weights weightings for parts, can be a named vector with names
#' corresponding to \code{colnames(df)} otherwise can be a string, options include:
#' \describe{
#' \item{\code{'uniform'}}{(default) uses the uniform reference measure}
#' \item{\code{'gm.counts'}}{geometric mean of parts of df}
#' \item{\code{'anorm'}}{aitchison norm of parts of df (after closure)}
#' \item{\code{'anorm.x.gm.counts'}}{\code{'anorm'} times \code{'gm.counts'}}
#' \item{\code{'enorm'}}{euclidean norm of parts of df (after closure)}
#' \item{\code{'enorm.x.gm.counts'}}{\code{'enorm'} times \code{'gm.counts'}, often gives good results}
#' }
#' @param ilr.weights weightings for the ILR coordiantes can be a named vector with names
#' corresponding to names of internal nodes of \code{tree} otherwise can be a string,
#' options include:
#' \describe{
#' \item{\code{'uniform'}}{(default) no weighting of the ILR basis}
#' \item{\code{'blw'}}{sum of children's branch lengths}
#' \item{\code{'blw.sqrt'}}{square root of \code{'blw'} option}
#' \item{\code{'mean.descendants'}}{sum of children's branch lengths PLUS the sum of
#' each child's mean distance to its descendent tips}
#' }
#' @param return.all return all computed parts (e.g., computed sign matrix(\code{sbp}),
#' part weightings (code{p}), ilr weightings (code{ilr.weights}), contrast matrix (\code{V}))
#' as a list (default=\code{FALSE}) in addition to in addition to returning the transformed data (\code{df.ilrp}).
#' If \code{return.all==FALSE} then only returns the transformed data (not in list format)
#' If \code{FALSE} then just returns list containing \code{df.ilrp}.
#' @inheritParams calculate.blw
#' @details
#' This is a utility function that pulls together a number of other functions
#' in \code{philr}. The steps that are executed are as follows:
#' \enumerate{
#' \item Create sbp (sign matrix) if not given
#' \item Create parts weightings if not given
#' \item Shift the dataset with respect to the new reference measure (e.g., part weightings)
#' \item Create the basis contrast matrix from the sign matrix and the reference measure
#' \item Transform the data based on the contrast matrix and the reference measure
#' \item Calculate the specified ILR weightings and multiply each balance by the corresponding weighting
#' }
#' Note for both the reference measure (part weightings) and the ILR weightings, specifying \code{'uniform'} will
#' give the same results as not weighting at all. \cr \cr
#' Note that some of the prespecified part.weights assume \code{df} is given as
#' counts and not as relative abundances. Except in this case counts or relative
#' abundances can be given.
#' @return matrix if \code{return.all=FALSE}, if \code{return.all=TRUE} then a list is returned (see above).
#' @author Justin Silverman
#' @export
#' @seealso \code{\link{phylo2sbp}} \code{\link{calculate.blw}}
#' @examples
#' tr <- named_rtree(5)
#' df <- t(rmultinom(10,100,c(.1,.6,.2,.3,.2))) + 0.65 # add a small pseudocount
#' colnames(df) <- tr$tip.label
#'
#' philr(df, tr, part.weights='enorm.x.gm.counts',
#' ilr.weights='blw.sqrt', return.all=FALSE)
philr <- function(df, tree, sbp=NULL,
part.weights='uniform', ilr.weights='uniform',
return.all=FALSE){
# Convert df to mat with warning
df.name <- deparse(substitute(df))
if (is.data.frame(df)) {
st <- paste("Converting ", df.name, " to matrix from data.frame",
" consider adding as.matrix(", df.name, ") to function call",
" to control conversion manually", sep="")
warning(st)
df <- as.matrix(df)
}
# Convert vector input for df to matrix
df <- vec_to_mat(df)
# Check for Zero values in df
if (any(df == 0)){
stop('Zero values must be removed either through use of pseudocount, multiplicative replacement or other method.')
}
# # Check to make sure df is a matrix otherwise convert it to one
# if(is.vector(df)) {
# tmp <- names(df)
# df <- matrix(df, nrow=1)
# colnames(df) <- tmp
# }
# Create the sequential binary partition sign matrix
if (is.null(sbp)){
message('Building Sequential Binary Partition from Tree...')
sbp <- phylo2sbp(tree)
} else {
if ( (nrow(sbp)!=ncol(df)) | (ncol(sbp)!=ncol(df)-1) ){
stop("given sbp does not match dimentions required dimentions (e.g., ncol(df) x ncol(df)-1")
}
}
if (is.null(colnames(df))) stop("Input data must have column names that align with phylogenetic tree to prevent logical errors.")
sbp <- sbp[colnames(df), ] #Very Important line to avoid logical errors
# Now need to create the weights on the parts
if (is.null(part.weights)){part.weights <- 'uniform'}
if (part.weights=='uniform'){
p <- rep(1, ncol(df))
names(p) <- rownames(sbp)
} else if (part.weights=='gm.counts'){
p <- g.colMeans(df)
p <- p[rownames(sbp)]
} else if (part.weights=='anorm'){
p <- apply(miniclo(t(df)), 1, function(x) normp(x, rep(1, nrow(df))))
} else if (part.weights=='anorm.x.gm.counts'){
gm.counts <- g.colMeans(df)
gm.counts <- gm.counts[rownames(sbp)]
anorm <- apply(miniclo(t(df)), 1, function(x) normp(x, rep(1, nrow(df))))
anorm <- anorm[rownames(sbp)]
p <- gm.counts*anorm
} else if (part.weights=='enorm') {
enorm <- apply(miniclo(t(df)), 1, function(x) sqrt(sum(x^2)))
enorm <- enorm[rownames(sbp)]
p <- enorm
} else if (part.weights=='enorm.x.gm.counts'){
gm.counts <- g.colMeans(df)
gm.counts <- gm.counts[rownames(sbp)]
enorm <- apply(miniclo(t(df)), 1, function(x) sqrt(sum(x^2)))
enorm <- enorm[rownames(sbp)]
p <- gm.counts*enorm
}
# Make sure everything is lined up (else throw an error)
if (!all.equal(rownames(sbp), colnames(df), names(p))) {
stop("Rownames of SBP, Colnames of df, and names of part weights not equal!")
}
# shift the dataset with respect to the new reference measure
df <- shiftp(miniclo(df), p)
# Now create basis contrast matrix
message('Building Contrast Matrix...')
V <- buildilrBasep(sbp, p)
# Finally transform the df
message('Transforming the Data...')
df.ilrp <- ilrp(df, p, V)
# Now calculate ILR Weightings
if (is.character(ilr.weights)){
message('Calculating ILR Weights...')
if (ilr.weights=='blw'){
ilr.weights <- calculate.blw(tree, method='sum.children')
} else if (ilr.weights=='blw.sqrt'){
ilr.weights <- sqrt(calculate.blw(tree, method='sum.children'))
} else if (ilr.weights=='uniform'){
ilr.weights <- rep(1, ncol(df.ilrp))
names(ilr.weights) <- colnames(df.ilrp)
} else if (ilr.weights=='mean.descendants'){
ilr.weights <- calculate.blw(tree, method='mean.descendants')
}
} else { # Validate input of ilr.weights
if (!is.numeric(ilr.weights) | length(ilr.weights) != ncol(df.ilrp)){
stop("ilr.weights must be recognized string or numeric vector of length = ncol(df)-1")
}
}
#TODO: Speed up by not computing if 'uniform'
if (!is.null(colnames(df.ilrp))){
ilr.weights <- ilr.weights[colnames(df.ilrp)]
}
#ilr.weights <- ilr.weights[colnames(df.ilrp)]
tmp.names <- colnames(df.ilrp)
df.ilrp <- df.ilrp %*% diag(ilr.weights)
colnames(df.ilrp) <- tmp.names
# Build return list
if (return.all==FALSE)return(df.ilrp)
if (return.all==TRUE){
l.return = list()
l.return[['df.ilrp']] <- df.ilrp
l.return[['sbp']] <- sbp
l.return[['p']] <- p # the part weights
l.return[['V']] <- V # the contrast matrix
l.return[['ilr.weights']] <- ilr.weights
}
return(l.return)
}
#' Inverse of PhILR Transform
#'
#' @param df.ilrp transformed data to which the inverse transform will be applied
#' @param tree (optional) to be used to build sbp and contrast matrix (see details)
#' @param sbp (optional) the sbp (sequential binary partition) used to build a
#' contrast matrix (see details)
#' @param V (optional) the contrast matrix (see details)
#' @param part.weights weightings for parts, can be a named vector with names
#' corresponding to \code{colnames(df)}. Defaults to 'uniform' (part.weights = 1,...,1)
#' @param ilr.weights weightings for the ILR coordiantes can be a named vector with names
#' corresponding to names of internal nodes of \code{tree}.
#' Defaults to 'uniform' (ilr.weights = 1,...,1)
#'
#' @details This is a utility function for calculating the inverse of the
#' \code{\link{philr}} transform. Note that at least one of the following
#' parameters must be specified (\code{tree}, \code{sbp}, or \code{V}).
#' @return a matrix of compositions (rows are samples, columns are parts),
#' function removes the effects of ilr weights, part weights, and unshifts
#' the composition.
#'
#' @author Justin Silverman
#' @export
#' @seealso \code{\link{philr}}
#'
#' @examples
#' tr <- named_rtree(5)
#' df <- t(rmultinom(10,100,c(.1,.6,.2,.3,.2))) + 0.65 # add a small pseudocount
#' colnames(df) <- tr$tip.label
#' d <- philr(df, tr, part.weights='enorm.x.gm.counts',
#' ilr.weights='blw.sqrt', return.all=TRUE)
#' d.inverted <- philrInv(d$df.ilrp, V=d$V, part.weights = d$p,
#' ilr.weights = d$ilr.weights)
#' all.equal(miniclo(df), d.inverted)
philrInv <- function(df.ilrp, tree=NULL, sbp=NULL, V=NULL, part.weights=NULL, ilr.weights=NULL){
if (all(c(is.null(tree), is.null(sbp), is.null(V)))){
stop('At least one of the parameters (tree, sbp, V) must be non-null')
}
# Convert vector input for df to matrix
df.ilrp <- vec_to_mat(df.ilrp)
# if(is.vector(df.ilrp)) {
# tmp <- names(df.ilrp)
# df.ilrp <- matrix(df.ilrp, nrow=1)
# colnames(df.ilrp) <- tmp
# }
# Inverse ILR -> y (the shifted composition - but closed)
if (is.null(part.weights)){
part.weights <- rep(1, ncol(df.ilrp)+1)
}
if (!is.null(V)) NULL
else if (!is.null(sbp)) V <- buildilrBasep(sbp, part.weights)
else V <- buildilrBasep(phylo2sbp(tree), part.weights)
V <- V[,colnames(df.ilrp)] # make sure everything is lined up
# Undo ILR weights - note: doing nothing (e.g., not matching criteria)
# is equivalent to undoing uniform ilr.weights (1,...,1)
if (!is.null(ilr.weights)) {
ilr.weights <- ilr.weights[colnames(df.ilrp)] # ensure things are lined up
df.ilrp <- df.ilrp / outer(rep(1,nrow(df.ilrp)), ilr.weights)
}
# Make sure everything is lined up (else throw an error)
if (!all.equal(colnames(V), colnames(df.ilrp))) {
stop("Colnames of V, Colnames of df!")
}
if (!all(part.weights==1)){
if (!all.equal(rownames(V), names(part.weights))){
stop("rownames of V and names of parts.weights not equal")
}
}
y <- ilrpInv(df.ilrp, V)
x <- miniclo(shiftpInv(y, part.weights))
x
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.