R/name_balances.R

Defines functions tally.votes vote.annotation get.ud.tips get.ud.nodes name.balance

Documented in name.balance

#' Name a balance (coordinate) based on taxonomy
#'
#' For a given ILR balance (coordinate) assigns a name to the balance based on
#' a provided taxonomy table. This is useful for interpretation of the balances.
#'
#' @param tr an object of class \code{'phylo'}
#' @param tax a matrix/data.frame of taxonomy, rownames should correspond to
#' \code{tr$tip.labels} columns should be taxonomic levels (named) with
#' increasing taxonomic resolution from left to right (e.g., Phylum to the
#' left of Genus).
#' @param coord the name of a balance/internal node on the tree (given as a
#' string)
#' @param method currently only \code{'voting'} implemented. See Details.
#' @param thresh threshold for assignment of taxonomy to a given part of a
#' balance (must be greater than 0.5 if \code{method='voting'}; see details).
#' @param return.votes whether voting results by taxonomic level should be
#' shown for \code{coord}. Note: this is helpful when \code{name.balance} does
#' not return a clear winner, as may be the case when a given \code{coord}
#' represents more than one taxonomic lineage. votes are returned as a list
#' indexed by \code{colnames(tax)} Options include:
#' \describe{
#' \item{\code{NULL}}{(default) only returns the combined consensus name of
#'    the balance}
#' \item{\code{'up'}}{adds tallied votes for the 'up' node to the output list}
#' \item{\code{'down'}}{adds tallied votes for the 'down' node to the output
#'    list}
#' \item{\code{'self'}}{adds tallied votes for \code{coord} to the output list}
#' }
#' @return If \code{return.votes=NULL} returns a string of the form
#' (ex. 'Genus_Bacteroides/Phylum_Firmicutes'). Otherwise returns
#' a list with the above string as 'name', see Arguments for \code{show.votes}
#' for other optional returned items.
#' @details A bit of terminology:
#' \describe{
#' \item{coord}{this is the same as the names of the balances which should be
#' the same as the names of the internal nodes of \code{tr}}
#' \item{'up'}{this is the child node of \code{coord} that is represented in
#' the numerator of the \code{coord} balance.}
#' \item{'down'}{this is the child node of \code{coord} that is represented in
#' the denominator of the \code{coord} balance}
#' }
#' The method \code{'voting'} assigns the name of the each part of a balance
#' (e.g., numerator and denominator / each child of \code{coord}) as follows:
#' \enumerate{
#' \item First Subset \code{tax} to contain only descendent tips of the given
#' child of \code{coord}
#' \item Second At the finest taxonomic (farthest right of \code{tax}) see if
#' any one taxonomic label is present at or above \code{thresh}. If yes output
#' that taxonomic label (at that taxonomic level) as the label for that child
#' of \code{coord}. If no then move to coarser taxonomic level (leftward) and
#' repeat.
#' }
#' @author Justin Silverman
#' @importFrom methods as
#' @importFrom methods is
#' @export
#' @seealso \code{\link{philr}}
#' @examples
#' tr <- named_rtree(40)
#' tax <- data.frame(Kingdom=rep('A', 40),
#'                Phylum=rep(c('B','C'), each=20),
#'                Genus=c(sample(c('D','F'),20, replace=TRUE),
#'                        sample(c('G','H'), 20, replace=TRUE)))
#' rownames(tax) <- tr$tip.label
#' name.balance(tr, tax, 'n1')
#' name.balance(tr, tax, 'n34')
#' name.balance(tr,tax, 'n34', return.votes = c('up', 'down'))
name.balance <- function(tr, tax, coord, method="voting", thresh=0.95, return.votes=NULL){
    if (method=="voting"){
        # Get tips in 'up' and 'down' subtree
        l.tips <- get.ud.tips(tr,coord)

        tax <- as(tax, "matrix")
        # Subset tax table based on above
        tax.up <- tax[l.tips[['up']],]
        tax.down <- tax[l.tips[['down']],]

        # Get Voted Consensus for up and down taxa (character strings)
        up.voted <- vote.annotation(tax.up, voting.threshold=thresh)
        down.voted <- vote.annotation(tax.down, voting.threshold=thresh)

        # Combine into a string and output
        name <- paste(up.voted,"/",down.voted,sep="")

        if (is.null(return.votes)){
            return(name)
        } else {
            res <- list('name'=name)
        }
        if ('up' %in% return.votes){
            res[['up.votes']] <- tally.votes(tax, l.tips[['up']])
        }
        if ('down' %in% return.votes){
            res[['down.votes']] <- tally.votes(tax, l.tips[['down']])
        }
        if ('self' %in% return.votes){
            res[['self.votes']] <- tally.votes(tax, unlist(l.tips))
        }
    return(res)
  }
  # In the future can extend to other methods of annotation/naming
  # (other than just voting)
}


# Returns a list of the 'up' and 'down' subtree's root nodes
# e.g., the child node of a given coordinate
# nn is node number
#' @importFrom phangorn Children
get.ud.nodes <- function(tr,coord, return.nn=FALSE){
    nn <- name.to.nn(tr, coord) # get node number
    l.nodes <- list()
    child <- phangorn::Children(tr, nn)
    if (length(child) < 2) stop(paste0(coord,' is a tip'))
    if (return.nn==TRUE){
        l.nodes[['up']] <- child[1]
        l.nodes[['down']] <- child[2]
    } else{
        l.nodes[['up']] <- nn.to.name(tr, child[1])
        l.nodes[['down']] <- nn.to.name(tr, child[2])
    }
    return(l.nodes)
}

# Returns a list of the 'up' and 'down' subtree's values as a vector of tip
# ids (corresponds to up and down used for sbp creation)
# Each value is the ID of a tip
get.ud.tips <- function(tr,coord){
    l.tips <- list()
    child <- phangorn::Children(tr, name.to.nn(tr,coord))
    if (length(child) < 2) stop(paste0(coord,' is a tip'))
    # TODO: Bit of validation - consider better location    
    if (length(child) > 2) stop("Tree is not soley binary.") 
    l.tips[['up']] <- sapply(unlist(phangorn::Descendants(tr,child[1],
        type='tips')), function(x) nn.to.name(tr, x))
    l.tips[['down']] <- sapply(unlist(phangorn::Descendants(tr,child[2],
        type='tips')), function(x) nn.to.name(tr, x))
    return(l.tips)
}

# Find most concerved
# returns character
# NA are not counted but held against the winner in voting
# Candidate must have >= voting.threshold to be considered the winner
vote.annotation <- function(tax, voting.threshold=0.95){
    if (voting.threshold <= 0.5) {
        stop('voting.threshold must be > 0.5 for unique winner')
    }
    if (is(tax, "character")){ # e.g., is there only 1 voter here
        tmp.names <- names(tax)
        tax <- matrix(tax,nrow=1)
        colnames(tax) <- tmp.names
    }
    nr <- nrow(tax) # the number of tips
    nc <- ncol(tax) # the number of taxonomic ranks in table
    name <- NULL

    # evaluate in decreasing order of taxonomic resolution
    for (i in seq(nc,1)){ 
        votes <- tax[,i]
        if (all(is.na(votes))) {next}
        # drop NA votes but hold against the total number    
        votes <- votes[!is.na(votes)] 
        winner <- sort(table(votes), decreasing=TRUE)[1]
        # Arbitrarily set threshold to 95% of votes
        if (!is.na(winner) & (winner/nr >= voting.threshold)){ 
            name <- names(winner)
            # Try and append taxonomic rank to name if columns of tax table
            # are labeled
            if (!is.null(colnames(tax))) {
            name <- paste(colnames(tax)[i],'_',name,sep="")
        }
            break
        }   # Else try the next level up
    }
    if (is.null(name)){ # If no consensus vote found (above threshold)
        name <- "Unclear_Lineage_Identity"
    }
    return(name)
}

# Given a list of tax IDs (e.g., rownames in tax table)
# print out a easy to read list showing whats present at each taxonomic level.
# This is really to be used when You don't get a result you like with
# vote.annotation()
tally.votes <- function(tax,ids){
    l.votes <- list()
    nc <- ncol(tax) # the number of taxonomic ranks in the table
    for (i in seq(nc,1)){
        votes <- tax[ids,i]
        votes <- votes[!is.na(votes)]
        rank <- ifelse(!is.null(colnames(tax)), colnames(tax)[i],i)
        l.votes[[rank]] <- table(votes)
    }
    return(l.votes)
}
jsilve24/philr documentation built on April 20, 2023, 12:43 p.m.