R/samplingProbs.R
In BAMMtools: Analysis and Visualization of Macroevolutionary Dynamics on Phylogenetic Trees

Documented in samplingProbs

####################################################
#
#		samplingProbs <- function(...)
#
#		tree = object of class phylo
#		cladeTable = a dataframe with 1 column of species names and a second column of group assignment
#			Must either be a table for all species in tree, or a table of greater species richness, including those species in the tree.
#		cladeRichness = either NULL or a vector of species counts, named by group names.
#		globalSampling = percent sampling of the backbone of the phylogeny
#		output = path + output file name (.txt)
#		writeToDisk = boolean, should the table be written to disk, defaults to TRUE

##' @title Creates clade-specific sampling fractions
##'
##' @description If the user would like to specify species sampling on a
##'     clade-by-clade basis, a sampling probability table can be provided to
##'     \code{BAMM}.
##'
##' @param tree An object of class \code{phylo}.
##' @param cladeTable A dataframe with one column of species names and a
##'     second column of group assignment. 
##' @param cladeRichness Either \code{NULL} or a vector of species counts,
##'     named by clade names.
##' @param globalSampling percent sampling for the backbone of the phylogeny.
##' @param output Path + output file name.
##' @param writeToDisk A logical, should the table be written to disk,
##'     defaults to \code{TRUE}.
##'
##' @details This function handles two types of input: The cladeTable can
##'     either contain the species found in the phylogeny, along with clade
##'     assignment of those species, or it can contain more species than found
##'     in the phylogeny. If the table only contains those species in the
##'     phylogeny, then a vector \code{cladeRichness} must be provided that
##'     contains known clade richness. If the cladeTable contains more than
##'     the species in the phylogeny, then cladeRichness should be set to
##'     \code{NULL}. The \code{globalSampling} value should represent the
##'     overall completeness of species sampling in terms of major clades. See
##'     \url{http://bamm-project.org/} for additional details.
##'
##' @return If \code{writeToDisk = TRUE}, then no object is returned. If
##'     \code{writeToDisk = FALSE}, then a dataframe is returned. The
##'     resultant table must contain one row for each species in the
##'     phylogeny, along with clade assignment, and sampling fraction. The
##'     first line must contain the overall sampling fraction for the
##'     phylogeny and must be written as tab-delimited, with no headers.
##'
##' @author Pascal Title
##'
##' @examples
##' # Generate dummy data
##' tree <- read.tree(text="(((t1:2,(t2:1,t3:1):1):1,((t4:1,t5:1):1,t6:2):1)
##'                   :1,(t7:3,(t8:2,t9:2):1):1);")
##' tree$tip.label <- paste(rep('Species',9),1:9,sep='')
##'
##' spTable <- as.data.frame(matrix(nrow=9,ncol=2))
##' spTable[,1] <- tree$tip.label
##' spTable[1:3,2] <- 'cladeA'
##' spTable[4:6,2] <- 'cladeB'
##' spTable[7:9,2] <- 'cladeC'
##' richnessVec <- c(cladeA=5, cladeB=4, cladeC=12)
##'
##' # Option 1: We have a table of clade assignment for the species in the
##' #           tree, along with a vector of known clade richness
##' spTable
##' richnessVec
##' samplingProbs(tree, cladeTable = spTable, cladeRichness = richnessVec,
##'               globalSampling = 1, writeToDisk = FALSE)
##' 
##' # Option 2: We have a table of known species, beyond the sampling in the
##' #           phylogeny
##' spTable <- rbind(spTable, c('Species10','cladeA'),c('Species11','cladeA'),
##'                  c('Species12','cladeC'), c('Species13','cladeC'),
##'                  c('Species14','cladeC'))
##'
##' spTable
##'
##' samplingProbs(tree, cladeTable = spTable, cladeRichness = NULL, 
##'               globalSampling = 0.9, writeToDisk = FALSE)
##' @export
samplingProbs <- function(tree, cladeTable, cladeRichness = NULL, globalSampling, output, writeToDisk = TRUE) {
	
	if (length(intersect(tree$tip.label,cladeTable[,1])) != length(tree$tip.label)) {
		stop("Not all species from tree are in cladeTable.");
	}
	
	if (nrow(cladeTable) == length(tree$tip.label)) {
		if (is.null(cladeRichness)) {
			stop("If cladeTable only contains species from tree, then cladeRichness must be provided.");
		}
	}
	
	if (nrow(cladeTable) > length(tree$tip.label)) {
		if (!is.null(cladeRichness)) {
			warning("cladeTable contains more species than in tree, so cladeRichness vector will be ignored.");
		}
	}
	
	if (!is.null(cladeRichness)) {
		if (length(cladeRichness) != length(unique(cladeTable[,2]))) {
			stop("The cladeRichness vector must contain the same number of clades as are described in the cladeTable.");
		}
	}
	
	if (!is.vector(cladeRichness) & !is.null(cladeRichness)) {
		stop("Error: cladeRichness must either be NULL or an integer vector named with clade names.");
	}
	
	if (ncol(cladeTable) > 2) {
		stop("cladeTable must contain 2 columns: one of species, and one of clade assignment.");
	}
	
	if (is.matrix(cladeTable)) {
		cladeTable <- as.data.frame(cladeTable, stringsAsFactors=FALSE);
	}
	
	if (nrow(cladeTable) > length(tree$tip.label)) {
		probs <- as.data.frame(matrix(nrow=length(tree$tip.label),ncol=3));
		colnames(probs) <- c('sp','clade','prob');
		for (i in 1:length(tree$tip.label)) {
			probs[i,1] <- tree$tip.label[i];
			clade <- cladeTable[cladeTable[,1] == tree$tip.label[i],2];
			inTree <- intersect(cladeTable[cladeTable[,2] == clade,1],tree$tip.label);
			probs[i,2] <- clade;
			probs[i,3] <- length(inTree) / length(cladeTable[cladeTable[,2] == clade,1]);
		}
		probs <- rbind(c(globalSampling,'',''),probs);
	}
	
	if (nrow(cladeTable) == length(tree$tip.label) & !is.null(cladeRichness)) {
		probs <- as.data.frame(matrix(nrow=length(tree$tip.label),ncol=3));
		colnames(probs) <- c('sp','clade','prob');
		for (i in 1:length(tree$tip.label)) {
			probs[i,1] <- tree$tip.label[i];
			clade <- cladeTable[cladeTable[,1] == tree$tip.label[i],2];
			probs[i,2] <- clade;
			probs[i,3] <- nrow(cladeTable[cladeTable[,2] == clade,]) / cladeRichness[clade];
		}
		probs <- rbind(c(globalSampling,'',''),probs);
	}
	if (writeToDisk) {
		write.table(probs, file=output, quote=F, col.names=F, row.names=F, sep='\t');
	} else {
		return(probs);
	}
}

Any scripts or data that you put into this service are public.

BAMMtools documentation built on Sept. 13, 2024, 1:10 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

BAMMtools
Analysis and Visualization of Macroevolutionary Dynamics on Phylogenetic Trees

R/samplingProbs.R
In BAMMtools: Analysis and Visualization of Macroevolutionary Dynamics on Phylogenetic Trees

Defines functions samplingProbs

Documented in samplingProbs

Try the BAMMtools package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

BAMMtools Analysis and Visualization of Macroevolutionary Dynamics on Phylogenetic Trees

R/samplingProbs.R In BAMMtools: Analysis and Visualization of Macroevolutionary Dynamics on Phylogenetic Trees

Defines functions samplingProbs

Documented in samplingProbs

Try the BAMMtools package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

BAMMtools
Analysis and Visualization of Macroevolutionary Dynamics on Phylogenetic Trees

R/samplingProbs.R
In BAMMtools: Analysis and Visualization of Macroevolutionary Dynamics on Phylogenetic Trees