R/run_sampler_phy.R

#' Aggregated and overdispersed sampling for phylo objects
#'
#' @author Bruno Vilela
#'
#' @description Generate phylogenetically aggregated or overdispersed subsamples
#' from any given phylogenetic tree (class phylo). Results can be used to design
#' sampling schemes for future research, for resampling proposes, and for
#' removing phylogenetic bias from data.
#'
#' @param x \code{phylo} object.
#' @param n A positive integer number indicating the sample size.
#' @param alpha Number indicating the strength of aggregation (if negative) or
#' overdispersion (if positive). When alpha = 0 sample is random.
#' There are no limits to alpha, but combinations of big numbers and big
#' distances may result in an error depending on your R configurations.
#' @param dist.func Function to calculate the phylogenetic distance.
#' The function should be able to receive a phylo object and return
#' a \code{matrix} indicating the pairwise distance
#' between tips. Row and column names should be given.
#' Default uses \code{\link{cophenetic}} from \code{ape} package.
#' Other examples include
#' @param n_start Number of initial selected tips. Default is one starting tip.
#' @param return_start if \code{TRUE} the tip label of the starting tip is returned.
#' @param starting Character vector indicating tip labels of starting tip(s).
#' If not provided, random starting tip(s) is(are) selected.
#'
#' @details The function uses the algorithm in \code{\link{run_sampler}},
#' but accepts a phylo object as input.
#'
#' @return The function returns a pruned phylogenetic tree.
#'
#' @seealso \code{\link{run_sampler}}
#' @seealso \code{\link{Nee_May_1997}}
#'
#' @examples
#' # Generate a random tree
#' require(ape)
#' set.seed(100)
#' tree <- rcoal(10)
#' set.seed(2)
#' # Highly overdispersed 50% resample design (alpha = 100)
#' overdispersed <- run_sampler_phy(tree, 5, alpha = 100, starting = "t10")
#' # Highly aggregated 50% resample design (alpha = -100)
#' aggregated <- run_sampler_phy(tree, 5, alpha = -100, starting = "t10")
#' # Random 50% resample design (alpha = 0)
#' random <- run_sampler_phy(tree, 5, alpha = 0, starting = "t10")
#' # Plot to compare
#' par(mfrow = c(2, 2))
#' plot(tree, main = "Full tree", cex = 1)
#' axis(1)
#' plot(overdispersed, main = "Overdispersed 50% sampling", cex = 1)
#' axis(1)
#' plot(aggregated, main = "Aggregated 50% sampling", cex = 1)
#' axis(1)
#' plot(random, main = "Random 50% sampling", cex = 1)
#' axis(1)


run_sampler_phy <- function (x, n, alpha, dist.func = cophenetic,
                             n_start = 1, return_start = FALSE,
                             starting = NULL) {
  #
  if (class(x) != 'phylo') {
    stop("x must be a phylogeny of class phylo")
  }
  if (class(dist.func) != "function") {
    stop("dist.func must be a function")
  }
  tree <- x
  x <- dist.func(tree)

  .error_control2(x, tree$tip.label)

  selected <- run_sampler(x, n, alpha, n_start, return_start,
                          starting)
  if (return_start) {
    selected[[1]] <- drop.tip(tree, tree$tip.label[!tree$tip.label %in% selected[[1]]])
    return(selected)
  } else {
    selected <- drop.tip(tree, tree$tip.label[!tree$tip.label %in% selected])
    return(selected)
  }
}

.error_control2 <- function(x, labels) {
  if (!is.matrix(x)) {
    stop("dist.func provided do not result in a a matrix.")
  }
  if (nrow(x) != ncol(x)) {
    stop("dist.func provided do not result in a a symmetric distance matrix.")
  }
  if (is.null(colnames(x)) | is.null(rownames(x))) {
    stop("dist.func provided do not result in a matrix with
         both columns and rows named.")
  }
  if (any(duplicated(rownames(x)))) {
    stop("dist.func provided resulted in
         a matrix with duplicated names.")
  }
  if(!all(labels %in% rownames(x))) {
    stop("dist.func provided do not result in a distance matrix
         with names corresponding to x labels.")
  }
}
BrunoVilela/sampler documentation built on May 20, 2019, 2:23 p.m.