R/taxonomy.R

Defines functions tax_lineage.taxon_info check_lineage build_lineage get_lineage taxon_mrca_method_factory taxon_info_method_factory taxonomy_mrca taxonomy_subtree taxonomy_taxon_info taxonomy_about

Documented in tax_lineage.taxon_info taxonomy_about taxonomy_mrca taxonomy_subtree taxonomy_taxon_info

##' Summary information about the Open Tree Taxonomy (OTT)
##'
##' Return metadata and information about the taxonomy
##' itself. Currently, the available metadata is fairly sparse, but
##' includes (at least) the version, and the location from which the
##' complete taxonomy source files can be downloaded.
##'
##' @title Information about the Open Tree Taxonomy
##' @param ... additional arguments to customize the API request (see
##'     \code{\link{rotl}} package documentation).
##' @return A list with the following properties:
##' \itemize{
##'
##'     \item {weburl} {String. The release page for this version
##'     of the taxonomy.}
##'
##'     \item {author} {String. The author string.}
##'
##'     \item {name} {String. The name of the taxonomy.}
##'
##'     \item {source} {String. The full identifying information for
##'     this version of the taxonomy.}
##'
##'     \item {version} {String. The version number of the taxonomy.}
##' }
##' @examples
##' \dontrun{
##' taxonomy_about()
##' }
##' @export
taxonomy_about <- function(...) {
  res <- .taxonomy_about(...)
  return(res)
}


##' Information about taxa.
##'
##' Given a vector of ott ids, \code{taxonomy_taxon_info} returns
##' information about the specified taxa.
##'
##' The functions \code{tax_rank}, \code{tax_name}, and
##' \code{synonyms} can extract this information from an object
##' created by the \code{taxonomy_taxon_info()}.
##'
##' @title Taxon information
##' @param ott_ids the ott ids of the taxon of interest (numeric or
##'     character containing only numbers)
##' @param include_children whether to include information about all
##'     the children of this taxon. Default \code{FALSE}.
##' @param include_lineage whether to include information about all
##'     the higher level taxa that include the \code{ott_ids}.
##'     Default \code{FALSE}.
##' @param include_terminal_descendants whether to include the list of
##'     terminal \code{ott_ids} contained in the \code{ott_ids}
##'     provided.
##' @param ... additional arguments to customize the API request (see
##'     \code{\link{rotl}} package documentation).
##' @param tax an object generated by the \code{taxonomy_taxon_info}
##'     function
##' @return \code{taxonomy_taxon_info} returns a list detailing
##'     information about the taxa. \code{tax_rank} and
##'     \code{tax_name} return a vector. \code{synonyms} returns a
##'     list whose elements are the synonyms for each of the
##'     \code{ott_id} requested.
##'
##' @seealso \code{\link{tnrs_match_names}} to obtain \code{ott_id}
##'     from a taxonomic name.
##' @examples
##' \dontrun{
##' req <- taxonomy_taxon_info(ott_id=515698)
##' tax_rank(req)
##' tax_name(req)
##' synonyms(req)
##' }
##' @export
taxonomy_taxon_info <- function(ott_ids, include_children = FALSE,
                                include_lineage = FALSE,
                                include_terminal_descendants = FALSE, ...) {
  res <- lapply(ott_ids, function(x) {
    .taxonomy_taxon_info(
      ott_id = x,
      include_children = include_children,
      include_lineage = include_lineage,
      include_terminal_descendants = include_terminal_descendants,
      ...
    )
  })
  names(res) <- ott_ids
  class(res) <- "taxon_info"
  return(res)
}


##' Given an ott id, return the inclusive taxonomic subtree descended
##' from the specified taxon.
##'
##' If the output of this function is exported to a file, the only
##' possible value for the \code{output_format} argument is
##' \dQuote{\code{newick}}. If the file provided already exists, it
##' will be silently overwritten.
##'
##' @title Taxonomy subtree
##' @param ott_id The ott id of the taxon of interest.
##' @param output_format the format of the object to be returned. See
##'     the \sQuote{Return} section.
##' @param label_format Character. Defines the label type; one of
##'     \dQuote{\code{name}}, \dQuote{\code{id}}, or
##'      \dQuote{\code{name_and_id}} (the default).
##' @param ... additional arguments to customize the API request (see
##'     \code{\link{rotl}} package documentation).
##' @param file the file name where to save the output of the
##'     function. Ignored unless \code{output_format} is set to
##'     \dQuote{\code{phylo}}.
##' @return If the \code{file} argument is missing: \itemize{
##'
##'     \item{\dQuote{\code{taxa}}} { a list of the taxa names
##'     (species) in slot \code{tip_label}, and higher-level taxonomy
##'     (e.g., families, genera) in slot \code{edge_label}, descending
##'     from the taxa corresponding to the \code{ott_id} provided. }
##'
##'     \item{\dQuote{\code{newick}}} { a character vector containing
##'     the newick formatted string corresponding to the taxonomic
##'     subtree for the \code{ott_id} provided. }
##'
##'     \item{\dQuote{\code{phylo}}} { an object of the class
##'     \code{phylo} from the \code{ape} package. }
##'
##'     \item{\dQuote{\code{raw}}} { the direct output from the API,
##'     i.e., a list with an element named \sQuote{newick} that
##'     contains the subtree as a newick formatted string. }
##'
##'     }
##'
##'     If a \code{file} argument is provided (and
##'     \code{output_format} is set to \dQuote{\code{phylo}}), a
##'     logical indicating whether the file was successfully created.
##'
##' @examples
##' \dontrun{
##' req <- taxonomy_subtree(ott_id=515698)
##' plot(taxonomy_subtree(ott_id=515698, output_format="phylo"))
##' }
##' @export
taxonomy_subtree <- function(ott_id = NULL,
                             output_format = c("taxa", "newick", "phylo", "raw"),
                             label_format = NULL, file, ...) {
  output_format <- match.arg(output_format)
  res <- .taxonomy_subtree(ott_id = ott_id, label_format = label_format, ...)
  if (!missing(file) && !identical(output_format, "newick")) {
    warning(
      sQuote("file"),
      " argument is ignored, you can only write newick tree strings to a file."
    )
  }
  if (identical(output_format, "raw")) {
    return(res)
  } else if (identical(output_format, "newick")) {
    res <- res$newick
    if (!missing(file)) {
      unlink(file)
      cat(res, file = file)
      invisible(return(file.exists(file)))
    }
  } else if (identical(output_format, "phylo")) {
    res <- phylo_from_otl(res)
  } else { ## in all other cases use tree_to_labels
    res <- tree_to_labels(res)
  }
  return(res)
}


##' Taxonomic Least Inclusive Common Ancestor (MRCA)
##'
##' Given a set of OTT ids, get the taxon that is the most recent common
##' ancestor (the MRCA) of all the identified taxa.
##'
##' @title Taxonomic MRCA
##' @param ott_ids a vector of ott ids for the taxa whose MRCA is to
##'     be found (numeric).
##' @param tax an object generated by the \code{taxonomy_mrca}
##'     function
##' @param ... additional arguments to customize the API request (see
##'     \code{\link{rotl}} package documentation).
##' @return \itemize{
##'
##'     \item{\code{taxonomy_mrca}} { returns a list about the
##'     taxonomic information relating to the MRCA for the ott_ids
##'     provided. }
##'
##'     \item{\code{tax_rank}} { returns a character vector of the
##'     taxonomic rank for the MRCA. }
##'
##'     \item{\code{tax_name}} { returns a character vector the
##'     Open Tree Taxonomy name for the MRCA. }
##'
##'     \item{\code{ott_id}} { returns a numeric vector of the ott id
##'     for the MRCA. }
##'
##' }
##' @examples
##' \dontrun{
##' req <- taxonomy_mrca(ott_ids=c(515698,590452,643717))
##' tax_rank(req)
##' tax_name(req)
##' ott_id(req)
##' }
##' @export
taxonomy_mrca <- function(ott_ids = NULL, ...) {
  res <- .taxonomy_mrca(ott_ids = ott_ids, ...)
  class(res) <- c("taxon_mrca", class(res))
  return(res)
}



### methods for taxonomy_taxon_info ---------------------------------------------

taxon_info_method_factory <- function(.f) {
  function(tax, ...) {
    res <- lapply(tax, .f)
    names(res) <- vapply(tax, .tax_unique_name, character(1))
    res <- add_otl_class(res, .f)
    res
  }
}

##' @export
##' @rdname taxonomy_taxon_info
tax_rank.taxon_info <- taxon_info_method_factory(.tax_rank)

##' @export
##' @rdname taxonomy_taxon_info
tax_name.taxon_info <- taxon_info_method_factory(.tax_name)

##' @export
##' @rdname taxonomy_taxon_info
unique_name.taxon_info <- taxon_info_method_factory(.tax_unique_name)

##' @export
##' @rdname taxonomy_taxon_info
synonyms.taxon_info <- taxon_info_method_factory(.tax_synonyms)

##' @export
##' @rdname taxonomy_taxon_info
ott_id.taxon_info <- taxon_info_method_factory(.tax_ott_id)

##' @export
##' @rdname taxonomy_taxon_info
tax_sources.taxon_info <- taxon_info_method_factory(.tax_sources)

##' @export
##' @rdname taxonomy_taxon_info
is_suppressed.taxon_info <- taxon_info_method_factory(.tax_is_suppressed)

##' @export
##' @rdname taxonomy_taxon_info
flags.taxon_info <- taxon_info_method_factory(.tax_flags)


### methods for taxonomy_mrca ---------------------------------------------------

taxon_mrca_method_factory <- function(.f) {
  function(tax, ...) {
    res <- list(.f(tax[["mrca"]]))
    names(res) <- .tax_unique_name(tax[["mrca"]])
    res <- add_otl_class(res, .f)
    res
  }
}

##' @export
##' @rdname taxonomy_mrca
tax_rank.taxon_mrca <- taxon_mrca_method_factory(.tax_rank)

##' @export
##' @rdname taxonomy_mrca
tax_name.taxon_mrca <- taxon_mrca_method_factory(.tax_name)

##' @export
##' @rdname taxonomy_mrca
ott_id.taxon_mrca <- taxon_mrca_method_factory(.tax_ott_id)

##' @export
##' @rdname taxonomy_mrca
unique_name.taxon_mrca <- taxon_mrca_method_factory(.tax_unique_name)

##' @export
##' @rdname taxonomy_mrca
tax_sources.taxon_mrca <- taxon_mrca_method_factory(.tax_sources)

##' @export
##' @rdname taxonomy_mrca
flags.taxon_mrca <- taxon_mrca_method_factory(.tax_flags)

##' @export
##' @rdname taxonomy_mrca
is_suppressed.taxon_mrca <- taxon_mrca_method_factory(.tax_is_suppressed)

### method for extracting higher taxonomy from taxonomy_taxon_info calls  -------

get_lineage <- function(tax) {
  check_lineage(tax)
  lg <- lapply(tax[["lineage"]], build_lineage)
  lg <- do.call("rbind", lg)
  as.data.frame(lg, stringsAsFactors = FALSE)
}

build_lineage <- function(x) {
  c(
    "rank" = .tax_rank(x),
    "name" = .tax_name(x),
    "unique_name" = .tax_unique_name(x),
    "ott_id" = .tax_ott_id(x)
  )
}

check_lineage <- function(tax) {
  if (!exists("lineage", tax)) {
    stop(
      "The object needs to be created using ",
      sQuote("include_lineage=TRUE")
    )
  }
}

##' @export
##' @rdname tax_lineage
tax_lineage.taxon_info <- function(tax, ...) {
  lapply(tax, get_lineage)
}
ropensci/rotl documentation built on June 27, 2023, 4:55 p.m.