R/taxonomy.R

Defines functions duplicated_index_taxonomy duplicated_index to_index nearest_supertaxa parse_subset_argument enclosing_taxon apply_names_and_values unique.taxa_taxonomy c.taxa_taxonomy is_internode.taxa_taxonomy is_internode internodes.taxa_taxonomy internodes n_leaves.taxa_taxonomy n_leaves is_leaf.taxa_taxonomy is_leaf leaves.taxa_taxonomy leaves is_stem.taxa_taxonomy is_stem stems.taxa_taxonomy stems as_tibble.taxa_taxonomy as_data_frame.taxa_taxonomy `%in%.factor.taxa_taxonomy` `%in%.character.taxa_taxonomy` `%in%.taxa_taxonomy.default` `%in%.taxa_taxonomy` `[<-.taxa_taxonomy` `[[.taxa_taxonomy` `[.taxa_taxonomy` n_supertaxa.taxa_taxonomy n_supertaxa supertaxa.taxa_taxonomy supertaxa n_subtaxa.taxa_taxonomy n_subtaxa subtaxa.taxa_taxonomy subtaxa roots.taxa_taxonomy roots is_root.taxa_taxonomy is_root is_taxonomy vec_proxy_equal.taxa_taxonomy vec_cast.taxa_taxon.taxa_taxonomy vec_cast.taxa_taxonomy.taxa_taxon vec_cast.factor.taxa_taxonomy vec_cast.taxa_taxonomy.factor vec_cast.character.taxa_taxonomy vec_cast.taxa_taxonomy.character vec_cast.taxa_taxonomy.taxa_taxonomy vec_cast.taxa_taxonomy.default vec_cast.taxa_taxonomy vec_ptype2.factor.taxa_taxonomy vec_ptype2.taxa_taxonomy.factor vec_ptype2.character.taxa_taxonomy vec_ptype2.taxa_taxonomy.character vec_ptype2.taxa_taxonomy.taxa_taxonomy vec_ptype2.taxa_taxonomy.vctrs_unspecified vec_ptype2.taxa_taxonomy.default vec_ptype2.taxa_taxonomy pillar_shaft.taxa_taxonomy vec_ptype_full.taxa_taxonomy vec_ptype_abbr.taxa_taxonomy obj_print_footer.taxa_taxonomy obj_print_data.taxa_taxonomy format.taxa_taxonomy printed_taxonomy `names<-.taxa_taxonomy` names.taxa_taxonomy `tax_rank<-.taxa_taxonomy` tax_rank.taxa_taxonomy `tax_auth<-.taxa_taxonomy` tax_auth.taxa_taxonomy `tax_name<-.taxa_taxonomy` tax_name.taxa_taxonomy `tax_cite<-.taxa_taxonomy` tax_cite.taxa_taxonomy `tax_date<-.taxa_taxonomy` tax_date.taxa_taxonomy `tax_author<-.taxa_taxonomy` tax_author.taxa_taxonomy `tax_db<-.taxa_taxonomy` tax_db.taxa_taxonomy `tax_id<-.taxa_taxonomy` tax_id.taxa_taxonomy taxonomy new_taxonomy

Documented in duplicated_index duplicated_index_taxonomy format.taxa_taxonomy internodes is_internode is_leaf is_root is_stem is_taxonomy leaves names.taxa_taxonomy new_taxonomy n_leaves n_subtaxa n_supertaxa obj_print_data.taxa_taxonomy obj_print_footer.taxa_taxonomy pillar_shaft.taxa_taxonomy printed_taxonomy roots stems subtaxa supertaxa tax_author.taxa_taxonomy tax_auth.taxa_taxonomy tax_cite.taxa_taxonomy tax_date.taxa_taxonomy tax_db.taxa_taxonomy tax_id.taxa_taxonomy tax_name.taxa_taxonomy taxonomy tax_rank.taxa_taxonomy vec_cast.character.taxa_taxonomy vec_cast.factor.taxa_taxonomy vec_cast.taxa_taxonomy vec_cast.taxa_taxonomy.character vec_cast.taxa_taxonomy.default vec_cast.taxa_taxonomy.factor vec_cast.taxa_taxonomy.taxa_taxon vec_cast.taxa_taxonomy.taxa_taxonomy vec_cast.taxa_taxon.taxa_taxonomy vec_proxy_equal.taxa_taxonomy vec_ptype2.character.taxa_taxonomy vec_ptype2.factor.taxa_taxonomy vec_ptype2.taxa_taxonomy vec_ptype2.taxa_taxonomy.character vec_ptype2.taxa_taxonomy.default vec_ptype2.taxa_taxonomy.factor vec_ptype2.taxa_taxonomy.taxa_taxonomy vec_ptype2.taxa_taxonomy.vctrs_unspecified vec_ptype_abbr.taxa_taxonomy vec_ptype_full.taxa_taxonomy

#--------------------------------------------------------------------------------
# S3 constructors
#--------------------------------------------------------------------------------

#' Minimal taxonomy constructor
#'
#' Minimal taxonomy constructor for internal use. Only use when the input is known to be valid since
#' few validity checks are done.
#'
#' @param taxa A [taxon] vector.
#' @param supertaxa The indexes of `taxa` for each taxon's supertaxon.
#'
#' @return An `S3` object of class `taxa_taxon`
#'
#' @keywords internal
new_taxonomy <- function(taxa = taxon(), supertaxa = integer()) {
  # Check that values are the correct type
  # vctrs::vec_assert(taxa, ptype = taxon())
  vctrs::vec_assert(supertaxa, ptype = integer())

  # Create new object
  vctrs::new_rcrd(list(taxa = taxa, supertaxa = supertaxa),
                  class = 'taxa_taxonomy')
}


#' Taxonomy class
#'
#' \Sexpr[results=rd, stage=render]{taxa:::lifecycle("experimental")}
#' Used to store information about a set of taxa forming a taxonomic tree.
#'
#' @param taxa A [taxon] vector or something that can be converted to a [taxon] vector.
#' @param supertaxa The indexes of `taxa` for each taxon's supertaxon.
#' @param .names The names of the vector (not the names of taxa).
#'
#' @return An `S3` object of class `taxa_taxon`
#' @family classes
#'
#' @examples
#'
#' x <- taxonomy(c('Carnivora', 'Felidae', 'Panthera', 'Panthera leo',
#'                 'Panthera tigris', 'Ursidae', 'Ursus', 'Ursus arctos'),
#'               supertaxa = c(NA, 1, 2, 3, 3, 1, 6, 7))
#'
#' x <- taxonomy(taxon(name = c('Carnivora', 'Felidae', 'Panthera', 'Panthera leo',
#'                              'Panthera tigris', 'Ursidae', 'Ursus', 'Ursus arctos'),
#'                     rank = c('order', 'family', 'genus', 'species',
#'                              'species', 'family', 'genus', 'species'),
#'                     id = taxon_id(c('33554', '9681', '9688', '9689',
#'                                     '9694', '9632', '9639', '9644'),
#'                                   db = 'ncbi'),
#'                     auth = c('Bowdich, 1821', 'Fischer de Waldheim, 1817', 'Oken, 1816', 'L., 1758',
#'                              'L., 1758', 'Fischer de Waldheim, 1817', 'L., 1758', 'L., 1758')),
#'               supertaxa = c(NA, 1, 2, 3, 3, 1, 6, 7))
#' names(x) <- letters[1:8]
#'
#' # Subset taxonomy vector
#' x[2] # By default, all subtaxa are included
#' x['b'] # Names can also be used
#' x[2:3, subtaxa = FALSE] # Disable subtaxa
#' x[3, supertaxa = TRUE] # include supertaxa
#' x[is_leaf(x)] # Subset by logical vector
#'
#' # Get parts of the taxonomy vector
#' tax_name(x)
#' tax_rank(x)
#' tax_id(x)
#' tax_db(x)
#' tax_auth(x)
#' tax_author(x)
#' tax_date(x)
#' tax_cite(x)
#'
#' # Set parts of the taxonomy vector
#' tax_name(x) <- tolower(tax_name(x))
#' tax_rank(x)[1] <- NA
#' tax_id(x) <- '9999'
#' tax_db(x) <- 'itis'
#' tax_auth(x) <- NA
#' tax_author(x)[2:3] <- c('Joe', 'Billy')
#' tax_date(x) <- c('1999', '2013', '1796', '1899',
#'                  '1997', '2003', '1996', '1859')
#' tax_cite(x)['c'] <- 'Linnaeus, C. (1771). Mantissa plantarum altera generum.'
#'
#' # Convert to table
#' tibble::as_tibble(x)
#' as_data_frame(x)
#'
#' # Get taxonomy attributes
#' subtaxa(x)
#' subtaxa(x, value = tax_name(x))
#' subtaxa(x, value = as_taxon(x))
#' n_subtaxa(x)
#' supertaxa(x)
#' n_supertaxa(x)
#' leaves(x)
#' n_leaves(x)
#' is_leaf(x)
#' stems(x)
#' is_stem(x)
#' roots(x)
#' is_root(x)
#' internodes(x)
#' is_internode(x)
#'
#' @export
taxonomy <- function(taxa = taxon(), supertaxa = NA, .names = NULL) {
  # Cast inputs to correct values
  if (!is_taxon(taxa)) {
    taxa <- taxon(taxa)
  }
  supertaxa <- vctrs::vec_cast(supertaxa, integer())

  # Recycle ranks and databases to common length
  recycled <- vctrs::vec_recycle_common(taxa, supertaxa)
  taxa <- recycled[[1]]
  supertaxa <- recycled[[2]]

  # Create taxon object
  out <- new_taxonomy(taxa = taxa, supertaxa = supertaxa)
  names(out) <- .names

  return(out)
}


#' Taxonomy class
#'
#' Taxonomy class. See [taxonomy] for more information
#'
#' @name taxa_taxonomy-class
#' @aliases taxa_taxonomy
#' @rdname taxa_taxonomy
#' @importFrom methods setOldClass
#' @exportClass taxa_taxonomy
setOldClass(c("taxa_taxonomy", "vctrs_vctr"))



#--------------------------------------------------------------------------------
# S3 getters/setters
#--------------------------------------------------------------------------------

#' @rdname tax_id
#' @export
tax_id.taxa_taxonomy <- function(x) {
  tax_id(vctrs::field(x, "taxa"))
}

#' @rdname tax_id
#' @export
`tax_id<-.taxa_taxonomy` <- function(x, value) {
  tax_id(vctrs::field(x, "taxa")) <- value
  return(x)
}



#' @rdname tax_db
#' @export
tax_db.taxa_taxonomy <- function(x) {
  tax_db(tax_id(x))
}

#' @rdname tax_db
#' @export
`tax_db<-.taxa_taxonomy` <- function(x, value) {
  tax_db(tax_id(x)) <- value
  return(x)
}



#' @rdname tax_author
#' @export
tax_author.taxa_taxonomy <- function(x) {
  tax_author(tax_auth(x))
}

#' @rdname tax_author
#' @export
`tax_author<-.taxa_taxonomy` <- function(x, value) {
  tax_author(tax_auth(x)) <- value
  return(x)
}



#' @rdname tax_date
#' @export
tax_date.taxa_taxonomy <- function(x) {
  tax_date(tax_auth(x))
}

#' @rdname tax_date
#' @export
`tax_date<-.taxa_taxonomy` <- function(x, value) {
  tax_date(tax_auth(x)) <- value
  return(x)
}



#' @rdname tax_cite
#' @export
tax_cite.taxa_taxonomy <- function(x) {
  tax_cite(tax_auth(x))
}

#' @rdname tax_cite
#' @export
`tax_cite<-.taxa_taxonomy` <- function(x, value) {
  tax_cite(tax_auth(x)) <- value
  return(x)
}



#' @rdname tax_name
#' @export
tax_name.taxa_taxonomy <- function(x) {
  tax_name(vctrs::field(x, "taxa"))
}

#' @rdname tax_name
#' @export
`tax_name<-.taxa_taxonomy` <- function(x, value) {
  tax_name(vctrs::field(x, "taxa")) <- value
  return(x)
}



#' @rdname tax_auth
#' @export
tax_auth.taxa_taxonomy <- function(x) {
  tax_auth(vctrs::field(x, "taxa"))
}

#' @rdname tax_auth
#' @export
`tax_auth<-.taxa_taxonomy` <- function(x, value) {
  tax_auth(vctrs::field(x, "taxa")) <- value
  return(x)
}



#' @rdname tax_rank
#' @export
tax_rank.taxa_taxonomy <- function(x) {
  tax_rank(vctrs::field(x, "taxa"))
}

#' @rdname tax_rank
#' @export
`tax_rank<-.taxa_taxonomy` <- function(x, value) {
  tax_rank(vctrs::field(x, "taxa")) <- value
  return(x)
}



#' @rdname taxonomy
#' @export
names.taxa_taxonomy <- function(x) {
  names(vctrs::field(x, "taxa"))
}

#' @rdname taxonomy
#' @export
`names<-.taxa_taxonomy` <- function(x, value) {
  names(vctrs::field(x, "taxa")) <- value
  return(x)
}


#--------------------------------------------------------------------------------
# S3 printing functions
#--------------------------------------------------------------------------------


#' Prepare taxonomy for printing
#'
#' Prepare taxonomy for printing. Makes color optional.
#'
#' @param color Use color?
#'
#' @return character
#'
#' @keywords internal
printed_taxonomy <- function(x, color = FALSE) {
  # Dont print anything if nothing to print
  if (length(x) == 0) {
    return(character(0))
  }

  # Make tree print out
  if (! is.null(names(x)) && ! all(is.na(names(x)))) {
    name_printed <- paste0(font_punct('['), names(x), font_punct(']'))
  } else {
    name_printed <- ''
  }
  tax_char <- paste0(seq_len(length(x)),
                     name_printed,
                     font_punct(': '),
                     printed_taxon(vctrs::field(x, 'taxa'), color = TRUE))
  sub_char <- unname(lapply(subtaxa(x, max_depth = 1), function(i) tax_char[i]))
  tree_data <- data.frame(stringsAsFactors = FALSE,
                          taxa = tax_char,
                          subtaxa = I(sub_char))
  out <- lapply(roots(x), function(i) cli::tree(tree_data, root = tax_char[i]))
  out <- unlist(out)

  # Disable color if needed
  if (! color) {
    out <- crayon::strip_style(out)
  }

  # Return tree
  return(out)
}

#' @rdname taxa_printing_funcs
#' @export
#' @keywords internal
format.taxa_taxonomy <- function(x, ...) {
  out <- printed_taxonomy(x, color = FALSE)
  stringr::str_pad(out, width = max(nchar(out)), side = 'right')
}

#' @rdname taxa_printing_funcs
#' @importFrom vctrs obj_print_data
#' @export
#' @keywords internal
obj_print_data.taxa_taxonomy <- function(x, ...) {
  # Dont print anything if nothing to print
  if (length(x) == 0) {
    return(invisible(x))
  }
  tree <- printed_taxonomy(x, color = TRUE)
  cat(tree, sep = '\n')
  invisible(x)
}


#' @rdname taxa_printing_funcs
#' @importFrom vctrs obj_print_footer
#' @export
#' @keywords internal
obj_print_footer.taxa_taxonomy <- function(x, ...) {
  vctrs::obj_print_footer(vctrs::field(x, 'taxa'))
}



#' @rdname taxa_printing_funcs
#' @importFrom vctrs vec_ptype_abbr
#' @export
#' @keywords internal
vec_ptype_abbr.taxa_taxonomy <- function(x, ...) {
  "taxonomy"
}


#' @rdname taxa_printing_funcs
#' @importFrom vctrs vec_ptype_full
#' @export
#' @keywords internal
vec_ptype_full.taxa_taxonomy <- function(x, ...) {
  "taxonomy"
}

#' @rdname taxa_printing_funcs
#' @importFrom pillar pillar_shaft
#' @export
#' @keywords internal
pillar_shaft.taxa_taxonomy <- function(x, ...) {
  out <- printed_taxonomy(x, color = TRUE)
  pillar::new_pillar_shaft_simple(out, align = "left")
}

#--------------------------------------------------------------------------------
# S3 coercion functions
#--------------------------------------------------------------------------------


#' @rdname taxa_coercion_funcs
#' @method vec_ptype2 taxa_taxonomy
#' @importFrom vctrs vec_ptype2
#' @export
#' @keywords internal
vec_ptype2.taxa_taxonomy <- function(x, y, ...) UseMethod("vec_ptype2.taxa_taxonomy", y)


#' @rdname taxa_coercion_funcs
#' @method vec_ptype2.taxa_taxonomy default
#' @export
vec_ptype2.taxa_taxonomy.default <- function(x, y, ..., x_arg = "", y_arg = "") {
  vctrs::stop_incompatible_type(x, y, x_arg = x_arg, y_arg = y_arg)
}


#' @rdname taxa_coercion_funcs
#' @method vec_ptype2.taxa_taxonomy vctrs_unspecified
#' @export
vec_ptype2.taxa_taxonomy.vctrs_unspecified <- function(x, y, ...) x


#' @rdname taxa_coercion_funcs
#' @method vec_ptype2.taxa_taxonomy taxa_taxonomy
#' @export
vec_ptype2.taxa_taxonomy.taxa_taxonomy <- function(x, y, ...) new_taxonomy()


#' @rdname taxa_coercion_funcs
#' @method vec_ptype2.taxa_taxonomy character
#' @export
vec_ptype2.taxa_taxonomy.character <- function(x, y, ...) character()


#' @rdname taxa_coercion_funcs
#' @method vec_ptype2.character taxa_taxonomy
#' @importFrom vctrs vec_ptype2.character
#' @export
vec_ptype2.character.taxa_taxonomy <- function(x, y, ...) character()


#' @rdname taxa_coercion_funcs
#' @method vec_ptype2.taxa_taxonomy factor
#' @export
vec_ptype2.taxa_taxonomy.factor <- function(x, y, ...) factor()


#' @rdname taxa_coercion_funcs
#' @method vec_ptype2.factor taxa_taxonomy
#' @importFrom vctrs vec_ptype2.factor
#' @export
vec_ptype2.factor.taxa_taxonomy <- function(x, y, ...) factor()



#--------------------------------------------------------------------------------
# S3 casting functions
#--------------------------------------------------------------------------------

#' @rdname taxa_casting_funcs
#' @method vec_cast taxa_taxonomy
#' @importFrom vctrs vec_cast
#' @export
#' @keywords internal
vec_cast.taxa_taxonomy <- function(x, to, ..., x_arg, to_arg) UseMethod("vec_cast.taxa_taxonomy")


#' @rdname taxa_casting_funcs
#' @method vec_cast.taxa_taxonomy default
#' @export
vec_cast.taxa_taxonomy.default <- function(x, to, ..., x_arg, to_arg) vctrs::vec_default_cast(x, to, x_arg, to_arg)


#' @rdname taxa_casting_funcs
#' @method vec_cast.taxa_taxonomy taxa_taxonomy
#' @export
vec_cast.taxa_taxonomy.taxa_taxonomy <- function(x, to, ..., x_arg, to_arg) x


#' @rdname taxa_casting_funcs
#' @method vec_cast.taxa_taxonomy character
#' @export
vec_cast.taxa_taxonomy.character <- function(x, to, ..., x_arg, to_arg) taxonomy(x)


#' @rdname taxa_casting_funcs
#' @method vec_cast.character taxa_taxonomy
#' @importFrom vctrs vec_cast.character
#' @export
vec_cast.character.taxa_taxonomy <- function(x, to, ..., x_arg, to_arg) as.character(vctrs::field(x, "taxa"))


#' @rdname taxa_casting_funcs
#' @method vec_cast.taxa_taxonomy factor
#' @export
vec_cast.taxa_taxonomy.factor <- function(x, to, ..., x_arg, to_arg) taxonomy(x)


#' @rdname taxa_casting_funcs
#' @method vec_cast.factor taxa_taxonomy
#' @importFrom vctrs vec_cast.factor
#' @export
vec_cast.factor.taxa_taxonomy <- function(x, to, ..., x_arg, to_arg) as.factor(vctrs::field(x, "taxa"))


#' @rdname taxa_casting_funcs
#' @method vec_cast.taxa_taxonomy taxa_taxon
#' @export
vec_cast.taxa_taxonomy.taxa_taxon <- function(x, to, ..., x_arg, to_arg) taxonomy(x)


#' @rdname taxa_casting_funcs
#' @method vec_cast.taxa_taxon taxa_taxonomy
#' @export
vec_cast.taxa_taxon.taxa_taxonomy <- function(x, to, ..., x_arg, to_arg) vctrs::field(x, "taxa")


#--------------------------------------------------------------------------------
# S3 equality and comparison functions
#--------------------------------------------------------------------------------

#' @rdname taxa_comparison_funcs
#' @method vec_proxy_equal taxa_taxonomy
#' @importFrom vctrs vec_proxy_equal
#' @export
vec_proxy_equal.taxa_taxonomy <- function(x, ...) {
  vec_proxy_equal(as_taxon(x))
}



#--------------------------------------------------------------------------------
# Exported utility functions
#--------------------------------------------------------------------------------

#' Check if something is a [taxonomy]
#'
#' Check if an object is of the [taxonomy] class
#'
#' @param x An object to test
#'
#' @examples
#' x <- taxonomy(c('Carnivora', 'Felidae', 'Panthera', 'Panthera leo',
#'                 'Panthera tigris', 'Ursidae', 'Ursus', 'Ursus arctos'),
#'               supertaxa = c(NA, 1, 2, 3, 3, 1, 6, 7))
#' is_taxonomy(x)
#' is_taxonomy(1:2)
#'
#' @export
is_taxonomy <- function(x) {
  inherits(x, "taxa_taxonomy")
}


#' Test if taxa are roots
#'
#' Check if each taxon is a root. A root is a taxon with no supertaxon.
#'
#' @param x An object containing taxonomic relationships, such as [taxonomy] objects.
#' @param subset The subset of the tree to search for roots to that subset. Can be indexes or names.
#'
#' @family root functions
#'
#' @examples
#' x <- taxonomy(c('Carnivora', 'Felidae', 'Panthera', 'Panthera leo',
#'                 'Panthera tigris', 'Ursidae', 'Ursus', 'Ursus arctos'),
#'               supertaxa = c(NA, 1, 2, 3, 3, 1, 6, 7))
#' is_root(x)
#' is_root(x, subset = 2:8)
#'
#' @export
is_root <- function(x, subset = NULL) {
  UseMethod('is_root')
}

#' @export
is_root.taxa_taxonomy <- function(x, subset = NULL) {
  if (is.null(subset)) {
    out <- is.na(vctrs::field(x, 'supertaxa'))
  } else {
    subset <- parse_subset_argument(x, subset)
    # subset <- unname(unlist(subtaxa(x, include = TRUE)[subset])) # NOTE: replace with 'subset' version of subtaxa
    supertaxa <- vctrs::field(x, 'supertaxa')
    out <-  seq_len(length(x)) %in% subset & ((! supertaxa %in% subset) | is.na(supertaxa))
  }
  names(out) <- names(x)
  return(out)
}


#' Get root taxa
#'
#' Get the indexes of root taxa in a taxonomy.
#'
#' @inheritParams is_root
#'
#' @family taxonomy functions
#' @family root functions
#'
#' @examples
#' x <- taxonomy(c('Carnivora', 'Felidae', 'Panthera', 'Panthera leo',
#'                 'Panthera tigris', 'Ursidae', 'Ursus', 'Ursus arctos'),
#'               supertaxa = c(NA, 1, 2, 3, 3, 1, 6, 7))
#' roots(x)
#' roots(x, subset = 2:8)
#'
#' @export
roots <- function(x, subset = NULL) {
  UseMethod('roots')
}

#' @export
roots.taxa_taxonomy <- function(x, subset = NULL) {
  out <- which(is_root(x, subset = subset))
  names(out) <- names(x)[out]
  return(out)
}


#' Get subtaxa
#'
#' Get subtaxa indexes for each taxon or another per-taxon value. Subtaxa are
#' taxa contained within a taxon.
#'
#' @param x The object to get subtaxa for, such as a [taxonomy] object.
#' @param subset The subset of the tree to search. Can be indexes or names.
#' @param max_depth The number of ranks to traverse. For example, `max_depth =
#'   1` returns only immediate subtaxa. By default (NULL) information for all
#'   subtaxa is returned (i.e. subtaxa of subtaxa, etc).
#' @param include If `TRUE`, include information for each taxon in the output.
#' @param value Something to return instead of indexes. Must be the same length
#'   as the number of taxa.
#' @param ... Additional arguments.
#'
#' @family taxonomy functions
#' @family subtaxa functions
#'
#' @examples
#' # Generate example data
#' x <- taxonomy(c('Carnivora', 'Felidae', 'Panthera', 'Panthera leo',
#'                 'Panthera tigris', 'Ursidae', 'Ursus', 'Ursus arctos'),
#'               supertaxa = c(NA, 1, 2, 3, 3, 1, 6, 7))
#'
#' # The indexes of all subtaxa (with subtaxa of subtaxa, etc) for each taxon
#' subtaxa(x)
#'
#' # The indexes of immediate subtaxa (without subtaxa of subtaxa, etc) for each taxon
#' subtaxa(x, max_depth = 1)
#'
#' # Return something other than index
#' subtaxa(x, value = tax_name(x))
#'
#' # Include each taxon with its subtaxa
#' subtaxa(x, value = tax_name(x), include = TRUE)
#'
#' # Only return data for some taxa (faster than subsetting the whole result)
#' subtaxa(x, subset = 3)
#'
#' @export
subtaxa <- function(x, subset = NULL, max_depth = NULL, include = FALSE, value = NULL, ...) {
  UseMethod('subtaxa')
}

#' @export
subtaxa.taxa_taxonomy <- function(x, subset = NULL, max_depth = NULL, include = FALSE, value = NULL, ...) {
  # Parse arguments
  subset <- parse_subset_argument(x, subset)
  original_names <- names(x)
  names(x) <- as.character(seq_len(length(x)))

  # Get subtaxa
  get_children <- function(taxon) {
    which(vctrs::field(x, 'supertaxa') == taxon)
  }
  recursive_part <- function(taxon) {
    # Get immediate children of current taxon
    children <- get_children(taxon)
    # Run this function on them to get their output
    child_output <- lapply(children, recursive_part) # stops if no children
    child_output <- stats::setNames(unlist(child_output, recursive = FALSE),
                                    unlist(lapply(child_output, names)))
    # Get all subtaxa from the names of the child output
    child_taxa <- c(taxon, as.numeric(names(child_output)))
    # Combine the child output with the subtaxa for the current taxon
    output <- stats::setNames(c(list(child_taxa), child_output),
                              c(taxon, names(child_output)))
    return(output)
  }
  if (is.null(max_depth) || max_depth > 1) {
    output <- unlist(unname(lapply(roots(x, subset = subset), recursive_part)), recursive = FALSE)
  } else if (max_depth == 0) {
    output <- rep(list(integer(0)), length(x))
  } else {
    output <- stats::setNames(
      lapply(seq_len(length(x)), function(i) c(i, get_children(i))),
      names(x)
    )
  }

  # Remove query taxa from output
  if (! include) {
    output <- lapply(output, `[`, -1)
  }

  # Simulate limited recursion
  #
  # To increase speed, the recursive algorithm only searches starting at
  # root taxa, but this makes it hard to limit the number of rankes returned
  # below each taxon during recursion. Instead, a finite number of
  # recursions are simulated by filtering the results of traversing the
  # entire tree and comparing rank depth between each taxon and its subtaxa.
  if (! is.null(max_depth) && max_depth > 0) {
    rank_depth <- vapply(supertaxa(x, subset = as.numeric(names(output))), length, numeric(1))
    output[] <- lapply(seq_len(length(output)), function(i) {
      subtaxa_depth <- rank_depth[output[[i]]]
      current_depth <- rank_depth[i]
      passing_taxa <- subtaxa_depth - current_depth <= max_depth
      return(output[[i]][passing_taxa])
    })
  }

  # Only return information for the subset
  output <- output[as.character(subset)]

  # Set names and values
  names(x) <- original_names
  output <- apply_names_and_values(x, index_list = output, value = value)
  names(output) <- original_names[subset]

  return(output)
}

#' Number of subtaxa per taxon
#'
#' Get the number of subtaxa per taxon.
#'
#' @inheritParams subtaxa
#'
#' @family subtaxa functions
#'
#' @examples
#' # Generate example data
#' x <- taxonomy(c('Carnivora', 'Felidae', 'Panthera', 'Panthera leo',
#'                 'Panthera tigris', 'Ursidae', 'Ursus', 'Ursus arctos'),
#'               supertaxa = c(NA, 1, 2, 3, 3, 1, 6, 7))
#'
#' # Find number of subtaxa (including subtaxa of subtaxa, etc)
#' n_subtaxa(x)
#'
#' # Find the number of subtaxa one rank below each taxon
#' n_subtaxa(x, max_depth = 1)
#'
#' # Only return data for some taxa (faster than subsetting the whole result)
#' n_subtaxa(x, subset = 1:3)
#'
#' @export
n_subtaxa <- function(x, subset = NULL, max_depth = NULL, include = FALSE) {
  UseMethod('n_subtaxa')
}

#' @export
n_subtaxa.taxa_taxonomy <- function(x, subset = NULL, max_depth = NULL, include = FALSE) {
  output <- vapply(subtaxa(x, subset = subset, max_depth = max_depth, include = include), length, numeric(1))
  names(output) <- names(x)
  return(output)
}


#' Get supertaxa
#'
#' Get supertaxa indexes for each taxon or another per-taxon value. Supertaxa
#' are taxa a taxon is contained in.
#'
#' @param x The object to get supertaxa for, such as a [taxonomy] object.
#' @param subset The subset of the tree to search for roots to that subset. Can
#'   be indexes or names.
#' @param max_depth The number of levels to traverse. For example, `max_depth =
#'   1` returns only immediate supertaxa. By default (NULL) information for all
#'   supertaxa is returned.
#' @param include If `TRUE`, include information for each taxon in the output.
#' @param value Something to return instead of indexes. Must be the same length
#'   as the number of taxa.
#' @param use_na Add a NA to represent the root of the taxonomy (i.e. no
#'   supertaxon)
#' @param ... Additional arguments.
#'
#' @family taxonomy functions
#' @family supertaxa functions
#'
#' @examples
#' # Generate example data
#' x <- taxonomy(c('Carnivora', 'Felidae', 'Panthera', 'Panthera leo',
#'                 'Panthera tigris', 'Ursidae', 'Ursus', 'Ursus arctos'),
#'               supertaxa = c(NA, 1, 2, 3, 3, 1, 6, 7))
#'
#' # The indexes of all supertaxa (with supertaxa of supertaxa, etc) for each taxon
#' supertaxa(x)
#'
#' # Return something other than index
#' supertaxa(x, value = tax_name(x))
#'
#' # Include each taxon with its supertaxa
#' supertaxa(x, value = tax_name(x), include = TRUE)
#'
#' # Only return data for some taxa (faster than subsetting the whole result)
#' supertaxa(x, subset = 3)
#'
#' @export
supertaxa <- function(x, subset = NULL, max_depth = NULL, include = FALSE, value = NULL, use_na = FALSE, ...) {
  UseMethod('supertaxa')
}

#' @export
supertaxa.taxa_taxonomy <- function(x, subset = NULL, max_depth = NULL, include = FALSE, value = NULL, use_na = FALSE, ...) {
  # Parse arguments
  subset <- parse_subset_argument(x, subset)

  # Get supertaxa
  parent_index <- vctrs::field(x, 'supertaxa')
  recursive_part <- function(taxon, depth = 1) {
    supertaxon <- parent_index[taxon]
    if (is.na(supertaxon) || (! is.null(max_depth) && depth >= max_depth)) {
      output <- c(taxon, supertaxon)
    } else {
      output <- c(taxon, recursive_part(supertaxon, depth = depth + 1))
    }
    return(unname(output))
  }
  if (! is.null(max_depth) && max_depth == 0) {
    output <- lapply(subset, function(i) numeric(0))
  } else {
    output <- lapply(subset, recursive_part)
  }

  # Remove query taxa from output
  if (! include) {
    output <- lapply(output, `[`, -1)
  }

  # Remove NAs from output
  if (! use_na) {
    output <- lapply(output, function(x) x[!is.na(x)])
  }

  # Set names and values
  output <- apply_names_and_values(x, index_list = output, value = value)
  names(output) <- names(x)[subset]

  return(output)
}

#' Number of supertaxa per taxon
#'
#' Get the number of supertaxa each taxon is contained in.
#'
#' @inheritParams supertaxa
#'
#' @family supertaxa functions
#'
#' @examples
#' # Generate example data
#' x <- taxonomy(c('Carnivora', 'Felidae', 'Panthera', 'Panthera leo',
#'                 'Panthera tigris', 'Ursidae', 'Ursus', 'Ursus arctos'),
#'               supertaxa = c(NA, 1, 2, 3, 3, 1, 6, 7))
#'
#' # Find number of supertaxa each taxon is contained in
#' n_supertaxa(x)
#'
#' # Only return data for some taxa (faster than subsetting the whole result)
#' n_supertaxa(x, subset = 1:3)
#'
#' @export
n_supertaxa <- function(x, subset = NULL, max_depth = NULL, include = FALSE) {
  UseMethod('n_supertaxa')
}

#' @export
n_supertaxa.taxa_taxonomy <- function(x, subset = NULL, max_depth = NULL, include = FALSE) {
  output <- vapply(supertaxa(x, subset = subset, max_depth = max_depth, include = include), length, numeric(1))
  names(output) <- names(x)
  return(output)
}


#' @rdname taxonomy
#' @export
#' @keywords internal
`[.taxa_taxonomy` <- function(x, ..., subtaxa = TRUE, supertaxa = FALSE, invert = FALSE) {

  index <- seq_len(length(x))
  names(index) <- names(x)
  subset <- index[...]

  # Get taxa of subset
  taxa_subset <- subset
  if (is.null(subtaxa) || (is.logical(subtaxa) && subtaxa) || (is.numeric(subtaxa) && subtaxa > 0)) {
    if (is.logical(subtaxa) && subtaxa) {
      subtaxa = NULL
    }
    taxa_subset <- c(taxa_subset, unlist(subtaxa(x, max_depth = subtaxa, include_input = FALSE)[subset]))
  }
  if (is.null(supertaxa) || (is.logical(supertaxa) && supertaxa) || (is.numeric(supertaxa) && supertaxa > 0)) {
    if (is.logical(supertaxa) && supertaxa) {
      supertaxa = NULL
    }
    taxa_subset <- c(taxa_subset, unlist(supertaxa(x, max_depth = supertaxa, include_input = FALSE)[subset]))
  }
  taxa_subset <- sort(unique(taxa_subset))

  # Invert selection
  if (invert) {
    taxa_subset <- index[-taxa_subset]
  }

  # Apply supset
  taxonomy(taxa = vctrs::field(x, 'taxa')[taxa_subset],
           supertaxa = match(nearest_supertaxa(x, taxa_subset), taxa_subset),
           .names = names(x)[taxa_subset])
}

#' @rdname taxonomy
#' @export
#' @keywords internal
`[[.taxa_taxonomy` <- function(x, i, ..., subtaxa = TRUE, supertaxa = FALSE, invert = FALSE) {
  must_be_length_1(i)
  x[i, ..., subtaxa = subtaxa, supertaxa = supertaxa, invert = invert]
}



#' @export
`[<-.taxa_taxonomy` <- function(x, i, supertaxa = NULL, subtaxa = NULL, value) {
  # Standardize index input
  i <- to_index(x, i)

  # If supertaxa are undefined, use current supertaxa for replaced values and NA for new values
  if (is.null(supertaxa)) {
    parents <- rep(NA, length(i))
    is_replacing <- i %in% seq_len(length(x))
    parents[is_replacing] <- unname(unlist(supertaxa(x, subset = i[is_replacing], max_depth = 1, use_na = TRUE)))
  } else {
    parents <- supertaxa
  }

  # Simplify subtaxa and infer supertaxa if not specified
  if (is.null(subtaxa)) {
    children <- subtaxa(x, subset = i, max_depth = 1)
  } else {
    if (! is.list(subtaxa)) {
      subtaxa <- list(subtaxa)
    }
    children <- lapply(subtaxa, function(s) roots(x, subset = s))
    if (is.null(supertaxa)) {
      parents <- vapply(subtaxa, function(s) enclosing_taxon(x, subset = s), numeric(1))
    }
  }

  # Recycle inputs to same length
  rec <- vctrs::vec_recycle_common(i, parents, children, value)
  i = rec[[1]]
  parents = rec[[2]]
  children = rec[[3]]
  value = rec[[4]]

  # Check that taxa are not made supertaxa of themselves
  if (any(! is.na(parents) & i == parents)) {
    stop(call. = FALSE, 'Cannot set a taxon to be a supertaxon of itself')
  }

  # Check that taxa are not made subtaxa of themselves
  cyclical_subtaxa <- any(vapply(seq_len(length(i)),
                                 function(index) i[[index]] %in% children[[index]],
                                 logical(1)))
  if (cyclical_subtaxa) {
    stop(call. = FALSE, 'Cannot set a taxon to be a subtaxon of itself')
  }

  if (is.null(supertaxa) && is.null(subtaxa)) {
    x <- NextMethod()
  } else {
    # Calls this function to make new rows
    x[i] <- value
  }

  # Remove any cycles in the taxonomy
  # parents_to_reset <- parents[mapply(function(p, c) p %in% unlist(subtaxa(x, subset = c, include = TRUE)), parents, children)]
  children <- lapply(seq_len(length(children)), function(i) {
    children[[i]][children[[i]] != parents[i]]
  })

  # Reset overwritten supertaxa
  nearest_supertaxa <- unname(unlist(lapply(supertaxa(x), function(s) {
    s[! s %in% i][1]
  })))
  vctrs::field(x, 'supertaxa') <- nearest_supertaxa

  # Set supertaxa
  vctrs::field(x, 'supertaxa')[i] <- parents

  # Set subtaxa
  to_set <- unlist(children)
  vctrs::field(x, 'supertaxa')[to_set] <- rep(i, vapply(children, length, numeric(1)))

  return(x)
}


#' @method %in% taxa_taxonomy
#' @export
`%in%.taxa_taxonomy` <- function(x, table) {
  UseMethod("%in%.taxa_taxonomy", table)
}

#' @export
`%in%.taxa_taxonomy.default` <- function(x, table) {
  as_taxon(x) %in% table
}

#' @export
`%in%.character.taxa_taxonomy` <- function(x, table) {
  x %in% as.character(as_taxon(table))
}

#' @export
`%in%.factor.taxa_taxonomy` <- function(x, table) {
  x %in% as.character(as_taxon(table))
}

#--------------------------------------------------------------------------------
# Exported utility functions
#--------------------------------------------------------------------------------


#' @export
as_data_frame.taxa_taxonomy <- function(x, row.names = NULL, optional = FALSE, ...,
                                        stringsAsFactors = FALSE) {
  out <- as_data_frame(as_taxon(x))
  cbind(supertaxon = vctrs::field(x, 'supertaxa'), out)
}

#' @importFrom tibble as_tibble
#' @export
as_tibble.taxa_taxonomy <- function(x, ...) {
  tibble::as_tibble(as_data_frame(x, stringsAsFactors = FALSE), ...)
}

#' Get stems
#'
#' Get stem indexes for each taxon or another per-taxon value.
#'
#' @param x An object with taxonomic relationships, like [taxonomy] objects.
#' @param value Something to return instead of indexes. Must be the same length as the number of taxa.
#' @param ... Additional arguments.
#'
#' @family taxonomy functions
#' @family stem functions
#'
#' @examples
#' x <- taxonomy(c('Carnivora', 'Felidae', 'Panthera', 'Panthera leo',
#'                 'Panthera tigris'),
#'               supertaxa = c(NA, 1, 2, 3, 3))
#' x <- c(x, x)
#' stems(x)
#' stems(x, value = tax_name(x))
#'
#' @export
stems <- function(x, value = NULL, ...) {
  UseMethod('stems')
}

#' @export
stems.taxa_taxonomy <- function(x, value = NULL, ...) {
  # Search until taxa with multiple subtaxa are found
  recursive_part <- function(index) {
    children <- which(vctrs::field(x, 'supertaxa') == index)
    if (length(children) == 0) {
      output <- index
    } else if (length(children) == 1) {
      output <- c(index, recursive_part(children))
    } else {
      output <- numeric(0)
    }
    return(unname(output))
  }
  output <- lapply(roots(x), recursive_part)

  # Set names and values
  output <- apply_names_and_values(x, index_list = output, value = value)

  return(output)
}

#' Check if taxa are stems
#'
#' Check if each taxon is a stem. A stem is any taxa from a root to the first taxon with multiple subtaxa.
#'
#' @inheritParams stems
#' @family stem functions
#'
#' @examples
#' x <- taxonomy(c('Carnivora', 'Felidae', 'Panthera', 'Panthera leo',
#'                 'Panthera tigris'),
#'               supertaxa = c(NA, 1, 2, 3, 3))
#' is_stem(x)
#'
#' @export
is_stem <- function(x) {
  UseMethod('is_stem')
}

#' @export
is_stem.taxa_taxonomy <- function(x) {
  output <- seq_len(length(x)) %in% unlist(stems(x))
  names(output) <- names(x)
  return(output)
}


#' Get leaves
#'
#' Get leaves indexes for each taxon or another per-taxon value. Leaves are taxa with no subtaxa.
#'
#' @param x The object to get leaves for, such as a [taxonomy] object
#' @param value Something to return instead of indexes. Must be the same length as the number of taxa.
#' @param ... Additional arguments.
#'
#' @family taxonomy functions
#' @family leaf functions
#'
#' @examples
#' x <- taxonomy(c('Carnivora', 'Felidae', 'Panthera', 'Panthera leo',
#'                 'Panthera tigris', 'Ursidae', 'Ursus', 'Ursus arctos'),
#'               supertaxa = c(NA, 1, 2, 3, 3, 1, 6, 7))
#' leaves(x)
#' leaves(x, value = tax_name(x))
#'
#' @export
leaves <- function(x, value = NULL, ...) {
  UseMethod('leaves')
}

#' @export
leaves.taxa_taxonomy <- function(x, value = NULL, ...) {
  # Find taxa without subtaxa (leaves)
  subtax <- subtaxa(x)
  no_subtaxa <- which(vapply(subtax, length, numeric(1)) == 0)

  # Subset subtaxa results to just leaves
  output <- lapply(subtax, function(i) i[i %in% no_subtaxa])

  # Set names and values
  output <- apply_names_and_values(x, index_list = output, value = value)
  names(output) <- names(x)

  return(output)
}

#' Check if taxa are leaves
#'
#' Check if each taxon is a leaf. A leaf is a taxon with no subtaxa.
#' subtaxa.
#'
#' @inheritParams leaves
#'
#' @family leaf functions
#' @examples
#' x <- taxonomy(c('Carnivora', 'Felidae', 'Panthera', 'Panthera leo',
#'                 'Panthera tigris', 'Ursidae', 'Ursus', 'Ursus arctos'),
#'               supertaxa = c(NA, 1, 2, 3, 3, 1, 6, 7))
#' is_leaf(x)
#'
#' @export
is_leaf <- function(x) {
  UseMethod('is_leaf')
}

#' @export
is_leaf.taxa_taxonomy <- function(x) {
  output <- ! seq_len(length(x)) %in% vctrs::field(x, 'supertaxa')
  names(output) <- names(x)
  return(output)
}

#' Number of leaves per taxon
#'
#' Get the number of leaves per taxon. A leaf is a taxon with no subtaxa.
#'
#' @inheritParams leaves
#' @family leaf functions
#'
#' @examples
#' x <- taxonomy(c('Carnivora', 'Felidae', 'Panthera', 'Panthera leo',
#'                 'Panthera tigris', 'Ursidae', 'Ursus', 'Ursus arctos'),
#'               supertaxa = c(NA, 1, 2, 3, 3, 1, 6, 7))
#' n_leaves(x)
#'
#' @export
n_leaves <- function(x) {
  UseMethod('n_leaves')
}

#' @export
n_leaves.taxa_taxonomy <- function(x) {
  output <- vapply(leaves(x), length, numeric(1))
  names(output) <- names(x)
  return(output)
}


#' Get internodes
#'
#' Get internodes indexes for each taxon or another per-taxon value. An
#' internode is a taxon with exactly one supertaxon and one subtaxon. These taxa
#' can be removed without losing information on the relationships of the
#' remaining taxa.
#'
#' @param x The object to get internodes for, such as a [taxonomy] object.
#'
#' @family taxonomy functions
#' @family internode functions
#'
#' @examples
#' x <- taxonomy(c('Carnivora', 'Felidae', 'Panthera', 'Panthera leo',
#'                 'Panthera tigris', 'Ursidae', 'Ursus', 'Ursus arctos'),
#'               supertaxa = c(NA, 1, 2, 3, 3, 1, 6, 7))
#' internodes(x)
#'
#' @export
internodes <- function(x) {
  UseMethod('internodes')
}

#' @export
internodes.taxa_taxonomy <- function(x) {
  output <- which(is_internode(x))
  names(output) <- names(x)[output]
  return(output)
}

#' Check if taxa are internodes
#'
#' Check if each taxon is an internode. An internode is a taxon with exactly one
#' supertaxon and one subtaxon. These taxa can be removed without losing
#' information on the relationships of the remaining taxa.
#'
#' @inheritParams internodes
#'
#' @family internode functions
#' @examples
#' x <- taxonomy(c('Carnivora', 'Felidae', 'Panthera', 'Panthera leo',
#'                 'Panthera tigris', 'Ursidae', 'Ursus', 'Ursus arctos'),
#'               supertaxa = c(NA, 1, 2, 3, 3, 1, 6, 7))
#' is_internode(x)
#'
#' @export
is_internode <- function(x) {
  UseMethod('is_internode')
}

#' @export
is_internode.taxa_taxonomy <- function(x) {
  output <- n_subtaxa(x, max_depth = 1) == 1 & n_supertaxa(x, max_depth = 1) == 1
  names(output) <- names(x)
  return(output)
}


#' @export
c.taxa_taxonomy <- function(...) {
  out <- NextMethod()
  if (is_taxonomy(out) && length(list(...)) > 1) {
    in_lengths <- vapply(list(...), length, numeric(1))
    supertaxon_index_offsets <- rep(c(0, cumsum(in_lengths)[-length(in_lengths)]), in_lengths)
    vctrs::field(out, 'supertaxa') <- as.integer(vctrs::field(out, 'supertaxa') + supertaxon_index_offsets)
    attr(tax_rank(out), 'levels') <- do.call(c, lapply(list(...), function(x) attr(tax_rank(x), 'levels')))
  }
  return(out)
}

#' @export
unique.taxa_taxonomy <- function(x, incomparables = FALSE, fromLast = FALSE,
                                 nmax = NA, ..., proxy_func = NULL) {
  # Find duplicated taxa
  unique_indexes <- duplicated_index_taxonomy(x, proxy_func = proxy_func)

  # Set supertaxa for subtaxa of combined taxa
  vctrs::field(x, 'supertaxa') <- unique_indexes[vctrs::field(x, 'supertaxa')]

  # Remove duplicate taxa
  x[unique(unique_indexes), subtaxa = FALSE]
}


#--------------------------------------------------------------------------------
# Internal utility functions
#--------------------------------------------------------------------------------


#' @keywords internal
apply_names_and_values <- function(x, index_list, value) {
  if (is.null(value)) {
    value <- seq_len(length(x))
  }
  lapply(index_list, function(i) {
    stats::setNames(value[i], names(x)[i])
  })
}


#' @keywords internal
enclosing_taxon <- function(x, subset) {
  super <- table(unname(unlist((supertaxa(x)[subset])))) # NOTE use subset version of supertaxa
  possible <- as.numeric(names(super[super == length(subset)]))
  if (length(possible) == 0) {
    possible <- NA
  } else if (length(possible) > 1) {
    possible <- possible[which.max(n_supertaxa(x)[possible])] # NOTE use subset version of n_supertaxa
  }
  return(possible)
}


#' @keywords internal
parse_subset_argument <- function(x, subset) {
  if (is.null(subset)) {
    subset <- seq_len(length(x))
  } else {
    if (is.character(subset)) {
      subset <- match(subset, names(x))
    } else if (! is.numeric(subset)) {
      stop(call. = FALSE, "Invalid value for 'subset' argument of class '", class(x)[1], "'.")
    }
  }
  return(subset)
}


# Returns the nearest supertaxa in the subset for each item in the subset
#' @keywords internal
nearest_supertaxa <- function(x, taxa_subset) {
  unname(unlist(lapply(supertaxa(x)[taxa_subset], function(s) {
    s[s %in% taxa_subset][1]
  })))

}


# Convert a logical or name vector to index
#' @keywords internal
to_index <- function(x, i) {
  if (is.numeric(i)) {
    return(i)
  }
  if (is.character(i)) {
    return(match(i, names(x)))
  }
  if (is.logical(i) && length(i) == length(x)) {
    return(which(i))
  }
  stop('Cannot convery input into an index')
}


#' Returns the index of the first occurrence of each unique element
#'
#' @keywords internal
duplicated_index <- function(x, proxy_func = NULL) {
  if (is.null(proxy_func)) {
    proxy <- x
  } else {
    proxy <- proxy_func(x)
  }
  vapply(seq_len(length(x)), function(i) which(proxy == proxy[[i]])[1], numeric(1))
}

#' Returns the index of the first occurrence of each unique taxon in a taxonomy
#'
#' @keywords internal
duplicated_index_taxonomy <- function(x, proxy_func = NULL) {
  if (is.null(proxy_func)) {
    proxy_func <- taxon_comp_hash
  }

  # Precalculate subtaxa and comparison proxy object for speed
  cached_supertaxa <- vctrs::field(x, 'supertaxa')
  cached_subtaxa <- lapply(seq_len(length(x)), function(i) which(cached_supertaxa == i))
  proxy <- proxy_func(x)

  # Find which taxa are the same and should be combined
  find_changes <- function(i) {
    t_sub <- cached_subtaxa[i]
    unique_i <- i[duplicated_index(proxy[i])]
    is_duplicated <- i != unique_i
    out <- data.frame(from = i[is_duplicated], to = unique_i[is_duplicated])
    combined_subtaxa <- lapply(unique(unique_i), function(identical_i) unlist(t_sub[unique_i == identical_i]))
    c(list(out), unlist(lapply(combined_subtaxa, find_changes), recursive = FALSE)) # NOTE: This is recursive
  }
  changes <- dplyr::bind_rows(find_changes(roots(x)))
  out <- seq_len(length(x))
  out[changes$from] <- changes$to
  return(out)
}
ropenscilabs/taxa documentation built on Feb. 23, 2024, 6:31 p.m.