R/opentree_taxonomy.R

Defines functions recover_mrcaott get_ott_children get_valid_children get_ott_clade .get_ott_lineage get_ott_lineage check_ott_input clean_taxon_info_children

Documented in check_ott_input clean_taxon_info_children get_ott_children get_ott_clade .get_ott_lineage get_ott_lineage get_valid_children recover_mrcaott

# functions (from phunding package) to deal with open tree Taxonomy rotl
# functions, namely, rotl::taxonomy_taxon_info()

#' Identify, extract and clean taxonomic children names from a [taxonomy_taxon_info()]
#' output.
#'
#' @param taxon_info An output of [rotl::taxonomy_taxon_info()].
#' @param invalid A character vector of "flags", i.e., characteristics that are
#'    used by Open Tree of Life Taxonomy to detect invalid taxon names.
#' @return A list with valid children unique OTT names, OTT ids and taxonomic ranks.
#' @description `clean_taxon_info_children` eliminates all taxa that will give
#'   problems when trying to retrieve an induced subtree from Open Tree of Life.
# # ' @examples
# # ' # Some suppressed taxa within "Gleicheniales" family of ferns:
# # ' tt <- rotl::taxonomy_taxon_info(3866048)
# # ' rotl::taxonomy_taxon_info(5262766)
#' @export
clean_taxon_info_children <- function(taxon_info,
                                      invalid = c("barren",
                                                  "extinct",
                                                  "uncultured",
                                                  "major_rank_conflict",
                                                  "incertae_sedis",
                                                  "unplaced",
                                                  "conflict",
                                                  "environmental",
                                                  "not_otu",
                                                  "hidden",
                                                  "hybrid")) {
  for (i in seq(taxon_info)) {
    # length(tax_info[[i]][[1]]$children)
    # sapply(tax_info[[i]][[1]]$children, length)
    # length(sapply(sapply(tax_info[[i]][[1]]$children, "[", "flags"), function(y) unlist(tolower(y))))
    # sapply(sapply(tax_info[[i]][[1]]$children, "[", "flags"), function(y) unlist(tolower(y)))[1]
    ii <- lapply(sapply(sapply(taxon_info[[i]]$children, "[", "flags"), unlist), function(y) any(invalid %in% y))
    # ii <- lapply(sapply(sapply(taxon_info[[i]][[1]]$children, "[", "flags"), unlist), function(y) any(toupper(invalid) %in% y))
    if (length(ii) > 0) {
      taxon_info[[i]]$children <- taxon_info[[i]]$children[!unlist(ii)]
    }
    # now clean names with sp. or environmental
    ii <- unlist(sapply(c("sp\\.", "environmental", "unclassified", "incertae"), function(x) grep(x, sapply(sapply(taxon_info[[i]]$children, "[", "unique_name"), unlist))))

    if (length(ii) > 0) {
      taxon_info[[i]]$children <- taxon_info[[i]]$children[-unique(ii)]
    }
    # taxon_info[[i]][[1]]$children <- taxon_info[[i]][[1]]$children[!unlist(ii)]
  }
  return(taxon_info)
}

#' Check input for usage in other `datelife` functions
#'
#' `check_ott_input is currently used in functions `[get_ott_clade()],
#' [get_ott_children()], and [get_otol_synthetic_tree()].
#'
#' @param input Optional. A character vector of names or a `datelifeQuery` object.
#' @param ott_ids If not NULL, it takes this argument and ignores input. A
#'   numeric vector of ott ids obtained with [rotl::taxonomy_taxon_info()] or
#'   [rotl::tnrs_match_names()] or [tnrs_match()].
#' @inheritDotParams make_datelife_query
#' @return A named numeric vector of valid Open Tree Taxonomy (OTT) ids.
#' @export
#' @details By default, it uses the `ott_id` argument if it is not NULL.
check_ott_input <- function(input = NULL, ott_ids = NULL, ...) {
  # for debugging:
  # utils::data(threebirds.rda)
  # input <- threebirds_query$cleaned_names
  # ott_ids <- threebirds_query$ott_ids
  # check that there is at least one input to attempt get ott ids from
  if (!curl::has_internet()) {
    message("---> There is no internet connection.")
    message("Please try again later - returning NA.")
    return(NA)
  }
  if (is.null(input) & is.null(ott_ids)) {
    message("Both 'input' and 'ott_ids' arguments are empty.")
    message("Please provide a non NULL value for 'input' or 'ott_id' - returning NA.")
    return(NA)
  }
  ##############################################################################
  # case when ott_ids argument is NULL or NA (not provided)
  # get OTT ids from input argument
  ##############################################################################
  if (is.null(ott_ids) | all(is.na(ott_ids))) {
    # checks that input is a datelifeQuery object, otherwise it uses make_datelife_query on input
    if (!is_datelife_query(input)) {
      input <- make_datelife_query(input, ...)
    }
    if (is.null(input$ott_id)) {
      message(input)
      stop("OTT ids are NULL.")
    }
    if (all(is.numeric(input$ott_id) & !is.na(input$ott_id))) {
      ott_ids <- input$ott_ids
      names(ott_ids) <- input$cleaned_names
    } else {
      input_tnrs <- datelife::tnrs_match(input = input$cleaned_names)
      # should we clean tnrs from invalid? what about NA's? Maybe only NA's at the end, so we can tell users what was unsuccessful
      # if we decide to clean, the two following lines should be uncommented:
      # input_tnrs <- clean_tnrs(tnrs = input_tnrs)
      # input_tnrs <- input_tnrs[!is.na(input_tnrs$unique_name),]  # gets rid of names not matched with rotl::tnrs_match_names; otherwise rotl::tol_induced_subtree won't run
      ott_ids <- input_tnrs$ott_id
      names(ott_ids) <- input_tnrs$unique_name
    }
  }
  ##############################################################################
  # checking values from ott_ids
  ##############################################################################
  # ott_ids <- suppressWarnings(as.numeric(ott_ids))
  ott_ids_original <- ott_ids
  if (!is.numeric(ott_ids)) {
    message("---> Coercing input to numeric.")
    ott_ids <- suppressWarnings(as.numeric(ott_ids))
  }
  if (any(is.na(ott_ids))) {
    fails <- ott_ids_original[which(is.na(ott_ids))]
    verb <- ifelse(length(fails) == 1, "is", "are")
    fails <- paste(fails, collapse = '", ')
    message(paste0('---> Input ("',
            paste(ott_ids_original[which(is.na(ott_ids))],
                  collapse = '", "'),
            '") ',
            verb,
            ' not numeric.'))
  }
  if (all(is.na(ott_ids))) {
    message("---> Input provided is invalid.")
    message("Please fix and try again - returning NA.")
    return(NA)
  }
  ##############################################################################
  # adding names to ott_ids if needed
  ##############################################################################
  if (is.null(names(ott_ids))) {
    # ott_ids <- c(1, ott_ids) # if ott_ids does not exist it will give an error
    # so we're catching it in an sapply, and giving "error" when it errors.
    names(ott_ids) <- unlist(sapply(seq(length(ott_ids)), function(i) {
      tryCatch(rotl::tax_name(rotl::taxonomy_taxon_info(ott_ids = ott_ids[i])),
        error = function(e) "error"
      )
    }))
    # then we subset it:
    if (any(grepl("error", names(ott_ids)))) {
      message(paste0("\nott_id '",
              paste(ott_ids[grepl("error", names(ott_ids))], collapse = "', '"),
              "' not found in Open Tree of Life Taxonomy."))
      ott_ids <- ott_ids[!grepl("error", names(ott_ids))]
    }
  }
  # now check for NAs:
  if (any(is.na(ott_ids))) {
    message(paste0("\nInput '",
                   paste(names(input)[which(is.na(ott_ids))], collapse = "', '"),
                   "' not found in Open Tree of Life Taxonomy."))
    ott_ids <- ott_ids[which(!is.na(ott_ids))]
  }
  if (length(ott_ids) < 1) {
    message("---> At least one valid taxon name or numeric OTT id is needed to continue.")
    message("Please check your inputs and try again - returning NA.")
    return(NA)
  }
  return(ott_ids)
}
#' Get the Open Tree of Life Taxonomic identifier (OTT id) and name of all lineages
#'  from one or more input taxa.
#' @inheritParams check_ott_input
#' @return A list of named numeric vectors of ott ids from input and all the clades it belongs to.
#' @examples
#' \dontrun{ # This is a flag for package development. You are welcome to run the example.
#'
#' taxa <- c("Homo", "Bacillus anthracis", "Apis", "Salvia")
#' lin <- get_ott_lineage(taxa)
#' lin
#'
#' # Look up an unknown OTT id:
#' get_ott_lineage(ott_id = 454749)
#'
#' } # end dontrun
#' @export
get_ott_lineage <- function(input = NULL, ott_ids = NULL) {
  # ott_ids <- c(335590, 555178, 748370, 1070795, 3942422, 907458, 472526, 820645, 31926, 756728, 605194, 490099)
  # ott_ids <- c(417116, 120960)
  # ott_ids <- 417116
  input_ott_match <- check_ott_input(input, ott_ids)
  tax_info <- .get_ott_lineage(input_ott_match)
  lin <- lapply(tax_info, "[", "lineage")
  ott_names <- lapply(lin, function(x) unlist(sapply(x[[1]], "[", "unique_name")))
  # unlist(sapply(lin[[1]][[1]], "[", "unique_name"))
  # length(ott_names) == length(tax_info)
  # ott_ids <- sapply(seq(length(lin)), function(x) stats::setNames(unlist(sapply(lin[[x]][[1]], "[", "ott_id")), ott_names[[x]]))
  ott_ids <- lapply(seq(length(lin)), function(x) unlist(sapply(lin[[x]][[1]], "[", "ott_id")))
  ott_ranks <- lapply(seq(length(lin)), function(x) unlist(sapply(lin[[x]][[1]], "[", "rank")))
  # if(length(ott_ids) == 1){
  #     res <- list(matrix(c(ott_ids, ott_ranks), ncol =2, dimnames = list(ott_names, c("ott_ids", "ott_ranks"))))
  # } else {
  mat <- function(x) {
    matrix(c(ott_ids[[x]], ott_ranks[[x]]), ncol = 2, dimnames = list(ott_names[[x]], c("ott_ids", "ott_ranks")))
  }
  res <- lapply(seq(length(lin)), mat)
  # }
  # enhance: ott_id names should be the name of the rank, look at the example to see why
  names(res) <- names(input_ott_match)
  # stats::setNames(ott_ids, names(input_ott_match))
  return(res)
}
#' Get the lineage of a set of taxa.
#' `.get_ott_lineage` uses [rotl::taxonomy_taxon_info()] with `include_lineage = TRUE`.
#' @param input_ott_match An Output of check_ott_input function.
#' @return A taxonomy_taxon_info object
.get_ott_lineage <- function(input_ott_match) {
  tax_info <- vector(mode = "list", length = length(input_ott_match))
  progression <- utils::txtProgressBar(min = 0, max = length(tax_info), style = 3)
  for (i in seq(length(input_ott_match))) {
    tax_info[i] <- tryCatch(rotl::taxonomy_taxon_info(input_ott_match[i], include_lineage = TRUE),
      error = function(e) NA
    )
    utils::setTxtProgressBar(progression, i)
  }
  cat("\n") # just to make the progress bar look better
  tax_info
}


#' Get the Open Tree of Life Taxonomic identifiers (OTT ids) and name of one or
#' several given taxonomic ranks from one or more input taxa.
#' @inheritParams check_ott_input
#' @inheritParams get_ott_children
#' @return A list of named numeric vectors with OTT ids from input and all requested ranks.
#' @export
get_ott_clade <- function(input = NULL, ott_ids = NULL, ott_rank = "family") {
  # ott_ids= c('649007', '782239', '782231', '1053057', '372826', '766272', '36015', '914245', '873016', '684051')
  # ott_ids = c('431493', '431493', '431493', '431493', '431493', '431493', '431493', '429482', '429482', '429482')
  rank <- ott_rank
  input_ott_match <- check_ott_input(input, ott_ids)
  tax_info <- .get_ott_lineage(input_ott_match)
  # names(tax_info[[10]])
  # sapply(tax_info[[10]]$lineage, "[", "rank")
  # length(tax_info[[10]]$lineage)
  # names(tax_info[[10]]$lineage[[1]])
  # tax_info[[10]]$lineage[[1]]$flags
  input_ott_names <- unlist(sapply(tax_info, "[", "unique_name"))
  rank_ott_ids <- rank_names <- lapply(seq(length(rank)), function(x) rep(NA, length(input_ott_names)))
  # I still need to drop all invalid lineages first here!!!
  for (i in seq(length(rank))) {
    ready <- grepl(rank[i], unlist(sapply(tax_info, "[", "rank")))
    if (any(ready)) {
      rank_names[[i]][ready] <- unlist(sapply(tax_info[ready], "[", "unique_name"))
      rank_ott_ids[[i]][ready] <- unlist(sapply(tax_info[ready], "[", "ott_id"))
    }
    if (!all(ready)) {
      rank_names[[i]][!ready] <- sapply(tax_info[!ready], function(x) {
        lin <- tryCatch(x$lineage[grep(paste0("^", rank[i], "$"), sapply(x$lineage, "[", "rank"))][[1]]$unique_name,
          error = function(e) NA
        )
        return(lin)
      })
      rank_ott_ids[[i]][!ready] <- sapply(tax_info[!ready], function(x) {
        lin <- tryCatch(x$lineage[grep(paste0("^", rank[i], "$"), sapply(x$lineage, "[", "rank"))][[1]]$ott_id,
          error = function(e) NA
        )
        return(lin)
      })
    }
    names(rank_ott_ids[[i]]) <- rank_names[[i]]
    # length(rank_ott_ids[i])
    # stop()
  }
  names(input_ott_match) <- input_ott_names
  res <- c(list(input_ott_match), rank_ott_ids)
  names(res) <- c("input", rank)
  return(res)
}

# .get_ott_clade <- function(input, rank){
#     tax_info <- tryCatch(rotl::taxonomy_taxon_info(input, include_lineage = TRUE),
#     error = function(e) NA)
#     input_ott_names <- tax_info$unique_name
#     rank_ott_ids <- rank_names <- vector(mode = "list", length = length(rank))
#     # I still need to drop all invalid lineages first here!!!
#     for (i in seq(length(rank))){
#         if(rank[i] %in% tax_info[[1]]$rank){
#             rank_names <- tax_info[[1]]$unique_name
#             rank_ott_ids <- tax_info[[1]]$ott_id
#         } else {
#             rank_names[[i]] <- sapply(tax_info, function(x) {
#                 lin <- tryCatch(x$lineage[grep(paste0("^", rank[i], "$"), sapply(x$lineage, "[", "rank"))][[1]]$unique_name,
#                 error = function(e) NA)
#                 return(lin)
#             })
#             rank_ott_ids[[i]] <- sapply(tax_info, function(x) {
#                 lin <- tryCatch(x$lineage[grep(paste0("^", rank[i], "$"), sapply(x$lineage, "[", "rank"))][[1]]$ott_id,
#                 error = function(e) NA)
#                 return(lin)
#             })
#         }
#         names(rank_ott_ids[[i]]) <- rank_names[[i]]
#         # length(rank_ott_ids[i])
#         # stop()
#     }
#     return(rank_ott_ids)
# }

#'
#' Extract valid children from given taxonomic name(s) or Open Tree of Life
#' Taxonomic identifiers (OTT ids) from a taxonomic source.
#'
#' @inheritParams check_ott_input
#' @param reference_taxonomy A character vector with the desired taxonomic sources.
#'  Options are "ncbi", "gbif" or "irmng". Any other value will retrieve data
#'  from all taxonomic sources. The function defaults to "ncbi".
#' @return A named list containing valid taxonomic children of given taxonomic name(s).
#' @details
#' GBIF and other taxonomies contain deprecated taxa that are not marked as such
#'  in the Open Tree of Life Taxonomy.
#' We are relying mainly in the NCBI taxonomy for now.
#' @examples
#' # genus Dictyophyllidites with ott id = 6003921 has only extinct children
#' # in cases like this the same name will be returned
#'
#' tti <- rotl::taxonomy_taxon_info(6003921, include_children = TRUE)
#' gvc <- get_valid_children(ott_ids = 6003921)
#'
#' # More examples:
#'
#' get_valid_children(ott_ids = 769681) # Psilotopsida
#' get_valid_children(ott_ids = 56601) # Marchantiophyta
# input= "Erythrospiza" # obsolete genus that is still on OTT
# input = c("Felis", "Homo", "Malvaceae")
# input = "Telespiza"
#' @export
get_valid_children <- function(input = NULL, ott_ids = NULL, reference_taxonomy = "ncbi") {
  reference_taxonomy_here <- tryCatch(match.arg(reference_taxonomy, c("ncbi", "gbif", "irmng")),
    error = function(e) NULL
  )
  input_ott_match <- check_ott_input(input, ott_ids)
  all_children <- vector(mode = "list", length = length(input_ott_match))
  # monotypic <- vector(mode = "logical", length = length(input_ott_match))
  progression <- utils::txtProgressBar(min = 0, max = length(all_children), style = 3)
  for (i in seq(length(input_ott_match))) {
    tt <- tryCatch(rotl::taxonomy_taxon_info(input_ott_match[i], include_children = TRUE),
      error = function(e) NA
    )
    # length(tt[[1]])
    tt <- clean_taxon_info_children(tt) # removes all invalid children
    if (length(tt[[1]]$children) > 0) {
      # sapply(tt[[1]]$children, "[", "flags")
      # sapply(tt[[1]]$children, "[", "unique_name")
      # which(unlist(sapply(tt[[1]]$children, "[", "unique_name")) == "Mesangiospermae")
      # tt[[1]]$children[108]
      rr <- unname(unlist(sapply(tt[[1]]$children, "[", "rank")))
      child <- unname(unlist(sapply(tt[[1]]$children, "[", "ott_id")))
      tax_sources <- unname(lapply(seq(length(tt[[1]]$children)), function(k) {
        unlist(tt[[1]]$children[[k]]$tax_sources)
      }))
      # names(tt[[1]]$children[[1]])
      # unname(unlist(sapply(tt[[1]]$children, "[", "flags")))
      # unname(unlist(sapply(tt[[1]]$children, "[", "is_suppressed")))
      # unname(unlist(sapply(tt[[1]]$children, "[", "source")))
      names(tax_sources) <- names(child) <- names(rr) <- unname(unlist(sapply(tt[[1]]$children, "[", "unique_name")))
      monotypic <- FALSE
    } else {
      child <- tt[[1]]$ott_id
      rr <- tt[[1]]$rank
      # tax_sources <- list(unlist(tt[[1]]$children[[2]]$tax_sources))
      tax_sources <- list(unlist(tt[[1]]$tax_sources))
      names(tax_sources) <- names(child) <- names(rr) <- tt[[1]]$unique_name
      monotypic <- TRUE
    }
    if (inherits(reference_taxonomy_here, "character")) {
      keep <- sapply(tax_sources, function(x) any(grepl(reference_taxonomy_here, x)))
      child <- child[keep]
      rr <- rr[keep]
    }
    all_children[[i]] <- list(children = data.frame(ott_id = child, rank = rr), is_monotypic = monotypic)
    utils::setTxtProgressBar(progression, i)
  }
  cat("\n") # just to make the progress bar look better
  names(all_children) <- names(input_ott_match)
  return(all_children)
}

#' Use this instead of [rotl::tol_subtree()] when taxa are not in synthesis tree and
#' you still need to get all species or an induced OpenTree subtree
#' @inheritParams check_ott_input
#' @param ott_rank A character vector with the ranks you wanna get lineage children from.
#' @param ... Other arguments to pass to [get_valid_children()].
#' @return A `data.frame` object.
#' @examples
#' # An example with the dog genus:
#'
#' # It is currently not possible to get an OpenTree subtree of a taxon that is
#' #  missing from the OpenTree synthetic tree.
#' # The dog genus is not monophyletic in the OpenTree synthetic tree, so in
#' #  practice, it has no node to extract a subtree from.
#' tnrs <- tnrs_match("Canis")
#'
#' \dontrun{ # This is a flag for package development. You are welcome to run the example.
#' rotl::tol_subtree(tnrs$ott_id[1])
#' #> Error: HTTP failure: 400
#' #> [/v3/tree_of_life/subtree] Error: node_id was not found (broken taxon).
#'
#' } # end dontrun
#'
#' ids <- tnrs$ott_id[1]
#' names(ids) <- tnrs$unique_name
#' children <- get_ott_children(ott_ids = ids) # or
#' children <- get_ott_children(input = "Canis")
#' if (!is.na(children)) {
#' str(children)
#' ids <- children$Canis$ott_id
#' names(ids) <- rownames(children$Canis)
#' tree_children <- datelife::get_otol_synthetic_tree(ott_ids = ids)
#' plot(tree_children, cex = 0.3)
#' }
#'
#' # An example with flowering plants:
#'
#' \dontrun{ # This is a flag for package development. You are welcome to run the example.
#'
#' oo <- get_ott_children(input = "magnoliophyta", ott_rank = "order")
#' # Get the number of orders of flowering plants that we have
#' sum(oo$Magnoliophyta$rank == "order")
#'
#' } # end dontrun
#' @export
# children <- get_ott_children(input = "Fringillidae")
# rotl::tol_subtree(ott_id = 839319) #broken taxa
# children <- get_ott_children(input = "Cetaceae")
# nrow(children[[1]])
# sum(children[[1]]$rank == "species")
get_ott_children <- function(input = NULL, ott_ids = NULL, ott_rank = "species", ...) {
  # input <- c("Felis", "Homo", "Malvaceae")
  input_ott_match <- check_ott_input(input, ott_ids)
  if (is.na(input_ott_match)) {
    # no need for message, here, they are all wrapped in check_ott_input
    return(NA)
  }
  all_children <- vector(mode = "list", length = length(input_ott_match))
  input_ott_match_ranks <- tryCatch({unlist(sapply(rotl::taxonomy_taxon_info(input_ott_match), "[", "rank"))},
                                    error = function(e) {
                                      message(e)
                                      return(NA)
                                    })
  if (is.na(input_ott_match_ranks)) {
    message("---> Could not retrieve taxonomic data from Open Tree of Life database for the provided inputs.")
    message("Returning NA.")
    return(NA)
  }
  # grepl(paste0("\\b", ott_rank, "\\b"), c("species", "subspecies"))
  ott_ranki <- grepl(paste0("\\b", ott_rank, "\\b"), input_ott_match_ranks)
  df <- data.frame(ott_id = input_ott_match[ott_ranki], rank = rep(ott_rank, sum(ott_ranki)))
  all_children[ott_ranki] <- lapply(seq(nrow(df)), function(i) df[i, ])
  # all_children[c(T,F,T)] <- input[c(T,F,T)]
  # sum(c(T,F,T))
  # progression <- utils::txtProgressBar(min = 0, max = length(all_children), style = 3)
  if (any(sapply(all_children, is.null))) {
    input_ott_match <- input_ott_match[sapply(all_children, is.null)]
    for (i in seq(length(input_ott_match))) {
      mm <- data.frame(ott_ids = vector(mode = "numeric", length = 0), rank = vector(mode = "logical", length = 0))
      vv <- get_valid_children(ott_ids = input_ott_match[i], ...)
      success <- vv[[1]]$children$rank == ott_rank | vv[[1]]$is_monotypic
      if (any(success)) {
        mm <- rbind(mm, vv[[1]]$children[success, ])
      }
      while (!all(success)) {
        vv <- get_valid_children(ott_ids = unlist(sapply(sapply(
          vv, "[",
          "children"
        ), "[", "ott_id"))[!success], ...)
        if (any(unlist(sapply(vv, "[", "is_monotypic")))) {
          dd <- do.call("rbind", sapply(vv[unlist(sapply(vv, "[", "is_monotypic"))], "[", "children"))
          rownames(dd) <- unname(unlist(sapply(sapply(vv[unlist(sapply(vv, "[", "is_monotypic"))], "[", "children"), rownames)))
          # rownames(dd) <- gsub("\\..*", "", rownames(dd))
          mm <- rbind(mm, dd)
          vv <- vv[!unlist(sapply(vv, "[", "is_monotypic"))]
        }
        success <- unlist(sapply(sapply(vv, "[", "children"), "[", "rank")) == ott_rank
        if (any(success)) {
          dd <- do.call("rbind", sapply(vv, "[", "children"))[success, ]
          rownames(dd) <- unname(unlist(sapply(sapply(vv, "[", "children"), rownames)))[success]
          # rownames(dd) <- gsub("\\..*", "", rownames(dd))
          mm <- rbind(mm, dd)
        }
      }
      # utils::setTxtProgressBar(progression, i)
      # its easier to do the row naming in the previous steps, bc the following is much time consuming:
      # rownames(mm) <- unname(unlist(sapply(rotl::taxonomy_taxon_info(mm$ott_ids), "[", "unique_name")))
      all_children[[i]] <- mm
    }
  }
  names(all_children) <- names(input_ott_match)
  return(all_children)
}

#' Get an mrcaott tag from an OpenTree induced synthetic tree and get its name and ott id
#' @param tag A character vector with the mrca tag
#' @return A numeric vector with ott id from original taxon named with the corresponding ott name
#' @export
recover_mrcaott <- function(tag) {
  # for debugging:
  # tag <- "mrcaott7813ott454749"
  # tag = "mrcaott4291ott4292"
  # tag = "mrcaott27233ott44289"
  # tag = "mrcaott80776ott602508"
  if (!grep("mrcaott", tag)) {
    message("incorrect tag")
    return(NA)
  }
  mrca_ottids1 <- gsub("mrcaott", "", tag)
  mrca_ottids1 <- gsub("ott.*", "", mrca_ottids1)
  ott_ids <- c(mrca_ottids1, gsub("mrcaott.*ott", "", tag))
  mrca_lin <- datelife::get_ott_lineage(ott_ids = as.numeric(ott_ids))
  mm <- match(rownames(mrca_lin[[1]]), rownames(mrca_lin[[2]]))
  mm <- mm[!is.na(mm)]
  return(stats::setNames(mrca_lin[[2]][mm[1], "ott_ids"], rownames(mrca_lin[[2]])[mm[1]]))
}

# TODO: all these functions could be wrapped up into two or three single ones
# one to get one or all the lineages above a taxon
# another one to get some or all lineages below a taxon
# one to get everything: all_upper, all_lower, all (both upper and lower), or th eones specified by the users
# for example:
# get_ott_lineage(input = c("Felis", "Homo"), ott_ids = NULL, ott_rank = c("all", "all_upper", "all_lower"))
# get a list of all ranks available in ott
phylotastic/datelife documentation built on April 29, 2024, 11:54 p.m.