# Benchmark setup: load packages, prepare the local ITIS database through
# taxadb, and read the example species names that the timings below match.
library(tidyverse)
library(taxadb)

td_create("itis")

# Example data file located inside the installed taxadb package.
# `mustWork = TRUE` makes a missing file fail here with a clear error instead
# of handing the empty string "" to read_tsv().
bbs <- read_tsv(
  system.file("extdata/bbs.tsv", package = "taxadb", mustWork = TRUE)
)

name <- bbs$species  # character vector of names to look up
authority <- "itis"  # authority passed to every taxadb call below
db <- td_connect()   # connection reused by the queries below

# Matching mode: "starts_with" or "contains".
# NOTE: this variable shares its name with base::match(); calls to match()
# still resolve to the function, but avoid reusing the name elsewhere.
match <- "contains"
# Benchmark 1: match every name inside the database using SQL LIKE.
# One LIKE pattern is built per name; `%` is the SQL wildcard.
name_pattern <- switch(match,
  starts_with = paste0(name, "%"),
  contains = paste0("%", name, "%"),
  # Without a default, switch() silently returns NULL for an unknown mode and
  # the benchmark would "succeed" on zero patterns.
  stop("`match` must be \"starts_with\" or \"contains\"", call. = FALSE)
)

system.time({
  # The table reference does not depend on the pattern, so build it once
  # instead of once per name.
  taxonid_tbl <- taxa_tbl(authority, "taxonid", db)
  out <- purrr::map_dfr(name_pattern, function(pattern) {
    taxonid_tbl %>%
      # `name` is the table column here; `!!` inlines the local pattern string
      # so it can never be mistaken for a column called `pattern`.
      filter(name %like% !!pattern) %>%
      collect()
  })
})

out
## Strategy: extract all potential matches by Genus alone.
## Assumes the first word of each name is a genus name!
only_genus <- function(name) stringi::stri_extract_first_words(name)

# Candidate table restricted to the genera present in `name`. Every step stays
# uncollected (collect = FALSE) until the final collect(): keep the `name`
# column returned by ids(), join it against the `genus` column of the
# hierarchy table to obtain ids, then join those ids to the taxonid table.
id_tbl <- ids(only_genus(name), authority = authority, db = db,
              collect = FALSE) %>%
  select(name) %>%
  inner_join(
    select(taxa_tbl(authority, "hierarchy", db), id, genus),
    by = c(name = "genus")
  ) %>%
  select(id) %>%
  inner_join(taxa_tbl(authority, "taxonid", db), by = "id") %>%
  distinct() %>%
  collect()

# One regular expression per name for the in-memory grepl() benchmarks.
# NOTE(review): names are used as regex source unescaped; a name containing
# metacharacters such as "." or "(" would alter the pattern -- confirm the
# input never contains them.
name_regex <- switch(match,
  # grepl() matches anywhere in the string unless anchored, so "^" is required
  # for a real prefix match.
  starts_with = paste0("^", name, ".*"),
  contains = paste0(".*", name, ".*"),
  stop("`match` must be \"starts_with\" or \"contains\"", call. = FALSE)
)

## Using the genus subset -- a much smaller list of matches -- is this good
## or bad?
# `id_tbl` is deliberately NOT re-collected from the full taxonid table before
# this timing; doing so would discard the genus subset being measured.
system.time({
  out2 <- purrr::map_dfr(name_regex, function(pattern) {
    filter(id_tbl, grepl(pattern, name))
  })
})

## In memory, even slower!!
# Same matching, but against the entire taxonid table pulled into memory; the
# collect() is inside the timed block so its cost is included.
system.time({
  id_tbl <- collect(taxa_tbl(authority, "taxonid", db))
  out2 <- purrr::map_dfr(name_regex, function(pattern) {
    filter(id_tbl, grepl(pattern, name))
  })
})
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.