# Chunk defaults for this document: merge each chunk's source and output into
# a single block, and prefix printed output lines with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

# Reset database state before the analysis starts.
# NOTE(review): presumably this closes any cached taxadb connection and stops
# the embedded MonetDBLite instance so later chunks start clean -- confirm.
# td_disconnect() is reached via `:::`, i.e. it is not an exported function.
taxadb:::td_disconnect()
MonetDBLite::monetdblite_shutdown()
library(taxadb)
library(dplyr)
library(tidyr)

Work in progress -- the mismatches between itis, col and wd here illustrate the complexity of name matching. Some names resolve multiple times. Some authorities (e.g. ITIS) recognize certain synonyms that they still haven't mapped to an accepted id.

# Bring the "fb" table into memory, keeping only its id (renamed to `fb`) and
# the species name.
fish <- collect(select(taxa_tbl("fb"), fb = id, species))
# Species names as a plain vector; this is the input to the id lookups below.
species <- fish[["species"]]

# Look the species names up against three authorities, one result table each.
itis <- ids(species, authority = "itis")
wd <- ids(species, authority = "wd")
col <- ids(species, authority = "col")

## Note that each of these authorities returns more rows than we had in the
## input table!
# Row count per table, measured as the length of its first column.
# vapply() pins the result to a named integer vector; sapply() would let the
# return type vary with its input.
vapply(
  list(fb = fish, itis = itis, wd = wd, col = col),
  function(x) length(x[[1]]),
  integer(1)
)
# Add one identifier column per authority to the fish table. The itis, col, wd
# and fb lookups are kept here but remain commented out.
my_taxa <- mutate(
  fish,
  # itis = get_ids(species, "itis"),
  ncbi = get_ids(species, "ncbi"),
  # col = get_ids(species, "col"),
  gbif = get_ids(species, "gbif"),
  # wd = get_ids(species, "wd"),
  tpl = get_ids(species, "tpl"),
  # fb = get_ids(species, "fb"),
  slb = get_ids(species, "slb")
)
# Per column (excluding the species name), count the non-missing entries.
purrr::map_dbl(
  select(my_taxa, -species),
  function(id_col) sum(!is.na(id_col))
)

It looks like three plants have scientific names that match some of our fish:

# Look the fish species names up against the "tpl" authority and keep only the
# rows that actually received an id.
dup <- filter(
  ids(pull(fish, species), authority = "tpl"),
  !is.na(id)
)
dup

This probably also explains why col and wd return more rows than there are input names:

# Re-derive the input names, then report how many more rows each authority
# returns than the number of names supplied.
species <- fish[["species"]]

col_fb <- ids(species, authority = "col")
nrow(col_fb) - length(species)

wd_fb <- ids(species, authority = "wd")
nrow(wd_fb) - length(species)
# Earlier approach, kept for reference:
# col_hierarchy <- classification(id = col_fb$id) %>% filter(kingdom == "Animalia")

# Restrict the col hierarchy table to Animalia, keep only the rows whose id
# appears in col_fb (copy = TRUE lets the local ids be joined against the
# hierarchy table), then bring id and species into memory.
semi_join(
  filter(taxa_tbl("col", "hierarchy"), kingdom == "Animalia"),
  select(col_fb, id),
  copy = TRUE
) %>%
  select(id, species) %>%
  collect()
# TRUE for each row of my_taxa that has at least one non-missing identifier
# among the authority columns (every column except species and fb).
has_match <- rowSums(
  purrr::map_dfc(
    select(my_taxa, -species, -fb),
    function(id_col) !is.na(id_col)
  )
) > 0

# Show the species for which no authority produced an id.
filter(my_taxa, !has_match)
# NOTE(review): presumably releases the taxadb database connection -- confirm.
taxadb:::td_disconnect()


cboettig/taxadb documentation built on April 17, 2024, 6:34 p.m.