knitr::opts_chunk$set(echo = TRUE)
library(here) library(tidyverse) library(lazyeval) library(rdataretriever) library(taxadb) source(here::here("data-raw", "helper_functions.R"))
Get mammal and trait data
bird_trait <- read_csv(system.file("extdata", "elton_traits/elton_traits_BirdFuncDat.csv", package = "biodivTS")) mamm_trait <- read.csv(system.file("extdata", "elton_traits/elton_traits_MammFuncDat.csv", package = "biodivTS")) %>% rename(nocturnal = activity_nocturnal, crepuscular = activity_crepuscular, diurnal = activity_diurnal)
Check which authority would result in the most matches
##Counts show that OTT is the best authority for both data sets get_match_counts(bird_trait, "scientific") get_match_counts(mamm_trait, "scientific")
Get id's for the Elton trait data
get_trait_ids <- function(data, provider, id, common = TRUE){ #id <- lazyeval::as_name(id) sci_match <- by_name(unique(data$scientific), provider) %>% drop_na(acceptedNameUsageID) %>% #get original specid so we can figure out ho w many don't have matches left_join(data %>% select_(id, "scientific"), by = c("input" = "scientific")) %>% mutate(match_type = "scientific") if(common){ com_names <- data %>% #the id argument should be inplace of specid, but it doesn't work for some reason filter(!specid %in% sci_match$specid) %>% pull(english) %>% unique() %>% by_common("itis") %>% #mutate(match_type = "common") #filter(taxonRank == "species") %>% #only want ID's to species, since that's the level of the trait data drop_na(acceptedNameUsageID) %>% left_join(bird_trait %>% select_(id, "english"), by = c("input" = "english"))%>% mutate(match_type = "common") #there are both sci and common names, only want common name return(bind_rows(sci_match, com_names)) }else{ return(sci_match)} } bird_ids <- get_trait_ids(bird_trait, "itis", "specid") mammal_ids <- get_trait_ids(mamm_trait, "itis", "msw3_id", FALSE)
Are there any unresolved ids for birds? Yes - just one, which is a synonym to two different accepted id's
unres_trait <- get_dupe_ids(bird_ids, "specid") %>% left_join(bird_ids) unres_trait #resolve to the desired ID (in this case the one with complete hierarchy data) bird_ids <- unres_trait %>% filter(!is.na(kingdom)) %>% bind_rows(bird_ids %>% filter(!sort %in% unres_trait$sort))
What about for mammals? Yes - also a synonym to two different accepted id's. Further research shows that the Callospermophilus saturatus uses the subgenus, so we'll keep the name with the genus
unres_mamm_trait <- get_dupe_ids(mammal_ids, "msw3_id") %>% left_join(mammal_ids) unres_mamm_trait mammal_ids <- unres_mamm_trait %>% filter(acceptedNameUsageID == "ITIS:632452") %>% bind_rows(mammal_ids %>% filter(!sort %in% unres_mamm_trait$sort))
Join bird ids back to original trait data
#only want one column for old name and one column for new name, these are the old ID columns to remove col_ex <- c("passnonpass", "iocorder", "blfamilylatin", "blfamilyenglish", "blfamsequid", "taxo") #join on scientific and common sci_matches <- bird_ids %>% filter(match_type == "scientific") %>% select(id = acceptedNameUsageID, input, scientificName) %>% distinct() %>% right_join(bird_trait %>% select(.dots = -col_ex), by = c("input" = "scientific"), na_matches = "never") %>% #drop_na(id) %>% distinct() %>% select(-english) comm_matches <- bird_ids %>% filter(match_type == "common") %>% select(id = acceptedNameUsageID, input, scientificName) %>% distinct() %>% right_join(bird_trait %>% filter(!specid %in% sci_matches$specid) %>% select(.dots = -col_ex), by = c("input" = "english"), na_matches = "never") %>% distinct() %>% select(-scientific) #there are more rows after ID data is joined because for species that matched on common names there may be more than one scientificName elton_bird <- bind_rows(sci_matches, comm_matches) %>% rename(sourceName = input)
Do the same for mammal trait data
elton_mamm <- mammal_ids %>% select(id = acceptedNameUsageID, input, scientificName) %>% right_join(mamm_trait %>% select(-mswfamilylatin), by = c("input" = "scientific"), na_matches = "never") %>% distinct() %>% rename(sourceName = input)
Check against alternative providers, first for mammals
syn_res <- match_providers(elton_mamm, "itis") #names that resolve to more than one id alt_dupes <- syn_res %>% select(acceptedNameUsageID, input) %>% distinct() %>% group_by(input) %>% filter(n() > 1) add_syn_res <- syn_res %>% filter(!input %in% alt_dupes$input) %>% #excluding duplicate matched species select(id = acceptedNameUsageID, sourceName = input, scientificName) %>% distinct() %>% group_by(sourceName) %>% top_n(1, scientificName) %>% left_join(elton_mamm %>% select(-c(id, scientificName)), by = "sourceName", na_matches = "never") %>% ungroup() %>% distinct() elton_mamm <- elton_mamm %>% filter(!sourceName %in% add_syn_res$sourceName) %>% bind_rows(add_syn_res)
then birds
syn_res <- match_providers(elton_bird, "itis") #names that resolve to more than one id alt_dupes <- syn_res %>% select(acceptedNameUsageID, input) %>% distinct() %>% group_by(input) %>% filter(n() > 1) add_syn_res <- syn_res %>% filter(!input %in% alt_dupes$input) %>% #excluding duplicate matched species select(id = acceptedNameUsageID, sourceName = input, scientificName) %>% distinct() %>% group_by(sourceName) %>% top_n(1, scientificName) %>% left_join(elton_bird %>% select(-c(id, scientificName)), by = "sourceName", na_matches = "never") %>% ungroup() %>% distinct() elton_bird <- elton_bird %>% filter(!sourceName %in% add_syn_res$sourceName) %>% bind_rows(add_syn_res)
Grab only the trait columns we're interested in, standardize column names
elton_bird <- elton_bird %>% select(-specid, -ends_with("source"), -ends_with("enteredby"), -ends_with("certainty"), -ends_with("speclevel"), -ends_with("comment")) elton_mamm <- elton_mamm %>% select(-msw3_id, -forstrat_value, -ends_with("source"), -ends_with("certainty"), -ends_with("speclevel"), -ends_with("comment"))
Check for multiple matches to the same ID
elton_bird <- undupe_ids(elton_bird) elton_mamm <- undupe_ids(elton_mamm)
#save locally usethis::use_data(elton_bird, elton_mamm)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.