Code here written by Erica Krimmel.
In this demo we will cover how to:
idig_search_records
# Load core libraries; install these packages if you have not already library(ridigbio) library(tidyverse) # Load library for making nice HTML output library(kableExtra)
verify_records <- FALSE #Test that examples will run tryCatch({ # Your code that might throw an error verify_records <- idig_search_records(rq = list(genus = c("manis", "rhinolophus", "paguma")), limit = 10) }, error = function(e) { # Code to run if an error occurs cat("An error occurred during the idig_search_records call: ", e$message, "\n") cat("Vignettes will not be fully generated. Please try again after resolving the issue.") # Optionally, you can return NULL or an empty dataframe verify_records <- FALSE })
First, let's find all the specimen records for the species you are interested in. Do this using the idig_search_records
function from the ridigbio
package. You can learn more about this function from the iDigBio API documentation and ridigbio documentation. In this example, we want to search for specimens identified as being in one of the following genera: Manis, Rhinolophus, or Paguma.
# Edit the fields (e.g. `genus`) and values (e.g. "manis") in `list()` # to adjust your query and the fields (e.g. `uuid`) in `fields` to adjust the # columns returned in your results records <- idig_search_records(rq = list(genus = c("manis", "rhinolophus", "paguma")), fields = c("uuid", "recordset", "institutioncode", "genus", "scientificname", "country", "data.dwc:year", "data.dwc:collectionCode", "catalognumber", "data.dwc:preparations"))
The query above returned {r eval=verify_records} as.character(nrow(records))
records from the iDigBio Portal. Here is a preview of what our results look like:
knitr::kable(head(records)) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>% scroll_box(width = "100%")
These results include all specimen records from the genera of interest, but now we would like to narrow our search to include only specimens that may have cryo-preserved tissue available. We are looking in the dwc:preparations field for this information, but there isn't a controlled vocabulary in use and so the data is not very standard. We can start by summarizing the distinct values present in this field for all {r eval=verify_records} as.character(nrow(records))
records:
# List distinct values for the `preparation` field prepsummary <- records %>% group_by(`data.dwc:preparations`) %>% tally() # Display `prepsummary` in HTML output knitr::kable(prepsummary) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = FALSE) %>% scroll_box(height = "400px")
Based on the distinct values above, it seems like we can use the search terms "froze," "freeze," and "tissue" to narrow our results. We do this in the code below.
# Normalize values in `data.dwc:preparations` to be all lowercase; then # filter rows that include our search terms recordsfiltered <- records %>% mutate(`data.dwc:preparations` = str_to_lower(`data.dwc:preparations`)) %>% filter(grepl('freeze|froze|tissue', `data.dwc:preparations`))
Now we have {r eval=verify_records} as.character(nrow(recordsfiltered))
records, all of which are displayed in the table here:
knitr::kable(recordsfiltered) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>% scroll_box(height = "600px") # If you have this code open in R, you can uncomment the line below to # save `recordsfiltered` as a csv file to your working directory # write_csv(recordsfiltered, "recordsfiltered.csv")
We can view the preparation distinct values present in our subset of data to see if there are any other values we might want to filter on:
# List distinct values for the `preparation` field in recordsfiltered recordsfiltered %>% group_by(`data.dwc:preparations`) %>% tally() %>% knitr::kable() %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = FALSE) %>% scroll_box(height = "400px")
These data come from from multiple institutions, and if we wanted to examine the physical specimens or inquire about getting tissue samples, we would need to contact each institution. We can figure out who to contact using our results.
# Count how many records in the data were contributed by each recordset recordtally <- recordsfiltered %>% group_by(recordset) %>% tally() %>% rename() # Get metadata from the attributes of the `records` data frame collections <- tibble(collection = attr(recordsfiltered, "attribution")) %>% # Expand information captured in nested lists hoist(collection, recordset_uuid = "uuid", recordset_name = "name", recordset_url= "url", contacts = "contacts") %>% # Get rid of extraneous attribution metadata select(-collection) %>% # Expand information captured in nested lists unnest_longer(contacts) %>% # Expand information captured in nested lists unnest_wider(contacts) %>% # Remove any contacts without an email address listed filter(!is.na(email)) %>% # Get rid of duplicate contacts within the same recordset distinct() %>% # Rename some columns rename(contact_role = role, contact_email = email) %>% # Group first and last names together in the same column unite(col = "contact_name", first_name, last_name, sep = " ", na.rm = TRUE) %>% # Restructure data frame so that there is one row per recordset group_by(recordset_uuid) %>% mutate(contact_index = row_number()) %>% mutate(recordset_url = if_else(grepl("^http://", recordset_url), gsub("^http://", "https://", recordset_url), recordset_url )) %>% mutate(recordset_url = if_else(grepl("www.amnh.org/our-research/vertebrate-zoology/mammalogy", recordset_url), gsub("www.amnh.org/our-research/vertebrate-zoology/mammalogy", "www.amnh.org/research/vertebrate-zoology/mammalogy", recordset_url), recordset_url )) %>% mutate(recordset_url = if_else(grepl("www.burkemuseum.org/mammalogy", recordset_url), gsub("www.burkemuseum.org/mammalogy", "www.burkemuseum.org/collections-and-research/biology/mammalogy", recordset_url), recordset_url )) %>% mutate(recordset_url = if_else(grepl("www.nhm.org", recordset_url), gsub("www.nhm.org", "nhm.org", recordset_url), recordset_url )) %>% mutate(recordset_url = if_else( grepl("appl003.lsu.edu/natsci/lmns.nsf/\\$Content/Mammals\\?OpenDocument", recordset_url), gsub("appl003.lsu.edu/natsci/lmns.nsf/\\$Content/Mammals\\?OpenDocument", "appl103.lsu.edu/natsci/Collections/natscicolsearch.nsf/OpenMainPage?OpenAgent&ID=1042", recordset_url), recordset_url )) %>% mutate(recordset_url = if_else(grepl("www.nsrl.ttu.edu/collections/Mammals/index.htm", recordset_url), gsub("www.nsrl.ttu.edu/collections/Mammals/index.htm", "www.depts.ttu.edu/nsrl/collections/mammal.php", recordset_url), recordset_url )) %>% mutate(recordset_url = if_else(grepl("sites01.lsu.edu/wp/mns/research-collections/genetic-resources/", recordset_url), gsub("sites01.lsu.edu/wp/mns/research-collections/genetic-resources/", "appl103.lsu.edu/natsci/Collections/natscicolsearch.nsf/OpenMainPage?OpenAgent&ID=1050", recordset_url), recordset_url )) %>% mutate(recordset_url = if_else(grepl("https://www.msb.unm.edu", recordset_url), gsub("www.msb.unm.edu", "www.msb.unm.edu", recordset_url), recordset_url )) %>% pivot_wider(names_from = contact_index, values_from = c(contact_name, contact_role, contact_email)) %>% # Include how many records in the data were contributed by each recordset left_join(recordtally, by = c("recordset_uuid"="recordset")) %>% # Filter and remove n = 0 filter(!is.na(n)) %>% # Get rid of any rows which don't actually contribute data to `records`; # necessary because the attribute metadata by default includes all recordsets # in iDigBio that match the `idig_search_records` query, even if you filter # or limit those results in your own code filter(recordset_uuid %in% records$recordset)
Our newly constructed collections
data frame contains contact information for each of the collections (i.e. recordsets) providing data, and looks like this:
knitr::kable(collections) %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>% scroll_box(height = "400px")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.