# Nothing
## ----message=FALSE------------------------------------------------------------
# Load core libraries; install these packages if you have not already
library(ridigbio)
library(tidyverse)
# Load library for making nice HTML output
library(kableExtra)
## ----echo = FALSE-------------------------------------------------------------
# Probe the iDigBio API once with a small query and record, as a single
# TRUE/FALSE value, whether the examples below can run. The later chunks use
# `verify_records_1A` as their knitr `eval` option.
#
# The flag is taken from the value returned by `tryCatch()`: an assignment made
# inside the `error` handler would only create a variable local to that
# handler, and storing the query result itself would leave a data frame in the
# flag instead of a logical.
verify_records_1A <- tryCatch({
  idig_search_records(
    # `rq` is where you adjust your record query
    rq = list(genus = "shortia"),
    # `fields` is where you adjust what fields you want returned by the API
    fields = c("uuid",
               "family",
               "genus",
               "specificepithet",
               "scientificname",
               "stateprovince"),
    # `limit` is where you can set a limit on the number of records to return
    # in order to speed up your query; max is 100000
    limit = 10)
  # Reaching this line means the call above did not signal an error
  TRUE
}, error = function(e) {
  # Report the failure in the output and switch the example chunks off
  cat("An error occurred during the idig_search_records call: ", conditionMessage(e), "\n")
  cat("Vignettes will not be fully generated. Please try again after resolving the issue.")
  FALSE
})
## ----eval=verify_records_1A---------------------------------------------------
# A first, simple search that introduces the primary arguments of
# `idig_search_records`
records_1A <- idig_search_records(
  # `rq` holds the record query
  rq = list(genus = "shortia"),
  # `fields` names the fields the API should return
  fields = c(
    "uuid", "family", "genus",
    "specificepithet", "scientificname", "stateprovince"
  ),
  # `limit` caps the number of records returned, which speeds up the query;
  # max is 100000
  limit = 10,
  # `sort` names the fields used for sorting
  sort = c("stateprovince", "scientificname")
)

# Render the data frame created above as a styled, scrollable HTML table
records_1A %>%
  knitr::kable() %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ) %>%
  scroll_box(height = "300px")
## ----eval=verify_records_1A---------------------------------------------------
# Now let's repeat the same search but remove all arguments other than `rq` to
# see what the defaults for the other arguments look like
records_1B <- idig_search_records(
  rq = list(genus = "shortia"))

# Rewrite URL-style values in `occurrenceid`. `sub()`/`gsub()` return elements
# without a match unchanged (and keep NA as NA), so no separate `grepl()` test
# is needed before replacing.

# Drop a leading "http://"; the pattern is anchored, so it stays a regular
# expression and can match at most once per value
records_1B$occurrenceid <- sub("^http://", "", records_1B$occurrenceid)

# The remaining patterns are literal text: `fixed = TRUE` keeps "." from being
# treated as the regular-expression "any character" wildcard
records_1B$occurrenceid <- gsub("data.biodiversitydata.nl/naturalis",
                                "bioportal.naturalis.nl/nl",
                                records_1B$occurrenceid,
                                fixed = TRUE)
records_1B$occurrenceid <- gsub("https://grbio.org/cool",
                                "grbio.org/cool",
                                records_1B$occurrenceid,
                                fixed = TRUE)
records_1B$occurrenceid <- gsub("https://biocol.org",
                                "biocol.org",
                                records_1B$occurrenceid,
                                fixed = TRUE)

# Display the data frame we just created above in a nice pretty table for HTML
knitr::kable(records_1B) %>%
  kable_styling(bootstrap_options =
                  c("striped", "hover", "condensed", "responsive")) %>%
  scroll_box(height = "300px")
## ----eval=verify_records_1A---------------------------------------------------
# The previous example used a single parameter in `rq`; this search combines
# several parameters in one query
records_2A <- idig_search_records(
  rq = list(
    basisofrecord = "fossilspecimen",
    # `type = "exists"` matches rows that have a value in this field; the
    # inverse is `type = "missing"`
    geopoint = list(type = "exists")
  ),
  limit = 10
)

# Render the data frame created above as a styled, scrollable HTML table
records_2A %>%
  knitr::kable() %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ) %>%
  scroll_box(height = "300px")
## ----eval=verify_records_1A---------------------------------------------------
# What if we wanted to see more fields than the default provides? Using the same
# search as above, we can retrieve all indexed fields with `fields = "all"`
records_2B <- idig_search_records(
  rq = list(basisofrecord = "fossilspecimen",
            geopoint = list(type = "exists")),
  fields = "all",
  limit = 10)

# Switch a leading "http://" in `institutionid` to "https://". `sub()` leaves
# values without a match (including NA) unchanged, so no `grepl()` guard is
# needed, and the anchored pattern can match at most once per value.
records_2B$institutionid <- sub("^http://", "https://",
                                records_2B$institutionid)

# Display the data frame we just created above in a nice pretty table for HTML
knitr::kable(records_2B) %>%
  kable_styling(bootstrap_options =
                  c("striped", "hover", "condensed", "responsive")) %>%
  scroll_box(height = "300px")
## ----eval=verify_records_1A---------------------------------------------------
# There are more fields available than the ones retrieved by the query above.
# With the same search, naming fields explicitly in `fields` lets us pick
# exactly which indexed and raw fields to retrieve; raw data fields are
# prefaced by "data.dwc:" and use camelCase names (vs. lowercase for iDigBio
# fields)
records_2C <- idig_search_records(
  rq = list(
    basisofrecord = "fossilspecimen",
    geopoint = list(type = "exists")
  ),
  # The explicit list of requested fields; each indexed field is followed by
  # its raw counterpart(s)
  fields = c(
    "uuid",
    "recordset",
    "institutioncode", "data.dwc:institutionCode",
    "country", "data.dwc:country",
    "countrycode", "data.dwc:countryCode",
    "stateprovince", "data.dwc:stateProvince",
    "locality", "data.dwc:locality",
    "geopoint", "data.dwc:decimalLongitude", "data.dwc:decimalLatitude"
  ),
  limit = 10
)

# Render the data frame created above as a styled, scrollable HTML table
records_2C %>%
  knitr::kable() %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ) %>%
  scroll_box(height = "300px")
## ----eval=verify_records_1A---------------------------------------------------
# You may be curious what the difference is between indexed and raw data such as
# that we saw in the search above. Indexed data has been altered by iDigBio
# (often in an attempt to standardize and/or correct values), and raw data is
# what was provided to iDigBio by the data provider, i.e. the natural history
# collection. Here we will do a new search on a data quality flag to view
# differences between indexed and raw data
records_3A <- idig_search_records(
  # Data quality flags are a way for iDigBio to communicate how data was altered
  # during its quality control process, i.e. how the indexed and raw data differ
  rq = list(flags = "rev_geocode_lat_sign"),
  fields = c("uuid",
             "institutioncode", "data.dwc:institutionCode",
             "country", "data.dwc:country",
             "countrycode", "data.dwc:countryCode",
             "stateprovince", "data.dwc:stateProvince",
             "locality", "data.dwc:locality",
             "geopoint", "data.dwc:decimalLongitude", "data.dwc:decimalLatitude"),
  limit = 10)

# Let's format our results to be more readable by renaming and reordering columns
records_3A <- records_3A %>%
  # Swap the "data.dwc:" prefix for "raw_" on the raw columns. `rename_with()`
  # replaces the superseded `rename_at()`, and `fixed()` matches the prefix as
  # literal text so its "." is not treated as a regex wildcard
  rename_with(
    function(nm) str_replace(nm, fixed("data.dwc:"), "raw_"),
    .cols = starts_with("data.dwc:")
  ) %>%
  # Put each indexed coordinate next to its raw counterpart, then keep every
  # remaining column in its existing order
  select(uuid,
         indexed_decimalLatitude = geopoint.lat,
         raw_decimalLatitude,
         indexed_decimalLongitude = geopoint.lon,
         raw_decimalLongitude,
         everything())

# Display the data frame we just created above in a nice pretty table for HTML
knitr::kable(records_3A) %>%
  kable_styling(bootstrap_options =
                  c("striped", "hover", "condensed", "responsive")) %>%
  scroll_box(height = "300px")
## ----eval=verify_records_1A---------------------------------------------------
# Count the matches for a search whose parameters we know would retrieve many
# records, then format the count with thousands separators for readability
count_1A <- idig_count_records(
  rq = list(
    basisofrecord = "fossilspecimen",
    geopoint = list(type = "exists")
  )
) %>%
  format(big.mark = ",")

# The number of records in iDigBio that have a value of "fossilspecimen" as
# well as geographic coordinate data
count_1A
## ----eval=verify_records_1A---------------------------------------------------
# Return to the first simple search and summarize the top values of
# `scientificname` where the genus is "shortia"
top_1A <- idig_top_records(
  # `rq` holds the record query
  rq = list(genus = "shortia"),
  # `top_fields` names the fields to summarize
  top_fields = "scientificname",
  # `count` caps the number of top values returned, which speeds up the query;
  # max is 1000
  count = 10
)

# The result is a nested list; reshape it into a long table with one row per
# scientific name
top_1A <- top_1A$scientificname %>%
  as_tibble() %>%
  pivot_longer(
    cols = everything(),
    names_to = "scientificname",
    values_to = "count"
  )

# Render the data frame created above as a styled, scrollable HTML table
top_1A %>%
  knitr::kable() %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ) %>%
  scroll_box(height = "300px")
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.