Nothing
## ----message=FALSE------------------------------------------------------------
# Load core libraries; install these packages if you have not already
library(ridigbio)
library(tidyverse)
# Load library for making nice HTML output
library(kableExtra)
# Load libraries for visualizing geographic data
library(leaflet)
library(cowplot)
## ----echo = FALSE-------------------------------------------------------------
verify_df_flagCoord <- FALSE
#Test that examples will run
tryCatch({
# Your code that might throw an error
verify_df_flagCoord <- idig_search_records(
rq = list(flags = "rev_geocode_corrected", institutioncode = "lacm"),
limit = 10
)
}, error = function(e) {
# Code to run if an error occurs
cat("An error occurred during the idig_search_records call: ", e$message, "\n")
cat("Vignettes will not be fully generated. Please try again after resolving the issue.")
# Optionally, you can return NULL or an empty dataframe
verify_df_flagCoord <- FALSE
})
## ----eval=verify_df_flagCoord-------------------------------------------------
# Edit the fields (e.g. `flags`) and values (e.g. "rev_geocode_corrected") in
# `list()` to adjust your query and the fields (e.g. `uuid`) in `fields` to
# adjust the columns returned in your results
df_flagCoord <- idig_search_records(rq = list(flags = "rev_geocode_corrected",
institutioncode = "lacm"),
fields = c("uuid",
"institutioncode",
"collectioncode",
"country",
"data.dwc:country",
"stateprovince",
"county",
"locality",
"geopoint",
"data.dwc:decimalLongitude",
"data.dwc:decimalLatitude",
"flags"),
limit = 100000) %>%
# Rename fields to more easily reflect their provenance (either from the
# data provider directly or modified by the data aggregator)
rename(provider_lon = `data.dwc:decimalLongitude`,
provider_lat = `data.dwc:decimalLatitude`,
provider_country = `data.dwc:country`,
aggregator_lon = `geopoint.lon`,
aggregator_lat = `geopoint.lat`,
aggregator_country = country,
aggregator_stateprovince = stateprovince,
aggregator_county = county,
aggregator_locality = locality) %>%
# Reorder columns for easier viewing
select(uuid, institutioncode, collectioncode, provider_lat, aggregator_lat,
provider_lon, aggregator_lon, provider_country, aggregator_country,
aggregator_stateprovince, aggregator_county, aggregator_locality,
flags)
## ----eval=verify_df_flagCoord, echo = FALSE-----------------------------------
# Subset `df_flagCoord` to show example
df_flagCoord[1:50,] %>%
select(-flags) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
font_size = 12,
fixed_thead = T) %>%
scroll_box(width = "100%", height = "400px")
## ----eval=verify_df_flagCoord-------------------------------------------------
# Create function to allow subsetting the `df_flagCoord` dataset by other flags
# found on these same records
df_flagSubset <- function(subsetFlag) {
df_flagCoord %>%
filter(grepl(subsetFlag, flags)) %>%
select(uuid, matches("_lat|_lon")) %>%
unite(provider_coords, c("provider_lat", "provider_lon"), sep = ",") %>%
unite(aggregator_coords, c("aggregator_lat", "aggregator_lon"), sep = ",") %>%
gather(key = type, value = coordinates, -uuid) %>%
separate(coordinates, c("lat","lon"), sep = ",") %>%
mutate(lat = as.numeric(lat)) %>%
mutate(lon = as.numeric(lon)) %>%
arrange(uuid, type)}
# Subset `df_flagCoord` by records flagged for having had their latitude negated
# to place point in stated country by reverse geocoding process
df_rev_geocode_lat_sign <- df_flagSubset("rev_geocode_lat_sign")
# Create map displaying a few examples of records with the
# rev_geocode_flip_lat_sign flag
pal <- leaflet::colorFactor(palette = c("#d7191c", "#fdae61", "#ffffbf", "#abdda4", "#2b83ba"),
domain = df_rev_geocode_lat_sign$uuid[1:10])
map <- df_rev_geocode_lat_sign[1:10,] %>%
mutate(popup = str_c(type, " = ", lat, ", ", lon, sep = "")) %>%
leaflet() %>%
addTiles() %>%
addCircleMarkers(
lng = ~lon,
lat = ~lat,
radius = 10,
weight = 1,
color = ~pal(uuid),
stroke = FALSE,
fillOpacity = 100,
popup = ~popup) %>%
addLegend("bottomright", pal = pal, values = ~uuid,
title = "Specimen Records",
opacity = 1)
## ----eval=verify_df_flagCoord, echo = FALSE, out.width = '100%'---------------
map
## ----eval=verify_df_flagCoord-------------------------------------------------
# Summarize flagged records by collection type
spmByColl <- df_flagCoord %>%
group_by(collectioncode) %>%
tally()
# Generate graph to display counts of flagged records by collection within the
# institution
graph_spmByColl <- ggplot(spmByColl,
aes(x = reorder(collectioncode, -n),
y = n,
fill = collectioncode)) +
geom_col() +
theme(panel.background = element_blank(),
legend.title = element_blank(),
axis.title.x = element_text(face = "bold"),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title.y = element_text(face = "bold"),
plot.title = element_text(size = 12, face = "bold")) +
labs(x = "collection",
y = "# of specimen records",
title = "LACM records flagged with geo-coordinate data quality issues by iDigBio") +
geom_text(aes(label = n, vjust = -0.5))
# Get count of total records published by the institution using function
# `idig_count_records`
totalInstSpm <- idig_count_records(rq = list(institutioncode = "lacm"))
# Calculate flagged records as percent of total records
percentFlagged <- sum(spmByColl$n)/totalInstSpm*100
## ----eval=verify_df_flagCoord, out.width="700px", echo = FALSE----------------
graph_spmByColl <- graph_spmByColl +
theme_minimal_grid() +
theme(
text = element_text(size = 22),
axis.text = element_text(size = 22),
plot.title = element_text(size = 22, face = "bold")
)
knitr::include_graphics(save_plot("plot.png", graph_spmByColl, base_height = 10, base_width = 24))
## ----eval=verify_df_flagCoord-------------------------------------------------
# Collate `df_flagAssoc` to describe other data quality flags that are associated
# with rev_geocode_corrected in `df_flagCoord`
df_flagAssoc <- df_flagCoord %>%
select(uuid, flags) %>%
unnest(flags) %>%
group_by(flags) %>%
tally() %>%
mutate("category" = case_when(str_detect(flags, "geo|country|state")
~ "geography",
str_detect(flags, "dwc_datasetid_added|dwc_multimedia_added|datecollected_bounds")
~ "other",
str_detect(flags, "gbif|dwc|tax")
~ "taxonomy")) %>%
mutate("percent" = n/(nrow(df_flagCoord))*100) %>%
arrange(category, desc(n))
# Visualize associated data quality flags
graph_spmByColl <- ggplot(df_flagAssoc, aes(x = reorder(flags, -percent), y = percent, fill = category)) +
geom_col() +
theme(axis.title.x = element_text(face = "bold"),
axis.text.x = element_text(angle = 75, hjust = 1),
axis.ticks.y = element_blank(),
axis.title.y = element_text(face = "bold"),
plot.title = element_text(size = 12, face = "bold")
) +
labs(x = "additional iDigBio data quality flag",
y = "% specimen records",
title = "LACM records flagged for geo-coordinate issues are also flagged for...",
fill = "flag category")
## ----eval=verify_df_flagCoord, out.width="700px", echo = FALSE----------------
graph_spmByColl <- graph_spmByColl +
theme_minimal_grid() +
theme(
text = element_text(size = 22),
axis.text = element_text(size = 22),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
plot.title = element_text(size = 22, face = "bold")
)
knitr::include_graphics(save_plot("plot2.png", graph_spmByColl, base_height = 10, base_width = 24))
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.