Fields in ridigbio

library(ridigbio)

# Load library for making nice HTML output
library(kableExtra)
# Flag describing whether the live iDigBio examples can run. It stays FALSE
# unless the probe below manages to fetch and tabulate field metadata.
verify_record_fields <- FALSE

# Test that examples will run
# The value of the whole tryCatch() is assigned to the flag: an assignment made
# inside the error handler would only create a variable local to that handler
# function and would never reach the flag defined above.
verify_record_fields <- tryCatch({
  probe_fields <- idig_meta_fields(type = "records", subset = "raw", limit = 10)
  rfall <- data.frame(matrix(ncol = 2, nrow = 0))
  colnames(rfall) <- c("Type", "FieldNameValue")

  # Stricter schema verification: tabulate `type` and `fieldName` of every
  # returned entry. seq_along() skips the loop for an empty result, whereas
  # 1:length() would iterate over c(1, 0) and index out of bounds.
  for (i in seq_along(probe_fields)) {
    rf <- data.frame(
      Type = probe_fields[[i]]$type,
      FieldNameValue = probe_fields[[i]]$fieldName,
      stringsAsFactors = FALSE
    )
    rfall <- rbind(rfall, rf)
  }

  # On success the flag holds the fetched metadata (as before); an empty table
  # counts as a failed probe.
  if (nrow(rfall) > 0) {
    probe_fields
  } else {
    FALSE
  }
}, error = function(e) {
  # Report the failure and mark the examples as not runnable.
  cat("An error occurred during the idig_meta_fields call: ", conditionMessage(e), "\n")
  cat("Vignettes will not be fully generated. Please try again after resolving the issue.")
  FALSE
})

There are many fields that can be obtained via the Search and Media API. This tutorial is meant to aid in identifying these fields.

General Overview

In this demo we will cover how to:

  1. Identify both raw and indexed record fields with idig_meta_fields()
  2. Download fields of interest with idig_search_records()
  3. Identify both raw and indexed media record fields with idig_meta_fields()
  4. Download fields of interest with idig_search_media()

Search API Fields

To identify the raw, or verbatim, fields available with the Search API, you can run the following:

# Fetch the metadata describing the raw (verbatim) record fields.
record_fields <- idig_meta_fields(type = "records", subset = "raw")

# Empty table with the final column names; used as the seed so the result is
# still a well-formed data frame when no fields are returned.
rfall <- data.frame(
  type = character(0),
  fieldName = character(0),
  stringsAsFactors = FALSE
)

# Build one row per field and bind them in a single rbind() call rather than
# growing the table inside a loop. unname() stops rbind() from turning the
# list names into row names, so rows stay numbered 1..n.
rfall <- do.call(
  rbind,
  c(
    list(rfall),
    unname(lapply(record_fields, function(field) {
      data.frame(
        type = field$type,
        fieldName = field$fieldName,
        stringsAsFactors = FALSE
      )
    }))
  )
)
nrow(rfall)

In addition to the fields provided by data providers, the Search API contains fields that have been indexed and potentially modified by iDigBio. To identify the indexed fields available with the Search API, you can run the following:

# Fetch the metadata describing the indexed record fields.
record_fields_index <- idig_meta_fields(type = "records", subset = "indexed")

# Empty table with the final column names; used as the seed so the result is
# still a well-formed data frame when no fields are returned.
rfalli <- data.frame(
  type = character(0),
  fieldName = character(0),
  stringsAsFactors = FALSE
)

# Build one row per field and bind them in a single rbind() call rather than
# growing the table inside a loop. unname() stops rbind() from turning the
# list names into row names, so rows stay numbered 1..n.
rfalli <- do.call(
  rbind,
  c(
    list(rfalli),
    unname(lapply(record_fields_index, function(field) {
      data.frame(
        type = field$type,
        fieldName = field$fieldName,
        stringsAsFactors = FALSE
      )
    }))
  )
)
nrow(rfalli)

Note that these two field lists are different.

# Compare the sets of field names directly. Passing the data frames themselves
# makes setequal() compare whole columns as list elements, which does not test
# whether the two APIs expose the same fields.
setequal(rfall$fieldName, rfalli$fieldName)

However, they contain similar information. For example, scientificName can be found in both the raw and indexed fields.

# Look the field up by name instead of by a fixed row number, so the example
# keeps showing scientificName even if the API adds or reorders fields.
# The namespace prefix (everything up to the last ":") is stripped and case is
# ignored so the same test works for both tables; more than one row is
# returned if several namespaces define the field.
rfall[tolower(sub("^.*:", "", rfall$fieldName)) == "scientificname", ]
rfalli[tolower(sub("^.*:", "", rfalli$fieldName)) == "scientificname", ]

Retain records with these fields

To obtain records containing these fields with the idig_search_records() function, simply set the fields argument to the vector of field names.

# Search for Galax urceolata records, requesting every raw field collected in
# `rfall`.
out <- idig_search_records(
  rq = list(scientificname = "Galax urceolata"),
  fields = rfall[["fieldName"]]
)

Media Record API Fields

To identify the raw, or verbatim, fields available with the Media Record API, you can run the following:

# Fetch the metadata describing the raw (verbatim) media record fields.
media_fields <- idig_meta_fields(type = "media", subset = "raw")

# Empty table with the final column names; used as the seed so the result is
# still a well-formed data frame when no fields are returned.
mfall <- data.frame(
  type = character(0),
  fieldName = character(0),
  stringsAsFactors = FALSE
)

# Build one row per field and bind them in a single rbind() call rather than
# growing the table inside a loop. unname() stops rbind() from turning the
# list names into row names, so rows stay numbered 1..n.
mfall <- do.call(
  rbind,
  c(
    list(mfall),
    unname(lapply(media_fields, function(field) {
      data.frame(
        type = field$type,
        fieldName = field$fieldName,
        stringsAsFactors = FALSE
      )
    }))
  )
)
nrow(mfall)

Like the Search API, the Media Record API has fields that have been indexed and potentially modified by iDigBio. To identify the indexed fields available with the Media Record API, you can run the following:

# Fetch the metadata describing the indexed media record fields.
media_fields_indexed <- idig_meta_fields(type = "media", subset = "indexed")

# Empty table with the final column names; used as the seed so the result is
# still a well-formed data frame when no fields are returned.
mfalli <- data.frame(
  type = character(0),
  fieldName = character(0),
  stringsAsFactors = FALSE
)

# Build one row per field and bind them in a single rbind() call rather than
# growing the table inside a loop. unname() stops rbind() from turning the
# list names into row names, so rows stay numbered 1..n.
mfalli <- do.call(
  rbind,
  c(
    list(mfalli),
    unname(lapply(media_fields_indexed, function(field) {
      data.frame(
        type = field$type,
        fieldName = field$fieldName,
        stringsAsFactors = FALSE
      )
    }))
  )
)
nrow(mfalli)

Note that all indexed fields associated with the Media Record API are returned by default with the idig_search_media() function.

# Print the names of the indexed media fields.
mfalli[["fieldName"]]

Retain records with these fields

To obtain records containing these fields with the idig_search_media() function, simply set the fields argument to the vector of field names.

# Search for Galax urceolata media records, requesting every indexed media
# field collected in `mfalli`.
out <- idig_search_media(
  rq = list(scientificname = "Galax urceolata"),
  fields = mfalli$fieldName
)

# Render the result as a styled, scrollable HTML table.
out %>%
  kable() %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    font_size = 12,
    # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
    fixed_thead = TRUE
  ) %>%
  scroll_box(width = "100%", height = "400px")


Try the ridigbio package in your browser

Any scripts or data that you put into this service are public.

ridigbio documentation built on Oct. 1, 2024, 9:06 a.m.