library(knitr)
library(sf)
library(raster)
library(rgdal)
library(sp)
library(dplyr)
library(stringr)
library(velox)
library(ggplot2)
library(purrr)
library(worrms)
library(kableExtra)

Next we remove all observations with a coordinate uncertainty above 30 meters or without a recorded coordinate uncertainty.

# Keep an observation only when its coordinate uncertainty is recorded and is
# no greater than 30 meters.
lvl2 <- filter(
  lvl2,
  !is.na(coordinateUncertaintyInMeters) &
    coordinateUncertaintyInMeters <= 30
)

Removing Birds and Marine Animals

Our next objective in cleaning the iNaturalist observations is to remove all observations of species that are birds or marine animals. We achieve this objective through two methods.

Filtering Taxonomically

Fortunately, the hierarchical system of taxonomy favors this endeavour in part. By filtering species using their class, we are able to remove birds and fish in one fell swoop.

# Taxonomic classes that this analysis treats as fish.
fish <- c(
  "Myxini", "Petromyzontida", "Hyperoartia",
  "Chondrichthyes", "Actinopterygii", "Sarcopterygii"
)

# Applies the taxonomic filters one step at a time:
#   1. drop observations with no species name;
#   2. drop birds (class Aves) -- rows whose class is NA are dropped here too,
#      because filter() only keeps rows where the condition is TRUE;
#   3. drop every class listed in `fish`.
lvl2 <- lvl2 %>%
  filter(!is.na(species)) %>%
  filter(class != "Aves") %>%
  filter(!(class %in% fish))

Filtering using the World Register of Marine Species

Unfortunately for us (though not for biodiversity), there are many more marine species than just fish, and their taxonomy is much more mixed. Snails and slugs, for example, all belong to Gastropoda, yet they may be terrestrial, saltwater, or freshwater. To deal with this, we take advantage of the World Register of Marine Species, a comprehensive list of marine species. The worrms package provides convenient access to the register's API.

To lessen the computational load of querying the WORMS API, we can first filter our observations to only those within the CNC Area. Keep in mind that the ultimate goal of our analysis is to compile urban distributions for all species within the CNC Area; any species not observed within this area is not pertinent to our analysis.

# Builds a logical matrix with one row per lvl2 observation and one column per
# feature of cnc_area; an entry is TRUE when that point lies within that feature.
# NOTE(review): st_as_sf() labels the longitude/latitude columns with the CRS of
# cnc_area; it does not reproject them -- confirm cnc_area uses geographic
# (lon/lat) coordinates.
test <- lvl2 %>%
  st_as_sf(coords = c("decimalLongitude", "decimalLatitude"), # Converts lvl2 to an sf point object.
           crs = crs(cnc_area)) %>%
  st_within(y = cnc_area, sparse = FALSE) # Dense TRUE/FALSE matrix: points x cnc_area features.

# Keeps the observations that fall within at least one cnc_area feature.
# Checking every column (instead of only the first) gives the same result for a
# single-polygon cnc_area and no longer drops points that sit in a second or
# later polygon.
cnc_pts <- lvl2[rowSums(test) > 0, ]

Next we compile a list of all the unique species (binomial nomenclature) of these observations.

# Distinct species names found among the observations inside the CNC area.
unique_species <- unique(cnc_pts[["species"]])

We query the WORMS API for each of these species. If the species exists within the registry, a record is returned; otherwise, an error is returned. We tally which of these two responses occurs for every lookup.

# Looks up every unique species name in the World Register of Marine Species
# and records 1 when the lookup succeeds, 0 when it raises an error.
#
# XXX Still one API call per species; there may be no way to speed this up, as
# the API is rate limited.
# NOTE(review): every error is counted as "no record", so a network failure or
# rate-limit response is indistinguishable from a species that is genuinely
# absent from the register -- check the messages printed below.
# NOTE(review): this treats the existence of a record as "marine"; confirm
# that the register does not also hold records for non-marine taxa.
marine_animals_check <- vapply(
  seq_along(unique_species), # Safe for an empty species vector, unlike 1:length().
  function(i) {
    tryCatch(
      expr = {
        # Exact-name query; the response itself is discarded, only success matters.
        worrms::wm_records_name(unique_species[i], fuzzy = FALSE)
        message(paste("Iteration", i, "Successful"))
        1 # 1 if no error.
      },
      error = function(e) {
        message(paste("Error caught on iteration", i, ":", conditionMessage(e)))
        0 # 0 if error, meaning no record was returned.
      }
    )
  },
  numeric(1)
)

# Keeps only the species for which a record was returned.
marine_animals <- data.frame(species = unique_species, marine = marine_animals_check) %>%
  filter(marine == 1)

Finally, we filter our iNat observations to keep only species that returned no record from WORMS, and voila, our data is prepared.

# Retains the lvl2 observations whose species does not appear in marine_animals.
lvl2_terra <- filter(lvl2, !is.element(species, marine_animals$species))
# Create summary tables.
# Species Counts
# Counts observations per species (keeping the full taxonomic path as columns),
# keeps species with 100 or more observations, and renders the result sorted by
# species name. The caption states "at least 100" to match the `n >= 100` filter.
lvl2_terra %>%
  group_by(kingdom, phylum, class, order, family, genus, species) %>%
  summarize(n = n()) %>%
  filter(n >= 100) %>%
  arrange(species) %>%
  kable(caption = "Observation Counts of Species with at least 100 Observations",
        col.names = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species", "Number of Observations"))

# Kingdom Counts
# Tallies the observations in each kingdom and renders them as a table,
# sorted by kingdom.
kable(
  lvl2_terra %>%
    group_by(kingdom) %>%
    summarize(n = n()) %>%
    arrange(kingdom),
  caption = "Observation Counts grouped by Kingdom",
  col.names = c("Kingdom", "Number of Observations")
)

# Phylum Counts
# Tallies the observations in each kingdom/phylum combination and renders them
# as a table, sorted by kingdom.
kable(
  lvl2_terra %>%
    group_by(kingdom, phylum) %>%
    summarize(n = n()) %>%
    arrange(kingdom),
  caption = "Observation Counts grouped by Phylum",
  col.names = c("Kingdom", "Phylum", "Number of Observations")
)

# Class Counts
# Tallies the observations in each kingdom/phylum/class combination and renders
# them as a table, sorted by kingdom.
kable(
  lvl2_terra %>%
    group_by(kingdom, phylum, class) %>%
    summarize(n = n()) %>%
    arrange(kingdom),
  caption = "Observation Counts grouped by Class",
  col.names = c("Kingdom", "Phylum", "Class", "Number of Observations")
)

# Order Counts
# Tallies the observations in each kingdom/phylum/class/order combination and
# renders them as a table, sorted by kingdom.
kable(
  lvl2_terra %>%
    group_by(kingdom, phylum, class, order) %>%
    summarize(n = n()) %>%
    arrange(kingdom),
  caption = "Observation Counts grouped by Order",
  col.names = c("Kingdom", "Phylum", "Class", "Order", "Number of Observations")
)


iozeroff/cncpointR documentation built on Feb. 4, 2020, 6:18 p.m.