# R/read-csv.R

# This script reads the GBIF output csv file.
library(data.table)
library(dplyr)
library(stringr)

# Names of the columns in the GBIF csv that are discarded on read
# (handed to the `drop` argument of data.table::fread()).
drop_cols <- c(
  "scientificName",
  "verbatimScientificName",
  "gbifID",
  "datasetKey",
  "occurrenceID",
  "collectionCode",
  "institutionCode",
  "basisOfRecord",
  "verbatimScientificNameAuthorship",
  "locality",
  "occurrenceStatus",
  "individualCount",
  "coordinatePrecision",
  "elevation",
  "elevationAccuracy",
  "depth",
  "depthAccuracy",
  "recordNumber",
  "identifiedBy",
  "typeStatus"
)

# The GBIF download zip file must be extracted and the csv renamed to
# inputs/lvl2-gbif.csv before running the code below.
# Converts GBIF timestamp strings (e.g. "2019-04-26T10:30:00Z") to POSIXct.
#
# x: character vector of timestamps.
# Returns a POSIXct vector in the GMT timezone (same as UTC). The first "Z" in
# each string is removed before parsing (Z specifies Zulu time, which is also
# GMT or UTC). Strings that do not match the format, and NA inputs, become NA.
parse_gbif_timestamp <- function(x) {
  as.POSIXct(
    stringr::str_remove(x, "Z"),
    tz = "GMT",
    format = "%Y-%m-%dT%H:%M:%S"
  )
}

# Reads in the Level II data from the GBIF download file, skipping the columns
# listed in drop_cols, and parses the three timestamp columns.
lvl2 <- data.table::fread(
  "inputs/lvl2-gbif.csv",
  na.strings = "",
  drop = drop_cols,
  # data.table >= 1.13.0 can auto-detect ISO 8601 timestamps and return these
  # columns as POSIXct, which the string-based parsing below would not match
  # (expected to give NA). Force character so the raw strings always arrive
  # here unchanged.
  colClasses = list(
    character = c("eventDate", "dateIdentified", "lastInterpreted")
  )
) %>%
  dplyr::as_tibble() %>% # Converts to tibble to use with dplyr.
  dplyr::mutate(
    eventDate = parse_gbif_timestamp(eventDate),
    dateIdentified = parse_gbif_timestamp(dateIdentified),
    lastInterpreted = parse_gbif_timestamp(lastInterpreted)
  )

# Writes the lvl2 tibble to disk as an RDS file.
saveRDS(lvl2, file = "data/lvl2.rds")
# iozeroff/cncpointR documentation built on Feb. 4, 2020, 6:18 p.m.