R/eq_clean.R

Defines functions eq_clean_data eq_location_clean

Documented in eq_clean_data eq_location_clean

#' Basic Cleaning of Raw NOAA Dataset
#'
#' Prepares the raw NOAA Significant Earthquakes table for further use:
#' \itemize{
#'   \item adds a \code{DATE} column of class \code{Date}, assembled from the
#'     \code{YEAR}, \code{MONTH} and \code{DAY} columns;
#'   \item coerces the \code{LATITUDE} and \code{LONGITUDE} columns to numeric.
#' }
#' A row whose \code{YEAR}, \code{MONTH} or \code{DAY} is missing gets
#' \code{NA} in \code{DATE}, because the assembled string cannot be parsed.
#'
#' @param tbl Data frame of NOAA Significant Earthquakes dataset, containing
#'    the columns \code{YEAR}, \code{MONTH}, \code{DAY}, \code{LATITUDE} and
#'    \code{LONGITUDE}. It is the first argument, so the result of the
#'    \code{readr::read_*} family of functions can be piped straight in.
#'
#' @return Data frame: \code{tbl} with the \code{DATE} column added and the
#'    coordinate columns converted to numeric.
#'
#' @examples
#' \dontrun{
#' readr::read_tsv(infile) %>% eq_clean_data()
#' }
#'
#' @importFrom dplyr %>% mutate
#'
#' @export
eq_clean_data <- function(tbl) {
  # The three derived columns do not depend on one another, so a single
  # mutate() call produces them all.
  dplyr::mutate(
    tbl,
    DATE = as.Date(
      paste(
        as.character(YEAR),
        as.character(MONTH),
        as.character(DAY),
        sep = "/"
      ),
      format = "%Y/%m/%d"
    ),
    LATITUDE = as.numeric(LATITUDE),
    LONGITUDE = as.numeric(LONGITUDE)
  )
}

#' Builds Location Label for NOAA Dataset
#'
#' Cleans the \code{LOCATION_NAME} column: everything up to and including the
#' last colon (the country prefix) is dropped, surrounding whitespace is
#' trimmed, and the remainder is converted to title case.
#' Values without a colon are only trimmed and title-cased; \code{NA} values
#' stay \code{NA}.
#' This is later used for annotating visualizations.
#'
#' @param tbl Data frame of NOAA Significant Earthquakes dataset. Must contain
#'    a \code{LOCATION_NAME} column.
#'
#' @return Data frame: \code{tbl} with \code{LOCATION_NAME} replaced by the
#'    cleaned character label.
#'
#' @examples
#' \dontrun{
#' eq_clean_data(earthquakes) %>% eq_location_clean()
#' }
#'
#' @importFrom dplyr %>% mutate
#' @importFrom stringr str_split str_trim str_to_title
#' @importFrom utils tail
#'
#' @export
eq_location_clean <- function(tbl) {
  # One character vector of colon-separated pieces per row.
  pieces <- stringr::str_split(tbl$LOCATION_NAME, ":")

  # Keep only the piece after the last colon. vapply() pins the result to a
  # character vector, so zero-row input yields character(0) rather than the
  # empty list sapply() would return.
  last_piece <- vapply(
    pieces,
    utils::tail,
    character(1),
    n = 1L,
    USE.NAMES = FALSE
  )

  label <- stringr::str_to_title(stringr::str_trim(last_piece))

  dplyr::mutate(tbl, LOCATION_NAME = label)
}
avidclam/msdr5 documentation built on May 29, 2019, 11:02 p.m.