# R/cleanNOAA.R

#' Clean NOAA Dataset
#'
#' Takes the raw NOAA significant earthquakes table (after it has been
#' downloaded and read in) and returns it with a \code{DATE} column added and
#' the \code{LATITUDE}, \code{LONGITUDE}, \code{EQ_PRIMARY} and \code{DEATHS}
#' columns converted with \code{as.numeric()}.
#'
#' Missing \code{MONTH} or \code{DAY} values are replaced by 1 before the date
#' is built, so a record with only a year is dated to 1 January of that year.
#' The filled-in \code{MONTH} and \code{DAY} values are kept in the result.
#'
#' @param rawNOAA A dataframe of NOAA significant earthquakes raw data. It
#'   must contain the columns \code{YEAR}, \code{MONTH}, \code{DAY},
#'   \code{LATITUDE}, \code{LONGITUDE}, \code{EQ_PRIMARY} and \code{DEATHS}.
#'
#' @return A cleaned dataframe version of the NOAA significant earthquakes
#'   dataset: the input with \code{MONTH}/\code{DAY} gaps filled, a new
#'   \code{DATE} column appended, and the four measurement columns numeric.
#'   An error is raised if any required column is absent.
#'
#' @importFrom magrittr %>%
#' @importFrom dplyr mutate_
#' @importFrom lubridate make_date
#' @importFrom readr read_delim
#'
#' @export
#'
#' @examples
#' \dontrun{data <- readr::read_delim('signif.txt', delim = '\t')}
#' \dontrun{cleandata <- eq_clean_data(data)}
#'

eq_clean_data <- function(rawNOAA) {
  date_parts <- c("MONTH", "DAY")
  numeric_cols <- c("LATITUDE", "LONGITUDE", "EQ_PRIMARY", "DEATHS")

  # Fail early with a readable message instead of an "object not found"
  # error raised from deep inside the transformation.
  absent <- setdiff(c("YEAR", date_parts, numeric_cols), names(rawNOAA))
  if (length(absent) > 0) {
    stop(
      "rawNOAA is missing required column(s): ",
      paste(absent, collapse = ", "),
      call. = FALSE
    )
  }

  # Plain column assignment replaces the deprecated dplyr::mutate_() formula
  # interface; `[[` is used so no partial name matching can occur.
  for (part in date_parts) {
    values <- rawNOAA[[part]]
    # Default unknown month/day to 1 so a date can still be constructed.
    rawNOAA[[part]] <- ifelse(is.na(values), 1, values)
  }

  # DATE must be built after the gaps above are filled.
  rawNOAA[["DATE"]] <- lubridate::make_date(
    rawNOAA[["YEAR"]],
    rawNOAA[["MONTH"]],
    rawNOAA[["DAY"]]
  )

  for (column in numeric_cols) {
    rawNOAA[[column]] <- as.numeric(rawNOAA[[column]])
  }

  rawNOAA
}


#' Clean LOCATION_NAME Column
#'
#' Cleans the \code{LOCATION_NAME} column by stripping out the country name
#' (including the colon) and converting the remaining text to title case.
#'
#' A value may hold several locations separated by \code{;}. Each of those
#' segments is cleaned on its own: a trailing colon is dropped, then
#' everything up to and including the first remaining colon is removed, so
#' multi-word country names such as \code{"NEW ZEALAND:"} are stripped in
#' full. A segment that has no colon left after the trailing one is dropped
#' (for example \code{"SYRIA:"}) is kept as is. The segments are re-joined
#' with \code{;}, surrounding whitespace is trimmed, runs of whitespace are
#' collapsed to one space, and any colons still present are turned into
#' commas.
#'
#' @param rawNOAA A dataframe of NOAA significant earthquakes data. It must
#'   contain a \code{LOCATION_NAME} column.
#'
#' @return A dataframe with a clean LOCATION_NAME Column; all other columns
#'   are returned untouched. An error is raised if \code{LOCATION_NAME} is
#'   absent.
#'
#' @importFrom magrittr %>%
#' @importFrom stringr str_split
#' @importFrom stringr str_replace
#' @importFrom stringr str_replace_all
#' @importFrom stringr str_trim
#' @importFrom stringr str_to_title
#' @importFrom stringr str_c
#' @importFrom dplyr mutate_
#' @importFrom purrr map
#' @importFrom purrr map_chr
#'
#' @export
#'
#' @examples
#' \dontrun{data <- readr::read_delim('signif.txt', delim = '\t')}
#' \dontrun{cleandata <- eq_location_clean(data)}
#'

eq_location_clean <- function(rawNOAA) {
  if (!"LOCATION_NAME" %in% names(rawNOAA)) {
    stop("rawNOAA must contain a LOCATION_NAME column", call. = FALSE)
  }

  # Cleans the ';'-separated segments of one LOCATION_NAME value and joins
  # them back into a single string.
  strip_country <- function(segments) {
    segments %>%
      # A bare "COUNTRY:" keeps its name; only the dangling colon goes.
      stringr::str_replace(":$", "") %>%
      # Drop the whole prefix up to the first colon, not just the last
      # upper-case word before it, so "NEW ZEALAND:" leaves no "NEW" behind.
      stringr::str_replace("^[^:]*:", "") %>%
      stringr::str_c(collapse = ";")
  }

  clean_location <- rawNOAA[["LOCATION_NAME"]] %>%
    stringr::str_split(";") %>%
    purrr::map_chr(strip_country) %>%
    stringr::str_trim() %>%
    stringr::str_replace_all("\\s+", " ") %>%
    # Colons that survive separate sub-locations; render them as commas.
    stringr::str_replace_all(":", ",") %>%
    stringr::str_to_title()

  # Direct assignment replaces the deprecated dplyr::mutate_() call.
  rawNOAA[["LOCATION_NAME"]] <- clean_location
  rawNOAA
}
# blnash508/EarthquakesNOAA documentation built on May 14, 2019, 5:25 p.m.