R/clean.R

Defines functions eq_clean_data eq_location_clean

Documented in eq_clean_data eq_location_clean

#' @importFrom readr read_delim
NULL
# noaa <- readr::read_delim(system.file("/extdata/signif.txt", package="CapstoneSubmit"), delim="\t")

#' Clean the raw NOAA significant-earthquake data
#'
#' Replaces missing `MONTH` and `DAY` values with 1, adds a `DATE` column of
#' class `Date` built from `YEAR`, `MONTH` and `DAY`, and converts the
#' coordinate, magnitude and impact columns to numeric.
#'
#' @details `YEAR` is interpreted as a year number in the proleptic Gregorian
#'   calendar used by R's `Date` class, so negative years are supported. Rows
#'   whose year, month and day do not form a valid calendar date get `NA` in
#'   `DATE`.
#'
#' @param noaa the raw dataset
#' @return cleaned dataset with an added DATE column
#' @importFrom dplyr mutate
#' @importFrom magrittr %>%
#' @examples
#' eq_clean_data(noaa)
#' @export
eq_clean_data <- function(noaa) {
  # The Gregorian calendar repeats exactly every 400 years (146097 days).
  # Parsing the date in an equivalent year inside 2000..2399 and shifting it
  # back by whole cycles avoids parsing negative or very small years and
  # involves no time-zone dependent date-time arithmetic.
  days_per_cycle <- 146097
  build_date <- function(year, month, day) {
    year <- as.integer(year)
    proxy_year <- year %% 400L + 2000L
    # Always a whole number, because year and proxy_year agree modulo 400.
    cycles <- (year - proxy_year) / 400
    proxy_date <- as.Date(
      sprintf("%04d-%02d-%02d", proxy_year, as.integer(month), as.integer(day)),
      format = "%Y-%m-%d"
    )
    proxy_date + cycles * days_per_cycle
  }

  dplyr::mutate(
    noaa,
    MONTH = ifelse(is.na(MONTH), 1, MONTH),
    DAY = ifelse(is.na(DAY), 1, DAY),
    DATE = build_date(YEAR, MONTH, DAY),
    LATITUDE = as.numeric(LATITUDE),
    LONGITUDE = as.numeric(LONGITUDE),
    SECOND = as.numeric(SECOND),
    EQ_PRIMARY = as.numeric(EQ_PRIMARY),
    # Converted from its own column (previously overwritten with EQ_PRIMARY).
    EQ_MAG_MW = as.numeric(EQ_MAG_MW),
    EQ_MAG_MS = as.numeric(EQ_MAG_MS),
    EQ_MAG_MB = as.numeric(EQ_MAG_MB),
    EQ_MAG_ML = as.numeric(EQ_MAG_ML),
    EQ_MAG_MFA = as.numeric(EQ_MAG_MFA),
    EQ_MAG_UNK = as.numeric(EQ_MAG_UNK),
    DEATHS = as.numeric(DEATHS),
    MISSING = as.numeric(MISSING),
    INJURIES = as.numeric(INJURIES),
    DAMAGE_MILLIONS_DOLLARS = as.numeric(DAMAGE_MILLIONS_DOLLARS),
    TOTAL_DEATHS = as.numeric(TOTAL_DEATHS),
    TOTAL_MISSING = as.numeric(TOTAL_MISSING),
    TOTAL_MISSING_DESCRIPTION = as.numeric(TOTAL_MISSING_DESCRIPTION),
    TOTAL_DAMAGE_MILLIONS_DOLLARS = as.numeric(TOTAL_DAMAGE_MILLIONS_DOLLARS)
  )
}

#' Clean the LOCATION_NAME column of the NOAA data
#'
#' Removes everything up to and including the first colon in `LOCATION_NAME`
#' together with any whitespace that follows it, then converts the remaining
#' text to title case. Values without a colon are kept whole and only
#' re-cased.
#'
#' @param cleaned_noaa the cleaned dataset coming from eq_clean_data
#' @return cleaned dataset with LOCATION_NAME fixed
#' @importFrom dplyr mutate
#' @importFrom magrittr %>%
#' @importFrom tools toTitleCase
#' @examples
#' eq_location_clean(noaa)
#' @export
eq_location_clean <- function(cleaned_noaa) {
  tidy_location <- function(location) {
    # Drop the "<prefix>:" part and however much whitespace follows the colon,
    # instead of assuming a fixed one-space gap after it.
    stripped <- sub("^[^:]*:[[:space:]]*", "", location)
    # Names without a colon may still carry leading whitespace.
    stripped <- sub("^[[:space:]]+", "", stripped)
    tools::toTitleCase(tolower(stripped))
  }

  dplyr::mutate(cleaned_noaa, LOCATION_NAME = tidy_location(LOCATION_NAME))
}

#' processed earthquake data
#' @description Data on earthquakes
#' @usage data(noaa)
#' @format A data frame with 6049 observations of 48 variables.
#' @examples noaa %>% eq_clean_data %>% eq_location_clean()
#' noaa <- noaa %>% eq_clean_data %>% eq_location_clean()
JunlueZhao/CourseraCaptsoneWeek3 documentation built on May 20, 2019, 5:40 p.m.