#' Clean Location Name
#'
#' Strips the leading prefix (typically the country) from an earthquake
#' location string and converts the remainder to title case. If a colon ":" is
#' present, everything up to and including the first colon, together with any
#' white space that directly follows it, is removed. If no colon is present,
#' the text is left untouched before the case conversion.
#' The function works element-wise on a character vector, so it can be called
#' within a "pipe" or inside \code{dplyr::mutate}.
#'
#' @param string a character vector with the earthquake location
#'
#' @return a character vector with the cleaned location names in title case
#'
#' @importFrom tools toTitleCase
#'
#' @examples
#' \dontrun{
#' eq_location_clean("JORDAN: BAB-A-DARAA,AL-KARAK")
#' #[1] "Bab-a-Daraa,al-Karak"
#' }
#'
#' @export
eq_location_clean <- function(string) {
  # Pattern, piece by piece:
  #   ^             beginning of the string
  #   [^:]*         any run of non-colon characters (i.e. up to the FIRST colon)
  #   :             the colon itself
  #   [[:space:]]*  white space, repeated any number of times (including 0)
  #
  # `sub` returns elements that do not match unchanged, so strings without a
  # colon need no separate branch. White space after the colon is optional, so
  # "COUNTRY:PLACE" is stripped just like "COUNTRY:  PLACE".
  name_clean <- sub("^[^:]*:[[:space:]]*", "", string)

  # Lower-case first so that toTitleCase() sees a uniform input.
  tools::toTitleCase(tolower(name_clean))
}
#' Clean Earthquakes NOAA dataset
#'
#' This function cleans the input data by converting it into a data frame,
#' dropping the rows whose \code{YEAR} is not positive (years BCE), replacing
#' missing \code{MONTH} and \code{DAY} values with 1, building a new variable
#' \code{DATE} representing the date of the earthquake, making sure the
#' relevant variables are numeric (\code{LATITUDE}, \code{LONGITUDE},
#' \code{EQ_PRIMARY}, \code{DEATHS}), and finally cleaning the
#' \code{LOCATION_NAME} by calling the \code{eq_location_clean} function.
#' If any of the required variables is not present, the function stops with an
#' error message that lists the missing columns.
#'
#' @param raw a data frame or matrix containing the information about earthquakes.
#' Must contain the following columns: YEAR, MONTH, DAY, LATITUDE, LONGITUDE,
#' EQ_PRIMARY, DEATHS, LOCATION_NAME.
#'
#' @return a data frame with cleaned data, including the added \code{DATE}
#' column
#'
#' @importFrom dplyr filter
#' @importFrom dplyr mutate
#' @importFrom dplyr '%>%'
#' @importFrom tidyr replace_na
#' @importFrom lubridate as_date
#'
#' @examples
#' \dontrun{
#' data <- eq_clean_data(raw_data)
#' }
#'
#' @export
eq_clean_data <- function(raw) {
  # Every column referenced by the cleaning pipeline below.
  required <- c(
    "YEAR", "MONTH", "DAY", "LATITUDE", "LONGITUDE",
    "EQ_PRIMARY", "DEATHS", "LOCATION_NAME"
  )

  quakes <- as.data.frame(raw)

  # Fail early, naming the missing columns, instead of letting the pipeline
  # fail later with an unrelated "object not found" error.
  absent <- setdiff(required, names(quakes))
  if (length(absent) > 0) {
    stop(
      "eq_clean_data(): the data set is missing required column(s): ",
      paste(absent, collapse = ", "),
      call. = FALSE
    )
  }

  tryCatch(
    quakes %>%
      # filter out years BCE
      dplyr::filter(YEAR > 0) %>%
      # replace NAs in MONTH and DAY with 1
      tidyr::replace_na(replace = list(MONTH = 1, DAY = 1)) %>%
      dplyr::mutate(
        # add DATE column built from the year-month-day triple
        DATE = lubridate::as_date(paste(YEAR, MONTH, DAY, sep = "-")),
        # force data to numeric
        LATITUDE = as.numeric(LATITUDE),
        LONGITUDE = as.numeric(LONGITUDE),
        EQ_PRIMARY = as.numeric(EQ_PRIMARY),
        DEATHS = as.numeric(DEATHS),
        # clean the LOCATION_NAME
        LOCATION_NAME = eq_location_clean(LOCATION_NAME)
      ),
    error = function(e) {
      # Re-signal with context but keep the underlying cause visible.
      stop(
        "eq_clean_data(): could not clean the data: ",
        conditionMessage(e),
        call. = FALSE
      )
    }
  )
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.