# Load the objects stored in data/quakes.rda, resolved relative to the current
# working directory, into the calling environment.
# NOTE(review): presumably this provides the `quakes` object that
# eq_clean_data() falls back to when no input is given - confirm.
load(file.path(getwd(), "data", "quakes.rda"))
#' Convert a date element vector into a "[0-9]{2}" string
#'
#' Each element is floored when it carries a decimal part (e.g. seconds such
#' as 2.9), left-padded with "0" when it is a single character long, and
#' replaced by `replaceNA` when it is missing.
#'
#' @param vect input vector as character or numeric
#' @param replaceNA pattern used to replace NAs
#'
#' @return An unnamed character vector of "[0-9]{2}" strings with the same
#'   length as `vect` (`character(0)` for empty input)
#'
#' @examples
#' CapstoneProject:::std_time(c(12, 4, 2.9))
std_time <- function(vect, replaceNA = "00") {
  # Remember which elements were missing before any coercion takes place
  is_missing <- is.na(vect)
  # Work on a character copy so the result type never depends on the input
  txt <- as.character(vect)

  # If there's a dot inside a time (eg. for seconds), floor the value
  has_dot <- !is_missing & grepl(pattern = "\\.", x = txt)
  txt[has_dot] <- as.character(floor(as.numeric(txt[has_dot])))

  # Left-pad single-character values; the is.na() guard keeps values that
  # failed numeric conversion out of the logical index
  is_short <- !is_missing & !is.na(txt) & nchar(txt) < 2
  txt[is_short] <- paste0("0", txt[is_short])

  # If time isn't specified, replace it with replaceNA
  txt[is_missing] <- as.character(replaceNA)
  txt
}
#' Returns a cleaned NOAA Significant Earthquakes data.frame
#'
#' Coerces LATITUDE and LONGITUDE to numeric, standardises the MONTH, DAY,
#' HOUR, MINUTE and SECOND columns with `std_time()`, adds a `date` column
#' built from YEAR and those columns, and turns LOCATION_NAME into title case
#' with everything up to the last colon (and following spaces) removed.
#'
#' @param data an already loaded data.frame from the NOAA dataset
#' @param filename the path to a NOAA tab separated datafile. When both
#' `data` and `filename` are given, the file is read and `data` is ignored.
#' @param replaceDateNA_by a five element vector. Will
#' be used to replace NA elements in dates
#' c("MONTH", "DAY", "HOUR", "MINUTE", "SECOND"). Allow
#' computation of partial dates.
#'
#' @import dplyr
#' @importFrom readr read_tsv
#' @importFrom stringr str_to_title
#' @importFrom lubridate ymd_hms
#' @importFrom stats setNames
#'
#' @return a dataframe with the cleaned columns and an additional `date`
#' column (`NA` where the assembled date string cannot be parsed)
#' @export
#'
#' @note
#' If no input are defined, function will use "quakes"
#' dataset as an input (see ?quakes for more information)
#'
#' @examples
#' df <- eq_clean_data()
#' head(df, 5)
#'
eq_clean_data <- function(data = NULL, filename = NULL,
                          replaceDateNA_by = c(NA, NA, "00", "00", "00")) {
  # If no data and no filename are given, fall back to the quakes dataset.
  # Both tests are scalar, hence the short-circuit `&&`.
  if (is.null(data) && is.null(filename)) {
    data <- quakes
  }
  # If a filename is given, it takes precedence over `data`
  if (!is.null(filename)) {
    data <- readr::read_tsv(filename)
  }

  data %>%
    # Mutate columns to be compliant to a set of specifications.
    # `.data$` makes explicit that these names are columns of the input.
    dplyr::mutate(
      # Coordinates are numeric
      LATITUDE = as.numeric(.data$LATITUDE),
      LONGITUDE = as.numeric(.data$LONGITUDE),
      # LOCATION_NAME is title case
      LOCATION_NAME = stringr::str_to_title(.data$LOCATION_NAME),
      # Date parts are set to a [0-9]{2} format, or to the matching
      # replacement pattern when NA
      MONTH = std_time(.data$MONTH, replaceDateNA_by[1]),
      DAY = std_time(.data$DAY, replaceDateNA_by[2]),
      HOUR = std_time(.data$HOUR, replaceDateNA_by[3]),
      MINUTE = std_time(.data$MINUTE, replaceDateNA_by[4]),
      SECOND = std_time(.data$SECOND, replaceDateNA_by[5])
    ) %>%
    dplyr::mutate(
      # Add a date built from the standardised parts; strings that cannot be
      # parsed silently become NA (quiet = TRUE)
      date = as.Date(lubridate::ymd_hms(
        paste0(
          .data$YEAR,
          .data$MONTH,
          .data$DAY,
          .data$HOUR,
          .data$MINUTE,
          .data$SECOND
        ),
        quiet = TRUE
      )),
      # Strip the country prefix and its colon from the location name
      LOCATION_NAME = gsub(
        pattern = "^(.+):( )*", replacement = "",
        x = .data$LOCATION_NAME
      )
    )
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.