#' Clean Earthquake Data
#'
#' \code{eq_clean_data} performs a series of edits to clean the earthquake
#' data. It replaces missing values in the MONTH and DAY variables with '1' and
#' missing values in the HOUR, MINUTE and SECOND variables with '0', then
#' converts the SECOND variable to a numeric type rounded to the nearest whole
#' number. It uses the YEAR, MONTH, DAY, HOUR, MINUTE, and SECOND variables to
#' create a new DATE variable (class \code{Date}) that contains the date of an
#' event. It converts the LATITUDE and LONGITUDE variables to a numeric type,
#' renames the I_D variable to ID, changes the FLAG_TSUNAMI variable to a
#' logical value (\code{TRUE} when a flag is present, \code{FALSE} when it is
#' missing), and changes the EQ_PRIMARY variable to a numeric type. Finally,
#' it filters the dataset to remove observations with missing values in the
#' DATE, EQ_PRIMARY and TOTAL_DEATHS variables.
#'
#' @param df A data frame containing the earthquake data.
#'
#' @return A data frame containing the cleaned earthquake data. If an error
#'   or warning occurs, a message will be printed to the console and the
#'   function will return NULL.
#'
#' @details The DATE variable is built and converted in UTC so that the
#'   resulting calendar date does not depend on the time zone of the R
#'   session.
#'
#' @importFrom dplyr filter mutate rename
#'
#' @importFrom magrittr %>%
#'
#' @examples
#' \dontrun{
#' earthquakes <- eq_clean_data(earthquakes)
#' }
#'
#' @export
eq_clean_data <- function(df) {
  # Bind variables to NULL to avoid the global variable note on R CMD check.
  MONTH <- DAY <- HOUR <- MINUTE <- SECOND <- LATITUDE <- NULL
  LONGITUDE <- FLAG_TSUNAMI <- EQ_PRIMARY <- ID <- I_D <- NULL
  DATE <- TOTAL_DEATHS <- YEAR <- NULL

  # The value of tryCatch() is the value of the function, so a handled
  # warning or error yields NULL instead of the unmodified input.
  tryCatch({
    df %>%
      dplyr::mutate(
        MONTH = ifelse(is.na(MONTH), 1, MONTH),
        DAY = ifelse(is.na(DAY), 1, DAY),
        HOUR = ifelse(is.na(HOUR), 0, HOUR),
        MINUTE = ifelse(is.na(MINUTE), 0, MINUTE),
        SECOND = ifelse(is.na(SECOND), 0, SECOND),
        LATITUDE = as.numeric(LATITUDE),
        LONGITUDE = as.numeric(LONGITUDE),
        FLAG_TSUNAMI = !is.na(FLAG_TSUNAMI),
        EQ_PRIMARY = as.numeric(EQ_PRIMARY)
      ) %>%
      dplyr::rename(ID = I_D) %>%
      dplyr::mutate(
        SECOND = round(as.numeric(SECOND), 0),
        # Use UTC for both construction and conversion; mixing the local
        # time zone with a UTC-based Date conversion can shift the day.
        DATE = as.Date(
          ISOdate(year = YEAR,
                  month = MONTH,
                  day = DAY,
                  hour = HOUR,
                  min = MINUTE,
                  sec = SECOND,
                  tz = "UTC"),
          tz = "UTC"
        )
      ) %>%
      dplyr::filter(!is.na(DATE),
                    !is.na(EQ_PRIMARY),
                    !is.na(TOTAL_DEATHS))
  }, warning = function(w) {
    print(paste0("eq_clean_data: ", w))
    NULL
  }, error = function(e) {
    print(paste0("eq_clean_data: ", e))
    NULL
  })
}
#' Clean Location Values
#'
#' \code{eq_location_clean} formats the LOCATION_NAME variable by stripping
#' out the country from the name and converting the text from uppercase to
#' title case. For consistency, it also removes extra spaces from the text and
#' formats the COUNTRY variable in the same way as the LOCATION_NAME variable.
#'
#' @param df A data frame containing the earthquake data.
#'
#' @return A data frame containing the earthquake data with reformatted COUNTRY
#'   and LOCATION_NAME variables. If an error or warning occurs, a message will
#'   be printed to the console and the function will return NULL.
#'
#' @importFrom dplyr mutate
#'
#' @importFrom magrittr %>%
#'
#' @importFrom stringr str_squish str_to_title
#'
#' @details \code{eq_location_clean} assumes the country appears in the
#'   LOCATION_NAME variable at the beginning of the text and is separated from
#'   the location by a colon followed by a space. The regular expression is
#'   greedy: when the text contains several such separators, everything up to
#'   and including the last one is removed.
#'
#' @examples
#' \dontrun{
#' df <- eq_location_clean(df)
#' }
#'
#' @export
eq_location_clean <- function(df) {
  # Bind variables to NULL to avoid the global variable note on R CMD check.
  LOCATION_NAME <- COUNTRY <- NULL

  # The value of tryCatch() is the value of the function, so a handled
  # warning or error yields NULL instead of the unmodified input.
  tryCatch({
    df %>%
      dplyr::mutate(
        LOCATION_NAME = gsub("^.*: ", "", LOCATION_NAME),
        LOCATION_NAME = stringr::str_to_title(LOCATION_NAME),
        LOCATION_NAME = stringr::str_squish(LOCATION_NAME),
        COUNTRY = stringr::str_to_title(COUNTRY),
        COUNTRY = stringr::str_squish(COUNTRY)
      )
  }, warning = function(w) {
    print(paste0("eq_location_clean: ", w))
    NULL
  }, error = function(e) {
    print(paste0("eq_location_clean: ", e))
    NULL
  })
}
#' Select Data
#'
#' \code{eq_select_data} selects a subset of the variables in the earthquakes
#' dataset needed for analysis and visualization. It selects the ID, DATE,
#' COUNTRY, LOCATION_NAME, LONGITUDE, LATITUDE, EQ_PRIMARY and TOTAL_DEATHS
#' variables, in that order.
#'
#' @param df A data frame containing the earthquake data.
#'
#' @return A data frame containing the selected subset of earthquake data.
#'   If an error or warning occurs, a message will be printed to the console
#'   and the function will return NULL.
#'
#' @importFrom dplyr select
#'
#' @importFrom magrittr %>%
#'
#' @examples
#' \dontrun{
#' df <- eq_select_data(df)
#' }
#'
#' @export
eq_select_data <- function(df) {
  # Bind variables to NULL to avoid the global variable note on R CMD check.
  ID <- DATE <- COUNTRY <- LOCATION_NAME <- LONGITUDE <- NULL
  LATITUDE <- EQ_PRIMARY <- TOTAL_DEATHS <- NULL

  # The value of tryCatch() is the value of the function, so a handled
  # warning or error yields NULL instead of the unmodified input.
  tryCatch({
    df %>%
      dplyr::select(ID,
                    DATE,
                    COUNTRY,
                    LOCATION_NAME,
                    LONGITUDE,
                    LATITUDE,
                    EQ_PRIMARY,
                    TOTAL_DEATHS)
  }, warning = function(w) {
    print(paste0("eq_select_data: ", w))
    NULL
  }, error = function(e) {
    print(paste0("eq_select_data: ", e))
    NULL
  })
}
#' Filter Data
#'
#' \code{eq_filter_data} filters the earthquakes data to the specified
#' COUNTRY and DATE values. The date range is inclusive at both ends.
#'
#' @param df A data frame containing the earthquake data.
#'
#' @param countries A character vector of countries to be used to filter the
#'   observations.
#'
#' @param minimum_date A date value representing the minimum date used to
#'   filter observations.
#'
#' @param maximum_date A date value representing the maximum date used to
#'   filter observations.
#'
#' @return A data frame containing the filtered earthquake data. If an error
#'   or warning occurs, a message will be printed to the console and the
#'   function will return NULL. If the function finds no matching observations,
#'   it will return an empty data frame.
#'
#' @importFrom dplyr filter
#'
#' @importFrom magrittr %>%
#'
#' @examples
#' \dontrun{
#' df <- eq_filter_data(df,
#'                      countries = c("Usa", "China"),
#'                      minimum_date = "2000-01-01",
#'                      maximum_date = "2017-12-31")
#' }
#'
#' @export
eq_filter_data <- function(df, countries, minimum_date, maximum_date) {
  # Bind variables to NULL to avoid the global variable note on R CMD check.
  COUNTRY <- DATE <- NULL

  # The value of tryCatch() is the value of the function, so a handled
  # warning or error yields NULL instead of the unmodified input.
  tryCatch({
    df %>%
      dplyr::filter(COUNTRY %in% countries) %>%
      dplyr::filter(DATE >= minimum_date & DATE <= maximum_date)
  }, warning = function(w) {
    print(paste0("eq_filter_data: ", w))
    NULL
  }, error = function(e) {
    print(paste0("eq_filter_data: ", e))
    NULL
  })
}
#' Events By Country and Date Range
#'
#' \code{eq_count_events} assembles a summary data frame showing a count of
#' earthquakes for each country for a given date range. The date range is
#' inclusive at both ends.
#'
#' @param df A data frame containing the earthquake data.
#'
#' @param minimum_date A date value representing the minimum date used to
#'   filter observations.
#'
#' @param maximum_date A date value representing the maximum date used to
#'   filter observations.
#'
#' @return A data frame with a COUNTRY column and an EVENTS column containing
#'   the count of earthquakes by country, in descending order by count. If an
#'   error or warning occurs, a message will be printed to the console and the
#'   function will return NULL.
#'
#' @importFrom dplyr desc filter group_by n summarise arrange
#'
#' @importFrom magrittr %>%
#'
#' @examples
#' \dontrun{
#' df <- eq_count_events(df,
#'                       minimum_date = "2000-01-01",
#'                       maximum_date = "2017-12-31")
#' }
#'
#' @export
eq_count_events <- function(df, minimum_date, maximum_date) {
  # Bind variables to NULL to avoid the global variable note on R CMD check.
  DATE <- COUNTRY <- EVENTS <- NULL

  # The value of tryCatch() is the value of the function, so a handled
  # warning or error yields NULL instead of the unmodified input.
  tryCatch({
    df %>%
      dplyr::filter(DATE >= minimum_date & DATE <= maximum_date) %>%
      dplyr::group_by(COUNTRY) %>%
      dplyr::summarise(EVENTS = dplyr::n()) %>%
      dplyr::arrange(dplyr::desc(EVENTS))
  }, warning = function(w) {
    print(paste0("eq_count_events: ", w))
    NULL
  }, error = function(e) {
    print(paste0("eq_count_events: ", e))
    NULL
  })
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.