#Andrew Spewak
#Mastering Software Development in R Capstone
# ##########
# # set up #
# ##########
# #clear R environment
# rm(list = ls())
#
# #set working directory
# setwd("C:/Users/aespe/Documents/Work/R Training/Capstone")
#
# #load packages
# library(tidyr)
# library(dplyr)
# library(lubridate)
# library(stringr)
# library(ggplot2)
#' eq_clean_data
#'
#' Function to clean the earthquakes dataset. It reads the raw file, builds a
#' single \code{date} column from \code{YEAR}, \code{MONTH} and \code{DAY},
#' converts \code{LATITUDE} and \code{LONGITUDE} to numeric, and cleans
#' \code{LOCATION_NAME} with \code{\link{eq_location_clean}}.
#'
#' Missing months and days are replaced with 1 before the date is built. The
#' cleaned data is returned to the caller; nothing is written to the global
#' environment, so assign the result yourself.
#'
#' @param datafile the raw earthquakes data file, in tab-delimited .txt format
#' @return a dataframe of cleaned earthquake data, in which a \code{date}
#'   column replaces \code{YEAR}, \code{MONTH} and \code{DAY}
#' @importFrom readr read_delim
#' @importFrom tidyr unite
#' @importFrom dplyr mutate
#' @importFrom lubridate ymd
#' @examples
#' \dontrun{clean_data <- eq_clean_data("signif.txt")}
#'
#' @export
#'
eq_clean_data <- function(datafile) {
  # ** import data **
  raw_data <- readr::read_delim(datafile, delim = "\t", progress = FALSE)

  # ** create date variable **
  # for months and days that are NA, replace with 1
  raw_data$MONTH[is.na(raw_data$MONTH)] <- 1
  raw_data$DAY[is.na(raw_data$DAY)] <- 1

  # combine year, month, and day into one "date" column (joined with unite()'s
  # default separator); plain function calls are used instead of a pipe so the
  # function does not depend on `%>%` being imported
  intermediate_data <- tidyr::unite(raw_data, "date", YEAR, MONTH, DAY)

  # convert to date format; the result must be assigned back to `date`,
  # otherwise mutate() adds a separate, oddly named column and leaves `date`
  # as text
  # NOTE(review): negative (BCE) years presumably fail to parse and become
  # NA - confirm against the data
  intermediate_data <- dplyr::mutate(
    intermediate_data,
    date = lubridate::ymd(date)
  )

  # ** convert latitude and longitude to numeric **
  intermediate_data$LATITUDE <- as.numeric(intermediate_data$LATITUDE)
  intermediate_data$LONGITUDE <- as.numeric(intermediate_data$LONGITUDE)

  # ** clean location name **
  eq_location_clean(intermediate_data)
}

#' eq_location_clean
#'
#' Function to clean the \code{LOCATION_NAME} variable of the earthquakes
#' dataset: strips out the country name (as given in \code{COUNTRY}) and
#' converts the remaining text to title case.
#'
#' The country name is matched literally, so names that contain regular
#' expression metacharacters (for example parentheses) are stripped as well.
#' Only the data frame passed in is used, and the cleaned copy is returned;
#' nothing is read from or written to the global environment.
#'
#' @param dataframe the dataframe with earthquake data, containing the
#'   original \code{LOCATION_NAME} column and a \code{COUNTRY} column
#' @return the same dataframe with \code{LOCATION_NAME} cleaned
#' @importFrom stringr str_replace str_to_title fixed
#' @examples
#' \dontrun{
#' raw <- data.frame(COUNTRY = "JAPAN", LOCATION_NAME = "JAPAN: TOKYO")
#' eq_location_clean(raw)
#' }
#'
#' @export
#'
eq_location_clean <- function(dataframe) {
  country <- dataframe$COUNTRY
  location <- dataframe$LOCATION_NAME

  # ** strip out country name **
  # The country appears in several layouts, so each one is tried in turn,
  # most specific first. Each element pairs a prefix with a suffix that
  # surround the country name in the text to remove.
  layouts <- list(
    c("-", ":  "), # preceded by - sign, followed by colon and two spaces
    c("-", ": "),  # preceded by - sign, followed by colon and one space
    c("-", ""),    # preceded by - sign
    c("", ":  "),  # followed by colon and two spaces
    c("", ": "),   # followed by colon and one space
    c("", "")      # country name alone
  )

  for (layout in layouts) {
    # fixed() makes the country name a literal string rather than a regex
    to_strip <- stringr::fixed(paste0(layout[1], country, layout[2]))
    location <- stringr::str_replace(location, to_strip, "")
  }

  # ** convert text to title case **
  dataframe$LOCATION_NAME <- stringr::str_to_title(location)
  dataframe
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.