##library(readr)
##library(magrittr)
##library(dplyr)
##library(stringr)
### Date column: unites year, month, day
### LATITUDE & LONGITUDE: convert to numeric class
### eq_location_clean() : cleans LOCATION_NAME col by stripping out the country name (including colon)
### and converts names to title case (from all-caps)
#----------------------------------------------------------------------------------------------
#' @name load_data
#'
#' @title Module 1: Obtain and Clean Data
#'
#' @description Read the raw, tab-delimited NOAA earthquake file into a data
#' frame. No cleaning is performed here; the result is exactly what
#' \code{readr::read_delim()} produces for the file.
#'
#' @param path A character string file path to the downloaded source.
#' Defaults to \code{file.path("data_raw", "signif.txt")}, which is resolved
#' relative to the current working directory.
#'
#' @return The data frame returned by \code{readr::read_delim()}. It is
#' returned visibly, so calling \code{load_data()} at the console prints it.
#'
#' @importFrom readr read_delim
#'
#' @export
#'
#' @examples
#' \dontrun{
#' df <- load_data()
#' print(class(df))
#' head(df)
#' }
#'
load_data <- function(path = file.path("data_raw", "signif.txt")) {
  # Return the call's value directly: ending the function on an assignment
  # (`df <- ...`) would hand the result back invisibly.
  readr::read_delim(path, delim = "\t")
}
#----------------------------------------------------------------------------------------------
#' @name get_date
#'
#' @title Create a date from NOAA data given DAY, MONTH, YEAR
#'
#' @description The NOAA data has columns called DAY, MONTH, YEAR. Part of the requirements for
#' cleaning this data is to create a date column. All of the years are provided, but some of
#' the months and days are NA. Missing days are replaced with the 1st and missing months with
#' January. Note that as.Date does not play nice with negative years, so filter those out
#' before calling this function.
#' Vectors days, months and years must be of the same length; an error is raised otherwise.
#'
#' @param days Day of the month as integer vector (NA is treated as 1)
#' @param months Month of the year as integer vector (NA is treated as 1)
#' @param years An integer vector of years
#'
#' @return A vector of class \code{Date} with one element per input element. Elements whose
#' year-month-day combination cannot be parsed with the format \code{"\%Y-\%m-\%d"} are
#' \code{NA}. Zero-length input gives a zero-length \code{Date} vector.
#'
#' @export
#'
#' @examples
#' \dontrun{
#' df <- dplyr::filter(load_data(), YEAR >= 0)
#' df <- dplyr::mutate(df, date = get_date(DAY, MONTH, YEAR))
#' print(class(df))
#' head(df)
#' }
#'
get_date <- function(days, months, years) {
  # Enforce the documented precondition instead of silently recycling or
  # truncating mismatched inputs.
  if (length(months) != length(days) || length(years) != length(days)) {
    stop("`days`, `months` and `years` must have the same length",
         call. = FALSE)
  }

  # Fill the gaps in one vectorized step; an unknown day or month falls back
  # to the first of the month / January.
  days[is.na(days)] <- 1L
  months[is.na(months)] <- 1L

  # paste() and as.Date() are both vectorized, so no element-wise loop is
  # needed (and empty input is handled naturally). The explicit format makes
  # an unparseable element come back as NA rather than aborting the whole call.
  as.Date(paste(years, months, days, sep = "-"), format = "%Y-%m-%d")
}
#---------------------------------------------------------------------------------------------
#' @name eq_clean_data
#'
#' @title Clean the raw NOAA earthquake data
#'
#' @description This function takes the raw NOAA data and cleans it: rows with a negative
#' YEAR are dropped, a \code{date} column is built from DAY, MONTH and YEAR with
#' \code{get_date()}, LATITUDE and LONGITUDE are converted to numeric, and
#' \code{eq_location_clean()} adds the CLEAN_LOCATION_NAME column.
#'
#' @param df_raw A data frame of raw NOAA data. It must contain the columns YEAR, MONTH,
#' DAY, LATITUDE, LONGITUDE and LOCATION_NAME.
#'
#' @return Returns a cleaned dataframe: the rows of \code{df_raw} with \code{YEAR >= 0},
#' with numeric LATITUDE and LONGITUDE and the added columns \code{date} and
#' \code{CLEAN_LOCATION_NAME}.
#'
#' @importFrom dplyr filter mutate
#'
#' @export
#'
#' @examples
#' \dontrun{
#' df <- eq_clean_data(load_data())
#' print(class(df))
#' head(df)
#' }
#'
eq_clean_data <- function(df_raw) {
  # Negative years are removed first because get_date() cannot turn them
  # into Date values.
  df <- dplyr::filter(df_raw, YEAR >= 0)

  # The three derived columns are independent of each other, so they can be
  # computed in a single mutate() call.
  df <- dplyr::mutate(
    df,
    date = get_date(DAY, MONTH, YEAR),
    LATITUDE = as.numeric(LATITUDE),
    LONGITUDE = as.numeric(LONGITUDE)
  )

  # Plain function calls are used instead of `%>%` so this function does not
  # depend on the pipe operator being imported or attached.
  eq_location_clean(df)
}
#----------------------------------------------------------------------------------------------
#' @name eq_location_clean
#'
#' @title Clean the LOCATION_NAME column
#'
#' @description Adds a CLEAN_LOCATION_NAME column derived from LOCATION_NAME: everything up
#' to and including the last colon is removed (this strips the country name), surrounding
#' whitespace is trimmed, and the remainder is converted to title case.
#' \code{eq_clean_data()} calls this function as its final step.
#'
#' @param df A data frame containing a LOCATION_NAME column.
#'
#' @return Returns \code{df} with an added (or overwritten) CLEAN_LOCATION_NAME column;
#' the original LOCATION_NAME column is left unchanged.
#'
#' @importFrom dplyr filter mutate
#' @importFrom stringr str_trim str_to_title
#'
#' @export
#' @examples
#' \dontrun{
#' df <- eq_location_clean(load_data())
#' print(class(df))
#' head(df)
#' }
#'
eq_location_clean <- function(df) {
  # ".*:" is greedy, so for names with several colons only the text after the
  # last colon is kept.
  # Plain nested calls are used instead of `%>%` so this function does not
  # depend on the pipe operator being imported or attached.
  dplyr::mutate(
    df,
    CLEAN_LOCATION_NAME = stringr::str_to_title(
      stringr::str_trim(gsub(".*:", "", LOCATION_NAME))
    )
  )
}
#----------------------------------------------------------------------------------------------
## getwd()
## if(!dir.exists("data_raw")){
## setwd('..') # go back to package directory
## }
## df_earthquakes <- load_data() %>% eq_clean_data()
## if(!file.exists(file.path("data", "df_earthquakes.rda"))){
# package this cleaned data as part of the project
## library(devtools)
## devtools::use_data(df_earthquakes)
## }
## print(names(df_earthquakes))
## print(head(df_earthquakes[c("YEAR", "MONTH", "DAY", "date")], 20)) # checking the date cleaning
## print(tail(df_earthquakes[c("YEAR", "MONTH", "DAY", "date")], 20))
## print("****************")
## print(head(df_earthquakes)) # and for the location name
## print(tail(df_earthquakes[c("LOCATION_NAME", "CLEAN_LOCATION_NAME")], 20))
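## NOTE: the two lines below are website embed boilerplate, not R code.
## They are kept as comments so that the file still parses.
## Add the following code to your website.
## For more information on customizing the embed code, read Embedding Snippets.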