# R/cleaningFunctions.R

#' @title Clean data from NOAA
#' @description Cleans a data frame of NOAA earthquake records: builds a
#' single DATE column from YEAR, MONTH and DAY, converts the coordinates
#' to numeric and drops the date/time component columns.
#' @param data data frame from NOAA. It must contain the columns YEAR,
#' MONTH and DAY, Default: NULL
#' @param r.na if TRUE, removes all rows where MONTH or DAY is NA.
#' Otherwise, keeps them and replaces NA's in YEAR, MONTH and DAY by 1,
#' Default: TRUE
#' @return a data frame with the following:
#'   A DATE column (appended as the last column) created by uniting the
#'   year, month and day and converting it to the Date class. Years are
#'   taken literally, so years below 100 and negative (BCE) years are
#'   supported. Impossible dates (for example February 30) become NA
#'   with a warning.
#'   LATITUDE and LONGITUDE columns converted to numeric class, when
#'   those columns are present.
#'   Any other column in the original data frame, except MONTH,
#'   YEAR, DAY, HOUR, MINUTE and SECOND
#' @details This function is only useful for data from NOAA or
#' another data with similar information. An error is raised when
#' \code{data} is not a data frame or lacks YEAR, MONTH or DAY.
#' @examples
#' \dontrun{
#' if(interactive()){
#'  clean_data_1 <- eq_clean_data(data = Earthquakes)
#'  head(clean_data_1)
#'  }
#'  if(interactive()){
#'  clean_data_2 <- eq_clean_data(data = Earthquakes,
#'  r.na = FALSE)
#'  head(clean_data_2)
#'  }
#' }
#' @rdname eq_clean_data
#' @export
#' @importFrom dplyr `%>%` filter_ mutate_ select_ rename_
#' @importFrom chron chron
eq_clean_data <- function(data = NULL, r.na = TRUE){

  # ---- argument validation -------------------------------------------
  if (!is.data.frame(data)) {
    stop("`data` must be a data frame", call. = FALSE)
  }
  date_cols <- c("YEAR", "MONTH", "DAY")
  missing_cols <- setdiff(date_cols, names(data))
  if (length(missing_cols) > 0) {
    stop("`data` is missing required column(s): ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }
  r.na <- as.logical(r.na)
  if (length(r.na) != 1L || is.na(r.na)) {
    stop("`r.na` must be a single TRUE or FALSE", call. = FALSE)
  }

  # Coerce a column to double; factors go through their labels so that
  # the level codes are never mistaken for the values.
  to_number <- function(x){
    if (is.factor(x)) {
      x <- as.character(x)
    }
    as.numeric(x)
  }

  # Build a Date vector from numeric month/day/year vectors.
  # The day count since 1970-01-01 is computed arithmetically in the
  # proleptic Gregorian calendar, so the year is used exactly as given
  # (no two-digit-year expansion, negative years allowed) and
  # zero-length input yields a zero-length Date.
  make_date <- function(month, day, year){
    days_in_month <- c(31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
    leap <- (year %% 4 == 0 & year %% 100 != 0) | year %% 400 == 0

    valid_month <- !is.na(month) & month %in% 1:12
    last_day <- rep(NA_real_, length(month))
    last_day[valid_month] <- days_in_month[month[valid_month]]
    last_day[valid_month & month == 2 & !is.na(leap) & leap] <- 29

    ok <- valid_month & !is.na(year) & !is.na(day) &
      day >= 1 & day <= last_day

    # Components that are all present but do not form a real date.
    impossible <- !ok & !is.na(year) & !is.na(month) & !is.na(day)
    if (any(impossible)) {
      warning(sum(impossible), " invalid calendar date(s) set to NA",
              call. = FALSE)
    }

    # Count years from March so the leap day is the last day of the
    # shifted year; 146097 is the number of days in a 400-year cycle
    # and 719468 the offset between 0000-03-01 and 1970-01-01.
    shifted_year <- year - (month <= 2)
    era <- shifted_year %/% 400
    year_of_era <- shifted_year - era * 400
    shifted_month <- (month + 9) %% 12
    day_of_year <- (153 * shifted_month + 2) %/% 5 + day - 1
    day_of_era <- year_of_era * 365 + year_of_era %/% 4 -
      year_of_era %/% 100 + day_of_year
    days <- era * 146097 + day_of_era - 719468

    days[!ok] <- NA_real_
    structure(days, class = "Date")
  }

  parts <- lapply(data[date_cols], to_number)

  if (r.na) {
    # Drop the records whose month or day is unknown.
    keep <- !is.na(parts$MONTH) & !is.na(parts$DAY)
    data <- data[keep, , drop = FALSE]
    rownames(data) <- NULL
    parts <- lapply(parts, function(x) x[keep])
  } else {
    # Keep every record; unknown date components default to 1.
    parts <- lapply(parts, function(x) replace(x, is.na(x), 1))
  }

  data$DATE <- make_date(month = parts$MONTH,
                         day = parts$DAY,
                         year = parts$YEAR)

  # Coordinates are documented as numeric; convert them when present.
  for (col in intersect(c("LATITUDE", "LONGITUDE"), names(data))) {
    data[[col]] <- to_number(data[[col]])
  }

  # Remove the date/time component columns (only those that exist).
  drop_cols <- c(date_cols, "HOUR", "MINUTE", "SECOND")
  data <- data[, setdiff(names(data), drop_cols), drop = FALSE]

  data
}


#' @title Clean location name for data from NOAA
#' @description This function transforms the LOCATION_NAME column in
#' a dataset from NOAA, by stripping out the country name (including
#' the colon) and converting names to title case (as opposed to all caps)
#' @param data data frame from NOAA. It must contain a LOCATION_NAME
#' column, Default: NULL
#' @param countries character vector with the country names to strip,
#' spelled exactly as they appear in LOCATION_NAME. Default: the
#' "country_names" vector included in this package
#' @return the input data frame with the LOCATION_NAME column cleaned
#' and any other column in the original data frame left unchanged.
#' @details by default this function uses the character vector
#' "country_names" included in this package. For more details type
#' ?country_names. Country names are matched literally (not as regular
#' expressions) and longer names are removed first, so that a name which
#' is the tail of another one (e.g. "GUINEA" and "PAPUA NEW GUINEA")
#' cannot leave a partial prefix behind. Missing location names stay NA.
#' An error is raised when \code{data} is not a data frame or has no
#' LOCATION_NAME column.
#' @examples
#' \dontrun{
#' if(interactive()){
#'   clean_data <- eq_location_clean(data = Earthquakes)
#'   head(clean_data)
#'   }
#'  }
#' @rdname eq_location_clean
#' @export
#' @importFrom dplyr `%>%` mutate_
#' @importFrom tools toTitleCase
eq_location_clean <- function(data = NULL,
                              countries = toolsEarthquakes::country_names){

  if (!is.data.frame(data) || !("LOCATION_NAME" %in% names(data))) {
    stop("`data` must be a data frame with a LOCATION_NAME column",
         call. = FALSE)
  }

  # Usable country names, longest first (see @details).
  countries <- unique(as.character(countries))
  countries <- countries[!is.na(countries) & nzchar(countries)]
  countries <- countries[order(nchar(countries), decreasing = TRUE)]

  locations <- as.character(data$LOCATION_NAME)

  # Remove every "<COUNTRY>:" marker. The match is done on the untouched
  # text and with fixed = TRUE, so the colon is still there to be found
  # and characters such as "(" or "." in a name are taken literally.
  for (country in countries) {
    locations <- gsub(paste0(country, ":"), "", locations, fixed = TRUE)
  }

  # The separator is followed by one or more blanks; drop what is left.
  locations <- trimws(locations)
  locations <- tolower(locations)

  # Title-case only real text so that NA and empty strings pass through
  # unchanged and zero-row input is handled.
  has_text <- !is.na(locations) & nzchar(locations)
  if (any(has_text)) {
    locations[has_text] <- tools::toTitleCase(locations[has_text])
  }

  data$LOCATION_NAME <- locations
  data
}
# Juanin2691/toolsEarthquakes documentation built on May 28, 2019, 5:41 p.m.