R/cleaning.R

Defines functions get_date capwords parens_caps

Documented in capwords

# convert to proper date format
#' @param year The year as a number
#'
#' @param month The month as a number 0-12
#' @param day The day as a number 0-31
#'
#' @import lubridate
get_date <- function(year, month, day){
  y <- formatC(year, format = "d", flag = "0")
  d <- lubridate::ymd(paste(y, month, day, sep = "-"))
  return(d)
}

# convert to title case
capwords <- function(s, strict = FALSE){
#' Title
#'
#' @param s String to convert to title case.
#'
#' @return String in title case.
#'
#' @examples
#' capwords("how now brown cow?")
#'
#' @export
  cap <- function(s){
    paste(toupper(substring(s, 1, 1)), {
      s <- substring(s, 2); if(strict) tolower(s) else s
      },
      sep = "", collapse = " " )
  }
  sapply(strsplit(s, split = " "), cap, USE.NAMES = !is.null(names(s)))
}

# convert to title case inside parentheses
#' @param s String
#'
#' @import dplyr
parens_caps <- function(s){
  gsub("\\(", "\\( ", s) %>%
    capwords() %>%
    gsub("\\( ", "\\(", .) %>%
    return()
}

# remove state/country designation from location name
#' @param loc String containing location name
#'
#' @import dplyr
eq_location_clean <- function(loc){
  out <- c()
  for(i in 1:length(loc)){
    l <- loc[i] %>%
      strsplit(": ") %>%
      unlist %>%
      .[length(.)] %>%
      trimws() %>%
      gsub(",", ", ", .) %>%
      capwords(strict = TRUE) %>%
      parens_caps()
    out <- c(out, l)
  }
  return(out)
}

#' @param dat data frame containing ... stuff.
#'
#' @param lc_names Boolean; convert to lower case?
#'
#' @note Will raise a warning if negative dates are passed
#' @import dplyr
#' @export
eq_clean_data <- function(dat, lc_names = TRUE){
  dat <- dat %>%
    dplyr::filter(!is.na(YEAR) & YEAR > 0) %>%
    dplyr::mutate(MONTH = ifelse(is.na(MONTH), 1, MONTH),
           DAY = ifelse(is.na(DAY), 1, DAY)) %>%
    dplyr::mutate(DATE = get_date(YEAR, MONTH, DAY)) %>%
    dplyr::mutate(LOCATION = eq_location_clean(LOCATION_NAME))
  if(lc_names) names(dat) <- tolower(names(dat))
  return(dat)
}
burch-cm/noaaview documentation built on May 6, 2019, 6:03 p.m.