# data-raw/processing_data.R

# Helper Functions
# Strip "<COUNTRY>:" prefixes from location names.
#
# For every country in `c_names`, each occurrence of the country name
# followed by a colon (and any whitespace after the colon) is removed from
# the elements of `v_s`. Elements that mention no listed country are returned
# unchanged, and NA elements stay NA.
#
# Arguments:
#   c_names  Character vector of country names; matched literally and
#            case-sensitively. Defaults to `country_names`, which must be
#            visible from the function's enclosing environment whenever the
#            default is relied on.
#   v_s      Character vector of location names.
#
# Returns a character vector with the same length as `v_s`.
remove_country <- function(c_names = country_names, v_s) {
  c_names <- as.character(c_names)
  # Drop unusable names: NA would poison the pattern and "" would strip
  # every bare ":" in the data.
  c_names <- unique(c_names[!is.na(c_names) & nzchar(c_names)])
  # Longest names first, so that "PAPUA NEW GUINEA: " is removed as a whole
  # before the shorter "GUINEA: " could eat only its tail.
  c_names <- c_names[order(nchar(c_names), decreasing = TRUE)]

  for (name in c_names) {
    # \Q...\E quotes the name so that regex metacharacters inside it
    # (".", "(", ")", ...) are matched literally.
    prefix <- paste0("\\Q", name, "\\E:\\s*")
    v_s <- gsub(pattern = prefix, replacement = "", x = v_s, perl = TRUE)
  }
  v_s
}


# Replace missing values with 1.
#
# Arguments:
#   x  A vector; the script passes the YEAR, MONTH and DAY columns.
#
# Returns `x` with every NA element set to 1. Non-missing elements are left
# as they are, and zero-length input is returned unchanged.
asign_one <- function(x) {
  is_missing <- is.na(x)
  # Only assign when something is actually missing, so an NA-free vector is
  # handed back untouched.
  if (any(is_missing)) {
    x[is_missing] <- 1
  }
  x
}

# Build a Date vector from separate month, day and year components.
#
# The i-th result is built from month[i], day[i] and year[i]; the length of
# `year` decides how many dates are produced. Each triple is pasted into a
# "month/day/year" string, parsed with chron::chron() and converted with
# as.Date().
#
# Arguments:
#   month, day, year  Vectors holding the date components.
#
# Returns a Date vector with one element per element of `year`; zero-length
# input yields a zero-length Date vector.
#
# NOTE(review): how chron treats years below 100 or negative (BC) years is
# not visible here -- confirm the parsed dates for such rows.
make_date <- function(month, day, year) {
  n_dates <- length(year)
  if (n_dates == 0L) {
    # Nothing to parse; pasting zero-length vectors with "/" would otherwise
    # yield the bogus string "//".
    return(structure(numeric(0), class = "Date"))
  }

  # Index explicitly by `year` so shorter/longer `month` or `day` vectors are
  # padded with NA / truncated instead of being silently recycled.
  idx <- seq_len(n_dates)
  v_date <- paste0(month[idx], "/", day[idx], "/", year[idx])

  v_date <- chron::chron(v_date)
  as.Date(v_date, origin = "1970/01/01")
}


# Processing data

# Make the pipe operator available without attaching dplyr.
`%>%` <- dplyr::`%>%`

# Raw inputs: the earthquake table and the upper-cased country names taken
# from the first column of the countries file.
Earthquakes <- readr::read_tsv(file = "data-raw/results.tsv")
country_names <- toupper(
  readr::read_table(file = "data-raw/countries.txt",
                    col_names = "countries")[[1]]
)

# Variant 1: keep only rows with a known month and day, add DATE, and drop
# the individual date/time component columns.
cleanES_1 <- Earthquakes %>%
  dplyr::filter(!is.na(MONTH), !is.na(DAY)) %>%
  dplyr::mutate(DATE = make_date(month = MONTH, day = DAY, year = YEAR)) %>%
  dplyr::select(-c(MONTH, DAY, YEAR, HOUR, MINUTE, SECOND))

# Variant 2: keep every row, filling missing year/month/day with 1 before
# DATE is built, then drop the individual date/time component columns.
cleanES_2 <- Earthquakes %>%
  dplyr::mutate(
    YEAR = asign_one(YEAR),
    MONTH = asign_one(MONTH),
    DAY = asign_one(DAY),
    DATE = make_date(month = MONTH, day = DAY, year = YEAR)
  ) %>%
  dplyr::select(-c(MONTH, DAY, YEAR, HOUR, MINUTE, SECOND))

# Variant 3: clean LOCATION_NAME -- run it through remove_country(), then
# lower-case it and convert it to title case.
cleanES_3 <- Earthquakes %>%
  dplyr::mutate(
    LOCATION_NAME = tools::toTitleCase(
      tolower(remove_country(v_s = LOCATION_NAME))
    )
  )
# Juanin2691/toolsEarthquakes documentation built on May 28, 2019, 5:41 p.m.