# R/clean_holiday_data.R
#
# Defines functions: clean_holiday_data
#
# Documented in: clean_holiday_data

#' @title Clean holiday data
#' @description Clean and combine holiday data from feiertagskalendar.
#'   Reads `work_holidays.csv`, `unrecognized_holidays.csv` and
#'   `school_holidays.csv` from `import_folder`, merges them into a single
#'   table and writes it to `holidays_cleaned.csv` in `export_folder`.
#' @details
#'   * Only the first two columns of the work and unrecognized holiday files
#'     are read; their column names are lower-cased and are expected to become
#'     `date` and `designation`.
#'   * Only the first three columns of the school holiday file are read; they
#'     must come back from `fread()` as `V1`, `V2`, `V3` and are treated as
#'     first day, last day and designation of a holiday range.
#'   * All dates are parsed as day-month-year.
#'   * Each school holiday range is expanded to one row per day.
#'   * When a date appears in several sources, work holidays take precedence
#'     over unrecognized holidays, and both take precedence over school
#'     holidays.
#'   * The output columns are `date`, `day_designation` and `day_type`
#'     (`"work_hdy"`, `"unrecognized_hdy"` or `"school_hdy"`).
#' @import data.table
#' @import fst
#' @importFrom lubridate days
#' @param import_folder Folder where data is located
#' @param export_folder Folder to export dataset. Defaults to `NULL`, in which
#'   case the cleaned `import_folder` is used.
#' @return Called for its side effect of writing `holidays_cleaned.csv`;
#'   returns the result of [data.table::fwrite()].
#' @export
clean_holiday_data <- function(import_folder, export_folder = NULL) {

  # Clean folder names; without an export folder, write next to the inputs.
  # NOTE(review): paths below are built with paste0(), so clean_folder() is
  # presumably expected to return a path ending in a separator - confirm.
  import_folder <- clean_folder(import_folder)

  if (is.null(export_folder)) {
    export_folder <- import_folder
  } else {
    export_folder <- clean_folder(export_folder)
  }

  # Read the first two columns of a holiday file and lower-case its header.
  read_dated_holidays <- function(file_name) {
    dt <- data.table::fread(paste0(import_folder, file_name), select = 1:2)
    data.table::setnames(dt, names(dt), stringr::str_to_lower(names(dt)))
    dt
  }

  work_holidays <- read_dated_holidays("work_holidays.csv")
  unrecognized_holidays <- read_dated_holidays("unrecognized_holidays.csv")

  school_holidays <- data.table::fread(
    paste0(import_folder, "school_holidays.csv"),
    select = 1:3
  )
  data.table::setnames(
    school_holidays,
    c("V1", "V2", "V3"),
    c("first_day", "last_day", "designation")
  )

  # Work holidays win over unrecognized holidays falling on the same date.
  unrecognized_holidays <- unrecognized_holidays[!date %in% work_holidays$date]
  unrecognized_holidays[, day_type := "unrecognized_hdy"]
  work_holidays[, day_type := "work_hdy"]
  holidays <- rbind(work_holidays, unrecognized_holidays)
  holidays[, date := lubridate::dmy(date)]

  # Expand every school holiday range into one row per calendar day.
  school_holidays[, first_day := lubridate::dmy(first_day)]
  school_holidays[, last_day := lubridate::dmy(last_day)]
  school_holidays[
    ,
    holiday_length :=
      as.integer(difftime(last_day, first_day, units = "days")) + 1L
  ]

  # Keep the per-range lengths from before the expansion: they drive both the
  # row repetition and the day offsets. seq_len() keeps an empty table empty,
  # and sequence() restarts the offset at 0 for every range, even when two
  # ranges share the same first day.
  range_lengths <- school_holidays$holiday_length
  school_holidays <- school_holidays[
    rep(seq_len(nrow(school_holidays)), range_lengths)
  ]
  school_holidays[, days_since_holiday_start := sequence(range_lengths) - 1L]
  school_holidays[
    ,
    date := first_day + lubridate::days(days_since_holiday_start)
  ]

  # Work and unrecognized holidays win over school holidays on the same date.
  school_holidays <- school_holidays[!date %in% holidays$date]
  school_holidays[, day_type := "school_hdy"]
  school_holidays <- school_holidays[, .(date, designation, day_type)]
  holidays <- rbind(holidays, school_holidays)

  # Force 25 and 26 December 2017 to count as work holidays.
  # NOTE(review): presumably corrects a misclassification in the source
  # files for that year - confirm why only 2017 is patched.
  holidays[
    date %in% as.Date(c("2017-12-25", "2017-12-26")),
    day_type := "work_hdy"
  ]

  data.table::setnames(holidays, "designation", "day_designation")
  data.table::fwrite(holidays, paste0(export_folder, "holidays_cleaned.csv"))
}
# lucasjamar/VBZtools documentation built on May 20, 2020, 3:44 a.m.