R/download_timetable.R

Defines functions download_timetable

Documented in download_timetable

#' @title Download timetable data
#' @description Download VBZ timetable data from \url{data.stadt-zuerich.ch}.
#'   Timetable data is fetched as one CSV per 7-day period, the first period
#'   starting on 2015-09-20. Every weekly file that overlaps the range
#'   \code{start} to \code{end} is downloaded, followed by the stop
#'   (\code{haltestelle}) and coordinate (\code{haltepunkt}) files of every
#'   calendar year touched by those weeks.
#' @import data.table
#' @import stringr
#' @import ArgumentCheck
#' @importFrom lubridate as_date
#' @importFrom stats time
#' @importFrom utils setTxtProgressBar txtProgressBar
#' @param start First day to download in \code{as.Date} or \code{as.character} format
#' @param end Last day to download in \code{as.Date} or \code{as.character} format
#' @param export_folder Folder to locate CSVs in
#' @return Called for its side effects: writes
#'   \code{timetable_<YYYYMMDD>_<YYYYMMDD>.csv}, \code{stops_<year>.csv} and
#'   \code{coordinates_<year>.csv} files to \code{export_folder}. Returns
#'   \code{NULL} invisibly. Years for which no stop/coordinate link is known
#'   (currently anything outside 2015-2019) are skipped with a warning.
#' @export

download_timetable <- function(start, end, export_folder = NULL) {

  # Normalise export_folder with the package helper so that file names can be
  # appended with paste0() below
  export_folder <- clean_folder(export_folder)

  # The first weekly file starts on 2015-09-20; afterwards files are split in
  # 7 days of data
  initial_date <- lubridate::as_date("2015-09-20")
  start <- lubridate::as_date(start)
  end <- lubridate::as_date(end)

  # Check start and end are valid dates
  check <- ArgumentCheck::newArgCheck()
  check_start_end_dates(check, start, end)

  # One row per weekly file published up to `end`
  dates <- seq(initial_date, end, by = "weeks")
  dates <- data.table::data.table(start_date = dates)
  dates[, end_date := start_date + lubridate::days(6)]

  # Keep only the weeks that overlap [start, end]
  dates <- dates[end_date >= start]

  # Build the remote file name (<YYYYMMDD>_<YYYYMMDD>.csv), its URL and the
  # local destination
  dates[, file := paste(start_date, end_date, sep = "_")]
  dates[, file := stringr::str_remove_all(file, "-")]
  dates[, file := paste0(file, ".csv")]
  # NOTE(review): the same dataset/resource id is used for every week,
  # whatever its year - confirm this is still how the portal serves the files
  dates[, url := paste0(
    "https://data.stadt-zuerich.ch/dataset/d822e74e-1cd8-425b-a1a2-526dae6c2f94/resource/ef32fc9f-9660-4040-b113-d720119fc781/download/fahrzeiten_soll_ist_",
    file
  )]
  dates[, file := paste0(export_folder, "timetable_", file)]

  # Download data for each week. Guard against an empty selection:
  # txtProgressBar() requires max > min and 1:0 would iterate twice.
  n_files <- nrow(dates)
  if (n_files > 0L) {
    pb <- utils::txtProgressBar(
      min = 0,
      max = n_files,
      style = 3
    )
    # Close the bar even when a download fails half-way
    on.exit(close(pb), add = TRUE)

    for (row in seq_len(n_files)) {
      utils::download.file(
        url = dates$url[row],
        destfile = dates$file[row]
      )
      # Advance only once the file has actually been fetched
      utils::setTxtProgressBar(pb, row)
    }
  }

  # Links for stop data
  stop_links <- data.table::data.table(
    year = 2015:2019,
    stop_url = c(
      "https://data.stadt-zuerich.ch/dataset/d822e74e-1cd8-425b-a1a2-526dae6c2f94/resource/6cd581b4-e3be-49c0-9189-2745fb7ceb3e/download/haltestelle.csv",
      "https://data.stadt-zuerich.ch/dataset/4f65e004-3fea-490d-8962-b07933a3e286/resource/0a5d37dc-30ef-4d34-a982-6aaf45c7c38c/download/haltestelle.csv",
      "https://data.stadt-zuerich.ch/dataset/67ab5ceb-26e4-413d-b03e-3ac66583af95/resource/3772bd0b-b1c0-4118-9f2c-1e22eef23459/download/haltestelle.csv",
      "https://data.stadt-zuerich.ch/dataset/eade7147-a0c0-448c-819d-e62a3314030b/resource/6f787b23-1879-4e89-ba33-3e76b9260260/download/haltestelle.csv",
      "https://data.stadt-zuerich.ch/dataset/3b960a00-4b5f-4e45-a149-5a048f7ee837/resource/ca171682-3153-4ff3-b197-e49f1cc52bdc/download/haltestelle.csv"
    ),
    coordinates_url = c(
      "https://data.stadt-zuerich.ch/dataset/d822e74e-1cd8-425b-a1a2-526dae6c2f94/resource/7ffe25da-94cc-4ebf-b05c-ade0d67ced7c/download/haltepunkt.csv",
      "https://data.stadt-zuerich.ch/dataset/4f65e004-3fea-490d-8962-b07933a3e286/resource/bce3d641-fd60-4a2e-94e7-0617d70bc50c/download/haltepunkt.csv",
      "https://data.stadt-zuerich.ch/dataset/67ab5ceb-26e4-413d-b03e-3ac66583af95/resource/02e2d7ed-4078-4ff7-a3e2-39b824b29b0b/download/haltepunkt.csv",
      "https://data.stadt-zuerich.ch/dataset/eade7147-a0c0-448c-819d-e62a3314030b/resource/f2fb6878-d7ec-4ceb-899b-d8dce867489e/download/haltepunkt.csv",
      "https://data.stadt-zuerich.ch/dataset/3b960a00-4b5f-4e45-a149-5a048f7ee837/resource/98560f7c-8ecb-4f0e-8a9b-65895b02f127/download/haltepunkt.csv"
    )
  )
  stop_links[, stop_file := paste0(export_folder, "stops_", year, ".csv")]
  stop_links[, coordinates_file := paste0(export_folder, "coordinates_", year, ".csv")]

  # Every calendar year touched by a downloaded week. A week may straddle New
  # Year, so both of its ends are considered. (Deriving the year from the
  # scalar `end` argument would only ever yield the last year of the range.)
  years <- sort(unique(c(
    data.table::year(dates$start_date),
    data.table::year(dates$end_date)
  )))

  # Years without a known link would otherwise hand a zero-length url to
  # download.file(); report them and carry on with the others
  unknown_years <- setdiff(years, stop_links$year)
  if (length(unknown_years) > 0L) {
    warning(
      "No stop/coordinate download links known for year(s): ",
      paste(unknown_years, collapse = ", "),
      call. = FALSE
    )
  }

  for (n_year in intersect(years, stop_links$year)) {
    links <- stop_links[year == n_year]

    utils::download.file(
      url = links$stop_url,
      destfile = links$stop_file
    )

    utils::download.file(
      url = links$coordinates_url,
      destfile = links$coordinates_file
    )
  }

  invisible(NULL)
}
lucasjamar/VBZtools documentation built on May 20, 2020, 3:44 a.m.