# R/list-file.R
#
# Defines functions: list_categories, list_providers, list_hist_file,
# list_hist_file_nomax, hist_file_versions, hist_file_url
#
# Documented in: hist_file_url, hist_file_versions, list_categories,
# list_hist_file, list_hist_file_nomax, list_providers

#' List all available categories
#'
#' Downloads the data.gov.hk API specification page and returns the first
#' HTML table on it whose first column name contains "Category"
#' (case-insensitive).
#'
#' @return The matching table, as parsed by \code{rvest::html_table()}.
#'   An error is raised when the request fails or no such table is found.
#'
#' @export
#'
list_categories <- function() {
  url <- "https://data.gov.hk/en/help/api-spec"
  # Namespace-qualified calls instead of require(): nothing is attached to
  # the caller's search path and a missing package fails loudly.
  res <- httr::GET(url)
  httr::stop_for_status(res)
  tabs <- rvest::html_table(httr::content(res), fill = TRUE)
  # vapply() keeps the mask logical even when the page has no tables.
  is_match <- vapply(
    tabs,
    function(x) isTRUE(grepl("Category", names(x)[1], ignore.case = TRUE)),
    logical(1)
  )
  if (!any(is_match)) {
    stop("No category table found at ", url, call. = FALSE)
  }
  tabs[[which(is_match)[1]]]
}

#' List all available providers
#'
#' Downloads the data.gov.hk API specification page and returns the first
#' HTML table on it whose first column name contains "provider"
#' (case-insensitive).
#'
#' @return The matching table, as parsed by \code{rvest::html_table()}.
#'   An error is raised when the request fails or no such table is found.
#'
#' @export
#'
list_providers <- function() {
  url <- "https://data.gov.hk/en/help/api-spec"
  # Namespace-qualified calls instead of require(): nothing is attached to
  # the caller's search path and a missing package fails loudly.
  res <- httr::GET(url)
  httr::stop_for_status(res)
  tabs <- rvest::html_table(httr::content(res), fill = TRUE)
  # vapply() keeps the mask logical even when the page has no tables.
  is_match <- vapply(
    tabs,
    function(x) isTRUE(grepl("provider", names(x)[1], ignore.case = TRUE)),
    logical(1)
  )
  if (!any(is_match)) {
    stop("No provider table found at ", url, call. = FALSE)
  }
  tabs[[which(is_match)[1]]]
}

#' Historical Archive File list
#'
#' Queries the data.gov.hk historical archive \code{list-files} endpoint.
#' A single call lists at most 500 results; use \code{skip} (or
#' \code{list_hist_file_nomax()}) to page through larger result sets.
#'
#' @param start start date of a period (anything \code{as.Date()} accepts)
#' @param end end date of a period (anything \code{as.Date()} accepts)
#' @param category Category ID, see website for a list of categories,
#' use \code{list_categories()} to see list of categories
#' @param provider Provider ID, see website for a list of provider ID,
#' use \code{list_providers()} to see list of providers
#' @param format File format
#' @param search keyword search
#' @param order sort order
#' @param skip the first x number of records to omit
#'
#' @return The \code{files} element of the parsed JSON response.
#'
#' @examples
#' list_hist_file("2015-01-01", "2018-01-01")
#'
#' list_hist_file("2015-01-01", "2018-01-01", category = "health")
#'
#' @export
#'
list_hist_file <- function(start = Sys.Date() - 1, end = Sys.Date() - 1,
                           category = NULL, provider = NULL,
                           format = NULL, search = NULL, order = NULL,
                           skip = NULL) {
  api_url <- "https://api.data.gov.hk/v1/historical-archive/list-files"
  # `format` is also an argument of this function, so the base function is
  # called explicitly to make clear which one is meant.
  if (!is.null(skip)) {
    # A numeric offset such as 100000 would otherwise be sent as "1e+05".
    skip <- base::format(skip, scientific = FALSE, trim = TRUE)
  }
  req <- list(
    start = base::format(as.Date(start), "%Y%m%d"),
    end = base::format(as.Date(end), "%Y%m%d"),
    category = category,
    provider = provider,
    format = format,
    search = search,
    order = order,
    skip = skip
  )
  # NULL entries are dropped by httr when the query string is built.
  res <- httr::GET(api_url, query = req)
  httr::stop_for_status(res)
  jsonlite::fromJSON(httr::content(res, "text", encoding = "UTF-8"))$files
}

#' List all historical files without maximum
#'
#' Continuously calls \code{list_hist_file()}, advancing \code{skip} by one
#' page (500 records) each time, until a page comes back empty or short,
#' then row-binds all pages together.
#'
#' @param ... parameters passed to \code{list_hist_file()}; do not pass
#'   \code{skip}, it is supplied by this function
#'
#' @return All pages bound together with \code{rbind()}, or an empty
#'   \code{list()} when the first page has no rows.
#'
#' @export
#'
list_hist_file_nomax <- function(...) {
  # Integer arithmetic keeps large offsets (e.g. 100000) from being
  # rendered in scientific notation when they reach the query string.
  page_size <- 500L
  pages <- list()
  page <- 0L
  repeat {
    data <- list_hist_file(..., skip = page * page_size)
    # NROW() is safe for NULL, empty lists and data frames alike.
    if (is.null(data) || NROW(data) == 0) break
    pages[[length(pages) + 1L]] <- data
    if (NROW(data) < page_size) break
    page <- page + 1L
  }
  if (length(pages) == 0) {
    return(list())
  }
  # Bind once at the end rather than re-copying the result on every page.
  do.call(rbind, pages)
}

#' Historical Archive File Version
#'
#' Queries the data.gov.hk historical archive \code{list-file-versions}
#' endpoint for one file over a date range.
#'
#' @param url url of the file
#' @param start start date (anything \code{as.Date()} accepts)
#' @param end end date; defaults to \code{start} when \code{NULL}
#'
#' @return The parsed JSON response.
#'
#' @export
#'
hist_file_versions <- function(url, start, end = NULL) {
  api_url <- "https://api.data.gov.hk/v1/historical-archive/list-file-versions"
  if (is.null(end)) {
    end <- start
  }
  req <- list(
    url = url,
    start = format(as.Date(start), "%Y%m%d"),
    end = format(as.Date(end), "%Y%m%d")
  )
  # Namespace-qualified calls instead of require(): nothing is attached to
  # the caller's search path and a missing package fails loudly.
  res <- httr::GET(api_url, query = req)
  httr::stop_for_status(res)
  jsonlite::fromJSON(httr::content(res, "text", encoding = "UTF-8"))
}

#' Historical Archive File URL For A Specific Version
#'
#' Builds the \code{get-file} request URL; no network request is made.
#'
#' @param url url of the historical files
#' @param timestamp timestamp of the historical file to retrieve
#'
#' @return A character string: the \code{get-file} endpoint with
#'   \code{url} and \code{timestamp} percent-encoded as the \code{url} and
#'   \code{time} query parameters.
#'
#' @export
#'
hist_file_url <- function(url, timestamp) {
  api_url <- "https://api.data.gov.hk/v1/historical-archive/get-file"
  encode <- function(x) {
    # repeated = TRUE: without it URLencode() returns any input that already
    # contains a "%xx" escape untouched, leaving its "?", "&" and "=" to
    # leak into the outer query string. as.character() lets non-character
    # timestamps through, since URLencode() only accepts character input.
    utils::URLencode(as.character(x), reserved = TRUE, repeated = TRUE)
  }
  sprintf("%s?url=%s&time=%s", api_url, encode(url), encode(timestamp))
}
# XiangdongGu/hkdata documentation built on Aug. 3, 2019, 6:18 p.m.