# R/regional_cases.R

# Turn off science
options(scipen = 999)

library(httr)
library(jsonlite)
library(dplyr)
library(zoo)
library(remotes)
library(tidyverse)
library(reshape2)
library(lubridate)

# The code below queries the Covid-19 dashboard API. By adjusting the filters and structure, you can extract all the data.
# The dashboard has a 'Metrics Documentation' page that lists the inputs you need.

#API query for cases

#' Extracts paginated data by requesting all of the pages
#' and combining the results.
#'
#' @param filters    Character vector of API filters (e.g. "areaType=region").
#'                   Multiple filters are joined with ";". See the API
#'                   documentation for additional information.
#'
#' @param structure  Structure parameter: a named list mapping output column
#'                   names to dashboard metric names. See the API
#'                   documentation for additional information.
#'
#' @return A data frame containing all rows for the given ``filters`` and
#'         ``structure``, combined across every page of the response.
get_paginated_data <- function(filters, structure) {

  endpoint     <- "https://api.coronavirus.data.gov.uk/v1/data"
  pages        <- list()  # one data frame per page; bound together at the end
  current_page <- 1

  repeat {

    response <- httr::GET(
      url   = endpoint,
      query = list(
        filters   = paste(filters, collapse = ";"),
        structure = jsonlite::toJSON(structure, auto_unbox = TRUE),
        page      = current_page
      ),
      httr::timeout(10)
    )

    # Handle errors. Note: httr::http_status() returns a list, so we must
    # pass its $message element to stop() -- stop() on a list fails with
    # "bad error message" and hides the real HTTP error.
    if (response$status_code >= 400) {
      stop(httr::http_status(response)$message, call. = FALSE)
    } else if (response$status_code == 204) {
      # 204 No Content: we have requested past the last page.
      break
    }

    # Convert response from binary to JSON:
    json_text <- httr::content(response, "text", encoding = "UTF-8")
    dt        <- jsonlite::fromJSON(json_text)

    # Collect each page in a list rather than rbind-ing inside the loop,
    # which copies the accumulated result on every iteration (O(n^2)).
    pages[[length(pages) + 1]] <- dt$data

    if (is.null(dt$pagination$`next`)) {
      break
    }

    current_page <- current_page + 1

  }

  dplyr::bind_rows(pages)

}


# Create filters: restrict the query to the nine English regions.
query_filters <- c(
  "areaType=region"
)

# Create the structure as a list or a list of lists. Names on the left become
# the output columns; values are the dashboard metric names (see the
# 'Metrics Documentation' page).
query_structure <- list(
  date         = "date",
  name         = "areaName",
  daily_report = "newCasesByPublishDate"
)

regions <- get_paginated_data(query_filters, query_structure)

# Quick sanity report: dimensions plus the first three rows.
# (The previous `regions[0:3, 0:-1]` was a mistranslated Python slice: in R,
# `0:-1` is c(0, -1), which silently DROPS the first column via negative
# indexing. head() keeps all columns.)
report <- list(
  "Shape"                = dim(regions),
  "Data (first 3 items)" = head(regions, 3)
)

print(report)

# Calculate the rolling seven-day average using zoo::rollmean().
# The data frame stacks every region's time series, so the rolling window
# MUST be computed within each region: without group_by(name), the window
# bleeds across region boundaries and averages the end of one region's
# series with the start of the next.
region_averages <- regions %>%
  dplyr::group_by(name) %>%
  dplyr::mutate(
    pub_seven_day = zoo::rollmean(daily_report, k = 7, align = "left", fill = NA)
  ) %>%
  dplyr::ungroup()

# Reshaping/transposing the data (one column per region) so it is fit for
# viz on DW, newest dates first.
publish_date <- region_averages %>%
  reshape2::dcast(date ~ name, value.var = "pub_seven_day", fun.aggregate = sum) %>%
  arrange(desc(date))

# Starting the data from the beginning of July. String comparison is safe
# here because the API returns ISO-8601 (YYYY-MM-DD) dates, which sort
# lexicographically in date order.
publish_date <- publish_date %>%
  filter(date >= "2021-07-01")

# Export results to the data file in the repo. row.names = FALSE avoids
# writing a spurious unnamed index column into the CSV.
write.csv(publish_date, file = "data/regions_cases_publish_date.csv",
          row.names = FALSE)
# GWilloughby99/covidauto documentation built on Jan. 28, 2022, 8:11 a.m.