# Turn off scientific notation when printing numbers
options(scipen = 999)
library(httr)
library(jsonlite)
library(dplyr)
library(zoo)
library(remotes)
library(tidyverse)
library(reshape2)
library(lubridate)
# The code below queries the UK Covid-19 dashboard API. By adjusting the
# filters and structure parameters you can extract any of the published data.
# The dashboard's 'Metrics Documentation' page lists the inputs you need.
# API query for cases
#' Extract paginated data from the Covid-19 dashboard API
#'
#' Requests every page of results for the given query and combines
#' them into a single data frame.
#'
#' @param filters Character vector of API filters (joined with ";").
#' See the API documentation for additional information.
#'
#' @param structure Named list describing the response structure.
#' See the API documentation for additional information.
#'
#' @return A data frame containing all rows returned for the given
#' `filters` and `structure` (NULL if the API returns no data).
get_paginated_data <- function(filters, structure) {
  endpoint <- "https://api.coronavirus.data.gov.uk/v1/data"
  pages <- list()  # one data frame per page; bound together once at the end
  current_page <- 1

  repeat {
    response <- httr::GET(
      url = endpoint,
      query = list(
        filters = paste(filters, collapse = ";"),
        structure = jsonlite::toJSON(structure, auto_unbox = TRUE),
        page = current_page
      ),
      httr::timeout(10)
    )

    # Handle errors: http_status() returns a list, so pass its $message
    # to stop() (stop() on a bare list is itself an error).
    if (response$status_code >= 400) {
      stop(httr::http_status(response)$message, call. = FALSE)
    } else if (response$status_code == 204) {
      # 204 No Content: nothing (more) to fetch.
      break
    }

    # Convert response from binary to JSON:
    json_text <- httr::content(response, "text", encoding = "UTF-8")
    dt <- jsonlite::fromJSON(json_text)
    pages[[length(pages) + 1]] <- dt$data

    if (is.null(dt$pagination$`next`)) {
      break
    }
    current_page <- current_page + 1
  }

  # Single bind avoids the O(n^2) copy cost of rbind-ing inside the loop.
  do.call(rbind, pages)
}
# Query parameters ----
# Filters restrict which rows the API returns; the structure maps the
# output column names to the dashboard metric names.
query_filters <- c("areaType=region")

query_structure <- list(
  date = "date",
  name = "areaName",
  daily_report = "newCasesByPublishDate"
)
# Pull the regional case data and print a quick sanity report.
regions <- get_paginated_data(query_filters, query_structure)

# NOTE: the previous indexing `regions[0:3, 0:-1]` silently DROPPED the
# first column — in R, 0:-1 is c(0, -1), and a negative index removes
# column 1 (the date). head() keeps the first 3 rows with all columns.
report <- list(
  "Shape" = dim(regions),
  "Data (first 3 items)" = head(regions, 3)
)
print(report)
# Calculate the rolling seven-day average using zoo::rollmean().
# The window must be computed PER REGION: without group_by(name), the
# rolling mean runs straight across region boundaries and mixes the
# tail of one region's series with the head of the next.
region_averages <- regions %>%
  dplyr::group_by(name) %>%
  dplyr::mutate(
    pub_seven_day = zoo::rollmean(daily_report, k = 7, align = "left", fill = NA)
  ) %>%
  dplyr::ungroup()
# Reshape to wide format (one column per region) so the data is fit
# for visualisation on DW, with the newest dates first.
wide_cases <- reshape2::dcast(
  region_averages,
  date ~ name,
  value.var = "pub_seven_day",
  fun.aggregate = sum
)
publish_date <- dplyr::arrange(wide_cases, dplyr::desc(date))
# Keep only rows from the beginning of July 2021 onwards
# (ISO-formatted date strings compare correctly as text).
publish_date <- dplyr::filter(publish_date, date >= "2021-07-01")
# Export results to the data file in the repo.
# row.names = FALSE prevents write.csv() from prepending a spurious
# unnamed index column, which would break downstream viz tools.
write.csv(publish_date, file = "data/regions_cases_publish_date.csv", row.names = FALSE)
# (Scraped web-page boilerplate, commented out so the script parses:)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.