# Global knitr chunk options for this vignette; printed output is
# prefixed with "#>".
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library(diseasystore)

# Shorten tibble printing and silence diseasystore while rendering.
# Without withr, the previous option values are kept in `opts` so that
# they can be restored manually at the end of the vignette.
if (!rlang::is_installed("withr")) {
  opts <- options("tibble.print_min" = 5, "diseasystore.verbose" = FALSE)
} else {
  withr::local_options(
    "tibble.print_min" = 5,
    "diseasystore.verbose" = FALSE
  )
}

# We have a "hard" dependency for duckdb to render parts of this vignette
suggests_available <- rlang::is_installed("duckdb")

# TRUE for interactive sessions, or when NOT_CRAN / CI is set to "true"
not_on_cran <- interactive() ||
  identical(Sys.getenv("NOT_CRAN"), "true") ||
  identical(Sys.getenv("CI"), "true")
The Google COVID-19 data repository is a comprehensive open repository of COVID-19 data.
This vignette shows how to use (some of) this data through the diseasystore
package.
First, it is a good idea to copy the relevant Google COVID-19 data files locally and store that location as an option
for the package.
?DiseasystoreGoogleCovid19
uses only the age-stratified metrics for COVID-19, so only a subset of the repository needs to be
downloaded.
# First we set the path we want to use as an option
options(
  "diseasystore.DiseasystoreGoogleCovid19.source_conn" =
    file.path("local", "path")
)

# Look up the local and remote locations once, rather than on every iteration
source_conn <- diseasyoption("source_conn", "DiseasystoreGoogleCovid19")
remote_conn <- diseasyoption("remote_conn", "DiseasystoreGoogleCovid19")

# Ensure folder exists
if (!dir.exists(source_conn)) {
  dir.create(source_conn, recursive = TRUE, showWarnings = FALSE)
}

# Define the Google files to download
google_files <- c("by-age.csv", "demographics.csv", "index.csv", "weather.csv")

# Download each file that is not already stored locally.
# The files are stored as downloaded (no compression is applied).
purrr::walk(google_files, \(file) {
  destfile <- file.path(source_conn, file)

  if (!file.exists(destfile)) {
    download.file(paste0(remote_conn, file), destfile)
  }
})
# The files we need are stored remotely in Google's API
google_files <- c("by-age.csv", "demographics.csv", "index.csv", "weather.csv")
remote_conn <- diseasyoption("remote_conn", "DiseasystoreGoogleCovid19")

# In practice, it is best to keep a local copy of the data in a
# "vignette_data" folder. If that folder exists (preferred, please create
# it in the package folder) it is used; otherwise we fall back to tempdir().
local_conn <- purrr::detect(
  "vignette_data",
  checkmate::test_directory_exists,
  .default = tempdir()
)

# Use the local copy as source and set n_max to 1000.
# Without withr, the previous values are appended to `opts` for later restore.
if (!rlang::is_installed("withr")) {
  opts <- c(
    opts,
    options(
      "diseasystore.DiseasystoreGoogleCovid19.source_conn" = local_conn,
      "diseasystore.DiseasystoreGoogleCovid19.n_max" = 1000
    )
  )
} else {
  withr::local_options(
    "diseasystore.DiseasystoreGoogleCovid19.source_conn" = local_conn,
    "diseasystore.DiseasystoreGoogleCovid19.n_max" = 1000
  )
}

# Then we download the first n rows of each data set that is not yet
# available locally. Failures (e.g. no network) must not stop the vignette.
try({
  purrr::discard(
    google_files,
    \(file) checkmate::test_file_exists(file.path(local_conn, file))
  ) |>
    purrr::walk(\(file) {
      first_rows <- readr::read_csv(
        paste0(remote_conn, file),
        n_max = 1000,
        show_col_types = FALSE,
        progress = FALSE
      )
      readr::write_csv(first_rows, file.path(local_conn, file))
    })
})

# Check that the files are available after attempting to download
files_missing <- purrr::some(
  google_files,
  \(file) !checkmate::test_file_exists(file.path(local_conn, file))
)
data_available <- !files_missing
The diseasystores
require a database to store their features in.
This database should be configured before use, and its connection can be stored in the package's options.
# target_conn is a function: every call opens a DBI connection to the DB
target_conn <- function() {
  DBI::dbConnect(duckdb::duckdb())
}

# Register the connection function as a package option
options("diseasystore.DiseasystoreGoogleCovid19.target_conn" = target_conn)
# Function that opens a DBI connection to a duckdb database when called
target_conn <- function() {
  DBI::dbConnect(duckdb::duckdb())
}

# Register it as the target connection option.
# Without withr, the previous value is appended to `opts` for later restore.
if (!rlang::is_installed("withr")) {
  opts <- c(
    opts,
    options("diseasystore.DiseasystoreGoogleCovid19.target_conn" = target_conn)
  )
} else {
  withr::local_options(
    "diseasystore.DiseasystoreGoogleCovid19.target_conn" = target_conn
  )
}
Once the files are downloaded and the target DB is configured, we can initialize the diseasystore
that uses the
Google COVID-19 data.
# Create the diseasystore instance. No arguments are passed here, so the
# source and target connections are presumably read from the package
# options configured earlier -- TODO confirm against the class documentation.
ds <- DiseasystoreGoogleCovid19$new()
Once configured in this way, we can use the feature store directly to get data.
# List all the features available in the feature store
ds$available_features

# Retrieve the "n_hospital" feature between 2020-01-01 and 2020-06-01
ds$get_feature(
  feature = "n_hospital",
  start_date = as.Date("2020-01-01"),
  end_date = as.Date("2020-06-01")
)
# Remove the diseasystore object if it was created in this environment.
# `inherits = FALSE` keeps the check consistent with rm(), which only
# removes from the current environment; a `ds` found elsewhere on the
# search path would otherwise pass the check and make rm() warn.
if (exists("ds", inherits = FALSE)) {
  rm(ds)
}

# Trigger garbage collection after dropping the object
# NOTE(review): presumably so the DB connection gets finalised -- confirm.
gc()

# Without withr the options were changed via options(); restore saved values
if (!rlang::is_installed("withr")) {
  options(opts)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.