# Global knitr chunk options applied to every code chunk of this README.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.path = "man/figures/README-",
  fig.retina = 10,
  out.width = "100%"
)
This package supports importing, preparing, modelling and visualizing data on COVID-19 infections from the Johns Hopkins University (JHU) GitHub repository.
You can install the released version of corona from GitHub with:
# devtools supplies install_github(), used here to install corona from GitHub.
install.packages("devtools")
devtools::install_github("jnshsrs/corona")
To load the dataset, just call `read_corona()`. The function fetches the latest data from the JHU GitHub repository.
In contrast to the JHU GitHub data, which is structured in a wide format (each day has a column and each row represents a country), this dataset is rearranged into a long format in which each combination of country and day represents a row.
library(dplyr, warn.conflicts = FALSE)
library(corona)

# Import the corona data (long format) and print it
data <- read_corona()
data
Additionally, the corona package comes with reader functions for the three individual statistics, i.e., number of infections, number of deaths and number of recoveries. These functions are mainly used as helper functions for `read_corona()`.
# Import numbers of infections
read_infections()

# Import numbers of deaths
read_deaths()

# Import numbers of recoveries
read_recoveries()
To work with the data, we first have to preprocess it:
# Prepare data: infections in Germany, preprocessed with n = 100
data_germany <- data %>%
  preprocess_corona_data(
    statistic = "infections",
    countries = "Germany",
    n = 100
  )
# Predict the cases from the preprocessed German data
predict_growth(data_germany)
# Data pipeline: preprocess -> predict -> plot, for infections in Spain
data %>%
  preprocess_corona_data(
    statistic = "infections",
    countries = "Spain",
    n = 100
  ) %>%
  predict_growth() %>%
  plot_country(show_model = TRUE)
# Data pipeline: preprocess -> predict -> plot, for deaths in Italy.
# The subtitle typo ("Starte date") is corrected to "Start date".
data %>%
  preprocess_corona_data(
    statistic = "deaths",
    countries = "Italy",
    n = 10
  ) %>%
  predict_growth() %>%
  plot_country(show_model = TRUE) +
  ggplot2::ggtitle(
    "Corona Death Growth Curve in Italy",
    subtitle = "Start date is the first day with > 10 deaths"
  )
The function `lm_corona()` takes a preprocessed corona dataset (as a tibble or data frame) and returns a data frame with the parameters of an exponential growth model.
The columns base_rate and growth rate indicate the initial case numbers and the estimated growth across the entire time period, respectively.
# Fit the exponential growth model to German infection data
data %>%
  preprocess_corona_data(
    countries = "Germany",
    statistic = "infections",
    n = 100
  ) %>%
  lm_corona()
The function `plot_country()` plots the exponential growth model for the given country (note that this function can process only one country; a function to compare countries is not available so far).
# Data pipeline: growth model plot for deaths in Germany
data %>%
  preprocess_corona_data(
    statistic = "deaths",
    countries = "Germany",
    n = 100
  ) %>%
  predict_growth() %>%
  plot_country(show_model = TRUE) +
  ggplot2::ggtitle(
    "Number of cumulative deaths in Germany",
    subtitle = "Days since the 100th case included"
  )
# Data pipeline: growth model plot for infections in Germany, n = 10000.
# The title now says "infections", matching statistic = "infections"
# (it previously said "deaths").
data %>%
  preprocess_corona_data(
    statistic = "infections",
    countries = "Germany",
    n = 10000
  ) %>%
  predict_growth() %>%
  plot_country(show_model = TRUE) +
  ggplot2::ggtitle(
    "Number of cumulative infections in Germany",
    subtitle = "Days since the 10000th (1e4) case included"
  )
# Sum infections, deaths and recoveries within each country/date combination
data %>%
  group_by(country, date) %>%
  summarise_at(
    c("infections", "deaths", "recoveries"),
    sum
  )
library(ggplot2)

# Daily growth rate of infections for several countries: raw series
# (faint lines) overlaid with loess-smoothed curves.
data %>%
  preprocess_corona_data(
    statistic = "infections",
    countries = c("Germany", "Italy", "Spain", "US", "Vietnam"),
    n = 100
  ) %>%
  # Group by country so lag() never reaches across a country boundary.
  # This is a no-op if preprocess_corona_data() already returns data
  # grouped by country.
  group_by(country) %>%
  mutate(daily_growth_rate = statistic / lag(statistic)) %>%
  ungroup() %>%
  # The first observation of each series has no predecessor.
  filter(!is.na(daily_growth_rate)) %>%
  ggplot(aes(x = date, y = daily_growth_rate, colour = country)) +
  geom_line(alpha = .4) +
  # An explicit formula avoids the geom_smooth() message about the default.
  geom_smooth(method = "loess", formula = y ~ x, se = FALSE, span = .55) +
  scale_y_continuous("Daily growth rate (smoothed)") +
  scale_x_date(
    breaks = seq(min(data$date), max(data$date), by = "2 days"),
    # Spelled out in full instead of relying on partial matching of `label`.
    labels = scales::date_format(format = "%d %b")
  ) +
  scale_color_discrete("Country") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 90),
    axis.title.x = element_blank()
  ) +
  ggtitle(
    "Daily growth rates since the 100th case",
    subtitle = "Observed data is superimposed by smoothed lines"
  )
library(ggplot2)

# Daily growth rate of deaths for several countries: raw series
# (faint lines) overlaid with loess-smoothed curves.
data %>%
  preprocess_corona_data(
    statistic = "deaths",
    countries = c("Germany", "Italy", "Spain", "US", "Vietnam"),
    n = 100
  ) %>%
  # Group by country so lag() never reaches across a country boundary.
  # This is a no-op if preprocess_corona_data() already returns data
  # grouped by country.
  group_by(country) %>%
  mutate(daily_growth_rate = statistic / lag(statistic)) %>%
  ungroup() %>%
  # The first observation of each series has no predecessor.
  filter(!is.na(daily_growth_rate)) %>%
  ggplot(aes(x = date, y = daily_growth_rate, colour = country)) +
  geom_line(alpha = .4) +
  geom_smooth(method = "loess", formula = "y ~ x", se = FALSE, span = .55) +
  scale_y_continuous("Daily growth rate (smoothed)") +
  scale_x_date(
    # Breaks extend three days past the last observation.
    breaks = seq(
      min(data$date),
      max(data$date) + lubridate::days(3),
      by = "2 days"
    ),
    # Spelled out in full instead of relying on partial matching of `label`.
    labels = scales::date_format(format = "%d %b")
  ) +
  scale_color_discrete("Country") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 90),
    axis.title.x = element_blank()
  ) +
  ggtitle(
    "Daily deaths growth rates since the 100th case",
    subtitle = "Observed data is superimposed by smoothed lines"
  )
# Cumulative infections in Germany on a log scale. Point size and the text
# label both encode the day-over-day growth rate (in percent).
data %>%
  preprocess_corona_data(
    statistic = "infections",
    countries = c("Germany", "Italy", "Spain", "US", "Vietnam"),
    n = 7000
  ) %>%
  # Group by country so lag() never reaches across a country boundary.
  # This is a no-op if preprocess_corona_data() already returns data
  # grouped by country.
  group_by(country) %>%
  # Relative day-over-day change: ratio to the previous day minus one.
  mutate(daily_growth_rate = statistic / lag(statistic) - 1) %>%
  ungroup() %>%
  filter(country == "Germany") %>%
  mutate(format = scales::percent(daily_growth_rate, accuracy = 1)) %>%
  # The first observation of the series has no predecessor.
  filter(!is.na(daily_growth_rate)) %>%
  ggplot(aes(x = date, y = statistic)) +
  geom_point(aes(size = daily_growth_rate)) +
  geom_line(linetype = 3, size = .5) +
  geom_text(aes(label = format), nudge_x = 0, nudge_y = .2) +
  scale_y_log10(
    "Cumulative Infections",
    breaks = c(1e4, 2e4, 5e4, 1e5, 2e5, 5e5),
    limits = c(1e4, 5e5),
    labels = c("10k", "20k", "50k", "100k", "200k", "500k"),
    minor_breaks = NULL
  ) +
  facet_wrap(~ country, ncol = 1) +
  theme_minimal() +
  scale_size("Daily Growth Rate", labels = scales::percent) +
  scale_x_date(
    # Breaks extend three days past the last observation.
    breaks = seq(
      min(data$date),
      max(data$date) + lubridate::days(3),
      by = "2 days"
    ),
    # Spelled out in full instead of relying on partial matching of `label`.
    labels = scales::date_format(format = "%d %b")
  ) +
  theme(
    legend.position = "bottom",
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 90)
  ) +
  ggtitle(
    "Cumulative Infection Count",
    subtitle = "Daily Growth Rate for each Day in percent"
  )
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.