rmarkdown::render('_README.Rmd', output_file = 'README.md')
rmarkdown::clean_site() rmarkdown::render_site(quiet = TRUE)
knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) devtools::load_all() library(pdftools) library(rvest) library(dplyr) library(tibble) library(DT) library(anytime) library(glue) library(reshape2) library(zoo) library(xml2) library(ggplot2) library(ggrepel) library(plotly) Sys.setlocale('LC_TIME', 'en_GB.UTF-8')
dat <- download.updated.pt() dgs.pt.new <- dat$dgs.pt dgs.pt <- tibble() tryCatch(dgs.pt <- covid19.pt.data::dgs.pt, error = function(err) { }) # DGS PT if (digest::digest(dgs.pt.new, algo = "sha256") != digest::digest(dgs.pt, algo = "sha256")) { send.discord.msg(dgs.pt.new, dgs.pt) dgs.pt <- dgs.pt.new usethis::use_data(dgs.pt, overwrite = TRUE) readr::write_csv(dgs.pt, file = '../data/dgs_pt.csv') }
R package with latest data scrapped from official sources (last data from
r format(max(dgs.pt.new$date), "%A, %B %d, %Y")
)
It downloads the daily report from DGS and stores this in data-friendly format under /data
directory. It also retrieves age data from the DGS' ESRI dashboard
If you are here just for the data, this is what you want:
note: The EU CDC data has been removed as the daily reports from EU have been discontinued in favor of weekly counts.
A mortality analysis of Portugal is available here
download.updated.pt()
dgs.pt <- dgs.pt.new
age.data <- get.age.data(dgs.pt)
Last 5 days (how each column is changing over these days)
dgs.pt %>% arrange(desc(date)) %>% top_n(6, date) %>% dplyr::relocate(second_vaccine, .after = in.icu) %>% dplyr::relocate(first_vaccine, .after = in.icu) %>% dplyr::mutate_if(is.integer, function(my.input) {return(zoo::rollapply(data = my.input, width = 2, FUN = function(a) {return(a[1] - mean(a[-1]))}, align = 'left', partial = TRUE))}) %>% dplyr::filter(!is.nan(confirmed)) %>% dplyr::select(-tests, -country) %>% dplyr::rename_with(~gsub("_", " ", .x, fixed = TRUE)) %>% knitr::kable()
dgs.pt %>% reshape2::melt( id.vars = c('country', 'date'), variable.name = 'type', measure.vars = c( 'deaths', 'confirmed', 'hospitalized', 'in.icu', 'recovered', 'first_vaccine', 'second_vaccine' ) ) %>% mutate(type = gsub('confirmed', 'Confirmed cases', type) %>% gsub('first_vaccine', 'First vaccine', .) %>% gsub('second_vaccine', 'Second vaccine', .) %>% gsub('deaths', 'Deaths', .) %>% gsub('hospitalized', 'Hospitalized', .) %>% gsub('recovered', 'Recovered', .) %>% gsub('tests', 'Suspected individuals', .) %>% gsub('in.icu', 'In ICU', .)) %>% group_by(country, type) %>% arrange(date) %>% mutate( label = if_else( date == last(date), paste0(type, ': ', format(value, digits = 2, big.mark =',', trim = FALSE)), NA_character_ ) ) %>% ggplot(aes(x = date, y = value, color = type, label = label, fill = type)) + geom_line(size = 1, alpha = .6) + geom_point(size = 1) + ggrepel::geom_label_repel(color = 'white', segment.color = 'red', na.rm = TRUE, min.segment.length = 0, #nudge_x = -2, #nudge_y = -.5, segment.alpha = .8, max.time = 5, alpha = .85, max.iter = 100000, force = 40, force_pull = 0, max.overlaps = 10, size = 3, seed = 1985) + scale_x_date(date_labels = '%B %d (%a)', date_breaks = "10 day", date_minor_breaks = "5 day") + labs(x = 'Date', y = 'Number of individuals', title = 'Individuals that were Infected, Recovered and Died', subtitle = 'Showing in Logarithmic scale (base 10)', caption = "{format(max(dgs.pt.new$date), '%A, %B %d, %Y')}\n Label points to the maximum point in time" %>% glue::glue()) + scale_y_continuous(trans = 'log10', labels = function(ix) { format(ix, big.mark = ',', scientific = FALSE) }) + scale_color_viridis_d(end = .7) + scale_fill_viridis_d(end = .7) + theme_minimal() + theme(legend.position = 'none', axis.text.x = element_text(angle = 45, hjust = 1))
plot.data <- dgs.pt %>% reshape2::melt( id.vars = c('country', 'date'), variable.name = 'type', measure.vars = c('deaths', 'confirmed', 'hospitalized', 'in.icu', 'recovered', 'first_vaccine', 'second_vaccine') ) %>% mutate(type = gsub('confirmed', 'Confirmed cases', type) %>% gsub('first_vaccine', 'First vaccine', .) %>% gsub('second_vaccine', 'Second vaccine', .) %>% gsub('deaths', 'Deaths', .) %>% gsub('hospitalized', 'Hospitalized', .) %>% gsub('recovered', 'Recovered', .) %>% gsub('tests', 'Suspected individuals', .) %>% gsub('in.icu', 'In ICU', .)) %>% group_by(country, type) %>% arrange(desc(date)) %>% mutate(value = zoo::rollapply(value, 2, function(ix) { if(length(ix) <= 1) { return(ix) } else { ix[1] - sum(ix[-1]) } }, fill = c(0, 0, 0), align = 'left', partial = TRUE)) %>% group_by(country, type) %>% arrange(date) %>% filter(date > min(date)) %>% ## values will not be for that day build.labels('value', 'type', show_plus = TRUE) roundUp <- function(x) 10^ceiling(log10(x)) my.breaks <- c(-100, -40, -20, -10, -5, -2, 0, 2, 5, 10, 30, 100, 300, 1000, 3000, 10000, 20000) my.breaks <- c(my.breaks, max(plot.data$value, na.rm = TRUE) %>% roundUp(), -1 * (abs(min(plot.data$value, na.rm = TRUE)) %>% roundUp())) %>% unique %>% sort plot.data %>% ggplot(aes(x = date, y = value, color = type, label = label, fill = type)) + geom_hline(aes(yintercept = 0), size = .7, alpha = .3, linetype = 3) + geom_line(size = 1, alpha = .6) + geom_point(size = 1) + ggrepel::geom_label_repel(color = 'white', segment.color = 'red', na.rm = TRUE, min.segment.length = 0, #nudge_x = -2, #nudge_y = -.5, segment.alpha = .8, max.time = 5, alpha = .85, max.iter = 100000, force = 40, force_pull = 0, size = 3, seed = 1985) + labs(x = 'Date', y = 'Number of individuals', title = 'Individuals that were Infected, Recovered and Died', subtitle = 'Showing in a (pseudo) logarithmic scale', caption = "data from {format(max(dgs.pt.new$date), '%A, %B %d, %Y')}\n Label points to the maximum point in time" %>% glue::glue()) + scale_x_date(date_labels = '%B %d (%a)', date_breaks = "10 day", date_minor_breaks = "5 day") + scale_color_viridis_d(end = .7) + scale_fill_viridis_d(end = .7) + scale_y_continuous(trans = scales::pseudo_log_trans(sigma = 1, base = 10), labels = function(ix) { format(ix, big.mark = ',', scientific = FALSE) }, breaks = my.breaks) + theme_minimal() + theme(legend.position = 'none', axis.text.x = element_text(angle = 45, hjust = 1, size = 5))
plot.data <- dgs.pt %>% reshape2::melt( id.vars = c('country', 'date'), variable.name = 'type', measure.vars = c('deaths', 'confirmed', 'hospitalized', 'in.icu', 'recovered', 'first_vaccine', 'second_vaccine') ) %>% mutate(type = gsub('confirmed', 'Confirmed cases', type) %>% gsub('first_vaccine', 'First vaccine', .) %>% gsub('second_vaccine', 'Second vaccine', .) %>% gsub('deaths', 'Deaths', .) %>% gsub('hospitalized', 'Hospitalized', .) %>% gsub('recovered', 'Recovered', .) %>% gsub('tests', 'Suspected individuals', .) %>% gsub('in.icu', 'In ICU', .)) %>% group_by(country, type) %>% filter(!is.na(value)) %>% arrange(desc(date)) %>% mutate(value = zoo::rollapply(value, 2, function(ix) { if(length(ix) <= 1) { return(ix) } else { ix[1] - sum(ix[-1]) } }, fill = c(0, 0, 0), align = 'left', partial = TRUE)) %>% # Remove first day of each type as the difference for the previous day is not real group_by(country, type) %>% arrange(date) %>% filter(row_number() > 1L) %>% arrange(desc(date)) %>% # Calculate rolling average mutate(value = zoo::rollapply(value, 7, mean, align = 'left', fill = c(0,0,0), partial = TRUE)) %>% group_by(country, type) %>% arrange(date) %>% filter(date > min(date)) %>% ## values will not be for that day build.labels('value', 'type', digits = 0, show_plus = TRUE) roundUp <- function(x) 10^ceiling(log10(x)) my.breaks <- c(-100, -40, -20, -10, -5, -2, 0, 2, 5, 10, 30, 100, 300, 1000, 3000, 10000, 20000) my.breaks <- c(my.breaks, max(plot.data$value, na.rm = TRUE) %>% roundUp(), -1 * (abs(min(plot.data$value, na.rm = TRUE)) %>% roundUp())) %>% unique %>% sort plot.data %>% ggplot(aes(x = date, y = value, color = type, label = label, fill = type)) + geom_hline(aes(yintercept = 0), size = .7, alpha = .3, linetype = 3) + geom_line(size = 1, alpha = .6) + geom_point(size = 1) + ggrepel::geom_label_repel(color = 'white', segment.color = 'red', na.rm = TRUE, min.segment.length = 0, #nudge_x = -2, #nudge_y = -.5, segment.alpha = .8, size = 3, alpha = .85, max.time = 5, max.iter = 1000000, max.overlaps = 10, force = 40, force_pull = 0, seed = 1985) + labs(x = 'Date', y = 'Number of individuals', title = '7-day average of Individuals that were Infected, Recovered and Died', subtitle = 'Showing in a (pseudo) logarithmic scale', caption = "{format(max(dgs.pt.new$date), '%A, %B %d, %Y')}\n Label points to the maximum point in time" %>% glue::glue()) + scale_x_date(date_labels = '%B %d (%a)', date_breaks = "10 day", date_minor_breaks = "5 day") + scale_color_viridis_d(end = .7) + scale_fill_viridis_d(end = .7) + scale_y_continuous(trans = scales::pseudo_log_trans(sigma = 1, base = 10), labels = function(ix) { format(ix, big.mark = ',', scientific = FALSE) }, breaks = my.breaks) + theme_minimal() + theme(legend.position = 'none', axis.text.x = element_text(angle = 45, hjust = 1, size = 5))
age.ix.f <- dgs.pt %>% filter(!is.na(`confirmed_m_00-09`)) %>% pull(date) %>% max age.data.all <- get.age.data.with.labels(age.data, age.ix.f) age.data.all.new <- get.age.new.data(age.data, age.data.all, age.ix.f)
if(age.ix.f != dgs.pt %>% pull(date) %>% max) { cat('*note: data update may be delayed by a couple of hours (since age data was been removed from daily report and we\'ve been retrieving it from DGS\'s covid19 dashboard, which may have a delay)*') }
Age data may show different numbers from absolutes in the daily report. This is out of our control and dependent on the data source (DGS covid19 dashboard).
if(!is.null(age.data.all.new)) { my.plots.new <- get.plot.for.new(age.data.all.new, age.ix.f) print(my.plots.new$confirmed + theme(plot.title = element_text(size=10), plot.subtitle = element_text(size=8))) print(my.plots.new$deaths + theme(plot.title = element_text(size=10), plot.subtitle = element_text(size=8))) } else { cat('Missing age grouping information for this day:', format(age.ix.f)) }
Age data may show different numbers from absolutes in the daily report. This is out of our control and dependent on the data source (DGS covid19 dashboard).
if(!is.null(age.data.all.new)) { my.plots.all <- get.plot.for.all(age.data.all, age.ix.f, dgs.pt = dgs.pt) print(my.plots.all$confirmed + theme(plot.title = element_text(size=9), plot.subtitle = element_text(size=8))) print(my.plots.all$deaths + theme(plot.title = element_text(size=9), plot.subtitle = element_text(size=8))) } else { cat('Missing age grouping information for this day:', format(age.ix.f)) }
Only showing last 20 days
dgs.pt %>% arrange(desc(date)) %>% top_n(20, date) %>% select(-country, -tests) %>% dplyr::rename_with(~gsub("_", " ", .x, fixed = TRUE)) %>% knitr::kable()
Based on the daily new confirmed cases and current 'mortality rate' per age group (see plots above)
Only shows last 20 days
age.data.all %>% dplyr::mutate(rate = if_else(death == 0 | is.nan(death) | is.nan(confirmed), 0, abs(death) / abs(confirmed))) %>% dplyr::select(type, gender, rate) %>% dplyr::right_join(age.data, by = c('type', 'gender')) %>% dplyr::filter(age_type == 'confirmed') %>% dplyr::select(-age_type) %>% dplyr::group_by(country, type, gender) %>% dplyr::arrange(desc(date)) %>% dplyr::mutate(value = zoo::rollapply(value, 2, function(ix) { if(length(ix) <= 1) { return(ix) } else { ix[1] - sum(ix[-1]) } }, fill = c(0, 0, 0), align = 'left', partial = TRUE)) %>% dplyr::filter(value != 0) %>% dplyr::ungroup() %>% dplyr::mutate(value = abs(value), predicted = value * rate) %>% dplyr::select(date, type, gender, predicted) %>% # top_n(10, predicted) %>% reshape2::dcast(date ~ type + gender, value.var = 'predicted') %>% tibble::tibble() %>% dplyr::filter(date != '2020-11-20') %>% # weird number dplyr::filter(date != '2020-11-22') %>% # weird number dplyr::mutate(across(!starts_with('date'), ~tidyr::replace_na(.x, 0))) %>% dplyr::mutate(across(!starts_with('date'), ~round(.x, 3))) %>% dplyr::rowwise() %>% dplyr::mutate(predicted_future_deaths = sum(across(!starts_with('date')))) %>% dplyr::relocate(predicted_future_deaths, .after = date) %>% dplyr::left_join(dgs.pt %>% arrange(desc(date)) %>% dplyr::mutate_if(is.integer, function(my.input) {return(zoo::rollapply(data = my.input, width = 2, FUN = function(a) {return(a[1] - mean(a[-1]))}, align = 'left', partial = TRUE))}) %>% dplyr::filter(!is.nan(confirmed)) %>% select(date, confirmed), by = "date") %>% dplyr::mutate(confirmed = format(confirmed, big.mark = ' ')) %>% dplyr::relocate(confirmed, .after = date) %>% dplyr::ungroup() %>% dplyr::top_n(20, date) %>% dplyr::arrange(desc(date)) %>% dplyr::rename_with(~gsub("_", " ", .x, fixed = TRUE)) %>% knitr::kable()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.