# ## Read first: Implicit non-exclusivity agreement # # - I (Joe) am working right now only under conditions of non-exclusivity. # - This means that if a collaborator (you) is working with me, you understand that I may publish, share, or disseminate any COVID-19 info (other than that which is related to private, personal data) to whomever I deem suitable # - The condition goes both ways: the collaborator (you) also can share publicly anything I do with you, or with whomever you deem suitable # - The reason for this non-exclusivity pact is principle: the COVID-19 crisis requires extremely fast action, and any delays in publication or dissemination for the purposes of getting an "exclusive" publication, authorship, ownership, recognition, etc. is unacceptable # - This non-exclusivity principile applies to everything we work on together, unless explicitly stated otherwise. # - If you don't like non-exclusivity (that is, you want me to "keep secret" any ideas we generate or work on together), that's fine. But find somebody else to work with. # - Just to be clear: If you are working with me, this means all of our code / data is public and can be shared by any of the collaborators (you and me) with anybody else, at any time.
This document seeks to answer the question: how soon until Spain runs out of ICU beds?
Click "Code" to see code snippets.
How soon until Spain runs out of UCI beds?
We define some parameters at the state level, generate a simple log-linear predictive model for daily UCI admissions weighted by days ago, and compare the capacity "ceiling" with prediction.
Here are the parameters:
beds_public = 3508, beds_private = 896, normal_occupancy = 25, need_hospitalization = 15, # percent which requires hospitalization need_uci = 5, # percent which requires uci beds = 4404, average_days_in_uci = 10 # total rough estimate
Here is pseudo-code of model definition:
model = log(daily UCI admission) ~ days, weights = 1 / (1 + today - date)
# Basic knitr options library(knitr) opts_chunk$set(comment = NA, # echo = FALSE, warning = FALSE, message = FALSE, error = TRUE, cache = FALSE, tidy = TRUE, # tidy.opts=list(width.cutoff=60), # tidy=TRUE, # fig.width = 8.64, # fig.height = 4.86, fig.path = 'figures/')
## Load libraries library(covid19) #devtools::install_github('databrew/covid19') library(ggplot2) library(lubridate) library(dplyr) library(ggplot2) library(sp) library(raster) library(viridis) library(ggthemes) library(sf) library(rnaturalearth) library(rnaturalearthdata)
# Define parameters # Basing off: https://www.niusdiario.es/multimedia/nius-te-explica/colpaso-sistema-sanitario-espana-uci-unidad-cuidados-intensivos-hospitales-coronavirus_18_2914020237.html p <- list( beds_public = 3508, beds_private = 896, normal_occupancy = 25, need_hospitalization = 15, # percent which requires hospitalization need_uci = 5, # percent which requires uci beds = 4404, average_days_in_uci = 10 # total rough estimate ) options(scipen = '999') make_prediction <- function(data, n_start = 5, cumulative = FALSE, time_ahead = 7, var = 'uci'){ sub_data <- data # Get the var the_var <- paste0(var, ifelse(cumulative, '', '_non_cum')) sub_data$var <- as.numeric(unlist(sub_data[,the_var])) # narrow down sub_data <- sub_data %>% dplyr::select(date, var) pd <- sub_data %>% filter(!is.na(var)) %>% mutate(start_date = min(date[var >= n_start])) %>% mutate(days_since = date - start_date) %>% filter(days_since >= 0) %>% mutate(days_since = as.numeric(days_since)) %>% mutate(the_weight = 1/(1 + (as.numeric(max(date) - date)))) fit <- lm(log(var) ~ days_since, weights = the_weight, data = pd) # Predict days ahead day0 <- pd$date[pd$days_since == 0] fake <- tibble(days_since = seq(0, max(pd$days_since) + time_ahead, by = 1)) fake <- fake %>%mutate(date = seq(day0, day0+max(fake$days_since), by = 1)) fake <- left_join(fake, pd %>% dplyr::select(days_since, var, date)) fake$predicted <- exp(predict(fit, newdata = fake)) # fake$predictedlo <- predict(fitlo, newdata = fake) ci <- exp(predict(fit, newdata = fake, interval = 'prediction')) # cilo <- predict(fitlo, newdata = fake, interval = 'prediction') fake$lwr <- ci[,'lwr'] fake$upr <- ci[,'upr'] # fake$lwrlo <- ci[,'lwr'] # fake$uprlo <- ci[,'upr'] # Doubling time dt <- log(2)/fit$coef[2] fake %>% mutate(doubling_time = dt) } plot_prediction <- function(data, ylog = F, ci = FALSE){ long <- data %>% tidyr::gather(key, value, var:predicted) %>% mutate(key = ifelse(key == 'var', 'Observed', key)) %>% mutate(key = Hmisc::capitalize(key)) g <- ggplot() if(ci){ g <- g + geom_ribbon(data = data %>% filter(date > max(long$date[!is.na(long$value) & long$key == 'Observed'])), aes(x = date, ymax = upr, ymin = lwr), alpha =0.6, fill = 'darkorange') } g <- g + geom_line(data = long, aes(x = date, y = value, group = key, lty = key)) + geom_bar(data = long %>% filter(key == 'Observed'), stat = 'identity', alpha = 0.6, aes(x = date, y = value)) + theme_simple() + theme(legend.position = 'right', legend.title = element_blank()) if(ylog){ g <- g + scale_y_log10() } return(g) } spain_data <- esp_df %>% group_by(date) %>% summarise_at(.vars = vars(uci, deaths, confirmed_cases, uci_non_cum, deaths_non_cum, confirmed_cases_non_cum), .fun = function(x){sum(x, na.rm = TRUE)}) pd <- make_prediction(data = spain_data, n_start = 20, cumulative = FALSE, time_ahead = 10, var = 'uci') # Get the number of daily admissions to "spillover" for the number of days they need to be in UCI # this function not estimating confidence bounds at this point spill_over <- function(data, days = p$average_days_in_uci){ out <- data out$predicted_spilled_over <- NA out_list <- list() for(i in 1:nrow(out)){ # Get the sub data for up to days days before sub_data <- out %>% filter(date >= out$date[i] -( days-1), date <= out$date[i]) # Get the sum of ingresado people during that window sum_ingresado <- sum(sub_data$var, na.rm = T) # Get the predicted sum too sum_predicted <- mean(sub_data$predicted, na.rm = TRUE) * nrow(sub_data) # Manually replace with observed out_predicted <- ifelse(!is.na(sub_data$var[i]), sum_ingresado, sum_predicted) message(i, ": ", round(out_predicted)) # pop back into dataframe # out_list[[i]] <- out_predicted out$predicted_spilled_over[i] <- out_predicted } # out <- unlist(out_list) return(out) } # preds <- spill_over(pd) pd <- pd %>% spill_over(days = p$average_days_in_uci)
already_occupied <- (p$normal_occupancy/100) * p$beds total_beds <- p$beds # Shape data for plotting plot_data <- pd %>% dplyr::select(date, var, predicted, predicted_spilled_over) %>% mutate(already_occupied = already_occupied) %>% tidyr::gather(key, value, var:already_occupied) %>% mutate(key = ifelse(key == 'already_occupied', 'UCI beds occupied (normal)', ifelse(key == 'predicted_spilled_over', 'Predicted COVID-19 UCI beds', key))) basic_data <- plot_data %>% filter(key %in% c('UCI beds occupied (normal)', 'Predicted COVID-19 UCI beds')) # basic_data$key <- factor(basic_data$key, # levels = rev(c('UCI beds occupied (normal)', # 'Predicted COVID-19 UCI beds'))) ggplot() + geom_bar(data = basic_data, aes(x = date, y = value, fill = key), stat = 'identity', position = position_stack(), alpha = 0.7, width = 1, color = 'white', lwd = 0.1) + geom_text(data = basic_data %>% filter(key == 'Predicted COVID-19 UCI beds'), aes(x = date, y = value + already_occupied * 1.05, label = round(value, digits = 0)), # stat = 'identity', position = position_stack(), alpha = 0.7) + # scale_y_log10() + geom_hline(yintercept = total_beds, lty = 2) + geom_text(data = tibble(x = min(plot_data$date) +2, y = total_beds + 300, label = 'Total UCI beds'), aes(x = x, y = y, label = label)) + geom_text(data = tibble(x = min(plot_data$date) +2, y = already_occupied - 500, label = paste0('Regular UCI occupancy:\n', round(already_occupied), ' (', p$normal_occupancy, '%)')), aes(x = x, y = y, label = label)) + theme_simple() + theme(legend.position = 'top') + scale_fill_manual(name = '', values = c('red', 'black')) + labs(x = 'Date', y = 'UCI beds', title = 'UCI bed capacity, Spain', subtitle = paste0('Based on simple log-linear growth model in daily UCI admissions.\nAssumes an average of ', p$average_days_in_uci, ' days in ICU.'), caption = paste0('Code at https://github.com/databrew/covid19/blob/master/misc/uci/uci.Rmd\nAssumes: ', total_beds, ' total beds, and a ', p$normal_occupancy, ' non-COVID19 occupancy rate.')) + scale_x_date(breaks = sort(unique(plot_data$date)), labels = format(sort(unique(plot_data$date)), '%d\n%b')) ggsave('~/Desktop/uci25.png', height = 7, width = 10)
Table of results:
Note: Predicted including stay-over time
means that amount of people in the UCI taking into account the fact that they stay for p$average_days_in_uci
days.
pd %>% dplyr::select(date, `Observed daily admissions` = var, `Predicted daily admissions based on model` = predicted, `Predicted including stay-over time` = predicted_spilled_over) %>% kable
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.