# Basic knitr options library(knitr) opts_chunk$set(comment = NA, # echo = FALSE, warning = FALSE, message = FALSE, error = TRUE, cache = FALSE, fig.width = 9.64, fig.height = 5.9, fig.path = 'figures/')
## Load libraries library(covid19) library(ggplot2) library(lubridate) library(dplyr) library(ggplot2) library(sp) library(raster) library(viridis) library(ggthemes) library(sf) library(rnaturalearth) library(rnaturalearthdata) library(RColorBrewer) library(readr) library(zoo) library(tidyr) options(scipen = '999')
# Matrix for Aleix Prat # All comarques pd <- muni %>% ungroup %>% group_by(date, ComarcaDescripcio) %>% summarise(new_cases = sum(confirmed_cases_non_cum), pop = sum(pop)) %>% ungroup %>% arrange(date) %>% group_by(ComarcaDescripcio) %>% mutate(cases_roll = zoo::rollmeanr(new_cases, k = 7, fill = NA)) %>% mutate(yp = new_cases / pop * 100000, ypr = cases_roll / pop * 100000) %>% mutate(ypl = sqrt(yp), yprl = sqrt(ypl)) # Make wide matrix pdw <- pd %>% dplyr::select(date, ComarcaDescripcio, yp) %>% tidyr::spread(key = date, value = yp) write_csv(pdw, 'aleix_prat.csv') # Create weekly (instead of pd) weekly <- pd %>% mutate(week = lubridate::week(date)) %>% group_by(week, ComarcaDescripcio) %>% mutate(week_start = min(date)) %>% mutate(n_days = length(unique(date))) ggplot(data = pd %>% filter(date >= '2020-03-11', date <= '2020-08-20'), aes(x = date, y = ComarcaDescripcio, fill = ypl)) + geom_tile( # color = 'black',size = 0.2 ) + databrew::theme_simple() + scale_fill_gradientn(name = '', colors = colorRampPalette(c('beige', # 'grey', # 'yellow', # 'white', # 'orange', 'darkorange', 'red', 'darkred', 'black'))(20), breaks = labeler, labels = round(labeler^2)) + labs(x = '', y = '', title = 'Casos confirmats nous (per 100.000 habitants)', caption = 'Aix-x: dia; Aix-y: comarca; Color: casos nous.\nDades de https://analisi.transparenciacatalunya.cat/. Gràfic: @joethebrew') + theme(axis.text.y = element_text(size = 10)) ggsave('~/Desktop/daily.png', width = 10, height = 6)
covid19::plot_day_zero(ylog = F, day0 = 1, by_district = TRUE, calendar = TRUE, pop = TRUE, roll = 7, alpha = 0.7, point_size = 0.5, point_alpha =0.5, countries = c('Spain', 'Italy', 'US'), districts = c('Cataluña', 'Aragón', 'Madrid', 'Lombardia', 'Florida', 'New York', 'Emilia-Romagna'), cumulative = FALSE) + labs(x = 'Date', y = 'Cases per million', title = 'Confirmed Covid-19 cases per 1,000,000 population\nRolling average of previous 7 days', subtitle = 'Data from Italy / USA as of Aug 21 2020; Data from Spain as of Aug 16 2020') + scale_color_colorblind(name = '', guide = guide_legend(reverse = TRUE)) ggsave('~/Desktop/plot.png')
# US vs Spain pd <- owid %>% filter(location %in% c('United States', 'Spain', 'Italy')) %>% mutate(location = ifelse(location == 'United States', 'USA', location)) ggplot(data = pd, aes(x = date, y = new_cases_smoothed_per_million, color = location, shape = location)) + geom_line(size = 1) + geom_point(data = pd %>% dplyr::filter(weekdays(date) == 'Friday') , size = 3) + databrew::theme_simple() + labs(x = 'Date', y = 'Cases per million (smoothed)', title = 'Daily cases per million population', subtitle = '7-day rolling average', caption = 'Chart: @joethebrew. Data source: Our World in Data.\nhttps://github.com/owid/covid-19-data/tree/master/public/data') + theme(legend.position = 'right', legend.title = element_blank()) + scale_color_colorblind(name = '', guide = guide_legend(reverse = TRUE)) # scale_color_manual(name = '', # values = c('black', 'red', 'blue'), # guide = guide_legend(reverse = TRUE)) ggsave('~/Desktop/comparsion.png', height = 4, width = 7.5)
# ECDC pd <- ecdc %>% mutate(date = as.Date(dateRep, format = '%d/%m/%Y')) %>% mutate(cum14 = Cumulative_number_for_14_days_of_COVID.19_cases_per_100000) %>% mutate(country = countriesAndTerritories) %>% arrange(date) %>% filter(popData2019 >= 100000, continentExp == 'Europe') %>% group_by(country) %>% mutate(deaths14 = zoo::rollsumr(deaths, 14, fill = NA)) %>% ungroup %>% mutate(deathsp = deaths14 / popData2019 * 100000) %>% filter(date == '2020-08-19') %>% arrange(desc(cum14)) pd[1:5,] pd <- ecdc %>% mutate(date = as.Date(dateRep, format = '%d/%m/%Y')) %>% mutate(cum14 = Cumulative_number_for_14_days_of_COVID.19_cases_per_100000) %>% mutate(country = countriesAndTerritories) %>% arrange(date) %>% filter(popData2019 >= 1000000) %>% filter(continentExp == 'Europe') %>% group_by(country) %>% mutate(deaths14 = zoo::rollsumr(deaths, 14, fill = NA)) %>% ungroup %>% mutate(deathsp = deaths14 / popData2019 * 100000) %>% arrange(desc(cum14)) %>% filter(country %in% c('Spain', 'Italy')) # mutate(spain = ifelse(country == 'Spain', 'Spain', # 'Others')) ggplot(data = pd, aes(x = date, y = cum14, color = country, group = country)) + geom_line(alpha = 0.7) + # geom_line(data = pd %>% filter(country == 'Spain'), # aes(x = date, # y = cum14, # color = spain, # group = country), # size = 2) + scale_color_manual(name = '', values = c('black', 'red')) + # scale_y_log10() + databrew::theme_simple() + labs(x = 'Fecha', y = 'Casos por 100.000, últimos 14 días', title = 'Incidencia Covid-19, Italia y España') ggsave('~/Desktop/esp_vs_ita.png') ggplot(data = pd, aes(x = date, y = deathsp, color = country, group = country)) + geom_line(alpha = 0.7) + # geom_line(data = pd %>% filter(country == 'Spain'), # aes(x = date, # y = cum14, # color = spain, # group = country), # size = 2) + scale_color_manual(name = '', values = c('black', 'red')) + # scale_y_log10() + databrew::theme_simple() + labs(x = 'Fecha', y = 'Muertos por 100.000, últimos 14 días', title = 'Muertes Covid-19, Italia y España') ggsave('~/Desktop/deaths_esp_vs_ita.png') ggplot(data = pd, aes(x = date, y = deaths, color = country, group = country)) + geom_line(alpha = 0.7) + # geom_line(data = pd %>% filter(country == 'Spain'), # aes(x = date, # y = cum14, # color = spain, # group = country), # size = 2) + scale_color_manual(name = '', values = c('black', 'red')) + # scale_y_log10() + databrew::theme_simple() + labs(x = 'Fecha', y = 'Muertes diarias', title = 'Muertes Covid-19, Italia y España')
pd <- ecdc %>% mutate(date = as.Date(dateRep, format = '%d/%m/%Y')) %>% mutate(cum14 = Cumulative_number_for_14_days_of_COVID.19_cases_per_100000) %>% mutate(country = countriesAndTerritories) %>% arrange(date) %>% filter(popData2019 >= 10000000) %>% group_by(country) %>% mutate(deaths14 = zoo::rollsumr(deaths, 14, fill = NA)) %>% ungroup %>% mutate(deathsp = deaths14 / popData2019 * 100000) %>% arrange(desc(cum14)) ggplot(data = pd, aes(x = date, y = cum14, group = country, color = continentExp)) + geom_line(alpha = 0.6) + geom_line(data = pd %>% filter(country == 'Spain'), aes(x = date, y = cum14, color = 'red', group = country), size = 2)
# Leaflet for municipi pd <- abs %>% # filter(RegioSanitariaDescripcio %in% c('Barcelona Ciutat')) %>% filter(RegioSanitariaDescripcio %in% c('Barcelona Ciutat', 'Metropolità Nord', 'Metropolità Sud')) %>% arrange(date) %>% group_by(ABSCodi) %>% mutate(cases_roll = zoo::rollsumr(confirmed_cases_non_cum, k = 7, fill = NA)) %>% mutate(incidence_week_100k = cases_roll / pop * 100000) map <- abs_shp right <- pd %>% filter(date == (Sys.Date()-3)) map@data$ABSCodi <- map@data$CODIABS map@data <- left_join(map@data, right) # map <- map[!map@data$CODIABS %in% pd$ABSCodi,] map <- map[map@data$CODIABS %in% pd$ABSCodi | map@data$NOMABS %in% c('Montcada i Reixac - 2', 'Montcada i Reixac - 1', 'Castellbisbal'),] map <- spTransform(map, "+init=epsg:4326") bins <- c(-1, 10, 20, 50, 100, 200, 500,Inf) library(RColorBrewer) library(leaflet) pal <- colorBin("YlOrRd", domain = map@data$incidence_week_100k, bins = bins) leaflet() %>% addProviderTiles(providers$Stamen.TonerBackground) %>% addPolygons(data = map, fillColor = ~pal(incidence_week_100k), weight = 0.5,#2, opacity = 0.2, color = 'black', #"white", # dashArray = "3", popup = paste0(map@data$NOMABS, ' ', map@data$CODIABS), # popup = paste0(map@data$ABSDescripcio, ': ', round(map@data$incidence_week_100k)), fillOpacity = 0.9 )
US deaths and cases
pd <- owid %>% filter(location == 'United States') %>% mutate(deaths_roll = zoo::rollmeanr(new_deaths, k = 7, fill = NA)) %>% mutate(cases_roll = zoo::rollmeanr(new_cases, k = 7, fill = NA)) %>% dplyr::select(date,new_deaths, new_cases, deaths_roll, cases_roll) %>% filter(date >= '2020-03-10') # tidyr::gather(key, value, new_deaths:cases_roll) ggplot(data = pd, aes(x = date)) + # geom_bar(stat = 'identity', aes(y = new_cases, color = 'Cases', fill = 'Cases')) + geom_line(aes(y = cases_roll, color = 'Cases')) + geom_line(aes(y = deaths_roll * 15, color = 'Deaths')) + geom_point(aes(y = new_cases, color = 'Cases'), alpha = 0.3) + geom_point(aes(y = new_deaths * 15, color = 'Deaths'), alpha = 0.3) + scale_y_continuous(sec.axis = sec_axis(~./15, name = "Deaths")) + databrew::theme_simple() + scale_color_manual(name = '', values = c('black', 'red')) + # scale_fill_manual(name = '', # values = c('black', 'red')) + labs(y = 'Cases', x = 'Date', title = 'US Covid-19 Cases and deaths', subtitle = 'Dot = daily observation; Line = 7 day rolling average') + theme(legend.position = 'bottom') ggsave('~/Desktop/us_cases_and_deaths.png')
pd <- covid19::muni pd$ComarcaDescripcio <- trimws(as.character(pd$ComarcaDescripcio)) keep5 <- function(x){substr(x, 1, 5)} # muni$ComarcaDescripcio <- gsub('à', 'a', muni$ComarcaDescripcio) lleida_comarques <- c('Aran', 'Alta Ribagorça', 'Pallars Sobirà', 'Pallars Jussà', 'Alt Urgell', 'Cerdanya', 'Solsonès', 'Noguera', "Pla d'Urgell", 'Segrià', 'Garrigues', 'Urgell', 'Segarra') pd$ComarcaDescripcio <- keep5(pd$ComarcaDescripcio) lleida_comarques <- keep5(lleida_comarques) agg <- pd %>% group_by(ComarcaDescripcio, date) %>% summarise(cases = sum(confirmed_cases_non_cum, na.rm = TRUE), pop = sum(pop)) %>% ungroup %>% arrange(date) %>% group_by(ComarcaDescripcio) %>% mutate(cases_roll = zoo::rollmeanr(cases, k = 7, fill = NA)) %>% ungroup %>% mutate(cases100k = cases / pop * 100000, cases_roll100k = cases_roll / pop * 100000) ggplot(data = agg %>% filter(ComarcaDescripcio %in% lleida_comarques), aes(x = date, y = cases100k)) + geom_point(alpha = 0.3) + geom_line(alpha =0.4) + geom_line(aes(y = cases_roll100k), color = 'red') + facet_wrap(~ComarcaDescripcio) + cowplot::theme_cowplot() + xlim(as.Date('2020-05-01'), (Sys.Date()-1))
# Santa Coloma scq <- muni %>% filter(MunicipiDescripcio == 'Santa Coloma de Queralt') %>% dplyr::select(date, confirmed_cases_non_cum) ggplot(data = scq, aes(x = date,y = confirmed_cases_non_cum )) + geom_bar(stat = 'identity') + databrew::theme_simple() + xlim(Sys.Date() - 40, Sys.Date() - 1) + labs(x = 'Data', y = 'Casos confirmats') conca <- muni %>% filter(grepl('Conca de Barbe', ComarcaDescripcio)) ggplot(data = conca, aes(x = date, y = confirmed_cases_non_cum)) + # geom_point() + facet_wrap(~MunicipiDescripcio) + geom_line() # All comarques pd <- muni %>% ungroup %>% group_by(date, ComarcaDescripcio) %>% summarise(new_cases = sum(confirmed_cases_non_cum), pop = sum(pop)) %>% ungroup %>% arrange(date) %>% group_by(ComarcaDescripcio) %>% mutate(cases_roll = zoo::rollmeanr(new_cases, k = 7, fill = NA)) %>% mutate(yp = new_cases / pop * 100000, ypr = cases_roll / pop * 100000) %>% mutate(ypl = sqrt(yp), yprl = sqrt(ypl)) # ggplot(data = pd, # aes(x = date, # y = cases_roll)) + # geom_point(aes(y = new_cases), alpha = 0.3, # size = 0.3) + # geom_line(color = 'red', size = 0.3) + # facet_wrap(~ComarcaDescripcio, scales = 'free_y') labeler <- sqrt(c(0, 10, 50, 100, 200)) # pd <- pd %>% filter(date <= '2020-07-16') levs <- pd %>% filter(date == max(date)) %>% arrange(desc(ypr)) %>% .$ComarcaDescripcio pd$ComarcaDescripcio <- factor(pd$ComarcaDescripcio, levels = rev(levs)) # Create weekly (instead of pd) weekly <- pd %>% mutate(week = lubridate::week(date)) %>% group_by(week, ComarcaDescripcio) %>% mutate(week_start = min(date)) %>% mutate(n_days = length(unique(date))) %>% ggplot(data = pd %>% filter(date >= '2020-03-11', date <= '2020-08-18'), aes(x = date, y = ComarcaDescripcio, fill = ypl)) + geom_tile( # color = 'black',size = 0.2 ) + databrew::theme_simple() + scale_fill_gradientn(name = '', colors = colorRampPalette(c('beige', # 'grey', # 'yellow', # 'white', # 'orange', 'darkorange', 'red', 'darkred', 'black'))(20), breaks = labeler, labels = round(labeler^2)) + labs(x = '', y = '', title = 'Casos confirmats nous (per 100.000 habitants)', caption = 'Aix-x: dia; Aix-y: comarca; Color: casos nous.\nDades de https://analisi.transparenciacatalunya.cat/. Gràfic: @joethebrew') + theme(axis.text.y = element_text(size = 10)) ggsave('~/Desktop/daily.png', width = 10, height = 6)
# Weekly version pd <- muni %>% ungroup %>% group_by(date, ComarcaDescripcio) %>% summarise(new_cases = sum(confirmed_cases_non_cum), pop = sum(pop)) %>% ungroup %>% arrange(date) %>% group_by(ComarcaDescripcio) %>% mutate(cases_roll = zoo::rollmeanr(new_cases, k = 7, fill = NA)) %>% mutate(yp = new_cases / pop * 100000, ypr = cases_roll / pop * 100000) %>% mutate(ypl = sqrt(yp), yprl = sqrt(ypl)) labeler <- sqrt(c(0, 10, 50, 100, 200)) # pd <- pd %>% filter(date <= '2020-07-16') levs <- pd %>% filter(date == max(date)) %>% arrange(desc(ypr)) %>% .$ComarcaDescripcio pd$ComarcaDescripcio <- factor(pd$ComarcaDescripcio, levels = rev(levs)) ggplot(data = pd %>% filter(date >= '2020-03-11', date <= '2020-08-18'), aes(x = date, y = ComarcaDescripcio, fill = ypl)) + geom_tile( # color = 'black',size = 0.2 ) + databrew::theme_simple() + scale_fill_gradientn(name = '', colors = colorRampPalette(c('beige', # 'grey', # 'yellow', # 'white', # 'orange', 'darkorange', 'red', 'darkred', 'black'))(20), breaks = labeler, labels = round(labeler^2)) + labs(x = '', y = '', title = 'Casos confirmats nous (per 100.000 habitants)', caption = 'Aix-x: dia; Aix-y: comarca; Color: casos nous.\nDades de https://analisi.transparenciacatalunya.cat/. Gràfic: @joethebrew') + theme(axis.text.y = element_text(size = 10)) ggsave('~/Desktop/weekly.png', width = 10, height = 6)
# 50 as the cut-off pd$fifty <- ifelse(pd$yp >= 50/7, '>50', ifelse(pd$yp >= 25/7, '25-50', '<25')) pd$fifty <- factor(pd$fifty, levels = c('<25', '25-50', '>50')) ggplot(data = pd %>% filter(date >= '2020-03-11'), aes(x = date, y = ComarcaDescripcio, fill = fifty)) + geom_tile( # color = 'black',size = 0.2 ) + databrew::theme_simple() + scale_fill_manual(name = 'Incidència\nper 100.000', values = c('beige', 'darkorange', 'darkred')) + labs(x = '', y = '', title = 'Casos nous\n(convertit en incidència setmanal per 100.000)', caption = 'Eix-x: dia; Eix-y: comarca; Color: casos nous.\nDades de https://analisi.transparenciacatalunya.cat/. Gràfic: @joethebrew') + theme(axis.text.y = element_text(size = 10)) ggsave('~/Desktop/daily_binary.png', width = 10, height = 6)
# Not population-adjusted ggplot(data = pd, aes(x = date, y = ComarcaDescripcio, fill = new_cases)) + geom_tile(color = 'white', size = 0.2) + databrew::theme_simple() + scale_fill_gradientn(name = '', colors = colorRampPalette(c('white', 'grey', # 'white', 'darkorange', 'red', 'black'))(20)) +#, # breaks = labeler, labels = round(labeler^2)) + labs(x = '', y = '', title = 'Casos confirmats nous') + theme(axis.text.y = element_text(size = 10))
# Anoia pd <- muni %>% filter(ComarcaDescripcio == 'Anoia') %>% group_by(date_month = lubridate::month(date), date_day = lubridate::day(date)) %>% summarise(cases = sum(confirmed_cases_non_cum)) %>% filter(date_month %in% c(3, 7)) %>% mutate(date_month = factor(date_month, levels = c('3', '7'), labels = c('Març', 'Juliol'))) ggplot(data = pd, aes(x = date_day, y = cases, group = date_month, color = date_month)) + geom_line()
# Top few # All comarques pd <- muni %>% ungroup %>% group_by(date, ComarcaDescripcio) %>% summarise(new_cases = sum(confirmed_cases_non_cum), pop = sum(pop)) %>% ungroup %>% arrange(date) %>% group_by(ComarcaDescripcio) %>% mutate(cases_roll = zoo::rollmeanr(new_cases, k = 7, fill = NA)) %>% mutate(yp = new_cases / pop * 100000, ypr = cases_roll / pop * 100000) %>% mutate(ypl = sqrt(yp), yprl = sqrt(ypl)) # ggplot(data = pd, # aes(x = date, # y = cases_roll)) + # geom_point(aes(y = new_cases), alpha = 0.3, # size = 0.3) + # geom_line(color = 'red', size = 0.3) + # facet_wrap(~ComarcaDescripcio, scales = 'free_y') labeler <- sqrt(c(0, 10, 50, 100, 200)) # pd <- pd %>% filter(date <= '2020-07-16') levs <- pd %>% filter(date == max(date)) %>% arrange(desc(ypr)) %>% .$ComarcaDescripcio pd$ComarcaDescripcio <- factor(pd$ComarcaDescripcio, levels = (levs)) # Keep only top few pd <- pd %>% filter(ComarcaDescripcio %in% levs[1:10]) %>% filter(date >= '2020-07-01') ggplot(data = pd, aes(x = date, y = yp, color = ComarcaDescripcio)) + geom_line()
# Above the 50 per 100.000 week pd <- muni %>% filter(date >= (Sys.Date() - 7), date <= (Sys.Date()-1)) %>% # filter(pop < 4000) %>% group_by(MunicipiDescripcio) %>% summarise(cases = sum(confirmed_cases_non_cum), pop = mean(pop), id = dplyr::first(MunicipiCodi)) %>% ungroup %>% mutate(per100000week = (cases/pop) * 100000) %>% arrange(desc(per100000week), pop) pd
library(maptools) catalan_codes <- c('43', '08', '25', '17') map <- municipios map <- map[substr(map@data$id, 1, 2) %in% catalan_codes, ] map_fort <- fortify(map, id = 'id' )# %>% mutate(id = as.numeric(as.character(id))) # Dot plot animation dot_plot <- function(the_date = Sys.Date(), out_dir = '~/Desktop/animation'){ if(!dir.exists(out_dir)){ dir.create(out_dir) } map <- municipios map <- map[substr(map@data$id, 1, 2) %in% catalan_codes, ] right <- muni %>% mutate(id = MunicipiCodi) %>% # filter(date == the_date) %>% # filter(date %in% seq( (the_date - 6), the_date, 1)) %>% group_by(id) %>% summarise(cases = sum(confirmed_cases_non_cum[date == the_date], na.rm = TRUE), cases1 = sum(confirmed_cases_non_cum[date == (the_date -1)], na.rm = TRUE), cases2 = sum(confirmed_cases_non_cum[date == (the_date -2)], na.rm = TRUE)) map@data <- map@data %>% left_join(right) %>% mutate(cases = ifelse(is.na(cases), 0, cases), cases1 = ifelse(is.na(cases1), 0, cases1), cases2 = ifelse(is.na(cases2), 0, cases2)) final <- map_fort %>% left_join(right) %>% mutate(cases = ifelse(is.na(cases), 0, cases), cases1 = ifelse(is.na(cases1), 0, cases1), cases2 = ifelse(is.na(cases2), 0, cases2)) dots <- dotsInPolys(map, as.integer(map@data$cases)) dots1 <- dotsInPolys(map, as.integer(map@data$cases1)) dots2 <- dotsInPolys(map, as.integer(map@data$cases2)) dots_df <- tibble(long = coordinates(dots)[,1], lat = coordinates(dots)[,2], ID = dots$ID) dots_df1 <- tibble(long = coordinates(dots1)[,1], lat = coordinates(dots1)[,2], ID = dots1$ID) dots_df2 <- tibble(long = coordinates(dots2)[,1], lat = coordinates(dots2)[,2], ID = dots2$ID) g <- ggplot() + geom_polygon(data = map_fort, aes(x = long, y = lat, group = group), fill = 'black', color = 'darkgrey', size = 0.1) + geom_point(data = dots_df, aes(x = long, y = lat), color = 'red', alpha = 0.9, size = 0.15) + geom_point(data = dots_df1, aes(x = long, y = lat), color = 'red', alpha = 0.5, size = 0.15) + geom_point(data = dots_df2, aes(x = long, y = lat), color = 'red', alpha = 0.2, size = 0.15) + labs(title = the_date) + databrew::theme_simple() + theme_map() + coord_map() + theme(plot.title = element_text(size = 22)) ggsave(paste0(out_dir, '/', the_date, '.png'), height = 8, width = 10) } # dot_plot(the_date = Sys.Date() -5 ) dates <- seq(as.Date('2020-03-10'), Sys.Date()-1, 1) # dates <- dates[weekdays(dates) == 'Tuesday'] for(i in 1:length(dates)){ message(dates[i]) dot_plot(dates[i]) }
# choropleth map <- municipios map <- map[substr(map@data$id, 1, 2) %in% catalan_codes, ] map_fort <- fortify(map, region = 'id' )# %>% mutate(id = as.numeric(as.character(id))) # Dot plot animation choro_plot <- function(the_date = Sys.Date(), out_dir = '~/Desktop/animation_choro'){ if(!dir.exists(out_dir)){ dir.create(out_dir) } right <- muni %>% mutate(id = MunicipiCodi) %>% # filter(date == the_date) %>% # filter(date %in% seq( (the_date - 6), the_date, 1)) %>% group_by(id) %>% summarise(cases = sum(confirmed_cases_non_cum[date == the_date], na.rm = TRUE), cases1 = sum(confirmed_cases_non_cum[date == (the_date -1)], na.rm = TRUE), cases2 = sum(confirmed_cases_non_cum[date == (the_date -2)], na.rm = TRUE), cases_lag = sum(confirmed_cases_non_cum[date <= the_date & date >= (the_date-6)]), pop = dplyr::first(pop)) %>% ungroup %>% mutate(cases_k = cases_lag / pop * 100000) %>% mutate(cases = ifelse(is.na(cases), 0, cases), cases1 = ifelse(is.na(cases1), 0, cases1), cases2 = ifelse(is.na(cases2), 0, cases2), cases_lag = ifelse(is.na(cases_lag), 0, cases_lag), cases_k = ifelse(is.na(cases_k), 0, cases_k)) final <- map_fort %>% left_join(right) final$category <- ifelse(final$cases_k == 0, '0', ifelse(final$cases_k <= 20, '0-20', ifelse(final$cases_k <= 50, '20-50', ifelse(final$cases_k <= 200, '50-200', '200+')))) final$category <- ifelse(is.na(final$category), 0, final$category) final$category <- factor(final$category, levels = c('0', '0-20','20-50', '50-200', '200+')) g <- ggplot() + geom_polygon(data = final, aes(x = long, y = lat, group = group, fill = category), color = 'black', size = 0.1) + labs(title = the_date, subtitle = 'Casos confirmats (darrers 7 dies) per 100.000 habitants' ) + databrew::theme_simple() + theme_map() + coord_map() + theme(plot.title = element_text(size = 24), plot.subtitle = element_text(size = 20), legend.text = element_text(size = 20)) + scale_fill_manual(name = '', values = c( 'beige', 'yellow', 'darkorange', 'red', 'darkred')) + # scale_fill_gradientn(name = 'Incidència setmanal\n(per 100.000)', # colors = c('white', 'beige', 'darkorange', 'red', 'darkred'), # breaks = c(0, 50, 200, 500), # limits = c(0, (round(max(muni$confirmed_cases_non_cum / muni$pop * 100000)))), # na.value = 'white') theme(legend.position = 'bottom') ggsave(paste0(out_dir, '/', the_date, '.png'), height = 8, width = 10) } # dot_plot(the_date = Sys.Date() -5 ) dates <- seq(as.Date('2020-03-10'), Sys.Date()-3, 1) dates <- dates[c(1:5, (length(dates) -20):length(dates))] # dates <- dates[weekdays(dates) == 'Tuesday'] for(i in 1:length(dates)){ message(dates[i]) choro_plot(dates[i]) }
# Command line cd ~/Desktop/animation mogrify -resize 50% *.png convert -delay 20 -loop 0 *.png result.gif
pd <- muni %>% ungroup %>% group_by(date, ComarcaDescripcio) %>% summarise(new_cases = sum(confirmed_cases_non_cum), pop = sum(pop)) %>% ungroup %>% arrange(date) %>% group_by(ComarcaDescripcio) %>% mutate(cases_roll = zoo::rollsumr(new_cases, k = 7, fill = NA)) %>% mutate(yp = new_cases / pop * 100000, ypr = cases_roll / pop * 100000) %>% mutate(danger = ypr >= 50) library(ggrepel) make_plot <- function(the_date, out_dir = 'ts_chart'){ if(!dir.exists('ts_chart')){ dir.create('ts_chart') } sub_data <- pd %>% filter(date <= the_date) %>% filter(!is.na(danger)) label_df <- sub_data %>% filter(date == the_date) %>% arrange(desc(ypr)) label_df <- label_df[1:3,] label_df <- label_df %>% filter(ypr >= 50) top3 <- sub_data %>% filter(ComarcaDescripcio %in% label_df$ComarcaDescripcio) sub_data <- sub_data %>% filter(!ComarcaDescripcio %in% label_df$ComarcaDescripcio) ggplot() + geom_line(alpha = 0.5, data = sub_data, aes(x = date, y = ypr, group = ComarcaDescripcio)) + geom_line(alpha = 0.9, data = top3, aes(x = date, y = ypr, group = ComarcaDescripcio), color = 'darkred') + # scale_y_log10() + geom_hline(yintercept = 50, color = 'blue') + databrew::theme_simple() + geom_point(data = label_df, aes(x = date , y = ypr), color = 'darkred') + geom_text(data = label_df, aes(x = date + 0.5, y = ypr, label = ComarcaDescripcio)) + labs(title = the_date, x = 'Data', y = 'Casos per 100.000 (darrers 7 dies)') + theme(legend.position = 'none') + xlim(min(sub_data$date), max(sub_data$date) + 2) + ylim(0, max(pd$ypr, na.rm = T)+1) ggsave(paste0(out_dir, '/', the_date, '.png'), height = 8, width = 10) } # dates <- sort(unique(pd$date)) dates <- seq(as.Date('2020-03-10'), Sys.Date()-1, by = 1) for(i in 1:length(dates)){ make_plot(the_date = dates[i]) }
# Command line cd ts_chart mogrify -resize 50% *.png convert -delay 15 -loop 0 *.png result.gif
pd <- muni %>% filter(date >= (Sys.Date() - 8)) %>% filter(pop < 4000) %>% group_by(MunicipiDescripcio) %>% summarise(cases = sum(confirmed_cases_non_cum), pop = mean(pop)) %>% arrange(desc(cases), pop) %>% filter(cases > 5) pd <- muni %>% filter(date >= (Sys.Date() - 8)) %>% group_by(MunicipiDescripcio) %>% summarise(cases = sum(confirmed_cases_non_cum), pop = mean(pop)) %>% arrange(desc(cases), pop) %>% filter(cases > 1) fit <- lm(cases ~ pop, data = pd) ggplot(data = pd, aes(x = pop, y = cases)) + geom_point() + scale_x_log10() + scale_y_log10() + geom_smooth(formula= y~x)
# Now municipalities pd <- muni %>% ungroup %>% group_by(date, MunicipiDescripcio) %>% summarise(new_cases = sum(confirmed_cases_non_cum), pop = sum(pop)) %>% ungroup %>% arrange(date) %>% group_by(MunicipiDescripcio) %>% mutate(cases_roll = zoo::rollmeanr(new_cases, k = 7, fill = NA)) %>% mutate(yp = new_cases / pop * 100000, ypr = cases_roll / pop * 100000) %>% mutate(ypl = sqrt(yp), yprl = sqrt(ypl)) # ggplot(data = pd, # aes(x = date, # y = cases_roll)) + # geom_point(aes(y = new_cases), alpha = 0.3, # size = 0.3) + # geom_line(color = 'red', size = 0.3) + # facet_wrap(~ComarcaDescripcio, scales = 'free_y') labeler <- sqrt(c(0, 10, 50, 100, 200)) pd <- pd %>% filter(date <= (Sys.Date()-1)) levs <- pd %>% filter(date == max(date)) %>% arrange(desc(ypr)) %>% .$MunicipiDescripcio pd$MunicipiDescripcio <- factor(pd$MunicipiDescripcio, levels = rev(levs)) pd$fifty <- ifelse(pd$yp >= 50, '>50', ifelse(pd$yp >= 40, '40-50', ifelse(pd$yp >= 30, '30-40', ifelse(pd$yp >= 20, '20-30', ifelse(pd$yp >= 10, '10-20', ifelse(pd$yp > 0, '<10', '0')))))) pd$fifty <- factor(pd$fifty, levels = c('0', '<10', '10-20', '20-30', '30-40', '40-50', '>50')) comarques <- sort(unique(muni$ComarcaDescripcio)) dir.create('~/Desktop/comarques') for(i in 1:length(comarques)){ this_comarca <- comarques[i] these_municipis <- muni$MunicipiDescripcio[muni$ComarcaDescripcio == this_comarca] sub_pd <- pd %>% filter(MunicipiDescripcio %in% these_municipis) sub_pd$MunicipiDescripcio <- factor(sub_pd$MunicipiDescripcio, levels = rev(unique(sub_pd$MunicipiDescripcio))) ggplot(data = sub_pd, aes(x = date, y = MunicipiDescripcio, fill = yp # fill = fifty )) + geom_tile( color = 'white', size = 0.02 ) + databrew::theme_simple() + scale_fill_gradientn(name = '', colors = colorRampPalette(c('beige', 'darkorange', 'red', 'darkred','black'))(20)) + # scale_fill_manual(name = 'Incidència', # values = colorRampPalette(c('white', # 'beige', # 'darkorange', # 'darkred'))(length(unique(pd$fifty)))) + labs(x = '', y = '', title = paste0(this_comarca, ': Casos nous confirmats per 100.000')) ggsave(paste0('~/Desktop/comarques/', this_comarca, '.png'), height = 6, width = 11.5) }
# Number of municipios with any cases pd <- muni %>% group_by(date) %>% summarise(any_case = length(which(confirmed_cases_non_cum > 0)), new_cases = sum(confirmed_cases_non_cum)) %>% ungroup ggplot(data = pd, aes(x = date, y = any_case)) + geom_point() + geom_line()
# Date of comarcas first case x <- covid19::muni %>% group_by(MunicipiDescripcio) %>% summarise(`Casos confirmats` = sum(confirmed_cases_non_cum), `Població` = mean(pop), date = min(date[confirmed_cases_non_cum > 0])) %>% ungroup y <- x %>% filter(`Casos confirmats` == 0) %>% arrange(desc(`Població`)) %>% dplyr::select(-date)
# Catalonia age edat <- read_csv('https://analisi.transparenciacatalunya.cat/api/views/qwj8-xpvk/rows.csv?accessType=DOWNLOAD&sorting=true') pd <- edat %>% mutate(date = as.Date(TipusCasData, format = '%d/%m/%Y'), age_group = EdatRang) %>% filter(!TipusCasDescripcio %in% c('Sospitós'), !EdatRang %in% c('No classificat')) %>% group_by(date, age_group) %>% summarise(n = sum(NumCasos, na.rm = TRUE)) left <- expand.grid(date = seq(min(pd$date), max(pd$date), by = 1), age_group = sort(unique(pd$age_group))) joined <- left_join(left, pd) %>% mutate(n = ifelse(is.na(n), 0, n)) %>% filter(date >= '2020-03-04') # Get by week bw <- joined %>% mutate(week = lubridate::week(date)) %>% group_by(week) %>% mutate(week_start = min(date)) %>% mutate(n_days = length(unique(date))) %>% ungroup %>% dplyr::select(-date) %>% group_by(date = week_start, age_group) %>% summarise(n = sum(n, na.rm = TRUE), n_days = dplyr::first(n_days)) %>% ungroup %>% mutate(per_day = n / n_days) ggplot(data = bw %>% filter(date != max(date)), aes(x = date, y = age_group, fill = per_day)) + geom_tile() + databrew::theme_simple() + scale_fill_gradientn(name = '', colors = colorRampPalette(c('lightyellow', # 'yellow', # 'lightblue', # 'white', 'darkorange', 'darkred'))(20)) + labs(x = '', y = '', title = 'Catalunya: casos confirmats per edat', subtitle = 'Catalonia: confirmed cases by age', caption = 'Eix-x: setmana; Eix-y: edat. Color: casos nous.\nDades de https://analisi.transparenciacatalunya.cat/. Gràfic: @joethebrew') ggsave('~/Desktop/by_age.png', height = 4, width = 7)
# Age by month pd <- edat %>% mutate(date = as.Date(TipusCasData, format = '%d/%m/%Y'), age_group = EdatRang) %>% filter(!TipusCasDescripcio %in% c('Sospitós'), !EdatRang %in% c('No classificat')) %>% mutate(date_month = paste0(format(date, '%Y-%m'), '-01')) %>% mutate(age = ifelse(age_group %in% c('60-69', '70-79', '80-89', '90+'), '>60', '<60')) %>% group_by(date_month, age) %>% summarise(n = sum(NumCasos, na.rm = TRUE), days = length(unique(date))) %>% group_by(date_month) %>% mutate(p = n / sum(n) * 100) %>% filter(date_month >= '2020-03-01') %>% mutate(date_label = format(as.Date(date_month), '%b')) %>% mutate(date_label = factor(date_label, levels = c('Mar','Apr','May','Jun','Jul', 'Aug'), labels = c('Mar','Abr','Mai','Jun','Jul', 'Ago'))) %>% mutate(n = n / days) %>% dplyr::select(-days) %>% gather(key, value, n:p) %>% mutate(key = ifelse(key == 'n', 'Casos per dia', 'Percentatge de tots els casos')) # pd <- pd %>% filter(date_label != 'Ago') ggplot(data = pd, aes(x = date_label, y = value, color = age, group = age)) + geom_line(size = 2, alpha = 0.6) + # geom_point(size = 2) + databrew::theme_simple() + facet_wrap(~key, scales = 'free_y') + labs(y = '', x = 'Mes', title = 'Casos confirmats i edat: Covid-19 a Catalunya') + scale_color_manual(name = 'Edat', values = c('darkorange', 'purple')) + theme(legend.position = 'top', strip.text = element_text(size = 18)) ggsave('~/Desktop/age2.png', width = 8.5, height = 5)
# Age by percentage bwp <- bw %>% group_by(date) %>% mutate(p = per_day / sum(per_day) * 100) %>% ungroup ggplot(data = bwp, aes(x = date, y = age_group, fill = p)) + geom_tile() + databrew::theme_simple() + scale_fill_gradientn(name = '', colors = colorRampPalette(c('lightyellow', # 'yellow', # 'lightblue', # 'white', 'darkorange', 'darkred'))(20)) + labs(x = '', y = '', title = 'Catalunya: casos confirmats per edat', subtitle = 'Catalonia: confirmed cases by age', caption = 'Aix-x: setmana; Aix-y: edat. Color: casos nous.\nDades de https://analisi.transparenciacatalunya.cat/. Gràfic: @joethebrew') ggsave('~/Desktop/by_age.png', height = 4, width = 7)
fl <- df %>% filter(district == 'Florida') ggplot(data = df, aes(x = date, y = cases)) + geom_line()
Municipalities
pd <- muni %>% filter(date == max(date)) %>% # Get confirmed cases by population mutate(p = confirmed_cases / pop * 100000) %>% arrange(desc(p))
c25 <- c( "dodgerblue2", "#E31A1C", # red "green4", "#6A3D9A", # purple "#FF7F00", # orange "black", "gold1", "skyblue2", "#FB9A99", # lt pink "palegreen2", "#CAB2D6", # lt purple "#FDBF6F", # lt orange "gray70", "khaki2", "maroon", "orchid1", "deeppink1", "blue1", "steelblue4", "darkturquoise", "green1", "yellow4", "yellow3", "darkorange4", "brown" ) comarcas <- sort(unique(muni$ComarcaDescripcio)) for(i in 1:length(comarcas)){ this_comarca <- comarcas[i] pd <- muni %>% # Get confirmed cases by population mutate(p = confirmed_cases / pop * 100000) %>% dplyr::filter(ComarcaDescripcio %in% this_comarca) # pie(rep(1, 25), col = c25) cols <- colorRampPalette(c25)(length(unique(pd$MunicipiDescripcio))) g <- ggplot(data = pd, aes(x = date, y = p)) + geom_line(aes(group = MunicipiDescripcio, color = MunicipiDescripcio, size = pop), alpha = 0.8) + # scale_y_log10() + # facet_wrap(~MunicipiDescripcio, scales = 'free_y') + scale_color_manual(name = '', values = cols) + databrew::theme_simple() + labs(x = 'Data', y = 'Casos per 100.000', title = paste0(this_comarca), subtitle = 'Incidència acumulada per 100.000 habitants') + scale_size(name = 'Població') + guides(color = guide_legend(override.aes = list(size = 2))) print(g) # Sys.sleep(10) }
Cumulative incidence map, Catalonia, by municipality
# map <- municipios[municipios@data$id %in% muni$MunicipiCodi,] catalan_codes <- c('43', '08', '25', '17') map <- municipios[substr(municipios@data$id, 1, 2) %in% catalan_codes, ] map_fortified <- fortify(map, region = 'id') # Define function for getting each day get_day <- function(the_date = Sys.Date()-3){ pd <- muni %>% filter(date == the_date) joined <- left_join(map_fortified, pd %>% dplyr::rename(id = MunicipiCodi)) %>% mutate(p = confirmed_cases / pop * 100000) ggplot(data = joined, aes(x = long, y = lat, fill = p)) + geom_polygon(aes(group = id)) + scale_fill_gradientn(name = '', colors = c('white', 'yellow', 'darkorange', 'red', 'darkred', 'black'), # colors = rev(RColorBrewer::brewer.pal(n = 9, name = 'Spectral')), limits = c(0, 10000)) + xlim(1,2.5) + ylim(41, 42.1) } dates <- seq(as.Date('2020-03-08'), max(muni$date), by = 1) for(i in 1:length(dates)){ this_date <- dates[i] get_day(the_date = this_date) }
# Excess mortality spain pd <- excess %>% dplyr::rename(ccaa = region) %>% filter(ccaa != 'Spain') %>% left_join(esp_pop %>% mutate(ccaa = ifelse(ccaa == 'Baleares', 'Balears, Illes', ifelse(ccaa == 'CLM', 'Castile-La Mancha', ifelse(ccaa == 'CyL', 'Castile & León', ifelse(ccaa == 'C. Valenciana', 'Comunitat Valenciana', ccaa)))))) %>% mutate(ccaa = ifelse(ccaa == 'Cataluña', 'Catalonia', ccaa)) %>% mutate(ccaa = ifelse(ccaa == 'Comunitat Valenciana', 'Valencia', ccaa)) %>% mutate(ccaa = ifelse(ccaa == 'País Vasco', 'Basque country', ccaa)) %>% mutate(date = end_date) # Calculate percentage of normal pd <- pd %>% mutate(p = total_deaths / expected_deaths * 100) %>% filter(!ccaa %in% c('Ceuta', 'Melilla', 'Canarias')) %>% mutate(type = ifelse(p > 100, 'Above\nexpected\n', 'Below\nexpected\n')) # pd <- pd %>% filter(ccaa %in% c('Castile & León', # 'Castile-La Mancha', # 'Catalonia', # 'Madrid')) pd <- pd %>% filter(date > '2020-01-01') ggplot(data = pd, aes(x = date, y = p, group = ccaa)) + geom_hline(yintercept = 100, color = 'darkgrey') + # geom_line() + # geom_area(aes(ymin = 100)) + geom_ribbon(aes(ymin=100, ymax=p), fill = 'darkgrey', alpha = 0.5) + geom_line() + geom_point(aes(color = type), alpha = 0.7) + facet_wrap(~ccaa) + scale_color_manual(name = '', values = c('darkred', 'blue')) + # scale_fill_manual(name = '', # values = c('darkred', 'blue')) + databrew::theme_simple() + theme(legend.position = 'right', strip.text = element_text(size = 12, hjust = 0.5)) + labs(x = '', y = 'Percent', title = 'Excess mortality by region', caption = 'Raw from the Economist: https://github.com/TheEconomist/covid-19-excess-deaths-tracker/blob/master/output-data/historical-deaths/spain_weekly_deaths.csv\nChart by Joe Brew. Ceuta, Melilla, and Canarias removed due to low numbers.') # geom_hline(yintercept = 100) ggsave('~/Desktop/plot.png')
pd <- excess %>% dplyr::rename(ccaa = region) %>% filter(ccaa != 'Spain') %>% left_join(esp_pop %>% mutate(ccaa = ifelse(ccaa == 'Baleares', 'Balears, Illes', ifelse(ccaa == 'CLM', 'Castile-La Mancha', ifelse(ccaa == 'CyL', 'Castile & León', ifelse(ccaa == 'C. Valenciana', 'Comunitat Valenciana', ccaa)))))) %>% mutate(ccaa = ifelse(ccaa == 'Cataluña', 'Catalonia', ccaa)) %>% mutate(ccaa = ifelse(ccaa == 'Comunitat Valenciana', 'Valencia', ccaa)) %>% mutate(ccaa = ifelse(ccaa == 'País Vasco', 'Basque country', ccaa)) %>% mutate(date = end_date) # Calculate percentage of normal pd <- pd %>% mutate(p = total_deaths / expected_deaths * 100) %>% filter(!ccaa %in% c('Ceuta', 'Melilla', 'Canarias')) %>% mutate(type = ifelse(p > 100, 'Above\nexpected\n', 'Below\nexpected\n')) # pd <- pd %>% filter(ccaa %in% c('Castile & León', # 'Castile-La Mancha', # 'Catalonia', # 'Madrid')) pd <- pd %>% filter(date > '2020-01-01') pd$grp <- ifelse(grepl('Madrid|Castil|Rioj|Catal|Navarr', pd$ccaa), pd$ccaa, 'Others') cols <- c(RColorBrewer::brewer.pal(n = length(unique(pd$grp))-1, 'Set1'), 'black') cols[length(cols)-1] <- 'pink' pd <- pd %>% arrange(desc(p)) pd$ccaa <- factor(pd$ccaa, levels = unique(pd$ccaa)) ggplot(data = pd, aes(x = date, y = p, group = ccaa)) + geom_hline(yintercept = 100, color = 'darkgrey') + geom_line(data = pd %>% filter(grp == 'Others'), aes(color = grp), alpha = 0.7) + geom_line(data = pd %>% filter(grp != 'Others'), aes(color = grp), size = 2, alpha = 0.8) + databrew::theme_simple() + theme(legend.position = 'right', strip.text = element_text(size = 12, hjust = 0.5)) + scale_color_manual(name = '', values = cols) + labs(x = '', y = 'Percent', title = 'Excess mortality by region', caption = 'Raw from the Economist: https://github.com/TheEconomist/covid-19-excess-deaths-tracker/blob/master/output-data/historical-deaths/spain_weekly_deaths.csv\nChart by Joe Brew. Ceuta, Melilla, and Canarias removed due to low numbers.') ggsave('~/Desktop/plot2.png')
pd <- df_country %>% filter(country == 'India') ggplot(data = pd, aes(x = date, y = cases_non_cum)) + geom_point() + geom_segment(aes(xend = date, yend = 0)) + theme_simple() + labs(x = 'Date', y = 'Incident cases', title = 'INDIA: Confirmed COVID-19 cases')
pd <- esp_df %>% filter(ccaa %in% c('Cataluña', 'Madrid')) ggplot(data = pd, aes(x = date, y = cases_non_cum)) + geom_point() + geom_segment(aes(xend = date, yend = 0)) + theme_simple() + labs(x = 'Date', y = 'Incident cases', title = 'Confirmed COVID-19 cases') + facet_wrap(~ccaa)
# Africa cases # Number of Africa countries pd <- df_country pd %>% left_join(world_pop) %>% # filter(sub_region %in% c('Sub-Saharan Africa')) %>% filter(region %in% 'Africa') %>% filter(cases > 0) %>% group_by(date) %>% summarise(cases = sum(cases))
# Number of Africa countries pd <- df_country pd <- pd %>% left_join(world_pop) %>% filter(sub_region %in% c('Sub-Saharan Africa')) %>% filter(cases > 0) pd <- pd %>% group_by(date) %>% tally ggplot(data = pd, aes(x = date, y = n)) + # geom_line() + geom_area(fill = 'darkorange', color = 'black', alpha = 0.6) + theme_simple() + labs(x = 'Date', y = 'Countries', title = 'Sub-Saharan African countries with confirmed COVID-19 cases')
# Overall Africa cases pd <- df_country pd <- pd %>% left_join(world_pop) %>% filter(sub_region %in% c('Sub-Saharan Africa')) x = pd %>% group_by(date) %>% summarise(cases = sum(cases), deaths = sum(deaths), n = length(country[cases > 0])) x %>% filter(date == '2020-04-13' | date == '2020-05-13' | date == max(date) | date == '2020-03-13')
# Tests pd <- testing entity_split <- strsplit(pd$Entity, split = ' - ') pd$country <- unlist(lapply(entity_split, function(x){x[1]})) pd$key <- unlist(lapply(entity_split, function(x){x[2]})) # pd <- pd %>% filter(key == 'tests performed') pd <- pd %>% left_join(world_pop)# %>% # filter(sub_region %in% c('Sub-Saharan Africa', 'Southern Europe', 'Northern Europe', 'Western Europe')) ggplot(data = pd, aes(x = Date, y = `Cumulative total`, group = country)) + geom_line(aes(color = country))
x = pd %>% group_by(country) %>% filter(Date == max(Date)) %>% ungroup %>% mutate(sub_region = ifelse(sub_region == 'Sub-Saharan Africa', 'SSA', 'Other')) %>% filter(!is.na(sub_region)) %>% arrange(`Cumulative total`) x$sub_region <- factor(x$sub_region, levels = unique(x$sub_region)) x$country <- factor(x$country, levels = unique(x$country)) ggplot(data = x, aes(x = country, y = `Cumulative total`)) + geom_bar(aes(fill = sub_region), stat = 'identity') + theme_simple() + scale_fill_manual(name = '', values = c('grey', 'darkorange')) + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + labs(x= 'Country', title = 'Tests administered') ggsave('~/Desktop/man/3.png')
pd = esp_df %>% left_join(esp_pop) %>% mutate(p = deaths_non_cum / pop * 1000000) ggplot(data = pd, aes(x = date, y = p)) + # geom_step() + geom_bar(stat = 'identity', fill = 'red', alpha = 0.6, color = NA) + # geom_ribbon(aes(x = date, ymin = 0, ymax = p), data = pd, fill = 'blue') + facet_wrap(~ccaa) + theme_minimal() + labs(x = 'Date', y = 'Daily deaths per 1,000,000', title = 'Daily deaths per 1,000,000 population')
pd <- df_country %>% filter(country == 'Spain') ggplot(data = pd, aes(x = date, y = cases_non_cum)) + geom_bar(stat = 'identity') + theme_simple() + labs(x = 'Fecha', y = 'Casos diarios', title = 'Casos confirmados nuevos')
pd <- df_country %>% filter(country == 'Spain') ggplot(data = pd, aes(x = date, y = deaths_non_cum)) + geom_bar(stat = 'identity') + theme_simple() + labs(x = 'Fecha', y = 'Muertes diarias', title = 'Muertes')
covid19::plot_day_zero(countries = c('Italy', 'Spain', 'US', 'Germany', 'Canada', 'UK', 'Netherlands' ), ylog = F, day0 = 1, cumulative = F, calendar = T, pop = T, point_alpha = 0, color_var = 'geo')
pd <- df_country %>% left_join(world_pop %>% dplyr::select(iso, pop)) %>% group_by(country) %>% mutate(max_date = max(date)) %>% mutate(week_ago = max_date - 6) %>% # filter(date == max(date)) %>% filter(date >= week_ago, date <= max_date) %>% group_by(country) %>% summarise(y = sum(cases_non_cum), pop = dplyr::first(pop), date_range = paste0(min(date), ' - ', max(date)), yp = sum(cases_non_cum) / dplyr::first(pop) * 1000000) %>% ungroup %>% filter(pop > 1000000) %>% arrange(desc(yp)) %>% head(15) %>% mutate(country = ifelse(country == 'United Kingdom', 'UK', country)) pd$country <- factor(pd$country, levels = unique(pd$country)) ggplot(data = pd, aes(x = country, y = yp)) + geom_bar(stat = 'identity') + theme_simple() + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 12)) + geom_text(aes(label = round(yp, digits = 0)), nudge_y = -50, color = 'white') + labs(x = '', y = 'Cases per 1,000,000 (last 7 days)', title = 'New confirmed COVID-19 cases per million population, last 7 days')
pd <- df_country %>% left_join(world_pop %>% dplyr::select(iso, pop)) %>% group_by(country) %>% mutate(max_date = max(date)) %>% mutate(week_ago = max_date - 6) %>% # filter(date == max(date)) %>% filter(date >= week_ago, date <= max_date) %>% group_by(country) %>% summarise(y = sum(deaths_non_cum), pop = dplyr::first(pop), date_range = paste0(min(date), ' - ', max(date)), yp = sum(deaths_non_cum) / dplyr::first(pop) * 1000000) %>% ungroup %>% filter(pop > 1000000) %>% arrange(desc(yp)) %>% head(10) %>% mutate(country = ifelse(country == 'United Kingdom', 'UK', country)) pd$country <- factor(pd$country, levels = unique(pd$country)) ggplot(data = pd, aes(x = country, y = yp)) + geom_bar(stat = 'identity') + theme_simple() + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 12)) + geom_text(aes(label = round(yp, digits = 0)), nudge_y = -10, color = 'white') + labs(x = '', y = 'Deaths per 1,000,000 (last 7 days)', title = 'New confirmed COVID-19 deaths per million population, last 7 days')
pd <- esp_df %>% left_join(esp_pop %>% dplyr::select(ccaa, pop)) %>% mutate(country = 'Spain') %>% bind_rows( ita %>% left_join(ita_pop %>% dplyr::select(ccaa, pop)) %>% mutate(country = 'Italy') ) %>% bind_rows( df %>% filter(country == 'US') %>% mutate(ccaa = district) %>% left_join(regions_pop %>% dplyr::select(ccaa, pop)) %>% mutate(country = 'US')) %>% group_by(ccaa) %>% mutate(max_date = max(date)) %>% mutate(week_ago = max_date - 6) %>% # filter(date == max(date)) %>% filter(date >= week_ago, date <= max_date) %>% group_by(ccaa) %>% summarise(y = sum(cases_non_cum), country = dplyr::first(country), pop = dplyr::first(pop), date_range = paste0(min(date), ' - ', max(date))) %>% ungroup %>% mutate(yp = y / pop * 1000000) %>% ungroup %>% # filter(pop > 1000000) %>% arrange(desc(yp)) #Get country totals pd %>% group_by(country) %>% summarise(y = sum(y, na.rm = T), pop = sum(pop, na.rm = T)) %>% ungroup %>% mutate(yp = y / pop * 1000000) library(knitr) library(kableExtra) pd <- pd %>% mutate(yp = round(yp, digits = 1)) %>% mutate(Rank = 1:nrow(pd)) %>% dplyr::select(Rank, Región = ccaa, `Casos nuevos, 7 días` = y, Población = pop, `Casos nuevos 7 días por millón` = yp) kable(pd) %>% kable_styling("striped", full_width = F) %>% column_spec(which(names(pd) == 'Casos nuevos 7 días por millón'), bold = T) %>% row_spec(which(pd$`Región` %in% esp_df$ccaa), bold = T, color = "white", background = "#D7261E")
xpd = pd %>% filter(`Región` %in% c(ita$ccaa, esp_df$ccaa)) xpd$Rank <- 1:nrow(xpd) kable(xpd) %>% kable_styling("striped", full_width = F) %>% column_spec(which(names(xpd) == 'Casos nuevos 7 días por millón'), bold = T) %>% row_spec(which(xpd$`Región` %in% esp_df$ccaa), bold = T, color = "white", background = "#D7261E")
x = esp_df %>% left_join(esp_pop) %>% bind_rows(ita %>% left_join(ita_pop)) %>% filter(date == '2020-04-09') %>% filter(ccaa %in% c('Madrid', 'Cataluña', 'Lombardia')) %>% dplyr::select(ccaa, deaths, cases, pop) %>% mutate(deathsp = deaths / pop * 100000, casesp = cases / pop * 100000) covid19::plot_day_zero(countries = c('Italy', 'Spain', 'China', 'South Korea', 'Sinagpore'), districts = c('Madrid', #'Hubei', 'Cataluña', 'Lombardia'), by_district = T, roll = 7, deaths = F, pop = T, day0 = 0, ylog = F, calendar = T, cumulative = F) + labs(x = 'Data', y = 'Casos diaris (mitjana mòbil de 7 dies)', title = 'Casos diaris per 1.000.000', subtitle = 'Mitjana mòbil de 7 dies')
covid19::plot_day_zero(countries = c('Italy', 'Spain'), roll = 7, deaths = F, pop = T, day0 = 0, ylog = F, calendar = T, cumulative = F) + labs(x = 'Data', y = 'Casos diaris per 1.000.000 (mitjana mòbil de 7 dies)', title = 'Casos diaris per 1.000.000', subtitle = 'Mitjana mòbil de 7 dies') + theme(legend.direction = 'horizontal', legend.position = 'top')
covid19::plot_day_zero(countries = c('Italy', 'Spain'), roll = 7, deaths = T, pop = T, day0 = 0, ylog = F, calendar = T, cumulative = F) + labs(x = 'Data', y = 'Morts diaris per 1.000.000 (mitjana mòbil de 7 dies)', title = 'Morts diaris per 1.000.000', subtitle = 'Mitjana mòbil de 7 dies') + theme(legend.direction = 'horizontal', legend.position = 'top')
covid19::plot_day_zero(countries = c('Italy', 'Spain'), by_district = T, districts = c('Madrid', 'Emilia-Romagna', 'Cataluña', 'Lombardia'), roll = 7, deaths = F, pop = T, day0 = 0, ylog = F, calendar = T, cumulative = F) + labs(x = 'Data', y = 'Casos diaris per 1.000.000 (mitjana mòbil de 7 dies)', title = 'Casos diaris per 1.000.000', subtitle = 'Mitjana mòbil de 7 dies') + theme(legend.direction = 'horizontal', legend.position = 'top')
covid19::plot_day_zero(countries = c('Japan', 'South Korea', 'Singapore', 'Hong Kong'), roll = 7, deaths = F, # pop = T, day0 = 0, ylog = F, calendar = T, cumulative = F) + labs(x = 'Data', y = 'Casos diaris (mitjana mòbil de 3 dies)', title = 'Casos diaris', subtitle = 'Mitjana mòbil de 3 dies') + facet_wrap(~geo, scales = 'free_y') + theme(legend.position = 'none')
pd <- esp_df %>% arrange(date) %>% group_by(date) %>% summarise(deaths_non_cum = sum(deaths_non_cum), cases_non_cum = sum(cases_non_cum)) %>% ungroup %>% mutate(dow = weekdays(date)) %>% mutate(week = isoweek(date)) %>% group_by(week) %>% mutate(start_date = min(date)) %>% ungroup %>% filter(date >= '2020-03-09') pd$dow <- factor(pd$dow, levels = c('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'), labels = c('Lunes', 'Martes', 'Miércoles', 'Jueves', 'Viernes', 'Sábado', 'Domingo')) n_cols <- length(unique(pd$start_date)) cols <- colorRampPalette(RColorBrewer::brewer.pal(n = 9, name = 'Spectral'))(n_cols) pd$start_date <- factor(pd$start_date) ggplot(data = pd, aes(x = dow, y = cases_non_cum, group = week, color = start_date)) + geom_line(size = 4) + geom_point(size = 4) + scale_color_manual(name = 'Primer día\nde la semana', values = cols) + theme_simple() + labs(x = 'Día de la semana', y = 'Muertes')
pd <- esp_df %>% mutate(geo = ifelse(ccaa == 'País Vasco', 'Basque country', 'Rest of Spain')) %>% group_by(geo, date) %>% summarise(deaths = sum(deaths)) %>% ungroup pp <- esp_pop %>% mutate(geo = ifelse(ccaa == 'País Vasco', 'Basque country', 'Rest of Spain')) %>% group_by(geo) %>% summarise(pop = sum(pop)) pd <- left_join(pd, pp) %>% mutate(pk = deaths / pop * 100000) ggplot(data = pd %>% filter(pk > 0.1), aes(x = date, y = pk, color = geo)) + geom_line() + labs(x = 'Date', y = 'Cumulative deaths per 100,000') + scale_color_manual(name = '', values = c('red', 'black')) + theme_simple()
countries <- c( 'Spain', 'US', 'France', # 'Portugal', 'Italy', 'China' ) districts <- c('Lombardia', 'Cataluña', 'New York', # 'Hubei', 'CyL', 'CLM', # 'Washington', 'La Rioja', 'Madrid') plot_day_zero(countries = countries, districts = districts, ylog = F, day0 = 1, cumulative = F, time_before = 0, roll = 3, deaths = T, pop = T, by_district = T, point_alpha = 0, line_size = 3, color_var = 'geo')
dir.create('/tmp/totesmou') plot_day_zero(countries = c('Spain', 'Italy', max_date = Sys.Date()-1), point_size = 2, calendar = T) ggsave('/tmp/totesmou/1_italy_vs_spain.png', height = 5.6, width = 9.6)
plot_day_zero(countries = c('Spain', 'Italy'), point_size = 2, calendar = F) ggsave('/tmp/totesmou/2_italy_vs_spain_temps_ajustat.png', height = 5.6, width = 9.6)
plot_day_zero(countries = c('Spain', 'Italy'), point_size = 2, calendar = T, deaths = T, day0 = 10)
plot_day_zero(countries = c('Spain', 'Italy'), point_size = 2, calendar = F, deaths = T, day0 = 10)
plot_day_zero(countries = c('Spain', 'Italy'), point_size = 2, calendar = F, deaths = T, day0 = 10, pop = T)
plot_day_zero(countries = c('Spain', 'Italy'), point_size = 2, calendar = F, deaths = T, day0 = 1, pop = T, roll = 3, roll_fun = 'mean')
plot_day_zero(countries = c('Spain', 'Italy'), districts = c('Cataluña', 'Madrid', 'Lombardia', 'Emilia-Romagna'), by_district = T, day0 = 10, pop = T)
plot_day_zero(countries = c('Spain', 'Italy'), districts = c('Cataluña', 'Madrid', 'Lombardia', 'Emilia-Romagna'), by_district = T, deaths = T, day0 = 1, pop = T)
plot_day_zero(countries = c('Spain', 'Italy'), districts = c('Cataluña', 'Madrid', 'Lombardia', 'Emilia-Romagna'), by_district = T, deaths = T, roll = 3, roll_fun = 'mean', day0 = 1, ylog = F, pop = T, calendar = T)
plot_day_zero(countries = c('Spain', 'Italy', 'US'), districts = c('Cataluña', 'Madrid', 'New York', 'Lombardia', 'Emilia-Romagna'), by_district = T, deaths = T, roll = 3, roll_fun = 'mean', day0 = 1, pop = T)
plot_day_zero(countries = c('Spain', 'Italy', 'US'), districts = c('Cataluña', 'Madrid', 'New York', 'Lombardia', 'Emilia-Romagna'), by_district = T, deaths = T, # roll = 7, roll_fun = 'mean', day0 = 1, pop = F)
plot_day_zero(color_var = 'iso', by_district = T, deaths = T, day0 = 1, alpha = 0.6, point_alpha = 0, calendar = T, countries = c('Spain', 'Italy'), pop = T)
place_transform <- function(x){ifelse(x == 'Madrid', 'Madrid', # ifelse(x == 'Cataluña', 'Cataluña', 'Altres CCAA') # ) } place_transform_ita <- function(x){ ifelse(x == 'Lombardia', 'Lombardia', # ifelse(x == 'Emilia Romagna', 'Emilia Romagna', 'Altres regions italianes') # ) } pd <- esp_df %>% mutate(country = 'Espanya') %>% mutate(ccaa = place_transform(ccaa)) %>% bind_rows(ita %>% mutate(ccaa = place_transform_ita(ccaa), country = 'Itàlia')) %>% group_by(country, ccaa, date) %>% summarise(cases = sum(cases), uci = sum(uci), deaths = sum(deaths), cases_non_cum = sum(cases_non_cum), deaths_non_cum = sum(deaths_non_cum), uci_non_cum = sum(uci_non_cum)) %>% left_join(esp_pop %>% mutate(ccaa = place_transform(ccaa)) %>% bind_rows(ita_pop %>% mutate(ccaa = place_transform_ita(ccaa))) %>% group_by(ccaa) %>% summarise(pop = sum(pop))) %>% mutate(deaths_non_cum_p = deaths_non_cum / pop * 1000000) %>% group_by(country, date) %>% mutate(p_deaths_non_cum_country = deaths_non_cum / sum(deaths_non_cum) * 100, p_deaths_country = deaths / sum(deaths) * 100) pd$ccaa <- factor(pd$ccaa, levels = c('Madrid', # 'Cataluña', 'Altres CCAA', 'Lombardia', # 'Emilia Romagna', 'Altres regions italianes')) cols <- c( rev(brewer.pal(n = 3, 'Reds'))[1:2], rev(brewer.pal(n = 3, 'Blues'))[1:2] ) label_data <- pd %>% filter(country %in% c('Itàlia', 'Espanya')) %>% group_by(country) %>% filter(date == max(date)) %>% mutate(label = gsub('\nitalianas', '', gsub(' ', '\n', ccaa))) %>% mutate(x = date - 2, y = p_deaths_country + ifelse(p_deaths_country > 50, 10, -9)) ggplot(data = pd %>% group_by(country) %>% mutate(start_day = dplyr::first(date[deaths >=50])) %>% filter(date >= start_day), aes(x = date, y = p_deaths_country, color = ccaa, group = ccaa)) + # geom_bar(stat = 'identity', # position = position_dodge(width = 0.99)) + scale_fill_manual(name = '', values = cols) + scale_color_manual(name = '', values = cols) + geom_line(size = 2, aes(color = ccaa)) + # geom_point(size = 3, # aes(color = ccaa)) + facet_wrap(~country) + # xlim(as.Date('2020-03-09'), # Sys.Date()-1) + theme_simple() + geom_hline(yintercept = 50, lty = 2, alpha = 0.6) + # geom_line(lty = 2, alpha = 0.6) + labs(x = 'Data', y = 'Percentatge de morts', title = 'Percentatge de morts: regió més afectada vs resta del pais', subtitle = 'A partir del primer día a cada país amb 50 morts acumulades') + theme(legend.position = 'top', strip.text = element_text(size = 30), legend.text = element_text(size = 10)) + guides(color = guide_legend(nrow = 2, keywidth = 5)) + geom_text(data = label_data, aes(x = x-2, y = y, label = label, color = ccaa), size = 7, show.legend = FALSE)
# Same cahrt as previous, but one shared axis place_transform <- function(x){ifelse(x == 'Madrid', 'Madrid', # ifelse(x == 'Cataluña', 'Cataluña', 'Rest of Spain') # ) } place_transform_ita <- function(x){ ifelse(x == 'Lombardia', 'Lombardia', # ifelse(x == 'Emilia Romagna', 'Emilia Romagna', 'Rest of Italy') # ) } pd <- esp_df %>% mutate(country = 'Spain') %>% mutate(ccaa = place_transform(ccaa)) %>% bind_rows(ita %>% mutate(ccaa = place_transform_ita(ccaa), country = 'Italy')) %>% group_by(country, ccaa, date) %>% summarise(cases = sum(cases), uci = sum(uci), deaths = sum(deaths), cases_non_cum = sum(cases_non_cum), deaths_non_cum = sum(deaths_non_cum), uci_non_cum = sum(uci_non_cum)) %>% left_join(esp_pop %>% mutate(ccaa = place_transform(ccaa)) %>% bind_rows(ita_pop %>% mutate(ccaa = place_transform_ita(ccaa))) %>% group_by(ccaa) %>% summarise(pop = sum(pop))) %>% mutate(deaths_non_cum_p = deaths_non_cum / pop * 1000000) %>% group_by(country, date) %>% mutate(p_deaths_non_cum_country = deaths_non_cum / sum(deaths_non_cum) * 100, p_deaths_country = deaths / sum(deaths) * 100) pd$ccaa <- factor(pd$ccaa, levels = c('Madrid', # 'Cataluña', 'Rest of Spain', 'Lombardia', # 'Emilia Romagna', 'Rest of Italy')) cols <- c( rev(brewer.pal(n = 3, 'Reds'))[1:2], rev(brewer.pal(n = 3, 'Blues'))[1:2] ) label_data <- pd %>% filter(country %in% c('Italy', 'Spain')) %>% group_by(country) %>% filter(date == max(date)) %>% # mutate(label = gsub('\nitalianas', '', gsub(' ', '\n', ccaa))) %>% mutate(x = date - 2, y = p_deaths_country + ifelse(p_deaths_country > 50, 10, -9)) %>% ungroup ggplot(data = pd %>% group_by(country) %>% mutate(start_day = dplyr::first(date[deaths >=30])) %>% filter(date >= start_day), aes(x = date, y = p_deaths_country, color = ccaa, group = ccaa)) + # geom_bar(stat = 'identity', # position = position_dodge(width = 0.99)) + scale_fill_manual(name = '', values = cols) + scale_color_manual(name = '', values = cols) + geom_line(size = 2, aes(color = ccaa)) + # geom_point(size = 3, # aes(color = ccaa)) + # facet_wrap(~country, scales = 'free_x') + # xlim(as.Date('2020-03-09'), # Sys.Date()-1) + theme_simple() + geom_hline(yintercept = 50, lty = 2, alpha = 0.6) + # geom_line(lty = 2, alpha = 0.6) + labs(x = 'Date', y = 'Percentage of deaths', title = 'Percentage of country\'s cumulative COVID-19 deaths by geography', subtitle = 'Starting at first day with 50 or more cumulative deaths') + theme(legend.position = 'top', strip.text = element_text(size = 30), legend.text = element_text(size = 10)) + guides(color = guide_legend(nrow = 2, keywidth = 5))# + # geom_text(data = label_data, # aes(x = x-2, # y = y, # label = label, # color = ccaa), # size = 7, # show.legend = FALSE)
Same chart as above but absolute numbers
# Same cahrt as previous, but one shared axis place_transform <- function(x){ifelse(x == 'Madrid', 'Madrid', # ifelse(x == 'Cataluña', 'Cataluña', 'Rest of Spain') # ) } place_transform_ita <- function(x){ ifelse(x == 'Lombardia', 'Lombardia', # ifelse(x == 'Emilia Romagna', 'Emilia Romagna', 'Rest of Italy') # ) } pd <- esp_df %>% mutate(country = 'Spain') %>% mutate(ccaa = place_transform(ccaa)) %>% bind_rows(ita %>% mutate(ccaa = place_transform_ita(ccaa), country = 'Italy')) %>% group_by(country, ccaa, date) %>% summarise(cases = sum(cases), uci = sum(uci), deaths = sum(deaths), cases_non_cum = sum(cases_non_cum), deaths_non_cum = sum(deaths_non_cum), uci_non_cum = sum(uci_non_cum)) %>% left_join(esp_pop %>% mutate(ccaa = place_transform(ccaa)) %>% bind_rows(ita_pop %>% mutate(ccaa = place_transform_ita(ccaa))) %>% group_by(ccaa) %>% summarise(pop = sum(pop))) %>% mutate(deaths_non_cum_p = deaths_non_cum / pop * 1000000) %>% group_by(country, date) %>% mutate(p_deaths_non_cum_country = deaths_non_cum / sum(deaths_non_cum) * 100, p_deaths_country = deaths / sum(deaths) * 100) pd$ccaa <- factor(pd$ccaa, levels = rev(c('Madrid', # 'Cataluña', 'Rest of Spain', 'Lombardia', # 'Emilia Romagna', 'Rest of Italy'))) cols <- c( rev(brewer.pal(n = 3, 'Reds'))[1:2], rev(brewer.pal(n = 3, 'Blues'))[1:2] ) label_data <- pd %>% filter(country %in% c('Italy', 'Spain')) %>% group_by(country) %>% filter(date == max(date)) %>% # mutate(label = gsub('\nitalianas', '', gsub(' ', '\n', ccaa))) %>% mutate(x = date - 2, y = p_deaths_country + ifelse(p_deaths_country > 50, 10, -9)) %>% ungroup # Get moving ma <- function(x, n = 2){ if(length(x) >= n){ stats::filter(x, rep(1 / n, n), sides = 1) } else { new_n <- length(x) stats::filter(x, rep(1 / new_n, new_n), sides = 1) } } ggplot(data = pd %>% group_by(country) %>% mutate(start_day = dplyr::first(date[deaths >=1])) %>% filter(date >= start_day) %>% mutate(days_since = as.numeric(date - start_day)) %>% ungroup %>% arrange(date) %>% group_by(country, ccaa) %>% mutate(rolling_average = ma(deaths_non_cum, n = 3)) %>% ungroup, aes(x = date, y = rolling_average, color = ccaa, group = ccaa)) + # geom_bar(stat = 'identity', # position = position_dodge(width = 0.99)) + scale_fill_manual(name = '', values = cols) + scale_color_manual(name = '', values = cols) + geom_line(size = 2, aes(color = ccaa)) + # geom_point(size = 3, # aes(color = ccaa)) + # scale_y_log10(limits = c(1.5, 1000)) + # scale_y_log10() + facet_wrap(~country) + # xlim(as.Date('2020-03-09'), # Sys.Date()-1) + theme_simple() + # geom_hline(yintercept = 50, lty = 2, alpha = 0.6) + # geom_line(lty = 2, alpha = 0.6) + labs(x = 'Date', y = 'Deaths (log-scale)', title = 'Daily COVID-19 deaths by geography', subtitle = '3-day rolling average') + theme(legend.position = 'top', plot.title = element_text(size = 30), plot.subtitle = element_text(size = 24), strip.text = element_text(size = 30, hjust = 0.5), legend.text = element_text(size = 20)) + guides(color = guide_legend(nrow = 2, keywidth = 5)) ggsave('~/Desktop/madlom.png')
roll_curve <- function(data, n = 4, deaths = FALSE, scales = 'fixed', nrow = NULL, ncol = NULL, pop = FALSE){ # Create the n day rolling avg ma <- function(x, n = 5){ if(length(x) >= n){ stats::filter(x, rep(1 / n, n), sides = 1) } else { new_n <- length(x) stats::filter(x, rep(1 / new_n, new_n), sides = 1) } } pd <- data if(deaths){ pd$var <- pd$deaths_non_cum } else { pd$var <- pd$cases_non_cum } if('ccaa' %in% names(pd)){ pd$geo <- pd$ccaa } else { pd$geo <- pd$iso } if(pop){ # try to get population if(any(pd$geo %in% unique(esp_df$ccaa))){ right <- esp_pop right_var <- 'ccaa' } else { right <- world_pop right_var <- 'iso' } pd <- pd %>% left_join(right %>% dplyr::select(all_of(right_var), pop), by = c('geo' = right_var)) %>% mutate(var = var / pop * 100000) } pd <- pd %>% arrange(date) %>% group_by(geo) %>% mutate(with_lag = ma(var, n = n)) ggplot() + geom_bar(data = pd, aes(x = date, y = var), stat = 'identity', fill = 'grey', alpha = 0.8) + geom_area(data = pd, aes(x = date, y = with_lag), color = 'red', fill = 'red', alpha = 0.6) + facet_wrap(~geo, scales = scales, nrow = nrow, ncol = ncol) + theme_simple() + labs(x = '', y = ifelse(deaths, 'Deaths', 'Cases'), title = paste0('Daily (non-cumulative) ', ifelse(deaths, 'deaths', 'cases'), ifelse(pop, ' (per 100,000)', '')), subtitle = paste0('Data as of ', max(data$date), '.\nRed line = ', n, ' day rolling average. Grey bar = day-specific value.')) + theme(axis.text.x = element_text(size = 7, angle = 90, hjust = 0.5, vjust = 1)) #+ # scale_x_date(name ='', # breaks = sort(unique(pd$date)), # labels = format(sort(unique(pd$date)), '%b %d')) # scale_y_log10() } this_ccaa <- 'Cataluña' plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(ccaa == this_ccaa) roll_curve(plot_data, scales = 'fixed') + theme(strip.text = element_text(size = 30)) plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(ccaa == this_ccaa) roll_curve(plot_data, deaths = T, scales = 'fixed') + theme(strip.text = element_text(size = 30))
african_countries <- world_pop$country[world_pop$sub_region %in% c('Sub-Saharan Africa')] pd <- plot_day_zero(countries = c(african_countries), day0 = 1, max_date = Sys.Date() - 46, ylog = F) + ylim(0, 5000) pd pd <- plot_day_zero(countries = c(african_countries), day0 = 1, max_date = Sys.Date(), ylog = F) + ylim(0, 5000) pd pd <- plot_day_zero(countries = c(african_countries), day0 = 1, calendar = T, max_date = Sys.Date(), ylog = T) + theme(legend.position = 'none') pd pd <- plot_day_zero(countries = c(african_countries), day0 = 10, max_date = Sys.Date() - 13) pd pd <- plot_day_zero(countries = c(african_countries), day0 = 10, max_date = Sys.Date() - 6) pd pd <- plot_day_zero(countries = c(african_countries), day0 = 10, max_date = Sys.Date(), ylog = F) pd latam_countries <- world_pop$country[world_pop$sub_region %in% c('Latin America and the Caribbean')] latam_countries <- latam_countries[!latam_countries %in% c('Guyana')] pd <- plot_day_zero(countries = c(latam_countries), day0 = 10, max_date = Sys.Date() - 13) pd pd <- plot_day_zero(countries = c(latam_countries), day0 = 10, max_date = Sys.Date() - 6) pd pd <- plot_day_zero(countries = c(latam_countries), day0 = 10, max_date = Sys.Date()) pd
Latin America and Africa vs Europe
isos <- sort(unique(world_pop$sub_region)) keep_countries <- world_pop$country[world_pop$sub_region %in% c('Latin America and the Caribbean', 'Sub-Saharan Africa') | world_pop$region %in% 'Europe'] keep_countries <- keep_countries[!keep_countries %in% c('Guyana')] pd <- df_country %>% ungroup %>% filter(country %in% keep_countries) %>% dplyr::select(-country) %>% left_join(world_pop) %>% group_by(iso) %>% mutate(day0 = min(date[cases >= 10])) %>% ungroup %>% mutate(days_since = date - day0) %>% filter(days_since >= 0) cols <- c( 'black') g <- ggplot(data = pd, aes(x = days_since, y = cases, group = country, color = region)) + geom_line(data = pd %>% filter(region == 'Europe'), alpha = 0.6) + scale_y_log10() + scale_color_manual(name = '', values = cols) + theme_simple() + labs(x = 'Days since first day at 10 cases') + theme(legend.position = 'top') g cols <- c('darkred', 'black') g + geom_line(data = pd %>% filter(region == 'Africa'), # alpha = 1, size = 1.5, alpha = 0.8) + scale_y_log10() + scale_color_manual(name = '', values = cols) cols <- c( 'darkorange', 'black') g + geom_line(data = pd %>% filter(sub_region == 'Latin America and the Caribbean'), # alpha = 1, size = 1.5, alpha = 0.8) + scale_color_manual(name = '', values = cols) cols <- c('darkred', 'darkorange', 'black') g + geom_line(data = pd %>% filter(sub_region != 'Europe'), # alpha = 1, size = 1.5, alpha = 0.8) + scale_color_manual(name = '', values = cols)
# Assets pyramid_dir <- '../../data-raw/pyramids/' pyramid_files <- dir(pyramid_dir) out_list <- list() for(i in 1:length(pyramid_files)){ out_list[[i]] <- read_csv(paste0(pyramid_dir, pyramid_files[i])) %>% mutate(region = gsub('.csv', '', pyramid_files[i])) } pyramid <- bind_rows(out_list) make_pyramid <- function(the_region = 'AFRICA-2019'){ sub_data <- pyramid %>% filter(region == the_region) sub_data$Age <- factor(sub_data$Age, levels = sub_data$Age) sub_data <- tidyr::gather(sub_data, key, value, M:F) ggplot(data = sub_data, aes(x = Age, y = value, fill = key)) + geom_bar(stat = 'identity', position = position_dodge(), alpha = 0.6, color = 'black') + scale_fill_manual(name = '', values = c('darkorange', 'lightblue')) + theme_simple() + labs(x = 'Age group', y = 'Population') + theme(legend.position = 'top') } make_pyramid_overlay <- function(){ sub_data <- pyramid %>% filter(region %in% c('EUROPE-2019', 'AFRICA-2019', 'LATIN AMERICA AND THE CARIBBEAN-2019')) %>% mutate(region = gsub('-2019', '', region)) sub_data$Age <- factor(sub_data$Age, levels = unique(sub_data$Age)) sub_data <- tidyr::gather(sub_data, key, value, M:F) %>% group_by(Age, region) %>% summarise(value = sum(value)) %>% ungroup %>% group_by(region) %>% mutate(p = value / sum(value) * 100) %>% ungroup ggplot(data = sub_data, aes(x = Age, y = p, color = region, group = region, fill = region)) + geom_area(position = position_dodge(), alpha = 0.6) + scale_fill_manual(name = '', values = c('darkred', 'darkorange', 'black')) + scale_color_manual(name = '', values = c('darkred', 'darkorange', 'black')) + theme_simple() + theme(legend.position = 'top') + labs(x = 'Age group', y = 'Percentage') } make_pyramid(the_region = 'Spain-2019') + labs(title = 'Spain') make_pyramid(the_region = 'Italy-2019') + labs(title = 'Italy') make_pyramid(the_region = 'EUROPE-2019') + labs(title = 'Europe') make_pyramid(the_region = 'Kenya-2019') + labs(title = 'Kenya') make_pyramid(the_region = 'Mozambique-2019') + labs(title = 'Mozambique') make_pyramid(the_region = 'Tanzania-2019') + labs(title = 'Tanzania') make_pyramid(the_region = 'Guatemala-2019') + labs(title = 'Guatemala') make_pyramid(the_region = 'AFRICA-2019') + labs(title = 'Africa') make_pyramid(the_region = 'LATIN AMERICA AND THE CARIBBEAN-2019') + labs(title = 'Latin America and the Caribbean') make_pyramid_overlay() + labs(title = 'Population distribution by region') pyramid <- pyramid %>% mutate(old = Age %in% c('80-84', '85-89', '90-94','95-99', '100+')) pyramid %>% group_by(region, old) %>% summarise(n = sum(M) + sum(F)) %>% ungroup %>% group_by(region) %>% mutate(p = n / sum(n) * 100) %>% filter(old)
plot_day_zero(countries = c('South Africa', 'Spain'), day0 = 1, calendar = T)
plot_day_zero(countries = c('Kenya', 'Italy'), day0 = 10, calendar = F)
plot_day_zero(countries = c('China', 'Italy', 'Spain'), districts = c('Hubei', 'Lombardia', 'Cataluña', 'Madrid'), by_district = T, point_alpha = 0, day0 = 5, pop = F, deaths = T, ylog = T, calendar = F, roll = 5)
# cat_transform <- function(x){ifelse(x == 'Catalunya', 'Cataluña', x)} cat_transform <- function(x){return(x)} pd <- por_df %>% mutate(country = 'Portugal') %>% bind_rows(esp_df %>% mutate(country = 'Spain')) %>% bind_rows(fra_df %>% mutate(country = 'France')) %>% bind_rows(ita %>% mutate(country = 'Italy')) %>% bind_rows( df %>% filter(country == 'Andorra') %>% mutate(ccaa = 'Andorra') ) keep_date = pd %>% group_by(country) %>% summarise(max_date= max(date)) %>% summarise(x = min(max_date)) %>% .$x pd <- pd %>% mutate(ccaa = cat_transform(ccaa)) %>% group_by(ccaa) %>% filter(date == keep_date) %>% # filter(date == '2020-03-27') %>% ungroup %>% dplyr::select(date, ccaa, deaths, deaths_non_cum, cases, cases_non_cum) %>% left_join(regions_pop %>% bind_rows( world_pop %>% filter(country == 'Andorra') %>% dplyr::mutate(ccaa = country) %>% dplyr::select(-region, -sub_region) )) %>% mutate(cases_per_million = cases / pop * 1000000, deaths_per_million = deaths / pop * 1000000) %>% mutate(cases_per_million_non_cum = cases_non_cum / pop * 1000000, deaths_per_million_non_cum = deaths_non_cum / pop * 1000000) map_esp1 <- map_esp map_esp1@data <- map_esp1@data %>% dplyr::select(ccaa) map_fra1 <- map_fra map_fra1@data <- map_fra1@data %>% dplyr::select(ccaa = NAME_1) map_por1 <- map_por map_por1@data <- map_por1@data %>% dplyr::select(ccaa = CCDR) map_ita1 <- map_ita map_ita1@data <- map_ita1@data %>% dplyr::select(ccaa) map_and1 <- map_and map_and1@data <- map_and1@data %>% dplyr::select(ccaa = NAME_0) map <- rbind(map_esp1, map_por1, map_fra1, map_ita1, map_and1) # Remove areas not of interest centroids <- coordinates(map) centroids <- data.frame(centroids) names(centroids) <- c('x', 'y') centroids$ccaa <- map@data$ccaa centroids <- left_join(centroids, pd) # map <- map_sp <- map[centroids$y >35 & centroids$x > -10 & # centroids$x < 8 & (centroids$y < 43 | map@data$ccaa %in% c('Occitanie', 'Nouvelle-Aquitaine') | # map@data$ccaa %in% esp_df$ccaa),] # map_sp <- map <- map[centroids$x > -10 & centroids$y <47,] map_sp <- map <- map[centroids$x > -10 & centroids$y <77,] # map_sp <- map <- map[centroids$x > -10,] # fortify map <- fortify(map, region = 'ccaa') # join data map$ccaa <- map$id map <- left_join(map, pd) var <- 'deaths_per_million' map$var <- as.numeric(unlist(map[,var])) centroids <- centroids[,c('ccaa', 'x', 'y', var)] centroids <- centroids %>% filter(ccaa %in% map_sp@data$ccaa) # cols <- rev(RColorBrewer::brewer.pal(n = 9, name = 'Spectral')) # cols <- c('#A16928','#bd925a','#d6bd8d','#edeac2','#b5c8b8','#79a7ac','#2887a1') # cols <- c('#009392','#39b185','#9ccb86','#e9e29c','#eeb479','#e88471','#cf597e') # cols <- c('#008080','#70a494','#b4c8a8','#f6edbd','#edbb8a','#de8a5a','#ca562c') cols <- rev(colorRampPalette(c('darkred', 'red', 'darkorange', 'orange', 'yellow', 'lightblue'))(10)) g <- ggplot(data = map, aes(x = long, y = lat, group = group)) + geom_polygon(aes(fill = var), lwd = 0.3, # color = 'darkgrey', color = NA, size = 0.6) + scale_fill_gradientn(name = '', colours = cols) + # scale_fill_() + ggthemes::theme_map() + theme(legend.position = 'bottom', plot.title = element_text(size = 16)) + guides(fill = guide_colorbar(direction= 'horizontal', barwidth = 50, barheight = 1, label.position = 'bottom')) + labs(title = 'Cumulative COVID-19 deaths per million population, western Mediterranean', subtitle = paste0('Data as of ', format(max(pd$date), '%B %d, %Y'))) + geom_text(data = centroids, aes(x = x, y = y, group = NA, label = paste0(ccaa, '\n', round(deaths_per_million, digits = 2))), alpha = 0.8, size = 3) g ggsave('/tmp/map_with_borders.png', height = 8, width = 13)
dir.create('/tmp/animation_map/') pd <- por_df %>% mutate(country = 'Portugal') %>% bind_rows(esp_df %>% mutate(country = 'Spain')) %>% bind_rows(fra_df %>% mutate(country = 'France')) %>% bind_rows(ita %>% mutate(country = 'Italy')) %>% bind_rows( df %>% filter(country == 'Andorra') %>% mutate(ccaa = 'Andorra') ) pd %>% group_by(country) %>% summarise(max_date= max(date)) unique_dates <- sort(unique(pd$date)) unique_dates <- unique_dates[unique_dates >= '2020-03-01'] popper <- regions_pop %>% bind_rows( world_pop %>% filter(country == 'Andorra') %>% dplyr::mutate(ccaa = country) %>% dplyr::select(-region, -sub_region) ) map_esp1 <- map_esp map_esp1@data <- map_esp1@data %>% dplyr::select(ccaa) map_fra1 <- map_fra map_fra1@data <- map_fra1@data %>% dplyr::select(ccaa = NAME_1) map_por1 <- map_por map_por1@data <- map_por1@data %>% dplyr::select(ccaa = CCDR) map_ita1 <- map_ita map_ita1@data <- map_ita1@data %>% dplyr::select(ccaa) map_and1 <- map_and map_and1@data <- map_and1@data %>% dplyr::select(ccaa = NAME_0) map <- rbind(map_esp1, map_por1, map_fra1, map_ita1, map_and1) # Remove areas not of interest centroids <- coordinates(map) centroids <- data.frame(centroids) names(centroids) <- c('x', 'y') centroids$ccaa <- map@data$ccaa # map <- map_sp <- map[centroids$y >35 & centroids$x > -10 & # # centroids$x < 8 & # (centroids$y < 43 | map@data$ccaa %in% c('Occitanie', 'Nouvelle-Aquitaine') | # map@data$ccaa %in% esp_df$ccaa),] # map_sp <- map <- map[centroids$x > -10 & centroids$y <47,] map_sp <- map <- map[centroids$x > -10 & centroids$y <477,] # fortify map <- fortify(map, region = 'ccaa') for(i in 1:length(unique_dates)){ this_date <- unique_dates[i] today_map <- map today_centroids <- centroids today_pd <- pd today_pd <- today_pd %>% mutate(ccaa = cat_transform(ccaa)) %>% group_by(ccaa) %>% # filter(date == max(date)) %>% filter(date == this_date) %>% ungroup %>% dplyr::select(date, ccaa, deaths, deaths_non_cum, cases, cases_non_cum) %>% left_join(popper) %>% mutate(cases_per_million = cases / pop * 1000000, deaths_per_million = deaths / pop * 1000000) %>% mutate(cases_per_million_non_cum = cases_non_cum / pop * 1000000, deaths_per_million_non_cum = deaths_non_cum / pop * 1000000) today_centroids <- left_join(today_centroids, today_pd) # join data today_map$ccaa <- today_map$id today_map <- left_join(today_map, today_pd) var <- 'deaths_per_million' today_map$var <- as.numeric(unlist(today_map[,var])) today_map$var <- ifelse(is.na(today_map$var), 0, today_map$var) today_centroids <- today_centroids[,c('ccaa', 'x', 'y', var)] today_centroids <- today_centroids %>% filter(ccaa %in% today_map$ccaa) today_centroids$var <- today_centroids[,var] today_centroids$var <- ifelse(is.na(today_centroids$var), 0, today_centroids$var) # cols <- rev(RColorBrewer::brewer.pal(n = 9, name = 'Spectral')) # cols <- c('#A16928','#bd925a','#d6bd8d','#edeac2','#b5c8b8','#79a7ac','#2887a1') # cols <- c('#009392','#39b185','#9ccb86','#e9e29c','#eeb479','#e88471','#cf597e') # cols <- c('#008080','#70a494','#b4c8a8','#f6edbd','#edbb8a','#de8a5a','#ca562c') cols <- rev(colorRampPalette(c('darkred', 'red', 'darkorange', 'orange', 'yellow', 'white'))(17)) g <- ggplot(data = today_map, aes(x = long, y = lat, group = group)) + geom_polygon(aes(fill = var), lwd = 0.3, # color = 'darkgrey', color = NA, size = 0.6) + scale_fill_gradientn(name = '', colours = cols, breaks = seq(0, 1100, 50), limits = c(0, 1100)) + # scale_fill_() + ggthemes::theme_map() + theme(legend.position = 'right', plot.title = element_text(size = 24)) + guides(fill = guide_colorbar(direction= 'vertical', barwidth = 1, barheight = 30, label.position = 'left')) + labs(subtitle = 'Cumulative COVID-19 deaths per million population', title = paste0(format(this_date, '%B %d, %Y'))) + geom_text(data = today_centroids, aes(x = x, y = y, group = NA, label = paste0(ccaa, '\n', round(var, digits = 2))), alpha = 0.8, size = 1.5) ggsave(paste0('/tmp/animation_map/', this_date, '.png'), plot = g, height = 6, width = 9) }
# Command line cd /tmp/animation_map mogrify -resize 50% *.png convert -delay 20 -loop 0 *.png result.gif
df_country %>% filter(country == 'Spain') %>% arrange(date) %>% tail
pd <- df_country pd$value <- pd$deaths_non_cum the_date <- Sys.Date() - 1 pd <- pd %>% filter(date == the_date) %>% dplyr::select(country, iso, cases, cases_non_cum, deaths, value) %>% dplyr::arrange(desc(value)) %>% left_join(world_pop %>% dplyr::select(-country)) %>% mutate(value_per_million = value / pop * 1000000) #%>% # arrange(desc(value_per_million)) pd <- pd[1:10,] pd$country <- factor(pd$country, levels = pd$country) ggplot(data = pd, aes(x = country, y = value)) + geom_bar(stat = 'identity', fill = 'black') + theme_simple() + geom_text(aes(label = value), nudge_y = -20, size = 4, color = 'white') + labs(title = paste0('Confirmed COVID-19 deaths on ', the_date), x = '', y = '') pd
Deaths per million yesterday per CCAA
pd <- esp_df pd$value <- pd$deaths_non_cum the_date <- max(pd$date) pd <- pd %>% filter(date == max(date)) %>% dplyr::select(ccaa, cases, cases_non_cum, deaths, value) %>% dplyr::arrange(desc(value)) %>% left_join(esp_pop)%>% mutate(value_per_million = value / pop * 1000000) #%>% # arrange(desc(value_per_million)) pd <- pd[1:10,] pd$ccaa <- factor(pd$ccaa, levels = pd$ccaa) ggplot(data = pd, aes(x = ccaa, y = value)) + geom_bar(stat = 'identity', fill = 'black') + theme_simple() + geom_text(aes(label = value), nudge_y = -20, size = 4, color = 'white') pd
dir.create('/tmp/animation_deaths') dates <- seq(as.Date('2020-03-17'), (Sys.Date()-1), by = 1) for(i in 1:length(dates)){ this_date <- dates[i] pd <- df_country pd$value <- pd$deaths_non_cum pd <- pd %>% filter(date == max(this_date)) %>% dplyr::select(country, cases, cases_non_cum, deaths, value) %>% dplyr::arrange(desc(value)) pd <- pd[1:10,] pd <- pd %>% filter(value > 0) pd$country <- gsub(' ', '\n', pd$country) pd$country <- factor(pd$country, levels = pd$country) pd$color_var <- pd$country == 'Spain' if('Spain' %in% pd$country){ cols <- rev(c('darkred', 'black')) } else { cols <- 'black' } g <- ggplot(data = pd, aes(x = country, y = value)) + geom_bar(stat = 'identity', aes(fill = color_var), alpha = 0.8, show.legend = FALSE) + theme_simple() + geom_text(aes(label = value), nudge_y = max(pd$value) * .05, size = 5, color = 'black') + labs(title = 'Daily (non-cumulative) COVID-19 deaths', subtitle = format(this_date, '%B %d')) + labs(x = 'Country', y = 'Deaths') + theme(axis.text = element_text(size = 15), plot.subtitle = element_text(size = 20)) + scale_fill_manual(name ='', values = cols) + ylim(0, 900) ggsave(filename = paste0('/tmp/animation_deaths/', this_date, '.png'), g) } # Command line
cd /tmp/animation_deaths mogrify -resize 50% *.png convert -delay 50 -loop 0 *.png result.gif
pd <- by_country <- esp_df %>% mutate(country = 'Spain') %>% bind_rows(ita %>% mutate(country = 'Italy')) %>% bind_rows(por_df %>% mutate(country = 'Portugal')) %>% bind_rows(fra_df %>% mutate(country = 'France')) pd$value <- pd$deaths_non_cum max_date <- pd %>% group_by(country) %>% summarise(d = max(date)) %>% ungroup %>% summarise(d = min(d)) %>% .$d # pd$value <- ifelse(is.na(pd$value), 0, pd$value) left <- expand.grid(date = seq(min(pd$date), max(pd$date), by = 1), ccaa = sort(unique(pd$ccaa))) right <- pd %>% dplyr::select(date, ccaa, value) pd <- left_join(left, right) %>% mutate(value = ifelse(is.na(value), NA, value)) pd <- left_join(pd, by_country %>% dplyr::distinct(country, ccaa)) %>% filter(date <= max_date) %>% filter(value > 0) the_limits <- c(1, 600) the_breaks <- c(1, seq(100, 600, length = 6)) #seq(0, 600, length = 7) pd$ccaa <- factor(pd$ccaa, levels = rev(unique(sort(pd$ccaa)))) ggplot(data = pd, aes(x = date, y = ccaa, color = value, size = value)) + # geom_tile(color = 'white') + geom_point(alpha = 0.8) + # geom_tile() + scale_color_gradientn(colors = rev(colorRampPalette(brewer.pal(n = 8, 'Spectral'))(5)), name = '', limits = the_limits, breaks = the_breaks) + # scale_fill_gradientn(colors = rev(colorRampPalette(brewer.pal(n = 8, 'Spectral'))(5)), # name = '', # limits = the_limits, # breaks = the_breaks) + scale_size_area(name = '', limits = the_limits, breaks = the_breaks, max_size = 10) + theme_simple() + facet_wrap(~country, scales = 'free_y') + theme(strip.text = element_text(size = 20), axis.title = element_blank(), axis.text = element_text(size = 10), axis.text.x = element_text(size = 12)) + guides(color = guide_legend(), size = guide_legend()) + labs(title = 'Daily (non-cumulative) COVID-19 deaths by sub-state regions', caption = paste0('Data as of ', max_date)) ggsave('/tmp/1.png', width = 10, height = 8)
pd <- by_country <- esp_df %>% mutate(country = 'Spain') %>% bind_rows(ita %>% mutate(country = 'Italy')) poppy <- bind_rows(ita_pop, esp_pop) pd <- pd %>% left_join(poppy) pd$value <- pd$deaths_non_cum / pd$pop * 1000000 max_date <- pd %>% group_by(country) %>% summarise(d = max(date)) %>% ungroup %>% summarise(d = min(d)) %>% .$d # pd$value <- ifelse(is.na(pd$value), 0, pd$value) left <- expand.grid(date = seq(min(pd$date), max(pd$date), by = 1), ccaa = sort(unique(pd$ccaa))) right <- pd %>% dplyr::select(date, ccaa, value) pd <- left_join(left, right) %>% mutate(value = ifelse(is.na(value), NA, value)) pd <- left_join(pd, by_country %>% dplyr::distinct(country, ccaa)) %>% filter(date <= max_date) %>% filter(value > 0) the_limits <- c(1, 60) the_breaks <- c(1, seq(10, 60, length = 6)) #seq(0, 600, length = 7) pd$ccaa <- factor(pd$ccaa, levels = rev(unique(sort(pd$ccaa)))) ggplot(data = pd, aes(x = date, y = ccaa, color = value, size = value)) + # geom_tile(color = 'white') + geom_point(alpha = 0.8) + scale_color_gradientn(colors = rev(colorRampPalette(brewer.pal(n = 8, 'Spectral'))(5)), name = '', limits = the_limits, breaks = the_breaks) + scale_size_area(name = '', limits = the_limits, breaks = the_breaks, max_size = 10) + theme_simple() + facet_wrap(~country, scales = 'free') + theme(strip.text = element_text(size = 26), axis.title = element_blank(), axis.text = element_text(size = 16), axis.text.x = element_text(size = 12)) + guides(color = guide_legend(), size = guide_legend()) + labs(title = 'Daily COVID-19 deaths per 1,000,000 population by sub-state regions', caption = paste0('Data as of ', max_date)) ggsave('/tmp/2.png', width = 10, height = 8)
place_transform <- function(x){ifelse(x == 'Madrid', 'Madrid', # ifelse(x == 'Cataluña', 'Cataluña', 'Otras CCAA') # ) } place_transform_ita <- function(x){ ifelse(x == 'Lombardia', 'Lombardia', # ifelse(x == 'Emilia Romagna', 'Emilia Romagna', 'Otras regiones italianas') # ) } pd <- esp_df %>% mutate(country = 'España') %>% mutate(ccaa = place_transform(ccaa)) %>% bind_rows(ita %>% mutate(ccaa = place_transform_ita(ccaa), country = 'Italia')) %>% group_by(country, ccaa, date) %>% summarise(cases = sum(cases), uci = sum(uci), deaths = sum(deaths), cases_non_cum = sum(cases_non_cum), deaths_non_cum = sum(deaths_non_cum), uci_non_cum = sum(uci_non_cum)) %>% left_join(esp_pop %>% mutate(ccaa = place_transform(ccaa)) %>% bind_rows(ita_pop %>% mutate(ccaa = place_transform_ita(ccaa))) %>% group_by(ccaa) %>% summarise(pop = sum(pop))) %>% mutate(deaths_non_cum_p = deaths_non_cum / pop * 1000000) %>% group_by(country, date) %>% mutate(p_deaths_non_cum_country = deaths_non_cum / sum(deaths_non_cum) * 100, p_deaths_country = deaths / sum(deaths) * 100) pd$ccaa <- factor(pd$ccaa, levels = c('Madrid', # 'Cataluña', 'Otras CCAA', 'Lombardia', # 'Emilia Romagna', 'Otras regiones italianas')) cols <- c( rev(brewer.pal(n = 3, 'Reds'))[1:2], rev(brewer.pal(n = 3, 'Blues'))[1:2] ) ggplot(data = pd, aes(x = date, y = deaths_non_cum_p, fill = ccaa, group = ccaa)) + geom_bar(stat = 'identity', position = position_dodge(width = 0.99)) + scale_fill_manual(name = '', values = cols) + scale_color_manual(name = '', values = cols) + # geom_line(size = 0.2, # aes(color = ccaa)) + xlim(as.Date('2020-03-09'), Sys.Date()-1) + theme_simple() + labs(x = 'Fecha', y = 'Muertes diarias por 1.000.000', title = 'Muertes por 1.000.000 habitantes') + theme(legend.position = 'top') + geom_text(aes(label = round(deaths_non_cum_p, digits = 1), color = ccaa, y = deaths_non_cum_p + 2, group = ccaa), size = 2.5, angle = 90, position = position_dodge(width = 0.99)) label_data <- pd %>% filter(country %in% c('Italia', 'España')) %>% group_by(country) %>% filter(date == max(date)) %>% mutate(label = gsub('\nitalianas', '', gsub(' ', '\n', ccaa))) %>% mutate(x = date - 2, y = p_deaths_country + ifelse(p_deaths_country > 50, 10, -10)) ggplot(data = pd %>% group_by(country) %>% mutate(start_day = dplyr::first(date[deaths >=50])) %>% filter(date >= start_day), aes(x = date, y = p_deaths_country, color = ccaa, group = ccaa)) + # geom_bar(stat = 'identity', # position = position_dodge(width = 0.99)) + scale_fill_manual(name = '', values = cols) + scale_color_manual(name = '', values = cols) + geom_line(size = 2, aes(color = ccaa)) + geom_point(size = 3, aes(color = ccaa)) + facet_wrap(~country, scales = 'free_x') + # xlim(as.Date('2020-03-09'), # Sys.Date()-1) + theme_simple() + geom_hline(yintercept = 50, lty = 2, alpha = 0.6) + # geom_line(lty = 2, alpha = 0.6) + labs(x = 'Fecha', y = 'Porcentaje de muertes', title = 'Porcentaje de muertes del país: región más afectada vs. resto del país', subtitle = 'A partir del primer día en cada país con 50 o más muertes') + theme(legend.position = 'top', strip.text = element_text(size = 30), legend.text = element_text(size = 10)) + guides(color = guide_legend(nrow = 2, keywidth = 5)) + geom_text(data = label_data, aes(x = x - 2, y = y, label = label, color = ccaa), size = 6, show.legend = FALSE)
# Spanish data a <- esp_df %>% left_join(esp_pop) %>% mutate(country = 'Spain') # Italian data b <- ita %>% left_join(ita_pop) %>% mutate(country = 'Italy') # Chinese data d <- df %>% filter(country == 'China') %>% mutate(cases = cases) %>% mutate(ccaa = district) %>% mutate(country = 'China') %>% left_join(chi_pop) # join joined <- bind_rows(a, b, d) # Get deaths per milllion joined$deaths_pm <- joined$deaths / joined$pop * 1000000 joined$cases_pm <- joined$cases / joined$pop * 1000000 # Get the days since paradigm x_deaths <- 5 x_deaths_pm <- 5 x_cases <- 50 x_cases_pm <- 50 joined <- joined %>% arrange(date) %>% group_by(ccaa) %>% mutate(start_deaths = min(date[deaths >= x_deaths]), start_cases = min(date[cases >= x_cases]), start_deaths_pm = min(date[deaths_pm >= x_deaths_pm]), start_cases_pm = min(date[cases_pm >= x_cases_pm])) %>% ungroup %>% mutate(days_since_start_deaths = date - start_deaths, days_since_start_cases = date - start_cases, days_since_start_deaths_pm = date - start_deaths_pm, days_since_start_cases_pm = date - start_cases_pm) # Define plot data pd <- joined %>% filter(days_since_start_deaths_pm >= 0) %>% mutate(country = ifelse(country == 'China', 'Hubei (China)', ifelse(country == 'Italy', 'Italia', 'España'))) lombardy_location <- (max(pd_big$days_since_start_deaths_pm[pd_big$ccaa == 'Lombardia'])) # Define label data label_data <- pd %>% group_by(ccaa) %>% filter( ( (country == 'Hubei (China)' & days_since_start_deaths_pm == 22) | (date == max(date) & country == 'España' & deaths_pm > 40 & days_since_start_deaths_pm >= 7) & ccaa != 'CyL' | (date == max(date) & country == 'Italia' & ccaa != 'Liguria' & days_since_start_deaths_pm > 15) )) # Get differential label data based on what to be emphasized bigs <- c('Madrid', 'Lombardia', 'Hubei') label_data_big <- label_data %>% filter(ccaa %in% bigs) label_data_small <- label_data %>% filter(!ccaa %in% bigs) pd_big <- pd %>% filter(ccaa %in% bigs) pd_small <- pd %>% filter(!ccaa %in% bigs) # cols <- colorRampPalette(RColorBrewer::brewer.pal(n = 8, name = 'Set2'))(length(unique(pd$country))) # cols <- rainbow(3) cols <- c('darkred', '#FF6633', '#006666') ggplot(data = pd_big, aes(x = days_since_start_deaths_pm, y = deaths_pm, group = ccaa)) + geom_line(aes(color = country), alpha = 0.9, size = 2) + geom_line(data = pd_small, aes(x = days_since_start_deaths_pm, y = deaths_pm, color = country), alpha = 0.7, size = 1) + scale_y_log10() + scale_color_manual(name = '', values = c(cols)) + theme_simple() + theme(legend.position = 'top') + labs(x = 'Dias desde "el comienzo del brote"', y = 'Muertes por millón de habitantes', title = 'Muertes por 1.000.000 habitantes', subtitle = paste0('Dia 0 ("comienzo del brote") = primer día a ', x_deaths_pm, ' o más muertes acumuladas por milión de población\nLíneas rojas: CCAA; líneas verde-azules: regiones italianas; línea naranja: Hubei, China'), caption = '@joethebrew | www.databrew.cc') + geom_text(data = label_data_big, aes(x = days_since_start_deaths_pm + 0.6, y = (deaths_pm + 50), label = gsub(' ', '\n', ccaa), color = country), size = 8, alpha = 0.9, show.legend = FALSE) + geom_text(data = label_data_small, aes(x = days_since_start_deaths_pm + 0.6, y = deaths_pm + (log(deaths_pm)/2), label = gsub(' ', '\n', ccaa), color = country), size = 5, alpha = 0.6, show.legend = FALSE) + theme(axis.text = element_text(size = 14), axis.title = element_text(size = 16), legend.text = element_text(size = 16), plot.title = element_text(size = 25)) + xlim(0, lombardy_location + 5)
# Spanish data a <- esp_df %>% left_join(esp_pop) %>% mutate(country = 'Spain') # Italian data b <- ita %>% left_join(ita_pop) %>% mutate(country = 'Italy') # Chinese data d <- df %>% filter(country == 'China') %>% mutate(cases = cases) %>% mutate(ccaa = district) %>% mutate(country = 'China') %>% left_join(chi_pop) # join joined <- bind_rows(a, b, d) # Get deaths per milllion joined$deaths_pm <- joined$deaths / joined$pop * 1000000 joined$cases_pm <- joined$cases / joined$pop * 1000000 # Get the days since paradigm x_deaths <- 5 x_deaths_pm <- 5 x_cases <- 50 x_cases_pm <- 50 joined <- joined %>% arrange(date) %>% group_by(ccaa) %>% mutate(start_deaths = min(date[deaths >= x_deaths]), start_cases = min(date[cases >= x_cases]), start_deaths_pm = min(date[deaths_pm >= x_deaths_pm]), start_cases_pm = min(date[cases_pm >= x_cases_pm])) %>% ungroup %>% mutate(days_since_start_deaths = date - start_deaths, days_since_start_cases = date - start_cases, days_since_start_deaths_pm = date - start_deaths_pm, days_since_start_cases_pm = date - start_cases_pm) # Define plot data pd <- joined %>% filter(days_since_start_deaths >= 0) %>% mutate(country = ifelse(country == 'China', 'China', ifelse(country == 'Italy', 'Italia', 'España'))) # Define label data label_data <- pd %>% group_by(ccaa) %>% filter( ( (country == 'China' & deaths >10 & days_since_start_deaths == 29) | (date == max(date) & country == 'España' & deaths > 90) | (date == max(date) & country == 'Italia' & ccaa != 'Liguria' & days_since_start_deaths > 10) )) # Get differential label data based on what to be emphasized label_data_big <- label_data %>% filter(ccaa %in% c('Madrid', 'Lombardia', 'Hubei')) label_data_small <- label_data %>% filter(!ccaa %in% c('Madrid', 'Lombardia', 'Hubei')) pd_big <- pd %>% filter(ccaa %in% c('Madrid', 'Lombardia', 'Hubei')) pd_small <- pd %>% filter(!ccaa %in% c('Madrid', 'Lombardia', 'Hubei')) # cols <- colorRampPalette(RColorBrewer::brewer.pal(n = 8, name = 'Set2'))(length(unique(pd$country))) # cols <- rainbow(3) cols <- c( '#FF6633', 'darkred', '#006666') ggplot(data = pd_big, aes(x = days_since_start_deaths, y = deaths, group = ccaa)) + geom_line(aes(color = country), alpha = 0.9, size = 2) + geom_line(data = pd_small, aes(x = days_since_start_deaths, y = deaths, color = country), alpha = 0.7, size = 1) + scale_y_log10() + scale_color_manual(name = '', values = c(cols)) + theme_simple() + theme(legend.position = 'top') + labs(x = 'Dias desde el primer día con 5 o más muertes acumuladas', y = 'Muertes', title = 'Muertes por COVID-19', caption = '@joethebrew | www.databrew.cc') + geom_text(data = label_data_big, aes(x = days_since_start_deaths + 1.6, y = ifelse(ccaa == 'Hubei', (deaths -500), ifelse(ccaa == 'Lombardia', (deaths + 700), (deaths + 300))), label = gsub(' ', '\n', ccaa), color = country), size = 8, alpha = 0.9, show.legend = FALSE) + geom_text(data = label_data_small, aes(x = days_since_start_deaths + 1.6, align = 'left', y = deaths , label = ccaa, # label = gsub(' ', '\n', ccaa), color = country), size = 5, alpha = 0.6, show.legend = FALSE) + theme(axis.text = element_text(size = 14), axis.title = element_text(size = 20), legend.text = element_text(size = 16), plot.title = element_text(size = 30)) + xlim(0, 50)
# Spanish data a <- esp_df %>% left_join(esp_pop) %>% mutate(country = 'Spain') # Italian data b <- ita %>% left_join(ita_pop) %>% mutate(country = 'Italy') # Chinese data d <- df %>% filter(country == 'China') %>% mutate(cases = cases) %>% mutate(ccaa = district) %>% mutate(country = 'China') %>% left_join(chi_pop) # join joined <- bind_rows(a, b, d) # Get deaths per milllion joined$deaths_pm <- joined$deaths / joined$pop * 1000000 joined$cases_pm <- joined$cases / joined$pop * 1000000 # Get the days since paradigm x_deaths <- 5 x_deaths_pm <- 5 x_cases <- 50 x_cases_pm <- 50 joined <- joined %>% arrange(date) %>% group_by(ccaa) %>% mutate(start_deaths = min(date[deaths >= x_deaths]), start_cases = min(date[cases >= x_cases]), start_deaths_pm = min(date[deaths_pm >= x_deaths_pm]), start_cases_pm = min(date[cases_pm >= x_cases_pm])) %>% ungroup %>% mutate(days_since_start_deaths = date - start_deaths, days_since_start_cases = date - start_cases, days_since_start_deaths_pm = date - start_deaths_pm, days_since_start_cases_pm = date - start_cases_pm) # Define plot data pd <- joined %>% filter(days_since_start_deaths >= 0) %>% mutate(country = ifelse(country == 'China', 'China', ifelse(country == 'Italy', 'Italia', 'España'))) add_zero <- function(x, n){ x <- as.character(x) adders <- n - nchar(x) adders <- ifelse(adders < 0, 0, adders) for (i in 1:length(x)){ if(!is.na(x[i])){ x[i] <- paste0( paste0(rep('0', adders[i]), collapse = ''), x[i], collapse = '') } } return(x) } # # Define label data # label_data <- pd %>% group_by(ccaa) %>% filter( # ( # (country == 'China' & deaths >10 & days_since_start_deaths == 29) | # (date == max(date) & country == 'España' & deaths > 90) | # (date == max(date) & country == 'Italia' & # ccaa != 'Liguria' & days_since_start_deaths > 10) # )) # # Get differential label data based on what to be emphasized # label_data_big <- label_data %>% # filter(ccaa %in% c('Madrid', 'Lombardia', 'Hubei')) # label_data_small <- label_data %>% # filter(!ccaa %in% c('Madrid', 'Lombardia', 'Hubei')) # pd_big <- pd %>% filter(ccaa %in% c('Madrid', 'Lombardia', 'Hubei')) pd_small <- pd %>% filter(!ccaa %in% c('Madrid', 'Lombardia', 'Hubei')) # cols <- colorRampPalette(RColorBrewer::brewer.pal(n = 8, name = 'Set2'))(length(unique(pd$country))) # cols <- rainbow(3) cols <- c( '#FF6633', 'darkred', '#006666') the_dir <- '/tmp/animation/' dir.create(the_dir) the_dates <- sort(unique(c(pd_big$date, pd_small$date))) for(i in 1:length(the_dates)){ the_date <- the_dates[i] pd_big_sub <- pd_big %>% filter(date <= the_date) pd_big_current <- pd_big_sub %>% filter(date == the_date) pd_small_sub <- pd_small %>% filter(date <= the_date) pd_small_current <- pd_small_sub %>% filter(date == the_date) label_data_big <- pd_big_sub %>% filter(ccaa %in% c('Lombardia', 'Madrid', 'Hubei')) %>% group_by(ccaa) %>% filter(date == max(date)) %>% ungroup %>% mutate(days_since_start_deaths = ifelse(ccaa == 'Hubei' & days_since_start_deaths >32, 32, days_since_start_deaths)) label_data_small <- pd_small_sub %>% filter(ccaa %in% c('Emilia Romagna', 'Cataluña', 'CLM', 'País Vasco', 'Veneto', 'Piemonte', 'Henan', 'Heilongjiang')) %>% group_by(ccaa) %>% filter(date == max(date)) n_countries <- length(unique(pd_big_sub$country)) if(n_countries == 3){ sub_cols <- cols } if(n_countries == 2){ sub_cols <- cols[c(1,3)] } if(n_countries == 1){ sub_cols <- cols[1] } g <- ggplot(data = pd_big_sub, aes(x = days_since_start_deaths, y = deaths, group = ccaa)) + geom_line(aes(color = country), alpha = 0.9, size = 2) + geom_line(data = pd_small_sub, aes(x = days_since_start_deaths, y = deaths, color = country), alpha = 0.7, size = 1) + geom_point(data = pd_big_current, aes(x = days_since_start_deaths, y = deaths, color = country), size = 3) + geom_point(data = pd_small_current, aes(x = days_since_start_deaths, y = deaths, color = country), size = 1, alpha = 0.6) + scale_y_log10(limits = c(5, 4500)) + scale_color_manual(name = '', values = sub_cols) + theme_simple() + theme(legend.position = 'top') + labs(x = 'Dias desde el primer día con 5 o más muertes acumuladas', y = 'Muertes', title = format(the_date, '%d %b'), subtitle = 'Muertes por COVID-19', caption = '@joethebrew | www.databrew.cc') + geom_text(data = label_data_big, aes(x = days_since_start_deaths + 1, y = deaths, hjust = 0, label = gsub(' ', '\n', ccaa), color = country), size = 8, alpha = 0.9, show.legend = FALSE) + geom_text(data = label_data_small, aes(x = days_since_start_deaths + 1.6, y = deaths , label = ccaa, # label = gsub(' ', '\n', ccaa), color = country), size = 5, alpha = 0.6, show.legend = FALSE) + theme(axis.text = element_text(size = 14), axis.title = element_text(size = 20), legend.text = element_text(size = 16), plot.title = element_text(size = 35), plot.subtitle = element_text(size = 24)) + xlim(0, 38) message(i) ggsave(paste0(the_dir, add_zero(i, 3), '.png'), height = 7, width = 10.5) }
# Command line cd /tmp/animation mogrify -resize 50% *.png convert -delay 20 -loop 0 *.png result.gif
# Spanish data a <- esp_df %>% left_join(esp_pop) %>% mutate(country = 'Spain') joined <- a # Get deaths per milllion joined$deaths_pm <- joined$deaths / joined$pop * 1000000 joined$cases_pm <- joined$cases / joined$pop * 1000000 # Get the days since paradigm x_deaths <- 5 x_deaths_pm <- 5 x_cases <- 50 x_cases_pm <- 50 joined <- joined %>% arrange(date) %>% group_by(ccaa) %>% mutate(start_deaths = min(date[deaths >= x_deaths]), start_cases = min(date[cases >= x_cases]), start_deaths_pm = min(date[deaths_pm >= x_deaths_pm]), start_cases_pm = min(date[cases_pm >= x_cases_pm])) %>% ungroup %>% mutate(days_since_start_deaths = date - start_deaths, days_since_start_cases = date - start_cases, days_since_start_deaths_pm = date - start_deaths_pm, days_since_start_cases_pm = date - start_cases_pm) # Define plot data pd <- joined %>% filter(days_since_start_deaths >= 0) %>% mutate(country = ifelse(country == 'China', 'China', ifelse(country == 'Italy', 'Italia', 'España'))) bigs <- c('Madrid', 'Cataluña', 'CLM', 'CyL', 'País Vasco', 'La Rioja') pd_big <- pd %>% filter(ccaa %in% bigs) pd_small <- pd %>% filter(!ccaa %in% bigs) # cols <- colorRampPalette(RColorBrewer::brewer.pal(n = 8, name = 'Set2'))(length(unique(pd$country))) # cols <- rainbow(3) cols <- colorRampPalette(c('#A16928','#bd925a','#d6bd8d','#edeac2', '#b5c8b8','#79a7ac','#2887a1'))(length(unique(pd$country))) the_dir <- '/tmp/animation2/' dir.create(the_dir) the_dates <- sort(unique(c(pd_big$date, pd_small$date))) for(i in 1:length(the_dates)){ the_date <- the_dates[i] pd_big_sub <- pd_big %>% filter(date <= the_date) pd_big_current <- pd_big_sub %>% filter(date == the_date) pd_small_sub <- pd_small %>% filter(date <= the_date) pd_small_current <- pd_small_sub %>% filter(date == the_date) label_data_big <- pd_big_sub %>% filter(ccaa %in% bigs) %>% group_by(ccaa) %>% filter(date == max(date)) %>% ungroup label_data_small <- pd_small_sub %>% group_by(ccaa) %>% filter(date == max(date)) # sub_cols <- colorRampPalette(c('#A16928','#bd925a','#d6bd8d','#edeac2', '#b5c8b8','#79a7ac','#2887a1'))(length(unique(pd$ccaa))) sub_cols <- colorRampPalette(RColorBrewer::brewer.pal(n = 8, name = 'Dark2'))(length(unique(pd$ccaa))) # sub_cols <- rainbow((length(unique(pd$ccaa)))) g <- ggplot(data = pd_big_sub, aes(x = days_since_start_deaths, y = deaths, group = ccaa)) + geom_line(aes(color = ccaa), alpha = 0.9, size = 2) + geom_line(data = pd_small_sub, aes(x = days_since_start_deaths, y = deaths, color = ccaa), alpha = 0.7, size = 1) + geom_point(data = pd_big_current, aes(x = days_since_start_deaths, y = deaths, color = ccaa), size = 3) + geom_point(data = pd_small_current, aes(x = days_since_start_deaths, y = deaths, color = ccaa), size = 1, alpha = 0.6) + geom_point(data = pd, aes(x = days_since_start_deaths, y = deaths, color = ccaa), size = 1, alpha = 0.01) + scale_y_log10(limits = c(5, max(pd$deaths)*1.2), breaks = c(10, 50, 100, 500, 1000)) + scale_color_manual(name = '', values = sub_cols) + theme_simple() + theme(legend.position = 'top') + labs(x = 'Dias desde el primer día con 5 o más muertes acumuladas', y = 'Muertes', title = format(the_date, '%d %b'), subtitle = 'Muertes por COVID-19', caption = '@joethebrew | www.databrew.cc') + theme(axis.text = element_text(size = 14), axis.title = element_text(size = 20), legend.text = element_text(size = 16), plot.title = element_text(size = 35), plot.subtitle = element_text(size = 24)) + xlim(0, 20) + theme(legend.position = 'none') message(i) if(nrow(label_data_big) > 0){ g <- g + geom_text(data = label_data_big, aes(x = days_since_start_deaths + 0.2, y = deaths, hjust = 0, label = gsub(' ', ' ', ccaa), color = ccaa), size = 8, alpha = 0.9, show.legend = FALSE) + geom_text(data = label_data_small, aes(x = days_since_start_deaths + 0.2, y = deaths , label = ccaa, # label = gsub(' ', '\n', ccaa), color = ccaa), size = 5, alpha = 0.6, show.legend = FALSE) } ggsave(paste0(the_dir, add_zero(i, 3), '.png'), height = 7, width = 12) }
# Command line cd /tmp/animation mogrify -resize 50% *.png convert -delay 25 -loop 0 *.png result.gif
# Spanish data a <- esp_df %>% left_join(esp_pop) %>% mutate(country = 'Spain') # Italian data b <- ita %>% left_join(ita_pop) %>% mutate(country = 'Italy') # join joined <- bind_rows(a, b) # Get deaths per milllion joined$deaths_pm <- joined$deaths / joined$pop * 1000000 joined$cases_pm <- joined$cases / joined$pop * 1000000 # Get the days since paradigm x_deaths <- 5 x_deaths_pm <- 5 x_cases <- 50 x_cases_pm <- 50 joined <- joined %>% arrange(date) %>% group_by(ccaa) %>% mutate(start_deaths = min(date[deaths >= x_deaths]), start_cases = min(date[cases >= x_cases]), start_deaths_pm = min(date[deaths_pm >= x_deaths_pm]), start_cases_pm = min(date[cases_pm >= x_cases_pm])) %>% ungroup %>% mutate(days_since_start_deaths = date - start_deaths, days_since_start_cases = date - start_cases, days_since_start_deaths_pm = date - start_deaths_pm, days_since_start_cases_pm = date - start_cases_pm) ggplot(data = joined %>% filter(days_since_start_deaths_pm >= 0), aes(x = days_since_start_deaths_pm, y = deaths_pm, group = ccaa)) + geom_line(aes(color = country), alpha = 0.8, size = 2) + scale_y_log10() + scale_color_manual(name = '', values = c('darkorange', 'purple')) + theme_simple() + theme(legend.position = 'none') + labs(x = 'Days since "start out outbreak"', y = 'Deaths per million', title = 'Deaths per capita, Italian regions vs. Spanish autonomous communities', subtitle = paste0('Day 0 ("start of outbreak") = first day at ', x_deaths_pm, ' or greater cumulative deaths per million'), caption = '@joethebrew | www.databrew.cc') + geom_text(data = joined %>% group_by(ccaa) %>% filter(date == max(date) & ( (country == 'Spain' & deaths_pm > 25) | (country == 'Italy' & days_since_start_deaths_pm > 10) )), aes(x = days_since_start_deaths_pm + 0.6, y = deaths_pm, label = gsub(' ', '\n', ccaa), color = country), size = 6) + theme(axis.text = element_text(size = 14), axis.title = element_text(size = 20)) + xlim(0, 23) ggsave('/tmp/italy_comparison.png', height = 6, width = 10) # Separate for Catalonia pd <- joined %>% filter(days_since_start_deaths_pm >= 0) %>% mutate(country = ifelse(ccaa == 'Cataluña', 'Catalonia', country)) %>% mutate(ccaa = ifelse(ccaa == 'Cataluña', 'Catalunya', ccaa)) pdcat <- pd %>% filter(country == 'Catalonia') label_data <- pd %>% group_by(ccaa) %>% filter(date == max(date) & ( (country == 'Catalonia') | (country == 'Spain' & deaths_pm > 25) | (country == 'Italy' & days_since_start_deaths_pm > 10) )) ggplot(data = pd, aes(x = days_since_start_deaths_pm, y = deaths_pm, group = ccaa)) + geom_line(aes(color = country), alpha = 0.3, size = 1.5) + geom_line(data = pdcat, aes(color = country), alpha = 0.8, size = 2) + geom_point(data = pdcat %>% filter(date == max(date)), aes(color = country), alpha = 0.8, size = 4) + scale_y_log10() + scale_color_manual(name = '', values = c('darkred', 'darkorange', "purple")) + theme_simple() + theme(legend.position = 'none') + labs(x = 'Dies des del "començament del brot"', y = 'Morts per milió', title = 'Morts per càpita: Catalunya, comunitats autònomes, regions italianes', subtitle = paste0('Dia 0 ("començament del brot") = primer dia a ', x_deaths_pm, ' o més morts acumulades per milió de població'), caption = '@joethebrew | www.databrew.cc') + geom_text(data = label_data, aes(x = days_since_start_deaths_pm +0.2 , y = deaths_pm +3, hjust = 0, label = gsub(' ', '\n', ccaa), color = country), size = 6, alpha = 0.7) + theme(axis.text = element_text(size = 14), axis.title = element_text(size = 20)) + xlim(0, 24) ggsave('/tmp/cat_italy_comparison.png', height = 6, width = 10) # Straightforward Lombardy, Madrid, Cat comparison specials <- c('Lombardia', 'Madrid') pd <- joined %>% filter(days_since_start_deaths_pm >= 0) %>% mutate(country = ifelse(ccaa == 'Cataluña', 'Catalonia', country)) %>% mutate(ccaa = ifelse(ccaa == 'Cataluña', 'Catalunya', ccaa)) pdcat <- pd %>% filter(ccaa %in% specials) label_data <- pd %>% group_by(ccaa) %>% filter(date == max(date) & ( # (country == 'Catalonia') | (country == 'Spain' & deaths_pm > 20) | (country == 'Italy' & days_since_start_deaths_pm >= 10) )) ggplot(data = pd, aes(x = days_since_start_deaths_pm, y = deaths_pm, group = ccaa)) + geom_line(aes(color = country), alpha = 0.3, size = 1.5) + geom_line(data = pdcat, aes(color = country), alpha = 0.8, size = 2) + scale_y_log10() + scale_color_manual(name = '', values = c('darkred', 'darkorange', "purple")) + theme_simple() + theme(legend.position = 'none') + labs(x = 'Dias desde "el comienzo del brote"', y = 'Muertes por millón de habitantes', title = 'Muertes acumuladas por 1.000.000 habitantes', subtitle = paste0('Dia 0 ("comienzo del brote") = primer día a ', x_deaths_pm, ' o más muertes acumuladas por milión de población'), caption = '@joethebrew | www.databrew.cc') + geom_text(data = label_data %>% filter(!ccaa %in% specials), aes(x = days_since_start_deaths_pm + 0.4, y = deaths_pm +3, label = gsub(' ', '\n', ccaa), color = country), size = 5, alpha = 0.5) + geom_text(data = label_data %>% filter(ccaa %in% specials), aes(x = days_since_start_deaths_pm , y = deaths_pm +30, label = gsub(' ', '\n', ccaa), color = country), size = 8, alpha = 0.8) + theme(axis.text = element_text(size = 14), axis.title = element_text(size = 20)) + xlim(0, 23) ggsave('/tmp/mad_lom_italy_comparison.png', height = 6, width = 10)
isos <- sort(unique(world_pop$sub_region)) isos <- c('Central Asia', 'Eastern Asia', 'Eastern Europe', 'Latin America and the Caribbean', 'Northern Africa', 'Northern America', 'Nothern Europe', 'South-eastern Asia', 'Southern Asia', 'Southern Europe', 'Sub-Saharan Africa', 'Western Asia', 'Western Europe') dir.create('/tmp/world') for(i in 1:length(isos)){ this_iso <- isos[i] message(i, ' ', this_iso) countries <- world_pop %>% filter(sub_region == this_iso) pd <- df %>% filter(!country %in% c('Guyan a', 'Bahamas', 'The Bahamas')) %>% group_by(country, iso, date) %>% summarise(cases = sum(cases, na.rm = TRUE)) %>% ungroup %>% group_by(country) %>% filter(length(which(cases > 0)) > 1) %>% ungroup %>% filter(iso %in% countries$iso) if(nrow(pd) > 0){ cols <- colorRampPalette(brewer.pal(n = 8, 'Spectral'))(length(unique(pd$country))) cols <- sample(cols, length(cols)) # Plot n_countries <- (length(unique(pd$country))) ggplot(data = pd, aes(x = date, # color = country, # fill = country, y = cases)) + theme_simple() + # geom_point() + # geom_line() + geom_area(fill = 'darkred', alpha = 0.3, color = 'darkred') + # scale_color_manual(name = '', # values = cols) + # scale_fill_manual(name = '', # values = cols) + theme(legend.position = 'none', axis.text = element_text(size = 6), strip.text = element_text(size = ifelse(n_countries > 20, 6, ifelse(n_countries > 10, 10, ifelse(n_countries > 5, 11, 12))) ), legend.text = element_text(size = 6)) + # scale_y_log10() + facet_wrap(~country, scales = 'free') + labs(x = '', y = 'Confirmed cases', title = paste0('Confirmed cases of COVID-19 in ', this_iso)) ggsave(paste0('/tmp/world/', this_iso, '.png'), width = 12, height = 7) } }
plot_data <- df_country %>% filter(country %in% c('Spain', 'France', 'Italy', 'Germany', 'Belgium', 'Norway')) %>% mutate(geo = country) roll_curve(plot_data, pop = T)
dir.create('/tmp/countries') roll_curve_country <- function(the_country = 'Spain'){ plot_data <- df_country %>% filter(country %in% the_country) %>% mutate(geo = country) g1 <- roll_curve(plot_data, pop = F) g2 <- roll_curve(plot_data, pop = T) g3 <- roll_curve(plot_data, pop = F, deaths = T) g4 <- roll_curve(plot_data, pop = T, deaths = T) ggsave(paste0('/tmp/countries/', the_country, '1.png'), g1) ggsave(paste0('/tmp/countries/', the_country, '2.png'), g2) ggsave(paste0('/tmp/countries/', the_country, '3.png'), g3) ggsave(paste0('/tmp/countries/', the_country, '4.png'), g4) } countries <- c('Spain', 'France', 'Italy', 'Germany', 'Belgium', 'Norway', 'US', 'United Kingdom', 'Korea, South', 'China', 'Japan', 'Switzerland', 'Sweden', 'Denmark', 'Netherlands', 'Iran', 'Canada') for(i in 1:length(countries)){ roll_curve_country(the_country = countries[i]) }
# Cases plot_data <- df_country %>% filter(country == 'Spain') %>% mutate(geo = country) roll_curve(plot_data) # Cases adjusted plot_data <- df_country %>% filter(country == 'Spain') %>% mutate(geo = country) roll_curve(plot_data, pop = T) # Deaths plot_data <- df_country %>% filter(country == 'Spain') %>% mutate(geo = country) roll_curve(plot_data, deaths = T) # Cases adjusted plot_data <- df_country %>% filter(country == 'Spain') %>% mutate(geo = country) roll_curve(plot_data, pop = T, deaths = T)
plot_data <- esp_df %>% mutate(geo = ccaa) roll_curve(plot_data, pop = T, deaths = T)
plot_data <- df_country %>% filter(country == 'Spain') %>% mutate(geo = country) roll_curve(plot_data, deaths = T)
# Latest in Spain pd <- esp_df %>% filter(date == max(date)) %>% mutate(p = deaths / sum(deaths) * 100) text_size <- 12 # deaths ggplot(data = pd, aes(x = ccaa, y = deaths)) + geom_bar(stat = 'identity', fill = 'black') + theme_simple() + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) + labs(x = '', y = 'Deaths | Muertes', title = 'COVID-19 deaths in Spain', subtitle = paste0('Data as of ', max(pd$date)), caption = 'github.com/databrew/covid19 | joe@databrew.cc') + theme(legend.position = 'top', legend.text = element_text(size = text_size * 2), axis.title = element_text(size = text_size * 2), plot.title = element_text(size = text_size * 2.3), axis.text.x = element_text(size = text_size * 1.5)) + geom_text(data = pd %>% filter(deaths > 0), aes(x = ccaa, y = deaths, label = paste0(deaths, '\n(', round(p, digits = 1), '%)')), size = text_size * 0.3, nudge_y = 180) + ylim(0, max(pd$deaths * 1.1)) ggsave('/tmp/spain.png')
Muertes relativas por CCAA
# Latest in Spain pd <- esp_df %>% filter(date == max(date)) %>% mutate(p = deaths / sum(deaths) * 100) pd <- pd %>% left_join(esp_pop) text_size <- 12 pd$value <- pd$deaths / pd$pop * 100000 # deaths ggplot(data = pd, aes(x = ccaa, y = value)) + geom_bar(stat = 'identity', fill = 'black') + theme_simple() + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) + labs(x = '', y = 'Deaths per 100,000', title = 'COVID-19 deaths per 100.000', subtitle = paste0('Data as of ', max(pd$date)), caption = 'github.com/databrew/covid19 | joe@databrew.cc') + theme(legend.position = 'top', legend.text = element_text(size = text_size * 2), axis.title = element_text(size = text_size * 2), plot.title = element_text(size = text_size * 2.3), axis.text.x = element_text(size = text_size * 1.5)) + geom_text(data = pd %>% filter(value > 0), aes(x = ccaa, y = value, label = paste0(round(value, digits = 2), '\n(', deaths, '\ndeaths)')), size = text_size * 0.3, nudge_y = 4.5) + ylim(0, max(pd$value) * 1.2) ggsave('/tmp/spai2.png')
# Latest in Spain pd <- esp_df %>% filter(date == max(date)) %>% mutate(p = deaths_non_cum / sum(deaths_non_cum) * 100) text_size <- 12 # deaths ggplot(data = pd, aes(x = ccaa, y = deaths_non_cum)) + geom_bar(stat = 'identity', fill = 'black') + theme_simple() + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) + labs(x = '', y = 'Deaths', title = 'COVID-19 deaths in Spain', subtitle = paste0('Data only for ', max(pd$date)), caption = 'github.com/databrew/covid19 | joe@databrew.cc') + theme(legend.position = 'top', legend.text = element_text(size = text_size * 2), axis.title = element_text(size = text_size * 2), plot.title = element_text(size = text_size * 2.3), axis.text.x = element_text(size = text_size * 1.5)) + geom_text(data = pd %>% filter(deaths_non_cum > 0), aes(x = ccaa, y = deaths_non_cum, label = paste0(deaths_non_cum, '\n(', round(p, digits = 1), '%)')), size = text_size * 0.3, nudge_y = 30) + ylim(0, max(pd$deaths_non_cum * 1.1)) ggsave('/tmp/spain_non_cum.png')
Muertes relativas por CCAA ayer SOLO
# Latest in Spain pd <- esp_df %>% filter(date == max(date)) %>% mutate(p = deaths_non_cum / sum(deaths_non_cum) * 100) pd <- pd %>% left_join(esp_pop) text_size <- 12 pd$value <- pd$deaths_non_cum / pd$pop * 100000 # deaths ggplot(data = pd, aes(x = ccaa, y = value)) + geom_bar(stat = 'identity', fill = 'black') + theme_simple() + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) + labs(x = '', y = 'Deaths per 100,000', title = 'COVID-19 deaths per 100.000', subtitle = paste0('Data as of ', max(pd$date)), caption = 'github.com/databrew/covid19 | joe@databrew.cc') + theme(legend.position = 'top', legend.text = element_text(size = text_size * 2), axis.title = element_text(size = text_size * 2), plot.title = element_text(size = text_size * 2.3), axis.text.x = element_text(size = text_size * 1.5)) + geom_text(data = pd %>% filter(value > 0), aes(x = ccaa, y = value, label = paste0(round(value, digits = 2), '\n(', deaths_non_cum, '\ndeaths)')), size = text_size * 0.3, nudge_y = 1) + ylim(0, max(pd$value) * 1.3) ggsave('/tmp/spain_ayer_adj.png')
plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(!ccaa %in% c('Melilla')) roll_curve(plot_data, scales = 'fixed') ggsave('/tmp/a.png', width = 1280 / 150, height = 720 / 150)
Loop for everywhere (see desktop)
dir.create('/tmp/ccaas') ccaas <- sort(unique(esp_df$ccaa)) for(i in 1:length(ccaas)){ this_ccaa <- ccaas[i] plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(ccaa == this_ccaa) roll_curve(plot_data, scales = 'fixed') + theme(strip.text = element_text(size = 30)) ggsave(paste0('/tmp/ccaas/', i, this_ccaa, '_cases.png'), width = 1280 / 150, height = 720 / 150) } ccaas <- sort(unique(esp_df$ccaa)) for(i in 1:length(ccaas)){ this_ccaa <- ccaas[i] plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(ccaa == this_ccaa) roll_curve(plot_data, scales = 'fixed', pop = TRUE) + theme(strip.text = element_text(size = 30)) ggsave(paste0('/tmp/ccaas/', i, this_ccaa, '_cases_pop.png'), width = 1280 / 150, height = 720 / 150) } # Deaths too for(i in 1:length(ccaas)){ this_ccaa <- ccaas[i] plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(ccaa == this_ccaa) roll_curve(plot_data, deaths = T, scales = 'fixed') + theme(strip.text = element_text(size = 30)) ggsave(paste0('/tmp/ccaas/', i, this_ccaa, '_deaths.png'), width = 1280 / 150, height = 720 / 150) } # Deaths too for(i in 1:length(ccaas)){ this_ccaa <- ccaas[i] plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(ccaa == this_ccaa) roll_curve(plot_data, deaths = T, scales = 'fixed', pop = TRUE) + theme(strip.text = element_text(size = 30)) ggsave(paste0('/tmp/ccaas/', i, this_ccaa, '_deaths_pop.png'), width = 1280 / 150, height = 720 / 150) }
plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(!ccaa %in% c('Melilla')) roll_curve(plot_data, scales = 'free_y') ggsave('/tmp/b.png', width = 1280 / 150, height = 720 / 150)
plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(!ccaa %in% c('Melilla')) roll_curve(plot_data, deaths = T, scales = 'free_y') ggsave('/tmp/c.png', width = 1280 / 150, height = 720 / 150)
plot_data <- esp_df %>% mutate(geo = ccaa) %>% filter(!ccaa %in% c('Melilla')) roll_curve(plot_data, deaths = T, scales = 'fixed') ggsave('/tmp/d.png', width = 1280 / 150, height = 720 / 150)
plot_data <- df_country %>% filter(country %in% c('Spain', 'Italy', 'Germany', 'France', 'US', 'China', 'Korea, South', 'Japan', 'Singapore')) %>% mutate(geo = country) roll_curve(plot_data, scales = 'free_y') ggsave('/tmp/z.png', width = 1280 / 150, height = 720 / 150)
pd <- df_country %>% group_by(date) %>% summarise(n = sum(cases)) %>% filter(date < max(date)) ggplot(data = pd, aes(x = date, y = n)) + geom_point() + theme_simple() + labs(x = 'Date', y = 'Cases', title = 'COVID-19 cases') ggsave('~/Videos/update/a.png', width = 1280 / 150, height = 720 / 150)
pd <- df_country %>% group_by(date, country = ifelse(country == 'China', 'China', 'Other countries')) %>% summarise(n = sum(cases)) %>% ungroup %>% filter(date < max(date)) ggplot(data = pd, aes(x = date, y = n, color = country)) + geom_line(size = 2) + # geom_point() + theme_simple() + labs(x = 'Date', y = 'Cases', title = 'COVID-19 cases') + scale_color_manual(name = '', values = c('red', 'black')) + theme(legend.text = element_text(size = 25), legend.position = 'top') ggsave('~/Videos/update/b.png', width = 1280 / 150, height = 720 / 150)
pd <- df_country %>% group_by(date, country = ifelse(country == 'China', 'China', 'Other countries')) %>% summarise(n = sum(cases)) %>% filter(country == 'Other countries') %>% ungroup %>% filter(date < max(date)) ggplot(data = pd, aes(x = date, y = n)) + geom_line(size = 2) + # geom_point() + theme_simple() + labs(x = 'Date', y = 'Cases', title = 'COVID-19 cases, outside of China') ggsave('~/Videos/update/c.png', width = 1280 / 150, height = 720 / 150)
plot_day_zero(countries = c('France', 'Germany', 'Italy', 'Spain', 'Switzerland', 'Sweden', 'Norway', 'Netherlands')) # ggsave('~/Videos/update/d.png', # width = 1280 / 150, # height = 720 / 150)
pd <- df_country %>% group_by(date) %>% summarise(n = sum(deaths)) %>% filter(date < max(date)) ggplot(data = pd, aes(x = date, y = n)) + geom_point() + theme_simple() + labs(x = 'Date', y = 'Deaths', title = 'COVID-19 deaths') # ggsave('~/Videos/update/e.png', # width = 1280 / 150, # height = 720 / 150)
pd <- df_country %>% group_by(date, country = ifelse(country == 'China', 'China', 'Other countries')) %>% summarise(n = sum(deaths)) %>% ungroup %>% filter(date < max(date)) ggplot(data = pd, aes(x = date, y = n, color = country)) + geom_line(size = 2) + # geom_point() + theme_simple() + labs(x = 'Date', y = 'Deaths', title = 'COVID-19 deaths') + scale_color_manual(name = '', values = c('red', 'black')) + theme(legend.text = element_text(size = 25), legend.position = 'top') # ggsave('~/Videos/update/f.png', # width = 1280 / 150, # height = 720 / 150)
plot_day_zero(countries = c('Korea, South', 'Japan', 'China', 'Singapore')) # ggsave('~/Videos/update/g.png', # width = 1280 / 150, # height = 720 / 150)
Since trajectories are very unstable when cases are low, we'll exclude from our analysis the first few days, and will only count as "outbreak" once a country reaches 150 or more cumulative cases.
# Doubling time n_cases_start = 150 countries = c('Italy', 'Spain', 'France', 'Germany', 'Italy', 'Switzerland', 'Denmark', 'US', 'United Kingdom', 'Norway') # countries <- sort(unique(df_country$country)) out_list <- curve_list <- list() counter <- 0 for(i in 1:length(countries)){ message(i) this_country <- countries[i] sub_data <-df_country %>% filter(country == this_country) # Only calculate on countries with n_cases_start or greater cases, # starting at the first day at n_cases_start or greater ok <- max(sub_data$cases, na.rm = TRUE) >= n_cases_start if(ok){ counter <- counter + 1 pd <- sub_data %>% filter(!is.na(cases)) %>% mutate(start_date = min(date[cases >= n_cases_start])) %>% mutate(days_since = date - start_date) %>% filter(days_since >= 0) %>% mutate(days_since = as.numeric(days_since)) fit <- lm(log(cases) ~ days_since, data = pd) # plot(pd$days_since, log(pd$cases)) # abline(fit) ## Slope curve <- fit$coef[2] # Predict days ahead fake <- tibble(days_since = seq(0, max(pd$days_since) + 5, by = 1)) fake <- left_join(fake, pd %>% dplyr::select(days_since, cases, date)) fake$predicted <- exp(predict(fit, newdata = fake)) # Doubling time dt <- log(2)/fit$coef[2] out <- tibble(country = this_country, doubling_time = dt, slope = curve) out_list[[counter]] <- out curve_list[[counter]] <- fake %>% mutate(country = this_country) } } done <- bind_rows(out_list) print(done) curves <- bind_rows(curve_list) # Get curves back in exponential form # curves$curve <- exp(curves$curve) # Join doubling time to curves joined <- left_join(curves, done) # Get rid of Italy future (since it's the "leader") joined <- joined %>% filter(country != 'Italy' | date <= (Sys.Date() -1)) # Make long format long <- joined %>% dplyr::select(date, days_since, country, cases, predicted, doubling_time) %>% tidyr::gather(key, value, cases:predicted) %>% mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>% mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', key))
The below chart shows the trajectories in terms of number of cases in Europe in red, and the predicted trajectories in black. The black line assumes that the doubling rate will stay constant.
cols <- c('red', 'black') ggplot(data = long, aes(x = days_since, y = value, lty = key, color = key)) + geom_line(data = long %>% filter(key != 'Confirmed cases'), size = 1.2, alpha = 0.8) + geom_point(data = long %>% filter(key == 'Confirmed cases')) + geom_line(data = long %>% filter(key == 'Confirmed cases'), size = 0.8) + facet_wrap(~paste0(country, '\n', '(doubling time: ', round(doubling_time, digits = 1), ' days)'), scales = 'free') + theme_simple() + scale_linetype_manual(name ='', values = c(1,2)) + scale_color_manual(name = '', values = cols) + theme(legend.position = 'top') + labs(x = 'Days since first day at >150 cumulative cases', y = 'Cases', title = 'COVID-19 CASES: ("predicted" assumes no change in doubling time)', caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19', subtitle = '(Doubling time calculated since first day at >150 cumulative cases)') + theme(strip.text = element_text(size = 13), plot.title = element_text(size = 15))
Since Italy is "leading the way", it's helpful to also compare each country to Italy. Let's see that.
# Overlay Italy ol1 <- joined %>% filter(!country %in% 'Italy') ol2 <- joined %>% filter(country == 'Italy') %>% dplyr::rename(Italy = cases) %>% dplyr::select(Italy, days_since) ol <- left_join(ol1, ol2) %>% dplyr::select(days_since, date, country, cases, predicted, Italy,doubling_time) ol <- tidyr::gather(ol, key, value, cases: Italy) %>% mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>% mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', key)) cols <- c('red', 'blue', 'black') ggplot(data = ol, aes(x = days_since, y = value, lty = key, color = key)) + geom_line(data = ol %>% filter(!key %in% c('Confirmed cases', 'Italy')), size = 1.2, alpha = 0.8) + geom_line(data = ol %>% filter(key %in% c('Italy')), size = 0.8, alpha = 0.8) + geom_point(data = ol %>% filter(key == 'Confirmed cases')) + geom_line(data = ol %>% filter(key == 'Confirmed cases'), size = 0.8) + facet_wrap(~paste0(country, '\n', '(doubling time: ', round(doubling_time, digits = 1), ' days)'), scales = 'free') + theme_simple() + scale_linetype_manual(name ='', values = c(1,6,2)) + scale_color_manual(name = '', values = cols) + theme(legend.position = 'top') + labs(x = 'Days since first day at >150 cumulative cases', y = 'Cases', title = 'COVID-19 CASES: ("predicted" assumes no change in doubling time)', caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19', subtitle = '(Doubling time calculated since first day at >150 cumulative cases)') + theme(strip.text = element_text(size = 13), plot.title = element_text(size = 15))
In the above, what's striking is how many places have trajectories that are worse than Italy's. Yes, Italy has more cases, but it's doubling time is less. Either that changes soon, or these other countries will soon have more cases than Italy.
The number of cases is not necessarily the best indicator for the severity of an outbreak of this nature. Why? Because (a) testing rates and protocols are different by place and (b) testing rates are different by time (since health services are changing their approaches as things develop). In other words, when we compare the number of cases by place and time, we are introducing significant bias.
Using deaths to gauge the magnitude of the outbreak is also problematic. Death rates are differential by age, so the number of deaths depends on a country's population period, or age structure. Also, death rates will be a function of health services, which are not of the same quality every where. And, of course, like cases, we don't necessarily know about all of the deaths that occur because of COVID-19.
Still, there's an argument that death rates have less bias than case rates because deaths are easier to identify than cases. Let's accept that argument, for the time being, and have a look at death rates by country.
# Doubling time n_deaths_start = 5 countries = c('Italy', 'Spain', 'France', 'Italy', 'Switzerland', 'Denmark', 'US', 'United Kingdom', 'Norway', 'Germany') # countries <- sort(unique(df_country$country)) make_double_time <- function(data = df_country, the_country = 'Spain', n_deaths_start = 5, time_ahead = 7){ sub_data <-data %>% filter(country == the_country) # Only calculate on countries with n_cases_start or greater cases, # starting at the first day at n_cases_start or greater ok <- max(sub_data$deaths, na.rm = TRUE) >= n_deaths_start if(ok){ counter <- counter + 1 pd <- sub_data %>% filter(!is.na(deaths)) %>% mutate(start_date = min(date[deaths >= n_deaths_start])) %>% mutate(days_since = date - start_date) %>% filter(days_since >= 0) %>% mutate(days_since = as.numeric(days_since)) %>% mutate(the_weight = 1/(1 + (as.numeric(max(date) - date)))) fit <- lm(log(deaths) ~ days_since, weights = the_weight, data = pd) # fitlo <- loess(deaths ~ days_since, data = pd) # plot(pd$days_since, log(pd$cases)) # abline(fit) ## Slope # curve <- fit$coef[2] # Predict days ahead day0 <- pd$date[pd$days_since == 0] fake <- tibble(days_since = seq(0, max(pd$days_since) + time_ahead, by = 1)) fake <- fake %>%mutate(date = seq(day0, day0+max(fake$days_since), by = 1)) fake <- left_join(fake, pd %>% dplyr::select(days_since, deaths, date)) fake$predicted <- exp(predict(fit, newdata = fake)) # fake$predictedlo <- predict(fitlo, newdata = fake) ci <- exp(predict(fit, newdata = fake, interval = 'prediction')) # cilo <- predict(fitlo, newdata = fake, interval = 'prediction') fake$lwr <- ci[,'lwr'] fake$upr <- ci[,'upr'] # fake$lwrlo <- ci[,'lwr'] # fake$uprlo <- ci[,'upr'] # Doubling time dt <- log(2)/fit$coef[2] fake %>% mutate(country = the_country) %>% mutate(doubling_time = dt) } } plot_double_time <- function(data, ylog = F){ the_labs <- labs(x = 'Date', y = 'Deaths', title = paste0('Predicted deaths in ', data$country[1])) long <- data %>% tidyr::gather(key, value, deaths:predicted) %>% mutate(key = Hmisc::capitalize(key)) g <- ggplot() + geom_ribbon(data = data %>% filter(date > max(long$date[!is.na(long$value) & long$key == 'Deaths'])), aes(x = date, ymax = upr, ymin = lwr), alpha =0.6, fill = 'darkorange') + geom_line(data = long, aes(x = date, y = value, group = key, lty = key)) + geom_point(data = long %>% filter(key == 'Deaths'), aes(x = date, y = value)) + theme_simple() + theme(legend.position = 'right', legend.title = element_blank()) + the_labs if(ylog){ g <- g + scale_y_log10() } return(g) } options(scipen = '999') data <- make_double_time(n_deaths_start = 150, time_ahead = 7) data dir.create('/tmp/ccaa_predictions') plot_double_time(data, ylog = T) + labs(subtitle = 'Basic log-linear model weighted at (1 + (1/ days ago)),\nassuming no change in growth trajectory since first day at >150 deaths') ggsave('/tmp/ccaa_predictions/spain.png') # All ccaas ccaas <- sort(unique(esp_df$ccaa)) for(i in 1:length(ccaas)){ message(i) this_ccaa <- ccaas[i] sub_data <- esp_df %>% mutate(country = ccaa) try({ data <- make_double_time( data = sub_data, the_country = this_ccaa, n_deaths_start = 5, time_ahead = 7) plot_double_time(data, ylog = T) + labs(subtitle = 'Basic log-linear model weighted at (1 + (1/ days ago)), assuming no change in growth trajectory since first day at >5 deaths') ggsave(paste0('/tmp/ccaa_predictions/', this_ccaa, '.png'), height = 4.9, width = 8.5) }) }
# all_countries <- sort(unique(df_country$country)) # for(i in 1:length(all_countries)){ # this_country <- all_countries[i] # data <- make_double_time(the_country = this_country, n_deaths_start = 5) # if(!is.null(data)){ # # print(this_country) # g <- plot_double_time(data, ylog = F) + # labs(subtitle = 'Basic log-linear model assuming no change in growth trajectory since first day at >5 deaths') # ggsave(paste0('/tmp/', this_country, '.png'), height = 5, width = 8) # print(data) # } # }
counter <- 0 # Africa data <- make_double_time(the_country = 'South Africa', n_deaths_start = 5, time_ahead = 7) data dir.create('/tmp/africa_predictions') plot_double_time(data, ylog = T) + labs(subtitle = 'Basic log-linear model weighted at (1 + (1/ days ago)),\nassuming no change in growth trajectory since first day at >150 deaths')
out_list <- curve_list <- list() counter <- 0 for(i in 1:length(countries)){ message(i) this_country <- countries[i] sub_data <-df_country %>% filter(country == this_country) # Only calculate on countries with n_cases_start or greater cases, # starting at the first day at n_cases_start or greater ok <- max(sub_data$deaths, na.rm = TRUE) >= n_deaths_start if(ok){ counter <- counter + 1 pd <- sub_data %>% filter(!is.na(deaths)) %>% mutate(start_date = min(date[deaths >= n_deaths_start])) %>% mutate(days_since = date - start_date) %>% filter(days_since >= 0) %>% mutate(days_since = as.numeric(days_since)) fit <- lm(log(deaths) ~ days_since, data = pd) # plot(pd$days_since, log(pd$cases)) # abline(fit) ## Slope # curve <- fit$coef[2] # Predict days ahead fake <- tibble(days_since = seq(0, max(pd$days_since) + 5, by = 1)) fake <- left_join(fake, pd %>% dplyr::select(days_since, deaths, date)) fake$predicted <- exp(predict(fit, newdata = fake)) # Doubling time dt <- log(2)/fit$coef[2] out <- tibble(country = this_country, doubling_time = dt) out_list[[counter]] <- out curve_list[[counter]] <- fake %>% mutate(country = this_country) } } done <- bind_rows(out_list) curves <- bind_rows(curve_list) # Get curves back in exponential form # curves$curve <- exp(curves$curve) # Join doubling time to curves joined <- left_join(curves, done) # Get rid of Italy future (since it's the "leader") joined <- joined %>% filter(country != 'Italy' | date <= (Sys.Date() -1)) # Make long format long <- joined %>% dplyr::select(date, days_since, country, deaths, predicted, doubling_time) %>% tidyr::gather(key, value, deaths:predicted) %>% mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>% mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', key))
cols <- c('red', 'black') sub_data <- long %>% filter(country != 'US') ggplot(data = sub_data, aes(x = days_since, y = value, lty = key, color = key)) + geom_line(data = sub_data %>% filter(key != 'Deaths'), size = 1.2, alpha = 0.8) + geom_point(data = sub_data %>% filter(key == 'Deaths')) + geom_line(data = sub_data %>% filter(key == 'Deaths'), size = 0.8) + facet_wrap(~paste0(country, '\n', '(doubling time: ', round(doubling_time, digits = 1), ' days)'), scales = 'free') + theme_simple() + scale_linetype_manual(name ='', values = c(1,2)) + scale_color_manual(name = '', values = cols) + theme(legend.position = 'top') + labs(x = 'Days since first day at >5 cumulative deaths', y = 'Deaths', title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)', caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19', subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') + theme(strip.text = element_text(size = 13), plot.title = element_text(size = 15))
Let's again overlay Italy.
# Overlay Italy ol1 <- joined %>% filter(!country %in% 'Italy') ol2 <- joined %>% filter(country == 'Italy') %>% dplyr::rename(Italy = deaths) %>% dplyr::select(Italy, days_since) ol <- left_join(ol1, ol2) %>% dplyr::select(days_since, date, country, deaths, predicted, Italy,doubling_time) ol <- tidyr::gather(ol, key, value, deaths: Italy) %>% mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>% mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', key)) cols <- c('red', 'blue', 'black') sub_data <- ol %>% filter(!(key == 'Predicted (based on current doubling time)' & country == 'Spain' & days_since > 13)) ggplot(data = sub_data, aes(x = days_since, y = value, lty = key, color = key)) + geom_line(data = sub_data %>% filter(!key %in% c('Deaths', 'Italy')), size = 1.2, alpha = 0.8) + geom_line(data = sub_data %>% filter(key %in% c('Italy')), size = 0.8, alpha = 0.8) + geom_point(data = sub_data %>% filter(key == 'Deaths')) + geom_line(data = sub_data %>% filter(key == 'Deaths'), size = 0.8) + facet_wrap(~paste0(country, '\n', '(doubling time: ', round(doubling_time, digits = 1), ' days)'), scales = 'free') + theme_simple() + scale_linetype_manual(name ='', values = c(1,6,2)) + scale_color_manual(name = '', values = cols) + scale_y_log10() + theme(legend.position = 'top') + labs(x = 'Days since first day at >5 deaths', y = 'Deaths', title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)', caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19', subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') + theme(strip.text = element_text(size = 13), plot.title = element_text(size = 15))
Let's look just at Spain
# Overlay Italy ol1 <- joined %>% filter(!country %in% 'Italy', country == 'Spain') ol2 <- joined %>% filter(country == 'Italy') %>% dplyr::rename(Italy = deaths) %>% dplyr::select(Italy, days_since) ol <- left_join(ol1, ol2) %>% dplyr::select(days_since, date, country, deaths, predicted, Italy,doubling_time) ol <- tidyr::gather(ol, key, value, deaths: Italy) %>% mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>% mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', ifelse(key == 'Deaths', 'Spain', key))) cols <- c('blue', 'black', 'red') ggplot(data = ol, aes(x = days_since, y = value, lty = key, color = key)) + geom_line(data = ol %>% filter(!key %in% c('Deaths', 'Italy')), size = 1.2, alpha = 0.8) + geom_line(data = ol %>% filter(key %in% c('Italy')), size = 0.8, alpha = 0.8) + # geom_point(data = ol %>% filter(key == 'Deaths')) + geom_point(data = ol %>% filter(country == 'Spain', key == 'Spain'), size = 4, alpha = 0.6) + geom_line(data = ol %>% filter(key == 'Deaths'), size = 0.8) + # facet_wrap(~paste0(country, '\n', # '(doubling time: ', # round(doubling_time, digits = 1), ' days)'), scales = 'free') + theme_simple() + scale_linetype_manual(name ='', values = c(1,6,1)) + scale_color_manual(name = '', values = cols) + scale_y_log10() + theme(legend.position = 'top') + labs(x = 'Days since first day at >5 deaths', y = 'Deaths', title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)', caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19', subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') + theme(strip.text = element_text(size = 13), plot.title = element_text(size = 15), axis.title = element_text(size = 18))
Things are changing very rapidly. And measures being taken by these countries will have an impact on the outbreak.
But it's important to remember that there is a lag between when an intervention takes place and when its effect is notable. Because of the incubation period - the number of days between someone getting infected and becoming sick - what we do today won't really have an effect until next weekend. And the clinical cases that present today are among people who got infected a week ago.
Disease control measures work. We can see that clearly in the case of Hubei, Wuhan, Iran, Japan. And they will work in Europe too. But because many of these measures were implemented very recently, we won't likely see a major effect for at least a few more days.
In the mean time, it's important to practice social distancing. Stay away from others to keep both you and others safe. Listen to Health Authorities. Take this very seriously.
# Madrid vs Lombardy deaths n_death_start <- 5 pd <- esp_df %>% # filter(ccaa == 'Madrid') %>% dplyr::select(date, ccaa, cases, deaths) %>% bind_rows(ita %>% # filter(ccaa == 'Lombardia') %>% dplyr::select(date, ccaa, cases, deaths)) %>% arrange(date) %>% group_by(ccaa) %>% mutate(first_n_death = min(date[deaths >= n_death_start])) %>% ungroup %>% mutate(days_since_n_deaths = date - first_n_death) %>% filter(is.finite(days_since_n_deaths)) pd$country <- pd$ccaa pd$cases <- pd$cases countries <- sort(unique(pd$country)) out_list <- curve_list <- list() counter <- 0 for(i in 1:length(countries)){ message(i) this_country <- countries[i] sub_data <- pd %>% filter(country == this_country) # Only calculate on countries with n_cases_start or greater cases, # starting at the first day at n_cases_start or greater # ok <- max(sub_data$deaths, na.rm = TRUE) >= n_deaths_start ok <- length(which(sub_data$deaths >= n_deaths_start)) if(ok){ counter <- counter + 1 sub_pd <- sub_data %>% filter(!is.na(deaths)) %>% mutate(start_date = min(date[deaths >= n_deaths_start])) %>% mutate(days_since = date - start_date) %>% filter(days_since >= 0) %>% mutate(days_since = as.numeric(days_since)) fit <- lm(log(deaths) ~ days_since, data = sub_pd) # plot(pd$days_since, log(pd$cases)) # abline(fit) ## Slope # curve <- fit$coef[2] # Predict days ahead fake <- tibble(days_since = seq(0, max(sub_pd$days_since) + 5, by = 1)) fake <- left_join(fake, sub_pd %>% dplyr::select(days_since, deaths, date)) fake$predicted <- exp(predict(fit, newdata = fake)) # Doubling time dt <- log(2)/fit$coef[2] out <- tibble(country = this_country, doubling_time = dt) out_list[[counter]] <- out curve_list[[counter]] <- fake %>% mutate(country = this_country) } } done <- bind_rows(out_list) curves <- bind_rows(curve_list) # Get curves back in exponential form # curves$curve <- exp(curves$curve) # Join doubling time to curves joined <- left_join(curves, done) # Make long format long <- joined %>% dplyr::select(date, days_since, country, deaths, predicted, doubling_time) %>% tidyr::gather(key, value, deaths:predicted) %>% mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>% mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', key)) # Remove those with not enough data to have a doubling time yet long <- long %>% filter(!is.na(doubling_time))
text_size <- 12 cols <- c('red', 'black') ggplot(data = long, aes(x = days_since, y = value, lty = key, color = key)) + geom_line(data = long %>% filter(key != 'Deaths'), size = 1.2, alpha = 0.8) + geom_point(data = long %>% filter(key == 'Deaths')) + geom_line(data = long %>% filter(key == 'Deaths'), size = 0.8) + facet_wrap(~paste0(country, '\n', '(doubling time: ', round(doubling_time, digits = 1), ' days)'), scales = 'free') + theme_simple() + scale_y_log10() + scale_linetype_manual(name ='', values = c(1,2)) + scale_color_manual(name = '', values = cols) + theme(legend.position = 'top') + labs(x = 'Days since first day at >150 cumulative cases', y = 'Deaths', title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)', caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19', subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') + theme(strip.text = element_text(size = text_size * 0.5), plot.title = element_text(size = 15))
Let's overlay Lombardy
# Overlay Italy ol1 <- joined %>% filter(!country %in% 'Lombardia') ol2 <- joined %>% filter(country == 'Lombardia') %>% dplyr::rename(Lombardia = deaths) %>% dplyr::select(Lombardia, days_since) ol <- left_join(ol1, ol2) %>% dplyr::select(days_since, date, country, deaths, predicted, Lombardia,doubling_time) ol <- tidyr::gather(ol, key, value, deaths: Lombardia) %>% mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>% mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', key)) # Remove those with not enough data to have a doubling time yet ol <- ol %>% filter(!is.na(doubling_time)) cols <- c('red', 'blue', 'black') ggplot(data = ol, aes(x = days_since, y = value, lty = key, color = key)) + scale_y_log10() + geom_line(data = ol %>% filter(!key %in% c('Deaths', 'Italy')), size = 1.2, alpha = 0.8) + geom_line(data = ol %>% filter(key %in% c('Lombardia')), size = 0.5, alpha = 0.8) + geom_point(data = ol %>% filter(key == 'Deaths')) + geom_line(data = ol %>% filter(key == 'Deaths'), size = 0.8) + facet_wrap(~paste0(country, '\n', '(doubling time: ', round(doubling_time, digits = 1), ' days)'), scales = 'free') + theme_simple() + scale_linetype_manual(name ='', values = c(1,6,2)) + scale_color_manual(name = '', values = cols) + theme(legend.position = 'top') + labs(x = 'Days since first day at >5 deaths', y = 'Deaths', title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)', caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19', subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') + theme(strip.text = element_text(size = text_size * 0.5), plot.title = element_text(size = 15))
Show only Spanish regions vs. Lombardy
text_size <- 14 # Overlay Italy ol1 <- joined %>% filter(!country %in% 'Lombardia') ol2 <- joined %>% filter(country == 'Lombardia') %>% dplyr::rename(Lombardia = deaths) %>% dplyr::select(Lombardia, days_since) ol <- left_join(ol1, ol2) %>% dplyr::select(days_since, date, country, deaths, predicted, Lombardia,doubling_time) ol <- tidyr::gather(ol, key, value, deaths: Lombardia) %>% mutate(key = Hmisc::capitalize(gsub('_', ' ', key))) %>% mutate(key = ifelse(key == 'Predicted', 'Predicted (based on current doubling time)', key)) # Remove those with not enough data to have a doubling time yet ol <- ol %>% filter(!is.na(doubling_time)) # Only Spain ol <- ol %>% filter(country %in% esp_df$ccaa) %>% filter(!country %in% 'Aragón') cols <- c('red', 'blue', 'black') ggplot(data = ol, aes(x = days_since, y = value, lty = key, color = key)) + scale_y_log10() + geom_line(data = ol %>% filter(!key %in% c('Deaths', 'Lombardia')), size = 1.2, alpha = 0.8) + geom_line(data = ol %>% filter(key %in% c('Lombardia')), size = 0.5, alpha = 0.8) + geom_point(data = ol %>% filter(key == 'Deaths')) + geom_line(data = ol %>% filter(key == 'Deaths'), size = 0.8) + facet_wrap(~paste0(country, '\n', '(doubling time: ', round(doubling_time, digits = 1), ' days)'), scales = 'free') + theme_simple() + scale_linetype_manual(name ='', values = c(1,6,2)) + scale_color_manual(name = '', values = cols) + theme(legend.position = 'top') + labs(x = 'Days since first day at >5 deaths', y = 'Deaths', title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)', caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19', subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') + theme(strip.text = element_text(size = text_size * 0.6), plot.title = element_text(size = 15))
Same plot but overlayed
Same as above, but overlaid
text_size <-10 # cols <- c('red', 'black') long <- long %>% filter(country %in% c('Lombardia', 'Emilia Romagna') | country %in% esp_df$ccaa) %>% filter(country != 'Aragón') places <- sort(unique(long$country)) cols <- colorRampPalette(RColorBrewer::brewer.pal(n = 7, 'Spectral'))(length(places)) cols[which(places == 'Madrid')] <- 'red' cols[which(places == 'Cataluña')] <- 'purple' cols[which(places == 'Lombardia')] <- 'darkorange' cols[which(places == 'Emilia Romagna')] <- 'darkgreen' long$key <- ifelse(long$key != 'Deaths', 'Predicted', long$key) long$key <- ifelse(long$key == 'Predicted', 'Muertes\nprevistas', 'Muertes\nobservadas') # Keep only Madrid, Lombardy, Emilia Romagna long <- long %>% filter(country %in% c('Madrid', 'Lombardia', 'Emilia Romagna')) ggplot(data = long, aes(x = days_since, y = value, lty = key, color = country)) + geom_point(data = long %>% filter(key == 'Muertes\nobservadas'), size = 2, alpha = 0.8) + geom_line(data = long %>% filter(key == 'Muertes\nprevistas'), size = 1, alpha = 0.7) + geom_line(data = long %>% filter(key != 'Muertes\nprevistas'), size = 0.8) + theme_simple() + scale_y_log10() + scale_linetype_manual(name ='', values = c(1,4)) + scale_color_manual(name = '', values = cols) + theme(legend.position = 'top') + # labs(x = 'Days since first day at 5 or more cumulative deaths', # y = 'Deaths', # title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)', # caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19', # subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') + labs(x = 'Días desde el primer día a 5 o más muertes acumuladas', y = 'Muertes (escala logarítmica)', title = 'Muertes por COVID-19', caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19', subtitle = '(Tasa de crecimiento calculada desde el primer día a 5 o más muertes acumuladas)\n(Muertes "previstas": suponiendo que no hay cambios en la tasa de crecimiento)') + theme(strip.text = element_text(size = text_size * 0.75), plot.title = element_text(size = text_size * 3), legend.text = element_text(size = text_size * 1.5), axis.title = element_text(size = text_size * 2), axis.text = element_text(size = text_size * 2))
# cols <- c(cols, 'darkorange') # ggplot(data = ol, # aes(x = days_since, # y = value, # lty = key, # color = key)) + # scale_y_log10() + # geom_line(aes(color = country)) + # # # geom_line(data = ol %>% filter(!key %in% c('Deaths', 'Italy')), # # size = 1.2, alpha = 0.8) + # # geom_line(data = ol %>% filter(key %in% c('Lombardia')), # # size = 0.5, alpha = 0.8) + # # geom_point(data = ol %>% filter(key == 'Deaths')) + # # geom_line(data = ol %>% filter(key == 'Deaths'), # # size = 0.8) + # theme_simple() + # scale_linetype_manual(name ='', # values = c(1,6,2)) + # scale_color_manual(name = '', # values = cols) + # theme(legend.position = 'top') + # labs(x = 'Days since first day at >5 deaths', # y = 'Deaths', # title = 'COVID-19 DEATHS: ("predicted" assumes no change in doubling time)', # caption = 'Data from Johns Hopkins. Processing: Joe Brew @joethebrew. Code: github.com/databrew/covid19', # subtitle = '(Doubling time calculated since first day at >5 cumulative deaths)') + # theme(strip.text = element_text(size = text_size * 1), # plot.title = element_text(size = 15))
# Map data preparation if(!'map.RData' %in% dir()){ esp1 <- getData(name = 'GADM', country = 'ESP', level = 1) # Remove canary esp1 <- esp1[esp1@data$NAME_1 != 'Islas Canarias',] espf <- fortify(esp1, region = 'NAME_1') # Standardize names # Convert names map_names <- esp1@data$NAME_1 data_names <- sort(unique(esp_df$ccaa)) names_df <- tibble(NAME_1 = c('Andalucía', 'Aragón', 'Cantabria', 'Castilla-La Mancha', 'Castilla y León', 'Cataluña', 'Ceuta y Melilla', 'Comunidad de Madrid', 'Comunidad Foral de Navarra', 'Comunidad Valenciana', 'Extremadura', 'Galicia', 'Islas Baleares', 'La Rioja', 'País Vasco', 'Principado de Asturias', 'Región de Murcia'), ccaa = c('Andalucía', 'Aragón', 'Cantabria', 'CLM', 'CyL', 'Cataluña', 'Melilla', 'Madrid', 'Navarra', 'C. Valenciana', 'Extremadura', 'Galicia', 'Baleares', 'La Rioja', 'País Vasco', 'Asturias', 'Murcia')) espf <- left_join(espf %>% dplyr::rename(NAME_1 = id), names_df) centroids <- data.frame(coordinates(esp1)) names(centroids) <- c('long', 'lat') centroids$NAME_1 <- esp1$NAME_1 centroids <- centroids %>% left_join(names_df) # Get random sampling points random_list <- list() for(i in 1:nrow(esp1)){ message(i) shp <- esp1[i,] # bb <- bbox(shp) this_ccaa <- esp1@data$NAME_1[i] # xs <- runif(n = 500, min = bb[1,1], max = bb[1,2]) # ys <- runif(n = 500, min = bb[2,1], max = bb[2,2]) # random_points <- expand.grid(long = xs, lat = ys) %>% # mutate(x = long, # y = lat) # coordinates(random_points) <- ~x+y # proj4string(random_points) <- proj4string(shp) # get ccaa message('getting locations of randomly generated points') # polys <- over(random_points,polygons(shp)) # polys <- as.numeric(polys) random_points <- spsample(shp, n = 20000, type = 'random') random_points <- data.frame(random_points) random_points$NAME_1 <- this_ccaa random_points <- left_join(random_points, names_df) %>% dplyr::select(-NAME_1) random_list[[i]] <- random_points } random_points <- bind_rows(random_list) random_points <- random_points %>% mutate(long = x, lat = y) save(espf, esp1, names_df, centroids, random_points, file = 'map.RData') } else { load('map.RData') } # Define a function for adding zerio add_zero <- function (x, n) { x <- as.character(x) adders <- n - nchar(x) adders <- ifelse(adders < 0, 0, adders) for (i in 1:length(x)) { if (!is.na(x[i])) { x[i] <- paste0(paste0(rep("0", adders[i]), collapse = ""), x[i], collapse = "") } } return(x) }
remake_world_map <- FALSE options(scipen = '999') if(remake_world_map){ # World map animation world <- map_data('world') # world <- ne_countries(scale = "medium", returnclass = "sf") # Get plotting data pd <- df_country %>% dplyr::select(date, lng, lat, n = cases) dates <- sort(unique(pd$date)) n_days <- length(dates) # # Define vectors for projection # vec_lon <- seq(30, -20, length = n_days) # vec_lat <- seq(25, 15, length = n_days) dir.create('animation') for(i in 1:n_days){ message(i, ' of ', n_days) this_date <- dates[i] # this_lon <- vec_lon[i] # this_lat <- vec_lat[i] # the_crs <- # paste0("+proj=laea +lat_0=", this_lat, # " +lon_0=", # this_lon, # " +x_0=4321000 +y_0=3210000 +ellps=GRS80 +units=m +no_defs ") sub_data <- pd %>% filter(date == this_date) # coordinates(sub_data) <- ~lng+lat # proj4string(sub_data) <- proj4string(esp1) # # sub_data <- spTransform(sub_data, # # the_crs) # coordy <- coordinates(sub_data) # sub_data@data$long <- coordy[,1] # sub_data@data$lat <- coordy[,2] g <- ggplot() + geom_polygon(data = world, aes(x = long, y = lat, group = group), fill = 'black', color = 'white', size = 0.1) + theme_map() + geom_point(data = sub_data %>% filter(n > 0) %>% mutate(Deaths = n), aes(x = lng, y = lat, size = Deaths), color = 'red', alpha = 0.6) + geom_point(data = tibble(x = c(0,0), y = c(0,0), Deaths = c(1, 100000)), aes(x = x, y = y, size = Deaths), color = 'red', alpha =0.001) + scale_size_area(name = '', breaks = c(100, 1000, 10000, 100000), max_size = 25 ) + # scale_size_area(name = '', limits = c(1, 10), breaks = c(0, 10, 30, 50, 70, 100, 200, 500)) + labs(title = this_date) + theme(plot.title = element_text(size = 30), legend.text = element_text(size = 15), legend.position = 'left') plot_number <- add_zero(i, 3) ggsave(filename = paste0('animation/', plot_number, '.png'), plot = g, width = 9.5, height = 5.1) } setwd('animation') system('convert -delay 30x100 -loop 0 *.png result.gif') setwd('..') }
make_map <- function(var = 'deaths', data = NULL, pop = FALSE, pop_factor = 100000, points = FALSE, line_color = 'white', add_names = T, add_values = T, text_size = 2.7){ if(is.null(data)){ data <- esp_df %>% mutate(ccaa = cat_transform(ccaa)) } left <- espf %>% mutate(ccaa = cat_transform(ccaa)) right <- data[,c('ccaa', paste0(var, '_non_cum'))] names(right)[ncol(right)] <- 'var' right <- right %>% group_by(ccaa) %>% summarise(var = sum(var, na.rm = T)) if(pop){ right <- left_join(right, esp_pop) right$var <- right$var / right$pop * pop_factor } map <- left_join(left, right) if(points){ the_points <- centroids %>% left_join(right) g <- ggplot() + geom_polygon(data = map, aes(x = long, y = lat, group = group), fill = 'black', color = line_color, lwd = 0.4, alpha = 0.8) + geom_point(data = the_points, aes(x = long, y = lat, size = var), color = 'red', alpha = 0.7) + scale_size_area(name = '', max_size = 20) } else { # cols <- c('#008080','#70a494','#b4c8a8','#f6edbd','#edbb8a','#de8a5a','#ca562c') cols <- RColorBrewer::brewer.pal(n = 8, name = 'Blues') g <- ggplot(data = map, aes(x = long, y = lat, group = group)) + geom_polygon(aes(fill = var), lwd = 0.3, color = line_color) + scale_fill_gradientn(name = '', colours = cols) # scale_fill_viridis(name = '' ,option = 'magma', # direction = -1) } # Add names? if(add_names){ centy <- centroids %>% left_join(right) if(add_values){ centy$label <- paste0(centy$ccaa, '\n(', round(centy$var, digits = 2), ')') } else { centy$label <- centy$ccaa } g <- g + geom_text(data = centy, aes(x = long, y = lat, label = label, group = ccaa), alpha = 0.7, size = text_size) } g + theme_map() + labs(subtitle = paste0('Data as of ', max(data$date))) + theme(legend.position = 'right') } make_dot_map <- function(var = 'deaths', date = NULL, pop = FALSE, pop_factor = 100, point_factor = 1, points = FALSE, point_color = 'darkred', point_size = 0.6, point_alpha = 0.5){ if(is.null(date)){ the_date <- max(esp_df$date) } else { the_date <- date } right <- esp_df[esp_df$date == the_date,c('ccaa', var)] names(right)[ncol(right)] <- 'var' if(pop){ right <- left_join(right, esp_pop) right$var <- right$var / right$pop * pop_factor } map_data <- esp1@data %>% left_join(names_df) %>% left_join(right) map_data$var <- map_data$var / point_factor out_list <- list() for(i in 1:nrow(map_data)){ sub_data <- map_data[i,] this_value = round(sub_data$var) if(this_value >= 1){ this_ccaa = sub_data$ccaa # get some points sub_points <- random_points %>% filter(ccaa == this_ccaa) sampled_points <- sub_points %>% dplyr::sample_n(this_value) out_list[[i]] <- sampled_points } } the_points <- bind_rows(out_list) g <- ggplot() + geom_polygon(data = espf, aes(x = long, y = lat, group = group), fill = 'white', color = 'black', lwd = 0.4, alpha = 0.8) + geom_point(data = the_points, aes(x = long, y = lat), color = point_color, size = point_size, alpha = point_alpha) g + theme_map() + labs(subtitle = paste0('Data as of ', max(esp_df$date))) }
make_map(var = 'deaths', points = T) + labs(title = 'Number of deaths', caption = '@joethebrew')
make_map(var = 'deaths', line_color = 'darkgrey', points = F) + labs(title = 'Number of deaths', caption = '@joethebrew')
make_map(var = 'deaths', pop = TRUE, points = T) + labs(title = 'Number of deaths per 100,000', caption = '@joethebrew')
make_map(var = 'deaths', pop = TRUE, points = F, line_color = 'darkgrey') + labs(title = 'Number of deaths per 100,000', caption = '@joethebrew')
make_dot_map(var = 'deaths', point_size = 0.05) + labs(title = 'COVID-19 deaths: 1 point = 1 death\nImportant: points are random within each CCAA; do not reflect exact location', caption = '@joethebrew')
make_map(var = 'cases', points = T) + labs(title = 'Number of confirmed cases', caption = '@joethebrew')
make_map(var = 'cases', line_color = 'darkgrey', points = F) + labs(title = 'Number of confirmed cases', caption = '@joethebrew')
make_map(var = 'cases', pop = TRUE, points = T) + labs(title = 'Number of confirmed cases per 100,000', caption = '@joethebrew')
make_map(var = 'cases', pop = TRUE, points = F, line_color = 'darkgrey') + labs(title = 'Number of confirmed cases per 100,000', caption = '@joethebrew')
make_dot_map(var = 'cases', point_size = 0.05, point_alpha = 0.5, point_factor = 10) + labs(title = 'COVID-19 cases: 1 point = 10 cases\nImportant: points are random within each CCAA; do not reflect exact location', caption = '@joethebrew')
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.