# Basic knitr options library(knitr) opts_chunk$set(comment = NA, # echo = FALSE, warning = FALSE, message = FALSE, error = TRUE, cache = FALSE, fig.width = 8, fig.height = 6, fig.path = 'figures/')
## Load libraries library(covid19) library(ggplot2) library(lubridate) library(dplyr) library(ggplot2) library(sp) library(raster) library(viridis) library(ggthemes) library(sf) library(rnaturalearth) library(rnaturalearthdata)
# Map data preparation if(!'map.RData' %in% dir()){ esp1 <- getData(name = 'GADM', country = 'ESP', level = 1) # Remove canary esp1 <- esp1[esp1@data$NAME_1 != 'Islas Canarias',] espf <- fortify(esp1, region = 'NAME_1') # Standardize names # Convert names map_names <- esp1@data$NAME_1 data_names <- sort(unique(esp_df$ccaa)) names_df <- tibble(NAME_1 = c('Andalucía', 'Aragón', 'Cantabria', 'Castilla-La Mancha', 'Castilla y León', 'Cataluña', 'Ceuta y Melilla', 'Comunidad de Madrid', 'Comunidad Foral de Navarra', 'Comunidad Valenciana', 'Extremadura', 'Galicia', 'Islas Baleares', 'La Rioja', 'País Vasco', 'Principado de Asturias', 'Región de Murcia'), ccaa = c('Andalucía', 'Aragón', 'Cantabria', 'CLM', 'CyL', 'Cataluña', 'Melilla', 'Madrid', 'Navarra', 'C. Valenciana', 'Extremadura', 'Galicia', 'Baleares', 'La Rioja', 'País Vasco', 'Asturias', 'Murcia')) espf <- left_join(espf %>% dplyr::rename(NAME_1 = id), names_df) centroids <- data.frame(coordinates(esp1)) names(centroids) <- c('long', 'lat') centroids$NAME_1 <- esp1$NAME_1 centroids <- centroids %>% left_join(names_df) # Get random sampling points random_list <- list() for(i in 1:nrow(esp1)){ message(i) shp <- esp1[i,] # bb <- bbox(shp) this_ccaa <- esp1@data$NAME_1[i] # xs <- runif(n = 500, min = bb[1,1], max = bb[1,2]) # ys <- runif(n = 500, min = bb[2,1], max = bb[2,2]) # random_points <- expand.grid(long = xs, lat = ys) %>% # mutate(x = long, # y = lat) # coordinates(random_points) <- ~x+y # proj4string(random_points) <- proj4string(shp) # get ccaa message('getting locations of randomly generated points') # polys <- over(random_points,polygons(shp)) # polys <- as.numeric(polys) random_points <- spsample(shp, n = 20000, type = 'random') random_points <- data.frame(random_points) random_points$NAME_1 <- this_ccaa random_points <- left_join(random_points, names_df) %>% dplyr::select(-NAME_1) random_list[[i]] <- random_points } random_points <- bind_rows(random_list) random_points <- random_points %>% mutate(long = x, lat = y) save(espf, esp1, names_df, centroids, random_points, file = 'map.RData') } else { load('map.RData') } # Define a function for adding zerio add_zero <- function (x, n) { x <- as.character(x) adders <- n - nchar(x) adders <- ifelse(adders < 0, 0, adders) for (i in 1:length(x)) { if (!is.na(x[i])) { x[i] <- paste0(paste0(rep("0", adders[i]), collapse = ""), x[i], collapse = "") } } return(x) } options(scipen = '999')
make_map <- function(var = 'deaths', data = NULL, pop = FALSE, pop_factor = 100000, points = FALSE, line_color = 'white', add_names = T, add_values = T, text_size = 2.7){ if(is.null(data)){ data <- esp_df } left <- espf right <- data[,c('ccaa', paste0(var, '_non_cum'))] names(right)[ncol(right)] <- 'var' right <- right %>% group_by(ccaa) %>% summarise(var = sum(var, na.rm = T)) if(pop){ right <- left_join(right, esp_pop) right$var <- right$var / right$pop * pop_factor } map <- left_join(left, right) if(points){ the_points <- centroids %>% left_join(right) g <- ggplot() + geom_polygon(data = map, aes(x = long, y = lat, group = group), fill = 'black', color = line_color, lwd = 0.4, alpha = 0.8) + geom_point(data = the_points, aes(x = long, y = lat, size = var), color = 'red', alpha = 0.7) + scale_size_area(name = '', max_size = 20) } else { # cols <- c('#008080','#70a494','#b4c8a8','#f6edbd','#edbb8a','#de8a5a','#ca562c') cols <- RColorBrewer::brewer.pal(n = 8, name = 'Blues') g <- ggplot(data = map, aes(x = long, y = lat, group = group)) + geom_polygon(aes(fill = var), lwd = 0.3, color = line_color) + scale_fill_gradientn(name = '', colours = cols) # scale_fill_viridis(name = '' ,option = 'magma', # direction = -1) } # Add names? if(add_names){ centy <- centroids %>% left_join(right) if(add_values){ centy$label <- paste0(centy$ccaa, '\n(', round(centy$var, digits = 2), ')') } else { centy$label <- centy$ccaa } g <- g + geom_text(data = centy, aes(x = long, y = lat, label = label, group = ccaa), alpha = 0.7, size = text_size) } g + theme_map() + labs(subtitle = paste0('Data as of ', max(data$date))) + theme(legend.position = 'right') } make_dot_map <- function(var = 'deaths', date = NULL, pop = FALSE, pop_factor = 100, point_factor = 1, points = FALSE, point_color = 'darkred', point_size = 0.6, point_alpha = 0.5){ if(is.null(date)){ the_date <- max(esp_df$date) } else { the_date <- date } right <- esp_df[esp_df$date == the_date,c('ccaa', var)] names(right)[ncol(right)] <- 'var' if(pop){ right <- left_join(right, esp_pop) right$var <- right$var / right$pop * pop_factor } map_data <- esp1@data %>% left_join(names_df) %>% left_join(right) map_data$var <- map_data$var / point_factor out_list <- list() for(i in 1:nrow(map_data)){ sub_data <- map_data[i,] this_value = round(sub_data$var) if(this_value >= 1){ this_ccaa = sub_data$ccaa # get some points sub_points <- random_points %>% filter(ccaa == this_ccaa) sampled_points <- sub_points %>% dplyr::sample_n(this_value) out_list[[i]] <- sampled_points } } the_points <- bind_rows(out_list) g <- ggplot() + geom_polygon(data = espf, aes(x = long, y = lat, group = group), fill = 'white', color = 'black', lwd = 0.4, alpha = 0.8) + geom_point(data = the_points, aes(x = long, y = lat), color = point_color, size = point_size, alpha = point_alpha) g + theme_map() + labs(subtitle = paste0('Data as of ', max(esp_df$date))) }
make_map(var = 'deaths', points = T) + labs(title = 'Number of deaths') if(!dir.exists('high_quality')){ dir.create('high_quality') } ggsave('high_quality/points_death_absolute.pdf', width = 8, height = 6)
make_map(var = 'deaths', line_color = 'darkgrey', points = F) + labs(title = 'Number of deaths') ggsave('high_quality/polygons_death_absolute.pdf', width = 8, height = 6)
make_map(var = 'deaths', pop = TRUE, points = T) + labs(title = 'Number of deaths per 100,000') ggsave('high_quality/points_death_adjusted.pdf', width = 8, height = 6)
make_map(var = 'deaths', pop = TRUE, points = F, line_color = 'darkgrey') + labs(title = 'Number of deaths per 100,000') ggsave('high_quality/polygons_death_adjusted.pdf', width = 8, height = 6)
make_dot_map(var = 'deaths', point_size = 0.15) + labs(title = 'COVID-19 deaths: 1 point = 1 death\nImportant: points are random within each CCAA; do not reflect exact location') ggsave('high_quality/point_per_death.pdf', width = 8, height = 6)
make_map(var = 'cases', points = T) + labs(title = 'Number of confirmed cases') ggsave('high_quality/points_case_absolute.pdf', width = 8, height = 6)
make_map(var = 'cases', line_color = 'darkgrey', points = F) + labs(title = 'Number of confirmed cases') ggsave('high_quality/polygon_case_absolute.pdf', width = 8, height = 6)
make_map(var = 'cases', pop = TRUE, points = T) + labs(title = 'Number of confirmed cases per 100,000') ggsave('high_quality/points_case_adjusted.pdf', width = 8, height = 6)
make_map(var = 'cases', pop = TRUE, points = F, line_color = 'darkgrey') + labs(title = 'Number of confirmed cases per 100,000') ggsave('high_quality/polygons_case_adjusted.pdf', width = 8, height = 6)
keep_dates <- as.Date(seq((max(esp_df$date)-6), max(esp_df$date), by = 1)) data <- esp_df %>% filter(date %in% keep_dates) make_map(var = 'cases', data = data, pop = TRUE, points = F, line_color = 'darkgrey') + labs(title = 'Number of confirmed cases per 100,000', caption = paste0('Cumulative incidence over the week from ', min(keep_dates), ' through ', max(keep_dates))) ggsave('high_quality/polygons_case_adjusted_week.pdf', width = 8, height = 6)
make_dot_map(var = 'cases', point_size = 0.05, point_alpha = 0.5, point_factor = 10) + labs(title = 'COVID-19 cases: 1 point = 10 cases\nImportant: points are random within each CCAA; do not reflect exact location') ggsave('high_quality/point_per_10_cases.pdf', width = 8, height = 6)
Calculate cumulative incidence per 1.000.000 per week
keep_dates <- as.Date(seq((max(esp_df$date)-6), max(esp_df$date), by = 1)) x <- esp_df %>% filter(date %in% keep_dates) %>% group_by(ccaa) %>% summarise(incident_cases_over_7_days = sum(cases_non_cum)) %>% ungroup %>% left_join(esp_pop) %>% mutate(incident_cases_over_7_days_per_10e4 = (incident_cases_over_7_days / pop) * 10e4) %>% arrange(desc(incident_cases_over_7_days_per_10e4)) kable(x)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.