#' Generate hospitalization proportions.
#'
#' \code{get_MCMA_prop_hosp} wrangles individual Covid-19 data for MCMA and
#' calculates the proportion of incident detected cases hospitalized.
#'
#' @param n_date_ini Character specifying the initial date to compute proportions.
#' @param n_date_last Character specifying the last date to compute proportions.
#' @param save_data Flag (default is FALSE) of whether to save the data as a
#' csv file.
#' @return
#' A data.frame with proportion of incident detected cases hospitalized.
#' @export
get_MCMA_prop_hosp <- function(n_date_ini = "2020-02-24",
n_date_last = "2020-12-07",
save_data = FALSE){
# Load data ---------------------------------------------------------------
df_ssa_covid_raw <- fread("data-raw/201213COVID19MEXICO.csv", header=TRUE)
load("data-raw/df_pop_county.Rdata")
load("data/df_hosp_MCMA.RData")
load("data-raw/df_pop_ZMVM.RData")
n_time_stamp <- unique(df_ssa_covid_raw$FECHA_ACTUALIZACION)
# Wrangle data -----------------------------------------------------------
df_ssa_covid <- df_ssa_covid_raw %>%
select(ENTIDAD_RES, MUNICIPIO_RES, FECHA_INGRESO, FECHA_SINTOMAS,
FECHA_DEF, EDAD, SEXO, TIPO_PACIENTE, INTUBADO, UCI, CLASIFICACION_FINAL,
RESULTADO_LAB) %>%
mutate(state = case_when(ENTIDAD_RES==1 ~ "Aguascalientes",
ENTIDAD_RES==2 ~ "Baja California",
ENTIDAD_RES==3 ~ "Baja California Sur",
ENTIDAD_RES==4 ~ "Campeche",
ENTIDAD_RES==5 ~ "Coahuila",
ENTIDAD_RES==6 ~ "Colima",
ENTIDAD_RES==7 ~ "Chiapas",
ENTIDAD_RES==8 ~ "Chihuahua",
ENTIDAD_RES==9 ~ "Mexico City",
ENTIDAD_RES==10 ~ "Durango",
ENTIDAD_RES==11 ~ "Guanajuato",
ENTIDAD_RES==12 ~ "Guerrero",
ENTIDAD_RES==13 ~ "Hidalgo",
ENTIDAD_RES==14 ~ "Jalisco",
ENTIDAD_RES==15 ~ "State of Mexico",
ENTIDAD_RES==16 ~ "Michoacan",
ENTIDAD_RES==17 ~ "Morelos",
ENTIDAD_RES==18 ~ "Nayarit",
ENTIDAD_RES==19 ~ "Nuevo Leon",
ENTIDAD_RES==20 ~ "Oaxaca",
ENTIDAD_RES==21 ~ "Puebla",
ENTIDAD_RES==22 ~ "Queretaro",
ENTIDAD_RES==23 ~ "Quintana Roo",
ENTIDAD_RES==24 ~ "San Luis Potosi",
ENTIDAD_RES==25 ~ "Sinaloa",
ENTIDAD_RES==26 ~ "Sonora",
ENTIDAD_RES==27 ~ "Tabasco",
ENTIDAD_RES==28 ~ "Tamaulipas",
ENTIDAD_RES==29 ~ "Tlaxcala",
ENTIDAD_RES==30 ~ "Veracruz",
ENTIDAD_RES==31 ~ "Yucatan",
ENTIDAD_RES==32 ~ "Zacatecas")) %>%
mutate(entidad = case_when(ENTIDAD_RES==1 ~ "Aguascalientes",
ENTIDAD_RES==2 ~ "Baja California",
ENTIDAD_RES==3 ~ "Baja California Sur",
ENTIDAD_RES==4 ~ "Campeche",
ENTIDAD_RES==5 ~ "Coahuila",
ENTIDAD_RES==6 ~ "Colima",
ENTIDAD_RES==7 ~ "Chiapas",
ENTIDAD_RES==8 ~ "Chihuahua",
ENTIDAD_RES==9 ~ "Ciudad de México",
ENTIDAD_RES==10 ~ "Durango",
ENTIDAD_RES==11 ~ "Guanajuato",
ENTIDAD_RES==12 ~ "Guerrero",
ENTIDAD_RES==13 ~ "Hidalgo",
ENTIDAD_RES==14 ~ "Jalisco",
ENTIDAD_RES==15 ~ "Estado de México",
ENTIDAD_RES==16 ~ "Michoacán",
ENTIDAD_RES==17 ~ "Morelos",
ENTIDAD_RES==18 ~ "Nayarit",
ENTIDAD_RES==19 ~ "Nuevo León",
ENTIDAD_RES==20 ~ "Oaxaca",
ENTIDAD_RES==21 ~ "Puebla",
ENTIDAD_RES==22 ~ "Querétaro",
ENTIDAD_RES==23 ~ "Quintana Roo",
ENTIDAD_RES==24 ~ "San Luis Potosí",
ENTIDAD_RES==25 ~ "Sinaloa",
ENTIDAD_RES==26 ~ "Sonora",
ENTIDAD_RES==27 ~ "Tabasco",
ENTIDAD_RES==28 ~ "Tamaulipas",
ENTIDAD_RES==29 ~ "Tlaxcala",
ENTIDAD_RES==30 ~ "Veracruz",
ENTIDAD_RES==31 ~ "Yucatán",
ENTIDAD_RES==32 ~ "Zacatecas"))
# Create a variable = 1 if COVID-19 is positive
df_ssa_covid$covid <- ifelse(df_ssa_covid$CLASIFICACION_FINAL %in% c(1,2,3), 1, 0)
# df_ssa_covid$covid <- ifelse(df_ssa_covid$RESULTADO_LAB == 1, 1, 0)
# Subset the data for COVID-19 cases only
df_ssa_covid <- subset(df_ssa_covid, covid==1)
# Format date variables as.Date() and create date_dx and date_sx variables
df_ssa_covid$date_dx <- as.Date(df_ssa_covid$FECHA_INGRESO, format = "%Y-%m-%d")
df_ssa_covid$date_sx <- as.Date(df_ssa_covid$FECHA_SINTOMAS, format = "%Y-%m-%d")
df_ssa_covid$date_dead <- as.Date(df_ssa_covid$FECHA_DEF, format = "%Y-%m-%d")
# Create variable hosp_ind = 1 if patient was hospitalized
# Create variable icu_ind = 1 if patient required ICU
# Create variable vent_ind = 1 if patient was in ventilator
# Create variable dead_ind = 1 if patient died
df_ssa_covid <- df_ssa_covid %>%
mutate(hosp_ind = ifelse(TIPO_PACIENTE == 2, 1, 0)) %>%
mutate(icu_ind = ifelse(UCI == 1, 1, 0)) %>%
mutate(vent_ind = ifelse(INTUBADO == 1, 1, 0)) %>%
mutate(novent_ind = ifelse(INTUBADO != 1, 1, 0)) %>%
mutate(dead_ind = ifelse(is.na(date_dead), 0, 1)) %>%
mutate(time_to_dead = date_dead-date_dx)
#### Paste counties and population info
df_pop_county <- df_pop_county %>%
mutate(county_id = as.numeric(county_id))
df_ssa_covid <- df_ssa_covid %>%
mutate(county_id = formatC(MUNICIPIO_RES, width = 3, flag = "0"),
state_id = formatC(ENTIDAD_RES, width = 2, flag = "0"),
county_id = paste0(state_id, county_id),
county_id= as.numeric(county_id)) %>%
rename(age = EDAD) %>%
left_join(df_pop_county, by = c("county_id" = "county_id")) %>%
rename(entidad = "entidad.x")
# Vector of ZMVM county ids
v_zmvm_id <- c("9002", "9003", "9004", "9005", "9006",
"9007", "9008", "9009", "9010", "9011",
"9012", "9013", "9014", "9015", "9016",
"9017", "13069",
"15002", "15009", "15010", "15011",
"15013", "15015", "15016", "15017",
"15020", "15022", "15023", "15024",
"15025", "15028", "15029", "15030",
"15031", "15033", "15034", "15035",
"15036", "15037", "15038", "15039",
"15044", "15046", "15050", "15053",
"15057", "15058", "15059", "15060",
"15061", "15065", "15068", "15069",
"15070", "15075", "15081", "15083",
"15084", "15089", "15091", "15092",
"15093", "15094", "15095", "15096",
"15099", "15100", "15103", "15104",
"15108", "15109", "15112", "15120",
"15121", "15122", "15125")
# Create dummy for each county in ZMVM and get a subset of the
# data for MCMA only
df_ssa_covid_MCMA <- df_ssa_covid %>%
mutate(zmvm = ifelse(county_id %in% v_zmvm_id, 1, 0)) %>%
filter(zmvm == 1) %>%
mutate(entidad = case_when(zmvm == 1 ~ "ZMVM"),
state = case_when(zmvm == 1 ~ "MCMA"))
# Add ZMVM population
df_pop_ZMVM <- df_pop_ZMVM %>%
select(entidad, population)
df_ssa_covid_MCMA <- df_ssa_covid_MCMA %>%
left_join(df_pop_ZMVM, by = "entidad") %>%
rename(population = "population.y")
# Generate proportions ----------------------------------------------------
# Drop observations with time-to-dead < 0
# df_ssa_covid_MCMA <- df_ssa_covid_MCMA %>%
# filter(time_to_dead >= 0 | is.na(time_to_dead))
df_ssa_covid_MCMA_idx <- df_ssa_covid_MCMA %>%
filter(date_dx >= n_date_ini & date_dx <= n_date_last) %>%
group_by(date_dx) %>%
summarise(idx = n(),
dead = sum(dead_ind),
hosp = sum(hosp_ind),
vent = sum(vent_ind),
novent = sum(novent_ind),
icu = sum(icu_ind)) %>%
complete(date_dx = seq.Date(as.Date(n_date_ini), as.Date(n_date_last), by="day"), # Create a sequence of dates
fill = list(idx = 0,
dead = 0,
hosp = 0,
vent = 0,
novent = 0,
icu = 0)) %>% # Fill the dates without cases with zero
ungroup() %>%
rename(date = date_dx)
df_ssa_covid_MCMA_3wdead <- df_ssa_covid_MCMA %>%
filter(date_dx >= n_date_ini & date_dx <= n_date_last &
!is.na(date_dead)) %>%
mutate(dead_ind_3w = ifelse(time_to_dead <= 21, 1, 0)) %>%
group_by(date_dx) %>%
summarise(dead_3w = sum(dead_ind_3w)) %>%
complete(date_dx = seq.Date(as.Date(n_date_ini), as.Date(n_date_last), by="day"), # Create a sequence of dates
fill = list(dead_3w = 0)) %>% # Fill the dates without cases with zero
ungroup() %>%
rename(date = date_dx)
df_ssa_covid_MCMA_dead_hosp <- df_ssa_covid_MCMA %>%
filter(date_dx >= n_date_ini & date_dx <= n_date_last &
!is.na(date_dead) & hosp_ind == 1) %>%
group_by(date_dx) %>%
summarise(dead_hosp = sum(dead_ind)) %>%
complete(date_dx = seq.Date(as.Date(n_date_ini), as.Date(n_date_last), by="day"), # Create a sequence of dates
fill = list(dead_hosp = 0)) %>% # Fill the dates without cases with zero
ungroup() %>%
rename(date = date_dx)
df_ssa_covid_MCMA_dead_hosp_vent <- df_ssa_covid_MCMA %>%
filter(date_dx >= n_date_ini & date_dx <= n_date_last &
!is.na(date_dead) & hosp_ind == 1 & vent_ind == 1) %>%
group_by(date_dx) %>%
summarise(dead_hosp_vent = sum(dead_ind)) %>%
complete(date_dx = seq.Date(as.Date(n_date_ini), as.Date(n_date_last), by="day"), # Create a sequence of dates
fill = list(dead_hosp_vent = 0)) %>% # Fill the dates without cases with zero
ungroup() %>%
rename(date = date_dx)
df_ssa_covid_MCMA_dead_hosp_novent <- df_ssa_covid_MCMA %>%
filter(date_dx >= n_date_ini & date_dx <= n_date_last &
!is.na(date_dead) & hosp_ind == 1 & vent_ind == 0) %>%
group_by(date_dx) %>%
summarise(dead_hosp_novent = sum(dead_ind)) %>%
complete(date_dx = seq.Date(as.Date(n_date_ini), as.Date(n_date_last), by="day"), # Create a sequence of dates
fill = list(dead_hosp_novent = 0)) %>% # Fill the dates without cases with zero
ungroup() %>%
rename(date = date_dx)
df_ssa_covid_MCMA_prop <- left_join(df_ssa_covid_MCMA_idx,
df_ssa_covid_MCMA_3wdead,
by = "date") %>%
left_join(df_ssa_covid_MCMA_dead_hosp,
by = "date") %>%
left_join(df_ssa_covid_MCMA_dead_hosp_vent,
by = "date") %>%
left_join(df_ssa_covid_MCMA_dead_hosp_novent,
by = "date") %>%
mutate(dead = ifelse(is.na(dead), 0 , dead),
dead_3w = ifelse(is.na(dead_3w), 0 , dead_3w),
dead_hosp = ifelse(is.na(dead_hosp), 0 , dead_hosp),
dead_hosp_vent = ifelse(is.na(dead_hosp_vent), 0 , dead_hosp_vent),
dead_hosp_novent = ifelse(is.na(dead_hosp_novent), 0 , dead_hosp_novent),
p_hosp = ifelse(idx != 0, hosp/idx, 0),
p_vent_hosp = ifelse(hosp != 0, vent/hosp, 0),
p_novent_hosp = ifelse(hosp != 0, novent/hosp, 0),
p_icu_hosp = ifelse(hosp != 0, icu/hosp, 0),
p_vent = ifelse(idx != 0, vent/idx, 0),
p_icu = ifelse(idx != 0, icu/idx, 0),
cfr = ifelse(idx != 0, dead/idx, 0),
p_dead_hosp = ifelse(idx != 0, dead_hosp/idx, 0))
# Order data.frame
df_ssa_covid_MCMA_prop <- df_ssa_covid_MCMA_prop[order(df_ssa_covid_MCMA_prop$date),]
df_ssa_covid_MCMA_prop <- df_ssa_covid_MCMA_prop %>%
mutate(date0 = as.numeric(date - date[1]))
# ADIP's data
df_adip_covid_MCMA_prop <- df_hosp_MCMA %>%
mutate(date = as.Date(dates)) %>%
left_join(df_ssa_covid_MCMA_idx,
by = "date") %>%
filter(date >= n_date_ini & date <= n_date_last) %>%
mutate(p_hosp = ifelse(idx != 0, hosp_tot/idx, 0),
p_vent_hosp = bed_icu_tot/hosp_tot,
p_novent_hosp = bed_noicu_tot/hosp_tot,
date0 = as.numeric(date - date[1])) %>%
select(date, date0, idx, dead, hosp, hosp_tot, vent, novent, icu, p_hosp, p_vent_hosp, p_novent_hosp)
l_hosp_prop <- list(ssa = df_ssa_covid_MCMA_prop,
adip = df_adip_covid_MCMA_prop)
if(save_data){
save(l_hosp_prop, file = paste0("data/l_hosp_prop_",n_time_stamp,".RData"))
}
return(l_hosp_prop)
}
#' Plot proportion of incident detected cases hospitalized
#'
#' \code{plot_hosp_prop} plot observed and estimated proportion of incident
#' detected cases hospitalized.
#'
#' @param df_hosp_prop Data.frame of data of observed and estimated proportion
#' of incident detected cases hospitalized to be plotted.
#' @param save_plot Flag (default is FALSE) of whether to save the plot.
#' @return
#' A ggplot object.
plot_hosp_prop <- function(df_hosp_prop,
save_plot = FALSE){
df_hosp_prop$type <- as.factor(df_hosp_prop$type)
abbrev_state <- unique(df_hosp_prop$state)
Outcome <- unique(df_hosp_prop$Outcome)
# Name of plot
if(Outcome == "Total hospitalizations"){
plot_name <- "TotHospProp_"
}else if(Outcome == "Hospitalizations with ventilator"){
plot_name <- "ICUHospProp_"
}else if(Outcome == "Hospitalizations without ventilator"){
plot_name <- "NoICUHospProp_"
}
# Colors
v_colors_names <- levels(df_hosp_prop$type)
v_colors <- c("red", "black")
names(v_colors) <- v_colors_names
# Fill
v_colors_fill <- c("red", NA)
names(v_colors_fill) <- v_colors_names
# Linetype
v_linetype <- c("solid", 'blank')
names(v_linetype) <- v_colors_names
# Shape
v_shape <- c(NA, 8)
names(v_shape) <- v_colors_names
plot_temp <- ggplot(subset(df_hosp_prop, type == "Estimated"),
aes(x = date, y = prop,
ymin = lb, ymax = ub,
color = type, fill = type, shape = type)) +
geom_line(size = 1.2) +
geom_point(data = subset(df_hosp_prop, type == "Observed"),
size = 2) +
geom_ribbon(aes(x = date,
y = prop,
ymin = lb,
ymax = ub,
fill = type),
alpha = 0.2) +
scale_shape_manual(values = v_shape) +
scale_color_manual(values = v_colors) +
scale_fill_manual(values = v_colors_fill) +
guides(fill = guide_legend(title = ""),
shape = guide_legend(title = ""),
color = guide_legend(title = "",
ncol = 2,
override.aes = list(
shape = v_shape,
linetype = v_linetype,
color = v_colors,
fill = v_colors_fill))) +
scale_x_date("",
date_labels = "%m/%d", #Change the format of the date in the x axis
breaks = number_ticks(14)) +
ylab("Proportion") +
theme(text = element_text(size = 13),
plot.caption = element_text(hjust = 0,
size = 12),
axis.text.x = element_text(angle = 90,
hjust = 1,
colour = "black"),
axis.text.y = element_text(colour = "black"),
panel.background = element_rect(fill = "white",
colour = "gray",
size = 0.15,
linetype = "solid"),
panel.border = element_rect(colour = "black",
fill = NA,
size = 0.7),
legend.position = "bottom",
legend.text = element_text(size = 10),
legend.justification = "center",
legend.direction = "horizontal",
legend.title = element_blank(),
legend.key = element_rect(fill = "transparent",
colour = "transparent")) +
labs(title = Outcome)
if(save_plot){
ggsave(paste0("figs/",plot_name,"observed_vs_estimated_",abbrev_state,".pdf"),
width = 10, height = 7)
}
return(plot_temp)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.