knitr::opts_chunk$set(
  message=FALSE, 
  warning=FALSE,
  collapse = TRUE,
  fig.height=5, 
  fig.width=8,
  comment = "#>"
)

The covid19sf_vaccine_demo and covid19sf_vaccine_demo_ts datasets provide demographics information on the San Francisco vaccination effort:

San Francisco vaccination summary

The covid19sf_vaccine_demo summarizes the COVID-19 vaccine doses given to San Franciscans by demographics (age and race groups) and includes the following fields:

library(covid19sf)

data("covid19sf_vaccine_demo")

head(covid19sf_vaccine_demo)

str(covid19sf_vaccine_demo)

Using this dataset, we can answer the following questions:

Doses given to San Franciscans by age group:

In the following example, we will explore the distribution of San Francisco population by age group. Let's start by filtering the data by all providers and age group demographics:

library(dplyr)
library(plotly)

df_age <- covid19sf_vaccine_demo %>%
  filter(administering_provider_type == "All Providers", 
         demographic_group == "Age Bracket",
         age_group == "All")

df_age

Next, we will sort the data by age group and convert the demographic_subgroup into an ordered factor:

df_age <- df_age %>%
  arrange(demographic_subgroup_sort_order) %>%
  mutate(age_ordered = factor(demographic_subgroup, levels = demographic_subgroup))

Let's start by plotting the distribution of San Francisco vaccinated population using the following groups:

Since the total_2nd_doses variable (received the second dose) is a subset of total_1st_doses (received only the first out of two doses), we will have to exclude it:

df_age <- df_age %>%
  mutate(only_1st_dose = total_1st_doses - total_2nd_doses)

Next, we will collapse and aggregate the data by vaccination status:

df_summary <- df_age %>%
  summarise(`First Dose` = sum(only_1st_dose),
            `Two Doses` = sum(total_2nd_doses),
            `Single Dose` = sum(total_single_doses),
            `Total Pop` = sum(subgroup_population)) %>%
  mutate(`Not Received Vaccine` = `Total Pop` - `First Dose` - `Two Doses` - `Single Dose`) %>%
  select(- `Total Pop`) %>%
  t() %>%
  as.data.frame() %>%
  select(count = V1)

df_summary$type <- rownames(df_summary)

head(df_summary)

Using the df_summary, let's plot the distribution of San Francisco vaccination status:

plot_ly(data = df_summary,
        labels = ~ type,
        values = ~ count,
        textposition = 'inside',
        textinfo = 'label+percent',
        insidetextfont = list(color = '#FFFFFF',
                              size = 16),
        marker = list(colors = c("rgb(35, 90, 122)", "rgb(128, 200, 215)",
                                 "rgb(74, 168, 192)", "rgb(225, 100, 74)"),
                      line = list(color = '#FFFFFF', width = 1)),
        type = "pie",
        showlegend = FALSE) %>%
  layout(title = "Distribution of COVID-19 Vaccine Doses Given to San Franciscans",
         font = list(family = "Arieal",
                     size = 18),
         margin = list(t = 80, b = 60)) %>%
  add_annotations(text = paste("- Including children under the age of 12",
                               "- The First and Second Doses refer to either Pfizer or Moderna vaccine",
                               "- Single Dose refers to J&J vaccine", sep = "<br>"),
                  font = list(family = "Arieal",
                     size = 14),

                  x = -0.02,
                  y = -0.15,
                  align = "left",
                  xref = "paper",
                  yref = "paper",
                  showarrow = FALSE)

As can see in the plot above, about r paste(round(100 *sum(df_summary[which(df_summary$type%in% c("Single Dose", "Two Doses")),]$count) / sum(df_summary$count)),"%", sep = "") of the SF population completed the vaccination process, and r paste(round(100 *sum(df_summary[which(df_summary$type%in% c("First Dose")),]$count) / sum(df_summary$count)),"%", sep = "") received the first dose out of two. About r paste(round(100 *sum(df_summary[which(df_summary$type%in% c("Not Received Vaccine")),]$count) / sum(df_summary$count)),"%", sep = "") did not vaccinate. Note that the distribution includes children under the age of 12, which at this point are not vaccinated. If we want to get a better distribution of the vaccination effort of the SF population, let's exclude children under the age of 12:

df_summary2 <- df_age %>%
  filter(!demographic_subgroup %in% c("0-4", "5-11")) %>%
  summarise(`First Dose` = sum(only_1st_dose),
            `Two Doses` = sum(total_2nd_doses),
            `Single Dose` = sum(total_single_doses),
            `Total Pop` = sum(subgroup_population)) %>%
  mutate(`Not Received Vaccine` = `Total Pop` - `First Dose` - `Two Doses` - `Single Dose`) %>%
  select(- `Total Pop`) %>%
  t() %>%
  as.data.frame() %>%
  select(count = V1)

df_summary2$type <- rownames(df_summary2)

plot_ly(data = df_summary2,
        labels = ~ type,
        values = ~ count,
        textposition = 'inside',
        textinfo = 'label+percent',
        insidetextfont = list(color = '#FFFFFF',
                              size = 16),
        marker = list(colors = c("rgb(35, 90, 122)", "rgb(128, 200, 215)",
                                 "rgb(74, 168, 192)", "rgb(225, 100, 74)"),
                      line = list(color = '#FFFFFF', width = 1)),
        type = "pie",
        showlegend = FALSE) %>%
  layout(title = "Distribution of COVID-19 Vaccine Doses Given to San Franciscans",
         font = list(family = "Arieal",
                     size = 18),
         margin = list(t = 80, b = 60)) %>%
  add_annotations(text = paste("- Excluding children under the age of 12",
                               "- The First and Second Doses refer to either Pfizer or Moderna vaccine",
                               "- Single Dose refers to J&J vaccine", sep = "<br>"),
                  font = list(family = "Arieal",
                     size = 14),

                  x = -0.02,
                  y = -0.15,
                  align = "left",
                  xref = "paper",
                  yref = "paper",
                  showarrow = FALSE)

After excluding children under the age of 12 group, we can see from the plot above that r paste(round(100 *sum(df_summary2[which(df_summary2$type%in% c("Single Dose", "Two Doses")),]$count) / sum(df_summary2$count)),"%", sep = "") of the SF population (age 12 and above) completed the vaccination process, and r paste(round(100 *sum(df_summary2[which(df_summary2$type%in% c("First Dose")),]$count) / sum(df_summary2$count)),"%", sep = "") received the first dose.

Let's now plot the total population that received the full doses of the vaccine (single for J&J and two for Pfizer and Moderna):

plot_ly(data = df_age) %>%
  add_trace(x = ~ age_ordered,
            y = ~ total_2nd_doses,
            type = "bar",
            name = "Pfizer or Moderna") %>%
  add_trace(x = ~ age_ordered,
            type = "bar",
            y = ~ total_single_doses,
            name = "J&J") %>%
  layout(title = "San Francisco Fully Vaccinated Population by Age and Vaccine Type",
         barmode = "stack",
         yaxis = list(title = "Population"),
         xaxis = list(title = "Age Group"))

Last but not least, we will use a bar plot to describe the distribution, in percentage, between the city population that fully, partial, and not vaccinated. We will pre-calculate the percentage break down for each age group:

d1a <- covid19sf_vaccine_demo %>%
  filter(administering_provider_type == "All Providers",
                demographic_group == "Age Bracket",
                age_group == "All") %>%
  mutate(only_1st = total_1st_doses - total_2nd_doses,
                not_vaccinated = subgroup_population - total_series_completed - only_1st,
                first_per = 100 * only_1st / subgroup_population,
                second_per = 100 * total_2nd_doses / subgroup_population,
                single_per = 100 * total_single_doses / subgroup_population,
                not_vaccinated_per = 100 * not_vaccinated / subgroup_population,
                complete_per = 100 - not_vaccinated_per - first_per) %>%
  arrange(demographic_subgroup_sort_order) %>%
  mutate(age = factor(demographic_subgroup, levels = demographic_subgroup)) %>%
  select(age, first_per, second_per, single_per, not_vaccinated_per, complete_per)


d1a

Inspired from The Economist data visualization, for the following plot, we will use The Economist default color palette for bar plot:

# plot setting
font_size <- 22
opacity <- 0.95
background <- "rgb(225, 236, 242)"
complete_color <- paste("rgba(47, 106, 158,", opacity ,")", sep = "")
first_color <-  paste("rgba(100, 184, 206,", opacity ,")", sep = "")
not_vaccinated_color <- paste("rgba(162, 32, 61,", opacity ,")", sep = "")

# Plot
plot_ly(data = d1a,
        x = ~ complete_per,
        y = ~ age,
        type = 'bar', orientation = 'h',
        name = "Completed Two/Single Doses",
        marker = list(color = complete_color,
                      line = list(color = toRGB("gray50"), width = 1))) %>%
  add_trace(x = ~ first_per ,
            marker = list(color =first_color),
            name = "First Dose") %>%
  add_trace(x = ~ not_vaccinated_per ,
            marker = list(color = not_vaccinated_color),
            name = "Not Vaccinated") %>%
  layout(xaxis = list(title = "",
                      showgrid = TRUE,
                      showline = FALSE,
                      showticklabels = TRUE,
                      zeroline = TRUE,
                      tickwidth = 2,
                      tickcolor = toRGB("gray50"),
                      gridwidth = 2,
                      gridcolor = toRGB("gray50"),
                      side ="top",
                      # domain = c(0.1, 1),
                      ticksuffix = "%"),
         yaxis = list(title = "Age Group",
                      # domain = c(0, 0.8),
                      showgrid = FALSE,
                      showline = FALSE,
                      showticklabels = TRUE,
                      zeroline = FALSE),
         barmode = "stack",
         plot_bgcolor = background,
         paper_bgcolor = background,
         margin = list(l = 70, t = 100, b = 60),
         legend = list(orientation = 'h')) %>%
  add_annotations(text = "Source: San Francisco, Department of Public Health - Population Health Division",
                  font = list(size = 16,
                              family = "Ariel"),
                  align = "left",
                  x = 0,
                  y = - 0.08,
                  xref = "paper",
                  yref = "paper",
                  showarrow = FALSE) %>%
  add_annotations(text = "San Francisco COVID-19 Vaccination Distribution by Age Group",
                  y = 1.19,
                  x = -0.03,
                  font = list(family = "Ariel",
                              size =  24,
                              color = "black"),
                  yref = "paper",
                  xref = "paper",
                  align = "left",
                  valign = "middle",
                  showarrow = FALSE) 

San Francisco vaccination time series

The covid19sf_vaccine_demo_ts dataset is a time series format of the covid19sf_vaccine_demo dataset, and includes the following fields:

Let's load and review the dataset:

data("covid19sf_vaccine_demo_ts")

str(covid19sf_vaccine_demo_ts)

unique(covid19sf_vaccine_demo_ts$demographic_group)

This dataset enables us to review the vaccination progress of SF population by age and race/ethnicity group over time. Let's review the vaccination of the SF population by race/ethnicity group:

df <- covid19sf_vaccine_demo_ts %>%
  dplyr::filter(overall_segment == "All Ages by Race/Ethnicity, Administered by All Providers") 

table(df$demographic_subgroup)

df %>%
  plot_ly(x = ~ date_administered,
          y = ~ new_series_completed,
          color = ~ demographic_subgroup,
          type = "scatter",
          mode = "line") %>%
  layout(title = "San Francisco COVID-19 Daily Vaccination by Race/Ethnicity",
         yaxis = list(title = "Population"),
         xaxis = list(title = "Source: San Francisco, Department of Public Health - Population Health Division"),
         legend = list(x = 0, y = 1.1),
         margin = list(t = 90))

Similarly, we can plot the vaccination progress over time by race/ethnicity group. Note that there is some calculation error for Other Race demographic sub-group as the cumulative values of the group surpass the sub-group population. Therefore, we will exclude it from the calculation:

df %>%
  filter(demographic_subgroup != "Other Race") %>%
  plot_ly(x = ~ date_administered,
                  y = ~ 100 *cumulative_series_completed /subgroup_population,
                  color = ~ demographic_subgroup,
                  hoverinfo = "text",
                  text = ~ paste(demographic_subgroup, "<br>",
                                 "Date:", date_administered, "<br>",
                                 "Sub-Group Population:", subgroup_population, "<br>",
                                 "Completed Series:", cumulative_series_completed,
                                 paste("(", round(100 *cumulative_series_completed /subgroup_population) ,"%)", sep = "")),
                  type = "scatter",
                  mode = "line") %>%
  layout(title = "Proportion of San Francisco COVID-19 Vaccinated Population by Race/Ethnicity",
         yaxis = list(title = "Percentage", 
                      ticksuffix = "%"),
         xaxis = list(title = "Source: San Francisco, Department of Public Health - Population Health Division"),
         legend = list(x = 0.05, y = 0.95),
         margin = list(t = 90))


RamiKrispin/covid19sf documentation built on April 3, 2023, 4:11 p.m.