In RamiKrispin/covid19sf: The Covid19 San Francisco Dataset

# Global knitr chunk options for the chunks that follow:
#   collapse  - passed as TRUE (merges source and output per the knitr docs)
#   comment   - prefix placed in front of printed output lines
#   fig.path  - path prefix used for generated figure files
#   out.width - display width of figures in the rendered document
knitr::opts_chunk$set(
  collapse = TRUE, 
  comment = "#>",
  fig.path = "man/figures/README-",
  out.width = "100%"
)

# Bind the magrittr pipe to a local name so `%>%` can be used below
# without attaching the magrittr package itself.
`%>%` <- magrittr::`%>%`

covid19sf

The covid19sf package provides a daily summary of the covid19 cases in San Francisco. The package includes the following datasets:

covid19sf_geo - Confirmed cases and deaths summarized by geography
covid19sf_hospital - Hospital capacity data
covid19sf_hospitalizations - Hospitalizations data
covid19sf_housing - Alternative housing sites
covid19sf_test_loc - Testing locations
covid19sf_tests - Daily number of tests
covid19sf_vaccine_demo - Summary of vaccine doses given to San Franciscans by demographics groups (age and race)
covid19sf_vaccine_demo_ts - Time series view of vaccine doses given to San Franciscans by demographics groups (age and race)
covid19sf_vaccine_geo - COVID-19 vaccines given to San Franciscans by geography
covid19sf_population - COVID-19 cases by population characteristics over time

The following datasets were deprecated and replaced by the covid19sf_population dataset:

covid19sf_demo - Cases summarized by date, transmission and case disposition
covid19sf_homeless - Confirmed cases by homelessness
covid19sf_age - Cases summarized by age group
covid19sf_gender - Confirmed cases summarized by gender
covid19sf_summary - Cases summarized by date, transmission and case disposition

Data source: San Francisco Department of Public Health - Population Health Division, through the San Francisco Open Data portal website

Installation

# Install the package from its GitHub repository.
# Uncomment the next line first if devtools is not installed yet.
# install.packages("devtools")
devtools::install_github("RamiKrispin/covid19sf")

Usage

The covid19sf package provides different views of the covid19 cases in San Francisco. That includes case distribution by age, gender, race, etc. The following examples demonstrate some of the data use cases.

library(covid19sf)

Cases distribution by demographic

The covid19sf_population dataset provides a daily summary of new and cumulative positive cases by the following demographic groups:

Age group
Comorbidities
Gender
Homelessness
Race/Ethnicity
Sexual Orientation
Single Room Occupancy Tenancy
Skilled Nursing Facility Occupancy
Transmission Type

# Load the bundled population dataset into the session and preview it.
data("covid19sf_population")

head(covid19sf_population, n = 6L)

Cases distribution by age

To get cases view by age group we will use the characteristic_type variable to filter the data:

library(dplyr)

# Keep only the rows whose characteristic type is the age-group breakdown.
df_age <- dplyr::filter(
  covid19sf_population,
  characteristic_type == "Age Group"
)

head(df_age, n = 6L)

Ordering the age groups before plotting the cases distribution:

# Unique (group, sort order) pairs, arranged by the sort-order column.
age_order <- dplyr::arrange(
  dplyr::distinct(
    dplyr::select(df_age, characteristic_group, characteristic_group_sort_order)
  ),
  characteristic_group_sort_order
)

# Turn the group label into a factor whose levels follow that ordering,
# so plots show the age groups in sequence rather than alphabetically.
df_age[["characteristic_group"]] <- factor(
  df_age[["characteristic_group"]],
  levels = age_order[["characteristic_group"]]
)

The following box-plot shows the distribution of the positive cases by age group:

library(plotly)

# One box per age group; boxpoints = "all" also draws the individual
# daily observations next to each box.
df_age %>%
  plotly::plot_ly(
    type = "box",
    y = ~new_cases,
    color = ~characteristic_group,
    boxpoints = "all",
    jitter = 0.3,
    pointpos = -1.8
  ) %>%
  plotly::layout(
    title = "Distribution of Daily New COVID-19 Cases in San Francisco by Age Group",
    yaxis = list(title = "Number of Cases"),
    # The x-axis title is used as the data-source caption.
    xaxis = list(title = "Source: San Francisco Department of Public Health"),
    legend = list(x = 0.9, y = 0.9),
    margin = list(t = 60, b = 60, l = 60, r = 60)
  )

library(plotly)

# Build the box plot of daily new cases by age group, step by step.
p1 <- plotly::plot_ly(
  data = df_age,
  type = "box",
  y = ~new_cases,
  color = ~characteristic_group,
  boxpoints = "all",
  jitter = 0.3,
  pointpos = -1.8
)

p1 <- plotly::layout(
  p1,
  title = "Distribution of Daily New COVID-19 Cases in San Francisco by Age Group",
  yaxis = list(title = "Number of Cases"),
  # The x-axis title is used as the data-source caption.
  xaxis = list(title = "Source: San Francisco Department of Public Health"),
  legend = list(x = 0.9, y = 0.9),
  margin = list(t = 60, b = 60, l = 60, r = 60)
)

# Export a static SVG (8 x 5 inches at 96 px per inch) for the README.
orca(p1, "man/figures/age_dist1.svg", width = 8 * 96, height = 5 * 96)

Here is the overall distribution of cases by age group as of `r max(df_age$specimen_collection_date)`:

# Pie chart of cumulative cases per age group on the most recent date.
df_age %>%
  dplyr::filter(specimen_collection_date == max(specimen_collection_date)) %>%
  plotly::plot_ly(
    type = "pie",
    values = ~cumulative_cases,
    labels = ~characteristic_group,
    textposition = "inside",
    textinfo = "label+percent",
    insidetextfont = list(color = "#FFFFFF"),
    hoverinfo = "text",
    # Hover text: group, total cases, population estimate and the share of
    # that population with a recorded case.
    text = ~paste(
      " Age Group:", characteristic_group, "<br>",
      "Total:", cumulative_cases, "<br>",
      "Population Estimation:", population_estimate,
      paste0("(", round(100 * cumulative_cases / population_estimate, 1), "%)")
    )
  ) %>%
  plotly::layout(
    title = ~paste("Total Cases Dist. by Age Group as of", max(specimen_collection_date)),
    margin = list(t = 60, b = 20, l = 30, r = 60)
  )

library(dplyr)
library(plotly)

# Pie chart of cumulative cases per age group on the most recent date,
# built stepwise and then exported.
p <- plotly::plot_ly(
  data = dplyr::filter(
    df_age,
    specimen_collection_date == max(specimen_collection_date)
  ),
  type = "pie",
  values = ~cumulative_cases,
  labels = ~characteristic_group,
  textposition = "inside",
  textinfo = "label+percent",
  insidetextfont = list(color = "#FFFFFF"),
  hoverinfo = "text",
  text = ~paste(
    " Age Group:", characteristic_group, "<br>",
    "Total:", cumulative_cases, "<br>",
    "Population Estimation:", population_estimate,
    paste0("(", round(100 * cumulative_cases / population_estimate, 1), "%)")
  )
)

p <- plotly::layout(
  p,
  title = ~paste("Total Cases Dist. by Age Group as of", max(specimen_collection_date)),
  margin = list(t = 60, b = 20, l = 30, r = 60)
)

# Export a static, square SVG (5 x 5 inches at 96 px per inch).
orca(p, "man/figures/age_dist2.svg", width = 5 * 96, height = 5 * 96)

Geospatial visualization

The package provides several geo-spatial datasets:

covid19sf_vaccine_geo - COVID-19 vaccines given to San Franciscans by geography
covid19sf_geo - Confirmed cases and deaths summarized by geography
covid19sf_test_loc - Testing locations

Those three datasets are sf objects, ready to use. For example, plotting the COVID19 vaccination data by geography:

library(sf)

data("covid19sf_vaccine_geo")

str(covid19sf_vaccine_geo)

# Keep the "Analysis Neighborhood" areas and add the completed-series share
# scaled by 100 (presumably converting a fraction to a percentage - confirm
# against the dataset documentation).
df <- dplyr::mutate(
  dplyr::filter(covid19sf_vaccine_geo, area_type == "Analysis Neighborhood"),
  perc_complated = percent_pop_series_completed * 100
)

We will plot the object using the sf package:

# Map the vaccination percentage per neighborhood; only the attribute to be
# drawn and the geometry column are passed to the plot method.
plot(
  df[, c("perc_complated", "geometry")],
  main = "San Francisco - Percentage of Fully Vaccinated Population by Geo",
  axes = TRUE,
  key.pos = 1,
  key.width = lcm(1.2),
  key.length = 1.0
)

More examples are available in this vignette.

Tests results distribution

The covid19sf_tests dataset provides a daily summary of the number of tests and their results (positive, negative, and indeterminate):

# Load the bundled tests dataset into the session and preview it.
data("covid19sf_tests")

head(covid19sf_tests, n = 6L)

The plot below shows the daily distribution of the results of the tests:

# Stacked area chart of daily test results; every trace shares stack group
# "one", so positive, negative and indeterminate counts pile up per date.
plotly::plot_ly(
  data = covid19sf_tests,
  x = ~specimen_collection_date,
  y = ~pos,
  name = "Positive",
  type = "scatter",
  mode = "none",
  stackgroup = "one",
  fillcolor = "red"
) %>%
  plotly::add_trace(
    y = ~neg,
    name = "Negative",
    fillcolor = "green"
  ) %>%
  plotly::add_trace(
    y = ~indeterminate,
    name = "Indeterminate",
    fillcolor = "gray"
  ) %>%
  plotly::layout(
    title = "Tests Results Distribution",
    yaxis = list(title = "Tests Count"),
    # The x-axis title is used as the data-source caption.
    xaxis = list(title = "Source: San Francisco Department of Public Health"),
    legend = list(x = 0.1, y = 0.9)
  )

# Stacked area chart of daily test results, built one trace at a time.
p <- plotly::plot_ly(
  data = covid19sf_tests,
  x = ~specimen_collection_date,
  y = ~pos,
  name = "Positive",
  type = "scatter",
  mode = "none",
  stackgroup = "one",
  fillcolor = "red"
)
p <- plotly::add_trace(p, y = ~neg, name = "Negative", fillcolor = "green")
p <- plotly::add_trace(
  p,
  y = ~indeterminate,
  name = "Indeterminate",
  fillcolor = "gray"
)
p <- plotly::layout(
  p,
  title = "Tests Results Distribution",
  yaxis = list(title = "Tests Count"),
  # The x-axis title is used as the data-source caption.
  xaxis = list(title = "Source: San Francisco Department of Public Health"),
  legend = list(x = 0.1, y = 0.9)
)

# Export a static SVG (8 x 5 inches at 96 px per inch) for the README.
orca(p, "man/figures/test_dist.svg", width = 8 * 96, height = 5 * 96)

Cases distribution by race and ethnicity

The covid19sf_population dataset provides a daily summary of the COVID19 positive cases by race and ethnicity:

# Load the bundled population dataset into the session and preview it.
data("covid19sf_population")

head(covid19sf_population, n = 6L)

Below is a plot of the cumulative positive cases by race and ethnicity:

# Stacked area chart of cumulative cases, one layer per race/ethnicity group.
# Rows are sorted by date before plotting so each layer is drawn in time order.
covid19sf_population %>%
  dplyr::filter(characteristic_type == "Race/Ethnicity") %>%
  dplyr::arrange(specimen_collection_date) %>%
  plotly::plot_ly(
    x = ~specimen_collection_date,
    y = ~cumulative_cases,
    type = "scatter",
    mode = "none",
    color = ~characteristic_group,
    stackgroup = "one"
  ) %>%
  plotly::layout(
    title = "Total Cases Dist. by Race and Ethnicity",
    legend = list(x = 0.05, y = 0.9),
    # ".0f" prints the case counts as whole numbers.
    yaxis = list(title = "Number of Cases", tickformat = ".0f"),
    # The x-axis title is used as the data-source caption.
    xaxis = list(title = "Source: San Francisco Department of Public Health")
  )

# Stacked area chart of cumulative cases by race/ethnicity, built stepwise:
# subset the data, sort it by date, plot it, then style it.
p <- plotly::plot_ly(
  data = dplyr::arrange(
    dplyr::filter(covid19sf_population, characteristic_type == "Race/Ethnicity"),
    specimen_collection_date
  ),
  x = ~specimen_collection_date,
  y = ~cumulative_cases,
  type = "scatter",
  mode = "none",
  color = ~characteristic_group,
  stackgroup = "one"
)
p <- plotly::layout(
  p,
  title = "Total Cases Dist. by Race and Ethnicity",
  legend = list(x = 0.05, y = 0.9),
  # ".0f" prints the case counts as whole numbers.
  yaxis = list(title = "Number of Cases", tickformat = ".0f"),
  # The x-axis title is used as the data-source caption.
  xaxis = list(title = "Source: San Francisco Department of Public Health")
)

# Export a static SVG (8 x 5 inches at 96 px per inch) for the README.
orca(p, "man/figures/demo_dist.svg", width = 8 * 96, height = 5 * 96)