library(tidyverse)
library(geojsonsf)
# Get CA & SF data
# Download today's California (data.ca.gov) and San Francisco (data.sfgov.org)
# COVID extracts, clean them, and cache the result as
# data/get/got/CA_SF_data<date>.Rdata. When today's cache file already exists
# it is loaded instead, so the downloads run at most once per day.
#
# Objects created (or loaded) in the calling environment:
#   sf_all, sf_case, sf_hosp, sf_test, sf_geo, sf_case_race, sf_geo_null,
#   CA_cases, CA_hosp, CA_tests

# Resolve the date and directory once so every path built during a run agrees,
# even if the script happens to straddle midnight.
run_date <- Sys.Date()
got_dir <- here::here("data", "get", "got")
rdata_path <- file.path(got_dir, paste0("CA_SF_data", run_date, ".Rdata"))

# Build the dated path of a raw download: <got_dir>/<stem><run_date><ext>.
raw_path <- function(stem, ext = ".csv") {
  file.path(got_dir, paste0(stem, run_date, ext))
}

# Download `url` to the dated raw file for `stem` and return that path.
# Stops with an error if the download fails or produces an empty file, rather
# than letting a later read fail with a less informative message.
fetch_raw <- function(stem, url, ext = ".csv") {
  dest <- raw_path(stem, ext)
  status <- utils::download.file(url, destfile = dest, mode = "wb", quiet = TRUE)
  if (status != 0 || !file.exists(dest) || file.size(dest) == 0) {
    stop("Download failed for ", url, call. = FALSE)
  }
  dest
}

# Test for the exact cache file that the else-branch loads. Checking for "any
# file containing today's date" would send a run that follows a failed run
# (leftover csv, no .Rdata) into load() on a file that does not exist.
if (!file.exists(rdata_path)) {
  dir.create(got_dir, recursive = TRUE, showWarnings = FALSE)

  # Get CA data -----------------
  # NOTE(review): pad() runs before group_by(county), so padding is over the
  # pooled date column rather than per county, and rows are not explicitly
  # sorted by date before the rolling mean -- confirm this is intended.
  # CA_cases is left grouped by county, as before.
  CA_cases <- read.csv(fetch_raw(
    "CA_cases",
    "https://data.ca.gov/dataset/590188d5-8545-4c93-a9a0-e230f0db7290/resource/926fd08f-cc91-4828-af38-bd45de97f8c3/download/statewide_cases.csv"
  )) %>%
    filter(county != "Unassigned") %>%
    mutate(date = as.Date(date)) %>%
    padr::pad() %>%
    replace_na(list(
      totalcountconfirmed = 0,
      totalcountdeaths = 0,
      newcountconfirmed = 0,
      newcountdeaths = 0
    )) %>%
    group_by(county) %>%
    # fill = NA is the supported spelling of the deprecated na.pad = TRUE.
    mutate(newcount7day = zoo::rollmean(newcountconfirmed, 7, fill = NA, align = "left"))

  CA_tests <- read.csv(fetch_raw(
    "CA_tests",
    "https://data.ca.gov/dataset/efd6b822-7312-477c-922b-bccb82025fbe/resource/b6648a0d-ff0a-4111-b80b-febda2ac9e09/download/statewide_testing.csv"
  ))

  CA_hosp <- read.csv(fetch_raw(
    "CA_hosp",
    "https://data.ca.gov/dataset/529ac907-6ba1-4cb7-9aae-8966fc96aeef/resource/42d33765-20fd-44b8-a978-b083b7542225/download/hospitals_by_county.csv"
  ))

  # Get SF data ----------------------
  sf_hosp_file <- fetch_raw(
    "SF_hosp",
    "https://data.sfgov.org/api/views/nxjg-bhem/rows.csv?accessType=DOWNLOAD"
  )
  sf_case_file <- fetch_raw(
    "SF_case",
    "https://data.sfgov.org/api/views/tvq9-ec9w/rows.csv?accessType=DOWNLOAD"
  )
  sf_test_file <- fetch_raw(
    "SF_test",
    "https://data.sfgov.org/api/views/nfpa-mg4g/rows.csv?accessType=DOWNLOAD"
  )
  sf_race_file <- fetch_raw(
    "SF_case_by_race",
    "https://data.sfgov.org/api/views/vqqm-nsqg/rows.csv?accessType=DOWNLOAD"
  )
  sf_geo_file <- fetch_raw(
    "SF_geo",
    "https://data.sfgov.org/api/views/d2ef-idww/rows.geojson?accessType=DOWNLOAD",
    ext = ".geojson"
  )

  # Clean SF data ---------------------
  ### Hospitalizations
  # Each row is classified as ICU/HOSP x PUI/CONF, spread into one column per
  # combination, then summed per day.
  sf_hosp <- read.csv(sf_hosp_file) %>%
    mutate(
      Date = as.Date(reportDate, format = "%Y/%m/%d"),
      type = ifelse(DPHCategory == "ICU", "ICU", "HOSP"),
      conf = ifelse(CovidStatus == "PUI", "PUI", "CONF"),
      hosp_stat = paste(type, conf, sep = "_")
    ) %>%
    pivot_wider(
      names_from = hosp_stat,
      values_from = PatientCount
    ) %>%
    group_by(Date) %>%
    summarise(
      ICU_PUI = sum(ICU_PUI, na.rm = TRUE),
      ICU_CONF = sum(ICU_CONF, na.rm = TRUE),
      HOSP_PUI = sum(HOSP_PUI, na.rm = TRUE),
      HOSP_CONF = sum(HOSP_CONF, na.rm = TRUE)
    ) %>%
    arrange(Date) %>%
    mutate(
      HOSP_tot = ICU_CONF + HOSP_CONF,
      HOSP_max = ICU_CONF + HOSP_CONF + ICU_PUI + HOSP_PUI,
      cumICUconf = cumsum(ICU_CONF),
      cumICUpui = cumsum(ICU_PUI),
      cumHOSPconf = cumsum(HOSP_CONF),
      cumHOSPpui = cumsum(HOSP_PUI)
    )

  ### Cases
  sf_case_raw <- read.csv(sf_case_file) %>%
    mutate(Date = as.Date(Specimen.Collection.Date))
  #sf_case_raw %>% filter(case_disposition == "Confirmed") %>% ggplot() + geom_line(aes(x = Date, y = case_count, col = transmission_category)) + theme_bw()
  #sf_case_raw %>% filter(case_disposition == "Confirmed") %>% group_by(Date) %>% summarise(tot_cases = sum(case_count), cont_cases = case_count[which(transmission_category == "From Contact")], prop_contact = cont_cases/tot_cases) %>% ggplot() + geom_line(aes(x = Date, y = prop_contact)) + theme_bw() + labs(title = "Proportion of cases identified from contact")

  # Daily totals of confirmed cases and deaths, overall and split by
  # transmission category, followed by running totals.
  sf_case <- sf_case_raw %>%
    padr::pad(., start_val = as.Date("2020-03-01")) %>%
    pivot_wider(
      names_from = Case.Disposition,
      values_from = Case.Count
    ) %>%
    group_by(Date) %>%
    summarise(
      Cases = sum(Confirmed, na.rm = TRUE),
      Deaths = sum(Death, na.rm = TRUE),
      Cases_Community = sum(Confirmed[which(Transmission.Category == "Community")], na.rm = TRUE),
      Cases_Contact = sum(Confirmed[which(Transmission.Category == "From Contact")], na.rm = TRUE),
      Cases_Unknown = sum(Confirmed[which(Transmission.Category == "Unknown")], na.rm = TRUE),
      Deaths_Community = sum(Death[which(Transmission.Category == "Community")], na.rm = TRUE),
      Deaths_Contact = sum(Death[which(Transmission.Category == "From Contact")], na.rm = TRUE),
      Deaths_Unknown = sum(Death[which(Transmission.Category == "Unknown")], na.rm = TRUE)
    ) %>%
    arrange(Date) %>%
    mutate(
      cum_case = cumsum(Cases),
      cum_death = cumsum(Deaths)
    )

  ### Cases by race
  # Race.Ethnicity labels are recoded to numeric codes 1-8; "Unknown" (and any
  # label not listed) becomes NA.
  sf_case_race <- read.csv(sf_race_file) %>%
    mutate(
      Date = as.Date(Specimen.Collection.Date, format = "%Y/%m/%d"),
      Race = case_when(
        Race.Ethnicity == "White" ~ 1,
        Race.Ethnicity == "Black or African American" ~ 2,
        Race.Ethnicity == "Native American" ~ 3,
        Race.Ethnicity == "Asian" ~ 4,
        Race.Ethnicity == "Native Hawaiian or Other Pacific Islander" ~ 5,
        Race.Ethnicity == "Other" ~ 6,
        Race.Ethnicity == "Multi-racial" ~ 7,
        Race.Ethnicity == "Hispanic or Latino/a, all races" ~ 8,
        Race.Ethnicity == "Unknown" ~ NA_real_
      )
    ) %>%
    rename(
      "New_Cases" = New.Confirmed.Cases,
      "Cum_Cases" = Cumulative.Confirmed.Cases
    ) %>%
    dplyr::select(Date, Race, New_Cases, Cum_Cases)

  ### Tests
  sf_test <- read.csv(sf_test_file) %>%
    mutate(Date = as.Date(specimen_collection_date)) %>%
    arrange(Date) %>%
    mutate(
      cum_tests = cumsum(tests),
      cum_pos = cumsum(pos)
    )

  # Combine tests, cases and hospitalizations by date. `time` counts days since
  # 2020-02-29, so the filter keeps 2020-03-01 onward.
  sf_all <- sf_test %>%
    dplyr::select(Date, tests, pos, neg, pct, indeterminate, cum_tests, cum_pos) %>%
    full_join(sf_case, by = "Date") %>%
    full_join(sf_hosp, by = "Date") %>%
    mutate(time = as.integer(Date - as.Date("2020-02-29"))) %>%
    filter(time > 0)

  ### SF cases by geography
  sf_geo <- geojson_sf(sf_geo_file) %>%
    filter(area_type == "Census Tract") %>%
    mutate(
      Date = as.Date(specimen_collection_date),
      acs_population = as.numeric(acs_population),
      new_confirmed_cases = as.numeric(new_confirmed_cases),
      cumulative_confirmed_cases = as.numeric(cumulative_confirmed_cases),
      rate_of_cumulative_confirmed_case = as.numeric(rate_of_cumulative_confirmed_case)
    ) %>%
    dplyr::select(
      Date, id, acs_population,
      new_confirmed_cases, cumulative_confirmed_cases, rate_of_cumulative_confirmed_case,
      geometry
    )

  # Same table with the geometry column dropped.
  sf_geo_null <- sf_geo %>% sf::st_set_geometry(NULL)

  # Save final object
  save(
    list = c(
      "sf_all", "sf_case", "sf_hosp", "sf_test", "sf_geo", "sf_case_race", "sf_geo_null",
      "CA_cases", "CA_hosp", "CA_tests"
    ),
    file = rdata_path
  )

  # Delete csvs, geojson, and any older data files
  got_files <- list.files(got_dir)
  # fixed = TRUE: match the literal extension text, not "." as a regex wildcard.
  is_raw <- grepl(".csv", got_files, fixed = TRUE) |
    grepl(".geojson", got_files, fixed = TRUE)
  # file.path() on an empty selection yields character(0), so unlink() is never
  # handed the bare directory path when nothing matches.
  unlink(file.path(got_dir, got_files[is_raw]))

  del_date <- run_date - 3
  if (del_date != as.Date("2020-02-10")) { # Keep for sims to run through January so don't have to run entire script each new day
    stale <- got_files[grepl(format(del_date), got_files, fixed = TRUE)]
    unlink(file.path(got_dir, stale))
  }
} else {
  load(rdata_path)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.