knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library(covid19interventions)
library(rvest)
library(dplyr)
library(ggplot2)
library(tidyverse)
library(readr)
library(DT)
library(changepoint)
library(zoo)

Google Mobility Data

# load sah state dates from webscraping
# data("master")

# state stay at home order dates
sah_states <- sah %>% filter(county == 'All')
# counties in states with no stay at home orders
sah_counties <- sah %>% filter(county != 'All')

# join sah and google data
df_joined <- inner_join(mobility, sah_states, 
                        by=c('sub_region_1' = c('state')))

# add approx values for missing NA values by county
df_joined <- df_joined %>%
  group_by(sub_region_1, sub_region_2) %>%
  mutate(workplace_int = na.approx(workplaces_percent_change_from_baseline,
                                   na.rm = FALSE)) %>% 
  ungroup()

Change Point Detection

# select unique state and county in dataframe
state_county <- df_joined %>% select(sub_region_1,sub_region_2) %>% distinct() 

# create blank df for loop
county_wp_mean <- data.frame(sub_region_1 = character(), 
                sub_region_2 = character(), 
                wp_low_mean = double())

# for each county, extract workplace numbers as numeric
# filter to dates before state stay at home and remove NA values
for (i in 1:nrow(state_county)){
    # workplace sorted by date as numeric
    # print (c(state_county$sub_region_1[i], state_county$sub_region_2[i] ))
    county <- df_joined %>% filter(sub_region_1 == state_county$sub_region_1[i] & 
                               sub_region_2 == state_county$sub_region_2[i] &
                               date < order_date &
                               !is.na(workplace_int)) %>%
    select(workplace_int) %>% 
    as.matrix() %>% 
    as.numeric()
    # exclude counties that do not have enough data points
    # need at least 2 data points for change detection
    # county must have at least a month of data to be included
    if (length(county) > 30){
    # Change detection to identify changes in mean
    # changes in variance didn't return much
    # using AMOC:At Most One Change method
    county_mean <- cpt.mean(county,method='AMOC')
    # print lowest change in mean before state stay at home
    low_mean <- min(param.est(county_mean)$mean)
    # add to dataframe
    county_wp_mean <- rbind(county_wp_mean, 
                            data.frame(sub_region_1 = state_county$sub_region_1[i],
                                  sub_region_2 = state_county$sub_region_2[i],
                                  wp_low_mean = round(low_mean, 2)))
  }
}

# higher rank for counties with highest drop
# no filter applied
counties_ranks <- county_wp_mean %>% 
  arrange(wp_low_mean) %>% 
   mutate(rank = dense_rank(wp_low_mean))
datatable(counties_ranks)

Test Mean Change by County

# Test for one county
x <-  df_joined %>% filter(sub_region_1 == "California" & 
                             sub_region_2 == "Los Angeles County" &
                             date < order_date &
                             !is.na(workplaces_percent_change_from_baseline)) %>%
  select('workplaces_percent_change_from_baseline') %>%  
  as.matrix() %>% 
  as.numeric()
# changes in mean
v1.man=cpt.mean(x,method='AMOC')
# example plot
plot(v1.man,cpt.col='blue')
# print means
param.est(v1.man)$mean
# mobility change in County by type
df_joined %>% 
  filter(sub_region_1 == "California" & sub_region_2 == "San Francisco County") %>%
  select("date",
        "workplaces_percent_change_from_baseline")%>%
  pivot_longer(names_to = "variable", values_to = "value", -date) %>% 
  mutate(type = str_replace(variable, "_percent_change_from_baseline", ""))%>%
  ggplot(aes(y = value,x = date, group= type, color = type))+
  geom_line() +
  ggtitle("Workplace Percent Change From Baseline")+
  theme_bw()+
  theme(axis.text.x = element_text(angle = 90), 
        legend.title = element_blank(),
        legend.position="bottom") +
  scale_x_date(date_breaks = "1 day")
# add to master dates for counties that have already been collected
data("master")
# join sah and google data
master_joined <- full_join(counties_ranks, master, 
                           by = c("sub_region_1" = "admin1", 
                                  "sub_region_2" = "admin2"))
# sort asc
master_joined %>% 
  arrange(rank)
# join state sah date
master_joined <- left_join(master_joined, sah_states, 
                           by=c('sub_region_1' = c('state')))
write_csv(master_joined,"./data/master.csv", na= "")


agroimpacts/covid19interventions documentation built on May 6, 2020, 12:35 a.m.