# R imports - might have to install some library(sf) library(tidyverse) library(tmap) library(tmaptools) library(tidycensus) library(tigris) library(rmapshaper) library(matrixStats) library(SpatialAcc) library(reticulate) library(tidygeocoder)
# read in block group quarter data, from the last step in the sql code source("~/git/VDH/src/helper_functions.R") con <- get_db_conn(db_pass = "rsu8zvrsu8zv") va_ookla_bg <- st_read(con, query = "SELECT * FROM dc_working.va_bg_ookla_2019_2021_all_quarters_intersection") md_ookla_bg <- st_read(con, query = "SELECT * FROM dc_working.md_bg_ookla_2019_2021_all_quarters_intersection") dc_ookla_bg <- st_read(con, query = "SELECT * FROM dc_working.dc_bg_ookla_2019_2021_all_quarters_intersection") dbDisconnect(con) # get VA block group summary va_ookla_bg_2 <- st_drop_geometry(va_ookla_bg) %>% group_by(GEOID, year) %>% mutate(frac_devices = devices * tile_percent / sum(devices * tile_percent, na.rm = T), frac_tests = tests * tile_percent / sum(tests * tile_percent, na.rm = T)) %>% summarize(download_devices = sum(avg_d_kbps * frac_devices, na.rm = T) / 1000, download_tests = sum(avg_d_kbps * frac_tests, na.rm = T) / 1000, upload_devices = sum(avg_u_kbps * frac_devices, na.rm = T) / 1000, upload_tests = sum(avg_u_kbps * frac_tests, na.rm = T) / 1000, latency_devices = sum(avg_lat_ms * frac_devices, na.rm = T), latency_tests = sum(avg_lat_ms * frac_tests), devices = sum(devices * tile_percent, na.rm = T), tests = sum(tests * tile_percent, na.rm = T)) # get MD block group summary md_ookla_bg_2 <- st_drop_geometry(md_ookla_bg) %>% group_by(GEOID, year) %>% mutate(frac_devices = devices * tile_percent / sum(devices * tile_percent, na.rm = T), frac_tests = tests * tile_percent / sum(tests * tile_percent, na.rm = T)) %>% summarize(download_devices = sum(avg_d_kbps * frac_devices, na.rm = T) / 1000, download_tests = sum(avg_d_kbps * frac_tests, na.rm = T) / 1000, upload_devices = sum(avg_u_kbps * frac_devices, na.rm = T) / 1000, upload_tests = sum(avg_u_kbps * frac_tests, na.rm = T) / 1000, latency_devices = sum(avg_lat_ms * frac_devices, na.rm = T), latency_tests = sum(avg_lat_ms * frac_tests), devices = sum(devices * tile_percent, na.rm = T), tests = sum(tests * tile_percent, na.rm = T)) # get DC block group summary dc_ookla_bg_2 <- st_drop_geometry(dc_ookla_bg) %>% group_by(GEOID, year) %>% mutate(frac_devices = devices * tile_percent / sum(devices * tile_percent, na.rm = T), frac_tests = tests * tile_percent / sum(tests * tile_percent, na.rm = T)) %>% summarize(download_devices = sum(avg_d_kbps * frac_devices, na.rm = T) / 1000, download_tests = sum(avg_d_kbps * frac_tests, na.rm = T) / 1000, upload_devices = sum(avg_u_kbps * frac_devices, na.rm = T) / 1000, upload_tests = sum(avg_u_kbps * frac_tests, na.rm = T) / 1000, latency_devices = sum(avg_lat_ms * frac_devices, na.rm = T), latency_tests = sum(avg_lat_ms * frac_tests), devices = sum(devices * tile_percent, na.rm = T), tests = sum(tests * tile_percent, na.rm = T))
# get ACS block groups va.bg <- get_acs(geography = "block group", year = 2019, variables = c(tpop = "B01003_001"), state = "VA", survey = "acs5", output = "wide", geometry = TRUE) md.bg <- get_acs(geography = "block group", year = 2019, variables = c(tpop = "B01003_001"), state = "MD", survey = "acs5", output = "wide", geometry = TRUE) dc.bg <- get_acs(geography = "block group", year = 2019, variables = c(tpop = "B01003_001"), state = "DC", survey = "acs5", output = "wide", geometry = TRUE) # format VA block group measurements va_ookla_bg_3 <- va_ookla_bg_2 %>% merge(st_drop_geometry(va.bg)[, c("GEOID", "NAME")]) %>% rename(avg_down_using_devices = download_devices, avg_up_using_devices = upload_devices, avg_lat_using_devices = latency_devices, avg_down_using_tests = download_tests, avg_up_using_tests = upload_tests, avg_lat_using_tests = latency_tests, geoid = GEOID, region_name = NAME) %>% gather(measure, value, c(avg_down_using_devices, avg_up_using_devices, avg_lat_using_devices, avg_down_using_tests, avg_up_using_tests, avg_lat_using_tests, devices, tests)) %>% mutate(region_type = "block group", measure_type = ifelse(measure %in% c("devices", "tests"), "count", "mean"), measure_units = ifelse(measure %in% c("avg_down_using_devices", "avg_up_using_devices", "avg_down_using_tests", "avg_up_using_tests"), "Mb/sec", ifelse(measure %in% c("avg_lat_using_devices", "avg_lat_using_tests"), "ms", NA))) %>% relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") # format MD block group measurements md_ookla_bg_3 <- md_ookla_bg_2 %>% merge(st_drop_geometry(md.bg)[, c("GEOID", "NAME")]) %>% rename(avg_down_using_devices = download_devices, avg_up_using_devices = upload_devices, avg_lat_using_devices = latency_devices, avg_down_using_tests = download_tests, avg_up_using_tests = upload_tests, avg_lat_using_tests = latency_tests, geoid = GEOID, region_name = NAME) %>% gather(measure, value, c(avg_down_using_devices, avg_up_using_devices, avg_lat_using_devices, avg_down_using_tests, avg_up_using_tests, avg_lat_using_tests, devices, tests)) %>% mutate(region_type = "block group", measure_type = ifelse(measure %in% c("devices", "tests"), "count", "mean"), measure_units = ifelse(measure %in% c("avg_down_using_devices", "avg_up_using_devices", "avg_down_using_tests", "avg_up_using_tests"), "Mb/sec", ifelse(measure %in% c("avg_lat_using_devices", "avg_lat_using_tests"), "ms", NA))) %>% relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") # format DC block group measurements dc_ookla_bg_3 <- dc_ookla_bg_2 %>% merge(st_drop_geometry(dc.bg)[, c("GEOID", "NAME")]) %>% rename(avg_down_using_devices = download_devices, avg_up_using_devices = upload_devices, avg_lat_using_devices = latency_devices, avg_down_using_tests = download_tests, avg_up_using_tests = upload_tests, avg_lat_using_tests = latency_tests, geoid = GEOID, region_name = NAME) %>% gather(measure, value, c(avg_down_using_devices, avg_up_using_devices, avg_lat_using_devices, avg_down_using_tests, avg_up_using_tests, avg_lat_using_tests, devices, tests)) %>% mutate(region_type = "block group", measure_type = ifelse(measure %in% c("devices", "tests"), "count", "mean"), measure_units = ifelse(measure %in% c("avg_down_using_devices", "avg_up_using_devices", "avg_down_using_tests", "avg_up_using_tests"), "Mb/sec", ifelse(measure %in% c("avg_lat_using_devices", "avg_lat_using_tests"), "ms", NA))) %>% relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units")
# tract level ACS va.tr <- get_acs(geography = "tract", year = 2019, variables = c(tpop = "B01003_001"), state = "VA", survey = "acs5", output = "wide", geometry = TRUE) md.tr <- get_acs(geography = "tract", year = 2019, variables = c(tpop = "B01003_001"), state = "MD", survey = "acs5", output = "wide", geometry = TRUE) dc.tr <- get_acs(geography = "tract", year = 2019, variables = c(tpop = "B01003_001"), state = "DC", survey = "acs5", output = "wide", geometry = TRUE) # county level ACS va.ct <- get_acs(geography = "county", year = 2019, variables = c(tpop = "B01003_001"), state = "VA", survey = "acs5", output = "wide", geometry = TRUE) md.ct <- get_acs(geography = "county", year = 2019, variables = c(tpop = "B01003_001"), state = "MD", survey = "acs5", output = "wide", geometry = TRUE) dc.ct <- get_acs(geography = "county", year = 2019, variables = c(tpop = "B01003_001"), state = "DC", survey = "acs5", output = "wide", geometry = TRUE)
# get VA health Districts health_district <- read.csv("/project/biocomplexity/sdad/projects_data/vdh/va_county_to_hd.csv") health_district$county_id <- as.character(health_district$county_id) con <- get_db_conn(db_pass = "rsu8zvrsu8zv") health_district_geoids <- st_read(con, query = "SELECT * FROM dc_geographies.va_hd_vdh_2021_health_district_geo_names") dbDisconnect(con) health_district_2 <- left_join(health_district, health_district_geoids, by = c("health_district" = "region_name")) # add tract, county, and health district columns va_ookla_bg_2$tract <- substr(va_ookla_bg_2$GEOID, 1, 11) va_ookla_bg_2$county <- substr(va_ookla_bg_2$GEOID, 1, 5) va_ookla_bg_2 <- left_join(va_ookla_bg_2, health_district_2[, c("county_id", "health_district", "geoid")], by = c("county" = "county_id")) # format VA tract data va_ookla_tr <- va_ookla_bg_2 %>% group_by(tract, year) %>% summarise(download_devices = sum(download_devices * devices, na.rm = T) / sum(devices, na.rm = T), upload_devices = sum(upload_devices * devices, na.rm = T) / sum(devices, na.rm = T), latency_devices = sum(latency_devices * devices, na.rm = T) / sum(devices, na.rm = T), download_tests = sum(download_tests * tests, na.rm = T) / sum(tests, na.rm = T), upload_tests = sum(upload_tests * tests, na.rm = T) / sum(tests, na.rm = T), latency_tests = sum(latency_tests * tests, na.rm = T) / sum(tests, na.rm = T), devices = sum(devices, na.rm = T), tests = sum(tests, na.rm = T)) %>% merge(st_drop_geometry(va.tr)[, c("GEOID", "NAME")], by.x = "tract", by.y = "GEOID") %>% rename(avg_down_using_devices = download_devices, avg_up_using_devices = upload_devices, avg_lat_using_devices = latency_devices, avg_down_using_tests = download_tests, avg_up_using_tests = upload_tests, avg_lat_using_tests = latency_tests, geoid = tract, region_name = NAME) %>% gather(measure, value, c(avg_down_using_devices, avg_up_using_devices, avg_lat_using_devices, avg_down_using_tests, avg_up_using_tests, avg_lat_using_tests, devices, tests)) %>% mutate(region_type = "tract", measure_type = ifelse(measure %in% c("devices", "tests"), "count", "mean"), measure_units = ifelse(measure %in% c("avg_down_using_devices", "avg_up_using_devices", "avg_down_using_tests", "avg_up_using_tests"), "Mb/sec", ifelse(measure %in% c("avg_lat_using_devices", "avg_lat_using_tests"), "ms", NA))) %>% relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") # format VA county data va_ookla_ct <- va_ookla_bg_2 %>% group_by(county, year) %>% summarise(download_devices = sum(download_devices * devices, na.rm = T) / sum(devices, na.rm = T), upload_devices = sum(upload_devices * devices, na.rm = T) / sum(devices, na.rm = T), latency_devices = sum(latency_devices * devices, na.rm = T) / sum(devices, na.rm = T), download_tests = sum(download_tests * tests, na.rm = T) / sum(tests, na.rm = T), upload_tests = sum(upload_tests * tests, na.rm = T) / sum(tests, na.rm = T), latency_tests = sum(latency_tests * tests, na.rm = T) / sum(tests, na.rm = T), devices = sum(devices, na.rm = T), tests = sum(tests, na.rm = T)) %>% merge(st_drop_geometry(va.ct)[, c("GEOID", "NAME")], by.x = "county", by.y = "GEOID") %>% rename(avg_down_using_devices = download_devices, avg_up_using_devices = upload_devices, avg_lat_using_devices = latency_devices, avg_down_using_tests = download_tests, avg_up_using_tests = upload_tests, avg_lat_using_tests = latency_tests, geoid = county, region_name = NAME) %>% gather(measure, value, c(avg_down_using_devices, avg_up_using_devices, avg_lat_using_devices, avg_down_using_tests, avg_up_using_tests, avg_lat_using_tests, devices, tests)) %>% mutate(region_type = "county", measure_type = ifelse(measure %in% c("devices", "tests"), "count", "mean"), measure_units = ifelse(measure %in% c("avg_down_using_devices", "avg_up_using_devices", "avg_down_using_tests", "avg_up_using_tests"), "Mb/sec", ifelse(measure %in% c("avg_lat_using_devices", "avg_lat_using_tests"), "ms", NA))) %>% relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") # format VA health district data va_ookla_hd <- va_ookla_bg_2 %>% group_by(geoid, health_district, year) %>% summarise(download_devices = sum(download_devices * devices, na.rm = T) / sum(devices, na.rm = T), upload_devices = sum(upload_devices * devices, na.rm = T) / sum(devices, na.rm = T), latency_devices = sum(latency_devices * devices, na.rm = T) / sum(devices, na.rm = T), download_tests = sum(download_tests * tests, na.rm = T) / sum(tests, na.rm = T), upload_tests = sum(upload_tests * tests, na.rm = T) / sum(tests, na.rm = T), latency_tests = sum(latency_tests * tests, na.rm = T) / sum(tests, na.rm = T), devices = sum(devices, na.rm = T), tests = sum(tests, na.rm = T)) %>% rename(avg_down_using_devices = download_devices, avg_up_using_devices = upload_devices, avg_lat_using_devices = latency_devices, avg_down_using_tests = download_tests, avg_up_using_tests = upload_tests, avg_lat_using_tests = latency_tests, region_name = health_district) %>% gather(measure, value, c(avg_down_using_devices, avg_up_using_devices, avg_lat_using_devices, avg_down_using_tests, avg_up_using_tests, avg_lat_using_tests, devices, tests)) %>% mutate(region_type = "health district", measure_type = ifelse(measure %in% c("devices", "tests"), "count", "mean"), measure_units = ifelse(measure %in% c("avg_down_using_devices", "avg_up_using_devices", "avg_down_using_tests", "avg_up_using_tests"), "Mb/sec", ifelse(measure %in% c("avg_lat_using_devices", "avg_lat_using_tests"), "ms", NA))) %>% relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units")
# add tract and county columns md_ookla_bg_2$tract <- substr(md_ookla_bg_2$GEOID, 1, 11) md_ookla_bg_2$county <- substr(md_ookla_bg_2$GEOID, 1, 5) # format MD tract data md_ookla_tr <- md_ookla_bg_2 %>% group_by(tract, year) %>% summarise(download_devices = sum(download_devices * devices, na.rm = T) / sum(devices, na.rm = T), upload_devices = sum(upload_devices * devices, na.rm = T) / sum(devices, na.rm = T), latency_devices = sum(latency_devices * devices, na.rm = T) / sum(devices, na.rm = T), download_tests = sum(download_tests * tests, na.rm = T) / sum(tests, na.rm = T), upload_tests = sum(upload_tests * tests, na.rm = T) / sum(tests, na.rm = T), latency_tests = sum(latency_tests * tests, na.rm = T) / sum(tests, na.rm = T), devices = sum(devices, na.rm = T), tests = sum(tests, na.rm = T)) %>% merge(st_drop_geometry(md.tr)[, c("GEOID", "NAME")], by.x = "tract", by.y = "GEOID") %>% rename(avg_down_using_devices = download_devices, avg_up_using_devices = upload_devices, avg_lat_using_devices = latency_devices, avg_down_using_tests = download_tests, avg_up_using_tests = upload_tests, avg_lat_using_tests = latency_tests, geoid = tract, region_name = NAME) %>% gather(measure, value, c(avg_down_using_devices, avg_up_using_devices, avg_lat_using_devices, avg_down_using_tests, avg_up_using_tests, avg_lat_using_tests, devices, tests)) %>% mutate(region_type = "tract", measure_type = ifelse(measure %in% c("devices", "tests"), "count", "mean"), measure_units = ifelse(measure %in% c("avg_down_using_devices", "avg_up_using_devices", "avg_down_using_tests", "avg_up_using_tests"), "Mb/sec", ifelse(measure %in% c("avg_lat_using_devices", "avg_lat_using_tests"), "ms", NA))) %>% relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") # format MD county data md_ookla_ct <- md_ookla_bg_2 %>% group_by(county, year) %>% summarise(download_devices = sum(download_devices * devices, na.rm = T) / sum(devices, na.rm = T), upload_devices = sum(upload_devices * devices, na.rm = T) / sum(devices, na.rm = T), latency_devices = sum(latency_devices * devices, na.rm = T) / sum(devices, na.rm = T), download_tests = sum(download_tests * tests, na.rm = T) / sum(tests, na.rm = T), upload_tests = sum(upload_tests * tests, na.rm = T) / sum(tests, na.rm = T), latency_tests = sum(latency_tests * tests, na.rm = T) / sum(tests, na.rm = T), devices = sum(devices, na.rm = T), tests = sum(tests, na.rm = T)) %>% merge(st_drop_geometry(md.ct)[, c("GEOID", "NAME")], by.x = "county", by.y = "GEOID") %>% rename(avg_down_using_devices = download_devices, avg_up_using_devices = upload_devices, avg_lat_using_devices = latency_devices, avg_down_using_tests = download_tests, avg_up_using_tests = upload_tests, avg_lat_using_tests = latency_tests, geoid = county, region_name = NAME) %>% gather(measure, value, c(avg_down_using_devices, avg_up_using_devices, avg_lat_using_devices, avg_down_using_tests, avg_up_using_tests, avg_lat_using_tests, devices, tests)) %>% mutate(region_type = "county", measure_type = ifelse(measure %in% c("devices", "tests"), "count", "mean"), measure_units = ifelse(measure %in% c("avg_down_using_devices", "avg_up_using_devices", "avg_down_using_tests", "avg_up_using_tests"), "Mb/sec", ifelse(measure %in% c("avg_lat_using_devices", "avg_lat_using_tests"), "ms", NA))) %>% relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") # add tract and county columns dc_ookla_bg_2$tract <- substr(dc_ookla_bg_2$GEOID, 1, 11) dc_ookla_bg_2$county <- substr(dc_ookla_bg_2$GEOID, 1, 5) # format DC tract data dc_ookla_tr <- dc_ookla_bg_2 %>% group_by(tract, year) %>% summarise(download_devices = sum(download_devices * devices, na.rm = T) / sum(devices, na.rm = T), upload_devices = sum(upload_devices * devices, na.rm = T) / sum(devices, na.rm = T), latency_devices = sum(latency_devices * devices, na.rm = T) / sum(devices, na.rm = T), download_tests = sum(download_tests * tests, na.rm = T) / sum(tests, na.rm = T), upload_tests = sum(upload_tests * tests, na.rm = T) / sum(tests, na.rm = T), latency_tests = sum(latency_tests * tests, na.rm = T) / sum(tests, na.rm = T), devices = sum(devices, na.rm = T), tests = sum(tests, na.rm = T)) %>% merge(st_drop_geometry(dc.tr)[, c("GEOID", "NAME")], by.x = "tract", by.y = "GEOID") %>% rename(avg_down_using_devices = download_devices, avg_up_using_devices = upload_devices, avg_lat_using_devices = latency_devices, avg_down_using_tests = download_tests, avg_up_using_tests = upload_tests, avg_lat_using_tests = latency_tests, geoid = tract, region_name = NAME) %>% gather(measure, value, c(avg_down_using_devices, avg_up_using_devices, avg_lat_using_devices, avg_down_using_tests, avg_up_using_tests, avg_lat_using_tests, devices, tests)) %>% mutate(region_type = "tract", measure_type = ifelse(measure %in% c("devices", "tests"), "count", "mean"), measure_units = ifelse(measure %in% c("avg_down_using_devices", "avg_up_using_devices", "avg_down_using_tests", "avg_up_using_tests"), "Mb/sec", ifelse(measure %in% c("avg_lat_using_devices", "avg_lat_using_tests"), "ms", NA))) %>% relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") # format DC county data dc_ookla_ct <- dc_ookla_bg_2 %>% group_by(county, year) %>% summarise(download_devices = sum(download_devices * devices, na.rm = T) / sum(devices, na.rm = T), upload_devices = sum(upload_devices * devices, na.rm = T) / sum(devices, na.rm = T), latency_devices = sum(latency_devices * devices, na.rm = T) / sum(devices, na.rm = T), download_tests = sum(download_tests * tests, na.rm = T) / sum(tests, na.rm = T), upload_tests = sum(upload_tests * tests, na.rm = T) / sum(tests, na.rm = T), latency_tests = sum(latency_tests * tests, na.rm = T) / sum(tests, na.rm = T), devices = sum(devices, na.rm = T), tests = sum(tests, na.rm = T)) %>% merge(st_drop_geometry(dc.ct)[, c("GEOID", "NAME")], by.x = "county", by.y = "GEOID") %>% rename(avg_down_using_devices = download_devices, avg_up_using_devices = upload_devices, avg_lat_using_devices = latency_devices, avg_down_using_tests = download_tests, avg_up_using_tests = upload_tests, avg_lat_using_tests = latency_tests, geoid = county, region_name = NAME) %>% gather(measure, value, c(avg_down_using_devices, avg_up_using_devices, avg_lat_using_devices, avg_down_using_tests, avg_up_using_tests, avg_lat_using_tests, devices, tests)) %>% mutate(region_type = "county", measure_type = ifelse(measure %in% c("devices", "tests"), "count", "mean"), measure_units = ifelse(measure %in% c("avg_down_using_devices", "avg_up_using_devices", "avg_down_using_tests", "avg_up_using_tests"), "Mb/sec", ifelse(measure %in% c("avg_lat_using_devices", "avg_lat_using_tests"), "ms", NA))) %>% relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units")
# send bg, tr, ct, and hd data to database (without any subsetting) source("~/git/VDH/src/helper_functions.R") con <- get_db_conn(db_pass = "rsu8zvrsu8zv") dc_dbWriteTable(con, "dc_digital_communications", "va_bg_ookla_2019_2021_speed_measurements", va_ookla_bg_3) dc_dbWriteTable(con, "dc_digital_communications", "va_tr_ookla_2019_2021_speed_measurements", va_ookla_tr) dc_dbWriteTable(con, "dc_digital_communications", "va_ct_ookla_2019_2021_speed_measurements", va_ookla_ct) dc_dbWriteTable(con, "dc_digital_communications", "va_hd_ookla_2019_2021_speed_measurements", va_ookla_hd) dc_dbWriteTable(con, "dc_digital_communications", "md_bg_ookla_2019_2021_speed_measurements", md_ookla_bg_3) dc_dbWriteTable(con, "dc_digital_communications", "md_tr_ookla_2019_2021_speed_measurements", md_ookla_tr) dc_dbWriteTable(con, "dc_digital_communications", "md_ct_ookla_2019_2021_speed_measurements", md_ookla_ct) dc_dbWriteTable(con, "dc_digital_communications", "dc_bg_ookla_2019_2021_speed_measurements", dc_ookla_bg_3) dc_dbWriteTable(con, "dc_digital_communications", "dc_tr_ookla_2019_2021_speed_measurements", dc_ookla_tr) dc_dbWriteTable(con, "dc_digital_communications", "dc_ct_ookla_2019_2021_speed_measurements", dc_ookla_ct) dbDisconnect(con)
# read in data to be subset (could use the data above) con <- get_db_conn(db_pass = "rsu8zvrsu8zv") va_ookla_bg_for_ncr <- st_read(con, query = "SELECT * FROM dc_digital_communications.va_bg_ookla_2019_2021_speed_measurements") va_ookla_tr_for_ncr <- st_read(con, query = "SELECT * FROM dc_digital_communications.va_tr_ookla_2019_2021_speed_measurements") va_ookla_ct_for_ncr <- st_read(con, query = "SELECT * FROM dc_digital_communications.va_ct_ookla_2019_2021_speed_measurements") md_ookla_bg_for_ncr <- st_read(con, query = "SELECT * FROM dc_digital_communications.md_bg_ookla_2019_2021_speed_measurements") md_ookla_tr_for_ncr <- st_read(con, query = "SELECT * FROM dc_digital_communications.md_tr_ookla_2019_2021_speed_measurements") md_ookla_ct_for_ncr <- st_read(con, query = "SELECT * FROM dc_digital_communications.md_ct_ookla_2019_2021_speed_measurements") dc_ookla_bg_for_ncr <- st_read(con, query = "SELECT * FROM dc_digital_communications.dc_bg_ookla_2019_2021_speed_measurements") dc_ookla_tr_for_ncr <- st_read(con, query = "SELECT * FROM dc_digital_communications.dc_tr_ookla_2019_2021_speed_measurements") dc_ookla_ct_for_ncr <- st_read(con, query = "SELECT * FROM dc_digital_communications.dc_ct_ookla_2019_2021_speed_measurements") dbDisconnect(con) # subset ncr_ookla_bg <- rbind(va_ookla_bg_for_ncr, md_ookla_bg_for_ncr, dc_ookla_bg_for_ncr) %>% filter(substr(geoid, 1, 5) %in% c("51013", "51059", "51107", "51510", "51600", "51153", "51683", "51685", "51610", "11001", "24031", "24033", "24017", "24021")) ncr_ookla_tr <- rbind(va_ookla_tr_for_ncr, md_ookla_tr_for_ncr, dc_ookla_tr_for_ncr) %>% filter(substr(geoid, 1, 5) %in% c("51013", "51059", "51107", "51510", "51600", "51153", "51683", "51685", "51610", "11001", "24031", "24033", "24017", "24021")) ncr_ookla_ct <- rbind(va_ookla_ct_for_ncr, md_ookla_ct_for_ncr, dc_ookla_ct_for_ncr) %>% filter(substr(geoid, 1, 5) %in% c("51013", "51059", "51107", "51510", "51600", "51153", "51683", "51685", "51610", "11001", "24031", "24033", "24017", "24021")) # send to db con <- get_db_conn(db_pass = "rsu8zvrsu8zv") dc_dbWriteTable(con, "dc_digital_communications", "ncr_bg_ookla_2019_2021_speed_measurements", ncr_ookla_bg) dc_dbWriteTable(con, "dc_digital_communications", "ncr_tr_ookla_2019_2021_speed_measurements", ncr_ookla_tr) dc_dbWriteTable(con, "dc_digital_communications", "ncr_ct_ookla_2019_2021_speed_measurements", ncr_ookla_ct) dbDisconnect(con)
bg_data_quarters2 <- st_drop_geometry(va_ookla_bg) %>% group_by(GEOID, year, quarter) %>% mutate(total_percent = sum(bg_percent), true_percent = bg_percent / total_percent, total_devices = sum(devices), frac_devices = devices / total_devices, denom_devices = sum(frac_devices * true_percent * tile_percent), total_tests = sum(tests), frac_tests = tests / total_tests, denom_tests = sum(frac_tests * true_percent * tile_percent)) %>% summarise(download_devices = sum(avg_d_kbps * true_percent * frac_devices * tile_percent / denom_devices / 1000), upload_devices = sum(avg_u_kbps * true_percent * frac_devices * tile_percent / denom_devices / 1000), latency_devices = sum(avg_lat_ms * true_percent * frac_devices * tile_percent / denom_devices), download_tests = sum(avg_d_kbps * true_percent * frac_tests * tile_percent / denom_tests / 1000), upload_tests = sum(avg_u_kbps * true_percent * frac_tests * tile_percent / denom_tests / 1000), latency_tests = sum(avg_lat_ms * true_percent * frac_tests * tile_percent / denom_tests), devices = sum(devices * tile_percent), tests = sum(tests * tile_percent)) %>% mutate(county = substr(GEOID, 1, 5)) va_merged_data <- merge(bg_data_quarters2, va_ookla_ct_for_ncr[va_ookla_ct_for_ncr$measure%in% c("avg_down_using_devices", "avg_up_using_devices", "avg_down_using_tests", "avg_up_using_tests"), c("geoid", "value", "year", "measure")][], by.x = c("county", "year"), by.y = c("geoid", "year")) va_merged_data_down_devices <- va_merged_data %>% group_by(county, year) %>% filter(measure == "avg_down_using_devices") %>% mutate(sd_county_down_devices = sqrt(sum(devices * (download_devices - value) ^ 2) / (sum(devices, na.rm = T) - 1))) va_merged_data_up_devices <- va_merged_data %>% group_by(county, year) %>% filter(measure == "avg_up_using_devices") %>% mutate(sd_county_up_devices = sqrt(sum(devices * (upload_devices - value) ^ 2) / (sum(devices, na.rm = T) - 1))) va_merged_data_down_tests <- va_merged_data %>% group_by(county, year) %>% filter(measure == "avg_down_using_tests") %>% mutate(sd_county_down_tests = sqrt(sum(tests * (download_tests - value) ^ 2) / (sum(tests, na.rm = T) - 1))) va_merged_data_up_tests <- va_merged_data %>% group_by(county, year) %>% filter(measure == "avg_up_using_tests") %>% mutate(sd_county_up_tests = sqrt(sum(tests * (upload_tests - value) ^ 2) / (sum(tests, na.rm = T) - 1))) # Virginia ACS % with internet va.bg <- get_acs(geography = "block group", year = 2019, variables = c(bg_pop = "B01003_001", tpop = "B28002_001", no_internet = "B28002_013"), state = "VA", survey = "acs5", output = "wide", geometry = TRUE) va.bg$w_internet <- va.bg$tpopE - va.bg$no_internetE
va_merged_data_down_devices$above_100_down <- pnorm(100, va_merged_data_down_devices$download_devices, va_merged_data_down_devices$sd_county_down_devices, lower.tail = F) * 100 va_merged_data_down_devices$above_25_down <- pnorm(25, va_merged_data_down_devices$download_devices, va_merged_data_down_devices$sd_county_down_devices, lower.tail = F) * 100 va_merged_data_up_devices$above_20_up <- pnorm(20, va_merged_data_up_devices$upload_devices, va_merged_data_up_devices$sd_county_up_devices, lower.tail = F) * 100 va_merged_data_up_devices$above_3_up <- pnorm(3, va_merged_data_up_devices$upload_devices, va_merged_data_up_devices$sd_county_up_devices, lower.tail = F) * 100 va_merged_data_down_tests$above_100_down <- pnorm(100, va_merged_data_down_tests$download_tests, va_merged_data_down_tests$sd_county_down_tests, lower.tail = F) * 100 va_merged_data_down_tests$above_25_down <- pnorm(25, va_merged_data_down_tests$download_tests, va_merged_data_down_tests$sd_county_down_tests, lower.tail = F) * 100 va_merged_data_up_tests$above_20_up <- pnorm(20, va_merged_data_up_tests$upload_tests, va_merged_data_up_tests$sd_county_up_tests, lower.tail = F) * 100 va_merged_data_up_tests$above_3_up <- pnorm(3, va_merged_data_up_tests$upload_tests, va_merged_data_up_tests$sd_county_up_tests, lower.tail = F) * 100 va_merged_data_a <- merge(va_merged_data_down_devices[, c("GEOID", "quarter", "year","above_100_down", "above_25_down")], va_merged_data_up_devices[, c("GEOID", "quarter", "year", "above_20_up", "above_3_up")], by = c("GEOID", "quarter", "year")) va_merged_data_b <- merge(va_merged_data_down_tests[, c("GEOID", "quarter", "year","above_100_down", "above_25_down")], va_merged_data_up_tests[, c("GEOID", "quarter", "year", "above_20_up", "above_3_up")], by = c("GEOID", "quarter", "year")) va_merged_data_c <- merge(va_merged_data_a, va_merged_data_b, by = c("GEOID", "quarter", "year")) %>% rename(perc_w_int_above_100_down_using_devices = above_100_down.x, perc_w_int_above_25_down_using_devices = above_25_down.x, perc_w_int_above_20_up_using_devices = above_20_up.x, perc_w_int_above_3_up_using_devices = above_3_up.x, perc_w_int_above_100_down_using_tests = above_100_down.y, perc_w_int_above_25_down_using_tests = above_25_down.y, perc_w_int_above_20_up_using_tests = above_20_up.y, perc_w_int_above_3_up_using_tests = above_3_up.y) %>% mutate(perc_w_int_100_20_using_devices = rowMins(cbind(perc_w_int_above_100_down_using_devices, perc_w_int_above_20_up_using_devices)), perc_w_int_25_3_using_devices = rowMins(cbind(perc_w_int_above_25_down_using_devices, perc_w_int_above_3_up_using_devices)), perc_w_int_100_20_using_tests = rowMins(cbind(perc_w_int_above_100_down_using_tests, perc_w_int_above_20_up_using_tests)), perc_w_int_25_3_using_tests = rowMins(cbind(perc_w_int_above_25_down_using_tests, perc_w_int_above_3_up_using_tests))) va_merged_data_d <- st_drop_geometry(left_join(va.bg, va_merged_data_c, by = "GEOID")) %>% mutate(perc_total_above_100_down_using_devices = perc_w_int_above_100_down_using_devices * w_internet/tpopE, perc_total_above_25_down_using_devices = perc_w_int_above_25_down_using_devices * w_internet/tpopE, perc_total_above_20_up_using_devices = perc_w_int_above_20_up_using_devices * w_internet/tpopE, perc_total_above_3_up_using_devices = perc_w_int_above_3_up_using_devices * w_internet/tpopE, perc_total_above_100_down_using_tests = perc_w_int_above_100_down_using_tests * w_internet/tpopE, perc_total_above_25_down_using_tests = perc_w_int_above_25_down_using_tests * w_internet/tpopE, perc_total_above_20_up_using_tests = perc_w_int_above_20_up_using_tests * w_internet/tpopE, perc_total_above_3_up_using_tests = perc_w_int_above_3_up_using_tests * w_internet/tpopE, perc_total_100_20_using_devices = rowMins(cbind(perc_total_above_100_down_using_devices, perc_total_above_20_up_using_devices)), perc_total_25_3_using_devices = rowMins(cbind(perc_total_above_25_down_using_devices, perc_total_above_3_up_using_devices)), perc_total_100_20_using_tests = rowMins(cbind(perc_total_above_100_down_using_tests, perc_total_above_20_up_using_tests)), perc_total_25_3_using_tests = rowMins(cbind(perc_total_above_25_down_using_tests, perc_total_above_3_up_using_tests)))
va_merged_data_e <- va_merged_data_d %>% group_by(year, GEOID, NAME) %>% summarise(perc_w_int_above_100_down_using_devices = mean(perc_w_int_above_100_down_using_devices, na.rm = T), perc_w_int_above_25_down_using_devices = mean(perc_w_int_above_25_down_using_devices, na.rm = T), perc_w_int_above_20_up_using_devices = mean(perc_w_int_above_20_up_using_devices, na.rm = T), perc_w_int_above_3_up_using_devices = mean(perc_w_int_above_3_up_using_devices, na.rm = T), perc_w_int_above_100_down_using_tests = mean(perc_w_int_above_100_down_using_tests, na.rm = T), perc_w_int_above_25_down_using_tests = mean(perc_w_int_above_25_down_using_tests, na.rm = T), perc_w_int_above_20_up_using_tests = mean(perc_w_int_above_20_up_using_tests, na.rm = T), perc_w_int_above_3_up_using_tests = mean(perc_w_int_above_3_up_using_tests, na.rm = T), perc_w_int_100_20_using_devices = mean(perc_w_int_100_20_using_devices, na.rm = T), perc_w_int_25_3_using_devices = mean(perc_w_int_25_3_using_devices, na.rm = T), perc_w_int_100_20_using_tests = mean(perc_w_int_100_20_using_tests, na.rm = T), perc_w_int_25_3_using_tests = mean(perc_w_int_25_3_using_tests, na.rm = T), perc_total_above_100_down_using_devices = mean(perc_total_above_100_down_using_devices, na.rm = T), perc_total_above_25_down_using_devices = mean(perc_total_above_25_down_using_devices, na.rm = T), perc_total_above_20_up_using_devices = mean(perc_total_above_20_up_using_devices, na.rm = T), perc_total_above_3_up_using_devices = mean(perc_total_above_3_up_using_devices, na.rm = T), perc_total_above_100_down_using_tests = mean(perc_total_above_100_down_using_tests, na.rm = T), perc_total_above_25_down_using_tests = mean(perc_total_above_25_down_using_tests, na.rm = T), perc_total_above_20_up_using_tests = mean(perc_total_above_20_up_using_tests, na.rm = T), perc_total_above_3_up_using_tests = mean(perc_total_above_3_up_using_tests, na.rm = T), perc_total_100_20_using_devices = mean(perc_total_100_20_using_devices, na.rm = T), perc_total_25_3_using_devices = mean(perc_total_25_3_using_devices, na.rm = T), perc_total_100_20_using_tests = mean(perc_total_100_20_using_tests, na.rm = T), perc_total_25_3_using_tests = mean(perc_total_25_3_using_tests, na.rm = T), w_internet = mean(w_internet), tpop = mean(tpopE)) %>% mutate(tract = substr(GEOID, 1, 11), county = substr(GEOID, 1, 5), pop_w_int_above_100_down_using_devices = perc_w_int_above_100_down_using_devices * w_internet / 100, pop_w_int_above_25_down_using_devices = perc_w_int_above_25_down_using_devices * w_internet / 100, pop_w_int_above_20_up_using_devices = perc_w_int_above_20_up_using_devices * w_internet / 100, pop_w_int_above_3_up_using_devices = perc_w_int_above_3_up_using_devices * w_internet / 100, pop_w_int_100_20_using_devices = perc_w_int_100_20_using_devices * w_internet / 100, pop_w_int_25_3_using_devices = perc_w_int_25_3_using_devices * w_internet / 100, pop_w_int_above_100_down_using_tests = perc_w_int_above_100_down_using_tests * w_internet / 100, pop_w_int_above_25_down_using_tests = perc_w_int_above_25_down_using_tests * w_internet / 100, pop_w_int_above_20_up_using_tests = perc_w_int_above_20_up_using_tests * w_internet / 100, pop_w_int_above_3_up_using_tests = perc_w_int_above_3_up_using_tests * w_internet / 100, pop_w_int_100_20_using_tests = perc_w_int_100_20_using_tests * w_internet / 100, pop_w_int_25_3_using_tests = perc_w_int_25_3_using_tests * w_internet / 100, pop_total_above_100_down_using_devices = perc_w_int_above_100_down_using_devices * w_internet / 100, pop_total_above_25_down_using_devices = perc_w_int_above_25_down_using_devices * w_internet / 100, pop_total_above_20_up_using_devices = perc_w_int_above_20_up_using_devices * w_internet / 100, pop_total_above_3_up_using_devices = perc_w_int_above_3_up_using_devices * w_internet / 100, pop_total_100_20_using_devices = perc_w_int_100_20_using_devices * w_internet / 100, pop_total_25_3_using_devices = perc_w_int_25_3_using_devices * w_internet / 100, pop_total_above_100_down_using_tests = perc_w_int_above_100_down_using_tests * w_internet / 100, pop_total_above_25_down_using_tests = perc_w_int_above_25_down_using_tests * w_internet / 100, pop_total_above_20_up_using_tests = perc_w_int_above_20_up_using_tests * w_internet / 100, pop_total_above_3_up_using_tests = perc_w_int_above_3_up_using_tests * w_internet / 100, pop_total_100_20_using_tests = perc_w_int_100_20_using_tests * w_internet / 100, pop_total_25_3_using_tests = perc_w_int_25_3_using_tests * w_internet / 100, year = as.character(year)) %>% merge(health_district[, c("county_id", "health_district")], by.x = "county", by.y = "county_id") va_tr_percs <- va_merged_data_e %>% group_by(year, tract) %>% summarize(perc_w_int_above_100_down_using_devices = sum(pop_w_int_above_100_down_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_25_down_using_devices = sum(pop_w_int_above_25_down_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_20_up_using_devices = sum(pop_w_int_above_20_up_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_3_up_using_devices = sum(pop_w_int_above_3_up_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_100_down_using_tests = sum(pop_w_int_above_100_down_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_25_down_using_tests = sum(pop_w_int_above_25_down_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_20_up_using_tests = sum(pop_w_int_above_20_up_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_3_up_using_tests = sum(pop_w_int_above_3_up_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_100_20_using_devices = sum(pop_w_int_100_20_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_25_3_using_devices = sum(pop_w_int_25_3_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_100_20_using_tests = sum(pop_w_int_100_20_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_25_3_using_tests = sum(pop_w_int_25_3_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_total_above_100_down_using_devices = sum(pop_total_above_100_down_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_25_down_using_devices = sum(pop_total_above_25_down_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_20_up_using_devices = sum(pop_total_above_20_up_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_3_up_using_devices = sum(pop_total_above_3_up_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_100_down_using_tests = sum(pop_total_above_100_down_using_tests, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_25_down_using_tests = sum(pop_total_above_25_down_using_tests, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_20_up_using_tests = sum(pop_total_above_20_up_using_tests, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_3_up_using_tests = sum(pop_total_above_3_up_using_tests, na.rm = T) / sum(tpop, na.rm = T), perc_total_100_20_using_devices = sum(pop_total_100_20_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_25_3_using_devices = sum(pop_total_25_3_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_100_20_using_tests = sum(pop_total_100_20_using_tests, na.rm = T) / sum(tpop, na.rm = T), perc_total_25_3_using_tests = sum(pop_total_25_3_using_tests, na.rm = T) / sum(tpop, na.rm = T)) %>% drop_na() va_ct_percs <- va_merged_data_e %>% group_by(year, county) %>% summarize(perc_w_int_above_100_down_using_devices = sum(pop_w_int_above_100_down_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_25_down_using_devices = sum(pop_w_int_above_25_down_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_20_up_using_devices = sum(pop_w_int_above_20_up_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_3_up_using_devices = sum(pop_w_int_above_3_up_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_100_down_using_tests = sum(pop_w_int_above_100_down_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_25_down_using_tests = sum(pop_w_int_above_25_down_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_20_up_using_tests = sum(pop_w_int_above_20_up_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_3_up_using_tests = sum(pop_w_int_above_3_up_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_100_20_using_devices = sum(pop_w_int_100_20_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_25_3_using_devices = sum(pop_w_int_25_3_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_100_20_using_tests = sum(pop_w_int_100_20_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_25_3_using_tests = sum(pop_w_int_25_3_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_total_above_100_down_using_devices = sum(pop_total_above_100_down_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_25_down_using_devices = sum(pop_total_above_25_down_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_20_up_using_devices = sum(pop_total_above_20_up_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_3_up_using_devices = sum(pop_total_above_3_up_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_100_down_using_tests = sum(pop_total_above_100_down_using_tests, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_25_down_using_tests = sum(pop_total_above_25_down_using_tests, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_20_up_using_tests = sum(pop_total_above_20_up_using_tests, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_3_up_using_tests = sum(pop_total_above_3_up_using_tests, na.rm = T) / sum(tpop, na.rm = T), perc_total_100_20_using_devices = sum(pop_total_100_20_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_25_3_using_devices = sum(pop_total_25_3_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_100_20_using_tests = sum(pop_total_100_20_using_tests, na.rm = T) / sum(tpop, na.rm = T), perc_total_25_3_using_tests = sum(pop_total_25_3_using_tests, na.rm = T) / sum(tpop, na.rm = T)) %>% drop_na() va_hd_percs <- va_merged_data_e %>% group_by(year, health_district) %>% summarize(perc_w_int_above_100_down_using_devices = sum(pop_w_int_above_100_down_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_25_down_using_devices = sum(pop_w_int_above_25_down_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_20_up_using_devices = sum(pop_w_int_above_20_up_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_3_up_using_devices = sum(pop_w_int_above_3_up_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_100_down_using_tests = sum(pop_w_int_above_100_down_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_25_down_using_tests = sum(pop_w_int_above_25_down_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_20_up_using_tests = sum(pop_w_int_above_20_up_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_3_up_using_tests = sum(pop_w_int_above_3_up_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_100_20_using_devices = sum(pop_w_int_100_20_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_25_3_using_devices = sum(pop_w_int_25_3_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_100_20_using_tests = sum(pop_w_int_100_20_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_25_3_using_tests = sum(pop_w_int_25_3_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_total_above_100_down_using_devices = sum(pop_total_above_100_down_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_25_down_using_devices = sum(pop_total_above_25_down_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_20_up_using_devices = sum(pop_total_above_20_up_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_3_up_using_devices = sum(pop_total_above_3_up_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_100_down_using_tests = sum(pop_total_above_100_down_using_tests, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_25_down_using_tests = sum(pop_total_above_25_down_using_tests, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_20_up_using_tests = sum(pop_total_above_20_up_using_tests, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_3_up_using_tests = sum(pop_total_above_3_up_using_tests, na.rm = T) / sum(tpop, na.rm = T), perc_total_100_20_using_devices = sum(pop_total_100_20_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_25_3_using_devices = sum(pop_total_25_3_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_100_20_using_tests = sum(pop_total_100_20_using_tests, na.rm = T) / sum(tpop, na.rm = T), perc_total_25_3_using_tests = sum(pop_total_25_3_using_tests, na.rm = T) / sum(tpop, na.rm = T)) %>% drop_na()
bg_data_quarters2 <- st_drop_geometry(dc_ookla_bg) %>% group_by(GEOID, year, quarter) %>% mutate(total_percent = sum(bg_percent), true_percent = bg_percent / total_percent, total_devices = sum(devices), frac_devices = devices / total_devices, denom_devices = sum(frac_devices * true_percent * tile_percent), total_tests = sum(tests), frac_tests = tests / total_tests, denom_tests = sum(frac_tests * true_percent * tile_percent)) %>% summarise(download_devices = sum(avg_d_kbps * true_percent * frac_devices * tile_percent / denom_devices / 1000), upload_devices = sum(avg_u_kbps * true_percent * frac_devices * tile_percent / denom_devices / 1000), latency_devices = sum(avg_lat_ms * true_percent * frac_devices * tile_percent / denom_devices), download_tests = sum(avg_d_kbps * true_percent * frac_tests * tile_percent / denom_tests / 1000), upload_tests = sum(avg_u_kbps * true_percent * frac_tests * tile_percent / denom_tests / 1000), latency_tests = sum(avg_lat_ms * true_percent * frac_tests * tile_percent / denom_tests), devices = sum(devices * tile_percent), tests = sum(tests * tile_percent)) %>% mutate(county = substr(GEOID, 1, 5)) dc_merged_data <- merge(bg_data_quarters2, dc_ookla_ct_for_ncr[dc_ookla_ct_for_ncr$measure%in% c("avg_down_using_devices", "avg_up_using_devices", "avg_down_using_tests", "avg_up_using_tests"), c("geoid", "value", "year", "measure")][], by.x = c("county", "year"), by.y = c("geoid", "year")) dc_merged_data_down_devices <- dc_merged_data %>% group_by(county, year) %>% filter(measure == "avg_down_using_devices") %>% mutate(sd_county_down_devices = sqrt(sum(devices * (download_devices - value) ^ 2) / (sum(devices, na.rm = T) - 1))) dc_merged_data_up_devices <- dc_merged_data %>% group_by(county, year) %>% filter(measure == "avg_up_using_devices") %>% mutate(sd_county_up_devices = sqrt(sum(devices * (upload_devices - value) ^ 2) / (sum(devices, na.rm = T) - 1))) dc_merged_data_down_tests <- dc_merged_data %>% group_by(county, year) %>% filter(measure == "avg_down_using_tests") %>% mutate(sd_county_down_tests = sqrt(sum(tests * (download_tests - value) ^ 2) / (sum(tests, na.rm = T) - 1))) dc_merged_data_up_tests <- dc_merged_data %>% group_by(county, year) %>% filter(measure == "avg_up_using_tests") %>% mutate(sd_county_up_tests = sqrt(sum(tests * (upload_tests - value) ^ 2) / (sum(tests, na.rm = T) - 1))) # D.C. ACS % with internet dc.bg <- get_acs(geography = "block group", year = 2019, variables = c(bg_pop = "B01003_001", tpop = "B28002_001", no_internet = "B28002_013"), state = "DC", survey = "acs5", output = "wide", geometry = TRUE) dc.bg$w_internet <- dc.bg$tpopE - dc.bg$no_internetE
dc_merged_data_down_devices$above_100_down <- pnorm(100, dc_merged_data_down_devices$download_devices, dc_merged_data_down_devices$sd_county_down_devices, lower.tail = F) * 100 dc_merged_data_down_devices$above_25_down <- pnorm(25, dc_merged_data_down_devices$download_devices, dc_merged_data_down_devices$sd_county_down_devices, lower.tail = F) * 100 dc_merged_data_up_devices$above_20_up <- pnorm(20, dc_merged_data_up_devices$upload_devices, dc_merged_data_up_devices$sd_county_up_devices, lower.tail = F) * 100 dc_merged_data_up_devices$above_3_up <- pnorm(3, dc_merged_data_up_devices$upload_devices, dc_merged_data_up_devices$sd_county_up_devices, lower.tail = F) * 100 dc_merged_data_down_tests$above_100_down <- pnorm(100, dc_merged_data_down_tests$download_tests, dc_merged_data_down_tests$sd_county_down_tests, lower.tail = F) * 100 dc_merged_data_down_tests$above_25_down <- pnorm(25, dc_merged_data_down_tests$download_tests, dc_merged_data_down_tests$sd_county_down_tests, lower.tail = F) * 100 dc_merged_data_up_tests$above_20_up <- pnorm(20, dc_merged_data_up_tests$upload_tests, dc_merged_data_up_tests$sd_county_up_tests, lower.tail = F) * 100 dc_merged_data_up_tests$above_3_up <- pnorm(3, dc_merged_data_up_tests$upload_tests, dc_merged_data_up_tests$sd_county_up_tests, lower.tail = F) * 100 dc_merged_data_a <- merge(dc_merged_data_down_devices[, c("GEOID", "quarter", "year","above_100_down", "above_25_down")], dc_merged_data_up_devices[, c("GEOID", "quarter", "year", "above_20_up", "above_3_up")], by = c("GEOID", "quarter", "year")) dc_merged_data_b <- merge(dc_merged_data_down_tests[, c("GEOID", "quarter", "year","above_100_down", "above_25_down")], dc_merged_data_up_tests[, c("GEOID", "quarter", "year", "above_20_up", "above_3_up")], by = c("GEOID", "quarter", "year")) dc_merged_data_c <- merge(dc_merged_data_a, dc_merged_data_b, by = c("GEOID", "quarter", "year")) %>% rename(perc_w_int_above_100_down_using_devices = above_100_down.x, perc_w_int_above_25_down_using_devices = above_25_down.x, perc_w_int_above_20_up_using_devices = above_20_up.x, perc_w_int_above_3_up_using_devices = above_3_up.x, perc_w_int_above_100_down_using_tests = above_100_down.y, perc_w_int_above_25_down_using_tests = above_25_down.y, perc_w_int_above_20_up_using_tests = above_20_up.y, perc_w_int_above_3_up_using_tests = above_3_up.y) %>% mutate(perc_w_int_100_20_using_devices = rowMins(cbind(perc_w_int_above_100_down_using_devices, perc_w_int_above_20_up_using_devices)), perc_w_int_25_3_using_devices = rowMins(cbind(perc_w_int_above_25_down_using_devices, perc_w_int_above_3_up_using_devices)), perc_w_int_100_20_using_tests = rowMins(cbind(perc_w_int_above_100_down_using_tests, perc_w_int_above_20_up_using_tests)), perc_w_int_25_3_using_tests = rowMins(cbind(perc_w_int_above_25_down_using_tests, perc_w_int_above_3_up_using_tests))) dc_merged_data_d <- st_drop_geometry(left_join(dc.bg, dc_merged_data_c, by = "GEOID")) %>% mutate(perc_total_above_100_down_using_devices = perc_w_int_above_100_down_using_devices * w_internet/tpopE, perc_total_above_25_down_using_devices = perc_w_int_above_25_down_using_devices * w_internet/tpopE, perc_total_above_20_up_using_devices = perc_w_int_above_20_up_using_devices * w_internet/tpopE, perc_total_above_3_up_using_devices = perc_w_int_above_3_up_using_devices * w_internet/tpopE, perc_total_above_100_down_using_tests = perc_w_int_above_100_down_using_tests * w_internet/tpopE, perc_total_above_25_down_using_tests = perc_w_int_above_25_down_using_tests * w_internet/tpopE, perc_total_above_20_up_using_tests = perc_w_int_above_20_up_using_tests * w_internet/tpopE, perc_total_above_3_up_using_tests = perc_w_int_above_3_up_using_tests * w_internet/tpopE, perc_total_100_20_using_devices = rowMins(cbind(perc_total_above_100_down_using_devices, perc_total_above_20_up_using_devices)), perc_total_25_3_using_devices = rowMins(cbind(perc_total_above_25_down_using_devices, perc_total_above_3_up_using_devices)), perc_total_100_20_using_tests = rowMins(cbind(perc_total_above_100_down_using_tests, perc_total_above_20_up_using_tests)), perc_total_25_3_using_tests = rowMins(cbind(perc_total_above_25_down_using_tests, perc_total_above_3_up_using_tests)))
dc_merged_data_e <- dc_merged_data_d %>% group_by(year, GEOID, NAME) %>% summarise(perc_w_int_above_100_down_using_devices = mean(perc_w_int_above_100_down_using_devices, na.rm = T), perc_w_int_above_25_down_using_devices = mean(perc_w_int_above_25_down_using_devices, na.rm = T), perc_w_int_above_20_up_using_devices = mean(perc_w_int_above_20_up_using_devices, na.rm = T), perc_w_int_above_3_up_using_devices = mean(perc_w_int_above_3_up_using_devices, na.rm = T), perc_w_int_above_100_down_using_tests = mean(perc_w_int_above_100_down_using_tests, na.rm = T), perc_w_int_above_25_down_using_tests = mean(perc_w_int_above_25_down_using_tests, na.rm = T), perc_w_int_above_20_up_using_tests = mean(perc_w_int_above_20_up_using_tests, na.rm = T), perc_w_int_above_3_up_using_tests = mean(perc_w_int_above_3_up_using_tests, na.rm = T), perc_w_int_100_20_using_devices = mean(perc_w_int_100_20_using_devices, na.rm = T), perc_w_int_25_3_using_devices = mean(perc_w_int_25_3_using_devices, na.rm = T), perc_w_int_100_20_using_tests = mean(perc_w_int_100_20_using_tests, na.rm = T), perc_w_int_25_3_using_tests = mean(perc_w_int_25_3_using_tests, na.rm = T), perc_total_above_100_down_using_devices = mean(perc_total_above_100_down_using_devices, na.rm = T), perc_total_above_25_down_using_devices = mean(perc_total_above_25_down_using_devices, na.rm = T), perc_total_above_20_up_using_devices = mean(perc_total_above_20_up_using_devices, na.rm = T), perc_total_above_3_up_using_devices = mean(perc_total_above_3_up_using_devices, na.rm = T), perc_total_above_100_down_using_tests = mean(perc_total_above_100_down_using_tests, na.rm = T), perc_total_above_25_down_using_tests = mean(perc_total_above_25_down_using_tests, na.rm = T), perc_total_above_20_up_using_tests = mean(perc_total_above_20_up_using_tests, na.rm = T), perc_total_above_3_up_using_tests = mean(perc_total_above_3_up_using_tests, na.rm = T), perc_total_100_20_using_devices = mean(perc_total_100_20_using_devices, na.rm = T), perc_total_25_3_using_devices = mean(perc_total_25_3_using_devices, na.rm = T), perc_total_100_20_using_tests = mean(perc_total_100_20_using_tests, na.rm = T), perc_total_25_3_using_tests = mean(perc_total_25_3_using_tests, na.rm = T), w_internet = mean(w_internet), tpop = mean(tpopE)) %>% mutate(tract = substr(GEOID, 1, 11), county = substr(GEOID, 1, 5), pop_w_int_above_100_down_using_devices = perc_w_int_above_100_down_using_devices * w_internet / 100, pop_w_int_above_25_down_using_devices = perc_w_int_above_25_down_using_devices * w_internet / 100, pop_w_int_above_20_up_using_devices = perc_w_int_above_20_up_using_devices * w_internet / 100, pop_w_int_above_3_up_using_devices = perc_w_int_above_3_up_using_devices * w_internet / 100, pop_w_int_100_20_using_devices = perc_w_int_100_20_using_devices * w_internet / 100, pop_w_int_25_3_using_devices = perc_w_int_25_3_using_devices * w_internet / 100, pop_w_int_above_100_down_using_tests = perc_w_int_above_100_down_using_tests * w_internet / 100, pop_w_int_above_25_down_using_tests = perc_w_int_above_25_down_using_tests * w_internet / 100, pop_w_int_above_20_up_using_tests = perc_w_int_above_20_up_using_tests * w_internet / 100, pop_w_int_above_3_up_using_tests = perc_w_int_above_3_up_using_tests * w_internet / 100, pop_w_int_100_20_using_tests = perc_w_int_100_20_using_tests * w_internet / 100, pop_w_int_25_3_using_tests = perc_w_int_25_3_using_tests * w_internet / 100, pop_total_above_100_down_using_devices = perc_w_int_above_100_down_using_devices * w_internet / 100, pop_total_above_25_down_using_devices = perc_w_int_above_25_down_using_devices * w_internet / 100, pop_total_above_20_up_using_devices = perc_w_int_above_20_up_using_devices * w_internet / 100, pop_total_above_3_up_using_devices = perc_w_int_above_3_up_using_devices * w_internet / 100, pop_total_100_20_using_devices = perc_w_int_100_20_using_devices * w_internet / 100, pop_total_25_3_using_devices = perc_w_int_25_3_using_devices * w_internet / 100, pop_total_above_100_down_using_tests = perc_w_int_above_100_down_using_tests * w_internet / 100, pop_total_above_25_down_using_tests = perc_w_int_above_25_down_using_tests * w_internet / 100, pop_total_above_20_up_using_tests = perc_w_int_above_20_up_using_tests * w_internet / 100, pop_total_above_3_up_using_tests = perc_w_int_above_3_up_using_tests * w_internet / 100, pop_total_100_20_using_tests = perc_w_int_100_20_using_tests * w_internet / 100, pop_total_25_3_using_tests = perc_w_int_25_3_using_tests * w_internet / 100, year = as.character(year)) dc_tr_percs <- dc_merged_data_e %>% group_by(year, tract) %>% summarize(perc_w_int_above_100_down_using_devices = sum(pop_w_int_above_100_down_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_25_down_using_devices = sum(pop_w_int_above_25_down_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_20_up_using_devices = sum(pop_w_int_above_20_up_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_3_up_using_devices = sum(pop_w_int_above_3_up_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_100_down_using_tests = sum(pop_w_int_above_100_down_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_25_down_using_tests = sum(pop_w_int_above_25_down_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_20_up_using_tests = sum(pop_w_int_above_20_up_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_3_up_using_tests = sum(pop_w_int_above_3_up_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_100_20_using_devices = sum(pop_w_int_100_20_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_25_3_using_devices = sum(pop_w_int_25_3_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_100_20_using_tests = sum(pop_w_int_100_20_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_25_3_using_tests = sum(pop_w_int_25_3_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_total_above_100_down_using_devices = sum(pop_total_above_100_down_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_25_down_using_devices = sum(pop_total_above_25_down_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_20_up_using_devices = sum(pop_total_above_20_up_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_3_up_using_devices = sum(pop_total_above_3_up_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_100_down_using_tests = sum(pop_total_above_100_down_using_tests, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_25_down_using_tests = sum(pop_total_above_25_down_using_tests, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_20_up_using_tests = sum(pop_total_above_20_up_using_tests, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_3_up_using_tests = sum(pop_total_above_3_up_using_tests, na.rm = T) / sum(tpop, na.rm = T), perc_total_100_20_using_devices = sum(pop_total_100_20_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_25_3_using_devices = sum(pop_total_25_3_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_100_20_using_tests = sum(pop_total_100_20_using_tests, na.rm = T) / sum(tpop, na.rm = T), perc_total_25_3_using_tests = sum(pop_total_25_3_using_tests, na.rm = T) / sum(tpop, na.rm = T)) %>% drop_na() dc_ct_percs <- dc_merged_data_e %>% group_by(year, county) %>% summarize(perc_w_int_above_100_down_using_devices = sum(pop_w_int_above_100_down_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_25_down_using_devices = sum(pop_w_int_above_25_down_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_20_up_using_devices = sum(pop_w_int_above_20_up_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_3_up_using_devices = sum(pop_w_int_above_3_up_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_100_down_using_tests = sum(pop_w_int_above_100_down_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_25_down_using_tests = sum(pop_w_int_above_25_down_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_20_up_using_tests = sum(pop_w_int_above_20_up_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_3_up_using_tests = sum(pop_w_int_above_3_up_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_100_20_using_devices = sum(pop_w_int_100_20_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_25_3_using_devices = sum(pop_w_int_25_3_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_100_20_using_tests = sum(pop_w_int_100_20_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_25_3_using_tests = sum(pop_w_int_25_3_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_total_above_100_down_using_devices = sum(pop_total_above_100_down_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_25_down_using_devices = sum(pop_total_above_25_down_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_20_up_using_devices = sum(pop_total_above_20_up_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_3_up_using_devices = sum(pop_total_above_3_up_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_100_down_using_tests = sum(pop_total_above_100_down_using_tests, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_25_down_using_tests = sum(pop_total_above_25_down_using_tests, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_20_up_using_tests = sum(pop_total_above_20_up_using_tests, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_3_up_using_tests = sum(pop_total_above_3_up_using_tests, na.rm = T) / sum(tpop, na.rm = T), perc_total_100_20_using_devices = sum(pop_total_100_20_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_25_3_using_devices = sum(pop_total_25_3_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_100_20_using_tests = sum(pop_total_100_20_using_tests, na.rm = T) / sum(tpop, na.rm = T), perc_total_25_3_using_tests = sum(pop_total_25_3_using_tests, na.rm = T) / sum(tpop, na.rm = T)) %>% drop_na()
bg_data_quarters2 <- st_drop_geometry(md_ookla_bg) %>% group_by(GEOID, year, quarter) %>% mutate(total_percent = sum(bg_percent), true_percent = bg_percent / total_percent, total_devices = sum(devices), frac_devices = devices / total_devices, denom_devices = sum(frac_devices * true_percent * tile_percent), total_tests = sum(tests), frac_tests = tests / total_tests, denom_tests = sum(frac_tests * true_percent * tile_percent)) %>% summarise(download_devices = sum(avg_d_kbps * true_percent * frac_devices * tile_percent / denom_devices / 1000), upload_devices = sum(avg_u_kbps * true_percent * frac_devices * tile_percent / denom_devices / 1000), latency_devices = sum(avg_lat_ms * true_percent * frac_devices * tile_percent / denom_devices), download_tests = sum(avg_d_kbps * true_percent * frac_tests * tile_percent / denom_tests / 1000), upload_tests = sum(avg_u_kbps * true_percent * frac_tests * tile_percent / denom_tests / 1000), latency_tests = sum(avg_lat_ms * true_percent * frac_tests * tile_percent / denom_tests), devices = sum(devices * tile_percent), tests = sum(tests * tile_percent)) %>% mutate(county = substr(GEOID, 1, 5)) md_merged_data <- merge(bg_data_quarters2, md_ookla_ct_for_ncr[md_ookla_ct_for_ncr$measure%in% c("avg_down_using_devices", "avg_up_using_devices", "avg_down_using_tests", "avg_up_using_tests"), c("geoid", "value", "year", "measure")][], by.x = c("county", "year"), by.y = c("geoid", "year")) md_merged_data_down_devices <- md_merged_data %>% group_by(county, year) %>% filter(measure == "avg_down_using_devices") %>% mutate(sd_county_down_devices = sqrt(sum(devices * (download_devices - value) ^ 2) / (sum(devices, na.rm = T) - 1))) md_merged_data_up_devices <- md_merged_data %>% group_by(county, year) %>% filter(measure == "avg_up_using_devices") %>% mutate(sd_county_up_devices = sqrt(sum(devices * (upload_devices - value) ^ 2) / (sum(devices, na.rm = T) - 1))) md_merged_data_down_tests <- md_merged_data %>% group_by(county, year) %>% filter(measure == "avg_down_using_tests") %>% mutate(sd_county_down_tests = sqrt(sum(tests * (download_tests - value) ^ 2) / (sum(tests, na.rm = T) - 1))) md_merged_data_up_tests <- md_merged_data %>% group_by(county, year) %>% filter(measure == "avg_up_using_tests") %>% mutate(sd_county_up_tests = sqrt(sum(tests * (upload_tests - value) ^ 2) / (sum(tests, na.rm = T) - 1))) # Maryland ACS % with internet md.bg <- get_acs(geography = "block group", year = 2019, variables = c(bg_pop = "B01003_001", tpop = "B28002_001", no_internet = "B28002_013"), state = "MD", survey = "acs5", output = "wide", geometry = TRUE) md.bg$w_internet <- md.bg$tpopE - md.bg$no_internetE
md_merged_data_down_devices$above_100_down <- pnorm(100, md_merged_data_down_devices$download_devices, md_merged_data_down_devices$sd_county_down_devices, lower.tail = F) * 100 md_merged_data_down_devices$above_25_down <- pnorm(25, md_merged_data_down_devices$download_devices, md_merged_data_down_devices$sd_county_down_devices, lower.tail = F) * 100 md_merged_data_up_devices$above_20_up <- pnorm(20, md_merged_data_up_devices$upload_devices, md_merged_data_up_devices$sd_county_up_devices, lower.tail = F) * 100 md_merged_data_up_devices$above_3_up <- pnorm(3, md_merged_data_up_devices$upload_devices, md_merged_data_up_devices$sd_county_up_devices, lower.tail = F) * 100 md_merged_data_down_tests$above_100_down <- pnorm(100, md_merged_data_down_tests$download_tests, md_merged_data_down_tests$sd_county_down_tests, lower.tail = F) * 100 md_merged_data_down_tests$above_25_down <- pnorm(25, md_merged_data_down_tests$download_tests, md_merged_data_down_tests$sd_county_down_tests, lower.tail = F) * 100 md_merged_data_up_tests$above_20_up <- pnorm(20, md_merged_data_up_tests$upload_tests, md_merged_data_up_tests$sd_county_up_tests, lower.tail = F) * 100 md_merged_data_up_tests$above_3_up <- pnorm(3, md_merged_data_up_tests$upload_tests, md_merged_data_up_tests$sd_county_up_tests, lower.tail = F) * 100 md_merged_data_a <- merge(md_merged_data_down_devices[, c("GEOID", "quarter", "year","above_100_down", "above_25_down")], md_merged_data_up_devices[, c("GEOID", "quarter", "year", "above_20_up", "above_3_up")], by = c("GEOID", "quarter", "year")) md_merged_data_b <- merge(md_merged_data_down_tests[, c("GEOID", "quarter", "year","above_100_down", "above_25_down")], md_merged_data_up_tests[, c("GEOID", "quarter", "year", "above_20_up", "above_3_up")], by = c("GEOID", "quarter", "year")) md_merged_data_c <- merge(md_merged_data_a, md_merged_data_b, by = c("GEOID", "quarter", "year")) %>% rename(perc_w_int_above_100_down_using_devices = above_100_down.x, perc_w_int_above_25_down_using_devices = above_25_down.x, perc_w_int_above_20_up_using_devices = above_20_up.x, perc_w_int_above_3_up_using_devices = above_3_up.x, perc_w_int_above_100_down_using_tests = above_100_down.y, perc_w_int_above_25_down_using_tests = above_25_down.y, perc_w_int_above_20_up_using_tests = above_20_up.y, perc_w_int_above_3_up_using_tests = above_3_up.y) %>% mutate(perc_w_int_100_20_using_devices = rowMins(cbind(perc_w_int_above_100_down_using_devices, perc_w_int_above_20_up_using_devices)), perc_w_int_25_3_using_devices = rowMins(cbind(perc_w_int_above_25_down_using_devices, perc_w_int_above_3_up_using_devices)), perc_w_int_100_20_using_tests = rowMins(cbind(perc_w_int_above_100_down_using_tests, perc_w_int_above_20_up_using_tests)), perc_w_int_25_3_using_tests = rowMins(cbind(perc_w_int_above_25_down_using_tests, perc_w_int_above_3_up_using_tests))) md_merged_data_d <- st_drop_geometry(left_join(md.bg, md_merged_data_c, by = "GEOID")) %>% mutate(perc_total_above_100_down_using_devices = perc_w_int_above_100_down_using_devices * w_internet/tpopE, perc_total_above_25_down_using_devices = perc_w_int_above_25_down_using_devices * w_internet/tpopE, perc_total_above_20_up_using_devices = perc_w_int_above_20_up_using_devices * w_internet/tpopE, perc_total_above_3_up_using_devices = perc_w_int_above_3_up_using_devices * w_internet/tpopE, perc_total_above_100_down_using_tests = perc_w_int_above_100_down_using_tests * w_internet/tpopE, perc_total_above_25_down_using_tests = perc_w_int_above_25_down_using_tests * w_internet/tpopE, perc_total_above_20_up_using_tests = perc_w_int_above_20_up_using_tests * w_internet/tpopE, perc_total_above_3_up_using_tests = perc_w_int_above_3_up_using_tests * w_internet/tpopE, perc_total_100_20_using_devices = rowMins(cbind(perc_total_above_100_down_using_devices, perc_total_above_20_up_using_devices)), perc_total_25_3_using_devices = rowMins(cbind(perc_total_above_25_down_using_devices, perc_total_above_3_up_using_devices)), perc_total_100_20_using_tests = rowMins(cbind(perc_total_above_100_down_using_tests, perc_total_above_20_up_using_tests)), perc_total_25_3_using_tests = rowMins(cbind(perc_total_above_25_down_using_tests, perc_total_above_3_up_using_tests)))
md_merged_data_e <- md_merged_data_d %>% group_by(year, GEOID, NAME) %>% summarise(perc_w_int_above_100_down_using_devices = mean(perc_w_int_above_100_down_using_devices, na.rm = T), perc_w_int_above_25_down_using_devices = mean(perc_w_int_above_25_down_using_devices, na.rm = T), perc_w_int_above_20_up_using_devices = mean(perc_w_int_above_20_up_using_devices, na.rm = T), perc_w_int_above_3_up_using_devices = mean(perc_w_int_above_3_up_using_devices, na.rm = T), perc_w_int_above_100_down_using_tests = mean(perc_w_int_above_100_down_using_tests, na.rm = T), perc_w_int_above_25_down_using_tests = mean(perc_w_int_above_25_down_using_tests, na.rm = T), perc_w_int_above_20_up_using_tests = mean(perc_w_int_above_20_up_using_tests, na.rm = T), perc_w_int_above_3_up_using_tests = mean(perc_w_int_above_3_up_using_tests, na.rm = T), perc_w_int_100_20_using_devices = mean(perc_w_int_100_20_using_devices, na.rm = T), perc_w_int_25_3_using_devices = mean(perc_w_int_25_3_using_devices, na.rm = T), perc_w_int_100_20_using_tests = mean(perc_w_int_100_20_using_tests, na.rm = T), perc_w_int_25_3_using_tests = mean(perc_w_int_25_3_using_tests, na.rm = T), perc_total_above_100_down_using_devices = mean(perc_total_above_100_down_using_devices, na.rm = T), perc_total_above_25_down_using_devices = mean(perc_total_above_25_down_using_devices, na.rm = T), perc_total_above_20_up_using_devices = mean(perc_total_above_20_up_using_devices, na.rm = T), perc_total_above_3_up_using_devices = mean(perc_total_above_3_up_using_devices, na.rm = T), perc_total_above_100_down_using_tests = mean(perc_total_above_100_down_using_tests, na.rm = T), perc_total_above_25_down_using_tests = mean(perc_total_above_25_down_using_tests, na.rm = T), perc_total_above_20_up_using_tests = mean(perc_total_above_20_up_using_tests, na.rm = T), perc_total_above_3_up_using_tests = mean(perc_total_above_3_up_using_tests, na.rm = T), perc_total_100_20_using_devices = mean(perc_total_100_20_using_devices, na.rm = T), perc_total_25_3_using_devices = mean(perc_total_25_3_using_devices, na.rm = T), perc_total_100_20_using_tests = mean(perc_total_100_20_using_tests, na.rm = T), perc_total_25_3_using_tests = mean(perc_total_25_3_using_tests, na.rm = T), w_internet = mean(w_internet), tpop = mean(tpopE)) %>% mutate(tract = substr(GEOID, 1, 11), county = substr(GEOID, 1, 5), pop_w_int_above_100_down_using_devices = perc_w_int_above_100_down_using_devices * w_internet / 100, pop_w_int_above_25_down_using_devices = perc_w_int_above_25_down_using_devices * w_internet / 100, pop_w_int_above_20_up_using_devices = perc_w_int_above_20_up_using_devices * w_internet / 100, pop_w_int_above_3_up_using_devices = perc_w_int_above_3_up_using_devices * w_internet / 100, pop_w_int_100_20_using_devices = perc_w_int_100_20_using_devices * w_internet / 100, pop_w_int_25_3_using_devices = perc_w_int_25_3_using_devices * w_internet / 100, pop_w_int_above_100_down_using_tests = perc_w_int_above_100_down_using_tests * w_internet / 100, pop_w_int_above_25_down_using_tests = perc_w_int_above_25_down_using_tests * w_internet / 100, pop_w_int_above_20_up_using_tests = perc_w_int_above_20_up_using_tests * w_internet / 100, pop_w_int_above_3_up_using_tests = perc_w_int_above_3_up_using_tests * w_internet / 100, pop_w_int_100_20_using_tests = perc_w_int_100_20_using_tests * w_internet / 100, pop_w_int_25_3_using_tests = perc_w_int_25_3_using_tests * w_internet / 100, pop_total_above_100_down_using_devices = perc_w_int_above_100_down_using_devices * w_internet / 100, pop_total_above_25_down_using_devices = perc_w_int_above_25_down_using_devices * w_internet / 100, pop_total_above_20_up_using_devices = perc_w_int_above_20_up_using_devices * w_internet / 100, pop_total_above_3_up_using_devices = perc_w_int_above_3_up_using_devices * w_internet / 100, pop_total_100_20_using_devices = perc_w_int_100_20_using_devices * w_internet / 100, pop_total_25_3_using_devices = perc_w_int_25_3_using_devices * w_internet / 100, pop_total_above_100_down_using_tests = perc_w_int_above_100_down_using_tests * w_internet / 100, pop_total_above_25_down_using_tests = perc_w_int_above_25_down_using_tests * w_internet / 100, pop_total_above_20_up_using_tests = perc_w_int_above_20_up_using_tests * w_internet / 100, pop_total_above_3_up_using_tests = perc_w_int_above_3_up_using_tests * w_internet / 100, pop_total_100_20_using_tests = perc_w_int_100_20_using_tests * w_internet / 100, pop_total_25_3_using_tests = perc_w_int_25_3_using_tests * w_internet / 100, year = as.character(year)) md_tr_percs <- md_merged_data_e %>% group_by(year, tract) %>% summarize(perc_w_int_above_100_down_using_devices = sum(pop_w_int_above_100_down_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_25_down_using_devices = sum(pop_w_int_above_25_down_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_20_up_using_devices = sum(pop_w_int_above_20_up_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_3_up_using_devices = sum(pop_w_int_above_3_up_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_100_down_using_tests = sum(pop_w_int_above_100_down_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_25_down_using_tests = sum(pop_w_int_above_25_down_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_20_up_using_tests = sum(pop_w_int_above_20_up_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_3_up_using_tests = sum(pop_w_int_above_3_up_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_100_20_using_devices = sum(pop_w_int_100_20_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_25_3_using_devices = sum(pop_w_int_25_3_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_100_20_using_tests = sum(pop_w_int_100_20_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_25_3_using_tests = sum(pop_w_int_25_3_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_total_above_100_down_using_devices = sum(pop_total_above_100_down_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_25_down_using_devices = sum(pop_total_above_25_down_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_20_up_using_devices = sum(pop_total_above_20_up_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_3_up_using_devices = sum(pop_total_above_3_up_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_100_down_using_tests = sum(pop_total_above_100_down_using_tests, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_25_down_using_tests = sum(pop_total_above_25_down_using_tests, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_20_up_using_tests = sum(pop_total_above_20_up_using_tests, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_3_up_using_tests = sum(pop_total_above_3_up_using_tests, na.rm = T) / sum(tpop, na.rm = T), perc_total_100_20_using_devices = sum(pop_total_100_20_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_25_3_using_devices = sum(pop_total_25_3_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_100_20_using_tests = sum(pop_total_100_20_using_tests, na.rm = T) / sum(tpop, na.rm = T), perc_total_25_3_using_tests = sum(pop_total_25_3_using_tests, na.rm = T) / sum(tpop, na.rm = T)) %>% drop_na() md_ct_percs <- md_merged_data_e %>% group_by(year, county) %>% summarize(perc_w_int_above_100_down_using_devices = sum(pop_w_int_above_100_down_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_25_down_using_devices = sum(pop_w_int_above_25_down_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_20_up_using_devices = sum(pop_w_int_above_20_up_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_3_up_using_devices = sum(pop_w_int_above_3_up_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_100_down_using_tests = sum(pop_w_int_above_100_down_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_25_down_using_tests = sum(pop_w_int_above_25_down_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_20_up_using_tests = sum(pop_w_int_above_20_up_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_above_3_up_using_tests = sum(pop_w_int_above_3_up_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_100_20_using_devices = sum(pop_w_int_100_20_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_25_3_using_devices = sum(pop_w_int_25_3_using_devices, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_100_20_using_tests = sum(pop_w_int_100_20_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_w_int_25_3_using_tests = sum(pop_w_int_25_3_using_tests, na.rm = T) / sum(w_internet, na.rm = T), perc_total_above_100_down_using_devices = sum(pop_total_above_100_down_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_25_down_using_devices = sum(pop_total_above_25_down_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_20_up_using_devices = sum(pop_total_above_20_up_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_3_up_using_devices = sum(pop_total_above_3_up_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_100_down_using_tests = sum(pop_total_above_100_down_using_tests, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_25_down_using_tests = sum(pop_total_above_25_down_using_tests, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_20_up_using_tests = sum(pop_total_above_20_up_using_tests, na.rm = T) / sum(tpop, na.rm = T), perc_total_above_3_up_using_tests = sum(pop_total_above_3_up_using_tests, na.rm = T) / sum(tpop, na.rm = T), perc_total_100_20_using_devices = sum(pop_total_100_20_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_25_3_using_devices = sum(pop_total_25_3_using_devices, na.rm = T) / sum(tpop, na.rm = T), perc_total_100_20_using_tests = sum(pop_total_100_20_using_tests, na.rm = T) / sum(tpop, na.rm = T), perc_total_25_3_using_tests = sum(pop_total_25_3_using_tests, na.rm = T) / sum(tpop, na.rm = T)) %>% drop_na()
# tract level final_va_tr_percs <- va_tr_percs %>% merge(st_drop_geometry(va.tr)[, c("GEOID", "NAME")], by.x = "tract", by.y = "GEOID") %>% gather(measure, value, c("perc_w_int_above_100_down_using_devices", "perc_w_int_above_25_down_using_devices", "perc_w_int_above_20_up_using_devices", "perc_w_int_above_3_up_using_devices", "perc_w_int_above_100_down_using_tests", "perc_w_int_above_25_down_using_tests", "perc_w_int_above_20_up_using_tests", "perc_w_int_above_3_up_using_tests", "perc_w_int_100_20_using_devices", "perc_w_int_25_3_using_devices", "perc_w_int_100_20_using_tests", "perc_w_int_25_3_using_tests", "perc_total_above_100_down_using_devices", "perc_total_above_25_down_using_devices", "perc_total_above_20_up_using_devices", "perc_total_above_3_up_using_devices", "perc_total_above_100_down_using_tests", "perc_total_above_25_down_using_tests", "perc_total_above_20_up_using_tests", "perc_total_above_3_up_using_tests", "perc_total_100_20_using_devices", "perc_total_25_3_using_devices", "perc_total_100_20_using_tests", "perc_total_25_3_using_tests")) %>% rename(geoid = tract, region_name = NAME) %>% mutate(measure_type = "percent", measure_units = as.character(NA), region_type = "tract", value = value * 100) %>% relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") final_dc_tr_percs <- dc_tr_percs %>% merge(st_drop_geometry(dc.tr)[, c("GEOID", "NAME")], by.x = "tract", by.y = "GEOID") %>% gather(measure, value, c("perc_w_int_above_100_down_using_devices", "perc_w_int_above_25_down_using_devices", "perc_w_int_above_20_up_using_devices", "perc_w_int_above_3_up_using_devices", "perc_w_int_above_100_down_using_tests", "perc_w_int_above_25_down_using_tests", "perc_w_int_above_20_up_using_tests", "perc_w_int_above_3_up_using_tests", "perc_w_int_100_20_using_devices", "perc_w_int_25_3_using_devices", "perc_w_int_100_20_using_tests", "perc_w_int_25_3_using_tests", "perc_total_above_100_down_using_devices", "perc_total_above_25_down_using_devices", "perc_total_above_20_up_using_devices", "perc_total_above_3_up_using_devices", "perc_total_above_100_down_using_tests", "perc_total_above_25_down_using_tests", "perc_total_above_20_up_using_tests", "perc_total_above_3_up_using_tests", "perc_total_100_20_using_devices", "perc_total_25_3_using_devices", "perc_total_100_20_using_tests", "perc_total_25_3_using_tests")) %>% rename(geoid = tract, region_name = NAME) %>% mutate(measure_type = "percent", measure_units = as.character(NA), region_type = "tract", value = value * 100) %>% relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") final_md_tr_percs <- md_tr_percs %>% merge(st_drop_geometry(md.tr)[, c("GEOID", "NAME")], by.x = "tract", by.y = "GEOID") %>% gather(measure, value, c("perc_w_int_above_100_down_using_devices", "perc_w_int_above_25_down_using_devices", "perc_w_int_above_20_up_using_devices", "perc_w_int_above_3_up_using_devices", "perc_w_int_above_100_down_using_tests", "perc_w_int_above_25_down_using_tests", "perc_w_int_above_20_up_using_tests", "perc_w_int_above_3_up_using_tests", "perc_w_int_100_20_using_devices", "perc_w_int_25_3_using_devices", "perc_w_int_100_20_using_tests", "perc_w_int_25_3_using_tests", "perc_total_above_100_down_using_devices", "perc_total_above_25_down_using_devices", "perc_total_above_20_up_using_devices", "perc_total_above_3_up_using_devices", "perc_total_above_100_down_using_tests", "perc_total_above_25_down_using_tests", "perc_total_above_20_up_using_tests", "perc_total_above_3_up_using_tests", "perc_total_100_20_using_devices", "perc_total_25_3_using_devices", "perc_total_100_20_using_tests", "perc_total_25_3_using_tests")) %>% rename(geoid = tract, region_name = NAME) %>% mutate(measure_type = "percent", measure_units = as.character(NA), region_type = "tract", value = value * 100) %>% relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") # county final_va_ct_percs <- va_ct_percs %>% merge(st_drop_geometry(va.ct)[, c("GEOID", "NAME")], by.x = "county", by.y = "GEOID") %>% gather(measure, value, c("perc_w_int_above_100_down_using_devices", "perc_w_int_above_25_down_using_devices", "perc_w_int_above_20_up_using_devices", "perc_w_int_above_3_up_using_devices", "perc_w_int_above_100_down_using_tests", "perc_w_int_above_25_down_using_tests", "perc_w_int_above_20_up_using_tests", "perc_w_int_above_3_up_using_tests", "perc_w_int_100_20_using_devices", "perc_w_int_25_3_using_devices", "perc_w_int_100_20_using_tests", "perc_w_int_25_3_using_tests", "perc_total_above_100_down_using_devices", "perc_total_above_25_down_using_devices", "perc_total_above_20_up_using_devices", "perc_total_above_3_up_using_devices", "perc_total_above_100_down_using_tests", "perc_total_above_25_down_using_tests", "perc_total_above_20_up_using_tests", "perc_total_above_3_up_using_tests", "perc_total_100_20_using_devices", "perc_total_25_3_using_devices", "perc_total_100_20_using_tests", "perc_total_25_3_using_tests")) %>% rename(geoid = county, region_name = NAME) %>% mutate(measure_type = "percent", measure_units = as.character(NA), region_type = "county", value = value * 100) %>% relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") final_dc_ct_percs <- dc_ct_percs %>% merge(st_drop_geometry(dc.ct)[, c("GEOID", "NAME")], by.x = "county", by.y = "GEOID") %>% gather(measure, value, c("perc_w_int_above_100_down_using_devices", "perc_w_int_above_25_down_using_devices", "perc_w_int_above_20_up_using_devices", "perc_w_int_above_3_up_using_devices", "perc_w_int_above_100_down_using_tests", "perc_w_int_above_25_down_using_tests", "perc_w_int_above_20_up_using_tests", "perc_w_int_above_3_up_using_tests", "perc_w_int_100_20_using_devices", "perc_w_int_25_3_using_devices", "perc_w_int_100_20_using_tests", "perc_w_int_25_3_using_tests", "perc_total_above_100_down_using_devices", "perc_total_above_25_down_using_devices", "perc_total_above_20_up_using_devices", "perc_total_above_3_up_using_devices", "perc_total_above_100_down_using_tests", "perc_total_above_25_down_using_tests", "perc_total_above_20_up_using_tests", "perc_total_above_3_up_using_tests", "perc_total_100_20_using_devices", "perc_total_25_3_using_devices", "perc_total_100_20_using_tests", "perc_total_25_3_using_tests")) %>% rename(geoid = county, region_name = NAME) %>% mutate(measure_type = "percent", measure_units = as.character(NA), region_type = "county", value = value * 100) %>% relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") final_md_ct_percs <- md_ct_percs %>% merge(st_drop_geometry(md.ct)[, c("GEOID", "NAME")], by.x = "county", by.y = "GEOID") %>% gather(measure, value, c("perc_w_int_above_100_down_using_devices", "perc_w_int_above_25_down_using_devices", "perc_w_int_above_20_up_using_devices", "perc_w_int_above_3_up_using_devices", "perc_w_int_above_100_down_using_tests", "perc_w_int_above_25_down_using_tests", "perc_w_int_above_20_up_using_tests", "perc_w_int_above_3_up_using_tests", "perc_w_int_100_20_using_devices", "perc_w_int_25_3_using_devices", "perc_w_int_100_20_using_tests", "perc_w_int_25_3_using_tests", "perc_total_above_100_down_using_devices", "perc_total_above_25_down_using_devices", "perc_total_above_20_up_using_devices", "perc_total_above_3_up_using_devices", "perc_total_above_100_down_using_tests", "perc_total_above_25_down_using_tests", "perc_total_above_20_up_using_tests", "perc_total_above_3_up_using_tests", "perc_total_100_20_using_devices", "perc_total_25_3_using_devices", "perc_total_100_20_using_tests", "perc_total_25_3_using_tests")) %>% rename(geoid = county, region_name = NAME) %>% mutate(measure_type = "percent", measure_units = as.character(NA), region_type = "county", value = value * 100) %>% relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") # health district final_va_hd_percs <- va_hd_percs %>% merge(health_district_geoids, by.x = "health_district", "region_name") %>% gather(measure, value, c("perc_w_int_above_100_down_using_devices", "perc_w_int_above_25_down_using_devices", "perc_w_int_above_20_up_using_devices", "perc_w_int_above_3_up_using_devices", "perc_w_int_above_100_down_using_tests", "perc_w_int_above_25_down_using_tests", "perc_w_int_above_20_up_using_tests", "perc_w_int_above_3_up_using_tests", "perc_w_int_100_20_using_devices", "perc_w_int_25_3_using_devices", "perc_w_int_100_20_using_tests", "perc_w_int_25_3_using_tests", "perc_total_above_100_down_using_devices", "perc_total_above_25_down_using_devices", "perc_total_above_20_up_using_devices", "perc_total_above_3_up_using_devices", "perc_total_above_100_down_using_tests", "perc_total_above_25_down_using_tests", "perc_total_above_20_up_using_tests", "perc_total_above_3_up_using_tests", "perc_total_100_20_using_devices", "perc_total_25_3_using_devices", "perc_total_100_20_using_tests", "perc_total_25_3_using_tests")) %>% rename(region_name = health_district) %>% mutate(measure_type = "percent", measure_units = as.character(NA), value = value * 100) %>% relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units")
# send to db con <- get_db_conn(db_pass = "rsu8zvrsu8zv") dc_dbWriteTable(con, "dc_digital_communications", "va_tr_ookla_2019_2021_percent_above_threshold", final_va_tr_percs) dc_dbWriteTable(con, "dc_digital_communications", "va_ct_ookla_2019_2021_percent_above_threshold", final_va_ct_percs) dc_dbWriteTable(con, "dc_digital_communications", "va_hd_ookla_2019_2021_percent_above_threshold", final_va_hd_percs) dc_dbWriteTable(con, "dc_digital_communications", "dc_tr_ookla_2019_2021_percent_above_threshold", final_dc_tr_percs) dc_dbWriteTable(con, "dc_digital_communications", "dc_ct_ookla_2019_2021_percent_above_threshold", final_dc_ct_percs) dc_dbWriteTable(con, "dc_digital_communications", "md_tr_ookla_2019_2021_percent_above_threshold", final_md_tr_percs) dc_dbWriteTable(con, "dc_digital_communications", "md_ct_ookla_2019_2021_percent_above_threshold", final_md_ct_percs) dbDisconnect(con)
# formatting virginia block group data final_va_bg_percs <- va_merged_data_e %>% gather(measure, value, c("perc_w_int_above_100_down_using_devices", "perc_w_int_above_25_down_using_devices", "perc_w_int_above_20_up_using_devices", "perc_w_int_above_3_up_using_devices", "perc_w_int_above_100_down_using_tests", "perc_w_int_above_25_down_using_tests", "perc_w_int_above_20_up_using_tests", "perc_w_int_above_3_up_using_tests", "perc_w_int_100_20_using_devices", "perc_w_int_25_3_using_devices", "perc_w_int_100_20_using_tests", "perc_w_int_25_3_using_tests", "perc_total_above_100_down_using_devices", "perc_total_above_25_down_using_devices", "perc_total_above_20_up_using_devices", "perc_total_above_3_up_using_devices", "perc_total_above_100_down_using_tests", "perc_total_above_25_down_using_tests", "perc_total_above_20_up_using_tests", "perc_total_above_3_up_using_tests", "perc_total_100_20_using_devices", "perc_total_25_3_using_devices", "perc_total_100_20_using_tests", "perc_total_25_3_using_tests")) %>% rename(geoid = GEOID, region_name = NAME) %>% mutate(measure_type = "percent", measure_units = as.character(NA), region_type = "block group") %>% select("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") %>% relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") # formatting dc block group data final_dc_bg_percs <- dc_merged_data_e %>% gather(measure, value, c("perc_w_int_above_100_down_using_devices", "perc_w_int_above_25_down_using_devices", "perc_w_int_above_20_up_using_devices", "perc_w_int_above_3_up_using_devices", "perc_w_int_above_100_down_using_tests", "perc_w_int_above_25_down_using_tests", "perc_w_int_above_20_up_using_tests", "perc_w_int_above_3_up_using_tests", "perc_w_int_100_20_using_devices", "perc_w_int_25_3_using_devices", "perc_w_int_100_20_using_tests", "perc_w_int_25_3_using_tests", "perc_total_above_100_down_using_devices", "perc_total_above_25_down_using_devices", "perc_total_above_20_up_using_devices", "perc_total_above_3_up_using_devices", "perc_total_above_100_down_using_tests", "perc_total_above_25_down_using_tests", "perc_total_above_20_up_using_tests", "perc_total_above_3_up_using_tests", "perc_total_100_20_using_devices", "perc_total_25_3_using_devices", "perc_total_100_20_using_tests", "perc_total_25_3_using_tests")) %>% rename(geoid = GEOID, region_name = NAME) %>% mutate(measure_type = "percent", measure_units = as.character(NA), region_type = "block group") %>% select("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") %>% relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") # formatting maryland block group data final_md_bg_percs <- md_merged_data_e %>% gather(measure, value, c("perc_w_int_above_100_down_using_devices", "perc_w_int_above_25_down_using_devices", "perc_w_int_above_20_up_using_devices", "perc_w_int_above_3_up_using_devices", "perc_w_int_above_100_down_using_tests", "perc_w_int_above_25_down_using_tests", "perc_w_int_above_20_up_using_tests", "perc_w_int_above_3_up_using_tests", "perc_w_int_100_20_using_devices", "perc_w_int_25_3_using_devices", "perc_w_int_100_20_using_tests", "perc_w_int_25_3_using_tests", "perc_total_above_100_down_using_devices", "perc_total_above_25_down_using_devices", "perc_total_above_20_up_using_devices", "perc_total_above_3_up_using_devices", "perc_total_above_100_down_using_tests", "perc_total_above_25_down_using_tests", "perc_total_above_20_up_using_tests", "perc_total_above_3_up_using_tests", "perc_total_100_20_using_devices", "perc_total_25_3_using_devices", "perc_total_100_20_using_tests", "perc_total_25_3_using_tests")) %>% rename(geoid = GEOID, region_name = NAME) %>% mutate(measure_type = "percent", measure_units = as.character(NA), region_type = "block group") %>% select("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") %>% relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") # send to db con <- get_db_conn(db_pass = "rsu8zvrsu8zv") dc_dbWriteTable(con, "dc_digital_communications", "va_bg_ookla_2019_2021_percent_above_threshold", final_va_bg_percs) dc_dbWriteTable(con, "dc_digital_communications", "dc_bg_ookla_2019_2021_percent_above_threshold", final_dc_bg_percs) dc_dbWriteTable(con, "dc_digital_communications", "md_bg_ookla_2019_2021_percent_above_threshold", final_md_bg_percs) dbDisconnect(con)
# read in dcmdva data for ncr subsetting con <- get_db_conn(db_pass = "rsu8zvrsu8zv") va_ookla_bg_for_ncr.2 <- st_read(con, query = "SELECT * FROM dc_digital_communications.va_bg_ookla_2019_2021_percent_above_threshold") va_ookla_tr_for_ncr.2 <- st_read(con, query = "SELECT * FROM dc_digital_communications.va_tr_ookla_2019_2021_percent_above_threshold") va_ookla_ct_for_ncr.2 <- st_read(con, query = "SELECT * FROM dc_digital_communications.va_ct_ookla_2019_2021_percent_above_threshold") md_ookla_bg_for_ncr.2 <- st_read(con, query = "SELECT * FROM dc_digital_communications.md_bg_ookla_2019_2021_percent_above_threshold") md_ookla_tr_for_ncr.2 <- st_read(con, query = "SELECT * FROM dc_digital_communications.md_tr_ookla_2019_2021_percent_above_threshold") md_ookla_ct_for_ncr.2 <- st_read(con, query = "SELECT * FROM dc_digital_communications.md_ct_ookla_2019_2021_percent_above_threshold") dc_ookla_bg_for_ncr.2 <- st_read(con, query = "SELECT * FROM dc_digital_communications.dc_bg_ookla_2019_2021_percent_above_threshold") dc_ookla_tr_for_ncr.2 <- st_read(con, query = "SELECT * FROM dc_digital_communications.dc_tr_ookla_2019_2021_percent_above_threshold") dc_ookla_ct_for_ncr.2 <- st_read(con, query = "SELECT * FROM dc_digital_communications.dc_ct_ookla_2019_2021_percent_above_threshold") dbDisconnect(con) # subset data ncr_ookla_bg.2 <- rbind(va_ookla_bg_for_ncr.2, md_ookla_bg_for_ncr.2, dc_ookla_bg_for_ncr.2) %>% filter(substr(geoid, 1, 5) %in% c("51013", "51059", "51107", "51510", "51600", "51153", "51683", "51685", "51610", "11001", "24031", "24033", "24017", "24021")) ncr_ookla_tr.2 <- rbind(va_ookla_tr_for_ncr.2, md_ookla_tr_for_ncr.2, dc_ookla_tr_for_ncr.2) %>% filter(substr(geoid, 1, 5) %in% c("51013", "51059", "51107", "51510", "51600", "51153", "51683", "51685", "51610", "11001", "24031", "24033", "24017", "24021")) ncr_ookla_ct.2 <- rbind(va_ookla_ct_for_ncr.2, md_ookla_ct_for_ncr.2, dc_ookla_ct_for_ncr.2) %>% filter(substr(geoid, 1, 5) %in% c("51013", "51059", "51107", "51510", "51600", "51153", "51683", "51685", "51610", "11001", "24031", "24033", "24017", "24021")) # send ncr data to db con <- get_db_conn(db_pass = "rsu8zvrsu8zv") dc_dbWriteTable(con, "dc_digital_communications", "ncr_bg_ookla_2019_2021_percent_above_threshold", ncr_ookla_bg.2) dc_dbWriteTable(con, "dc_digital_communications", "ncr_tr_ookla_2019_2021_percent_above_threshold", ncr_ookla_tr.2) dc_dbWriteTable(con, "dc_digital_communications", "ncr_ct_ookla_2019_2021_percent_above_threshold", ncr_ookla_ct.2) dbDisconnect(con)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.