load packages

library(sf)
library(tidyverse)
library(tmap)
library(tmaptools)
library(tigris)
library(tidycensus)
library(rmapshaper)
library(matrixStats)
library(SpatialAcc)
library(reticulate)
library(dplyr)
library(tidygeocoder)
library(readxl)
library(DBI)

read in all internet packages

# read in data
con <- get_db_conn(db_pass = "rsu8zvrsu8zv")
va_broadband_now_prices <- st_read(con, query = "SELECT * FROM dc_digital_communications.va_bg_broadband_now_2021_all_internet_packages")
dc_broadband_now_prices <- st_read(con, query = "SELECT * FROM dc_digital_communications.dc_bg_broadband_now_2021_all_internet_packages")
md_broadband_now_prices <- st_read(con, query = "SELECT * FROM dc_digital_communications.md_bg_broadband_now_2021_all_internet_packages")
dmv_broadband_now_prices <- st_read(con, query = "SELECT * FROM dc_digital_communications.dmv_bg_broadband_now_2021_all_internet_packages")
dbDisconnect(con)

# add tract and county columns (and convert block group to string)
md_broadband_now_prices <- md_broadband_now_prices %>%
  mutate(block_group = as.character(block_group),
         tract = substr(block_group, 1, 11),
         county = substr(block_group, 1, 5))
va_broadband_now_prices <- va_broadband_now_prices %>%
  mutate(block_group = as.character(block_group),
         tract = substr(block_group, 1, 11),
         county = substr(block_group, 1, 5))
dc_broadband_now_prices <- dc_broadband_now_prices %>%
  mutate(block_group = as.character(block_group),
         tract = substr(block_group, 1, 11),
         county = substr(block_group, 1, 5))
dmv_broadband_now_prices <- dmv_broadband_now_prices %>%
  mutate(block_group = as.character(block_group),
         tract = substr(block_group, 1, 11),
         county = substr(block_group, 1, 5))

# subset data for those above 100/20
va_broadband_now_prices.subset <- va_broadband_now_prices %>% filter(download >= 100, upload >= 20)
dc_broadband_now_prices.subset <- dc_broadband_now_prices %>% filter(download >= 100, upload >= 20)
md_broadband_now_prices.subset <- md_broadband_now_prices %>% filter(download >= 100, upload >= 20)
dmv_broadband_now_prices.subset <- dmv_broadband_now_prices %>% filter(download >= 100, upload >= 20)

# get median and minimum qualifying packages
get_data.subset <- function(va_broadband_now_prices.subset, county = T, tract = F)
{
  if (county)
  {
    va_100_20_med_prices <- va_broadband_now_prices.subset %>%
      group_by(county) %>%
      summarize(med_price_100_20 = median(price, na.rm = T))
    va_100_20_min_prices <- va_broadband_now_prices.subset %>%
      group_by(block_group) %>%
      slice_max(order_by = -price, n = 1)  %>%
      group_by(county) %>%
      summarize(min_price_100_20 = median(price, na.rm = T)) %>%
      full_join(va_100_20_med_prices, by = "county")
  }
  else if (tract)
  {
    va_100_20_med_prices <- va_broadband_now_prices.subset %>%
      group_by(tract) %>%
      summarize(med_price_100_20 = median(price, na.rm = T))
    va_100_20_min_prices <- va_broadband_now_prices.subset %>%
      group_by(block_group) %>%
      slice_max(order_by = -price, n = 1)  %>%
      group_by(tract) %>%
      summarize(min_price_100_20 = median(price, na.rm = T)) %>%
      full_join(va_100_20_med_prices, by = "tract")
  }
  else
  {
    va_100_20_med_prices <- va_broadband_now_prices.subset %>%
      group_by(block_group) %>%
      summarize(med_price_100_20 = median(price, na.rm = T))
    va_100_20_min_prices <- va_broadband_now_prices.subset %>%
      group_by(block_group) %>% slice_max(order_by = -price, n = 1)  %>%
      group_by(block_group) %>%
      summarize(min_price_100_20 = median(price, na.rm = T)) %>%
      full_join(va_100_20_med_prices, by = "block_group")
  }
  return(va_100_20_min_prices)
}

# county level
va_med_and_min_prices.subset.ct <- get_data.subset(va_broadband_now_prices.subset)
dc_med_and_min_prices.subset.ct <- get_data.subset(dc_broadband_now_prices.subset)
md_med_and_min_prices.subset.ct <- get_data.subset(md_broadband_now_prices.subset)
dmv_med_and_min_prices.subset.ct <- get_data.subset(dmv_broadband_now_prices.subset)

# tract level
va_med_and_min_prices.subset.tr <- get_data.subset(va_broadband_now_prices.subset, county = F, tract = T)
dc_med_and_min_prices.subset.tr <- get_data.subset(dc_broadband_now_prices.subset, county = F, tract = T)
md_med_and_min_prices.subset.tr <- get_data.subset(md_broadband_now_prices.subset, county = F, tract = T)
dmv_med_and_min_prices.subset.tr <- get_data.subset(dmv_broadband_now_prices.subset, county = F, tract = T)

# block group level
va_med_and_min_prices.subset.bg <- get_data.subset(va_broadband_now_prices.subset, county = F, tract = F)
dc_med_and_min_prices.subset.bg <- get_data.subset(dc_broadband_now_prices.subset, county = F, tract = F)
md_med_and_min_prices.subset.bg <- get_data.subset(md_broadband_now_prices.subset, county = F, tract = F)
dmv_med_and_min_prices.subset.bg <- get_data.subset(dmv_broadband_now_prices.subset, county = F, tract = F)

get subsets and merge with ACS data

# ACS data with median HH income
dmv.bg <- get_acs(geography = "block group",
                 year = 2019,
                 variables = c(median_household_income = "B19013_001"),
                 state = c("VA", "DC", "MD"),
                 survey = "acs5",
                 output = "wide",
                 geometry = TRUE)
dmv.tr <- get_acs(geography = "tract",
                 year = 2019,
                 variables = c(median_household_income = "B19013_001"),
                 state = c("VA", "DC", "MD"),
                 survey = "acs5",
                 output = "wide",
                 geometry = TRUE)
dmv.ct <- get_acs(geography = "county",
                 year = 2019,
                 variables = c(median_household_income = "B19013_001"),
                 state = c("VA", "DC", "MD"),
                 survey = "acs5",
                 output = "wide",
                 geometry = TRUE)

# subset for NCR
dmv_prices.subset.ct <- dmv.ct[dmv.ct$GEOID  %in% c("51013", "51059", "51107", "51510", "51600", "51153", "51683", "51685", "51610", "11001", "24031", "24033", "24017", "24021"),] %>% left_join(dmv_med_and_min_prices.subset.ct, by = c("GEOID" = "county"))
dmv_prices.subset.tr <- dmv.tr[substr(dmv.tr$GEOID, 1, 5)  %in% c("51013", "51059", "51107", "51510", "51600", "51153", "51683", "51685", "51610", "11001", "24031", "24033", "24017", "24021"),] %>% left_join(dmv_med_and_min_prices.subset.tr, by = c("GEOID" = "tract"))
dmv_prices.subset.bg <- dmv.bg[substr(dmv.bg$GEOID, 1, 5) %in% c("51013", "51059", "51107", "51510", "51600", "51153", "51683", "51685", "51610", "11001", "24031", "24033", "24017", "24021"),] %>% left_join(dmv_med_and_min_prices.subset.bg, by = c("GEOID" = "block_group"))

va_prices.subset.ct <- dmv.ct[substr(dmv.ct$GEOID, 1, 2) == "51",] %>% left_join(va_med_and_min_prices.subset.ct, by = c("GEOID" = "county"))
va_prices.subset.tr <- dmv.tr[substr(dmv.tr$GEOID, 1, 2) == "51",] %>% left_join(va_med_and_min_prices.subset.tr, by = c("GEOID" = "tract"))
va_prices.subset.bg <- dmv.bg[substr(dmv.bg$GEOID, 1, 2) == "51",] %>% left_join(va_med_and_min_prices.subset.bg, by = c("GEOID" = "block_group"))

dc_prices.subset.ct <- dmv.ct[substr(dmv.ct$GEOID, 1, 2) == "11",] %>% left_join(dc_med_and_min_prices.subset.ct, by = c("GEOID" = "county"))
dc_prices.subset.tr <- dmv.tr[substr(dmv.tr$GEOID, 1, 2) == "11",] %>% left_join(dc_med_and_min_prices.subset.tr, by = c("GEOID" = "tract"))
dc_prices.subset.bg <- dmv.bg[substr(dmv.bg$GEOID, 1, 2) == "11",] %>% left_join(dc_med_and_min_prices.subset.bg, by = c("GEOID" = "block_group"))

md_prices.subset.ct <- dmv.ct[substr(dmv.ct$GEOID, 1, 2) == "24",] %>% left_join(md_med_and_min_prices.subset.ct, by = c("GEOID" = "county"))
md_prices.subset.tr <- dmv.tr[substr(dmv.tr$GEOID, 1, 2) == "24",] %>% left_join(md_med_and_min_prices.subset.tr, by = c("GEOID" = "tract"))
md_prices.subset.bg <- dmv.bg[substr(dmv.bg$GEOID, 1, 2) == "24",] %>% left_join(md_med_and_min_prices.subset.bg, by = c("GEOID" = "block_group"))

ncr.isp.price.subset.ct <- st_drop_geometry(dmv_prices.subset.ct) %>%
  mutate(year = 2021, region_type = "county", measure_type = "price", measure_units = "dollars") %>%
  gather(measure, value, c(min_price_100_20, med_price_100_20)) %>%
  rename(geoid = GEOID, region_name = NAME) %>%
  select("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") %>%
  relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") %>%
  drop_na()
ncr.isp.price.subset.tr <- st_drop_geometry(dmv_prices.subset.tr) %>%
  mutate(year = 2021, region_type = "tract", measure_type = "price", measure_units = "dollars") %>%
  gather(measure, value, c(min_price_100_20, med_price_100_20)) %>%
  rename(geoid = GEOID, region_name = NAME) %>%
  select("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") %>%
  relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") %>%
  drop_na()
ncr.isp.price.subset.bg <- st_drop_geometry(dmv_prices.subset.bg) %>%
  mutate(year = 2021, region_type = "block group", measure_type = "price", measure_units = "dollars") %>%
  gather(measure, value, c(min_price_100_20, med_price_100_20)) %>%
  rename(geoid = GEOID, region_name = NAME) %>%
  select("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") %>%
  relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") %>%
  drop_na()

va.isp.price.subset.ct <- st_drop_geometry(va_prices.subset.ct) %>%
  mutate(year = 2021, region_type = "county", measure_type = "price", measure_units = "dollars") %>%
  gather(measure, value, c(min_price_100_20, med_price_100_20)) %>%
  rename(geoid = GEOID, region_name = NAME) %>%
  select("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") %>%
  relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") %>%
  drop_na()
va.isp.price.subset.tr <- st_drop_geometry(va_prices.subset.tr) %>%
  mutate(year = 2021, region_type = "tract", measure_type = "price", measure_units = "dollars") %>%
  gather(measure, value, c(min_price_100_20, med_price_100_20)) %>%
  rename(geoid = GEOID, region_name = NAME) %>%
  select("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") %>%
  relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") %>%
  drop_na()
va.isp.price.subset.bg <- st_drop_geometry(va_prices.subset.bg) %>%
  mutate(year = 2021, region_type = "block group", measure_type = "price", measure_units = "dollars") %>%
  gather(measure, value, c(min_price_100_20, med_price_100_20)) %>%
  rename(geoid = GEOID, region_name = NAME) %>%
  select("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") %>%
  relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") %>%
  drop_na()

dc.isp.price.subset.ct <- st_drop_geometry(dc_prices.subset.ct) %>%
  mutate(year = 2021, region_type = "county", measure_type = "price", measure_units = "dollars") %>%
  gather(measure, value, c(min_price_100_20, med_price_100_20)) %>%
  rename(geoid = GEOID, region_name = NAME) %>%
  select("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") %>%
  relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") %>%
  drop_na()
dc.isp.price.subset.tr <- st_drop_geometry(dc_prices.subset.tr) %>%
  mutate(year = 2021, region_type = "tract", measure_type = "price", measure_units = "dollars") %>%
  gather(measure, value, c(min_price_100_20, med_price_100_20)) %>%
  rename(geoid = GEOID, region_name = NAME) %>%
  select("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") %>%
  relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") %>%
  drop_na()
dc.isp.price.subset.bg <- st_drop_geometry(dc_prices.subset.bg) %>%
  mutate(year = 2021, region_type = "block group", measure_type = "price", measure_units = "dollars") %>%
  gather(measure, value, c(min_price_100_20, med_price_100_20)) %>%
  rename(geoid = GEOID, region_name = NAME) %>%
  select("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") %>%
  relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") %>%
  drop_na()

md.isp.price.subset.ct <- st_drop_geometry(md_prices.subset.ct) %>%
  mutate(year = 2021, region_type = "county", measure_type = "price", measure_units = "dollars") %>%
  gather(measure, value, c(min_price_100_20, med_price_100_20)) %>%
  rename(geoid = GEOID, region_name = NAME) %>%
  select("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") %>%
  relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") %>%
  drop_na()
md.isp.price.subset.tr <- st_drop_geometry(md_prices.subset.tr) %>%
  mutate(year = 2021, region_type = "tract", measure_type = "price", measure_units = "dollars") %>%
  gather(measure, value, c(min_price_100_20, med_price_100_20)) %>%
  rename(geoid = GEOID, region_name = NAME) %>%
  select("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") %>%
  relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") %>%
  drop_na()
md.isp.price.subset.bg <- st_drop_geometry(md_prices.subset.bg) %>%
  mutate(year = 2021, region_type = "block group", measure_type = "price", measure_units = "dollars") %>%
  gather(measure, value, c(min_price_100_20, med_price_100_20)) %>%
  rename(geoid = GEOID, region_name = NAME) %>%
  select("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") %>%
  relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") %>%
  drop_na()

send to database

con <- get_db_conn(db_pass = "rsu8zvrsu8zv")
dc_dbWriteTable(con, "dc_digital_communications", "ncr_ct_broadband_now_2021_internet_package_price_100_20", ncr.isp.price.subset.ct)
dc_dbWriteTable(con, "dc_digital_communications", "va_ct_broadband_now_2021_internet_package_price_100_20", va.isp.price.subset.ct)
dc_dbWriteTable(con, "dc_digital_communications", "dc_ct_broadband_now_2021_internet_package_price_100_20", dc.isp.price.subset.ct)
dc_dbWriteTable(con, "dc_digital_communications", "md_ct_broadband_now_2021_internet_package_price_100_20", md.isp.price.subset.ct)

dc_dbWriteTable(con, "dc_digital_communications", "ncr_tr_broadband_now_2021_internet_package_price_100_20", ncr.isp.price.subset.tr)
dc_dbWriteTable(con, "dc_digital_communications", "va_tr_broadband_now_2021_internet_package_price_100_20", va.isp.price.subset.tr)
dc_dbWriteTable(con, "dc_digital_communications", "dc_tr_broadband_now_2021_internet_package_price_100_20", dc.isp.price.subset.tr)
dc_dbWriteTable(con, "dc_digital_communications", "md_tr_broadband_now_2021_internet_package_price_100_20", md.isp.price.subset.tr)

dc_dbWriteTable(con, "dc_digital_communications", "ncr_bg_broadband_now_2021_internet_package_price_100_20", ncr.isp.price.subset.bg)
dc_dbWriteTable(con, "dc_digital_communications", "va_bg_broadband_now_2021_internet_package_price_100_20", va.isp.price.subset.bg)
dc_dbWriteTable(con, "dc_digital_communications", "dc_bg_broadband_now_2021_internet_package_price_100_20", dc.isp.price.subset.bg)
dc_dbWriteTable(con, "dc_digital_communications", "md_bg_broadband_now_2021_internet_package_price_100_20", md.isp.price.subset.bg)
dbDisconnect(con)

add health districts

health_district <- read.csv("/project/biocomplexity/sdad/projects_data/vdh/va_county_to_hd.csv")
health_district$county_id <- as.character(health_district$county_id)

va_bg_broadband_now_prices.subset2 <- left_join(health_district, va_broadband_now_prices.subset, by = c("county_id" = "county"))

va_100_20_med_prices <- va_bg_broadband_now_prices.subset2 %>%
  group_by(health_district) %>%
  summarize(med_price_100_20 = median(price, na.rm = T))
va_100_20_min_prices <- va_bg_broadband_now_prices.subset2 %>%
  group_by(block_group) %>%
  slice_max(order_by = -price, n = 1)  %>%
  group_by(health_district) %>%
  summarize(min_price_100_20 = median(price, na.rm = T)) %>%
  full_join(va_100_20_med_prices, by = "health_district")

con <- get_db_conn(db_pass = "rsu8zvrsu8zv")
health_district_geoids <- st_read(con, query = "SELECT * FROM dc_geographies.va_hd_vdh_2021_health_district_geo_names")
DBI::dbDisconnect(con)

va.isp.price.hd2 <- merge(va_100_20_min_prices, health_district_geoids, by.x = "health_district", by.y = "region_name") %>%
  mutate(year = 2021, region_type = "health district", measure_type = "price", measure_units = "dollars") %>%
  gather(measure, value, c(med_price_100_20, min_price_100_20)) %>%
  rename(region_name = health_district) %>%
  select("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") %>%
  relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") %>%
  drop_na()

con <- get_db_conn(db_pass = "rsu8zvrsu8zv")
dc_dbWriteTable(con, "dc_digital_communications", "va_hd_broadband_now_2021_internet_package_price_100_20", va.isp.price.hd2)
dbDisconnect(con)


uva-bi-sdad/dc.broadbandnow.broadband_prices documentation built on June 13, 2022, 4:41 a.m.