Drug Visits

library(httr)
url <- "https://www.vdh.virginia.gov/content/uploads/sites/13/2021/10/Drug-Overdose-ED-Visits_Virginia-September-2021.xlsx"
GET(url, write_disk("Drug-Overdose-ED-Visits_Virginia-September-2021.xlsx", overwrite=TRUE))

library(readxl)
my_data <- read_excel("Drug-Overdose-ED-Visits_Virginia-September-2021.xlsx", 3)

county_names <- my_data$`ALL DRUG`[2:134]
avg_monthly_rate_per_100k <- my_data[, as.vector(my_data[1, ] == "Avg Monthly Rate per 100k Pop")][2:134, ]
colnames(avg_monthly_rate_per_100k) <- paste0(c(2015:2020), "_avg_monthly_rate")

f<- my_data[,which(grepl("2021$", colnames(my_data))) + 3]
f<- lapply(f, function(x) {as.numeric(x)})
rates_2021 <- rowMeans(cbind(f[[1]], f[[2]], f[[3]], f[[4]], f[[5]], f[[6]], f[[7]], f[[8]], f[[9]]), na.rm = T)[2:134]
rates_2021[is.na(rates_2021)] <- 0
avg_monthly_rate_per_100k$`2021_avg_monthly_rate` <- rates_2021

ct.od <- avg_monthly_rate_per_100k %>%
  gather(measure, value, c(`2015_avg_monthly_rate`, `2016_avg_monthly_rate`,
                           `2017_avg_monthly_rate`, `2018_avg_monthly_rate`,
                           `2019_avg_monthly_rate`, `2020_avg_monthly_rate`,
                           `2021_avg_monthly_rate`)) %>%
  mutate(region_type = "county",
         year = as.character(rep(2015:2021, each = 133)),
         measure_type = "rate per 100k", # assuming they use mean for this average
         measure_units = as.character(NA),
         region_name = rep(county_names, times = 7),
         value = as.numeric(value)) %>%
  relocate("region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units")
ct.od$region_name<- str_remove(ct.od$region_name, "‡")
ct.od$region_name <- paste0(ct.od$region_name, ", Virginia")

va.co <- get_acs(geography = "county",
                 year = 2019,
                 variables = c(tpop = "B01003_001"),
                 state = "VA",
                 survey = "acs5",
                 output = "wide",
                 geometry = TRUE)

vec <- vector(length = length(ct.od$region_name))
for (i in 1:length(ct.od$region_name))
{
  vec[i] <- grep(ct.od$region_name[i], va.co$NAME, ignore.case = TRUE, value = TRUE)
}

ct.od$region_name <- vec

county_overdose <- left_join(ct.od, st_drop_geometry(va.co), by = c("region_name" = "NAME")) %>%
  relocate("GEOID", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") %>%
  select(-c(tpopE, tpopM)) %>%
  rename(geoid = GEOID) %>%
  mutate(measure = "avg_monthly_rate")

county_overdose

con <- get_db_conn(db_pass = "rsu8zvrsu8zv")
dc_dbWriteTable(con, "dc_health_behavior_diet", "va_ct_vdh_2015_2021_drug_overdose_ed_visits", county_overdose)
dbDisconnect(con)
my_data <- read_excel("Drug-Overdose-ED-Visits_Virginia-September-2021.xlsx", 6)

hd_names <- my_data$`ALL DRUG`[2:36]
avg_monthly_rate_per_100k <- my_data[, as.vector(my_data[1, ] == "Avg Monthly Rate per 100k Pop")][2:36, ]
colnames(avg_monthly_rate_per_100k) <- paste0(c(2015:2020), "_avg_monthly_rate")


f<- my_data[,which(grepl("2021$", colnames(my_data))) + 3]
f<- lapply(f, function(x) {as.numeric(x)})
rates_2021 <- rowMeans(cbind(f[[1]], f[[2]], f[[3]], f[[4]], f[[5]], f[[6]], f[[7]], f[[8]], f[[9]]), na.rm = T)[2:36]
rates_2021[is.na(rates_2021)] <- 0
avg_monthly_rate_per_100k$`2021_avg_monthly_rate` <- rates_2021


hd.od <- avg_monthly_rate_per_100k %>%
  gather(measure, value, c(`2015_avg_monthly_rate`, `2016_avg_monthly_rate`,
                           `2017_avg_monthly_rate`, `2018_avg_monthly_rate`,
                           `2019_avg_monthly_rate`, `2020_avg_monthly_rate`,
                           `2021_avg_monthly_rate`)) %>%
  mutate(region_type = "health district",
         year = as.character(rep(2015:2021, each = 35)),
         measure_type = "rate per 100k", # assuming they use mean for this average
         measure_units = as.character(NA),
         region_name = rep(hd_names, times = 7),
         value = as.numeric(value)) %>%
  relocate("region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units")

hd.od[hd.od$region_name == "Blue Ridge (formerly Thomas Jefferson)", ]$region_name <- "Thomas Jefferson"
hd.od[hd.od$region_name == "Norfolk City", ]$region_name <- "Norfolk"
hd.od[hd.od$region_name == "Pittsylvania/Danville", ]$region_name <- "Pittsylvania-Danville"
hd.od[hd.od$region_name == "Rappahannock/Rapidan", ]$region_name <- "Rappahannock Rapidan"
hd.od[hd.od$region_name == "Richmond City", ]$region_name <- "Richmond"
hd.od[hd.od$region_name == "Roanoke City", ]$region_name <- "Roanoke"

con <- get_db_conn(db_pass = "rsu8zvrsu8zv")
health_district_geoids <- st_read(con, query = "SELECT * FROM dc_common.va_hd_sdad_2021_virginia_health_district_geoids")
dbDisconnect(con)

hd.od.2 <- merge(hd.od, health_district_geoids) %>%
  relocate('geoid', "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") %>%
  mutate(measure = "avg_monthly_rate")

con <- get_db_conn(db_pass = "rsu8zvrsu8zv")
dc_dbWriteTable(con, "dc_health_behavior_diet", "va_hd_vdh_2015_2021_drug_overdose_ed_visits", hd.od.2)
dbDisconnect(con)

Preventable Hospitalizations

con <- get_db_conn(db_pass = "rsu8zvrsu8zv")
preventable_hospitalization <- st_read(con, query = "SELECT * FROM dc_health_behavior_diet.va_ct_chr_2015_2021_preventable_hospitalizations")
dbDisconnect(con)

health_district <- fread("/project/biocomplexity/sdad/projects_data/vdh/va_county_to_hd.csv")
health_district$county_id <- as.character(health_district$county_id)

# C27006_004E + C27006_007E + C27006_011E + C27006_014E + C27006_017E + C27006_020E
va.co.19 <- get_acs(geography = "county",
                 year = 2019,
                 variables = c(pop1 = "C27006_004", pop2 = "C27006_007", pop3 = "C27006_011",
                               pop4 = "C27006_014", pop5 = "C27006_017", pop6 = "C27006_020"),
                 state = "VA",
                 survey = "acs5",
                 output = "wide",
                 geometry = TRUE)
va.co.19$tpopE <- va.co.19$pop1E + va.co.19$pop2E + va.co.19$pop3E + va.co.19$pop4E + va.co.19$pop5E + va.co.19$pop6E
va.co.18 <- get_acs(geography = "county",
                 year = 2018,
                 variables = c(pop1 = "C27006_004", pop2 = "C27006_007", pop3 = "C27006_011",
                               pop4 = "C27006_014", pop5 = "C27006_017", pop6 = "C27006_020"),
                 state = "VA",
                 survey = "acs5",
                 output = "wide",
                 geometry = TRUE)
va.co.18$tpopE <- va.co.18$pop1E + va.co.18$pop2E + va.co.18$pop3E + va.co.18$pop4E + va.co.18$pop5E + va.co.18$pop6E
va.co.17 <- get_acs(geography = "county",
                 year = 2017,
                 variables = c(pop1 = "C27006_004", pop2 = "C27006_007", pop3 = "C27006_011",
                               pop4 = "C27006_014", pop5 = "C27006_017", pop6 = "C27006_020"),
                 state = "VA",
                 survey = "acs5",
                 output = "wide",
                 geometry = TRUE)
va.co.17$tpopE <- va.co.17$pop1E + va.co.17$pop2E + va.co.17$pop3E + va.co.17$pop4E + va.co.17$pop5E + va.co.17$pop6E
va.co.16 <- get_acs(geography = "county",
                 year = 2016,
                 variables = c(pop1 = "C27006_004", pop2 = "C27006_007", pop3 = "C27006_011",
                               pop4 = "C27006_014", pop5 = "C27006_017", pop6 = "C27006_020"),
                 state = "VA",
                 survey = "acs5",
                 output = "wide",
                 geometry = TRUE)
va.co.16$tpopE <- va.co.16$pop1E + va.co.16$pop2E + va.co.16$pop3E + va.co.16$pop4E + va.co.16$pop5E + va.co.16$pop6E
va.co.15 <- get_acs(geography = "county",
                 year = 2015,
                 variables = c(pop1 = "C27006_004", pop2 = "C27006_007", pop3 = "C27006_011",
                               pop4 = "C27006_014", pop5 = "C27006_017", pop6 = "C27006_020"),
                 state = "VA",
                 survey = "acs5",
                 output = "wide",
                 geometry = TRUE)
va.co.15$tpopE <- va.co.15$pop1E + va.co.15$pop2E + va.co.15$pop3E + va.co.15$pop4E + va.co.15$pop5E + va.co.15$pop6E

hd.19 <- merge(st_drop_geometry(va.co.19)[, c("tpopE", "GEOID", "NAME")], health_district[, c("county_id", "health_district")], by.x = "GEOID", by.y = "county_id") %>%
  group_by(health_district) %>%
  mutate(frac_pop = tpopE/sum(tpopE))
hd.18 <- merge(st_drop_geometry(va.co.18)[, c("tpopE", "GEOID", "NAME")], health_district[, c("county_id", "health_district")], by.x = "GEOID", by.y = "county_id") %>%
  group_by(health_district) %>%
  mutate(frac_pop = tpopE/sum(tpopE))
hd.17 <- merge(st_drop_geometry(va.co.17)[, c("tpopE", "GEOID", "NAME")], health_district[, c("county_id", "health_district")], by.x = "GEOID", by.y = "county_id") %>%
  group_by(health_district) %>%
  mutate(frac_pop = tpopE/sum(tpopE))
hd.16 <- merge(st_drop_geometry(va.co.16)[, c("tpopE", "GEOID", "NAME")], health_district[, c("county_id", "health_district")], by.x = "GEOID", by.y = "county_id") %>%
  group_by(health_district) %>%
  mutate(frac_pop = tpopE/sum(tpopE))
hd.15 <- merge(st_drop_geometry(va.co.15)[, c("tpopE", "GEOID", "NAME")], health_district[, c("county_id", "health_district")], by.x = "GEOID", by.y = "county_id") %>%
  group_by(health_district) %>%
  mutate(frac_pop = tpopE/sum(tpopE))

vec <- vector(length = length(preventable_hospitalization$region_name))
for (i in 1:length(preventable_hospitalization$region_name))
{
  vec[i] <- grep(preventable_hospitalization$region_name[i], hd.19$NAME, ignore.case = TRUE, value = TRUE)
}
preventable_hospitalization$region_name <- vec

hd.19.2 <- merge(hd.19, preventable_hospitalization, by.x = "NAME", by.y = "region_name") %>%
  filter(year %in% c(2019:2021)) %>%
  group_by(year, health_district) %>%
  summarise(value = sum(frac_pop * value, na.rm = T)) %>%
  mutate(measure_type = "rate per 100k",
         measure = "prevent_hosp_rate",
         region_type = "health district",
         measure_units = as.character(NA)) %>%
  rename(region_name = health_district) %>%
  merge(health_district_geoids) %>%
  relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units")
hd.18.2 <- merge(hd.18, preventable_hospitalization, by.x = "NAME", by.y = "region_name") %>%
  filter(year %in% 2018) %>%
  group_by(year, health_district) %>%
  summarise(value = sum(frac_pop * value, na.rm = T)) %>%
  mutate(measure_type = "rate per 100k",
         measure = "prevent_hosp_rate",
         region_type = "health district",
         measure_units = as.character(NA)) %>%
  rename(region_name = health_district) %>%
  merge(health_district_geoids) %>%
  relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units")
hd.17.2 <- merge(hd.17, preventable_hospitalization, by.x = "NAME", by.y = "region_name") %>%
  filter(year %in% 2017) %>%
  group_by(year, health_district) %>%
  summarise(value = sum(frac_pop * value, na.rm = T)) %>%
  mutate(measure_type = "rate per 100k",
         measure = "prevent_hosp_rate",
         region_type = "health district",
         measure_units = as.character(NA)) %>%
  rename(region_name = health_district) %>%
  merge(health_district_geoids) %>%
  relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units")
hd.16.2 <- merge(hd.16, preventable_hospitalization, by.x = "NAME", by.y = "region_name") %>%
  filter(year %in% 2016) %>%
  group_by(year, health_district) %>%
  summarise(value = sum(frac_pop * value, na.rm = T)) %>%
  mutate(measure_type = "rate per 100k",
         measure = "prevent_hosp_rate",
         region_type = "health district",
         measure_units = as.character(NA)) %>%
  rename(region_name = health_district) %>%
  merge(health_district_geoids) %>%
  relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units")
# hd.15.2 <- merge(hd.15, preventable_hospitalization, by.x = "NAME", by.y = "region_name") %>%
#   filter(year %in% 2015) %>%
#   group_by(year, health_district) %>%
#   summarise(value = sum(frac_pop * value, na.rm = T)) %>%
#   mutate(measure_type = "rate per 100k",
#          measure = "prevent_hosp_rate",
#          region_type = "health district",
#          measure_units = as.character(NA)) %>%
#   rename(region_name = health_district) %>%
#   merge(health_district_geoids) %>%
#   relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units")

all.hd.data <- rbind(hd.19.2, hd.18.2, hd.17.2, hd.16.2)

# con <- get_db_conn(db_pass = "rsu8zvrsu8zv")
# dc_dbWriteTable(con, "dc_health_behavior_diet", "va_hd_chr_2016_2021_preventable_hospitalizations", all.hd.data)
# dbDisconnect(con)
# va_fcc_2019 <- fread("/project/biocomplexity/sdad/projects_data/mc/data_commons/FCC/VA-Fixed-Dec2019-v1.csv")
# va_fcc_2019$BlockCode <- as.character(va_fcc_2019$BlockCode)
# va_fcc_2019$tract_code <- substr(va_fcc_2019$BlockCode,
#                                  1,
#                                  11)
# # indicator if block has at least
# # 100 Mbps download and 20 Mbps upload
# va_fcc_2019$down_up_100_20 <- ifelse(va_fcc_2019$MaxAdDown >= 100 & va_fcc_2019$MaxAdUp >= 20, 1, 0)


# tract
all_tr_data <- matrix(, nrow = 0, ncol = 8)
for (i in 2017:2019)
{
  va.tr <- get_acs(geography = "tract",
                   year = i,
                   variables = c(tpop = "B28002_001",
                                 broadband = "B28002_004",
                                 cable_fiber_DSL = "B28002_007"),
                   state = "VA",
                   survey = "acs5",
                   output = "wide",
                   geometry = TRUE)
  va.tr.broadband <- va.tr %>%
    mutate(perc_w_broadband = broadbandE / tpopE,
           perc_w_cable_fiber_DSL = cable_fiber_DSLE / tpopE) %>%
    st_drop_geometry() %>%
    select(-c(tpopE, tpopM, broadbandE, broadbandM, cable_fiber_DSLE, cable_fiber_DSLM)) %>%
  gather(measure, value, c(perc_w_broadband, perc_w_cable_fiber_DSL)) %>%
  mutate(region_type = "tract",
         measure_units = as.character(NA),
         year = as.character(i),
         measure_type = "percentage") %>%
    rename(geoid = GEOID,
           region_name = NAME) %>%
    relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units")
  all_tr_data <- rbind(all_tr_data, va.tr.broadband)
}

# county
all_co_data <- matrix(, nrow = 0, ncol = 8)
for (i in 2017:2019)
{
  va.co <- get_acs(geography = "county",
                   year = i,
                   variables = c(tpop = "B28002_001",
                                 broadband = "B28002_004",
                                 cable_fiber_DSL = "B28002_007"),
                   state = "VA",
                   survey = "acs5",
                   output = "wide",
                   geometry = TRUE)
  va.co.broadband <- va.co %>%
  mutate(perc_w_broadband = broadbandE / tpopE,
         perc_w_cable_fiber_DSL = cable_fiber_DSLE / tpopE) %>%
  st_drop_geometry() %>%
  select(-c(tpopE, tpopM, broadbandE, broadbandM, cable_fiber_DSLE, cable_fiber_DSLM)) %>%
  gather(measure, value, c(perc_w_broadband, perc_w_cable_fiber_DSL)) %>%
  mutate(region_type = "county",
         measure_units = as.character(NA),
         year = as.character(i),
         measure_type = "percentage") %>%
    rename(geoid = GEOID,
           region_name = NAME) %>%
    relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units")
  all_co_data <- rbind(all_co_data, va.co.broadband)
}

# health district
all_hd_data <- matrix(, nrow = 0, ncol = 8)
for (i in 2017:2019)
{
  va.hd <- get_acs(geography = "county",
                     year = i,
                     variables = c(tpop = "B28002_001",
                                   broadband = "B28002_004",
                                   cable_fiber_DSL = "B28002_007"),
                     state = "VA",
                     survey = "acs5",
                     output = "wide",
                     geometry = TRUE)
  va.hd.broadband <- merge(st_drop_geometry(va.hd), health_district[, c("health_district", "county_id")], by.x = "GEOID", by.y = "county_id") %>%
    group_by(health_district) %>%
    summarise(perc_w_broadband = sum(broadbandE)/sum(tpopE),
              perc_w_cable_fiber_DSL = sum(cable_fiber_DSLE) / sum(tpopE)) %>%
    gather(measure, value, c(perc_w_broadband, perc_w_cable_fiber_DSL)) %>%
    mutate(region_type = "health district",
           measure_units = as.character(NA),
           year = as.character(i),
           measure_type = "percentage") %>%
    merge(health_district_geoids[, c("geoid", "region_name")], by.x = "health_district", by.y =  "region_name") %>%
    rename(region_name = health_district) %>%
      relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units")
  all_hd_data <- rbind(all_hd_data, va.hd.broadband)
}

con <- get_db_conn(db_pass = "rsu8zvrsu8zv")
dc_dbWriteTable(con, "dc_digital_communications", "va_hd_acs_2017_2019_perc_pop_with_broadband", all_hd_data)
dc_dbWriteTable(con, "dc_digital_communications", "va_ct_acs_2017_2019_perc_pop_with_broadband", all_co_data)
dc_dbWriteTable(con, "dc_digital_communications", "va_tr_acs_2017_2019_perc_pop_with_broadband", all_tr_data)
dbDisconnect(con)

all_tr_data
library(sf)
con <- get_db_conn(db_pass = "rsu8zvrsu8zv")
a <- st_read(con, query = "SELECT * FROM dc_digital_communications.va_bg_sdad_2021_perc_income_on_internet_update
")
dbDisconnect(con)

unique(a$measure)
a
## CHANGE NAMES

data <- read.csv("~/git/dc.chr.preventable.hospitalizations/data/va_hdct_chr_2015_2021_preventable_hospitalizations.csv.xz") %>% mutate(geoid = ifelse(region_type == "health district", paste0("51_hd_", geoid), geoid))
readr::write_csv(data,
                 xzfile("~/git/dc.chr.preventable.hospitalizations/data/va_hdct_chr_2015_2021_preventable_hospitalizations.csv.xz", compression = 9))


uva-bi-sdad/dc.chr.preventable.hospitalizations documentation built on May 6, 2022, 4:25 p.m.