library(httr) url <- "https://www.vdh.virginia.gov/content/uploads/sites/13/2021/10/Drug-Overdose-ED-Visits_Virginia-September-2021.xlsx" GET(url, write_disk("Drug-Overdose-ED-Visits_Virginia-September-2021.xlsx", overwrite=TRUE)) library(readxl) my_data <- read_excel("Drug-Overdose-ED-Visits_Virginia-September-2021.xlsx", 3) county_names <- my_data$`ALL DRUG`[2:134] avg_monthly_rate_per_100k <- my_data[, as.vector(my_data[1, ] == "Avg Monthly Rate per 100k Pop")][2:134, ] colnames(avg_monthly_rate_per_100k) <- paste0(c(2015:2020), "_avg_monthly_rate") f<- my_data[,which(grepl("2021$", colnames(my_data))) + 3] f<- lapply(f, function(x) {as.numeric(x)}) rates_2021 <- rowMeans(cbind(f[[1]], f[[2]], f[[3]], f[[4]], f[[5]], f[[6]], f[[7]], f[[8]], f[[9]]), na.rm = T)[2:134] rates_2021[is.na(rates_2021)] <- 0 avg_monthly_rate_per_100k$`2021_avg_monthly_rate` <- rates_2021 ct.od <- avg_monthly_rate_per_100k %>% gather(measure, value, c(`2015_avg_monthly_rate`, `2016_avg_monthly_rate`, `2017_avg_monthly_rate`, `2018_avg_monthly_rate`, `2019_avg_monthly_rate`, `2020_avg_monthly_rate`, `2021_avg_monthly_rate`)) %>% mutate(region_type = "county", year = as.character(rep(2015:2021, each = 133)), measure_type = "rate per 100k", # assuming they use mean for this average measure_units = as.character(NA), region_name = rep(county_names, times = 7), value = as.numeric(value)) %>% relocate("region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") ct.od$region_name<- str_remove(ct.od$region_name, "‡") ct.od$region_name <- paste0(ct.od$region_name, ", Virginia") va.co <- get_acs(geography = "county", year = 2019, variables = c(tpop = "B01003_001"), state = "VA", survey = "acs5", output = "wide", geometry = TRUE) vec <- vector(length = length(ct.od$region_name)) for (i in 1:length(ct.od$region_name)) { vec[i] <- grep(ct.od$region_name[i], va.co$NAME, ignore.case = TRUE, value = TRUE) } ct.od$region_name <- vec county_overdose <- left_join(ct.od, st_drop_geometry(va.co), by = c("region_name" = "NAME")) %>% relocate("GEOID", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") %>% select(-c(tpopE, tpopM)) %>% rename(geoid = GEOID) %>% mutate(measure = "avg_monthly_rate") county_overdose con <- get_db_conn(db_pass = "rsu8zvrsu8zv") dc_dbWriteTable(con, "dc_health_behavior_diet", "va_ct_vdh_2015_2021_drug_overdose_ed_visits", county_overdose) dbDisconnect(con)
my_data <- read_excel("Drug-Overdose-ED-Visits_Virginia-September-2021.xlsx", 6) hd_names <- my_data$`ALL DRUG`[2:36] avg_monthly_rate_per_100k <- my_data[, as.vector(my_data[1, ] == "Avg Monthly Rate per 100k Pop")][2:36, ] colnames(avg_monthly_rate_per_100k) <- paste0(c(2015:2020), "_avg_monthly_rate") f<- my_data[,which(grepl("2021$", colnames(my_data))) + 3] f<- lapply(f, function(x) {as.numeric(x)}) rates_2021 <- rowMeans(cbind(f[[1]], f[[2]], f[[3]], f[[4]], f[[5]], f[[6]], f[[7]], f[[8]], f[[9]]), na.rm = T)[2:36] rates_2021[is.na(rates_2021)] <- 0 avg_monthly_rate_per_100k$`2021_avg_monthly_rate` <- rates_2021 hd.od <- avg_monthly_rate_per_100k %>% gather(measure, value, c(`2015_avg_monthly_rate`, `2016_avg_monthly_rate`, `2017_avg_monthly_rate`, `2018_avg_monthly_rate`, `2019_avg_monthly_rate`, `2020_avg_monthly_rate`, `2021_avg_monthly_rate`)) %>% mutate(region_type = "health district", year = as.character(rep(2015:2021, each = 35)), measure_type = "rate per 100k", # assuming they use mean for this average measure_units = as.character(NA), region_name = rep(hd_names, times = 7), value = as.numeric(value)) %>% relocate("region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") hd.od[hd.od$region_name == "Blue Ridge (formerly Thomas Jefferson)", ]$region_name <- "Thomas Jefferson" hd.od[hd.od$region_name == "Norfolk City", ]$region_name <- "Norfolk" hd.od[hd.od$region_name == "Pittsylvania/Danville", ]$region_name <- "Pittsylvania-Danville" hd.od[hd.od$region_name == "Rappahannock/Rapidan", ]$region_name <- "Rappahannock Rapidan" hd.od[hd.od$region_name == "Richmond City", ]$region_name <- "Richmond" hd.od[hd.od$region_name == "Roanoke City", ]$region_name <- "Roanoke" con <- get_db_conn(db_pass = "rsu8zvrsu8zv") health_district_geoids <- st_read(con, query = "SELECT * FROM dc_common.va_hd_sdad_2021_virginia_health_district_geoids") dbDisconnect(con) hd.od.2 <- merge(hd.od, health_district_geoids) %>% relocate('geoid', "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") %>% mutate(measure = "avg_monthly_rate") con <- get_db_conn(db_pass = "rsu8zvrsu8zv") dc_dbWriteTable(con, "dc_health_behavior_diet", "va_hd_vdh_2015_2021_drug_overdose_ed_visits", hd.od.2) dbDisconnect(con)
con <- get_db_conn(db_pass = "rsu8zvrsu8zv") preventable_hospitalization <- st_read(con, query = "SELECT * FROM dc_health_behavior_diet.va_ct_chr_2015_2021_preventable_hospitalizations") dbDisconnect(con) health_district <- fread("/project/biocomplexity/sdad/projects_data/vdh/va_county_to_hd.csv") health_district$county_id <- as.character(health_district$county_id) # C27006_004E + C27006_007E + C27006_011E + C27006_014E + C27006_017E + C27006_020E va.co.19 <- get_acs(geography = "county", year = 2019, variables = c(pop1 = "C27006_004", pop2 = "C27006_007", pop3 = "C27006_011", pop4 = "C27006_014", pop5 = "C27006_017", pop6 = "C27006_020"), state = "VA", survey = "acs5", output = "wide", geometry = TRUE) va.co.19$tpopE <- va.co.19$pop1E + va.co.19$pop2E + va.co.19$pop3E + va.co.19$pop4E + va.co.19$pop5E + va.co.19$pop6E va.co.18 <- get_acs(geography = "county", year = 2018, variables = c(pop1 = "C27006_004", pop2 = "C27006_007", pop3 = "C27006_011", pop4 = "C27006_014", pop5 = "C27006_017", pop6 = "C27006_020"), state = "VA", survey = "acs5", output = "wide", geometry = TRUE) va.co.18$tpopE <- va.co.18$pop1E + va.co.18$pop2E + va.co.18$pop3E + va.co.18$pop4E + va.co.18$pop5E + va.co.18$pop6E va.co.17 <- get_acs(geography = "county", year = 2017, variables = c(pop1 = "C27006_004", pop2 = "C27006_007", pop3 = "C27006_011", pop4 = "C27006_014", pop5 = "C27006_017", pop6 = "C27006_020"), state = "VA", survey = "acs5", output = "wide", geometry = TRUE) va.co.17$tpopE <- va.co.17$pop1E + va.co.17$pop2E + va.co.17$pop3E + va.co.17$pop4E + va.co.17$pop5E + va.co.17$pop6E va.co.16 <- get_acs(geography = "county", year = 2016, variables = c(pop1 = "C27006_004", pop2 = "C27006_007", pop3 = "C27006_011", pop4 = "C27006_014", pop5 = "C27006_017", pop6 = "C27006_020"), state = "VA", survey = "acs5", output = "wide", geometry = TRUE) va.co.16$tpopE <- va.co.16$pop1E + va.co.16$pop2E + va.co.16$pop3E + va.co.16$pop4E + va.co.16$pop5E + va.co.16$pop6E va.co.15 <- get_acs(geography = "county", year = 2015, variables = c(pop1 = "C27006_004", pop2 = "C27006_007", pop3 = "C27006_011", pop4 = "C27006_014", pop5 = "C27006_017", pop6 = "C27006_020"), state = "VA", survey = "acs5", output = "wide", geometry = TRUE) va.co.15$tpopE <- va.co.15$pop1E + va.co.15$pop2E + va.co.15$pop3E + va.co.15$pop4E + va.co.15$pop5E + va.co.15$pop6E hd.19 <- merge(st_drop_geometry(va.co.19)[, c("tpopE", "GEOID", "NAME")], health_district[, c("county_id", "health_district")], by.x = "GEOID", by.y = "county_id") %>% group_by(health_district) %>% mutate(frac_pop = tpopE/sum(tpopE)) hd.18 <- merge(st_drop_geometry(va.co.18)[, c("tpopE", "GEOID", "NAME")], health_district[, c("county_id", "health_district")], by.x = "GEOID", by.y = "county_id") %>% group_by(health_district) %>% mutate(frac_pop = tpopE/sum(tpopE)) hd.17 <- merge(st_drop_geometry(va.co.17)[, c("tpopE", "GEOID", "NAME")], health_district[, c("county_id", "health_district")], by.x = "GEOID", by.y = "county_id") %>% group_by(health_district) %>% mutate(frac_pop = tpopE/sum(tpopE)) hd.16 <- merge(st_drop_geometry(va.co.16)[, c("tpopE", "GEOID", "NAME")], health_district[, c("county_id", "health_district")], by.x = "GEOID", by.y = "county_id") %>% group_by(health_district) %>% mutate(frac_pop = tpopE/sum(tpopE)) hd.15 <- merge(st_drop_geometry(va.co.15)[, c("tpopE", "GEOID", "NAME")], health_district[, c("county_id", "health_district")], by.x = "GEOID", by.y = "county_id") %>% group_by(health_district) %>% mutate(frac_pop = tpopE/sum(tpopE)) vec <- vector(length = length(preventable_hospitalization$region_name)) for (i in 1:length(preventable_hospitalization$region_name)) { vec[i] <- grep(preventable_hospitalization$region_name[i], hd.19$NAME, ignore.case = TRUE, value = TRUE) } preventable_hospitalization$region_name <- vec hd.19.2 <- merge(hd.19, preventable_hospitalization, by.x = "NAME", by.y = "region_name") %>% filter(year %in% c(2019:2021)) %>% group_by(year, health_district) %>% summarise(value = sum(frac_pop * value, na.rm = T)) %>% mutate(measure_type = "rate per 100k", measure = "prevent_hosp_rate", region_type = "health district", measure_units = as.character(NA)) %>% rename(region_name = health_district) %>% merge(health_district_geoids) %>% relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") hd.18.2 <- merge(hd.18, preventable_hospitalization, by.x = "NAME", by.y = "region_name") %>% filter(year %in% 2018) %>% group_by(year, health_district) %>% summarise(value = sum(frac_pop * value, na.rm = T)) %>% mutate(measure_type = "rate per 100k", measure = "prevent_hosp_rate", region_type = "health district", measure_units = as.character(NA)) %>% rename(region_name = health_district) %>% merge(health_district_geoids) %>% relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") hd.17.2 <- merge(hd.17, preventable_hospitalization, by.x = "NAME", by.y = "region_name") %>% filter(year %in% 2017) %>% group_by(year, health_district) %>% summarise(value = sum(frac_pop * value, na.rm = T)) %>% mutate(measure_type = "rate per 100k", measure = "prevent_hosp_rate", region_type = "health district", measure_units = as.character(NA)) %>% rename(region_name = health_district) %>% merge(health_district_geoids) %>% relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") hd.16.2 <- merge(hd.16, preventable_hospitalization, by.x = "NAME", by.y = "region_name") %>% filter(year %in% 2016) %>% group_by(year, health_district) %>% summarise(value = sum(frac_pop * value, na.rm = T)) %>% mutate(measure_type = "rate per 100k", measure = "prevent_hosp_rate", region_type = "health district", measure_units = as.character(NA)) %>% rename(region_name = health_district) %>% merge(health_district_geoids) %>% relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") # hd.15.2 <- merge(hd.15, preventable_hospitalization, by.x = "NAME", by.y = "region_name") %>% # filter(year %in% 2015) %>% # group_by(year, health_district) %>% # summarise(value = sum(frac_pop * value, na.rm = T)) %>% # mutate(measure_type = "rate per 100k", # measure = "prevent_hosp_rate", # region_type = "health district", # measure_units = as.character(NA)) %>% # rename(region_name = health_district) %>% # merge(health_district_geoids) %>% # relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") all.hd.data <- rbind(hd.19.2, hd.18.2, hd.17.2, hd.16.2) # con <- get_db_conn(db_pass = "rsu8zvrsu8zv") # dc_dbWriteTable(con, "dc_health_behavior_diet", "va_hd_chr_2016_2021_preventable_hospitalizations", all.hd.data) # dbDisconnect(con)
# va_fcc_2019 <- fread("/project/biocomplexity/sdad/projects_data/mc/data_commons/FCC/VA-Fixed-Dec2019-v1.csv") # va_fcc_2019$BlockCode <- as.character(va_fcc_2019$BlockCode) # va_fcc_2019$tract_code <- substr(va_fcc_2019$BlockCode, # 1, # 11) # # indicator if block has at least # # 100 Mbps download and 20 Mbps upload # va_fcc_2019$down_up_100_20 <- ifelse(va_fcc_2019$MaxAdDown >= 100 & va_fcc_2019$MaxAdUp >= 20, 1, 0) # tract all_tr_data <- matrix(, nrow = 0, ncol = 8) for (i in 2017:2019) { va.tr <- get_acs(geography = "tract", year = i, variables = c(tpop = "B28002_001", broadband = "B28002_004", cable_fiber_DSL = "B28002_007"), state = "VA", survey = "acs5", output = "wide", geometry = TRUE) va.tr.broadband <- va.tr %>% mutate(perc_w_broadband = broadbandE / tpopE, perc_w_cable_fiber_DSL = cable_fiber_DSLE / tpopE) %>% st_drop_geometry() %>% select(-c(tpopE, tpopM, broadbandE, broadbandM, cable_fiber_DSLE, cable_fiber_DSLM)) %>% gather(measure, value, c(perc_w_broadband, perc_w_cable_fiber_DSL)) %>% mutate(region_type = "tract", measure_units = as.character(NA), year = as.character(i), measure_type = "percentage") %>% rename(geoid = GEOID, region_name = NAME) %>% relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") all_tr_data <- rbind(all_tr_data, va.tr.broadband) } # county all_co_data <- matrix(, nrow = 0, ncol = 8) for (i in 2017:2019) { va.co <- get_acs(geography = "county", year = i, variables = c(tpop = "B28002_001", broadband = "B28002_004", cable_fiber_DSL = "B28002_007"), state = "VA", survey = "acs5", output = "wide", geometry = TRUE) va.co.broadband <- va.co %>% mutate(perc_w_broadband = broadbandE / tpopE, perc_w_cable_fiber_DSL = cable_fiber_DSLE / tpopE) %>% st_drop_geometry() %>% select(-c(tpopE, tpopM, broadbandE, broadbandM, cable_fiber_DSLE, cable_fiber_DSLM)) %>% gather(measure, value, c(perc_w_broadband, perc_w_cable_fiber_DSL)) %>% mutate(region_type = "county", measure_units = as.character(NA), year = as.character(i), measure_type = "percentage") %>% rename(geoid = GEOID, region_name = NAME) %>% relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") all_co_data <- rbind(all_co_data, va.co.broadband) } # health district all_hd_data <- matrix(, nrow = 0, ncol = 8) for (i in 2017:2019) { va.hd <- get_acs(geography = "county", year = i, variables = c(tpop = "B28002_001", broadband = "B28002_004", cable_fiber_DSL = "B28002_007"), state = "VA", survey = "acs5", output = "wide", geometry = TRUE) va.hd.broadband <- merge(st_drop_geometry(va.hd), health_district[, c("health_district", "county_id")], by.x = "GEOID", by.y = "county_id") %>% group_by(health_district) %>% summarise(perc_w_broadband = sum(broadbandE)/sum(tpopE), perc_w_cable_fiber_DSL = sum(cable_fiber_DSLE) / sum(tpopE)) %>% gather(measure, value, c(perc_w_broadband, perc_w_cable_fiber_DSL)) %>% mutate(region_type = "health district", measure_units = as.character(NA), year = as.character(i), measure_type = "percentage") %>% merge(health_district_geoids[, c("geoid", "region_name")], by.x = "health_district", by.y = "region_name") %>% rename(region_name = health_district) %>% relocate("geoid", "region_type", "region_name", "year", "measure", "value", "measure_type", "measure_units") all_hd_data <- rbind(all_hd_data, va.hd.broadband) } con <- get_db_conn(db_pass = "rsu8zvrsu8zv") dc_dbWriteTable(con, "dc_digital_communications", "va_hd_acs_2017_2019_perc_pop_with_broadband", all_hd_data) dc_dbWriteTable(con, "dc_digital_communications", "va_ct_acs_2017_2019_perc_pop_with_broadband", all_co_data) dc_dbWriteTable(con, "dc_digital_communications", "va_tr_acs_2017_2019_perc_pop_with_broadband", all_tr_data) dbDisconnect(con) all_tr_data
library(sf) con <- get_db_conn(db_pass = "rsu8zvrsu8zv") a <- st_read(con, query = "SELECT * FROM dc_digital_communications.va_bg_sdad_2021_perc_income_on_internet_update ") dbDisconnect(con) unique(a$measure) a
## CHANGE NAMES data <- read.csv("~/git/dc.chr.preventable.hospitalizations/data/va_hdct_chr_2015_2021_preventable_hospitalizations.csv.xz") %>% mutate(geoid = ifelse(region_type == "health district", paste0("51_hd_", geoid), geoid)) readr::write_csv(data, xzfile("~/git/dc.chr.preventable.hospitalizations/data/va_hdct_chr_2015_2021_preventable_hospitalizations.csv.xz", compression = 9))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.