knitr::opts_chunk$set(echo = TRUE)

libraries

library(catchment)
library(sf)
library(leaflet)
library(dplyr)

start new

 rm(list=ls())

working directory

setwd("~/VDH/Floating_Catchment_Areas/va/4. primcare_va_model/")

read data

population <- read.csv("population.csv")
provider <- read.csv("provider.csv")
rownames(provider) <- provider$ID
traveltimes <- read.csv("traveltimes_trimmed.csv", row.names = 1)
#traveltimes <- traveltimes %>% filter(!is.na(as.data.frame(traveltimes) ))

population$prov_cnt[is.na(population$prov_cnt)] <- 0

prepare map

#blockgroup_shapes <- st_transform(read_sf("blockgroups.geojson", as_tibble = FALSE), 4326)
blockgroup_shapes <-   st_transform( tigris::block_groups(state = c("VA"), class = "sf", year=2019), 4326)

rownames(blockgroup_shapes) <- blockgroup_shapes$GEOID
blockgroup_shapes <- blockgroup_shapes[as.character(population$GEOID),]
##new
shape_counties <- tigris::counties(state = c("VA"), class = "sf")

#format for counties
shape_counties <- st_transform(shape_counties, 4326 )

#create map
map <- leaflet(blockgroup_shapes, options = leafletOptions(attributionControl = FALSE)) %>%
  addTiles() %>%
  addScaleBar("bottomleft") %>%
  addMapPane("lines", zIndex = 410) %>%
  addMapPane("points", zIndex = 411) %>%
  addPolygons(
    fillColor = colorNumeric("RdYlBu", population$population)(population$population),
    fillOpacity = 1, stroke = FALSE, group = "Population", label = population$pediatric_pop
  ) %>%
  hideGroup("Population") %>%
  addLayersControl(
    position = "topleft", overlayGroups = c("Pediatrics", "Population", "Access")
  ) %>%
  addCircles(
    data = provider, color = "blue", lng = ~lon, lat = ~lat, weight = 3,
    label = ~ paste0("ID: ", ID), group = "Urgent", options = pathOptions(pane = "points")
  )

calculate and display catchment areas

2-step

population$providers_2sfca <- catchment_ratio(
  population, provider, traveltimes, 30,
  consumers_value = "population", providers_id = "ID", providers_value = "doctors", verbose = TRUE
) * 1000

pal <- colorBin("RdYlBu", population$providers_2sfca)

plot_2sfca <- map %>%
  addControl("Urgent Care Facilities Per 1,000 People (2-Step Floating Catchment Area)", "topright") %>%
  addLegend("bottomright", pal, population$providers_2sfca, opacity = .7) %>%
  addPolygons(
    fillColor = pal(population$providers_2sfca), fillOpacity = .7, weight = 1, color = "#000",
    highlightOptions = highlightOptions(color = "#fff"), group = "Access",
    label = paste0(
      "GEOID: ", population$GEOID, ", Population Ages 0-17: ", population$population,
      ", Per 1k People: ", round(population$providers_2sfca, 4), ", In Region: ",
      round(population$population * population$providers_2sfca / 1000, 4)
    )
  ) %>%
  addPolylines(data = shape_counties, color = "black", opacity = 1, weight = 2.5) 

## enhanced 2-step

weight <- list(c(60, .042), c(30, .377), c(20, .704), c(10, .962))
#weight <- list(c(100, .042), c(60, .377), c(40, .704), c(20, .962))

population$providers_e2sfca <- catchment_ratio(
  population, provider, traveltimes, weight,
  consumers_value = "population", providers_id = "ID", providers_value = "doctors", verbose = TRUE
) * 1000

pal <- colorBin("RdYlBu", population$providers_e2sfca)

plot_e2sfca <- map %>%
  addControl("Urgent Care Facilities Per 1,000 People (enhanced 2-Step Floating Catchment Area)", "topright") %>%
  addLegend("bottomright", pal, population$providers_e2sfca, opacity = .7) %>%
  addPolygons(
    fillColor = pal(population$providers_e2sfca), fillOpacity = .7, weight = 1, color = "#000",
    highlightOptions = highlightOptions(color = "#fff"), group = "Access",
    label = paste0(
      "GEOID: ", population$GEOID, ", Population Ages 0-17: ", population$population,
      ", Per 1k People: ", round(population$providers_e2sfca, 4), ", In Region: ",
      round(population$population * population$providers_e2sfca / 1000, 4)
    )
  ) %>% addPolylines(data = shape_counties, color = "black", opacity = 1, weight = 2.5) 


## 3-step

population$providers_3sfca <- catchment_ratio(
  population, provider, traveltimes, "gaussian", normalize_weight = TRUE, scale = 20,
  consumers_value = "population", providers_id = "ID", providers_value = "doctors", verbose = TRUE
) * 1000

pal <- colorBin("RdYlBu", population$providers_3sfca)

plot_3sfca <- map %>%
  addControl("Urgent Care Facilities Per 1,000 People (3-Step Floating Catchment Area)", "topright") %>%
  addLegend("bottomright", pal, population$providers_3sfca, opacity = .7) %>%
  addPolygons(
    fillColor = pal(population$providers_3sfca), fillOpacity = .7, weight = 1, color = "#000",
    highlightOptions = highlightOptions(color = "#fff"), group = "Access",
    label = paste0(
      "GEOID: ", population$GEOID, ", Population Ages 0-17: ", population$population,
      ", Per 1k People: ", round(population$providers_3sfca, 4), ", In Region: ",
      round(population$population * population$providers_3sfca / 1000, 4)
    )
  ) %>% addPolylines(data = shape_counties, color = "black", opacity = 1, weight = 2.5) 

plot summary

plot_2sfca
plot_e2sfca
plot_3sfca

reshape wide-long

######

#DEFINE relevant population and names
dmv_bg_2021_acccess_scores_wide <- population   %>% dplyr::select( geoid="GEOID", 
                                                            primcare_pop_cnt ="population",
                                                            primcare_cnt = "prov_cnt",
                                                            primcare_near_10_mean = "near_10_mean",
                                                            primcare_near_10_median = "near_10_median",
                                                            primcare_2sfca = "providers_2sfca" ,
                                                            primcare_e2sfca = "providers_e2sfca", 
                                                            primcare_3sfca = "providers_3sfca" )

long format block group

dmv_bg_2021_acccess_scores_bg <- tidyr::gather(dmv_bg_2021_acccess_scores_wide, measure, value, 
                                            primcare_pop_cnt,
                                            primcare_cnt,
                                            primcare_near_10_mean,
                                            primcare_near_10_median,
                                            primcare_2sfca, 
                                            primcare_e2sfca, 
                                            primcare_3sfca)
#define year
dmv_bg_2021_acccess_scores_bg$year <- 2021
#define measure type
dmv_bg_2021_acccess_scores_bg$measure_type <- as.factor(ifelse(dmv_bg_2021_acccess_scores_bg$measure=="primcare_2sfca" |
                                                            dmv_bg_2021_acccess_scores_bg$measure=="primcare_e2sfca" |
                                                              dmv_bg_2021_acccess_scores_bg$measure=="primcare_3sfca", 'index','count') )

include name: geographic name for the smallest geographic unit: block group in this case. Run

#a) connect
con <- RPostgreSQL::dbConnect(drv = RPostgreSQL::PostgreSQL(),
                              dbname = "sdad",
                              host = "postgis1",
                              port = 5432,
                              user = Sys.getenv(x = "DB_USR"),
                              password = Sys.getenv(x = "DB_PWD"))

#b) query
name_geo <- sf::st_read(con, 
  query= "
SELECT  *
FROM dc_geographies.ncr_cttrbg_tiger_2010_2020_geo_names "
)

#join region_name + region_type
dmv_bg_2021_acccess_scores_bg$geoid <- as.character(dmv_bg_2021_acccess_scores_bg$geoid)
dmv_bg_2021_acccess_scores_bg <- dmv_bg_2021_acccess_scores_bg %>% left_join(name_geo, by="geoid")
#order
dmv_bg_2021_acccess_scores_bg <- dmv_bg_2021_acccess_scores_bg %>% dplyr::select("geoid","region_type","region_name", "measure", "value", "year", "measure_type")
######

AGGREGATION tract. CHANGE names provider, then run

#generate id tract 
#the relevant file is WIDE
dmv_bg_2021_acccess_scores_wide_aggreg <- dmv_bg_2021_acccess_scores_wide  %>% mutate(tract_geoid = substr(geoid,1,11))
#add region name - tract
# dmv_bg_2021_acccess_scores_wide_aggreg <- dmv_bg_2021_acccess_scores_wide_aggreg %>% left_join(name_geo, by= c("tract_geoid"= "geoid") )

## measures tract
dmv_bg_2021_acccess_scores_tract_wide <- dmv_bg_2021_acccess_scores_wide_aggreg %>% group_by(tract_geoid) %>% dplyr::summarise(
  primcare_pop = sum(primcare_pop_cnt, na.rm = TRUE), 
  primcare_cnt = sum(primcare_cnt, na.rm = TRUE), 
  primcare_near_10_mean = weighted.mean(primcare_near_10_mean, primcare_pop_cnt, na.rm = TRUE), 
  primcare_near_10_median = weighted.mean(primcare_near_10_median, primcare_pop_cnt, na.rm = TRUE), 
  primcare_2sfca = weighted.mean(primcare_2sfca, primcare_pop_cnt, na.rm = TRUE), 
  primcare_e2sfca = weighted.mean(primcare_e2sfca, primcare_pop_cnt, na.rm = TRUE),
  primcare_3sfca = weighted.mean(primcare_3sfca, primcare_pop_cnt, na.rm = TRUE),
  )

long format tract,

dmv_bg_2021_acccess_scores_tract <- tidyr::gather(dmv_bg_2021_acccess_scores_tract_wide, measure, value, 
                                            primcare_pop,
                                            primcare_cnt,
                                            primcare_near_10_mean,
                                            primcare_near_10_median,
                                            primcare_2sfca, 
                                            primcare_e2sfca, 
                                            primcare_3sfca)
#define year
dmv_bg_2021_acccess_scores_tract$year <- 2021
#define measure type
dmv_bg_2021_acccess_scores_tract$measure_type <- as.factor(ifelse(dmv_bg_2021_acccess_scores_tract$measure=="primcare_2sfca" |
                                                            dmv_bg_2021_acccess_scores_tract$measure=="primcare_e2sfca" |
                                                              dmv_bg_2021_acccess_scores_tract$measure=="primcare_3sfca", 'index','count') )

include name: geographic name for geographic unit: tract in this case

#join region_name + region_type
dmv_bg_2021_acccess_scores_tract$tract_geoid <- as.character(dmv_bg_2021_acccess_scores_tract$tract_geoid)
dmv_bg_2021_acccess_scores_tract <- dmv_bg_2021_acccess_scores_tract %>% left_join(name_geo, by= c("tract_geoid" = "geoid") )
#order
dmv_bg_2021_acccess_scores_tract <- dmv_bg_2021_acccess_scores_tract %>% dplyr::select(geoid="tract_geoid","region_type","region_name", "measure", "value", "year", "measure_type")

AGGREGATION county

#generate id tract 
#the relevant file is WIDE
dmv_bg_2021_acccess_scores_wide_aggreg <- dmv_bg_2021_acccess_scores_wide  %>% mutate(county_geoid = substr(geoid,1,5))
#add region name - tract
# dmv_bg_2021_acccess_scores_wide_aggreg <- dmv_bg_2021_acccess_scores_wide_aggreg %>% left_join(name_geo, by= c("tract_geoid"= "geoid") )

## measures tract
dmv_bg_2021_acccess_scores_county_wide <- dmv_bg_2021_acccess_scores_wide_aggreg %>% group_by(county_geoid) %>% summarise(
  primcare_pop = sum(primcare_pop_cnt, na.rm = TRUE), 
  primcare_cnt = sum(primcare_cnt, na.rm = TRUE), 
  primcare_near_10_mean = weighted.mean(primcare_near_10_mean, primcare_pop_cnt, na.rm = TRUE), 
  primcare_near_10_median = weighted.mean(primcare_near_10_median, primcare_pop_cnt, na.rm = TRUE), 
  primcare_2sfca = weighted.mean(primcare_2sfca, primcare_pop_cnt, na.rm = TRUE), 
  primcare_e2sfca = weighted.mean(primcare_e2sfca, primcare_pop_cnt, na.rm = TRUE),
  primcare_3sfca = weighted.mean(primcare_3sfca, primcare_pop_cnt, na.rm = TRUE),
  )

long format tract

dmv_bg_2021_acccess_scores_county <- tidyr::gather(dmv_bg_2021_acccess_scores_county_wide, measure, value, 
                                            primcare_pop,
                                            primcare_cnt,
                                            primcare_near_10_mean,
                                            primcare_near_10_median,
                                            primcare_2sfca, 
                                            primcare_e2sfca, 
                                            primcare_3sfca)
#define year
dmv_bg_2021_acccess_scores_county$year <- 2021
#define measure type
dmv_bg_2021_acccess_scores_county$measure_type <- as.factor(ifelse(dmv_bg_2021_acccess_scores_county$measure=="primcare_2sfca" |
                                                            dmv_bg_2021_acccess_scores_county$measure=="primcare_e2sfca" |
                                                              dmv_bg_2021_acccess_scores_county$measure=="primcare_3sfca", 'index','count') )

include name: geographic name for geographic unit: county in this case

#join region_name + region_type
dmv_bg_2021_acccess_scores_county$tract_geoid <- as.character(dmv_bg_2021_acccess_scores_county$county_geoid)
dmv_bg_2021_acccess_scores_county <- dmv_bg_2021_acccess_scores_county %>% left_join(name_geo, by= c("county_geoid" = "geoid") )
#order
dmv_bg_2021_acccess_scores_county <- dmv_bg_2021_acccess_scores_county %>% dplyr::select(geoid="county_geoid","region_type","region_name", "measure", "value", "year", "measure_type")

POTENTIALLY INCLUDE HERE FOR HD

6. HEALTH DISTRICT METRICS

health_district <- read.csv("~/spatial_access_health/Locality-to-HD-to-HPR.csv")
names(health_district)[2] <- "county_geoid"
health_district$county_geoid <- as.character(health_district$county_geoid )

AGGREGATION Health district. CHANGE names provider, then run

#join name of HD
dmv_bg_2021_acccess_scores_wide_aggreg <- dmv_bg_2021_acccess_scores_wide_aggreg %>% left_join(health_district, by="county_geoid")

#generate id tract 
#the relevant file is WIDE
#dmv_bg_2021_acccess_scores_wide_aggreg <- dmv_bg_2021_acccess_scores_wide  %>% mutate(tract_geoid = substr(geoid,1,11))
#add region name - tract
# dmv_bg_2021_acccess_scores_wide_aggreg <- dmv_bg_2021_acccess_scores_wide_aggreg %>% left_join(name_geo, by= c("tract_geoid"= "geoid") )

## measures tract
dmv_bg_2021_acccess_scores_hd_wide <- dmv_bg_2021_acccess_scores_wide_aggreg %>% group_by(HealthDistrict) %>% dplyr::summarise(
  primcare_pop = sum(primcare_pop_cnt, na.rm = TRUE), 
  primcare_cnt = sum(primcare_cnt, na.rm = TRUE), 
  primcare_near_10_mean = weighted.mean(primcare_near_10_mean, primcare_pop_cnt, na.rm = TRUE), 
  primcare_near_10_median = weighted.mean(primcare_near_10_median, primcare_pop_cnt, na.rm = TRUE), 
  primcare_2sfca = weighted.mean(primcare_2sfca, primcare_pop_cnt, na.rm = TRUE), 
  primcare_e2sfca = weighted.mean(primcare_e2sfca, primcare_pop_cnt, na.rm = TRUE),
  primcare_3sfca = weighted.mean(primcare_3sfca, primcare_pop_cnt, na.rm = TRUE)
  )

long format hd

dmv_bg_2021_acccess_scores_hd <- tidyr::gather(dmv_bg_2021_acccess_scores_hd_wide, measure, value, 
                                            primcare_pop,
                                            primcare_cnt,
                                            primcare_near_10_mean,
                                            primcare_near_10_median,
                                            primcare_2sfca, 
                                            primcare_e2sfca, 
                                            primcare_3sfca)
#define year
dmv_bg_2021_acccess_scores_hd$year <- 2021
#define measure type
dmv_bg_2021_acccess_scores_hd$measure_type <- as.factor(ifelse(dmv_bg_2021_acccess_scores_hd$measure=="primcare_2sfca" |
                                                            dmv_bg_2021_acccess_scores_hd$measure=="primcare_e2sfca" |
                                                              dmv_bg_2021_acccess_scores_hd$measure=="primcare_3sfca", 'index','count') )

include name: geographic name for geographic unit: HD in this case

con <- RPostgreSQL::dbConnect(drv = RPostgreSQL::PostgreSQL(),
                              dbname = "sdad",
                              host = "postgis1",
                              port = 5432,
                              user = Sys.getenv(x = "DB_USR"),
                              password = Sys.getenv(x = "DB_PWD"))
#b)query
hd_names  <- sf::st_read(
  con, 
  query= "
    SELECT *
 FROM dc_geographies.va_hd_vdh_2021_health_district_geo_names"
)

#create region_type and region_name: join 

dmv_bg_2021_acccess_scores_hd <- dmv_bg_2021_acccess_scores_hd %>% left_join(hd_names, by = c("HealthDistrict"="region_name") )
#order
dmv_bg_2021_acccess_scores_hd <- dmv_bg_2021_acccess_scores_hd %>% dplyr::select("geoid","region_type", region_name = "HealthDistrict", "measure", "value", "year", "measure_type")

CHANGE OF GEOGRAPHY FOR VA (instead of dmv)

stack all the measures for BLOCK GROUP + TRACT + COUNTY: include NAME

va_bg_2021_access_scores_primcare <- rbind(dmv_bg_2021_acccess_scores_bg, 
                                    dmv_bg_2021_acccess_scores_tract,
                                    dmv_bg_2021_acccess_scores_county, 
                                    dmv_bg_2021_acccess_scores_hd)

change one name

va_bg_2021_access_scores_primcare$measure <- ifelse(va_bg_2021_access_scores_primcare$measure=='primcare_pop', 'primcare_pop_cnt' , va_bg_2021_access_scores_primcare$measure)

SAVE with specific name

write.csv(va_bg_2021_access_scores_primcare, "va_hdcttrbg_webmd_2021_access_scores_primcare", row.names = FALSE )

#a) connect
con <- RPostgreSQL::dbConnect(drv = RPostgreSQL::PostgreSQL(),
                               dbname = "sdad",
                               host = "postgis1",
                               port = 5432,
                               user = Sys.getenv(x = "DB_USR"),
                               password = Sys.getenv(x = "DB_PWD"))

#b) save the file in scheme dc_working in pgadmin: CHANGE name
sf::st_write(
  va_bg_2021_access_scores_primcare, con,
  c("dc_health_behavior_diet", "va_hdcttrbg_webmd_2021_access_scores_primcare"),
  table_owner = "data_commons",
  row.names = FALSE)

#d) Disconnect
RPostgreSQL::dbDisconnect(con)


uva-bi-sdad/dc.webmd.primcare documentation built on June 18, 2022, 7 a.m.