In uva-bi-sdad/dc.hifld.ems: Social Data Commons: Hospitals Access Scores

knitr::opts_chunk$set(echo = TRUE)

libraries

library(sf)
library(tidygeocoder)
library(dplyr)
library(community)
library(catchment)

clean df

#start clean
rm(list=ls())

working directory

setwd("~/VDH/Floating_Catchment_Areas/dmv/6. ems_stations_dmv_model/")

load data provider. Do not run

#provider <- read.csv("pediat.dmv.geo.csv", row.names = 1)
#from pgadmin
#a) conn
con <- RPostgreSQL::dbConnect(drv = RPostgreSQL::PostgreSQL(),
                               dbname = "sdad",
                               host = "postgis1",
                               port = 5432,
                               user = Sys.getenv(x = "DB_USR"),
                               password = Sys.getenv(x = "DB_PWD"))

#b) query
provider <- sf::st_read(
  con, 
  query= "
SELECT  *
FROM dc_health_behavior_diet.ncr_pl_hifld_2021_ems_stations  "
)

provider <- provider %>% select(unique_ID, facility_name, address, city, state, county, fips, lat, lon)

#c) Disconnect
RPostgreSQL::dbDisconnect(con)

#write
write.csv(provider, "ems_stations.csv" )

load data provider

provider <- read.csv("ems_stations.csv", row.names = 1)

fix data

## collapse by location
provider$doctors <- 1
provider$location <- paste0(provider$lat, ",", provider$lat)
counts <- tapply(provider$doctors, provider$location, sum)
locations <- which(!duplicated(provider$location))
provider <- provider[locations,]
provider$doctors <- counts[provider$location]

## assign IDs just to be explicit
provider$ID <- paste0("l", seq_len(nrow(provider)))

data combined

## data combined =
county_names <- list(
  dc = "District of Columbia",
  md = c("Charles", "Frederick", "Montgomery", "Prince George's"),
  va = c("Alexandria", "Arlington", "Fairfax", "Falls Church", "Loudoun", "Manassas", "Manassas Park", "Prince William")
)
data <- list()
shapes <- list()


for(state in c("dc", "md", "va")){
  # get county shapes to identify block groups
  counties <- download_census_shapes(".", state, "county", paste0(state, "_counties"))
  blockgroups <- download_census_shapes(".", state, "bg", paste0(state, "_blockgroups"))

  # save only selected counties
  counties <- counties[counties$NAME %in% county_names[[state]],]
  counties[counties$NAME == "Fairfax", "NAME"] <- c("Fairfax City", "Fairfax")
  shapes[[state]] <- list(
    counties = counties,
    blockgroups = blockgroups[substr(blockgroups$GEOID, 1, 5) %in% counties$GEOID,]
  )

  # population data
  data[[state]] <- download_census_population(".", state, 2019, counties = counties$GEOID)
}

data_combined <- do.call(rbind, lapply(names(data), function(state){
  d <- data[[state]]$estimates
  s <- shapes[[state]]$blockgroups
  rownames(s) <- s$GEOID
  total <- d$TOTAL.POPULATION_Total
  total[total == 0] <- 1
  data.frame(
    GEOID = d$GEOID,
    population = d$TOTAL.POPULATION_Total,
    percent_female = d$SEX.BY.AGE_Female_Female / total * 100,
    percent_white = d$RACE_Total_White.alone / total * 100,
    percent_over_49 = rowSums(d[, grep("[5-8][05]", colnames(d))]) / total * 100,
    female_over_14 = rowSums(d[, grep("Female_(?:[2-9]\\d|1[58])", colnames(d))]),
    pediatric_pop = rowSums(d[, c(4:7,28:31) ]),

    st_coordinates(st_centroid(st_geometry(s[as.character(d$GEOID),])))
  )
}))
data_combined <- data_combined[!is.na(data_combined$Y),]

travel time

library(osrm)
options(osrm.server = Sys.getenv("OSRM_SERVER"), osrm.profile = "car")
if(!file.exists("traveltimes_exercise.csv")){
  traveltimes <- osrmTable(
    src = data_combined[, c("GEOID", "X", "Y")],  #population-demand
    dst = provider[, c("ID", "lon", "lat")]     #providers supply
  )$duration
  write.csv(
    cbind(GEOID = rownames(traveltimes), as.data.frame(traveltimes)),
    "traveltimes_exercise.csv", row.names = FALSE
  )
}

add1. Define geography id. This is because the Geography-GEOID from initial file may be outdated

library(tigris)
library(maps)
# add block geoids
# get US blocks shapefile
blocks_VA <- st_as_sf(blocks(state="VA"))
blocks_MD <- st_as_sf(blocks(state="MD"))
blocks_DC <- st_as_sf(blocks(state="DC"))
blocks <- rbind(blocks_VA, blocks_MD)
blocks <- rbind(blocks, blocks_DC)

# lon and lat to geo-points
geopts <- provider %>%
  st_as_sf(coords = c("lon", "lat"), crs = 4269)
# indeces of bgs which contain a geopoint
inds <- st_within(geopts$geometry, blocks$geometry, sparse=T)
blk_list <- c()
for (i in inds){
  if (identical(blocks$NAME[i],character(0))){
    blk_list<- append(blk_list, NA)}
  else{
    blk_list <- append(blk_list, blocks$GEOID[i])}
}
provider['geoid_blk_new'] <- blk_list


# This assumes that you have longitude and latitude of each location and if you want bg or tract or county just replace 'blocks_VA <- st_as_sf(blocks(state="VA"))' with a corresponding value, e.g. 'tracts_VA <- st_as_sf(tracts(state="VA"))'. I believe it uses either tigris or maps package

add2 count providers per geography using matching codes

#obtain geoid
provider$GEOID <- substr(provider$geoid_blk_new, 1,12)
data_combined$GEOID <- as.character(data_combined$GEOID)
#summary by geoid: counts
provider_counts <- provider %>% group_by(GEOID) %>% summarise( hosp_cnt= sum(doctors))

#join hosp_cnt to geographies
data_combined <- data_combined %>% left_join(provider_counts, by= c("GEOID") )

add3 mean and median of 10 nearest drive times

#mean of 10 nearest
top_mean <- function(x) {  
   mean(head(sort(x ), 10) ) }
#median of 10 nearest
top_median <- function(x) {  
   median(head(sort(x ), 10) ) }
#apply rowwise
traveltimes_near <- data.frame(near_10_mean=apply(traveltimes, 1, top_mean), 
                               near_10_median=apply(traveltimes, 1, top_median)) 
#rownames_to_column(traveltimes_near, var = "GEOID")
traveltimes_near$GEOID <- row.names(traveltimes_near) 
#join mean median traveltimes to geographies
data_combined <- data_combined %>% left_join(traveltimes_near, by= "GEOID")

prepara data for save

#raw traveltimes: traveltimes matrix already estimated and with colnames arranged 
traveltimes <- read.csv("traveltimes_exercise.csv", row.names = 1)
#population: always recheck relevant population: ie. for pediatrics: pop 0-17 years
population <- data_combined %>% select(GEOID, population,hosp_cnt, near_10_mean.y, near_10_median.y)
# realign travel times
traveltimes <- traveltimes[as.character(population$GEOID), provider$ID]

save new data

write.csv(provider[, c("ID", "address", "lat", "lon", "doctors")], "provider.csv", row.names = FALSE)
write.csv(cbind(GEOID = rownames(traveltimes), traveltimes), "traveltimes_trimmed.csv", row.names = FALSE)
write.csv(population, "population.csv", row.names = FALSE)

next: catchment areas - ems

uva-bi-sdad/dc.hifld.ems documentation built on June 8, 2022, 9:11 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

uva-bi-sdad/dc.hifld.ems
Social Data Commons: Hospitals Access Scores

In uva-bi-sdad/dc.hifld.ems: Social Data Commons: Hospitals Access Scores

libraries

clean df

working directory

load data provider. Do not run

load data provider

fix data

data combined

travel time

add1. Define geography id. This is because the Geography-GEOID from initial file may be outdated

add2 count providers per geography using matching codes

add3 mean and median of 10 nearest drive times

prepara data for save

save new data

next: catchment areas - ems

R Package Documentation

Browse R Packages

We want your feedback!

uva-bi-sdad/dc.hifld.ems Social Data Commons: Hospitals Access Scores

In uva-bi-sdad/dc.hifld.ems: Social Data Commons: Hospitals Access Scores

libraries

clean df

working directory

load data provider. Do not run

load data provider

fix data

data combined

travel time

add1. Define geography id. This is because the Geography-GEOID from initial file may be outdated

add2 count providers per geography using matching codes

add3 mean and median of 10 nearest drive times

prepara data for save

save new data

next: catchment areas - ems

R Package Documentation

Browse R Packages

We want your feedback!

uva-bi-sdad/dc.hifld.ems
Social Data Commons: Hospitals Access Scores