knitr::opts_chunk$set(echo = TRUE)
library(sf) library(tidygeocoder) library(dplyr) library(community) library(catchment)
#start clean rm(list=ls())
setwd("~/VDH/Floating_Catchment_Areas/dmv/6. ems_stations_dmv_model/")
#provider <- read.csv("pediat.dmv.geo.csv", row.names = 1) #from pgadmin #a) conn con <- RPostgreSQL::dbConnect(drv = RPostgreSQL::PostgreSQL(), dbname = "sdad", host = "postgis1", port = 5432, user = Sys.getenv(x = "DB_USR"), password = Sys.getenv(x = "DB_PWD")) #b) query provider <- sf::st_read( con, query= " SELECT * FROM dc_health_behavior_diet.ncr_pl_hifld_2021_ems_stations " ) provider <- provider %>% select(unique_ID, facility_name, address, city, state, county, fips, lat, lon) #c) Disconnect RPostgreSQL::dbDisconnect(con) #write write.csv(provider, "ems_stations.csv" )
provider <- read.csv("ems_stations.csv", row.names = 1)
## collapse by location provider$doctors <- 1 provider$location <- paste0(provider$lat, ",", provider$lat) counts <- tapply(provider$doctors, provider$location, sum) locations <- which(!duplicated(provider$location)) provider <- provider[locations,] provider$doctors <- counts[provider$location] ## assign IDs just to be explicit provider$ID <- paste0("l", seq_len(nrow(provider)))
## data combined = county_names <- list( dc = "District of Columbia", md = c("Charles", "Frederick", "Montgomery", "Prince George's"), va = c("Alexandria", "Arlington", "Fairfax", "Falls Church", "Loudoun", "Manassas", "Manassas Park", "Prince William") ) data <- list() shapes <- list() for(state in c("dc", "md", "va")){ # get county shapes to identify block groups counties <- download_census_shapes(".", state, "county", paste0(state, "_counties")) blockgroups <- download_census_shapes(".", state, "bg", paste0(state, "_blockgroups")) # save only selected counties counties <- counties[counties$NAME %in% county_names[[state]],] counties[counties$NAME == "Fairfax", "NAME"] <- c("Fairfax City", "Fairfax") shapes[[state]] <- list( counties = counties, blockgroups = blockgroups[substr(blockgroups$GEOID, 1, 5) %in% counties$GEOID,] ) # population data data[[state]] <- download_census_population(".", state, 2019, counties = counties$GEOID) } data_combined <- do.call(rbind, lapply(names(data), function(state){ d <- data[[state]]$estimates s <- shapes[[state]]$blockgroups rownames(s) <- s$GEOID total <- d$TOTAL.POPULATION_Total total[total == 0] <- 1 data.frame( GEOID = d$GEOID, population = d$TOTAL.POPULATION_Total, percent_female = d$SEX.BY.AGE_Female_Female / total * 100, percent_white = d$RACE_Total_White.alone / total * 100, percent_over_49 = rowSums(d[, grep("[5-8][05]", colnames(d))]) / total * 100, female_over_14 = rowSums(d[, grep("Female_(?:[2-9]\\d|1[58])", colnames(d))]), pediatric_pop = rowSums(d[, c(4:7,28:31) ]), st_coordinates(st_centroid(st_geometry(s[as.character(d$GEOID),]))) ) })) data_combined <- data_combined[!is.na(data_combined$Y),]
library(osrm) options(osrm.server = Sys.getenv("OSRM_SERVER"), osrm.profile = "car") if(!file.exists("traveltimes_exercise.csv")){ traveltimes <- osrmTable( src = data_combined[, c("GEOID", "X", "Y")], #population-demand dst = provider[, c("ID", "lon", "lat")] #providers supply )$duration write.csv( cbind(GEOID = rownames(traveltimes), as.data.frame(traveltimes)), "traveltimes_exercise.csv", row.names = FALSE ) }
library(tigris) library(maps) # add block geoids # get US blocks shapefile blocks_VA <- st_as_sf(blocks(state="VA")) blocks_MD <- st_as_sf(blocks(state="MD")) blocks_DC <- st_as_sf(blocks(state="DC")) blocks <- rbind(blocks_VA, blocks_MD) blocks <- rbind(blocks, blocks_DC) # lon and lat to geo-points geopts <- provider %>% st_as_sf(coords = c("lon", "lat"), crs = 4269) # indeces of bgs which contain a geopoint inds <- st_within(geopts$geometry, blocks$geometry, sparse=T) blk_list <- c() for (i in inds){ if (identical(blocks$NAME[i],character(0))){ blk_list<- append(blk_list, NA)} else{ blk_list <- append(blk_list, blocks$GEOID[i])} } provider['geoid_blk_new'] <- blk_list # This assumes that you have longitude and latitude of each location and if you want bg or tract or county just replace 'blocks_VA <- st_as_sf(blocks(state="VA"))' with a corresponding value, e.g. 'tracts_VA <- st_as_sf(tracts(state="VA"))'. I believe it uses either tigris or maps package
#obtain geoid provider$GEOID <- substr(provider$geoid_blk_new, 1,12) data_combined$GEOID <- as.character(data_combined$GEOID) #summary by geoid: counts provider_counts <- provider %>% group_by(GEOID) %>% summarise( hosp_cnt= sum(doctors)) #join hosp_cnt to geographies data_combined <- data_combined %>% left_join(provider_counts, by= c("GEOID") )
#mean of 10 nearest top_mean <- function(x) { mean(head(sort(x ), 10) ) } #median of 10 nearest top_median <- function(x) { median(head(sort(x ), 10) ) } #apply rowwise traveltimes_near <- data.frame(near_10_mean=apply(traveltimes, 1, top_mean), near_10_median=apply(traveltimes, 1, top_median)) #rownames_to_column(traveltimes_near, var = "GEOID") traveltimes_near$GEOID <- row.names(traveltimes_near) #join mean median traveltimes to geographies data_combined <- data_combined %>% left_join(traveltimes_near, by= "GEOID")
#raw traveltimes: traveltimes matrix already estimated and with colnames arranged traveltimes <- read.csv("traveltimes_exercise.csv", row.names = 1) #population: always recheck relevant population: ie. for pediatrics: pop 0-17 years population <- data_combined %>% select(GEOID, population,hosp_cnt, near_10_mean.y, near_10_median.y) # realign travel times traveltimes <- traveltimes[as.character(population$GEOID), provider$ID]
write.csv(provider[, c("ID", "address", "lat", "lon", "doctors")], "provider.csv", row.names = FALSE) write.csv(cbind(GEOID = rownames(traveltimes), traveltimes), "traveltimes_trimmed.csv", row.names = FALSE) write.csv(population, "population.csv", row.names = FALSE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.