knitr::opts_chunk$set(echo = TRUE)
library(sf) library(tidygeocoder) library(dplyr) library(community) library(readr)
#start clean rm(list=ls())
setwd("~/VDH/Floating_Catchment_Areas/va/4. primcare_va_model/")
#provider <- read.csv("pediat.dmv.geo.csv", row.names = 1) #from pgadmin #a) conn con <- RPostgreSQL::dbConnect(drv = RPostgreSQL::PostgreSQL(), dbname = "sdad", host = "postgis1", port = 5432, user = Sys.getenv(x = "DB_USR"), password = Sys.getenv(x = "DB_PWD")) #b) query provider <- sf::st_read( con, query= " SELECT * FROM data_commons.virginia_primary_care_doctors_geolocated_blks" ) #separate geometry to obtain lat lon provider <- provider %>% dplyr::mutate(lon = sf::st_coordinates(.)[,1], lat = sf::st_coordinates(.)[,2]) #c) Disconnect RPostgreSQL::dbDisconnect(con) #write #write.csv(provider, "urgent_care_va.csv" )
#provider <- read.csv("urgent_care_va.csv", row.names = 1 )
## collapse by location provider$doctors <- 1 provider$location <- paste0(provider$lat, ",", provider$lon) #identify unique values provider <- provider %>% distinct(name, location, .keep_all = TRUE) counts <- tapply(provider$doctors, provider$location, sum) locations <- which(!duplicated(provider$location)) provider <- provider[locations,] provider$doctors <- counts[provider$location] ## assign IDs just to be explicit provider$ID <- paste0("l", seq_len(nrow(provider))) provider <- provider %>% filter(!is.na(lat)) provider <- st_drop_geometry(provider) #delete outliers: possibly wrong lat and long provider <- provider %>% filter(lon > -90) provider <- provider %>% filter(lon < -70)
library(tidycensus) library(tidyverse) census_api_key("eba406410c653b81d6a795ac4e989221f7bdf302") # Bring in census tract data. va <- get_acs(geography = "block group", year = 2019, variables = c(population = "B01003_001E", medinc = "B19013_001E" ), state = "VA", survey = "acs5", output = "wide", geometry = TRUE) # data combined #centroid and coordinates data_combined <- data.frame( GEOID = va$GEOID, population = va$population, medinc = va$medinc, st_coordinates(st_centroid(va$geometry)) ) data_combined <- data_combined %>% filter(!is.na(data_combined$X)) data_combined <- data_combined %>% filter(GEOID != 517000323001 ) data_combined <- data_combined %>% filter(GEOID != 517000323002 ) data_combined <- data_combined %>% filter(GEOID != 517000323003 )
library(osrm) options(osrm.server = Sys.getenv("OSRM_SERVER"), osrm.profile = "car") if(!file.exists("traveltimes_exercise.csv")){ traveltimes <- osrmTable( src = data_combined[, c("GEOID", "X", "Y")], #population-demand dst = provider[, c("ID", "lon", "lat")] #providers supply )$duration write.csv( cbind(GEOID = rownames(traveltimes), as.data.frame(traveltimes)), "traveltimes_exercise.csv", row.names = FALSE ) } traveltimes <- read.csv("traveltimes_exercise.csv", row.names = 1)
library(tigris) library(maps) library(sf) # add block geoids # get US blocks shapefile blocks_VA <- st_as_sf(block_groups(state="VA", year=2019)) #, year=2010 blocks <- blocks_VA # lon and lat to geo-points geopts <- provider %>% st_as_sf(coords = c("lon", "lat"), crs = 4269) #4326. initial: 4269 geopts <-st_transform(geopts, crs = 4326) blocks <-st_transform(blocks, crs = 4326) #prim.care.va <-st_transform(prim.care.va, crs = 4326) st_crs(st_as_sf(geopts)) st_crs(st_as_sf(blocks)) # indeces of bgs which contain a geopoint inds <- st_within(geopts$geometry, blocks$geometry, sparse=T) blk_list <- c() for (i in inds){ if (identical(blocks$NAME[i],character(0))){ blk_list<- append(blk_list, NA)} else{ blk_list <- append(blk_list, blocks$GEOID[i])} } provider['GEOID'] <- blk_list
#providers w geoid provider$GEOID <- substr(provider$geoid_blk, 1, 12 ) #data_combined with geoid: ok num_providers <- provider %>% group_by(GEOID) %>% summarise(prov_cnt = sum(doctors) ) #join providers to block groups data_combined$GEOID <- as.character( data_combined$GEOID) data_combined <- data_combined %>% left_join(num_providers, by= "GEOID" )
#mean of 10 nearest top_mean <- function(x) { mean(head(sort(x ), 10) ) } #median of 10 nearest top_median <- function(x) { median(head(sort(x ), 10) ) } #apply rowwise traveltimes_near <- data.frame(near_10_mean=apply(traveltimes, 1, top_mean), near_10_median=apply(traveltimes, 1, top_median)) #rownames_to_column(traveltimes_near, var = "GEOID") traveltimes_near$GEOID <- row.names(traveltimes_near) #join mean median traveltimes to geographies data_combined <- data_combined %>% left_join(traveltimes_near, by= "GEOID")
#raw traveltimes: traveltimes matrix already estimated and with colnames arranged traveltimes <- read.csv("traveltimes_exercise.csv", row.names = 1) #population: always recheck relevant population: ie. for pediatrics: pop 0-17 years population <- data_combined %>% select(GEOID, population, prov_cnt, near_10_mean, near_10_median) # realign travel times traveltimes <- traveltimes[as.character(population$GEOID), provider$ID]
write.csv(provider[, c("ID", "address", "lat", "lon", "doctors")], "provider.csv", row.names = FALSE) write.csv(cbind(GEOID = rownames(traveltimes), traveltimes), "traveltimes_trimmed.csv", row.names = FALSE) write.csv(population, "population.csv", row.names = FALSE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.