```r
knitr::opts_chunk$set(echo = TRUE)
```
```r
library(sf)
library(tidygeocoder)
library(dplyr)
library(community)
library(catchment)

# start clean
rm(list = ls())

setwd("~/VDH/Floating_Catchment_Areas/dmv/4. primarycare_dmv_model/")

provider <- read.csv("primcare.dmv.geo.csv", row.names = 1)
```
```r
## collapse by location
provider$doctors <- 1
provider$location <- paste0(provider$lat, ",", provider$lon)
counts <- tapply(provider$doctors, provider$location, sum)
locations <- which(!duplicated(provider$location))
provider <- provider[locations, ]
provider$doctors <- counts[provider$location]

## assign IDs just to be explicit
provider$ID <- paste0("l", seq_len(nrow(provider)))

# exclude any outside of the focal region
## get zipcodes within focal counties
dmv_counties <- list(
  dc = "District of Columbia",
  md = c("Charles", "Frederick", "Montgomery", "Prince George's"),
  va = c(
    "Alexandria", "Arlington", "Fairfax", "Falls Church", "Loudoun",
    "Manassas", "Manassas Park", "Prince William"
  )
)
county_shapes <- read_sf("counties.geojson", as_tibble = FALSE)
geography_ref <- read.csv(
  "https://www2.census.gov/geo/docs/maps-data/data/rel/zcta_county_rel_10.txt"
)
zips <- unique(unlist(lapply(names(dmv_counties), function(state) {
  GEOIDs <- county_shapes[county_shapes$NAME %in% dmv_counties[[state]], "GEOID", drop = TRUE]
  formatC(geography_ref[geography_ref$GEOID %in% GEOIDs, "ZCTA5"], width = 5, flag = "0")
}), use.names = FALSE))

### identify zipcodes that cross county boundaries
zip_cross <- substr(unique(do.call(paste0, geography_ref[
  geography_ref$ZCTA5 %in% zips, c("ZCTA5", "GEOID")
])), 1, 5)
zip_cross <- zip_cross[duplicated(zip_cross)]

## identify locations in those boundary zip codes
potential_ex <- provider[
  !grepl(paste0("(?:", paste(zips, collapse = "|"), ")$"), provider$address, perl = TRUE) |
    (grepl(paste0("(?:", paste(zip_cross, collapse = "|"), ")$"), provider$address, perl = TRUE) &
      !grepl(
        paste0("(?:", paste(unlist(dmv_counties), collapse = "|"), "),"),
        provider$address, perl = TRUE
      )),
]

## get the counties of those addresses, and exclude any not in the focal set
potential_ex$county <- reverse_geo(
  potential_ex$lat, potential_ex$lon,
  full_results = TRUE, method = "arcgis"
)$Subregion
provider <- provider[
  !provider$address %in% c(
    potential_ex[
      !is.na(potential_ex$county) & !grepl(
        paste0("(?:", paste(unlist(dmv_counties), collapse = "|"), ")"),
        potential_ex$county, perl = TRUE
      ),
      "address"
    ],
    "408 W LOMBARD St, College Park, MD 20742"
  ),
]

# drop providers with missing coordinates
provider <- provider %>% filter(!is.na(lon))
```
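The collapse-by-location step at the top of the chunk above can also be written with dplyr (already loaded). This is just an equivalent sketch, assuming, as above, that each raw row represents one doctor and carries `address`, `lat`, and `lon`; `provider_collapsed` is an illustrative name:

```r
# equivalent collapse with dplyr: one row per unique lat/lon pair,
# doctors = number of raw rows at that location
provider_collapsed <- provider %>%
  mutate(location = paste0(lat, ",", lon)) %>%
  group_by(location) %>%
  summarise(
    address = first(address), lat = first(lat), lon = first(lon),
    doctors = n(), .groups = "drop"
  ) %>%
  mutate(ID = paste0("l", seq_len(n())))
```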
```r
## combined block-group data for the three states
county_names <- list(
  dc = "District of Columbia",
  md = c("Charles", "Frederick", "Montgomery", "Prince George's"),
  va = c(
    "Alexandria", "Arlington", "Fairfax", "Falls Church", "Loudoun",
    "Manassas", "Manassas Park", "Prince William"
  )
)
data <- list()
shapes <- list()
for (state in c("dc", "md", "va")) {
  # get county shapes to identify block groups
  counties <- download_census_shapes(".", state, "county", paste0(state, "_counties"))
  blockgroups <- download_census_shapes(".", state, "bg", paste0(state, "_blockgroups"))
  # save only selected counties
  counties <- counties[counties$NAME %in% county_names[[state]], ]
  counties[counties$NAME == "Fairfax", "NAME"] <- c("Fairfax City", "Fairfax")
  shapes[[state]] <- list(
    counties = counties,
    blockgroups = blockgroups[substr(blockgroups$GEOID, 1, 5) %in% counties$GEOID, ]
  )
  # population data
  data[[state]] <- download_census_population(".", state, 2019, counties = counties$GEOID)
}
data_combined <- do.call(rbind, lapply(names(data), function(state) {
  d <- data[[state]]$estimates
  s <- shapes[[state]]$blockgroups
  rownames(s) <- s$GEOID
  total <- d$TOTAL.POPULATION_Total
  total[total == 0] <- 1
  data.frame(
    GEOID = d$GEOID,
    population = d$TOTAL.POPULATION_Total,
    percent_female = d$SEX.BY.AGE_Female_Female / total * 100,
    percent_white = d$RACE_Total_White.alone / total * 100,
    percent_over_49 = rowSums(d[, grep("[5-8][05]", colnames(d))]) / total * 100,
    female_over_14 = rowSums(d[, grep("Female_(?:[2-9]\\d|1[58])", colnames(d), perl = TRUE)]),
    pediatric_pop = rowSums(d[, c(4:7, 28:31)]),
    st_coordinates(st_centroid(st_geometry(s[as.character(d$GEOID), ])))
  )
}))
data_combined <- data_combined[!is.na(data_combined$Y), ]
```
```r
library(osrm)
options(osrm.server = Sys.getenv("OSRM_SERVER"), osrm.profile = "car")

if (!file.exists("traveltimes_exercise.csv")) {
  traveltimes <- osrmTable(
    src = data_combined[, c("GEOID", "X", "Y")], # block-group centroids (demand)
    dst = provider[, c("ID", "lon", "lat")]      # providers (supply)
  )$duration
  write.csv(
    cbind(GEOID = rownames(traveltimes), as.data.frame(traveltimes)),
    "traveltimes_exercise.csv",
    row.names = FALSE
  )
}
traveltimes <- read.csv("traveltimes_exercise.csv", row.names = 1)
```
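Before joining these travel times back to the demand and supply tables, it can help to confirm that the matrix is shaped and named as expected. This check is an optional addition, not part of the original workflow:

```r
# optional sanity check: one row per block group, one column per provider,
# with names aligned to data_combined$GEOID and provider$ID
stopifnot(
  nrow(traveltimes) == nrow(data_combined),
  ncol(traveltimes) == nrow(provider),
  all(rownames(traveltimes) %in% as.character(data_combined$GEOID)),
  all(colnames(traveltimes) %in% provider$ID)
)
```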
```r
# another method: look up each provider's census block FIPS from its coordinates
# via the FCC Area API
geo2fips <- function(latitude, longitude) {
  url <- "https://geo.fcc.gov/api/census/area?lat=%f&lon=%f&format=json"
  jsonlite::fromJSON(sprintf(url, latitude, longitude))[["results"]][["block_fips"]]
}

# BB <- geo2fips(provider$lat, provider$lon)
list_geoid <- list()
for (i in seq_len(nrow(provider))) {
  list_geoid[[i]] <- geo2fips(provider$lat[i], provider$lon[i])
}
provider$geoid_new <- unlist(list_geoid)
```
```r
# providers with block-group GEOID (first 12 digits of the block FIPS)
provider$GEOID <- substr(provider$geoid_new, 1, 12)

# count providers per block group
num_providers <- provider %>%
  group_by(GEOID) %>%
  summarise(prov_cnt = sum(doctors))

# join provider counts to block groups
data_combined$GEOID <- as.character(data_combined$GEOID)
data_combined <- data_combined %>% left_join(num_providers, by = "GEOID")
```
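A couple of quick diagnostics (optional additions, not in the original script) confirm that the FCC-derived provider GEOIDs actually matched the ACS block groups:

```r
# provider block groups that did not match any ACS block group (ideally empty)
setdiff(num_providers$GEOID, data_combined$GEOID)

# share of block groups that contain at least one provider
mean(!is.na(data_combined$prov_cnt))
```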
```r
# mean of the 10 nearest (shortest) travel times
top_mean <- function(x) mean(head(sort(x), 10))

# median of the 10 nearest travel times
top_median <- function(x) median(head(sort(x), 10))

# apply row-wise (one row per block group)
traveltimes_near <- data.frame(
  near_10_mean = apply(traveltimes, 1, top_mean),
  near_10_median = apply(traveltimes, 1, top_median)
)
traveltimes_near$GEOID <- row.names(traveltimes_near)

# join the mean/median travel times to the block-group data
data_combined <- data_combined %>% left_join(traveltimes_near, by = "GEOID")
```
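As a quick illustration of what the helpers return, here is a toy vector (not real travel times). Note that `sort()` drops NAs by default and `head(..., 10)` simply uses all values when fewer than ten are available:

```r
# toy example of the 10-nearest summaries
x <- c(12, 3, 45, 7, NA, 22, 9, 5, 18, 30, 2, 60)
top_mean(x)   # mean of the 10 smallest non-NA values
top_median(x) # median of the 10 smallest non-NA values
```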
```r
# travel-time matrix already estimated, with GEOIDs as row names and provider IDs as columns
traveltimes <- read.csv("traveltimes_exercise.csv", row.names = 1)

# population: always recheck that the relevant population is used
# (e.g., for pediatrics, the 0-17 year-old population)
population <- data_combined %>%
  select(GEOID, population, prov_cnt, near_10_mean, near_10_median)

# realign travel times to the population rows and provider columns
traveltimes <- traveltimes[as.character(population$GEOID), provider$ID]
```
```r
write.csv(
  provider[, c("ID", "address", "lat", "lon", "doctors")],
  "provider.csv", row.names = FALSE
)
write.csv(
  cbind(GEOID = rownames(traveltimes), traveltimes),
  "traveltimes_trimmed.csv", row.names = FALSE
)
write.csv(population, "population.csv", row.names = FALSE)
```
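These three files are the inputs to the floating catchment area model. As a reference for what that model computes, the following is a minimal base-R sketch of a plain two-step floating catchment area (2SFCA) ratio with an illustrative 30-minute cutoff. It does not use the `catchment` package's own functions (see that package's documentation for the packaged, distance-weighted versions), and both the cutoff and the `fca_30min` column name are assumptions for illustration:

```r
# minimal 2SFCA sketch (illustrative only; the 30-minute cutoff is an assumption)
tt <- as.matrix(traveltimes)        # block groups (rows) x providers (columns)
within_30 <- !is.na(tt) & tt <= 30  # reachable within 30 minutes

# step 1: provider-to-population ratio within each provider's catchment
demand <- as.vector(crossprod(within_30, population$population))
ratio <- ifelse(demand > 0, provider$doctors / demand, 0)

# step 2: sum the reachable providers' ratios for each block group
population$fca_30min <- as.vector(within_30 %*% ratio)
```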