In uva-bi-sdad/dc.webmd.primcare: Social Data Commons: Health, Behavior, Diet: Primary Care Access Scores

knitr::opts_chunk$set(echo = TRUE)

libraries

library(sf)
library(tidygeocoder)
library(dplyr)
library(community)
library(catchment)

clean df

#start clean
rm(list=ls())

working directory

setwd("~/VDH/Floating_Catchment_Areas/dmv/4. primarycare_dmv_model/")

load data provider:

provider <- read.csv("primcare.dmv.geo.csv", row.names = 1)

fix data

## collapse by location
provider$doctors <- 1
provider$location <- paste0(provider$lat, ",", provider$lat)
counts <- tapply(provider$doctors, provider$location, sum)
locations <- which(!duplicated(provider$location))
provider <- provider[locations,]
provider$doctors <- counts[provider$location]

## assign IDs just to be explicit
provider$ID <- paste0("l", seq_len(nrow(provider)))

# exclude any outside of the focal region

## get zipcodes within focal counties
dmv_counties <- list(
  dc = "District of Columbia",
  md = c("Charles", "Frederick", "Montgomery", "Prince George's"),
  va = c(
    "Alexandria", "Arlington", "Fairfax", "Falls Church", "Loudoun", "Manassas",
    "Manassas Park", "Prince William"
  )
)
county_shapes <- read_sf("counties.geojson", as_tibble = FALSE)
geography_ref <- read.csv(
  "https://www2.census.gov/geo/docs/maps-data/data/rel/zcta_county_rel_10.txt"
)
zips <- unique(unlist(lapply(names(dmv_counties), function(state){
  GEOIDs <- county_shapes[county_shapes$NAME %in% dmv_counties[[state]], "GEOID", drop = TRUE]
  formatC(geography_ref[geography_ref$GEOID %in% GEOIDs, "ZCTA5"], width = 5, flag = 0)
}), use.names = FALSE))

### identify zipcodes that cross county boundaries
zip_cross <- substr(unique(do.call(paste0,
  geography_ref[geography_ref$ZCTA5 %in% zips, c("ZCTA5", "GEOID")]
)), 1, 5)
zip_cross <- zip_cross[duplicated(zip_cross)]

## identify locations in those boundary zip codes
potential_ex <- provider[
  !grepl(paste0("(?:", paste(zips, collapse = "|"), ")$"), provider$address) |
    (grepl(paste0("(?:", paste(zip_cross, collapse = "|"), ")$"), provider$address) &
      !grepl(
        paste0("(?:", paste(unlist(dmv_counties), collapse = "|"), "),"),
        provider$address
      )),
]

## get the counties of those addresses, and exclude any not in the focal set
potential_ex$county <- reverse_geo(potential_ex$lat, potential_ex$lon, full_results = TRUE, method = "arcgis")$Subregion
provider <- provider[
  !provider$address %in% c(potential_ex[
    !is.na(potential_ex$county) & !grepl(
      paste0("(?:", paste(unlist(dmv_counties), collapse = "|"), ")"), potential_ex$county
    ), "address"
  ], "408 W LOMBARD St, College Park, MD 20742"),
]

#no NA
provider <- provider %>% filter( !is.na(lon))

data combined

## data combined
county_names <- list(
  dc = "District of Columbia",
  md = c("Charles", "Frederick", "Montgomery", "Prince George's"),
  va = c("Alexandria", "Arlington", "Fairfax", "Falls Church", "Loudoun", "Manassas", "Manassas Park", "Prince William")
)
data <- list()
shapes <- list()


for(state in c("dc", "md", "va")){
  # get county shapes to identify block groups
  counties <- download_census_shapes(".", state, "county", paste0(state, "_counties"))
  blockgroups <- download_census_shapes(".", state, "bg", paste0(state, "_blockgroups"))

  # save only selected counties
  counties <- counties[counties$NAME %in% county_names[[state]],]
  counties[counties$NAME == "Fairfax", "NAME"] <- c("Fairfax City", "Fairfax")
  shapes[[state]] <- list(
    counties = counties,
    blockgroups = blockgroups[substr(blockgroups$GEOID, 1, 5) %in% counties$GEOID,]
  )

  # population data
  data[[state]] <- download_census_population(".", state, 2019, counties = counties$GEOID)
}

data_combined <- do.call(rbind, lapply(names(data), function(state){
  d <- data[[state]]$estimates
  s <- shapes[[state]]$blockgroups
  rownames(s) <- s$GEOID
  total <- d$TOTAL.POPULATION_Total
  total[total == 0] <- 1
  data.frame(
    GEOID = d$GEOID,
    population = d$TOTAL.POPULATION_Total,
    percent_female = d$SEX.BY.AGE_Female_Female / total * 100,
    percent_white = d$RACE_Total_White.alone / total * 100,
    percent_over_49 = rowSums(d[, grep("[5-8][05]", colnames(d))]) / total * 100,
    female_over_14 = rowSums(d[, grep("Female_(?:[2-9]\\d|1[58])", colnames(d))]),
    pediatric_pop = rowSums(d[, c(4:7,28:31) ]),

    st_coordinates(st_centroid(st_geometry(s[as.character(d$GEOID),])))
  )
}))
data_combined <- data_combined[!is.na(data_combined$Y),]

travel time

library(osrm)
options(osrm.server = Sys.getenv("OSRM_SERVER"), osrm.profile = "car")
if(!file.exists("traveltimes_exercise.csv")){
  traveltimes <- osrmTable(
    src = data_combined[, c("GEOID", "X", "Y")],  #population-demand
    dst = provider[, c("ID", "lon", "lat")]     #providers supply
  )$duration
  write.csv(
    cbind(GEOID = rownames(traveltimes), as.data.frame(traveltimes)),
    "traveltimes_exercise.csv", row.names = FALSE
  )
}

traveltimes <- read.csv("traveltimes_exercise.csv", row.names = 1)

add1. Define geography id. This is because the Geography-GEOID from initial file may be outdated

```r

library(tigris)

library(maps)

library(sf)

# add block geoids

# get US blocks shapefile

blocks_VA <- st_as_sf(blocks(state="VA"), year=2019)

blocks_MD <- st_as_sf(blocks(state="MD"), year=2019)

blocks_DC <- st_as_sf(blocks(state="DC"), year=2019)

blocks_DC <- readRDS("~/VDH/Floating_Catchment_Areas/dmv/blocks_DC_2014" )

blocks_MD <- readRDS( "~/VDH/Floating_Catchment_Areas/dmv/blocks_MD_2014" )

blocks_VA <- readRDS("~/VDH/Floating_Catchment_Areas/dmv/blocks_VA_2014" )

blocks <- rbind(blocks_VA, blocks_MD)

blocks <- rbind(blocks, blocks_DC)

# lon and lat to geo-points

geopts <- provider %>%

st_as_sf(coords = c("lon", "lat"), crs = 4269)

# indeces of bgs which contain a geopoint

inds <- st_within(geopts$geometry, blocks$geometry, sparse=T)

blk_list <- c()

for (i in inds){

if (identical(blocks$NAME[i],character(0))){

blk_list<- append(blk_list, NA)}

else{

blk_list <- append(blk_list, blocks$NAME[i])}

}

provider['geoid_blk'] <- blk_list

# This assumes that you have longitude and latitude of each location and if you want bg or tract or county just replace 'blocks_VA <- st_as_sf(blocks(state="VA"))' with a corresponding value, e.g. 'tracts_VA <- st_as_sf(tracts(state="VA"))'. I believe it uses either tigris or maps package

# another method
```r
geo2fips <- function(latitude, longitude) {
  url <- "https://geo.fcc.gov/api/census/area?lat=%f&lon=%f&format=json "
  block <- jsonlite::fromJSON(sprintf(url, latitude, longitude))[["results"]][["block_fips"]]
}

#BB <- geo2fips( provider$lat, provider$lon)

list_geoid <- list()
for (i in 1:nrow(provider)) {
  B <- geo2fips( provider$lat[i], provider$lon[i])
  list_geoid[i] <- B
}

df_list_geoid <- Reduce(rbind,list_geoid )

provider['geoid_new'] <- df_list_geoid

add2 count providers per geography using matching codes

#providers w geoid
provider$GEOID <- substr(provider$geoid_new, 1, 12 ) 
#data_combined with geoid: ok
num_providers <- provider %>% group_by(GEOID) %>% summarise(prov_cnt = sum(doctors) )

#join providers to block groups
data_combined$GEOID <-  as.character( data_combined$GEOID)
data_combined <- data_combined %>% left_join(num_providers, by= "GEOID" )

add3 mean and median of 10 nearest drive times

#mean of 10 nearest
top_mean <- function(x) {  
   mean(head(sort(x ), 10) ) }
#median of 10 nearest
top_median <- function(x) {  
   median(head(sort(x ), 10) ) }
#apply rowwise
traveltimes_near <- data.frame(near_10_mean=apply(traveltimes, 1, top_mean), 
                               near_10_median=apply(traveltimes, 1, top_median)) 
#rownames_to_column(traveltimes_near, var = "GEOID")
traveltimes_near$GEOID <- row.names(traveltimes_near) 
#join mean median traveltimes to geographies
data_combined <- data_combined %>% left_join(traveltimes_near, by= "GEOID")

prepara data for save

#traveltimes <- read.csv("traveltimes_exercise.csv")

#raw traveltimes: traveltimes matrix already estimated and with colnames arranged 
traveltimes <- read.csv("traveltimes_exercise.csv", row.names = 1)
#population: always recheck relevant population: ie. for pediatrics: pop 0-17 years
population <- data_combined %>% select(GEOID, population, prov_cnt, near_10_mean, near_10_median)
# realign travel times
traveltimes <- traveltimes[as.character(population$GEOID), provider$ID]

save new data

write.csv(provider[, c("ID", "address", "lat", "lon", "doctors")], "provider.csv", row.names = FALSE)
write.csv(cbind(GEOID = rownames(traveltimes), traveltimes), "traveltimes_trimmed.csv", row.names = FALSE)
write.csv(population, "population.csv", row.names = FALSE)

uva-bi-sdad/dc.webmd.primcare documentation built on June 18, 2022, 7 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

uva-bi-sdad/dc.webmd.primcare Social Data Commons: Health, Behavior, Diet: Primary Care Access Scores

In uva-bi-sdad/dc.webmd.primcare: Social Data Commons: Health, Behavior, Diet: Primary Care Access Scores

libraries

clean df

working directory

load data provider:

fix data

data combined

travel time

add1. Define geography id. This is because the Geography-GEOID from initial file may be outdated

library(tigris)

library(maps)

library(sf)

# add block geoids

# get US blocks shapefile

blocks_VA <- st_as_sf(blocks(state="VA"), year=2019)

blocks_MD <- st_as_sf(blocks(state="MD"), year=2019)

blocks_DC <- st_as_sf(blocks(state="DC"), year=2019)

blocks_DC <- readRDS("~/VDH/Floating_Catchment_Areas/dmv/blocks_DC_2014" )

blocks_MD <- readRDS( "~/VDH/Floating_Catchment_Areas/dmv/blocks_MD_2014" )

blocks_VA <- readRDS("~/VDH/Floating_Catchment_Areas/dmv/blocks_VA_2014" )

blocks <- rbind(blocks_VA, blocks_MD)

blocks <- rbind(blocks, blocks_DC)

# lon and lat to geo-points

geopts <- provider %>%

st_as_sf(coords = c("lon", "lat"), crs = 4269)

# indeces of bgs which contain a geopoint

inds <- st_within(geopts$geometry, blocks$geometry, sparse=T)

blk_list <- c()

for (i in inds){

if (identical(blocks$NAME[i],character(0))){

blk_list<- append(blk_list, NA)}

else{

blk_list <- append(blk_list, blocks$NAME[i])}

}

provider['geoid_blk'] <- blk_list

# This assumes that you have longitude and latitude of each location and if you want bg or tract or county just replace 'blocks_VA <- st_as_sf(blocks(state="VA"))' with a corresponding value, e.g. 'tracts_VA <- st_as_sf(tracts(state="VA"))'. I believe it uses either tigris or maps package

add2 count providers per geography using matching codes

add3 mean and median of 10 nearest drive times

prepara data for save

save new data

R Package Documentation

Browse R Packages

We want your feedback!

uva-bi-sdad/dc.webmd.primcare
Social Data Commons: Health, Behavior, Diet: Primary Care Access Scores