In uva-bi-sdad/dc.webmd.pediatrics: Social Data Commons: Pediatrics Access Scores

# Set the knitr chunk default so each chunk's source code is echoed.
knitr::opts_chunk$set(echo = TRUE)

libraries

library(sf)
library(tidygeocoder)
library(dplyr)
library(community)
library(readr)

clean df

# Start clean: remove every object listed by ls() in the current environment.
# NOTE(review): this wipes whatever the caller had in the session and does not
# detach already-loaded packages; running in a fresh R session is safer.
rm(list=ls())

working directory

# Relative file paths used by the CSV reads/writes below resolve against this
# directory.
# NOTE(review): hard-coded, user-specific path; setwd() fails where it is absent.
setwd("~/VDH/Floating_Catchment_Areas/va/2. pediatrics_va_model/")

load data provider

# Load the geolocated pediatric provider table.
# Alternative source (disabled): read a local CSV instead of the database.
#provider <- read.csv("pediat.dmv.geo.csv", row.names = 1)

# a) Open the PostgreSQL connection. Credentials are read from the DB_USR and
#    DB_PWD environment variables so they never appear in the script.
con <- RPostgreSQL::dbConnect(
  drv = RPostgreSQL::PostgreSQL(),
  dbname = "sdad",
  host = "postgis1",
  port = 5432,
  user = Sys.getenv(x = "DB_USR"),
  password = Sys.getenv(x = "DB_PWD")
)

# b) Run the query and c) disconnect. The disconnect sits in `finally` so the
#    connection is released even when the query errors out.
provider <- tryCatch(
  sf::st_read(
    con,
    query = "
SELECT  *
FROM data_commons.virginia_pediatric_doctors_geolocated_blks "
  ),
  finally = RPostgreSQL::dbDisconnect(con)
)

# Keep a CSV snapshot of the raw query result in the working directory.
write.csv(provider, "pediatrics_va.csv" )

fix data

## Collapse the provider table to one row per location.
# Every input row counts as one doctor; the location key is the "lat,lon" string.
provider$doctors <- 1
provider$location <- paste0(provider$lat, ",", provider$lon)
# Drop repeated (name, location) pairs, keeping all other columns.
provider <- distinct(provider, name, location, .keep_all = TRUE)

# Doctors per location, named by the location key.
counts <- tapply(provider$doctors, provider$location, sum)
# Keep the first row seen for each location and attach that location's count.
locations <- which(!duplicated(provider$location))
provider <- provider[locations, ]
provider$doctors <- counts[provider$location]

## Explicit location IDs: "l1", "l2", ...
provider$ID <- paste0("l", seq_len(nrow(provider)))

# Discard rows without a latitude, then drop the sf geometry column so that
# provider is a plain table from here on.
provider <- st_drop_geometry(filter(provider, !is.na(lat)))

data combined

library(tidycensus)
library(tidyverse)

# Register the Census API key from the environment rather than hard-coding the
# secret in the script.
census_key <- Sys.getenv("CENSUS_API_KEY")
if (!nzchar(census_key)) {
  stop("Set the CENSUS_API_KEY environment variable before running.",
       call. = FALSE)
}
census_api_key(census_key)

# Bring in block-group data: ACS 5-year 2019 counts from table B01001 (total
# population plus the male/female groups under 18), with geometries.
pop_va_pediatric <- get_acs(geography = "block group",
                      year = 2019,
                      variables = c(population = "B01001_001",
                                    Male="B01001_002",
                                    MaleUnder5years="B01001_003",
                                    Male5to9years="B01001_004",
                                    Male10to14years="B01001_005",
                                    Male15to17years="B01001_006",
                                    Female="B01001_026",
                                    FemaleUnder5years="B01001_027",
                                    Female5to9years="B01001_028",
                                    Female10to14years="B01001_029",
                                    Female15to17years="B01001_030"
                                    ),
                      state = "VA",
                      survey = "acs5",
                      output = "wide",
                      geometry = TRUE)

# Pediatric population = sum of the eight under-18 estimate columns.
va <- pop_va_pediatric %>%
  mutate(
    pediatric_pop = MaleUnder5yearsE + Male5to9yearsE + Male10to14yearsE +
      Male15to17yearsE + FemaleUnder5yearsE + Female5to9yearsE +
      Female10to14yearsE + Female15to17yearsE
  ) %>%
  select(GEOID, NAME, population = populationE, pediatric_pop)

# Demand table: one row per block group with its centroid coordinates (X, Y).
data_combined <- data.frame(
  GEOID = va$GEOID,
  population = va$population,
  pediatric_pop = va$pediatric_pop,
  st_coordinates(st_centroid(va$geometry))
)

# Drop block groups without a centroid, and three block groups that the
# original script excludes by GEOID.
# NOTE(review): the reason for excluding these three is not recorded — confirm.
# GEOIDs are compared as character strings, which is how they are stored.
excluded_geoids <- c("517000323001", "517000323002", "517000323003")
data_combined <- data_combined %>%
  filter(!is.na(X), !GEOID %in% excluded_geoids)

travel time

library(osrm)
# Point the osrm package at the routing server and select the car profile.
# NOTE(review): the server is a hard-coded IP reached over plain HTTP.
options(osrm.server = "http://104.248.112.16:5000/", osrm.profile = "car")

# Query the routing server only when no cached matrix exists on disk.
if (!file.exists("traveltimes_exercise.csv")) {
  demand_points <- data_combined[, c("GEOID", "X", "Y")] # population (demand)
  supply_points <- provider[, c("ID", "lon", "lat")]     # providers (supply)
  traveltimes <- osrmTable(src = demand_points, dst = supply_points)$duration
  # Store the row names as an explicit GEOID column so they survive the CSV.
  traveltimes_cache <- cbind(
    GEOID = rownames(traveltimes),
    as.data.frame(traveltimes)
  )
  write.csv(traveltimes_cache, "traveltimes_exercise.csv", row.names = FALSE)
}

# Always load the matrix from the cache; the first column becomes the row names.
traveltimes <- read.csv("traveltimes_exercise.csv", row.names = 1)

add1. Define geography id. This is because the Geography-GEOID from initial file may be outdated

library(tigris)
library(maps)
library(sf)
# Add block-group GEOIDs to the providers.
# Get the Virginia block-group polygons for 2019.
blocks_VA <- st_as_sf(block_groups(state = "VA", year = 2019)) # , year=2010

blocks <- blocks_VA
# Turn provider lon/lat into point geometries.
geopts <- provider %>%
  st_as_sf(coords = c("lon", "lat"), crs = 4269) # 4326. initial: 4269
# For every point, the indices of the block groups that contain it.
inds <- st_within(geopts$geometry, blocks$geometry, sparse = TRUE)
# Exactly one GEOID per provider, so the result always lines up with the rows
# of provider: NA when no block group contains the point, the first match when
# more than one does. The emptiness check looks at the match itself and does
# not rely on any other column of the block-group table.
blk_list <- vapply(
  inds,
  function(hit) {
    if (length(hit) == 0L) NA_character_ else blocks$GEOID[hit[1L]]
  },
  character(1)
)
provider['GEOID'] <- blk_list

add2 count providers per geography using matching codes

# Doctors per block group: sum the doctors of all locations sharing a GEOID.
num_providers <- summarise(group_by(provider, GEOID), prov_cnt = sum(doctors))
#num_providers <- st_drop_geometry(num_providers )

# Attach the counts to the block groups. GEOID is cast to character first so
# the join key is character on the block-group side.
data_combined$GEOID <- as.character(data_combined$GEOID)
data_combined <- left_join(data_combined, num_providers, by = "GEOID")

# Check: total doctors attached to the retained block groups.
sum(data_combined$prov_cnt, na.rm = TRUE)

add3 mean and median of 10 nearest drive times

# Mean of the n smallest values of x (default 10, i.e. the 10 nearest).
#   x: numeric vector; NA values are dropped by sort() before averaging.
#   n: how many of the smallest values to average.
# Returns a single number (NaN when x has no non-missing values).
top_mean <- function(x, n = 10) {
  mean(head(sort(x), n))
}
# Median of the n smallest values of x (default 10, i.e. the 10 nearest).
#   x: numeric vector; NA values are dropped by sort() before taking the median.
#   n: how many of the smallest values to consider.
# Returns a single number (NA when x has no non-missing values).
top_median <- function(x, n = 10) {
  median(head(sort(x), n))
}
# Summarise each row of the travel-time table (one row per GEOID) with the mean
# and median of its nearest providers.
tt_matrix <- as.matrix(traveltimes)
traveltimes_near <- data.frame(
  near_10_mean = apply(tt_matrix, 1, top_mean),
  near_10_median = apply(tt_matrix, 1, top_median)
)
#rownames_to_column(traveltimes_near, var = "GEOID")
# Expose the row names as a GEOID column to use as the join key.
traveltimes_near$GEOID <- rownames(traveltimes_near)
# Attach the summaries to the block groups.
data_combined <- left_join(data_combined, traveltimes_near, by = "GEOID")

prepare data for saving

# Reload the cached travel-time matrix; its first column holds the row names.
traveltimes <- read.csv("traveltimes_exercise.csv", row.names = 1)
# Population table. Always recheck the relevant population for the service,
# e.g. for pediatrics it is ages 0-17.
population <- select(
  data_combined,
  GEOID, population, pediatric_pop, prov_cnt, near_10_mean, near_10_median
)
# Realign the matrix: rows follow the population table, columns follow the
# provider IDs.
geoid_order <- as.character(population$GEOID)
traveltimes <- traveltimes[geoid_order, provider$ID]

save new data

# Provider table restricted to the exported columns.
provider_export <- provider[, c("ID", "address", "lat", "lon", "doctors")]
write.csv(provider_export, "provider.csv", row.names = FALSE)

# Travel-time matrix with the row names written out as a GEOID column.
traveltimes_export <- cbind(GEOID = rownames(traveltimes), traveltimes)
write.csv(traveltimes_export, "traveltimes_trimmed.csv", row.names = FALSE)

# Population table as prepared above.
write.csv(population, "population.csv", row.names = FALSE)

uva-bi-sdad/dc.webmd.pediatrics documentation built on June 13, 2022, 11:12 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

uva-bi-sdad/dc.webmd.pediatrics
Social Data Commons: Pediatrics Access Scores

In uva-bi-sdad/dc.webmd.pediatrics: Social Data Commons: Pediatrics Access Scores

libraries

clean df

working directory

load data provider

fix data

data combined

travel time

add1. Define geography id. This is because the Geography-GEOID from initial file may be outdated

add2 count providers per geography using matching codes

add3 mean and median of 10 nearest drive times

prepare data for saving

save new data

R Package Documentation

Browse R Packages

We want your feedback!

uva-bi-sdad/dc.webmd.pediatrics Social Data Commons: Pediatrics Access Scores

In uva-bi-sdad/dc.webmd.pediatrics: Social Data Commons: Pediatrics Access Scores

libraries

clean df

working directory

load data provider

fix data

data combined

travel time

add1. Define geography id. This is because the Geography-GEOID from initial file may be outdated

add2 count providers per geography using matching codes

add3 mean and median of 10 nearest drive times

prepare data for saving

save new data

R Package Documentation

Browse R Packages

We want your feedback!

uva-bi-sdad/dc.webmd.pediatrics
Social Data Commons: Pediatrics Access Scores