library(dplyr)
library(haven)
library(usmap)
library(janitor)
library(stringr)
library(readr)
library(httr)
library(maps)
library(ggplot2)
library(purrr)
library(tidyr)
library(accesstocare)
library(usethis)
library(maptools)
library(rgdal)
library(pins)
set.seed(100)
options(scipen=999)

library(dplyr)
library(haven)
library(usmap)
library(janitor)
library(stringr)
library(readr)
library(httr)
library(maps)
library(ggplot2)
library(purrr)
library(tidyr)
library(accesstocare)
library(usethis)
library(maptools)
library(rgdal)
library(pins)

Data provider: CMS Data website: https://data.cms.gov/ Data URL: https://data.cms.gov/provider-data/dataset/xubh-q36u

It pulls the data from the package instead of the site. By default, the following code chunk does not run, unless an update is needed.

data_url <- "https://data.cms.gov/provider-data/sites/default/files/resources/092256becd267d9eeccf73bf7d16c46b_1623902717/Hospital_General_Information.csv"

cms_data <- read_csv(data_url)

cms_raw_data <- clean_names(cms_data[, 1:9])

use_data(cms_raw_data)

Prepares the CMS data by removing Psychiatric hospitals, and thos from states not included in the FIPS files.

hospital_tbl <- cms_raw_data %>% 
  filter(
    hospital_type != "Psychiatric", 
    state != "PR", state != "VI", 
    state != "AS", state != "GU",  
    state != "MP"
    ) %>% 
  select(
    facility_id, 
    facility_name, 
    address, 
    city, 
    state, 
    zip_code,
    county_name
    ) %>% 
  mutate(county_name = str_to_upper(county_name)) %>% 
  group_by(address, city, state, zip_code, county_name) %>% 
  summarise(
    facility_id = first(facility_id),
    facility_name = first(facility_name)
    ) %>% 
  select(contains("facility"), everything()) %>% 
  arrange(state, city, address, facility_name) %>% 
  ungroup() %>% 
  mutate(
    county_name = str_to_upper(county_name),
    county_name = str_remove(
      county_name, 
      " MUNICIPALITY| CENSUS AREA| COUNTY| PARISH| CITY AND| BOROUGH"
      ),
    county_name = str_remove(county_name, "BOROUGH"),
    county_name = str_replace(county_name, "ST. ", "SAINT "),
    county_name = str_replace(county_name, "STE. ", "SAINTE "),
    county_name = str_replace(county_name, "-", " "),
    county_name = case_when(
      state == "AL" & county_name == "NEW YORK" ~ "CHAMBERS",
      state == "AZ" & county_name == "DUPAGE" ~ "MARICOPA",
      state == "IN" & county_name == "ST JOSEPH" ~ "SAINT JOSEPH",
      state == "VA" & county_name == "SALEM" ~ "SALEM CITY",
      TRUE ~ county_name
    ),
    county_name = str_trim(county_name),
    county_name = str_replace_all(county_name, " ", ""),
    county_name = str_replace_all(county_name, "'", "")
  )

Uses the usmap data to retrieve the FIPS code for each county. Performs close to the same transformations to the county names to produce the matches

states <- usmap::statepop %>% 
  select(state = abbr, state_name = full)

county_list <- countypop %>% 
  select(
    fips,
    state = abbr,
    county_name = county
  ) %>% 
  mutate(
    county_name = str_to_upper(county_name),
    county_name = str_remove(
      county_name, 
      " MUNICIPALITY| CENSUS AREA| COUNTY| PARISH| CITY AND| BOROUGH"
      ),
    county_name = str_remove(county_name, "BOROUGH"),
    county_name = str_replace(county_name, "ST. ", "SAINT "),
    county_name = str_replace(county_name, "STE. ", "SAINTE "),
    county_name = str_replace(county_name, "-", " "),
    county_name = str_trim(county_name),
    county_name = str_replace_all(county_name, " ", ""),
    county_name = str_replace_all(county_name, "'", ""),
    county_name = str_replace_all(county_name, "Ð", "N")
  )

Confirms that there are no unmtached counties

hospital_counties <- hospital_tbl %>% 
  group_by(state, county_name) %>% 
  summarise() %>% 
  ungroup() 

hospital_counties %>% 
  left_join(county_list, by = c("state", "county_name")) %>% 
  filter(is.na(fips))

Prepares the us_hospitals data set.

county_tbl <- countypop %>% 
  select(
    fips, 
    state = abbr, 
    county_name = county, 
    population = pop_2015
    )

us_hospitals <- hospital_tbl %>% 
  left_join(county_list, by = c("state", "county_name")) %>% 
  select(-county_name, -state) %>% 
  left_join(county_tbl, by = c("fips")) %>% 
  mutate(
    county_name = str_to_upper(county_name),
    city = str_to_upper(city)
    ) %>% 
  select(-population) %>% 
  left_join(states, by = "state")

Prepares data for the model, and produces the us_model variable.

cnt_hosp <- us_hospitals %>% 
  count(fips, name = "hospitals") %>% 
  right_join(county_tbl, by = "fips")

cnt_hosp2 <- cnt_hosp %>% 
  replace_na(list(hospitals = 0)) %>% 
  select(fips, state, county_name, hospitals, population) %>% 
  arrange(state, county_name)

us_model <- cnt_hosp2 %>% 
  sample_frac(0.3) %>% 
  lm(hospitals ~ population, data = .) 

preds <- predict(
  us_model, 
  newdata = cnt_hosp2, 
  interval = "prediction",
  level = 0.99
  ) %>% 
  as_tibble() %>% 
  rename_all(function(x) paste0("pred_", x))

Matches the county data with the predictions to create the us_counties data set

us_counties <- cnt_hosp2 %>% 
  bind_cols(preds) %>% 
  mutate(
    pred_lwr = round(pred_lwr),
    pred_fit = round(pred_fit),
    pred_upr = round(pred_upr),
    pred_lwr = ifelse(pred_lwr < 0, 0, pred_lwr),
    pred_fit = ifelse(pred_fit < 0, 0, pred_fit),
    pred_upr = ifelse(pred_upr < 0, 0, pred_upr),
    pred_status = case_when(
      hospitals < pred_lwr ~ "below",
      hospitals > pred_upr ~ "above",
      TRUE ~ "ok"
    )
  ) %>% 
  left_join(states, by = "state")
us_states <- us_counties %>% 
  group_by(state) %>% 
  summarise(
    hospitals = sum(hospitals),
    population = sum(population)
  ) %>% 
  ungroup() %>% 
  mutate(
    per_person = round(population / hospitals),
    per_person_quartile = ntile(per_person, 4)
  ) %>% 
  left_join(states, by = "state")
library(hexbin)

sh <- hexcoords(0.5, 0.5)

st_locations_list <- list(
  list(state = "MS", x = 0, y = 0),
  list(state = "AL", x = 1, y = 0),
  list(state = "GA", x = 2, y = 0),
  list(state = "FL", x = 1.5, y = -1.5),
  list(state = "LA", x = -1, y = 0),
  list(state = "OK", x = -2, y = 0),
  list(state = "TX", x = -1.5, y = -1.5),
  list(state = "AZ", x = -3, y = 0),
  list(state = "TN", x = 0.5, y = 1.5),
  list(state = "AR", x = -0.5, y = 1.5),
  list(state = "KS", x = -1.5, y = 1.5),
  list(state = "NM", x = -2.5, y = 1.5),
  list(state = "UT", x = -3.5, y = 1.5),
  list(state = "CA", x = -4.5, y = 1.5),
  list(state = "NC", x = 1.5, y = 1.5),
  list(state = "SC", x = 2.5, y = 1.5),
  list(state = "DC", x = 3.5, y = 1.5),
  list(state = "OR", x = -5, y = 3),
  list(state = "NV", x = -4, y = 3),
  list(state = "CO", x = -3, y = 3),
  list(state = "NE", x = -2, y = 3),
  list(state = "MO", x = -1, y = 3),
  list(state = "KY", x = 0, y = 3),
  list(state = "WV", x = 1, y = 3),
  list(state = "VA", x = 2, y = 3),
  list(state = "MD", x = 3, y = 3),
  list(state = "DE", x = 4, y = 3),
  list(state = "CT", x = 4.5, y = 4.5),
  list(state = "NJ", x = 3.5, y = 4.5),
  list(state = "PA", x = 2.5, y = 4.5),
  list(state = "OH", x = 1.5, y = 4.5),
  list(state = "IN", x = 0.5, y = 4.5),
  list(state = "IL", x = -0.5, y = 4.5),
  list(state = "IA", x = -1.5, y = 4.5),
  list(state = "SD", x = -2.5, y = 4.5),
  list(state = "WY", x = -3.5, y = 4.5),
  list(state = "ID", x = -4.5, y = 4.5),
  list(state = "WA", x = -5, y = 6),
  list(state = "MT", x = -4, y = 6),
  list(state = "ND", x = -3, y = 6),
  list(state = "MN", x = -2, y = 6),
  list(state = "WI", x = -1, y = 6),
  list(state = "MI", x = 1, y = 6),
  list(state = "NY", x = 3, y = 6),
  list(state = "MA", x = 4, y = 6),
  list(state = "RI", x = 5, y = 6),
  list(state = "NH", x = 4.5, y = 7.5),
  list(state = "VT", x = 3.5, y = 7.5),
  list(state = "ME", x = 5, y = 9),
  list(state = "AK", x = -5, y = 9),
  list(state = "HI", x = -5, y = -1.5)
)

us_hex_positions <- st_locations_list %>% 
  map_dfr(~.x) %>% 
  left_join(states, by = "state")

us_hex_polygons <- map_dfr(
  st_locations_list,
  ~{
    tibble(
      x = sh$x + .x$x,
      y = sh$y + .x$y,
      state = .x$state
    )
  }
)

us_hex_polygons %>% 
  ggplot() +
  geom_polygon(aes(x, y, group = state), fill = "lightgray", color = "white") +
  geom_text(aes(x, y, label = state), data = us_hex_positions) +
  theme_void() +
  theme(legend.position = "none") 
pred_count <- us_counties %>% 
  count(state, pred_status) %>% 
  pivot_wider(
    names_from = pred_status, 
    values_from = n, 
    values_fill = 0,
    names_prefix = "pred_"
    )

us_atc_state_polygons <- us_states %>% 
  inner_join(us_hex_polygons, by = "state") %>% 
  inner_join(pred_count, by = "state")
county_map <- usmap::us_map("county")

us_atc_county_polygons <- us_counties %>% 
  left_join(county_map, by = "fips") 
us_large_cities <- maps::us.cities %>% 
  select(long, lat, everything()) %>% 
  usmap::usmap_transform() %>% 
  select(-lat, -long) %>% 
  rename(x = long.1, 
         y = lat.1, 
         city_name = name, 
         state = country.etc,
         population = pop
         ) %>% 
  mutate(city_name = str_sub(city_name, 1, nchar(city_name) - 3)) %>% 
  arrange(state, -population) %>% 
  group_by(state) %>% 
  mutate(position = row_number()) %>% 
  ungroup()
palette_atc <- list(
    above = "#0072B2",
    below =  "#CC79A7",
    ok = "#009E73",
    low = "#ffffff",
    high = "#0072B2",
    ambulance = "#E69F00"
  )
us_atc_model <- us_model 
if(Sys.getenv("R_CONFIG_ACTIVE") == "rsconnect") {
  boardname <- "rsconnect"
  board_register_rsconnect(
    server = Sys.getenv("CONNECT_SERVER"),
    key = Sys.getenv("CONNECT_API_KEY")
    )

  pin(
    us_counties,
    name = "atc-counties",
    board = boardname
  )

  pin(
    us_states,
    name = "atc-states",
    board = boardname
  )

  pin(
    us_hospitals,
    name = "atc-hospitals",
    board = boardname
  )

  pin(
    us_atc_model,
    name = "atc-model",
    board = boardname
  )
}
library(usethis)

use_data(us_hex_positions, overwrite = TRUE)
use_data(us_hex_polygons, overwrite = TRUE)
use_data(us_hospitals, overwrite = TRUE)
use_data(us_counties, overwrite = TRUE)
use_data(us_states, overwrite = TRUE)

use_data(us_atc_county_polygons, overwrite = TRUE)
use_data(us_atc_state_polygons, overwrite = TRUE)

use_data(us_large_cities, overwrite = TRUE)

use_data(us_atc_model, overwrite = TRUE)

use_data(palette_atc, overwrite = TRUE)


sol-eng/accesstocare documentation built on Dec. 23, 2021, 3:32 a.m.