library(tidyverse)
library(showtext)
library(DataExplorer)
library(kableExtra)
library(corrr)
library(ggiraph)
library(glue)
library(scales)
library(sf)
library(leaflet)
library(labelled)
library(coc.data.package)

font_add_google("Libre Franklin", family = "libre franklin")
showtext_auto()

base_theme <- theme_minimal(base_family = "libre franklin") +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = "grey90")
  )

knitr::opts_chunk$set(
  fig.showtext = TRUE,
  echo = FALSE,
  warning = FALSE,
  collapse = TRUE,
  comment = "#>"
)

labels_df = function(data) {
  label_list = var_label(data)
  tibble(
    variable = names(label_list),
    label = unlist(label_list, use.names = FALSE)
  )
}
latest_release = gh::gh("GET /repos/ucsf-bhhi/coc-data/releases/latest")
asset_position = purrr::detect_index(
  latest_release$assets, 
  ~ purrr::pluck(.x, "name") == "coc_data.rds"
)
asset_url = purrr::pluck(latest_release$assets, asset_position, "url")

# setup a tempfile
tf = tempfile()

# download the release
response = httr::GET(
  asset_url, 
  httr::add_headers(
    c(
      Authorization = paste("token", gh::gh_token()),
      Accept = "application/octet-stream"
    )
  ),
  httr::write_disk(tf, overwrite = TRUE)
)

# load the data
combined_dataset = read_rds(tf)

Data Introduction

introduce(combined_dataset) %>%
  mutate(memory_usage = round(memory_usage / 1000, 0)) %>%
  rename(`memory_usage_(kb)` = memory_usage) %>%
  pivot_longer(everything(), names_to = "stat", values_to = "value") %>%
  mutate(
    stat = str_replace_all(stat, "_", " "),
    stat = str_to_sentence(stat)
  ) %>%
  kbl(
    col.names = c("", ""),
    format.args = list(big.mark = ",")
  ) %>%
  kable_styling()

CoC Map

People experiencing homelessness per 1,000 people in CoC population, 2019 ```{css, echo=FALSE} .leaflet-container { background: #ffffff26; }

```r
map = read_rds("../maps/coc_display_maps.rds") %>% 
  # this is 2019
  pluck(9) %>%  
  # join on the data
  left_join(combined_dataset, by = c("coc_number", "coc_name", "year")) %>% 
  # set the crs to web mercator to play nice with leaflet
  st_transform(4326)

# set up the color pallette for the homelessness rates
fill_map = colorNumeric("GnBu", map$homeless_per_1000_total_pop)

# generate the text for the hover popups
popups = glue("{map$coc_number} - {map$coc_name}: {round(map$homeless_per_1000_total_pop, 1)}")

# grab the map bounding box to later set the zoom limits
map_bbox = st_bbox(map) %>% as.numeric()

# get the center of the map for setting the initial view
map_center = st_bbox(map) %>% st_as_sfc() %>% st_centroid() %>% unlist()

# start the map and set the zoom limits
leaflet(map, options = leafletOptions(minZoom = 4, 7)) %>% 
  addPolygons(
    # turn down the smoothing so the states don't look weird
    smoothFactor = 0.5,
    fillColor = ~fill_map(homeless_per_1000_total_pop),
    # border line weight
    weight = 0.5,
    opacity = 1,
    # border line color
    color = "grey",
    fillOpacity = 0.9,
    # options to highlight a coc when hovering over it
    highlight = highlightOptions(
      weight = 1,
      bringToFront = TRUE,
      fillOpacity = 1
    ),
    label = popups
  ) %>% 
  addLegend(
    pal = fill_map,
    values = ~homeless_per_1000_total_pop,
    opacity = 0.7,
    position = "bottomright",
    title = NULL
  ) %>% 
  # don't allow panning the map outside of it's bounds
  setMaxBounds(map_bbox[3], map_bbox[4], map_bbox[1], map_bbox[2]) %>% 
  # set the initial view (map center) and zoom level
  setView(lng = map_center[1], lat = map_center[2], zoom = 4.25)

Summary Statistics

build_summary_stats(combined_dataset) %>%
  select(-c(N, `10th`, `90th`, `99th`)) %>%
  # align first column (variable name) left and all others (values) right
  kbl(align = c("l", rep("r", ncol(.) - 1))) %>%
  kable_styling(
    bootstrap_options = c("striped")
  ) %>%
  add_header_above(c(" " = 6, "Percentiles" = 2))

The observations with missing homelessness counts and rates are from CoCs that did not conduct a PiT count in a given year or result from the "shared jurisdiction" Massachusetts CoC which is present in the shapefiles but not in the PiT count data.

Correlation with Homelessness Rate

correlation_dataset <- combined_dataset %>%
  select(where(is.numeric), -eviction_filing_rate, -starts_with("homeless_rate")) %>%
  correlate()
homelessness_rate_correlation_plot <- correlation_dataset %>%
  focus(homeless_per_1000_total_pop) %>%
  filter(!str_detect(term, "homeless")) %>%
  mutate(
    pos_neg = homeless_per_1000_total_pop >= 0,
    tt = round(homeless_per_1000_total_pop, 2)
  ) %>%
  left_join(labels_df(combined_dataset), by = c("term" = "variable")) %>% 
  ggplot() +
  geom_bar_interactive(
    aes(x = label, y = homeless_per_1000_total_pop, fill = pos_neg, tooltip = tt),
    stat = "identity",
    width = 0.6
  ) +
  scale_fill_manual(
    guide = "none",
    values = c("TRUE" = "#d7191c", "FALSE" = "#1a9641")
  ) +
  ylim(c(-0.5, 0.5)) +
  labs(
    subtitle = "Correlation coefficient with homeless_per_1000_total_pop",
    x = NULL,
    y = NULL
  ) +
  coord_flip() +
  base_theme +
  theme(plot.title.position = "plot")

girafe(ggobj = homelessness_rate_correlation_plot, height_svg = 7)

Scatterplots with Homelessness Rate

combined_dataset %>% 
  select(-c(coc_category, homeless_rate_total_pop, homeless_rate_in_poverty)) %>% 
  pivot_longer(
    -c(coc_name, coc_number, year, homeless_per_1000_total_pop),
    names_to = "variable",
    values_to = "value"
  ) %>% 
  left_join(labels_df(combined_dataset), by = "variable") %>% 
  ggplot() +
  geom_point(aes(x = value, y = homeless_per_1000_total_pop)) +
  scale_x_continuous(name = NULL, labels = label_comma()) +
  labs(
    y = NULL,
    subtitle = "Y-axis is the number of people experiencing homelessness per 1,000 people in the overall CoC population"
  ) +
  facet_wrap(~ label, ncol = 3, scales = "free", switch = "x") +
  base_theme

Dataset Correlation Grid

row_order <- correlation_dataset %>%
  shave() %>%
  pull(term)

correlation_grid_plot <- correlation_dataset %>%
  shave() %>%
  stretch(na.rm = TRUE) %>%
  left_join(labels_df(combined_dataset), by = c("x" = "variable")) %>%
  rename(label_x = label) %>% 
  left_join(labels_df(combined_dataset), by = c("y" = "variable")) %>%
  rename(label_y = label) %>%
  mutate(
    x = factor(x, levels = row_order),
    y = factor(y, levels = rev(row_order)),
    short_r = round(r, 2),
    tooltip = glue("{label_x}, {label_y}: {short_r}")
  ) %>% 
  ggplot() +
  geom_point_interactive(
    aes(x = x, y = y, color = r, tooltip = tooltip),
    shape = 16
  ) +
  scale_color_distiller(name = NULL, type = "div", palette = "RdYlGn", limits = c(-1, 1)) +
  labs(
    x = NULL,
    y = NULL
  ) +
  base_theme +
  theme(
    panel.grid.major = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = c(0.75, 0.75),
    legend.direction = "horizontal",
    legend.key.height = unit(0.5, "lines"),
    legend.key.width = unit(3, "lines"),
    legend.title = element_text(vjust = 1),
  )

girafe(ggobj = correlation_grid_plot, width_svg = 10, height_svg = 7)

Histograms

combined_dataset %>% 
  select(where(is.numeric), -year) %>% 
  pivot_longer(everything(), names_to = "variable", values_to = "value") %>% 
  left_join(labels_df(combined_dataset), by = "variable") %>%
  ggplot() + 
  geom_histogram(aes(x = value)) +
  scale_x_continuous(name = NULL, labels = comma) +
  scale_y_continuous(name = "Counts", labels = comma) +
  facet_wrap(~ label, ncol = 3, scales = "free", switch = "x") +
  base_theme

Categorical Variable Frequencies

frequency_plot = combined_dataset %>% 
  select(coc_category, year) %>% 
  mutate(across(everything(), as.character)) %>% 
  pivot_longer(everything(), names_to = "variable", values_to = "value") %>% 
  count(variable, value, name = "count") %>% 
  group_by(variable) %>%
  mutate(share = percent(count / sum(count), 1)) %>% 
  left_join(labels_df(combined_dataset), by = "variable") %>%
  ggplot() + 
  geom_bar_interactive(
    aes(x = value, y = count, tooltip = glue("{value}: {comma(count, 1)} ({share})")),
    stat = "identity"
  ) +
  scale_y_continuous(name = NULL, labels = label_comma()) +
  labs(x = NULL) +
  facet_wrap(~ label, scales = "free", ncol = 1) +
  base_theme +
  theme(panel.grid.major.x = element_blank())

girafe(ggobj = frequency_plot, height_svg = 8, width_svg = 10)


ucsf-bhhi/coc-data documentation built on Dec. 23, 2021, 1:07 p.m.