obtn: Oregon by the Numbers

# Import Data -------------------------------------------------------------

# source("data-raw/import-data.R")


# Load Packages -----------------------------------------------------------

library(devtools)
library(fs)
library(tidyverse)
library(scales)
library(janitor)
library(beepr)
library(readxl)
library(writexl)
library(here)
library(ggtext)

load_all()

# Define for later --------------------------------------------------------

obtn_year <- year(today())

# Plots -------------------------------------------------------------------

# * Make all plots ----------------------------------------------------------

# ** Delete existing plots ---------------------------------------------------

# Create new directory if it doesn't exist
fs::dir_create(stringr::str_glue("inst/plots/{obtn_year}/"))

# Get list of all plots
existing_plots <- fs::dir_ls(stringr::str_glue("inst/plots/{obtn_year}/"))

# Delete them all
fs::file_delete(existing_plots)

# ** Largest Community -------------------------------------------------------

obtn_plot_largest_community(obtn_year)


# ** 100 Largest Communities ----------------------------------------------

obtn_plot_largest_100_communities(obtn_year)

# ** Tribes Maps -------------------------------------------------------------

# Create vector of tribes
obtn_tribes_vector <- obtn_tribes %>%
  dplyr::filter(year == obtn_year) %>%
  dplyr::distinct(tribe) %>%
  dplyr::pull(tribe)

# Make all top industry maps
purrr::pwalk(list(obtn_year, obtn_tribes_vector), obtn_plot_tribes_map)

# ** State/Rural/Urban Population Pyramids -----------------------------------
purrr::pwalk(list(obtn_year, c("Oregon", "Rural", "Urban")), obtn_plot_population_pyramid)

purrr::pwalk(list(obtn_year, c("Oregon", "Rural", "Urban")), obtn_plot_population_pyramid, language = "Spanish")

# ** County-Level Population Pyramids ----------------------------------------
purrr::pwalk(
  list(obtn_year, obtn_oregon_counties),
  obtn_plot_population_pyramid
)

purrr::pwalk(
  list(obtn_year, obtn_oregon_counties),
  obtn_plot_population_pyramid,
  language = "Spanish"
)

# ** ALICE -------------------------------------------------------------------
purrr::pwalk(list(obtn_year, obtn_oregon_counties), obtn_plot_alice)

# Plot for county thresholds
obtn_plot_alice_threshold(obtn_year)

# ** Median Income Bar Charts ------------------------------------------------
purrr::pwalk(
  list(obtn_year, obtn_oregon_counties),
  obtn_plot_median_income
)


# ** County-Level Race/Ethnicity Bar Charts ----------------------------------

purrr::pwalk(
  list(obtn_year, obtn_oregon_counties),
  obtn_plot_race_ethnicity_bar_chart
)

purrr::pwalk(
  list(obtn_year, obtn_oregon_counties),
  obtn_plot_race_ethnicity_bar_chart,
  language = "Spanish"
)


# ** State/Rural/Urban Race/Ethnicity Bar Charts -----------------------------

purrr::pwalk(
  list(obtn_year, c("Oregon", "Rural", "Urban"), 3.25, 2.43),
  obtn_plot_race_ethnicity_bar_chart
)

purrr::pwalk(
  list(obtn_year, c("Oregon", "Rural", "Urban"), 3.25, 2.43),
  obtn_plot_race_ethnicity_bar_chart,
  language = "Spanish"
)


# ** Choropleth Maps ---------------------------------------------------------

# Create vector of choropleth measures
obtn_data_choropleth_measures <-
  obtn_data_choropleth_measures %>%
  dplyr::filter(year == obtn_year) %>%
  dplyr::pull(measure)

# Make all choropleth maps
purrr::pwalk(
  list(obtn_year, obtn_data_choropleth_measures),
  obtn_plot_choropleth_map
)

purrr::pwalk(
  list(obtn_year, obtn_data_choropleth_measures),
  obtn_plot_choropleth_map,
  language = "Spanish"
)


# ** Race/Ethnicity Statewide Maps -------------------------------------------

obtn_plot_multiple_race_ethnicity_choropleth_maps(obtn_year)

obtn_plot_multiple_race_ethnicity_choropleth_maps(obtn_year, language = "Spanish")

# ** Top Employment Industries -----------------------------------------------

# Create vector of industries
obtn_industries <- obtn_top_employment_industries %>%
  dplyr::filter(year == obtn_year) %>%
  dplyr::distinct(industry) %>%
  dplyr::pull(industry)

# Make all top industry maps
purrr::pwalk(
  list(obtn_year, obtn_industries),
  obtn_plot_top_employment_industries
)


# ** Population Density Map --------------------------------------------------

obtn_plot_population_density_map(obtn_year)
obtn_plot_population_density_map(obtn_year, language = "Spanish")

# For some reason I'm having all sorts of trouble with this.
# Cara made it and I put it in the inst/plots/test folder. This code copies it to the inst/plots/2022 folder

# fs::file_copy("inst/plots/tests/2022-population-density-map.pdf",
#               "inst/plots/2022/2022-population-density-map.pdf",
#               overwrite = TRUE)


# ** State Map ------------------------------------------------------------

obtn_state_map(2023)


# Tables (R) --------------------------------------------------------------

# * Delete all tables -------------------------------------------------------

# Create new directory if it doesn't exist
fs::dir_create(stringr::str_glue("inst/tables/{obtn_year}/"))

# Get list of all tables
existing_tables <- fs::dir_ls(stringr::str_glue("inst/tables/{obtn_year}/"))

# Delete them all
fs::file_delete(existing_tables)

# * County tables ---------------------------------------------------------

purrr::pwalk(
  list(
    obtn_year,
    obtn_oregon_counties
  ),
  obtn_county_table
)

purrr::pwalk(
  list(
    obtn_year,
    obtn_oregon_counties
  ),
  obtn_county_table,
  language = "Spanish"
)

# * Measure tables --------------------------------------------------------

purrr::pwalk(
  list(
    obtn_year,
    obtn_data_choropleth_measures
  ),
  obtn_measure_table
)

purrr::pwalk(
  list(
    obtn_year,
    obtn_data_choropleth_measures
  ),
  obtn_measure_table,
  language = "Spanish"
)

# Tables (InDesign) ------------------------------------------------------------------

# * Big Numbers -------------------------------------------------------------

total_population <- read_excel(here("data-raw", str_glue("{obtn_year}-obtn-by-county.xlsx")), sheet = "Total Population") %>%
  clean_names() %>%
  mutate(population = comma(population))

rural_population <- read_excel(here("data-raw", str_glue("{obtn_year}-obtn-by-county.xlsx")), sheet = "Rural Population") %>%
  clean_names() %>%
  mutate(percent_rural = round_half_up(percent_rural)) %>%
  mutate(percent_rural = percent(percent_rural, accuracy = 1, scale = 1))

net_migration <- read_excel(here("data-raw", str_glue("{obtn_year}-obtn-by-county.xlsx")), sheet = "Net Migration") %>%
  clean_names() %>%
  mutate(net_migration = round_half_up(net_migration)) %>%
  mutate(net_migration = number(net_migration, accuracy = 1))

total_land_area <- obtn_total_land_area %>%
  filter(year == obtn_year) %>%
  mutate(land_area = round_half_up(land_area)) %>%
  arrange(desc(land_area)) %>%
  select(-year) %>%
  mutate(land_area = comma(land_area, accuracy = 1))

public_land <- obtn_public_land %>%
  filter(year == obtn_year) %>%
  rename(public_land = value) %>%
  arrange(desc(public_land)) %>%
  select(-year) %>%
  mutate(public_land = public_land * 100) %>%
  mutate(public_land = round_half_up(public_land)) %>%
  mutate(public_land = percent(public_land, accuracy = 1, scale = 1))

# I renamed the sheet Developed Land in the 2024 data to Developed or Cultivated Land in order to make this code work

developed_cultivated_land <- read_excel(here("data-raw", str_glue("{obtn_year}-obtn-by-measure.xlsx")), sheet = "Developed or Cultivated Land") %>%
  clean_names() %>%
  select(county, numeric_only) %>%
  rename(
    pct_developed_cultivated_land = numeric_only,
    geography = county
  ) %>%
  mutate(pct_developed_cultivated_land = round_half_up(pct_developed_cultivated_land)) %>%
  mutate(
    pct_developed_cultivated_land = percent(
      pct_developed_cultivated_land,
      accuracy = 1,
      scale = 1
    )
  )



life_expectancy <- obtn_life_expectancy %>%
  filter(year == obtn_year) %>%
  filter(geography %in% obtn_oregon_counties) %>%
  select(-year) %>%
  mutate(value = round_half_up(value)) %>%
  pivot_wider(
    id_cols = geography,
    names_from = gender,
    names_prefix = "Life Expectancy ",
    values_from = value
  ) %>%
  select(geography, contains("Life Expectancy"))


financial_hardship <- obtn_data_by_measure %>%
  filter(measure == "Financial Hardship") %>%
  filter(geography %in% obtn_oregon_counties) %>%
  filter(year == obtn_year) %>%
  mutate(below_poverty_and_alice = value) %>%
  mutate(below_poverty_and_alice = round_half_up(below_poverty_and_alice)) %>%
  mutate(below_poverty_and_alice = percent(
    below_poverty_and_alice,
    accuracy = 1,
    scale = 1
  )) %>%
  select(geography, below_poverty_and_alice) %>%
  rename(households_in_financial_hardship = below_poverty_and_alice)

total_population %>%
  left_join(rural_population) %>%
  left_join(net_migration) %>%
  left_join(total_land_area) %>%
  left_join(public_land) %>%
  left_join(developed_cultivated_land) %>%
  left_join(life_expectancy) %>%
  left_join(financial_hardship) %>%
  write_xlsx(str_glue("inst/tables/{obtn_year}/{obtn_year}-big-numbers.xlsx"))


# ** Largest Communities -----------------------------------------------------

get_largest_community_one_third <- function(column_number) {
  obtn_largest_community %>%
    filter(year == obtn_year) %>%
    arrange(desc(population)) %>%
    mutate(rank = row_number()) %>%
    select(rank, largest_community, geography, population) %>%
    mutate(population = comma(population)) %>%
    filter(between(rank, column_number, column_number + 11))
}

get_largest_community_one_third(1) %>%
  bind_cols(get_largest_community_one_third(13)) %>%
  bind_cols(get_largest_community_one_third(25)) %>%
  write_xlsx(str_glue(
    "inst/tables/{obtn_year}/{obtn_year}-largest-communities.xlsx"
  ))

obtn_largest_community %>%
  filter(year == obtn_year) %>%
  left_join(obtn_county_seats, by = "geography") %>%
  select(geography, largest_community, population, county_seat) %>%
  arrange(geography) %>%
  set_names("County", "Largest community", "Population", "County seat") %>%
  write_xlsx(
    str_glue(
      "inst/tables/{obtn_year}/{obtn_year}-largest-community-and-county-seat.xlsx"
    )
  )



# ** Top Employment Industries -----------------------------------------------

read_excel(
  here("data-raw", str_glue("{obtn_year}-obtn-by-county.xlsx")),
  sheet = "Employment Industries",
  range = "A1:D37"
) %>%
  clean_names() %>%
  mutate(geography = str_trim(geography)) %>%
  filter(geography %in% obtn_oregon_counties) %>%
  write_xlsx(str_glue(
    "inst/tables/{obtn_year}/{obtn_year}-top-employment-industries.xlsx"
  ))

get_top_employment_industries <- function(year_to_filter) {
  obtn_top_employment_industries %>%
    filter(top_three_industry == "Y") %>%
    distinct(industry, year) %>%
    filter(year == year_to_filter) %>%
    select(-year) %>%
    arrange(industry)
}

# ** Tribes ------------------------------------------------------------------

make_tribe_table <- function(year_to_filter) {
  obtn_tribes %>%
    filter(year == year_to_filter) %>%
    select(-year) %>%
    mutate(tribe = str_to_upper(tribe)) %>%
    mutate(geography = factor(geography, levels = obtn_oregon_counties)) %>%
    complete(geography, tribe) %>%
    rename(county = geography) %>%
    pivot_wider(
      id_cols = county,
      names_from = "tribe",
      values_from = "present"
    )
}


obtn_year %>%
  make_tribe_table() %>%
  write_xlsx(str_glue("inst/tables/{obtn_year}/{obtn_year}-tribes.xlsx"))


# * Tables by county -------------------------------------------------------


# measure_order <- tribble(
#   ~ measure,
#   "Food Insecurity",
#   "Child Poverty",
#   "Foster Care",
#   "Index Crime",
#   "Voter Participation",
#   "3rd Grade ELA",
#   "5th Grade Math",
#   "9th Grade on Track",
#   "Graduation Rate",
#   "4yr Degree or Greater",
#   "Unemployment Rate",
#   "LFPR",
#   "Job Growth",
#   "Property Tax per Person",
#   "Rent Costs",
#   "Low Weight Births",
#   "Vaccination Rate 2yr olds",
#   "Good Health",
#   "Mental Health Providers",
#   "Tobacco Use",
#   "Broadband Access",
#   "Childcare Availability",
#   "EV",
#   "Mobile Homes",
#   "VMT per capita"
# ) %>%
#   mutate(measure_order = row_number())
#
#
# county_table_data <- obtn_data_by_measure %>%
#   filter(year == obtn_year) %>%
#   filter(category != "Other") %>%
#   mutate(
#     value_formatted = case_when(
#       measure %in% comma_measures ~ comma(value, accuracy = 0.1),
#       measure %in% dollar_measures ~ dollar(value, accuracy = 1),
#       TRUE ~ percent(value / 100, accuracy = 0.1)
#     )
#   ) %>%
#   mutate(
#     value_formatted = case_when(
#       measure == "Letter Sounds" &
#         str_detect(geography, "Oregon", negate = TRUE) ~ as.character(str_glue("{value_formatted} of 26")),
#       measure %in% c("VMT per capita", "EV", "Mental Health Providers") ~ comma(value, accuracy = 1),
#       TRUE ~ value_formatted
#     )
#   ) %>%
#   left_join(measure_order, by = "measure") %>%
#   arrange(geography, measure_order) %>%
#   select(-c(value, measure_order))
#
# oregon_data <- county_table_data %>%
#   filter(str_detect(geography, "Oregon")) %>%
#   select(-category) %>%
#   pivot_wider(id_cols = measure,
#               names_from = "geography",
#               values_from = "value_formatted")
#
# county_table_data_with_state_data <-
#   county_table_data %>%
#   filter(str_detect(geography, "Oregon", negate = TRUE)) %>%
#   left_join(oregon_data, by = "measure") %>%
#   rename(value = value_formatted) %>%
#   relocate(category, .before = 1) %>%
#   select(-c(year, tertile_text, tertile_numeric, value_for_table))
#
#
# make_county_table <- function(county) {
#   county_lowercase <- str_to_lower(county)
#
#   county_table_data_with_state_data %>%
#     filter(geography == county) %>%
#     select(-c(geography, rank)) %>%
#     set_names(c("category", "measure", county, "Oregon", "Rural", "Urban")) %>%
#     add_row(.after = 5) %>%
#     add_row(.after = 11) %>%
#     add_row(.after = 17) %>%
#     add_row(.after = 23)
#
# }
#
# county_table_data_with_state_data
#
# make_county_table("Gilliam") %>%
#   print(n = 50)
#
# map(obtn_oregon_counties, make_county_table) %>%
#   set_names(obtn_oregon_counties) %>%
#   write_xlsx(str_glue("inst/tables/{obtn_year}/{obtn_year}-county-tables.xlsx"))


# * Tables by measure -------------------------------------------------------
#
# make_measure_table <- function(measure_name) {
#   obtn_data_by_measure %>%
#     filter(year == obtn_year) %>%
#     filter(measure == measure_name) %>%
#     mutate(
#       value = case_when(
#         measure %in% c("Mental Health Providers") ~ str_glue("{value}:1"),
#         measure %in% c(
#           "Land Area",
#           "VMT per capita",
#           "Vehicle Miles Traveled",
#           "Total Population",
#           "EV",
#           "Mental Health Providers"
#         ) ~ comma(value, accuracy = 1),
#         measure %in% c("Childcare Availability") ~ comma(value, accuracy = 0.1),
#         measure %in% comma_measures ~ comma(value, accuracy = 0.1),
#         measure %in% dollar_measures ~ dollar(value, accuracy = 1),
#         measure %in% c("Vaccination Rate 2yr olds") ~ percent(value, accuracy = 1, scale = 1),
#         tertile_text == "ID" ~ NA,
#         TRUE ~ percent(value / 100, accuracy = 0.1)
#       )
#     ) %>%
#     # This is for mental health providers because it shows NA:1
#     # for counties with no data
#     mutate(value = str_replace(value, "NA:1", NA_character_)) %>%
#     # In cases where rural, urban, or Oregon are the same as a county, the state-level row should come first
#     # This calculates the number of times an observation appears
#     add_count(value_for_table) %>%
#     # In cases where a value appears more than once and it is for a state-level row, state_tie becomes Y
#     mutate(state_tie = case_when(n > 1 &
#                                    str_detect(geography, "Oregon") ~ "Y")) %>%
#     # We then use the state_tie variable to order correct
#     arrange(desc(value_for_table), state_tie, geography) %>%
#     select(rank, geography, value) %>%
#     set_names(c("Rank", "County", "Amount"))
#
# }
#
# table_measures <-
#   obtn_data_by_measure %>%
#   filter(year == obtn_year) %>%
#   distinct(measure) %>%
#   pull(measure)
#
# make_measure_table("Childcare Availability") %>%
#   print(n = 100)
#
# map(table_measures, make_measure_table) %>%
#   set_names(table_measures) %>%
#   write_xlsx(str_glue("inst/tables/{obtn_year}/{obtn_year}-measures-tables.xlsx"))
#


# Put files to share in share directory -----------------------------------

# dir_delete(stringr::str_glue("inst/share/{obtn_year}/"))

dir_create(stringr::str_glue("inst/share/{obtn_year}/"))

dir_copy(
  stringr::str_glue("inst/plots/{obtn_year}/"),
  new_path = stringr::str_glue("inst/share/{obtn_year}"),
  overwrite = TRUE
)

dir_copy(
  stringr::str_glue("inst/tables/{obtn_year}/"),
  new_path = stringr::str_glue("inst/share/{obtn_year}"),
  overwrite = TRUE
)

zip::zip(
  zipfile = stringr::str_glue("{obtn_year}-plots-and-tables.zip"),
  files = dir_ls(),
  root = stringr::str_glue("inst/share/{obtn_year}/"),
  include_directories = FALSE,
  recurse = TRUE
)


# Get Spanish Language Race/Ethnicity Bar Charts --------------------------

# spanish_race_ethnicity_bar_charts <-
#   dir_ls(stringr::str_glue("inst/share/{obtn_year}/")) |>
#   as_tibble() |>
#   filter(str_detect(value, "race-ethnicity") & str_detect(value, "spanish")) |>
#   pull(value)
#
# zip::zip(
#   zipfile = stringr::str_glue("inst/share/{obtn_year}/race-ethnicity-spanish.zip"),
#   files = spanish_race_ethnicity_bar_charts
# )

# Beep --------------------------------------------------------------------

beepr::beep()