# Chunk defaults: echo the code, keep warnings and messages out of the output.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE
)

# Set knitr's root directory to the RStudio project root.
knitr::opts_knit$set(
  root.dir = rprojroot::find_rstudio_root_file()
)

Creating a dataset of partner orgs

library(readxl)
library(sf)
library(janitor)
library(tidyverse)
library(ggmap)
library(zipcodeR)
library(tidygeocoder)

TRWA data

# TRWA member list: standardise the column names, tag every row as a rural
# utility, and assemble a single-line address from its component columns.
trwa <- read_csv("data-raw/All TRWA Members July 20211314033.csv") |>
  clean_names() |>
  rename(
    lon = map_longitude,
    lat = map_latitude,
    zip_code = employer_postal_code,
    organization = employer_name
  ) |>
  mutate(
    sector = "Rural",
    org_type = "Utilities",
    # Uses the postal code exactly as it appears in the file.
    address = paste0(
      employer_address_line1, ", ", employer_city, ", ",
      employer_state_abbrev, " ", zip_code
    ),
    # Trim 10-character postal codes down to their first five characters.
    # NOTE(review): this runs after `address` is built, and `zip_code` is
    # dropped by the select() below, so it has no effect on the result.
    zip_code = case_when(
      nchar(zip_code) == 10 ~ str_sub(zip_code, 1, 5),
      TRUE ~ zip_code
    )
  ) |>
  select(organization, address, lon, lat, sector, org_type)

Texas Water Governance Survey

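Only run this chunk if you need to rerun the geocoding; its results are saved to data-raw/geocoded_survey.csv and read back from there in the merge step.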

# Geocode the survey master list. Rows without a sector or an address are
# dropped first, then mutate_geocode() looks up each remaining address
# through ggmap (an external service call per address, so this is slow).
twg <- read_csv(
  "data-raw/Texas Water Governance Survey Master List - Master List.csv"
) |>
  clean_names() |>
  filter(
    !is.na(sector),
    !is.na(address)
  ) |>
  mutate_geocode(address)

# Cache the geocoded result so the lookup does not have to be repeated.
# row.names = FALSE keeps the row index out of the file; otherwise it comes
# back as an unnamed extra column when the CSV is read again.
write.csv(twg, "data-raw/geocoded_survey.csv", row.names = FALSE)

Merging

# Reload the cached geocoded survey, keeping only the columns it shares
# with the TRWA table.
twg <- read_csv("data-raw/geocoded_survey.csv") |>
  select(organization, address, lon, lat, sector, org_type)

# Stack both sources and keep only rows with a complete coordinate pair:
# a missing longitude is as unusable as a missing latitude when the rows
# are later turned into point geometries.
org <- twg |>
  bind_rows(trwa) |>
  filter(!is.na(lon), !is.na(lat))

# row.names = FALSE matches the other exports and avoids an index column.
write.csv(org, "data-raw/organizations.csv", row.names = FALSE)

Add county

library(sf)

# County polygons; the `name` column supplies the county label.
county <- readRDS("./data/counties.rds")

# Organisations as WGS84 points, re-projected to the county layer's CRS so
# the spatial predicate compares geometries in the same reference system
# (a no-op when the county layer is already EPSG:4326).
org_sf <- org |>
  st_as_sf(
    coords = c("lon", "lat"),
    crs = 4326,
    agr = "constant"
  ) |>
  st_transform(st_crs(county))

# Sparse intersection: one element per organisation holding the row indices
# of the counties it falls in. This avoids building the dense
# counties x organisations logical matrix.
county_hits <- st_intersects(org_sf, county)

# Always a list-column (zero, one or several names per organisation), so the
# column type no longer depends on how many matches each point happens to get.
org_sf$county <- lapply(county_hits, function(idx) county$name[idx])

# Plain lookup table of organisation -> county name(s), geometry dropped.
org_sf <- org_sf |>
  as.data.frame() |>
  select(organization, county)

# `org_sf` was built row-for-row from `org`, so the county column is attached
# by position. Joining on `organization` would multiply rows whenever the
# same organisation name occurs more than once.
org$county <- org_sf$county

Zip Radius

library(ggmap)

# Exploratory reverse geocoding of organisation coordinates with ggmap.
# NOTE(review): none of `test`, `expanded` or `df3` is used by the code that
# follows in this file -- confirm whether this section is still needed.

# Earlier attempt that passed the whole table at once, kept for reference.
#zips <- revgeocode(org, output = "all")

# First rows of `org` (head() default), used as a small trial sample.
test <- head(org)


# Reverse-geocode each (lon, lat) pair of the sample and row-bind the results.
# NOTE(review): output = "all" presumably returns the full nested response
# rather than a data frame, which map2_dfr() may not be able to bind -- verify.
expanded <- map2_dfr(test$lon, test$lat, ~revgeocode(c(.x, .y), output = "all"))


# Same lookup for every row of `org`, one revgeocode() call per row.
# NOTE(review): do() is superseded in dplyr and presumably expects a data
# frame from each call; confirm this runs before relying on `df3`.
df3 <- org %>%
  rowwise() %>%
  do(revgeocode(c(.$lon[1], .$lat[1]), output = "all")) %>%
  ungroup()
df3

Datatable

This step can also take a long time, because it geocodes every county name through the OSM (Nominatim) service.

# Display table: one row per organisation with presentation-style column
# names. `search` starts as a copy of the sector.
df <- org |>
  select(organization, address, county, sector, org_type, lon, lat) |>
  mutate(search = sector) |>
  rename(
    Organization = organization,
    Address = address,
    Sector = sector,
    Type = org_type,
    County = county
  ) |>
  arrange(Organization)

# Second copy of every row with `search` set to "All".
df2 <- df |>
  mutate(search = "All")

# Getting county lat/lon: stack both copies, expand the County column to one
# row per county name, then geocode "<County> County,TX" through OSM.
# The geocoder's latitude is written straight to `lat_c` so it cannot collide
# with the organisation's own `lat` column; this replaces a rename that
# depended on auto-repaired positional names ("lat...7" / "lat...10").
df <- df |>
  bind_rows(df2) |>
  unnest(County) |>
  mutate(county_name = paste0(County, " ", "County,TX")) |>
  tidygeocoder::geocode(
    address = county_name,
    method = "osm",
    lat = "lat_c",
    long = "long"
  )

# One row per county with its geocoded coordinates.
county_lonlat <- df |>
  select(County, lat_c, long) |>
  distinct()

write.csv(df, "data/organization.csv", row.names = FALSE)
write.csv(county_lonlat, "data/county_lonlat.csv", row.names = FALSE)


ethantenison/TexasWater documentation built on Oct. 22, 2022, 10:36 p.m.