# Chunk defaults: echo the code, but keep warnings and messages out of the
# rendered output.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE
)
# Use the RStudio project root as knitr's root directory so the relative
# paths used below ("data-raw/...", "data/...") resolve from there.
knitr::opts_knit$set(
  root.dir = rprojroot::find_rstudio_root_file()
)

library(readxl)
library(sf)
library(janitor)
library(tidyverse)
library(ggmap)
library(zipcodeR)
library(tidygeocoder)

Master List

# Read the raw master list of organizations from the Excel workbook.
master_raw <- read_excel("data-raw/TX water gov Master List_May 25.xlsx")

# Inspect the columns that were just read. The cleaned `master` table is only
# created further down, so the raw object is the one to look at here.
glimpse(master_raw)

Initial Data cleaning

There are over 60 unique organization types.

# Build the cleaned master table:
#   * keep only the columns used downstream, renaming them on selection so
#     they match the old column headers;
#   * label missing organization types as "N/A";
#   * restore three county names that arrive truncated in the spreadsheet;
#   * add a "<County> County,TX" string for county-level geocoding.
master <- master_raw |>
  select(
    Organization = organization,
    Address = address,
    zip_code,
    County = county,
    Sector = sector,
    Type = org_type,
    lon = longitude,
    lat = latitude
  ) |>
  mutate(
    Type = coalesce(Type, "N/A"),
    County = case_when(
      County == "cogdoches" ~ "Nacogdoches",
      County == "varro" ~ "Navarro",
      County == "McLenn" ~ "McLennan",
      TRUE ~ County
    ),
    # Later arguments of the same mutate() see the corrected County values.
    county_name = paste0(County, " County,TX")
  )

# Split the master table on whether a longitude is already present.
# Rows that already carry coordinates:
with_lonlat <- filter(master, !is.na(lon))

# Rows that still need coordinates:
wo_lonlat <- filter(master, is.na(lon))

Tidygeocode

Honestly, there are just too many geocoding errors to deal with.

# Some records have a street address but no coordinates. For those, build a
# full query string of the form "<Address>, <County> County,TX" and geocode it
# with the OSM method of tidygeocoder.
wo_lonlat_add2 <- wo_lonlat |>
  filter(!is.na(Address)) |>
  mutate(
    address_exct = paste0(Address, ", ", County, " County,TX")
  ) |>
  tidygeocoder::geocode(
    address = address_exct,
    method = "osm"
  )

Adding county longitude/latitude

# Geocode each record's county ("<County> County,TX") to attach a
# county-level coordinate next to the organization's own lon/lat.
# Takes a while: the lookup goes through the OSM geocoding method.
#
# The input already has a `lat` column, so the geocoder's default output name
# would collide with it and be auto-repaired to position-based names
# ("lat...8", "lat...10"). Naming the output columns explicitly keeps the
# original `lat` untouched and yields the same final columns (`lat`, `lat_c`,
# `long`) without depending on where the columns happen to sit.
df <- with_lonlat |>
  tidygeocoder::geocode(
    address = county_name,
    method = "osm",
    lat = lat_c,
    long = long
  )

# Draw a random integer 1..6 for every row and map it to a sector label, then
# copy that label into `search`.
# NOTE(review): this replaces the Sector values read from the master list with
# random labels and is not seeded — presumably placeholder data; confirm.
# The helper column `random` is kept here; it is dropped before writing.
df <- df |>
  mutate(
    random = sample(6, size = n(), replace = TRUE),
    Sector = c(
      "Rural",
      "Agriculture",
      "Energy",
      "Groundwater",
      "Flood",
      "Policy"
    )[random],
    search = Sector
  )

# Duplicate every record under the catch-all search key "All", so each
# organization appears once under its sector and once under "All".
df2 <- mutate(df, search = "All")

# Stack the two copies, drop the helper columns, and write the result out.
df <- bind_rows(df, df2) |>
  select(-zip_code, -random)
write.csv(
  x = df,
  file = "data/organization_updated.csv",
  row.names = FALSE
)


ethantenison/TexasWater documentation built on Oct. 22, 2022, 10:36 p.m.