# Chunk defaults for the whole document: echo the code, but keep warnings
# and messages out of the rendered output.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE
)

# Evaluate every chunk from the RStudio project root so the relative
# "data-raw/..." and "data/..." paths below resolve from there.
knitr::opts_knit$set(
  root.dir = rprojroot::find_rstudio_root_file()
)

Creating a data set of geographies

library(readxl)
library(sf)
library(janitor)
library(tidyverse)
library(rmapshaper)
library(tidygeocoder)
library(wordcloud)
library(RColorBrewer)
library(tm) # to create corpus
library(wordcloud2)
library(viridis)

Soil and Water Conservation Districts (SWCDs)

# SWCD boundaries: read the raw shapefile, snake_case the column names,
# reproject to WGS84 longitude/latitude, and simplify the outlines.
swcd <- st_read("data-raw/shapefiles/SWCDs/SWCD.shp") |>
  clean_names() |>
  st_transform("+proj=longlat +ellps=WGS84 +datum=WGS84") |>
  # keep = 0.2 retains roughly a fifth of the vertices; keep_shapes = TRUE
  # stops small polygons from being removed by the simplification
  ms_simplify(keep = 0.2, keep_shapes = TRUE) |>
  # keep only the district name (exposed as `name`) and the geometry
  select(name = swcd_name, geometry)

# Persist the simplified layer
saveRDS(swcd, "data/swcd.rds")

# Quick visual check of the result
plot(swcd["name"])

PWS

Public Water System (PWS) Service Boundaries

# PWS boundaries: read the raw shapefile, snake_case the column names,
# reproject to WGS84 longitude/latitude, and simplify the outlines.
pws <- st_read("data-raw/shapefiles/PWS_shapefile/PWS_Export.shp") |>
  clean_names() |>
  st_transform("+proj=longlat +ellps=WGS84 +datum=WGS84") |>
  # keep = 0.2 retains roughly a fifth of the vertices; keep_shapes = TRUE
  # stops small polygons from being removed by the simplification
  ms_simplify(keep = 0.2, keep_shapes = TRUE) |>
  # keep only the system name (exposed as `name`) and the geometry
  select(name = pws_name, geometry)

# Persist the simplified layer
saveRDS(pws, "data/pws.rds")

# Quick visual check of the result
plot(pws["name"])

Most common words

# Word cloud of the most frequent terms appearing in the PWS names.

# Pull the names as plain text. The column is called `name` here: `pws_name`
# was renamed when `pws` was built, so `pws$pws_name` is NULL and would
# produce an empty corpus.
pws_names <- as.character(pws$name)

# Create a corpus with one document per name
docs <- Corpus(VectorSource(pws_names))

# Standardize the corpus: drop digits and punctuation, collapse whitespace,
# lower-case everything, then remove English stop words
docs <- docs |>
  tm_map(removeNumbers) |>
  tm_map(removePunctuation) |>
  tm_map(stripWhitespace) |>
  tm_map(content_transformer(tolower)) |>
  tm_map(removeWords, stopwords("english"))

# Term-document matrix -> total count per term, most frequent first
dtm <- TermDocumentMatrix(docs)
term_counts <- sort(rowSums(as.matrix(dtm)), decreasing = TRUE)
word_freq <- data.frame(
  word = names(term_counts),
  freq = term_counts,
  stringsAsFactors = FALSE
)

# NOTE(review): the "workshop"/"workshops" adjustment and the exclusion list
# below look like they were carried over from a different text analysis;
# confirm they are still wanted for water system names. Behaviour is kept
# as it was.
excluded_words <- c(
  "also", "really", "helpful", "wasnt", "etc",
  "asks", "move", "gave", "connect", "fellows"
)
word_freq <- word_freq |>
  mutate(freq = case_when(
    word == "workshop" ~ 7,
    TRUE ~ freq
  )) |>
  filter(word != "workshops", !(word %in% excluded_words))

# Fixed seed so the word placement is reproducible between renders
set.seed(27)
wordcloud(
  words = word_freq$word,
  freq = word_freq$freq,
  min.freq = 5,
  rot.per = 0.4,
  colors = viridis(8),
  random.order = FALSE,
  family = "serif",
  font = 2
)

GCDs

Crazy oblong boundaries

# GCD boundaries: read the raw shapefile, snake_case the column names,
# reproject to WGS84 longitude/latitude, and simplify the outlines.
gcd <- st_read("data-raw/shapefiles/GCD_Shapefiles/TWDB_GCD_NOV2019.shp") |>
  clean_names() |>
  st_transform("+proj=longlat +ellps=WGS84 +datum=WGS84") |>
  # keep = 0.2 retains roughly a fifth of the vertices; keep_shapes = TRUE
  # stops small polygons from being removed by the simplification
  ms_simplify(keep = 0.2, keep_shapes = TRUE) |>
  # keep only the short district name (exposed as `name`) and the geometry
  select(name = shortnam, geometry)

# Persist the simplified layer
saveRDS(gcd, "data/gcd.rds")

# Quick visual check of the result
plot(gcd["name"])

Major Aquifers

# Major aquifers: read the raw shapefile, snake_case the column names,
# reproject to WGS84 longitude/latitude, and simplify the outlines.
aqu <- st_read("data-raw/shapefiles/major_aquifers/NEW_major_aquifers_dd.shp") |>
  clean_names() |>
  st_transform("+proj=longlat +ellps=WGS84 +datum=WGS84") |>
  # keep = 0.2 retains roughly a fifth of the vertices; keep_shapes = TRUE
  # stops small polygons from being removed by the simplification
  ms_simplify(keep = 0.2, keep_shapes = TRUE) |>
  # keep only the aquifer name (exposed as `name`) and the geometry
  select(name = aq_name, geometry)

# Persist the simplified layer
saveRDS(aqu, "data/aqu.rds")

# Quick visual check of the result
plot(aqu["name"])

River Basins

# Major river basins: read the raw shapefile, snake_case the column names,
# reproject to WGS84 longitude/latitude, and simplify the outlines.
rb <- st_read("data-raw/shapefiles/Major_River_Basins_Shapefile/TWDB_MRBs_2014.shp") |>
  clean_names() |>
  st_transform("+proj=longlat +ellps=WGS84 +datum=WGS84") |>
  # keep = 0.2 retains roughly a fifth of the vertices; keep_shapes = TRUE
  # stops small polygons from being removed by the simplification
  ms_simplify(keep = 0.2, keep_shapes = TRUE) |>
  # keep only the basin name (exposed as `name`) and the geometry
  select(name = basin_name, geometry) |>
  # label each feature as "<basin> River Basin"
  mutate(name = paste0(name, " River Basin"))

# Persist the simplified layer
saveRDS(rb, "data/rb.rds")

# Quick visual check of the result
plot(rb["name"])

Major Rivers

# Major rivers: read the raw shapefile, snake_case the column names,
# reproject to WGS84 longitude/latitude, and simplify the lines.
riv <- st_read("data-raw/shapefiles/Major_Rivers_dd83/MajorRivers_dd83.shp") |>
  clean_names() |>
  st_transform("+proj=longlat +ellps=WGS84 +datum=WGS84") |>
  # much more aggressive than the polygon layers: keep = 0.001, and
  # keep_shapes = FALSE allows features to be removed by the simplification
  ms_simplify(keep = 0.001, keep_shapes = FALSE) |>
  # the name column is already called `name`, so no renaming is needed
  select(name, geometry)

# Persist the simplified layer
saveRDS(riv, "data/riv.rds")

# Quick visual check of the result
plot(riv["name"])

Regional Water Planning Groups

# RWPA boundaries: read the raw shapefile, snake_case the column names,
# reproject to WGS84 longitude/latitude, and simplify the outlines.
rwpa <- st_read("data-raw/shapefiles/RWPA_Shapefile/TWDB_RWPAs_2014.shp") |>
  clean_names() |>
  st_transform("+proj=longlat +ellps=WGS84 +datum=WGS84") |>
  # keep = 0.2 retains roughly a fifth of the vertices; keep_shapes = TRUE
  # stops small polygons from being removed by the simplification
  ms_simplify(keep = 0.2, keep_shapes = TRUE) |>
  # keep only the region name (exposed as `name`) and the geometry
  select(name = reg_name, geometry)

# Persist the simplified layer
saveRDS(rwpa, "data/rwpa.rds")

# Quick visual check of the result
plot(rwpa["name"])

Counties

# County boundaries: read the raw shapefile, snake_case the column names,
# reproject to WGS84 longitude/latitude, and simplify the outlines.
county <- st_read("data-raw/shapefiles/Texas_County_Boundaries_Detailed-shp/County.shp") |>
  clean_names() |>
  st_transform("+proj=longlat +ellps=WGS84 +datum=WGS84") |>
  # keep = 0.2 retains roughly a fifth of the vertices; keep_shapes = TRUE
  # stops small polygons from being removed by the simplification
  ms_simplify(keep = 0.2, keep_shapes = TRUE) |>
  # keep only the county name (exposed as `name`) and the geometry
  select(name = cnty_nm, geometry)

# Persist the simplified layer (note the file is "counties.rds", plural)
saveRDS(county, "data/counties.rds")

# Quick visual check of the result
plot(county["name"])
# Geocode a single street address with the "osm" method of tidygeocoder.
# NOTE(review): this looks like an ad-hoc check that geocoding works; the
# result is not used anywhere else in this document.
addy <- tibble(address = "110 Jacob Fontaine Ln")

werk <- tidygeocoder::geocode(addy, address = address, method = "osm")

Water districts

# Water districts: read the raw shapefile, snake_case the column names,
# reproject to WGS84 longitude/latitude, and simplify the outlines.
# NOTE(review): unlike every other layer, this path has no "shapefiles/"
# segment and the result is never written with saveRDS() -- confirm both
# are intentional.
wd <- st_read("data-raw/Water_Districts/Water_Districts.shp") |>
  clean_names() |>
  st_transform("+proj=longlat +ellps=WGS84 +datum=WGS84") |>
  # keep = 0.2 retains roughly a fifth of the vertices; keep_shapes = TRUE
  # stops small polygons from being removed by the simplification
  ms_simplify(keep = 0.2, keep_shapes = TRUE) |>
  # this layer keeps several attribute columns, not just the name
  select(name, type, county, source, fips, geometry)

# Quick visual check of the result
plot(wd["name"])

Regional Flood Planning Groups

# Regional flood planning groups: read the raw shapefile, snake_case the
# column names, reproject to WGS84 longitude/latitude, and simplify the
# outlines.
fg <- st_read("data-raw/shapefiles/Regional_Flood_Planning_Groups/Regional_Flood_Planning_Groups.shp") |>
  clean_names() |>
  st_transform("+proj=longlat +ellps=WGS84 +datum=WGS84") |>
  # keep = 0.2 retains roughly a fifth of the vertices; keep_shapes = TRUE
  # stops small polygons from being removed by the simplification
  ms_simplify(keep = 0.2, keep_shapes = TRUE) |>
  # keep the group name (exposed as `name`), the region number, and geometry
  select(name = rfpg, region_no, geometry)

# Persist the simplified layer
saveRDS(fg, "data/fg.rds")

# Quick visual check of the result
plot(fg["name"])


ethantenison/TexasWater documentation built on Oct. 22, 2022, 10:36 p.m.