GIS_functions/AWRR-to-GDB.R

####################################################################################
# This script generates annual and monthly withdrawals by decade for the staff GIS application. Intermediate CSV exports can be used for data requests

# GIS Application:
# 1. Run Part 1 - Annual [There should be 1 saved shapefile]
# 2. Run Part 2 - Monthly [There should be 5 saved shapefiles for each decade]
# 3. See "#REMAINING STEPS IF GDB IS DESIRED" for the steps that need to happen outside of R to create the data deliverable we sent to the GIS team aka Rex.

# CSV Data exports:
# Annual export in Part 1, wide format is the last write.csv step
# Monthly exports by decade generated by running Part 2, wide format is the last write.csv step
####################################################################################

library(rgeos) #readWKT()
library(rgdal) #readOGR()
library(raster) #bind()
library('httr')
library('sqldf')
library('dplyr')
library('tidyr')
library(maptools)
library("beepr")
library(sf)
# USER INPUTS #########################################################################
#WKT_layer <- read.csv('C:/Users/nrf46657/Desktop/VAHydro Development/GitHub/hydro-tools/GIS_LAYERS/MinorBasins.csv')

#load variables
syear = 1982
eyear = 2022

## LOAD CONFIG FILE ###################################################################
source(paste("/var/www/R/config.local.private", sep = ""))
localpath <- paste(github_location,"/USGS_Consumptive_Use", sep = "")

#LOAD from_vahydro() FUNCTION
source(paste(localpath,"/Code/VAHydro to NWIS/from_vahydro.R", sep = ""))
datasite <- "http://deq1.bse.vt.edu:81/d.dh"

#PART 1 - ANNUAL ####################################################################
### RETRIEVE ANNUAL WITHDRAWAL DATA #################################################
wd_annual_data <- list()

## year range
year_range <- format(seq(as.Date(paste0(syear,"/1/1")), as.Date(paste0(eyear,"/1/1")), "years"), format="%Y")

# ## BB- Using data from SQL foundation pull
# for (y in year_range) {
#   print(paste0("PROCESSING YEAR: ", y))
#   startdate <- paste(y, "-01-01",sep='')
#   enddate <- paste(y, "-12-31", sep='')
# 
#   #with power
#   export_view <- paste0("ows-awrr-map-export/wd_mgy?ftype_op=%3D&ftype=&tstime_op=between&tstime%5Bvalue%5D=&tstime%5Bmin%5D=",startdate,"&tstime%5Bmax%5D=",enddate,"&bundle%5B0%5D=well&bundle%5B1%5D=intake&
#                          dh_link_admin_reg_issuer_target_id%5B0%5D=65668&dh_link_admin_reg_issuer_target_id%5B1%5D=91200&dh_link_admin_reg_issuer_target_id%5B2%5D=77498")
#   output_filename <- "wd_mgy_export.csv"
#   wd_annual <- from_vahydro(datasite,export_view,localpath,output_filename)
# 
#   wd_annual_data <- rbind(wd_annual_data, wd_annual)
# }

wd_annual_data <- read.csv(paste0(onedrive_location,"/OWS/foundation_datasets/awrr/",eyear+1,"/foundation_dataset_mgy_1982-",eyear,".csv"))

## Unecessary, if using the foundation data, as this step is done in AWRR_data.R
#remove duplicates - GROUP BY USING MAX
# wd_ann <- sqldf('SELECT "MP_hydroid","Hydrocode","Source.Type","MP.Name","Facility_hydroid","Facility","Use.Type","Year",max("Water.Use.MGY") AS "Water.Use.MGY","Latitude","Longitude","Locality","FIPS.Code"
#                FROM wd_annual_data
#                WHERE Facility != "DALECARLIA WTP"
#                GROUP BY "MP_hydroid","Hydrocode","Source.Type","MP.Name","Facility_hydroid","Facility","Use.Type","Year","Latitude","Longitude","Locality","FIPS.Code"
#                ORDER BY "Water.Use.MGY" DESC ')

year_range <- NULL
for (i in syear:eyear) {
  year_range <- paste0(year_range,'X',i,' AS MGY_', i,',')
}

#rename columns & CONVERT LAT/LON COLUMNS TO WKT COLUMN.
wd_mgy_export <- sqldf(paste0('SELECT MP_hydroid AS MP_ID,
                          Hydrocode AS Hcode,
                          Source_Type AS Source_Type,
                          MP_Name AS MP_Name,
                          Facility_hydroid AS Fac_ID,
                          Facility AS Fac_Name,
                          Use_Type AS UseType,
                          ',year_range,'
                          "POINT "||"("||Longitude||" "||Latitude||")" AS geom,
                          Latitude AS Lat,
                          Longitude AS Lon,
                          FIPS_Code AS FIPS
                       FROM wd_annual_data
                       ORDER BY MGY_',eyear
                       ))

##Foundation data already comes in wide
# #place into export data frame
# wd_mgy_export <- spread(data = wd_mgy, key = MGY, value = USE_MGY,sep = "_")

firstcol = which(colnames(wd_mgy_export)==paste0("MGY_",eyear-4))
lastcol = which(colnames(wd_mgy_export)== paste0("MGY_",eyear))

wd_mgy_export$multi_yr_avg <- round((rowMeans(wd_mgy_export[firstcol:lastcol], na.rm = TRUE, dims = 1)),2)
#save file
write.csv(wd_mgy_export,paste0(export_path,"withdrawal_annual.csv"), row.names = FALSE)


output_location <- paste0(export_path,"shp_output/")
output_file <- paste0("mp_wd_annual_",syear,"-",eyear,".shp")
WKT_layer <- wd_mgy_export

#### CSV TO SHAPEFILE ############################################################
# Read the .csv file
WKT_layer$id <- as.numeric(rownames(WKT_layer))

# look at the data structure
str(WKT_layer)

# view column names
names(WKT_layer)

# SpatialPointsDataFrame does not accept NA values in coordinate fields
r <- sqldf('SELECT 
            CASE
              WHEN Lat = ""
              THEN 99
              WHEN Lat IS NULL
              THEN 99
              ELSE Lat
            END AS Lat,
            CASE
              WHEN Lon = ""
              THEN 99
              WHEN Lon IS NULL
              THEN 99
              ELSE Lon
            END AS Lon,
            *
            FROM WKT_layer
           ')

# first, convert the data.frame to spdf
coordinates(r) <- ~Lon+Lat

## Cleaning up some of the data to only include VA (roughly)
r<-r[(r$Lat > 36.4 & r$Lat < 40) & (r$Lon > -85 & r$Lon < -75),]
## Removing the rest outside of VA
exclusions <- c(58954,59590,59591,59592,59593,59594,60318,60802,60810,60817,60829,
                60834,62262,63125,64320,64374,64769,65025,65521,65549,65552,65557
                ,65646,65662,68485,454961,454962,454964,454965,454966,462401,462485,59596,59099,65156,59596,59099,60447,64320)
r <- r[!(r$MP_ID %in% exclusions),]
# second, assign the CRS in one of two ways
crs(r) <- "+proj=webmerc +zone=18 +datum=WGS84 +units=m +no_defs 
                 +ellps=WGS84 +towgs84=0,0,0"
plot(r, 
     main=paste0("Map of Withdrawal Points: ",syear,"-",eyear))
str(r)
# write a shapefile
writeOGR(r, export_path,
         paste0("mp_wd_annual_",syear,"-",eyear), driver="ESRI Shapefile", overwrite_layer = T)

#PART 2 - MONTHLY ####
### RETRIEVE MONTHLY WITHDRAWAL DATA #################################################
#begin with syear=1982 and eyear=1989, then repeat for 1990-1999, 2000-2009, 2010-2019, 2020-2023
#load variables
syear = 1982
# eyear = 1989
wd_monthly_data <- list()

## year range
year_range <- format(seq(as.Date(paste0(syear,"/1/1")), as.Date(paste0(eyear,"/1/1")), "years"), format="%Y")

#Increase max time for VAHydro URL access
getOption('timeout') #default is 60 seconds
options(timeout=300) #this adjusts to 5min

# #pull from VAHydro
# for (y in year_range) {
#   print(paste0("PROCESSING YEAR: ", y))
#   startdate <- paste(y, "-01-01",sep='')
#   enddate <- paste(y, "-12-31", sep='')
#   
#   #with power
#   export_view <- paste0("ows-annual-report-map-exports-monthly-export/wd_mgm?ftype_op=%3D&ftype=&tstime_op=between&tstime%5Bvalue%5D=&tstime%5Bmin%5D=",startdate,"&tstime%5Bmax%5D=",enddate,"&bundle%5B0%5D=well&
#                          bundle%5B1%5D=intake&dh_link_admin_reg_issuer_target_id%5B0%5D=65668&dh_link_admin_reg_issuer_target_id%5B1%5D=91200&dh_link_admin_reg_issuer_target_id%5B2%5D=77498")
#   output_filename <- "wd_mgm_export.csv"
#   wd_monthly <- from_vahydro(datasite,export_view,localpath,output_filename)
#   
#   wd_monthly_data <- rbind(wd_monthly_data, wd_monthly)
# }

wd_monthly_data <- read.csv(paste0(onedrive_location,"/OWS/foundation_datasets/awrr/",eyear+1,"/awrr_monthly_foundation_",eyear+1,".csv"))

#remove duplicates - GROUP BY USING MAX. Also added logic to remove gw2s and orphan wells
wd_mon <- sqldf('SELECT "MP_hydroid","Hydrocode","Source.Type","MP.Name","Facility_hydroid","Facility",LOWER("Use.Type") AS "Use.Type","Year","Month", max("tsvalue") AS "Water.Use.MGM","Latitude", "Longitude","Locality","FIPS.Code" 
               FROM wd_monthly_data
               WHERE "Use.Type" NOT LIKE "%gw2_%"
                 AND "Use.Type" NOT LIKE "%faci%"
                 AND Facility_hydroid != 67190
               GROUP BY MP_hydroid,Hydrocode,"Source.Type","MP.Name",Facility_hydroid,"Use.Type","Year","Month","Latitude","Longitude","Locality","FIPS.Code"
               ORDER BY "Water.Use.MGM" DESC ')    # Removed Faccility 67190 as it is duplicated (labelled incactive)

#rename columns & CONVERT LAT/LON COLUMNS TO WKT COLUMN
wd_mgm <- sqldf('SELECT MP_hydroid AS MP_ID,
                          Hydrocode AS Hcode,
                          "Source.Type" AS Source_Type,
                          "MP.Name" AS MP_Name,
                          Facility_hydroid AS Fac_ID,
                          Facility AS Fac_Name,
                          "Use.Type" AS UseType,
                          Year,
                          Month,
                          CASE 
                            WHEN Month = 1
                            THEN "Jan"
                            WHEN Month = 2
                            THEN "Feb"
                            WHEN Month = 3
                            THEN "Mar"
                            WHEN Month = 4
                            THEN "Apr"
                            WHEN Month = 5
                            THEN "May"
                            WHEN Month = 6
                            THEN "Jun"
                            WHEN Month = 7
                            THEN "Jul"
                            WHEN Month = 8
                            THEN "Aug"
                            WHEN Month = 9
                            THEN "Sep"
                            WHEN Month = 10
                            THEN "Oct"
                            WHEN Month = 11
                            THEN "Nov"
                            WHEN Month = 12
                            THEN "Dec"
                            ELSE "No Date"
                          END AS Month2,
                          "Water.Use.MGM" AS USE_MGM,
                          "POINT "||"("||Longitude||" "||Latitude||")" AS geom,
                          Latitude AS Lat,
                          Longitude AS Lon,
                          "FIPS.Code" AS FIPS
                       FROM wd_mon
                       ORDER BY Year, Month
                       ') 


#transform from long to wide table
wd_mgm_export <- pivot_wider(data = wd_mgm, id_cols = c("MP_ID","Hcode", "Source_Type", "MP_Name", "Fac_ID", "Fac_Name","UseType","geom","Lat","Lon", "FIPS"), names_from = c("Month2", "year"), values_from = "USE_MGM")

#Order by MP_hydroid after the pivot so years remain in chronological order
wd_mgm_export <- sqldf('SELECT * FROM wd_mgm_export ORDER BY MP_ID')

#save file
write.csv(wd_mgm_export,paste0(export_path,"withdrawal_monthly_",syear,"-",eyear,".csv"), row.names = FALSE)

# output_location <- paste0(export_path,"shp_output/")
# output_file <- paste0("mp_wd_monthly_",syear,"-",eyear,".shp")

### CSV TO SHAPEFILE --------------------------------------------------------------------------
# Read the .csv file
wd_mgm_export <- read.csv(paste0(export_path,"withdrawal_monthly_",syear,"-",eyear,".csv"), stringsAsFactors = F)

syear_range <- c(1982,1990,2000,2010,2020)
eyear_range <- c(1989,1999,2009,2019,eyear)

for (i in 1:length(eyear_range)) {
  wd_mgm_temp <- wd_mgm[wd_mgm$year >= syear_range[i] & wd_mgm$year <= eyear_range[i],]
  
  wd_mgm_temp <-  pivot_wider(data = wd_mgm_temp, id_cols = c("MP_ID","Hcode", "Source_Type", "MP_Name", "Fac_ID", 
                                                         "Fac_Name","UseType","geom","Lat","Lon", "FIPS"), 
                              names_from = c("Month2", "year"), values_from = "USE_MGM")
  WKT_layer <- wd_mgm_temp
  
  WKT_layer$id <- as.numeric(rownames(WKT_layer))
  
  # look at the data structure
  # str(WKT_layer)
  
  # # view column names - BB Already shown in line above
  # names(WKT_layer)
  
  # SpatialPointsDataFrame does not accept NA values in coordinate fields
  r <- sqldf('SELECT CASE
                WHEN Lat = ""
                THEN 99
                WHEN Lat IS NULL
                THEN 99
                ELSE Lat
                END AS Lat,
                CASE
                WHEN Lon = ""
                THEN 99
                WHEN Lon IS NULL
                THEN 99
                ELSE Lon
                END AS Lon,
                *
             FROM WKT_layer
             ')
  
  # first, convert the data.frame to spdf
  coordinates(r) <- ~Lon+Lat
  
  ## Cleaning up some of the data to only include VA (roughly)
  r<-r[(r$Lat > 36.4 & r$Lat < 40) & (r$Lon > -85 & r$Lon < -75),]
  exclusions <- c(58954,59590,59591,59592,59593,59594,60318,60802,60810,60817,60829,
                  60834,62262,63125,64320,64374,64769,65025,65521,65549,65552,65557
                  ,65646,65662,68485,454961,454962,454964,454965,454966,462401,462485,59596,59099,65156,59596,59099,60447,64320)
  r <- r[!(r$MP_ID %in% exclusions),]
  
  # second, assign the CRS in one of two ways
  crs(r) <- "+proj=webmerc +zone=18 +datum=WGS84 +units=m +no_defs 
                   +ellps=WGS84 +towgs84=0,0,0"
  plot(r, 
       main=paste0("Map of Withdrawal Points: ",syear_range[i],"-",eyear_range[i]))
  # str(r)
  # write a shapefile
  writeOGR(r, export_path,
           paste0("mp_wd_monthly_",syear_range[i],"-",eyear_range[i]), driver="ESRI Shapefile", overwrite_layer = T)
  print(paste0("PROCESS COMPLETE: ",syear_range[i]," - ",eyear_range[i]," MONTHLY GIS LAYER"))
  #Return to beginning of Part 2 until 5 shapefiles are generated for each of the 5 decade blocks
}
rm(wd_mgm_temp)

## TEST using the sf package to retrieve feature classes in .gdb
# ogrListLayers("C:/Users/maf95834/Documents/shp_output/OWS_Water_Withdrawal.gdb")
# ogrInfo(dsn = "C:/Users/maf95834/Documents/shp_output/OWS_Water_Withdrawal.gdb", layer= "mp_wd_monthly_2010_2019_Clip")
# a <- readOGR(dsn = "C:/Users/maf95834/Documents/shp_output/OWS_Water_Withdrawal.gdb",
#         layer = "mp_wd_annual_1982_2020")
# 
# b <- as(a, "sf")
# 
# st_write(obj = b,dsn = "C:/Users/maf95834/Documents/shp_output",layer = "test", driver = "FileGDB")
#REMAINING STEPS IF GDB IS DESIRED
# 1) In ArcMap - Load resulting .shp file in arcmap 
# 2) Save as gdb - import multiple feature class files to .gdb 
# 3) Set the coordinate reference system in the  data layer's property page in ArcCatalog
# 4) Export Data Table to .gdb for all layers
# 5) Define projection as WGS_84 for each layer 
# 6) Clip the layers to the VA extent
# Final .gdb should have 6 clipped layers and 6 complete data tables
# Note: in 2022, aka data through 2021, there were only 6 clipped layers and no separate data tables

#PART 3 - Static ############################################################## 
## 2015 WSP Areas to Shapefile ################################################
#PULL IN WSP Region List from local file
wsp_regions <- read.csv(paste(folder,"ows_wsp_regions_wkt.csv",sep=""))
wsp_regions$region_name <- as.character(wsp_regions$region_name)

wsp_df <- sqldf('SELECT *  
         FROM wsp_regions
         WHERE geom IS NOT NULL
         AND "FIPS.Code" NOT LIKE "3%"')

#convert WKT into an sf object
wsp.sf <- st_as_sf(wsp_df, wkt = 'geom')

#convert sf object to SpatialPolygonDataframe
wsp.spdf <- as(wsp.sf, "Spatial")

# assign the CRS
crs(wsp.spdf) <- "+proj=utm +zone=18 +datum=WGS84 +units=m +no_defs 
                 +ellps=WGS84 +towgs84=0,0,0"
# visually confirm that it can plot/worked!
plot(wsp.spdf, 
     main="Map of WSP Areas", border = "black", col = wsp.spdf$region_featureid)
names(wsp.spdf)
# write a shapefile
writeOGR(wsp.spdf, "C:/Users/maf95834/Documents/shp_output","ows_wsp_regions", driver="ESRI Shapefile", overwrite_layer = T)

## Drought Regions to Shapefile ################################################
#PULL IN drought regions from local file
fips_df <- sqldf('SELECT CASE
                            WHEN fips_code IN (51191,51167,51169,51173,51520,51185,51720,51105,51027,51051,51195) THEN "Big Sandy"
                            WHEN fips_code IN (51025,51081,51053,51620,51111,51135,51149,51175,51181,51183,51595) THEN "Chowan"
                            WHEN fips_code IN (51131,51001) THEN "Eastern Shore"
                            WHEN fips_code IN (51049,51075,51085,51087,51009,51570,51125,51145,51147,51041,51670,51680,51730,51760,51007,51065,51003,51011,51029,51540) THEN "Middle James"
                            WHEN fips_code IN (51021,51197,51035,51063,51071,51121,51155,51640,51077,51750) THEN "New River"
                            WHEN fips_code IN (51057,51073,51099,51103,51097,51101,51133,51159,51115,51119,51033,51193) THEN "Northern Coastal Plain"
                            WHEN fips_code IN (51047,51109,51113,51137,51157,51177,51179,51630,51079) THEN "Northern Piedmont"
                            WHEN fips_code IN (51013,51059,51061,51610,51153,51600,51107,51683,51685,51510) THEN "Northern Virginia"
                            WHEN fips_code IN (51031,51037,51067,51083,51590,51089,51161,51690,51019,51770,51775,51143,51117,51515,51141) THEN "Roanoke"
                            WHEN fips_code IN (51820,51840,51171,51015,51043,51165,51660,51187,51790,51139,51069) THEN "Shenandoah"
                            WHEN fips_code IN (51093,51550,51800,51810,51710,51740) THEN "Southeast Virginia"
                            WHEN fips_code IN (51045,51163,51678,51530,51005,51091,51580,51023,51017) THEN "Upper James"
                            WHEN fips_code IN (51830,51036,51095,51127,51650,51199,51735,51700) THEN "York James"
                            ELSE "white"
                          END AS Drought_Region,
                          fips_name, fips_code, fips_geom AS geom
                        FROM fips_csv 
                        WHERE fips_code NOT LIKE "3%"') #EXCLUDE NC LOCALITIES


#convert WKT into an sf object
fips.sf <- st_as_sf(fips_df, wkt = 'geom')

#convert sf object to SpatialPolygonDataframe
fips.spdf <- as(fips.sf, "Spatial")

# assign the CRS
crs(fips.spdf) <- "+proj=utm +zone=18 +datum=WGS84 +units=m +no_defs 
                 +ellps=WGS84 +towgs84=0,0,0"
# visually confirm that it can plot/worked!
plot(fips.spdf, 
     main="Map of Drought Areas", border = "black", col = as.factor(fips.spdf$Drought_Region))
names(fips.spdf)
# write a shapefile
writeOGR(fips.spdf, "C:/Users/maf95834/Documents/shp_output","ows_drought_regions", driver="ESRI Shapefile", overwrite_layer = T)
HARPgroup/hydro-tools documentation built on July 4, 2025, 11:05 a.m.