knitr::opts_chunk$set(  collapse = TRUE,
  comment = "#>",
  out.width = "100%",
  warning = FALSE, message = FALSE)
library(hydrofabric)
library(ggplot2)

Network Manipulation: Geoprocessing

knitr::include_graphics('../man/figures/level2.png')

Getting the reference fabric

All reference and (precomputed) refactored data products live on ScienceBase. They can be accessed with the web interface or can be downloaded programatically. The hydrofab::get_hydrofabric() utility will download the most current geofabric for a Vector Processing Unit (VPU). Options include downloading the "refactored" (default) or "reference" data. If the requested file already exists, the file path will be returned.

Example

library(hydrofabric)
gpkg <- './conus_nextgen.gpkg'

subset_fabric <- get_subset(gpkg = gpkg, 
                            comid = 101,
                            "../inst/extdata/reference-comid-101-subset.gpkg")

1) Running the Process: Refactoring

Concept

| Parameter | Purpose | Elected Value | | ------------- |:-------------:| -----:| | split_flines_meters | the maximum length flowpath desired in the output. | 10,000| | collapse_flines_meters | the minimum length of inter-confluence flowpath desired in the output. | 1,000 | | collapse_flines_main_meters | the minimum length of between-confluence flowpaths.| 1,000 |

You might also have areas where you want to avoid, or, enforce a splitting event. These can be defined with the following values:

| Parameter | Purpose | | ------------- |:-------------:| | exclude_cats | integer vector of COMIDs to be excluded from collapse modifications. | | events | data.frame containing events as generated by get_flowline_index() |

Example

refactored = refactor("../inst/extdata/reference-comid-101-subset.gpkg",
                      split_flines_meters = 10000, 
                      collapse_flines_meters = 1000, 
                      collapse_flines_main_meters = 1000,
                      fac = '/vsis3/lynker-spatial/gridded-resources/fac.vrt',
                      fdr = '/vsis3/lynker-spatial/gridded-resources/fdr.vrt',
                      outfile = "../inst/extdata/refactor.gpkg")
mapview::mapview(read_hydrofabric("../inst/extdata/refactor.gpkg"))

Outputs

To get a high level understanding of what happened with this "refactor", we can look at the length distributions:

reference = read_hydrofabric("../inst/extdata/reference-comid-101-subset.gpkg")
refactor  = read_hydrofabric("../inst/extdata/refactor.gpkg")

refactor$flowpaths$LENGTHKM = add_lengthkm(refactor$flowpaths)
refactor$catchments$areasqkm = add_areasqkm(refactor$catchments)

reference$catchments$areasqkm = add_areasqkm(reference$catchments)
reference$flowpaths$LENGTHKM = add_lengthkm(reference$flowpaths)

ggplot() + 
  geom_density(data = refactor$flowpaths, aes(x = LENGTHKM), color = "blue", lwd = 3) + 
  geom_density(data = reference$flowpaths, aes(x = LENGTHKM), color = "red", lwd = 3) + 
  xlim(0,10) +
  ylim(0,.75) +
  geom_vline(xintercept = 1, size = 1) + 
  theme_light() + 
  labs(x = "Length (km)", y = "Density",  title = "Length Distribution", 
       subtitle = paste0(sum(reference$flowpaths$LENGTHKM >= 10), ' flowlines from reference (>10 km)\n',
                         sum(refactor$flowpaths$LENGTHKM >= 10), ' flowpaths removed from reference (>10 km)')) +
  geom_label( aes(x=1, y=0.75, label= "Minimum Length"), color="black") +
  geom_label( aes(x=2, y=0.5, label=paste(nrow(reference$flowpaths), "\nreference flowlines")), color="red", fill = "white") +
  geom_label( aes(x=5, y=0.2, label= paste(nrow(refactor$flowpaths), "\nrefactored flowpaths")), color="blue", fill = "white") 

And the area distributions:

ggplot() + 
  geom_density(data = refactor$catchments, aes(x = areasqkm), color = "blue", lwd = 3) + 
  geom_density(data = reference$catchments, aes(x = areasqkm), color = "red", lwd = 3) + 
  xlim(0,25) +
  ylim(0,.6) +
  geom_vline(xintercept = 3) + 
  geom_vline(xintercept = 10, size = 1) + 
  geom_vline(xintercept = 15) + 
  theme_light() + 
  labs(x = expression("Area (km²)"),  title = "Area Distribution", 
       subtitle = paste0(sum(reference$catchments$areasqkm >= 25), ' catchments removed from reference (>25 km²)\n',
                         sum(refactor$catchments$areasqkm >= 25), ' divides removed from refactored (>25 km²)')) +
  annotate("rect", xmin = 3, xmax = 15, ymin = 0, ymax = .6,  alpha = .1) + 
  scale_y_continuous("Density", expand = c(0,0)) + 
  geom_label( aes(x=10, y=0.5, label= "Idealized Range"), color="black") +
  geom_label( aes(x=4, y=0.4, label=paste(nrow(reference$catchments), "\nreference catchments")), color="red", fill = "white") +
  geom_label( aes(x=6.5, y=0.2, label= paste(nrow(refactor$catchments), "\nrefactored divides")), color="blue") 

Lastly, we look at the feature count of the network:

d = data.frame(type = c("Reference", "Refactor"), num = c(nrow(reference$catchments), nrow(refactor$catchments)))

ggplot() + 
  geom_col(data = d, aes(x = type, y = num)) + 
  theme_light() +
  labs(title = "Reference vs Refactor Features", x = "Type", y = "Number of Features")

2. Running the Process: Aggregating

knitr::include_graphics('../man/figures/level3.png')

| Parameter | Purpose | Elected Value | | ------------- |:-------------:| -----:| | ideal_size_sqkm | the maximum length flowpath desired in the output. | 10 | | min_length_km | the minimum length of inter-confluence flowpath desired in the output. | 1 | | min_area_sqkm | the minimum length of between-confluence flowpaths. | 3 |

Here, a hydrolocation POINT layer can be passed to help direct the aggregation, but for simplicity is ignored here:

Example

aggregate_to_distribution(gpkg = "../inst/extdata/refactor.gpkg",
                          ideal_size_sqkm = 10, 
                          min_length_km = 1, 
                          min_area_sqkm = 3, 
                          outfile = "../inst/extdata/aggregate.gpkg", 
                          overwrite = TRUE)
r = read_hydrofabric("../inst/extdata/aggregate.gpkg")
mapview::mapview(r)

Outputs

To get a high level understanding of what happens with this "refactor", we can look at the length distributions:

agg = read_hydrofabric("../inst/extdata/aggregate.gpkg")

ggplot() + 
  geom_density(data = refactor$flowpaths,  aes(x = LENGTHKM), color = "blue", lwd = 3) + 
  geom_density(data = reference$flowpaths, aes(x = LENGTHKM), color = "red", lwd = 3) + 
  geom_density(data = agg$flowpaths, aes(x = lengthkm), color = "green", lwd = 3) + 
  xlim(0,10) +
  ylim(0,.75) +
  geom_vline(xintercept = 1, size = 1) + 
  theme_light() + 
  labs(x = "Length (km)", y = "Density",  title = "Length Distribution", 
       subtitle = paste0(sum(reference$flowpaths$LENGTHKM >= 10), ' flowlines from reference (>10 km)\n',
                         sum(refactor$flowpaths$LENGTHKM >= 10), ' flowpaths removed from refactored (>10 km)\n',
                         sum(agg$flowpaths$lengthkm >= 10), ' flowpaths removed from aggregated (>10 km)\n')) +
  geom_label( aes(x=1, y=0.75, label= "Minimum Length"), color="black") +
  geom_label( aes(x=2, y=0.5, label=paste(nrow(reference$flowpaths), "\nreference flowlines")), color="red", fill = "white") +
  geom_label( aes(x=5, y=0.3, label= paste(nrow(refactor$flowpaths), "\nrefactored flowpaths")), color="blue", fill = "white") +
  geom_label( aes(x=8, y=0.2, label= paste(nrow(agg$flowpaths), "\naggregated flowpaths")), color="darkgreen", fill = "white")

And the area distributions:

ggplot() + 
  geom_density(data = refactor$catchments, aes(x = areasqkm), color = "blue", lwd = 3) + 
  geom_density(data = reference$catchments, aes(x = areasqkm), color = "red", lwd = 3) + 
  geom_density(data = agg$catchments, aes(x = areasqkm), color = "green", lwd = 3) + 
  xlim(0,25) +
  ylim(0,.6) +
  geom_vline(xintercept = 3) + 
  geom_vline(xintercept = 10, size = 1) + 
  geom_vline(xintercept = 15) + 
  theme_light() + 
  labs(x = expression("Area (km²)"),  title = "Area Distribution", 
       subtitle = paste0(sum(reference$catchments$AREASQKM >= 25), ' catchments removed from reference (>25 km²)\n',
                         sum(refactor$catchments$areasqkm >= 25), ' divides removed from refactored (>25 km²)\n',
                         sum(agg$catchments$areasqkm >= 25), ' divides removed from aggregated (>25 km²)\n')) +
  annotate("rect", xmin = 3, xmax = 15, ymin = 0, ymax = .6,  alpha = .1) + 
  scale_y_continuous("Density", expand = c(0,0)) + 
  geom_label( aes(x=10, y=0.5, label= "Idealized Range"), color="black") +
  geom_label( aes(x=4, y=0.4, label=paste(nrow(reference$catchments), "\nreference divides")), color="red", fill = "white") +
  geom_label( aes(x=6.5, y=0.22, label= paste(nrow(refactor$catchments), "\nrefactored divides")), color="blue")  +
   geom_label( aes(x=15, y=0.1, label= paste(nrow(agg$catchments), "\naggregated divides")), color="darkgreen", fill = "white")

Lastly, we can look at the cumulative network traits of each fabric:

data.frame(class=rep(c("Flowpath Length", "Divide Area"), each=3),
                  type=rep(c("Reference", "Refactored", 'Aggregated'),2),
                  num = c(sum(reference$flowpaths$lengthkm),
sum(refactor$flowpaths$LENGTHKM),
sum(agg$flowpaths$lengthkm),
sum(reference$catchments$areasqkm),
sum(refactor$catchments$areasqkm),
sum(agg$catchments$areasqkm))) %>% 
ggplot(aes(x=class, y=num, fill=type)) +
  geom_bar(stat="identity", color="black", position=position_dodge())+
  theme_minimal() + 
  scale_fill_manual(values=c('green', 'blue','red'))

Full Run-through

hf = get_subset(gpkg = './conus_nextgen.gpkg', 
                comid = 101,
                "../inst/extdata/reference-comid-101-subset.gpkg") |>
     refactor(fac = '/vsis3/lynker-spatial/gridded-resources/fac.vrt',
              fdr = '/vsis3/lynker-spatial/gridded-resources/fdr.vrt',
              outfile = "../inst/extdata/refactor.gpkg") |>
     aggregate_to_distribution(outfile = "../inst/extdata/aggregate.gpkg")

With all network manipulations, fundamental network traits change. This requires the utilities to rapidly and efficiently recompute key network metric. The nhdplusTools package provides the option to regenerate all or some of these on the fly using graph algorithms and logic (see Blodget et al (2023))

We will return to the get_sorted() utility in the subsetting section

nhdplusTools::add_plus_network_attributes()
nhdplusTools::get_streamorder()
nhdplusTools::calculate_total_drainage_area()
nhdplusTools::get_sorted()
knitr::include_graphics('../man/figures/figure7.png')
unlink('../inst/extdata/refactor.gpkg')
unlink('aggregate.gpkg')


NOAA-OWP/hydrofabric documentation built on Dec. 7, 2024, 11:24 a.m.