knitr::opts_chunk$set( echo = TRUE, warning = FALSE, error = FALSE, message = FALSE ) unlink("../private/twi.tif") unlink(paste0("../private/twi.tif", ".aux.xml")) ## TWI for CAMELs with VRT library(whitebox) library(terra) library(sf) library(opendap.catalog) library(arrow) source("../private/aws.R")
We can connect to the Lynker AWS s3 holdings to extract the refactored and aggregated catchments upstream of USGS gage ID: 01022500
.
cats <- sf::read_sf("/vsis3/formulations-dev/CAMELS20/camels_01022500_2677104/spatial/hydrofabric.gpkg", "catchments")
plot(cats$geom)
Using the VRT produced for hydrofabric work and hosted via Github Pages, we can extract the needed 30m DEM (3DEP 1 - 1 arcsecond).
elev <- rast("/vsicurl/https://mikejohnson51.github.io/opendap.catalog/ned_USGS_1.vrt") dem <- crop(elev, project(vect(cats), crs(elev)))
{ plot(dem) plot(project(vect(cats), crs(elev)), add = TRUE) }
Using the in-memory elevation data we can write a quick function to compute and save a TWI grid:
build_twi <- function(dem, outfile = NULL) { writeRaster(dem, "dem.tif", overwrite = TRUE) gdal_utils("warp", source = "dem.tif", destination = "dem_proj.tif", options = c( "-of", "GTiff", "-t_srs", "EPSG:5070", "-r", "bilinear" ) ) wbt_breach_depressions("dem_proj.tif", "dem_corr.tif") wbt_md_inf_flow_accumulation("dem_corr.tif", "sca.tif") wbt_slope("dem_proj.tif", "slope.tif") wbt_wetness_index( "sca.tif", "slope.tif", 'fin.tif' ) gdal_utils("translate", source = "fin.tif", destination = outfile, options = c("-co", "TILED=YES", "-co", "COPY_SRC_OVERVIEWS=YES", "-co", "COMPRESS=DEFLATE")) unlink("dem.tif") unlink("dem_proj.tif") unlink("dem_corr.tif") unlink("sca.tif") unlink("slope.tif") unlink("fin.tif") return(outfile) } file <- build_twi(dem, outfile = "../private/twi.tif")
{ plot(rast(file)) plot(cats$geom, add = TRUE) }
We next compute a catchment level mean TWI using zonal
.
o <- zonal::execute_zonal(file, cats, "ID", join = FALSE) names(o)[2] <- "TWI" head(o)
plot(merge(cats, o, by = "ID")['TWI'])
The remaining workflow is representative of a process that might update frequently in a lumped formulation (e.g. soil moisture) creating the need to map values back to a common model grid. That is the reason a simple resample/regrid is not possible.
nwm_1km <- materilize_grid( ext = ext(-2303999.62876143, 2304000.37123857, -1920000.70008381, 1919999.29991619), diminsion = c(3841, 4608), projection = 'PROJCS["Sphere_Lambert_Conformal_Conic",GEOGCS["GCS_Sphere",DATUM["D_Sphere",SPHEROID["Sphere",6370000.0,0.0]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Lambert_Conformal_Conic"],PARAMETER["false_easting",0.0],PARAMETER["false_northing",0.0],PARAMETER["central_meridian",-97.0],PARAMETER["standard_parallel_1",30.0],PARAMETER["standard_parallel_2",60.0],PARAMETER["latitude_of_origin",40.000008],UNIT["Meter",1.0]];-35691800 -29075200 126180232.640845;-100000 10000;-100000 10000;0.001;0.001;0.001;IsHighPrecision' )
(w <- weighting_grid(nwm_1km, cats, "ID"))
For this example we are computing a grouped mean. This is done by multiplying a value by its weight/covergage fraction and dividning bt the sum of the coverage_fraction.
# Define summary function: areal.mean <- function(x, w) { sum((x * w), na.rm = TRUE) / sum(w, na.rm = TRUE) }
dt <- merge(w, o, by = "ID") # Apply areal.mean function over "TWI" grouped by cell exe <- dt[, lapply(.SD, FUN = areal.mean, w = coverage_fraction), by = "cell", .SDcols = "TWI" ] # Inject TWI data with into grid nwm_1km[exe$cell] <- exe$TWI
xx <- crop(nwm_1km, project(vect(cats), crs(nwm_1km))) rasterVis::levelplot(xx)
Say a new value was computed at the grid level and needs to be mapped back to the catchments. The "psuedo" value is:
TWI + a random number selected between 1 and 100
.Using the weight grid, this is a straight forward - table based process!
# Imagine this is a model time step updating the grid values... nwm_1km[] = values(nwm_1km) + sample(1:100, ncell(nwm_1km), replace = T) system.time({ # Extract needed cell from layer (1) using the weight grid w$updated_value = nwm_1km[w$cell][,1] # Apply areal.mean function over "updated_value" grouping by ID exe <- w[, lapply(.SD, FUN = areal.mean, w = coverage_fraction), by = "ID", .SDcols = "updated_value" ] }) head(exe)
plot(merge(cats, exe)['updated_value'])
So the question remains can this scale? I hope to show this with a quick naive, but more realisitic example of how the hydrofabric framework being build can support model engine goals:
# AWS bucket location bucket = 'formulations-dev' subdir = 'hydrofabric/CONUS-hydrofabric/ngen-release/01a/2021-11-01/spatial' local_pr = '/Users/mjohnson/github/zonal/to_build/pr_2020.nc'
cats <- sf::read_sf(file.path("/vsis3", bucket, subdir, "hydrofabric.gpkg"), "catchments")
Since this is "new" we will build the weight grid and upload it to s3. This only needs to be done once for each grid/hydrofabric pair.
write_parquet(weighting_grid(local_pr, cats, "ID"), "../private/gridmet_weights.parquet") put_object( file = "../private/gridmet_weights.parquet", object = file.path(subdir, 'gridmet_weights.parquet'), bucket = bucket, multipart = TRUE ) unlink("../private/gridmet_weights.parquet")
The produced weights parquet file is 814 KB... that's it!
Say we want aggregated catchment averages of the 4th day of GridMet rainfall.
- Remote read the weights file
- Partial read the forcing file
- Apply areal.mean
grouped by ID
system.time({ w = read_parquet(file.path('s3:/',bucket, subdir, 'gridmet_weights.parquet')) w$rain = rast(local_pr)[[4]][w$cell] catchment_rainfall <- w[, lapply(.SD, FUN = areal.mean, w = coverage_fraction), by = "ID", .SDcols = "rain" ] }) head(catchment_rainfall)
This is two generally show how hydrofabric attributes can be used in computation.
We make a naive assumption that run off equals rainfall * %sand
.
basin_attributes
file to get sand%system.time({ sand = read_parquet(file.path('s3:/',bucket, subdir, ... = 'basin_attributes.parquet'), col_select = c("ID", "sand-1m-percent")) model = merge(catchment_rainfall, sand, by = "ID") model$runoff = model$rain * model$`sand-1m-percent` })
plot(merge(cats, model)[c("rain", 'runoff')], border = NA)
If (for some reason) you want to take the catchment level "runoff" values, and map them back to the grid, you can do so...
system.time({ dt <- merge(w, model, by = "ID") # Apply areal.mean function over "TWI" grouped by cell grid_runoff <- dt[, lapply(.SD, FUN = areal.mean, w = coverage_fraction), by = "cell", .SDcols = "runoff" ] })
tmp = materilize_grid(local_pr, fillValue = 0) tmp[grid_runoff$cell] <- grid_runoff$runoff plot(tmp)
#plot(crop(tmp, project(vect(cats), crs(tmp))))
unlink(file) unlink(paste0(file, ".aux.xml"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.