validate_literature.R
In lakefetch: Calculate Fetch and Wave Exposure for Lake Sampling Points

# ==============================================================================
# lakefetch Literature Validation Tests
# ==============================================================================
# This script validates fetch calculations against published lake morphometry
# data and documents alignment with the Shore Protection Manual (SPM) methodology.
#
# Validation approach:
# 1. Use lakes with well-documented morphometry from World Lake Database
# 2. Download boundaries from OpenStreetMap
# 3. Calculate fetch at lake center
# 4. Compare to expected values based on published dimensions
#
# Reference methodology:
# - Shore Protection Manual (USACE, 1984) - standard fetch calculation method
# - Mason et al. (2018) - Great Lakes effective fetch methodology
#
# Run with: source("inst/validation/validate_literature.R")
# ==============================================================================

library(sf)

# Load package
if (!requireNamespace("lakefetch", quietly = TRUE)) {
  devtools::load_all(".")
} else {
  library(lakefetch)
}

cat("\n")
cat("=" |> rep(70) |> paste(collapse = ""), "\n")
cat("LAKEFETCH LITERATURE VALIDATION\n")
cat("=" |> rep(70) |> paste(collapse = ""), "\n")
cat("\nValidating fetch calculations against real lakes with published morphometry.\n")
cat("Reference: Shore Protection Manual (USACE, 1984); World Lake Database (ILEC)\n\n")

# ==============================================================================
# Test Lakes with Published Morphometry
# ==============================================================================
# Selected lakes with well-documented dimensions from World Lake Database
# and other limnological literature.

test_lakes <- list(
  # Lake Sunapee, NH - well-documented small lake
  # Source: NH DES, various limnology publications
  lake_sunapee = list(
    name = "Lake Sunapee",
    state = "New Hampshire",
    coords = c(-72.053, 43.423),  # Approximate center
    area_km2 = 16.7,
    max_length_m = 13000,  # ~13 km long
    max_width_m = 2800,    # ~2.8 km wide
    max_depth_m = 33,
    source = "NH DES Lake Survey"
  ),

  # Cayuga Lake, NY - Finger Lake with known dimensions
  # Source: USGS, Cornell limnology
  cayuga_lake = list(
    name = "Cayuga Lake",
    state = "New York",
    coords = c(-76.7, 42.687),  # Approximate center
    area_km2 = 172,
    max_length_m = 61000,  # ~61 km long (very elongated)
    max_width_m = 2800,    # ~2.8 km wide
    max_depth_m = 133,
    source = "USGS / Cornell University"
  ),

  # Green Lake, WI - meromictic lake, well-studied
  # Source: Wisconsin DNR, USGS
  green_lake_wi = list(
    name = "Green Lake",
    state = "Wisconsin",
    coords = c(-89.003, 43.844),  # Approximate center
    area_km2 = 29.6,
    max_length_m = 12000,  # ~12 km
    max_width_m = 3200,    # ~3.2 km
    max_depth_m = 72,
    source = "Wisconsin DNR"
  )
)

# ==============================================================================
# Validation Functions
# ==============================================================================

#' Calculate expected fetch range for a lake
#' Based on lake dimensions, the center point fetch should approximately equal
#' half the dimension in each direction
calculate_expected_fetch <- function(lake_info) {
  # For an elliptical lake approximation:
  # - Min fetch (short axis) ~ max_width / 2
  # - Max fetch (long axis) ~ max_length / 2
  # - Mean fetch ~ approximately the radius of a circle with same area

  equiv_radius <- sqrt(lake_info$area_km2 * 1e6 / pi)  # Equivalent circular radius

  list(
    min_expected = lake_info$max_width_m / 2,
    max_expected = lake_info$max_length_m / 2,
    mean_expected_low = equiv_radius * 0.5,   # Lower bound
    mean_expected_high = equiv_radius * 1.2   # Upper bound (account for elongation)
  )
}

#' Download lake boundary and calculate fetch at center
test_lake_fetch <- function(lake_info, timeout_sec = 60) {
  cat("\n", "-" |> rep(60) |> paste(collapse = ""), "\n")
  cat("Testing:", lake_info$name, "(", lake_info$state, ")\n")
  cat("-" |> rep(60) |> paste(collapse = ""), "\n")

  cat("Published morphometry:\n")
  cat("  Surface area:  ", lake_info$area_km2, "km²\n")
  cat("  Max length:    ", lake_info$max_length_m / 1000, "km\n")
  cat("  Max width:     ", lake_info$max_width_m / 1000, "km\n")
  cat("  Source:        ", lake_info$source, "\n\n")

  # Create initial site at approximate center for boundary download
  sites_df <- data.frame(
    Site = paste0(lake_info$name, "_center"),
    latitude = lake_info$coords[2],
    longitude = lake_info$coords[1],
    lake.name = lake_info$name
  )

  # Try to download boundary
  cat("Downloading boundary from OSM...\n")

  result <- tryCatch({
    # Load and prepare sites
    sites <- load_sites(sites_df)

    # Download lake boundary - returns list with $all_lakes, $sites, $utm_epsg
    lake_data <- get_lake_boundary(sites)

    # Extract the lakes sf object from the returned list
    if (is.null(lake_data) || is.null(lake_data$all_lakes)) {
      return(list(success = FALSE, error = "No lake boundary found"))
    }

    lake_boundary <- lake_data$all_lakes
    n_lakes <- nrow(lake_boundary)

    if (is.null(n_lakes) || n_lakes == 0) {
      return(list(success = FALSE, error = "No lake polygons found"))
    }

    cat("  Downloaded", n_lakes, "lake polygons\n")

    # Find the target lake by name matching
    target_name <- tolower(lake_info$name)

    # Handle cases where name column might have NA or be missing
    if (!"name" %in% names(lake_boundary)) {
      lake_boundary$name <- paste0("lake_", seq_len(nrow(lake_boundary)))
    }
    lake_names <- tolower(as.character(lake_boundary$name))
    lake_names[is.na(lake_names)] <- ""

    # Try exact or partial match
    search_pattern <- gsub(" lake$", "", target_name)
    match_idx <- which(grepl(target_name, lake_names, fixed = TRUE) |
                       grepl(search_pattern, lake_names, fixed = TRUE))

    cat("  Looking for:", target_name, "\n")

    # Calculate areas for all lakes
    lake_boundary$area <- as.numeric(st_area(lake_boundary))

    if (length(match_idx) == 0) {
      # If no name match, find largest lake (likely our target)
      match_idx <- which.max(lake_boundary$area)
      cat("  No name match - using largest lake polygon (area:",
          round(lake_boundary$area[match_idx] / 1e6, 2), "km²)\n")
    } else {
      # Get best match (largest if multiple)
      best_match <- match_idx[which.max(lake_boundary$area[match_idx])]
      cat("  Found", length(match_idx), "matches, using largest:",
          lake_boundary$name[best_match], "\n")
      match_idx <- best_match
    }

    target_lake <- lake_boundary[match_idx, ]
    cat("  Matched lake:", as.character(target_lake$name[1]), "\n")
    cat("  OSM area:", round(target_lake$area[1] / 1e6, 2), "km²\n")

    # Get actual lake centroid for test site
    suppressWarnings({
      lake_centroid <- st_centroid(st_geometry(target_lake))
    })
    centroid_wgs84 <- st_transform(lake_centroid, 4326)
    centroid_coords <- st_coordinates(centroid_wgs84)

    cat("  Using lake centroid at:", round(centroid_coords[2], 4), ",",
        round(centroid_coords[1], 4), "\n")

    # Create site at actual lake center
    center_site <- st_sf(
      Site = paste0(lake_info$name, "_center"),
      site_name = paste0(lake_info$name, "_center"),
      lake_osm_id = as.character(target_lake$osm_id[1]),
      lake_name = as.character(target_lake$name[1]),
      geometry = lake_centroid
    )
    st_crs(center_site) <- st_crs(target_lake)

    # Add required columns for fetch calculation
    center_site$lake_area_km2 <- target_lake$area[1] / 1e6

    # Get lake boundary line
    cat("  Creating boundary line...\n")
    lake_line <- tryCatch({
      st_cast(target_lake, "MULTILINESTRING")
    }, error = function(e) {
      st_boundary(target_lake)
    })

    # Calculate fetch using internal function
    angles <- seq(0, 355, by = lakefetch_options()$angle_resolution_deg)
    cat("  Calculating fetch for", length(angles), "directions...\n")

    # Temporarily set buffer to small value for precise center test
    old_buffer <- lakefetch_options()$buffer_distance_m
    lakefetch_options(buffer_distance_m = 1)

    fetch_vals <- lakefetch:::get_highres_fetch(center_site, lake_line, target_lake, angles)

    lakefetch_options(buffer_distance_m = old_buffer)

    cat("  Fetch calculated successfully\n")

    # Build result structure similar to fetch_calculate output
    for (i in seq_along(angles)) {
      center_site[[paste0("fetch_", angles[i])]] <- fetch_vals[i]
    }
    center_site$fetch_mean <- mean(fetch_vals)
    center_site$fetch_max <- max(fetch_vals)
    center_site$fetch_effective <- mean(sort(fetch_vals, decreasing = TRUE)[1:3])

    list(
      success = TRUE,
      fetch_data = list(results = center_site, lakes = target_lake, angles = angles),
      lake_boundary = target_lake
    )
  }, error = function(e) {
    cat("  Error:", as.character(e$message), "\n")
    list(success = FALSE, error = as.character(e$message))
  })

  return(result)
}

#' Compare calculated fetch to expected values
validate_fetch_result <- function(lake_info, fetch_result) {
  if (!fetch_result$success) {
    cat("\n  SKIPPED: ", fetch_result$error, "\n")
    return(list(pass = NA, reason = fetch_result$error))
  }

  results <- fetch_result$fetch_data$results

  # Get fetch statistics
  fetch_mean <- results$fetch_mean[1]
  fetch_max <- results$fetch_max[1]
  fetch_eff <- results$fetch_effective[1]

  # Get expected ranges
  expected <- calculate_expected_fetch(lake_info)

  # Calculate elongation ratio (length/width)
  elongation <- lake_info$max_length_m / lake_info$max_width_m

  cat("\nCalculated fetch:\n")
  cat("  Mean fetch:      ", round(fetch_mean, 0), "m\n")
  cat("  Max fetch:       ", round(fetch_max, 0), "m\n")
  cat("  Effective fetch: ", round(fetch_eff, 0), "m\n")

  cat("\nExpected ranges (based on morphometry):\n")
  cat("  Max fetch:       ~", round(expected$max_expected, 0), "m (half of max length)\n")
  cat("  Min direction:   ~", round(expected$min_expected, 0), "m (half of max width)\n")
  cat("  Elongation ratio:", round(elongation, 1), "\n")

  # Adjust expectations for elongated lakes
  # For very elongated lakes (ratio > 5), mean fetch will be much higher
  # than circular approximation suggests
  if (elongation > 5) {
    cat("  Note: Lake is highly elongated - adjusting expectations\n")
    expected$mean_expected_high <- expected$mean_expected_high * (elongation / 3)
  }

  cat("  Mean fetch:      ", round(expected$mean_expected_low, 0), "-",
      round(expected$mean_expected_high, 0), "m\n")

  # Validation checks
  # 1. Max fetch should be reasonable relative to lake length
  #    Allow for hitting max_fetch_m cap (50km) for very long lakes
  max_fetch_cap <- lakefetch_options()$max_fetch_m

  # Check if we hit the fetch cap (very long lakes)
  hit_cap <- abs(fetch_max - max_fetch_cap) < 100  # Within 100m of cap

  if (hit_cap) {
    # If we hit the cap, the lake is very long - this is expected behavior
    # Just verify the lake is indeed long enough to warrant hitting the cap
    max_ok <- expected$max_expected > max_fetch_cap * 0.5
    if (max_ok) {
      cat("  Note: Max fetch hit ", max_fetch_cap/1000, "km cap (expected for long lake)\n")
    }
  } else {
    # Normal case: within 50% of expected
    max_ok <- fetch_max >= expected$max_expected * 0.4 &&
              fetch_max <= expected$max_expected * 1.6
  }

  # 2. Mean fetch should be within expected bounds (generous for irregular shapes)
  mean_ok <- fetch_mean >= expected$mean_expected_low * 0.5 &&
             fetch_mean <= expected$mean_expected_high * 2

  # 3. Effective fetch should be reasonable (between mean and max)
  eff_ok <- fetch_eff >= fetch_mean * 0.8 && fetch_eff <= fetch_max * 1.1

  cat("\nValidation:\n")
  cat("  Max fetch within expected range:  ", ifelse(max_ok, "PASS", "FAIL"), "\n")
  cat("  Mean fetch within expected range: ", ifelse(mean_ok, "PASS", "FAIL"), "\n")
  cat("  Effective fetch reasonable:       ", ifelse(eff_ok, "PASS", "FAIL"), "\n")

  all_pass <- max_ok && mean_ok && eff_ok
  cat("\n  Overall: ", ifelse(all_pass, "PASS", "FAIL"), "\n")

  return(list(
    pass = all_pass,
    fetch_mean = fetch_mean,
    fetch_max = fetch_max,
    fetch_eff = fetch_eff,
    expected = expected,
    elongation = elongation
  ))
}

# ==============================================================================
# Run Validation Tests
# ==============================================================================

cat("\n")
cat("=" |> rep(70) |> paste(collapse = ""), "\n")
cat("RUNNING VALIDATION TESTS\n")
cat("=" |> rep(70) |> paste(collapse = ""), "\n")
cat("\nNote: Tests require internet connection for OSM data download.\n")
cat("Tests may be skipped if lake boundaries are not available in OSM.\n")

validation_results <- list()

for (lake_id in names(test_lakes)) {
  lake_info <- test_lakes[[lake_id]]

  # Run test
  fetch_result <- test_lake_fetch(lake_info)

  # Validate
  validation <- validate_fetch_result(lake_info, fetch_result)

  validation_results[[lake_id]] <- list(
    lake = lake_info,
    fetch_result = fetch_result,
    validation = validation
  )
}

# ==============================================================================
# Summary
# ==============================================================================

cat("\n")
cat("=" |> rep(70) |> paste(collapse = ""), "\n")
cat("LITERATURE VALIDATION SUMMARY\n")
cat("=" |> rep(70) |> paste(collapse = ""), "\n\n")

passed <- 0
failed <- 0
skipped <- 0

for (lake_id in names(validation_results)) {
  result <- validation_results[[lake_id]]
  lake_name <- result$lake$name

  if (is.na(result$validation$pass)) {
    status <- "SKIPPED"
    skipped <- skipped + 1
  } else if (result$validation$pass) {
    status <- "PASS"
    passed <- passed + 1
  } else {
    status <- "FAIL"
    failed <- failed + 1
  }

  cat(sprintf("  %-30s %s\n", lake_name, status))
}

cat("\n")
cat(sprintf("Results: %d passed, %d failed, %d skipped\n", passed, failed, skipped))
total_testable <- passed + failed
if (total_testable > 0) {
  cat(sprintf("Pass rate: %.1f%% (of testable lakes)\n", passed / total_testable * 100))
}

# ==============================================================================
# Methodology Notes
# ==============================================================================

cat("\n")
cat("=" |> rep(70) |> paste(collapse = ""), "\n")
cat("METHODOLOGY ALIGNMENT\n")
cat("=" |> rep(70) |> paste(collapse = ""), "\n")

cat("
lakefetch implements fetch calculation following established methods:

1. RAY-CASTING APPROACH
   - Casts rays from each site in multiple directions (default: 5° resolution)
   - Measures distance to shoreline intersection
   - Consistent with USGS and Natural Capital Project algorithms

2. EFFECTIVE FETCH (SPM Method)
   The Shore Protection Manual (USACE, 1984) defines effective fetch as:
   - Multiple radials around a wind direction
   - Weighted by cosine of angle from wind direction

   lakefetch simplifies to mean of top 3 directional fetch values,
   providing a robust exposure metric independent of wind direction.

3. EXPOSURE CLASSIFICATION
   Based on limnological literature thresholds:
   - Sheltered: < 2.5 km effective fetch
   - Moderate:  2.5 - 5.0 km
   - Exposed:   > 5.0 km

References:
- USACE (1984). Shore Protection Manual, 4th ed.
- Mason et al. (2018). Effective fetch maps for Great Lakes. Scientific Data.
- Håkanson (1981). Manual of Lake Morphometry. Springer-Verlag.
")

cat("\n")
cat("=" |> rep(70) |> paste(collapse = ""), "\n")
cat("VALIDATION COMPLETE\n")
cat("=" |> rep(70) |> paste(collapse = ""), "\n\n")
Any scripts or data that you put into this service are public.
lakefetch documentation built on March 20, 2026, 5:10 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
lakefetch
Calculate Fetch and Wave Exposure for Lake Sampling Points

inst/validation/validate_literature.R
In lakefetch: Calculate Fetch and Wave Exposure for Lake Sampling Points

Try the lakefetch package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

lakefetch Calculate Fetch and Wave Exposure for Lake Sampling Points

inst/validation/validate_literature.R In lakefetch: Calculate Fetch and Wave Exposure for Lake Sampling Points

Try the lakefetch package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

lakefetch
Calculate Fetch and Wave Exposure for Lake Sampling Points

inst/validation/validate_literature.R
In lakefetch: Calculate Fetch and Wave Exposure for Lake Sampling Points