knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 10, fig.height = 7, warning = FALSE, message = FALSE )
The GeoSpatialSuite package provides comprehensive workflow functions that integrate multiple analysis types into complete, automated pipelines. This vignette demonstrates how to build and execute advanced workflows for real-world geospatial analysis scenarios.
By the end of this vignette, you will be able to:
# Load required packages library(geospatialsuite) library(terra) library(sf) # Verify package functionality test_geospatialsuite_package_simple(verbose = TRUE)
The package includes pre-built workflows for common analysis scenarios:
# Create sample multi-temporal data red_raster <- rast(nrows = 80, ncols = 80, xmin = -83.5, xmax = -83.0, ymin = 40.2, ymax = 40.7) values(red_raster) <- runif(6400, 0.1, 0.3) nir_raster <- rast(nrows = 80, ncols = 80, xmin = -83.5, xmax = -83.0, ymin = 40.2, ymax = 40.7) values(nir_raster) <- runif(6400, 0.4, 0.8) # Configure comprehensive NDVI analysis ndvi_config <- list( analysis_type = "ndvi_crop_analysis", input_data = list(red = red_raster, nir = nir_raster), region_boundary = c(-83.5, 40.2, -83.0, 40.7), # Bounding box indices = c("NDVI", "EVI2", "SAVI"), quality_filter = TRUE, temporal_smoothing = FALSE, visualization_config = list( create_maps = TRUE, ndvi_classes = "none", interactive = FALSE ) ) # Execute comprehensive workflow ndvi_workflow_results <- run_comprehensive_geospatial_workflow(ndvi_config) # Access results ndvi_data <- ndvi_workflow_results$results$vegetation_data ndvi_stats <- ndvi_workflow_results$results$statistics ndvi_maps <- ndvi_workflow_results$results$visualizations
# Simulate CDL crop data cdl_raster <- rast(nrows = 80, ncols = 80, xmin = -83.5, xmax = -83.0, ymin = 40.2, ymax = 40.7) values(cdl_raster) <- sample(c(1, 5, 24, 36, 61), 6400, replace = TRUE) # Corn, soybeans, wheat, alfalfa, fallow # Enhanced configuration with crop masking enhanced_ndvi_config <- list( analysis_type = "ndvi_crop_analysis", input_data = list(red = red_raster, nir = nir_raster), region_boundary = "custom", cdl_data = cdl_raster, crop_codes = c(1, 5), # Corn and soybeans only indices = c("NDVI", "SAVI", "DVI"), quality_filter = TRUE, visualization_config = list( create_maps = TRUE, interactive = FALSE ) ) # Run enhanced workflow enhanced_results <- run_comprehensive_geospatial_workflow(enhanced_ndvi_config) # Compare masked vs unmasked results print("Original NDVI stats:") print(global(ndvi_data, "mean", na.rm = TRUE)) print("Crop-masked NDVI stats:") print(global(enhanced_results$results$vegetation_data, "mean", na.rm = TRUE))
# Create multi-band spectral data spectral_stack <- c( red_raster, # Band 1: Red red_raster * 1.2, # Band 2: Green (simulated) red_raster * 0.8, # Band 3: Blue (simulated) nir_raster # Band 4: NIR ) names(spectral_stack) <- c("red", "green", "blue", "nir") # Configure comprehensive vegetation analysis vegetation_config <- list( analysis_type = "vegetation_comprehensive", input_data = spectral_stack, indices = c("NDVI", "EVI2", "SAVI", "GNDVI", "DVI", "RVI"), crop_type = "general", analysis_type_detail = "comprehensive", region_boundary = c(-83.5, 40.2, -83.0, 40.7), visualization_config = list( create_maps = TRUE, comparison_plots = TRUE ) ) # Execute comprehensive vegetation workflow vegetation_results <- run_comprehensive_geospatial_workflow(vegetation_config) # Access detailed results vegetation_indices <- vegetation_results$results$vegetation_analysis$vegetation_indices analysis_details <- vegetation_results$results$vegetation_analysis$analysis_results metadata <- vegetation_results$results$vegetation_analysis$metadata print("Vegetation indices calculated:") print(names(vegetation_indices)) print("Analysis metadata:") print(metadata$indices_used)
# Corn-specific analysis workflow corn_config <- list( analysis_type = "vegetation_comprehensive", input_data = spectral_stack, crop_type = "corn", analysis_type_detail = "comprehensive", cdl_mask = cdl_raster == 1, # Corn mask indices = c("NDVI", "EVI2", "GNDVI"), # Corn-appropriate indices visualization_config = list(create_maps = TRUE) ) corn_results <- run_comprehensive_geospatial_workflow(corn_config) # Extract corn-specific insights if ("stress_analysis" %in% names(corn_results$results$vegetation_analysis$analysis_results)) { stress_results <- corn_results$results$vegetation_analysis$analysis_results$stress_analysis print("Corn stress analysis:") print(stress_results) }
Objective: Monitor environmental conditions around water bodies, assess agricultural impacts on water quality, and identify priority areas for conservation.
# Create environmental monitoring scenario monitoring_extent <- c(-82.8, 39.5, -81.8, 40.5) # Water quality monitoring stations water_stations <- data.frame( station_id = paste0("WQ_", sprintf("%03d", 1:18)), lon = runif(18, monitoring_extent[1] + 0.1, monitoring_extent[3] - 0.1), lat = runif(18, monitoring_extent[2] + 0.1, monitoring_extent[4] - 0.1), nitrate_mg_l = runif(18, 0.5, 15.0), phosphorus_mg_l = runif(18, 0.02, 2.5), turbidity_ntu = runif(18, 1, 45), dissolved_oxygen_mg_l = runif(18, 4, 12), ph = runif(18, 6.8, 8.4), sampling_date = sample(seq(as.Date("2024-05-01"), as.Date("2024-09-30"), by = "day"), 18), watershed = sample(c("Upper_Creek", "Lower_Creek", "Tributary_A"), 18, replace = TRUE) ) # Land use/land cover data lulc_raster <- terra::rast(nrows = 150, ncols = 150, xmin = monitoring_extent[1], xmax = monitoring_extent[3], ymin = monitoring_extent[2], ymax = monitoring_extent[4]) # Simulate realistic LULC pattern lulc_codes <- c(1, 5, 41, 42, 81, 82, 11, 21) # Developed, Forest, Wetland, Agriculture lulc_probs <- c(0.15, 0.25, 0.05, 0.40, 0.08, 0.05, 0.01, 0.01) terra::values(lulc_raster) <- sample(lulc_codes, 22500, replace = TRUE, prob = lulc_probs) names(lulc_raster) <- "LULC" print("Environmental monitoring setup completed")
# Comprehensive water quality assessment water_quality_results <- list() # Analyze each water quality parameter parameters <- c("nitrate_mg_l", "phosphorus_mg_l", "turbidity_ntu", "dissolved_oxygen_mg_l") for (param in parameters) { # Define parameter-specific thresholds thresholds <- switch(param, "nitrate_mg_l" = list("Low" = c(0, 3), "Moderate" = c(3, 6), "High" = c(6, 10), "Excessive" = c(10, Inf)), "phosphorus_mg_l" = list("Low" = c(0, 0.1), "Moderate" = c(0.1, 0.3), "High" = c(0.3, 0.8), "Excessive" = c(0.8, Inf)), "turbidity_ntu" = list("Clear" = c(0, 5), "Moderate" = c(5, 15), "Turbid" = c(15, 30), "Very_Turbid" = c(30, Inf)), "dissolved_oxygen_mg_l" = list("Critical" = c(0, 4), "Stressed" = c(4, 6), "Good" = c(6, 8), "Excellent" = c(8, Inf)) ) # Comprehensive analysis water_quality_results[[param]] <- analyze_water_quality_comprehensive( water_data = water_stations, variable = param, region_boundary = monitoring_extent, thresholds = thresholds, output_folder = paste0("water_quality_", param) ) } print("Water quality analysis completed for all parameters")
# Assess land use impacts on water quality land_use_impact_analysis <- function(water_stations, lulc_raster, buffer_sizes = c(500, 1000, 2000)) { results <- list() for (buffer_size in buffer_sizes) { # Extract land use composition around each station buffered_lulc <- spatial_join_universal( vector_data = water_stations, raster_data = lulc_raster, method = "buffer", buffer_size = buffer_size, summary_function = "mode", # Most common land use variable_names = paste0("dominant_lulc_", buffer_size, "m") ) # Calculate land use percentages for (i in 1:nrow(water_stations)) { station_point <- water_stations[i, ] station_buffer <- sf::st_buffer(sf::st_as_sf(station_point, coords = c("lon", "lat"), crs = 4326), dist = buffer_size) # Extract all LULC values within buffer lulc_values <- terra::extract(lulc_raster, terra::vect(station_buffer)) lulc_table <- table(lulc_values) # Calculate percentages total_pixels <- sum(lulc_table) agriculture_pct <- sum(lulc_table[names(lulc_table) %in% c("1", "5")]) / total_pixels * 100 developed_pct <- sum(lulc_table[names(lulc_table) %in% c("21", "22", "23", "24")]) / total_pixels * 100 water_stations[i, paste0("agriculture_pct_", buffer_size, "m")] <- agriculture_pct water_stations[i, paste0("developed_pct_", buffer_size, "m")] <- developed_pct } results[[paste0("buffer_", buffer_size, "m")]] <- water_stations } return(results) } # Perform land use impact analysis land_use_impacts <- land_use_impact_analysis(water_stations, lulc_raster) # Analyze correlations between land use and water quality correlation_analysis <- function(data) { # Select relevant columns water_quality_vars <- c("nitrate_mg_l", "phosphorus_mg_l", "turbidity_ntu") land_use_vars <- grep("agriculture_pct|developed_pct", names(data), value = TRUE) correlation_matrix <- cor(data[, c(water_quality_vars, land_use_vars)], use = "complete.obs") return(correlation_matrix) } # Analyze correlations for different buffer sizes correlations_500m <- correlation_analysis(land_use_impacts$buffer_500m) correlations_1000m <- correlation_analysis(land_use_impacts$buffer_1000m) correlations_2000m <- correlation_analysis(land_use_impacts$buffer_2000m) print("Land use impact analysis completed") print("Correlations at 1000m buffer:") print(round(correlations_1000m[1:3, 4:7], 3))
# Watershed-scale environmental assessment watershed_assessment <- function(water_stations, environmental_layers) { # Group stations by watershed watershed_summary <- list() for (watershed in unique(water_stations$watershed)) { watershed_stations <- water_stations[water_stations$watershed == watershed, ] # Calculate watershed-level statistics watershed_summary[[watershed]] <- list( n_stations = nrow(watershed_stations), mean_nitrate = mean(watershed_stations$nitrate_mg_l, na.rm = TRUE), mean_phosphorus = mean(watershed_stations$phosphorus_mg_l, na.rm = TRUE), mean_turbidity = mean(watershed_stations$turbidity_ntu, na.rm = TRUE), stations_exceeding_nitrate_threshold = sum(watershed_stations$nitrate_mg_l > 6, na.rm = TRUE), stations_exceeding_phosphorus_threshold = sum(watershed_stations$phosphorus_mg_l > 0.3, na.rm = TRUE) ) } return(watershed_summary) } # Environmental layers for watershed analysis environmental_layers <- list( vegetation = enhanced_ndvi, # From previous case study elevation = elevation, lulc = lulc_raster ) watershed_results <- watershed_assessment(water_stations, environmental_layers) print("Watershed Assessment Results:") for (watershed in names(watershed_results)) { cat("\n", watershed, ":\n") result <- watershed_results[[watershed]] cat(" Stations:", result$n_stations, "\n") cat(" Mean Nitrate:", round(result$mean_nitrate, 2), "mg/L\n") cat(" Mean Phosphorus:", round(result$mean_phosphorus, 3), "mg/L\n") cat(" Stations exceeding nitrate threshold:", result$stations_exceeding_nitrate_threshold, "\n") }
# Identify priority areas for conservation based on water quality risk conservation_priority_analysis <- function(lulc_raster, water_quality_data, slope_data = NULL) { # Create risk factors risk_factors <- list() # Agricultural intensity risk ag_risk <- lulc_raster ag_risk[lulc_raster != 1 & lulc_raster != 5] <- 0 # Non-ag areas = 0 risk ag_risk[lulc_raster == 1 | lulc_raster == 5] <- 1 # Ag areas = 1 risk names(ag_risk) <- "agricultural_risk" # Water quality risk based on monitoring data # Create risk surface using interpolation of water quality data high_risk_stations <- water_quality_data[water_quality_data$nitrate_mg_l > 6 | water_quality_data$phosphorus_mg_l > 0.3, ] if (nrow(high_risk_stations) > 0) { # Simple distance-based risk (closer to high-risk stations = higher risk) risk_raster <- terra::rast(lulc_raster) terra::values(risk_raster) <- 0 for (i in 1:nrow(high_risk_stations)) { station_point <- c(high_risk_stations$lon[i], high_risk_stations$lat[i]) # Create simple distance decay function (this is simplified) distances <- terra::distance(risk_raster, station_point) station_risk <- 1 / (1 + distances / 5000) # 5km decay distance risk_raster <- risk_raster + station_risk } names(risk_raster) <- "water_quality_risk" } else { risk_raster <- ag_risk * 0 # No high-risk stations } # Combined priority score combined_priority <- (ag_risk * 0.6) + (risk_raster * 0.4) names(combined_priority) <- "conservation_priority" # Classify priority levels priority_values <- terra::values(combined_priority, mat = FALSE) priority_breaks <- quantile(priority_values[priority_values > 0], c(0, 0.25, 0.5, 0.75, 1.0), na.rm = TRUE) priority_classified <- terra::classify(combined_priority, cbind(priority_breaks[-length(priority_breaks)], priority_breaks[-1], 1:4)) names(priority_classified) <- "priority_class" return(list( priority_surface = combined_priority, priority_classified = priority_classified, high_risk_stations = high_risk_stations )) } # Generate conservation priorities conservation_priorities <- conservation_priority_analysis(lulc_raster, water_stations) print("Conservation priority analysis completed") print(paste("High-risk stations identified:", nrow(conservation_priorities$high_risk_stations)))
Objective: Analyze landscape changes over time, detect deforestation/land use conversion, and monitor ecosystem health trends.
# Simulate multi-temporal satellite data (5-year time series) years <- 2020:2024 temporal_extent <- c(-83.0, 40.2, -82.0, 41.2) # Create temporal NDVI series with realistic trends temporal_ndvi_series <- list() base_ndvi <- terra::rast(nrows = 120, ncols = 120, xmin = temporal_extent[1], xmax = temporal_extent[3], ymin = temporal_extent[2], ymax = temporal_extent[4]) # Create base NDVI pattern x_coords <- terra::xFromCell(base_ndvi, 1:terra::ncell(base_ndvi)) y_coords <- terra::yFromCell(base_ndvi, 1:terra::ncell(base_ndvi)) base_pattern <- 0.6 + 0.2 * sin((x_coords - min(x_coords)) * 6) + 0.1 * cos((y_coords - min(y_coords)) * 4) + rnorm(terra::ncell(base_ndvi), 0, 0.1) terra::values(base_ndvi) <- pmax(0.1, pmin(0.9, base_pattern)) # Simulate temporal changes for (i in 1:length(years)) { year <- years[i] # Add temporal trends # 1. General vegetation improvement (climate change effect) climate_trend <- (i - 1) * 0.02 # 2. Localized disturbances (development, deforestation) disturbance_effect <- 0 if (i >= 3) { # Disturbance starts in year 3 # Simulate development in lower-left quadrant disturbance_mask <- (x_coords < (min(x_coords) + (max(x_coords) - min(x_coords)) * 0.4)) & (y_coords < (min(y_coords) + (max(y_coords) - min(y_coords)) * 0.4)) disturbance_effect <- ifelse(disturbance_mask, -0.3, 0) } # 3. Inter-annual variability annual_variation <- rnorm(terra::ncell(base_ndvi), 0, 0.05) # Combine effects year_ndvi <- base_ndvi + climate_trend + disturbance_effect + annual_variation year_ndvi <- pmax(0.05, pmin(0.95, year_ndvi)) names(year_ndvi) <- paste0("NDVI_", year) temporal_ndvi_series[[as.character(year)]] <- year_ndvi } print("Multi-temporal data series created for years:", paste(years, collapse = ", "))
# Comprehensive temporal change analysis temporal_analysis_results <- analyze_temporal_changes( data_list = temporal_ndvi_series, dates = as.character(years), region_boundary = temporal_extent, analysis_type = "trend", output_folder = "temporal_analysis_results" ) # Additional change detection analysis change_detection_results <- analyze_temporal_changes( data_list = temporal_ndvi_series, dates = as.character(years), analysis_type = "change_detection" ) # Statistical analysis statistics_results <- analyze_temporal_changes( data_list = temporal_ndvi_series, dates = as.character(years), analysis_type = "statistics" ) print("Temporal analysis completed:") print("Trend analysis:", !is.null(temporal_analysis_results$trend_rasters)) print("Change detection:", length(change_detection_results$change_rasters), "change maps") print("Statistical summary:", length(statistics_results$statistics_rasters), "statistic layers")
# Identify areas of significant change identify_change_hotspots <- function(trend_results, change_results, threshold_slope = 0.02) { # Extract slope (trend) raster slope_raster <- trend_results$trend_rasters$slope r_squared_raster <- trend_results$trend_rasters$r_squared # Identify significant trends significant_trends <- abs(slope_raster) > threshold_slope & r_squared_raster > 0.5 # Classify change types change_types <- slope_raster change_types[slope_raster > threshold_slope & significant_trends] <- 2 # Increasing change_types[slope_raster < -threshold_slope & significant_trends] <- 1 # Decreasing change_types[!significant_trends] <- 0 # No significant change names(change_types) <- "change_type" # Calculate change statistics n_total <- terra::ncell(change_types) n_increasing <- sum(terra::values(change_types) == 2, na.rm = TRUE) n_decreasing <- sum(terra::values(change_types) == 1, na.rm = TRUE) n_stable <- sum(terra::values(change_types) == 0, na.rm = TRUE) change_stats <- list( total_pixels = n_total, increasing_pixels = n_increasing, decreasing_pixels = n_decreasing, stable_pixels = n_stable, increasing_percent = round(n_increasing / n_total * 100, 1), decreasing_percent = round(n_decreasing / n_total * 100, 1) ) return(list( change_types = change_types, significant_trends = significant_trends, change_statistics = change_stats )) } # Identify hotspots change_hotspots <- identify_change_hotspots(temporal_analysis_results, change_detection_results) print("Change Hotspot Analysis:") print(paste("Pixels with increasing vegetation:", change_hotspots$change_statistics$increasing_pixels)) print(paste("Pixels with decreasing vegetation:", change_hotspots$change_statistics$decreasing_pixels)) print(paste("Percentage of area with significant decline:", change_hotspots$change_statistics$decreasing_percent, "%"))
# Synthesize results from all case studies comprehensive_synthesis <- function(agricultural_results, environmental_results, temporal_results) { synthesis_report <- list() # 1. Agricultural insights synthesis_report$agricultural_summary <- list( total_fields_analyzed = nrow(field_boundaries), management_zones_created = management_zones$optimal_zones, average_ndvi = round(mean(terra::values(enhanced_ndvi), na.rm = TRUE), 3), precision_ag_benefits = "Variable-rate applications recommended for all zones" ) # 2. Environmental insights synthesis_report$environmental_summary <- list( water_stations_monitored = nrow(water_stations), parameters_analyzed = length(parameters), watersheds_assessed = length(unique(water_stations$watershed)), high_risk_areas_identified = nrow(conservation_priorities$high_risk_stations) ) # 3. Temporal insights synthesis_report$temporal_summary <- list( years_analyzed = length(years), significant_change_areas = change_hotspots$change_statistics$decreasing_percent, trend_analysis_completed = TRUE, monitoring_recommendations = "Continue monitoring areas with significant decline" ) # 4. Integrated recommendations synthesis_report$integrated_recommendations <- list( precision_agriculture = "Implement zone-based management for optimal yields", water_quality = "Focus conservation efforts on high-risk watersheds", land_change_monitoring = "Establish permanent monitoring plots in change hotspots", data_integration = "Continue multi-source data integration for comprehensive assessment" ) return(synthesis_report) } # Generate comprehensive synthesis final_synthesis <- comprehensive_synthesis( agricultural_results, water_quality_results, temporal_analysis_results ) print("COMPREHENSIVE ANALYSIS SYNTHESIS") print("=====================================") for (section in names(final_synthesis)) { cat("\n", toupper(gsub("_", " ", section)), ":\n") for (item in names(final_synthesis[[section]])) { cat(" ", gsub("_", " ", item), ":", final_synthesis[[section]][[item]], "\n") } }
# Performance optimization for large-scale analyses optimization_tips <- function() { cat("WORKFLOW OPTIMIZATION GUIDE\n") cat("===========================\n\n") cat("Data Management:\n") cat(" - Process data in chunks for large datasets\n") cat(" - Use appropriate raster resolutions for analysis scale\n") cat(" - Implement data caching for repeated analyses\n") cat(" - Clean up intermediate files regularly\n\n") cat("Memory Management:\n") cat(" - Monitor memory usage with gc() and object.size()\n") cat(" - Use terra's out-of-memory processing for large rasters\n") cat(" - Remove unused objects with rm()\n") cat(" - Consider parallel processing for independent operations\n\n") cat("Quality Control:\n") cat(" - Validate inputs before processing\n") cat(" - Implement checkpoints in long workflows\n") cat(" - Log processing steps and parameters\n") cat(" - Create reproducible analysis scripts\n") } optimization_tips()
# Framework for reproducible geospatial research create_reproducible_workflow <- function(project_name, analysis_config) { # Create project structure project_dirs <- c( file.path(project_name, "data", "raw"), file.path(project_name, "data", "processed"), file.path(project_name, "scripts"), file.path(project_name, "results", "figures"), file.path(project_name, "results", "tables"), file.path(project_name, "documentation") ) for (dir in project_dirs) { if (!dir.exists(dir)) dir.create(dir, recursive = TRUE) } # Create analysis log analysis_log <- list( project_name = project_name, creation_date = Sys.time(), geospatialsuite_version = "0.1.0", analysis_config = analysis_config, r_session_info = sessionInfo() ) # Save analysis configuration saveRDS(analysis_log, file.path(project_name, "analysis_log.rds")) cat("Reproducible workflow structure created for:", project_name, "\n") cat("Project directories:", length(project_dirs), "created\n") cat("Analysis log saved\n") return(project_dirs) } # Example usage workflow_config <- list( analysis_types = c("agricultural", "environmental", "temporal"), study_region = "Ohio Agricultural Region", temporal_extent = "2020-2024", spatial_resolution = "30m", coordinate_system = "WGS84" ) project_structure <- create_reproducible_workflow("integrated_geospatial_analysis", workflow_config)
This comprehensive workflow vignette demonstrates:
The workflows in GeoSpatialSuite provide a complete toolkit for professional geospatial analysis, from data preparation through final reporting and recommendations!
This work was developed by the GeoSpatialSuite team with contributions from: Olatunde D. Akanbi, Vibha Mandayam, Yinghui Wu, Jeffrey Yarus, Erika I. Barcelos, and Roger H. French.
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.