R/01-core-utilities.R

Defines functions is_valid_date convert_julian_date extract_dates_universal load_raster_data get_region_boundary `%||%`

Documented in convert_julian_date extract_dates_universal get_region_boundary is_valid_date load_raster_data

# Define null-coalescing operator
`%||%` <- function(x, y) if(is.null(x)) y else x

#' Get region boundary for any specified region
#'
#' @description
#' Universal function to get region boundaries for any geographic area including
#' US states, countries, CONUS, counties, or custom bounding boxes with
#' comprehensive error handling.
#'
#' @param region_def Region definition in various formats:
#'   \itemize{
#'     \item Character: "Ohio", "Nigeria", "CONUS"
#'     \item Character with colon: "Ohio:Franklin" (state:county)
#'     \item Numeric vector: c(xmin, ymin, xmax, ymax) bounding box
#'     \item sf object: existing spatial object
#'   }
#' @param verbose Print progress messages
#'
#' @return sf object with boundary geometry
#'
#' @examples
#' \donttest{
#' # US State with error handling
#' ohio_boundary <- get_region_boundary("Ohio")
#'
#' # Custom bounding box with validation
#' custom_area <- get_region_boundary(c(-84.5, 39.0, -82.0, 41.0))
#' }
#'
#' @export
get_region_boundary <- function(region_def, verbose = FALSE) {

  # Input validation
  if (is.null(region_def)) {
    stop("region_def cannot be NULL", call. = FALSE)
  }

  if (verbose) message("Getting region boundary with enhanced error handling...")

  if (inherits(region_def, "sf")) {
    # sf object provided
    if (verbose) message("Using provided sf object")

    if (nrow(region_def) == 0) {
      stop("Provided sf object contains no features", call. = FALSE)
    }

    # Check for valid geometries
    if (any(!sf::st_is_valid(region_def))) {
      warning("Some geometries in sf object are invalid. Attempting to fix...")
      region_def <- tryCatch({
        sf::st_make_valid(region_def)
      }, error = function(e) {
        warning("Could not fix invalid geometries")
        region_def
      })
    }

    return(region_def)

  } else if (is.character(region_def) && length(region_def) == 1) {

    if (region_def == "CONUS") {
      # Continental US
      if (verbose) message("Getting CONUS boundary")

      if (!requireNamespace("tigris", quietly = TRUE)) {
        stop("Package 'tigris' is required for US boundaries. Please install it.", call. = FALSE)
      }

      tryCatch({
        states <- tigris::states(cb = TRUE, resolution = "20m", year = 2021)
        # Filter out Alaska, Hawaii, and Puerto Rico using base R
        conus <- states[!states$STUSPS %in% c("AK", "HI", "PR"), ]

        if (nrow(conus) == 0) {
          stop("Failed to get CONUS states", call. = FALSE)
        }

        return(conus)

      }, error = function(e) {
        stop(sprintf("Failed to get CONUS boundary: %s", e$message), call. = FALSE)
      })

    } else if (grepl(":", region_def)) {
      # State:County format
      if (verbose) message(sprintf("Getting county boundary: %s", region_def))

      parts <- strsplit(region_def, ":")[[1]]
      if (length(parts) != 2) {
        stop("State:County format should have exactly one colon separator", call. = FALSE)
      }

      state_name <- trimws(parts[1])
      county_name <- trimws(parts[2])

      if (!requireNamespace("tigris", quietly = TRUE)) {
        stop("Package 'tigris' is required for US boundaries. Please install it.", call. = FALSE)
      }

      tryCatch({
        counties <- tigris::counties(state = state_name, cb = TRUE)
        # Filter using base R instead of dplyr
        county <- counties[counties$NAME == county_name, ]

        if (nrow(county) == 0) {
          available_counties <- paste(counties$NAME, collapse = ", ")
          stop(sprintf("County '%s' not found in state '%s'. Available counties: %s",
                       county_name, state_name, available_counties), call. = FALSE)
        }

        return(county)

      }, error = function(e) {
        stop(sprintf("Failed to get county boundary: %s", e$message), call. = FALSE)
      })

    } else {
      # Try US state first, then country
      if (verbose) message(sprintf("Searching for region: %s", region_def))

      # Try US state with enhanced name matching
      state_result <- tryCatch({
        if (!requireNamespace("tigris", quietly = TRUE)) {
          stop("tigris package not available")
        }

        # Get all states first
        states <- tigris::states(cb = TRUE, resolution = "20m", year = 2021)

        if (verbose) {
          message(sprintf("Loaded %d states from tigris", nrow(states)))
          message(sprintf("Available columns: %s", paste(names(states), collapse = ", ")))
          message(sprintf("Sample state names: %s", paste(head(states$NAME, 3), collapse = ", ")))
        }

        # Try exact match with NAME column first (using base R subsetting to avoid dplyr conflicts)
        state_match <- states[states$NAME == region_def, ]

        # If not found, try case-insensitive match
        if (nrow(state_match) == 0) {
          state_match <- states[tolower(states$NAME) == tolower(region_def), ]
          if (verbose && nrow(state_match) > 0) message("Found with case-insensitive match")
        }

        # If still not found, try postal code (STUSPS column)
        if (nrow(state_match) == 0 && "STUSPS" %in% names(states)) {
          state_match <- states[states$STUSPS == toupper(region_def), ]
          if (verbose && nrow(state_match) > 0) message("Found with postal code match")
        }

        # If still not found, try partial match
        if (nrow(state_match) == 0) {
          partial_matches <- grepl(tolower(region_def), tolower(states$NAME), fixed = TRUE)
          state_match <- states[partial_matches, ]
          if (verbose && nrow(state_match) > 0) message("Found with partial match")
        }

        if (nrow(state_match) > 0) {
          if (verbose) message(sprintf("Successfully found state: %s", state_match$NAME[1]))
          return(state_match[1, ])  # Take first match if multiple
        } else {
          if (verbose) message("No state matches found")
          stop("Not found as US state")
        }
      }, error = function(e) {
        if (verbose) message(sprintf("US state search failed: %s", e$message))
        NULL
      })

      if (!is.null(state_result)) {
        return(state_result)
      }

      # Try country
      country_result <- tryCatch({
        if (!requireNamespace("rnaturalearth", quietly = TRUE)) {
          stop("Package 'rnaturalearth' is required for country boundaries. Please install it.",
               call. = FALSE)
        }

        country <- rnaturalearth::ne_countries(scale = "medium", country = region_def,
                                               returnclass = "sf")

        if (nrow(country) == 0) {
          stop("Country not found")
        }

        if (verbose) message("Found as country")
        return(country)

      }, error = function(e) {
        if (verbose) message(sprintf("Country search failed: %s", e$message))
        NULL
      })

      if (!is.null(country_result)) {
        return(country_result)
      }

      # If neither worked, provide helpful error with suggestions
      error_msg <- sprintf("Region '%s' not found as US state or country.", region_def)

      # Try to provide helpful suggestions
      if (requireNamespace("tigris", quietly = TRUE)) {
        tryCatch({
          states <- tigris::states(cb = TRUE, resolution = "20m", year = 2021)
          # Find similar state names
          state_names <- states$NAME
          similar_states <- state_names[grepl(tolower(substr(region_def, 1, 3)),
                                              tolower(state_names))]
          if (length(similar_states) > 0) {
            error_msg <- paste(error_msg,
                               sprintf("\nDid you mean one of these US states? %s",
                                       paste(similar_states[1:min(3, length(similar_states))], collapse = ", ")))
          }
        }, error = function(e) {})
      }

      stop(error_msg, call. = FALSE)
    }

  } else if (is.numeric(region_def) && length(region_def) == 4) {
    # Bounding box provided
    if (verbose) message("Creating boundary from bounding box")

    # Validate bounding box values
    if (any(is.na(region_def)) || any(is.infinite(region_def))) {
      stop("Bounding box contains NA or infinite values", call. = FALSE)
    }

    xmin <- region_def[1]
    ymin <- region_def[2]
    xmax <- region_def[3]
    ymax <- region_def[4]

    # Validate bounding box logic
    if (xmin >= xmax) {
      stop(sprintf("Invalid bounding box: xmin (%.3f) must be < xmax (%.3f)", xmin, xmax),
           call. = FALSE)
    }
    if (ymin >= ymax) {
      stop(sprintf("Invalid bounding box: ymin (%.3f) must be < ymax (%.3f)", ymin, ymax),
           call. = FALSE)
    }

    # Check for reasonable coordinate ranges (assuming geographic coordinates)
    if (abs(xmin) > 180 || abs(xmax) > 180) {
      warning("Longitude values outside typical range [-180, 180]. Ensure coordinates are correct.")
    }
    if (abs(ymin) > 90 || abs(ymax) > 90) {
      warning("Latitude values outside typical range [-90, 90]. Ensure coordinates are correct.")
    }

    tryCatch({
      bbox_poly <- sf::st_polygon(list(matrix(c(
        xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax, xmin, ymin
      ), ncol = 2, byrow = TRUE)))

      bbox_sf <- sf::st_sf(geometry = sf::st_sfc(bbox_poly, crs = 4326))

      return(bbox_sf)

    }, error = function(e) {
      stop(sprintf("Failed to create bounding box polygon: %s", e$message), call. = FALSE)
    })

  } else {
    stop("Invalid region boundary specification. Must be sf object, character string, or numeric vector of length 4",
         call. = FALSE)
  }
}

#' Load raster data from various sources
#'
#' @description
#' Universal function to load raster data from files, directories, or raster objects
#' with comprehensive error handling and validation.
#'
#' @param input_data Character string (path to file or directory),
#'   character vector of file paths, or a SpatRaster/Raster* object
#' @param pattern File pattern for directory search (default: tif files)
#' @param recursive Search subdirectories recursively
#' @param verbose Print progress messages
#'
#' @return List of terra SpatRaster objects
#'
#' @examples
#' \dontrun{
#' # These examples require directory structures with multiple data files
#' # Load from directory with error handling
#' rasters <- load_raster_data("/path/to/raster/files")
#'
#' # Load from file list with validation
#' rasters <- load_raster_data(c("file1.tif", "file2.tif"))
#' }
#'
#' @export
load_raster_data <- function(input_data, pattern = "\\.(tif|tiff)$",
                             recursive = FALSE, verbose = FALSE) {

  # Input validation
  if (is.null(input_data)) {
    stop("input_data cannot be NULL", call. = FALSE)
  }

  if (verbose) message("Loading raster data with robust error handling...")

  # Safe raster reading function
  read_raster_safe <- function(f) {
    if (verbose) message(sprintf("Reading: %s", basename(f)))

    tryCatch({
      # Check file size (warn if very large)
      file_size_mb <- file.info(f)$size / (1024^2)
      if (file_size_mb > 1000) {
        warning(sprintf("Large file detected (%.1f MB): %s", file_size_mb, basename(f)))
      }

      raster <- terra::rast(f)

      # Basic validation
      if (terra::ncell(raster) == 0) {
        warning(sprintf("Raster has no cells: %s", basename(f)))
        return(NULL)
      }

      # Check for valid CRS
      if (is.na(terra::crs(raster))) {
        warning(sprintf("Raster has no CRS information: %s", basename(f)))
      }

      return(raster)

    }, error = function(e) {
      warning(sprintf("Failed to read raster: %s\nReason: %s", basename(f), e$message))
      return(NULL)
    })
  }

  if (is.character(input_data)) {
    if (length(input_data) == 1) {
      if (dir.exists(input_data)) {
        # Directory provided
        if (verbose) message(sprintf("Searching directory: %s", input_data))

        files <- list.files(input_data, pattern = pattern, full.names = TRUE,
                            ignore.case = TRUE, recursive = recursive)

        if (length(files) == 0) {
          stop(sprintf("No raster files found in directory '%s' matching pattern '%s'",
                       input_data, pattern), call. = FALSE)
        }

        if (verbose) message(sprintf("Found %d potential raster files", length(files)))

        rasters <- lapply(files, read_raster_safe)
        rasters <- Filter(Negate(is.null), rasters)

        if (length(rasters) == 0) {
          stop("No valid raster files could be loaded from directory.", call. = FALSE)
        }

        if (verbose) message(sprintf("Successfully loaded %d rasters", length(rasters)))
        return(rasters)

      } else if (file.exists(input_data)) {
        # Single file provided
        if (verbose) message("Loading single raster file")

        r <- read_raster_safe(input_data)
        if (is.null(r)) {
          stop(sprintf("Failed to read raster file: %s", input_data), call. = FALSE)
        }

        return(list(r))

      } else {
        stop(sprintf("Provided path does not exist: %s", input_data), call. = FALSE)
      }
    } else {
      # Multiple files provided
      if (verbose) message(sprintf("Loading %d raster files", length(input_data)))

      # Check which files exist
      existing <- input_data[file.exists(input_data)]
      missing <- input_data[!file.exists(input_data)]

      if (length(missing) > 0) {
        warning(sprintf("Files do not exist: %s", paste(missing, collapse = ", ")))
      }

      if (length(existing) == 0) {
        stop("None of the specified files exist.", call. = FALSE)
      }

      rasters <- lapply(existing, read_raster_safe)
      rasters <- Filter(Negate(is.null), rasters)

      if (length(rasters) == 0) {
        stop("No valid rasters could be loaded from the file list.", call. = FALSE)
      }

      if (verbose) message(sprintf("Successfully loaded %d/%d rasters",
                                   length(rasters), length(input_data)))
      return(rasters)
    }
  } else if (inherits(input_data, c("SpatRaster", "RasterStack", "RasterLayer"))) {
    # Single raster object provided
    if (verbose) message("Converting raster object to list")

    raster <- tryCatch({
      terra::rast(input_data)
    }, error = function(e) {
      stop(sprintf("Failed to convert raster object: %s", e$message), call. = FALSE)
    })

    return(list(raster))

  } else if (is.list(input_data)) {
    # List of rasters provided
    if (verbose) message(sprintf("Validating list of %d raster objects", length(input_data)))

    # Validate each raster in the list
    valid_rasters <- list()
    for (i in seq_along(input_data)) {
      tryCatch({
        if (inherits(input_data[[i]], c("SpatRaster", "RasterStack", "RasterLayer"))) {
          valid_rasters[[length(valid_rasters) + 1]] <- terra::rast(input_data[[i]])
        } else {
          warning(sprintf("List element %d is not a valid raster object", i))
        }
      }, error = function(e) {
        warning(sprintf("Failed to process list element %d: %s", i, e$message))
      })
    }

    if (length(valid_rasters) == 0) {
      stop("No valid rasters found in the provided list.", call. = FALSE)
    }

    if (verbose) message(sprintf("Validated %d/%d raster objects",
                                 length(valid_rasters), length(input_data)))
    return(valid_rasters)

  } else {
    stop(sprintf("Invalid raster data input type: %s. Must be file path(s), directory, raster object, or list.",
                 class(input_data)[1]), call. = FALSE)
  }
}

#' Extract dates from filenames using various patterns
#'
#' @description
#' Universal function to extract dates from filenames or provide custom labels.
#' Enhanced with more flexible regex patterns that work with any filename prefix.
#'
#' @param input_data Character vector (file paths or folder), or list of raster layers
#' @param date_patterns Named list of custom regex patterns for date extraction
#' @param verbose Print progress messages
#'
#' @return Character vector of extracted or inferred date labels
#'
#' @examples
#' \dontrun{
#' # These examples require external data files not included with the package
#' # Extract dates from filenames
#' dates <- extract_dates_universal(c("ndvi_2023-05-15.tif", "evi_2023-06-15.tif"))
#'
#' # Custom date patterns
#' custom_patterns <- list("MMDDYYYY" = "\\b[0-9]{2}[0-9]{2}[0-9]{4}\\b")
#' dates <- extract_dates_universal(files, custom_patterns)
#' }
#'
#' @export
extract_dates_universal <- function(input_data, date_patterns = NULL, verbose = FALSE) {

  if (verbose) message("Extracting dates from filenames with enhanced patterns...")

  # ENHANCED default patterns - more flexible regex
  default_patterns <- list(
    "YYYY-MM-DD" = "[0-9]{4}-[0-9]{2}-[0-9]{2}",      # Matches anywhere in filename
    "YYYY_MM_DD" = "[0-9]{4}_[0-9]{2}_[0-9]{2}",      # Underscore separated
    "YYYYMMDD"   = "[0-9]{8}",                        # 8 consecutive digits
    "YYYY-MM"    = "[0-9]{4}-[0-9]{2}",               # Year-Month only
    "YYYY"       = "(19|20)[0-9]{2}",                 # Year only (1900s or 2000s)
    "DD-MM-YYYY" = "[0-9]{2}-[0-9]{2}-[0-9]{4}",      # European format
    "MM-DD-YYYY" = "[0-9]{2}-[0-9]{2}-[0-9]{4}",      # US format
    "YYYY-DDD"   = "[0-9]{4}-[0-9]{3}",               # Julian day format
    "YYYYDDD"    = "[0-9]{7}",                        # Julian day without separator
    "YYYY.MM.DD" = "[0-9]{4}\\.[0-9]{2}\\.[0-9]{2}",  # Dot separated
    "YYYY/MM/DD" = "[0-9]{4}/[0-9]{2}/[0-9]{2}",      # Slash separated
    "DD_MM_YYYY" = "[0-9]{2}_[0-9]{2}_[0-9]{4}",      # European with underscores
    "MM_DD_YYYY" = "[0-9]{2}_[0-9]{2}_[0-9]{4}"       # US with underscores
  )

  patterns_to_use <- date_patterns %||% default_patterns

  # Get filenames
  files <- NULL
  if (is.character(input_data)) {
    if (length(input_data) == 1 && dir.exists(input_data)) {
      files <- list.files(input_data, pattern = "\\.(tif|tiff|nc|img)$",
                          full.names = FALSE, ignore.case = TRUE)
      if (length(files) == 0) {
        warning(sprintf("No raster files found in directory: %s", input_data))
        return(character(0))
      }
    } else {
      files <- basename(input_data)
    }
  } else if (is.list(input_data)) {
    files <- names(input_data)
    if (is.null(files)) {
      files <- paste0("Layer_", seq_along(input_data))
    }
  } else {
    stop("Unsupported input type for extracting dates.", call. = FALSE)
  }

  if (verbose) message(sprintf("Processing %d files for date extraction", length(files)))

  # Extract dates with enhanced matching
  dates <- sapply(files, function(f) {
    if (verbose && which(files == f) %% 10 == 0) {
      message(sprintf("Processing file %d/%d: %s", which(files == f), length(files), f))
    }

    # Try each pattern
    for (pattern_name in names(patterns_to_use)) {
      pattern <- patterns_to_use[[pattern_name]]

      # Use regmatches and regexpr for more robust extraction
      matches <- regmatches(f, regexpr(pattern, f))

      if (length(matches) > 0 && matches != "") {
        date_str <- matches[1]  # Take first match

        if (verbose) {
          message(sprintf("  Found pattern %s: %s", pattern_name, date_str))
        }

        # Convert to standard format
        standardized_date <- tryCatch({
          switch(pattern_name,
                 "YYYY-MM-DD" = date_str,
                 "YYYY_MM_DD" = gsub("_", "-", date_str),
                 "YYYYMMDD"   = paste0(substr(date_str, 1, 4), "-",
                                       substr(date_str, 5, 6), "-",
                                       substr(date_str, 7, 8)),
                 "YYYY-MM"    = paste0(date_str, "-01"),  # Add day
                 "YYYY"       = paste0(date_str, "-01-01"), # Add month and day
                 "DD-MM-YYYY" = paste0(substr(date_str, 7, 10), "-",
                                       substr(date_str, 4, 5), "-",
                                       substr(date_str, 1, 2)),
                 "MM-DD-YYYY" = paste0(substr(date_str, 7, 10), "-",
                                       substr(date_str, 1, 2), "-",
                                       substr(date_str, 4, 5)),
                 "YYYY-DDD"   = convert_julian_date(date_str),
                 "YYYYDDD"    = convert_julian_date(paste0(substr(date_str, 1, 4), "-",
                                                           substr(date_str, 5, 7))),
                 "YYYY.MM.DD" = gsub("\\.", "-", date_str),
                 "YYYY/MM/DD" = gsub("/", "-", date_str),
                 "DD_MM_YYYY" = paste0(substr(date_str, 7, 10), "-",
                                       substr(date_str, 4, 5), "-",
                                       substr(date_str, 1, 2)),
                 "MM_DD_YYYY" = paste0(substr(date_str, 7, 10), "-",
                                       substr(date_str, 1, 2), "-",
                                       substr(date_str, 4, 5)),
                 date_str  # Default: return as-is
          )
        }, error = function(e) {
          if (verbose) message(sprintf("  Error standardizing %s: %s", date_str, e$message))
          date_str
        })

        # Validate the standardized date
        if (is_valid_date(standardized_date)) {
          if (verbose) message(sprintf("  Successfully extracted: %s", standardized_date))
          return(standardized_date)
        } else {
          if (verbose) message(sprintf("  Invalid date after standardization: %s", standardized_date))
        }
      }
    }

    # If no date pattern matched, try alternative approaches

    # Look for any 4-digit year in the filename
    year_match <- regmatches(f, regexpr("(19|20)[0-9]{2}", f))
    if (length(year_match) > 0) {
      year <- year_match[1]
      if (verbose) message(sprintf("  Found year only: %s", year))
      return(paste0(year, "-01-01"))
    }

    # Look for any sequence of 6-8 digits (could be YYYYMMDD or YYMMDD)
    digit_match <- regmatches(f, regexpr("[0-9]{6,8}", f))
    if (length(digit_match) > 0) {
      digits <- digit_match[1]
      if (nchar(digits) == 8) {
        # Assume YYYYMMDD
        formatted <- paste0(substr(digits, 1, 4), "-",
                            substr(digits, 5, 6), "-",
                            substr(digits, 7, 8))
        if (is_valid_date(formatted)) {
          if (verbose) message(sprintf("  Extracted from digits: %s", formatted))
          return(formatted)
        }
      } else if (nchar(digits) == 6) {
        # Assume YYMMDD and add 20 prefix for 2000s
        formatted <- paste0("20", substr(digits, 1, 2), "-",
                            substr(digits, 3, 4), "-",
                            substr(digits, 5, 6))
        if (is_valid_date(formatted)) {
          if (verbose) message(sprintf("  Extracted from 6 digits: %s", formatted))
          return(formatted)
        }
      }
    }

    # If all else fails, return unknown
    return(paste0("Unknown_", which(files == f)))
  })

  # Check extraction success
  n_extracted <- sum(!grepl("^Unknown_", dates))
  if (verbose) {
    message(sprintf("Successfully extracted dates from %d/%d files (%.1f%%)",
                    n_extracted, length(dates), (n_extracted/length(dates))*100))

    if (n_extracted > 0) {
      message("Extracted dates sample:")
      sample_dates <- dates[!grepl("^Unknown_", dates)]
      message(paste("  ", head(sample_dates, 3), collapse = "\n"))
    }
  }

  if (n_extracted == 0) {
    warning("No dates could be extracted from any filenames. Showing first few filenames:")
    warning(paste("Files:", paste(head(files, 3), collapse = ", ")))
    warning("Consider providing custom date_patterns or check filename formats.")
  } else if (n_extracted < length(dates) * 0.5) {
    warning(sprintf("Only extracted dates from %.1f%% of files. Consider custom date patterns.",
                    (n_extracted/length(dates))*100))
  }

  return(unname(dates))
}

#' Convert Julian date to standard format
#'
#' @description
#' Internal function to convert Julian dates (YYYY-DDD) to standard YYYY-MM-DD format.
#'
#' @param julian_str Julian date string in format "YYYY-DDD"
#' @return Standard date string "YYYY-MM-DD"
#' @keywords internal
convert_julian_date <- function(julian_str) {
  tryCatch({
    parts <- strsplit(julian_str, "-")[[1]]
    year <- as.numeric(parts[1])
    julian_day <- as.numeric(parts[2])

    # Convert Julian day to date
    date_obj <- as.Date(julian_day - 1, origin = paste0(year, "-01-01"))
    return(format(date_obj, "%Y-%m-%d"))
  }, error = function(e) {
    return(julian_str)  # Return original if conversion fails
  })
}

#' Validate date string
#'
#' @description
#' Internal function to check if a date string is valid.
#'
#' @param date_str Date string to validate
#' @return Logical indicating if date is valid
#' @keywords internal
is_valid_date <- function(date_str) {
  tryCatch({
    as.Date(date_str)
    return(TRUE)
  }, error = function(e) {
    return(FALSE)
  })
}

Try the geospatialsuite package in your browser

Any scripts or data that you put into this service are public.

geospatialsuite documentation built on Nov. 6, 2025, 1:06 a.m.