R/access_db.R

# Main Functions -------------------------------------------------------------

#' Fetch Variable Information from an SQLite Database
#'
#' Displays information on the available variables in the SQLite database
#' (\code{data/geelite.db}).
#' @param path [mandatory] (character) Path to the root directory of the
#'   generated database.
#' @param format [optional] (character) Output format. Possible values are
#'   \code{"data.frame"} (default) to return a \code{data.frame} object,
#'   or one of \code{"markdown"}, \code{"latex"}, \code{"html"}, \code{"pipe"}
#'   (Pandoc's pipe tables), \code{"simple"} (Pandoc's simple tables), and
#'   \code{"rst"}, which are passed on to \code{knitr} for formatting.
#' @return Returns the variable information in the selected format. If
#'   \code{format = "data.frame"}, a \code{data.frame} is returned. For other
#'   formats, the table is printed in the specified format and \code{NULL} is
#'   returned invisibly.
#' @export
#' @examples
#' # Example: Printing the available variables
#' \dontrun{
#'   fetch_vars(path = "path/to/db")
#' }
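#' # A further illustrative call (same placeholder path), printing the table
#' # as a markdown table via knitr:
#' \dontrun{
#'   fetch_vars(path = "path/to/db", format = "markdown")
#' }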
#' @importFrom RSQLite dbDisconnect dbListTables dbReadTable SQLite
#' @importFrom dplyr arrange distinct pull select filter bind_rows
#' @importFrom lubridate ymd
#' @importFrom knitr kable
#'
fetch_vars <- function(path, format = c("data.frame", "markdown", "latex",
                                        "html", "pipe", "simple", "rst")) {

  # Convert to an absolute path
  path <- normalizePath(path, mustWork = FALSE)

  # To avoid 'no visible binding for global variable' messages (CRAN test)
  band <- zonal_stat <- NULL

  # Ensure that 'format' is one of the allowed options
  format <- match.arg(format)

  # Validate parameters
  params <- list(path = path, file_path = "data/geelite.db")
  validate_params(params)

  # Build the path to the SQLite database and check that it exists
  db_path <- file.path(path, "data/geelite.db")
  if (!file.exists(db_path)) {
    stop(paste0("Database file does not exist at: ", db_path))
  }

  # Establish a connection to the SQLite database
  con <- db_connect(db_path = db_path)

  # Ensure the connection is closed when the function exits
  on.exit(dbDisconnect(con), add = TRUE)

  # List all tables in the connected SQLite database
  tables <- dbListTables(conn = con)

  # Filter out system tables, the 'grid' table, and sort the table names
  tables_drop <- c("grid", "spatial_ref_sys", "sqlite_sequence",
                   "geometry_columns")
  tables <- sort(setdiff(tables, tables_drop))
  tables_df <- data.frame(Table = tables, stringsAsFactors = FALSE)

  # Initialize a list to hold detailed information
  detailed_info_list <- list()

  id_counter <- 1 # Start ID counter from 1

  # Process each table to extract variable information
  for (i in seq_len(nrow(tables_df))) {
    table_name <- tables_df$Table[i]

    # Read the table data from the database
    table_data <- dbReadTable(con, table_name, check.names = FALSE)

    # Extract date columns matching the YYYY_MM_DD format
    date_cols <- grep("^\\d{4}_\\d{2}_\\d{2}$", names(table_data), value = TRUE)
    dates <- ymd(date_cols)

    # Determine the first and last dates and calculate the mean interval
    first_date <- min(dates, na.rm = TRUE)
    last_date <- max(dates, na.rm = TRUE)
    mean_interval <- round(mean(as.numeric(diff(dates)), na.rm = TRUE), 2)

    # Identify unique combinations of band and zonal_stat
    unique_bands_stats <- table_data %>%
      select(band, zonal_stat) %>%
      distinct() %>%
      arrange(band, zonal_stat)

    # For each unique combination, create a row in the detailed_info_list
    for (j in seq_len(nrow(unique_bands_stats))) {
      band_name <- unique_bands_stats$band[j]
      stat_name <- unique_bands_stats$zonal_stat[j]
      variable_name <- paste(table_name, band_name, stat_name, sep = "/")
      tmp <- data.frame(
        ID = id_counter,
        Variable = variable_name,
        Table = table_name,
        Band = band_name,
        Stat = stat_name,
        Start = first_date,
        End = last_date,
        Freq_Days = mean_interval,
        stringsAsFactors = FALSE
      )
      detailed_info_list[[length(detailed_info_list) + 1]] <- tmp
      id_counter <- id_counter + 1
    }
  }

  # Combine all data frames in the list into one data frame
  detailed_info <- do.call(rbind, detailed_info_list)

  # Print or return the output based on user preference
  if (format != "data.frame") {
    print(
      kable(detailed_info, format = format,
            col.names = c("ID", "Variable", "Table", "Band", "Stat",
                          "Start", "End", "Freq (Days)"))
    )
  } else {
    return(detailed_info)
  }
}

# ------------------------------------------------------------------------------

#' Reading, Aggregating, and Processing the SQLite Database
#'
#' Reads, aggregates, and processes the SQLite database
#'   (\code{data/geelite.db}).
#' @param path [mandatory] (character) Path to the root directory of the
#'   generated database.
#' @param variables [optional] (character or integer) Names or IDs of the
#'   variables to be read. Use the \code{fetch_vars} function to identify
#'   available variables and IDs (default: \code{"all"}).
#' @param freq [optional] (character) The frequency for data aggregation.
#'   Options include \code{"day"}, \code{"week"}, \code{"month"},
#'   \code{"bimonth"}, \code{"quarter"}, \code{"season"}, \code{"halfyear"},
#'   \code{"year"} (default: \code{"month"}).
#' @param prep_fun [optional] (function or \code{NULL}) A function for
#'   pre-processing the time series data prior to aggregation, applied after
#'   each series has been expanded to daily frequency. If \code{NULL}, linear
#'   interpolation (via \code{\link{linear_interp}}) is used to fill in the
#'   missing daily values (default: \code{NULL}).
#' @param aggr_funs [optional] (function or list) A function or a list of
#'   functions for aggregating data to the specified frequency (\code{freq}).
#'   Users can directly refer to variable names or IDs. The default function is
#'   the mean: \code{function(x) mean(x, na.rm = TRUE)}.
#' @param postp_funs [optional] (function, list, or character) A function or
#'   list of functions applied to the time series data of a single bin after
#'   aggregation. Users can directly refer to variable names or IDs.
#'   Alternatively, \code{"external"} loads the functions defined in the
#'   \code{postp} folder (see \code{\link{init_postp}}). The default is
#'   \code{NULL}, indicating no post-processing.
#' @return A list where the first element (\code{grid}) is a simple feature
#'   (sf) object, and subsequent elements are data frame objects corresponding
#'   to the variables.
#' @export
#' @examples
#' # Example: Reading variables by IDs
#' \dontrun{
#'   db_list <- read_db(path = "path/to/db",
#'     variables = c(1, 3))
#' }
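#' # A further illustrative call (placeholder path): yearly aggregation with
#' # a custom aggregation function in place of the default mean
#' \dontrun{
#'   db_list <- read_db(path = "path/to/db",
#'     variables = "all",
#'     freq = "year",
#'     aggr_funs = function(x) sum(x, na.rm = TRUE))
#' }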
#'
read_db <- function(
    path,
    variables = "all",
    freq = c("month", "day", "week", "bimonth", "quarter",
             "season", "halfyear", "year"),
    prep_fun = NULL,
    aggr_funs = function(x) mean(x, na.rm = TRUE),
    postp_funs = NULL
) {

  # Convert to an absolute path
  path <- normalizePath(path, mustWork = FALSE)

  # Validate 'freq' parameter
  freq <- match.arg(freq)

  # If user has specified "external" post-processing
  if (identical(postp_funs, "external")) {
    postp_funs <- load_external_postp(path)
  }

  # Ensure 'aggr_funs' is a named list
  if (is.function(aggr_funs) || (is.list(aggr_funs) &&
                                 all(sapply(aggr_funs, is.function)))) {
    aggr_funs <- list(default = aggr_funs)
  }

  # Ensure 'postp_funs' is a named list
  if (is.function(postp_funs) ||
      (is.list(postp_funs) && all(sapply(postp_funs, is.function))) ||
      is.null(postp_funs)) {
    postp_funs <- list(default = postp_funs)
  }

  # If no prep_fun was provided, default to linear interpolation
  if (is.null(prep_fun)) {
    prep_fun <- function(x) {
      linear_interp(x)
    }
  }

  # Validate 'path' and obtain the list of available variables
  variables_all <- fetch_vars(path, format = "data.frame")

  # Validate parameters and determine which variables to read
  variables <- validate_variables_param(variables, variables_all,
                                        prep_fun, aggr_funs, postp_funs)

  # Read and process data for the selected variables
  db_list <- read_variables(path, variables, freq, prep_fun,
                            aggr_funs, postp_funs)

  return(db_list)
}

# ------------------------------------------------------------------------------

#' Initialize Post-Processing Folder and Files
#'
#' Creates a \code{postp} folder at the specified path and adds two template
#' files: \code{structure.json} and \code{functions.R}.
#' @param path [mandatory] (character) The path to the root directory
#'   where the \code{postp} folder should be created.
#' @param verbose [optional] (logical) Display messages (default: \code{TRUE}).
#' @details
#' The \code{structure.json} file is initialized with a default JSON structure:
#' \code{"default": null}. This file is intended for mapping variables to
#' post-processing functions. The \code{functions.R} file is created with a
#' placeholder comment indicating where to define the R functions for
#' post-processing. If the \code{postp} folder already exists, an error will
#' be thrown to prevent overwriting existing files.
#' @return No return value, called for side effects.
#' @export
#' @examples
#' # Example: Initialize post-processing files in the database directory
#' \dontrun{
#'   init_postp("path/to/db")
#' }
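#' # After initialization, the two files can be edited to map variables to
#' # post-processing functions, e.g. (illustrative variable and function
#' # names):
#' #   postp/structure.json:  { "default": null,
#' #                            "chirps/precipitation/mean": ["scale_mm"] }
#' #   postp/functions.R:     scale_mm <- function(x) x / 10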
#'
init_postp <- function(path, verbose = TRUE) {

  # Convert to an absolute path
  path <- normalizePath(path, mustWork = FALSE)

  # Define the 'postp' folder and file paths
  postp_folder <- file.path(path, "postp")
  json_file <- file.path(postp_folder, "structure.json")
  r_script_file <- file.path(postp_folder, "functions.R")

  # Check if the 'postp' folder already exists
  if (dir.exists(postp_folder)) {
    stop(paste0("The 'postp' folder already exists at: ", postp_folder))
  }

  # Create the 'postp' folder
  dir.create(postp_folder, showWarnings = FALSE)

  # Create the 'structure.json' file with the default mapping
  writeLines('{\n  "default": null\n}', json_file)

  # Create the 'functions.R' file with a placeholder comment
  writeLines("# Define your post-processing functions here\n", r_script_file)

  # Prepare messages for structure and functions file initialization
  message <- list(
    "Structure file initialized: 'postp/structure.json'.",
    "Function file initialized: 'postp/functions.R'."
  )

  # Output information if 'verbose' is TRUE
  output_message(message, verbose)

}

# Internal Functions -----------------------------------------------------------

#' Simple Linear Interpolation
#'
#' Replaces \code{NA} values with linear interpolation.
#' @param x [mandatory] (numeric) A numeric vector possibly containing
#'   \code{NA} values.
#' @return A numeric vector of the same length as \code{x}, with \code{NA}
#'   values replaced by linear interpolation.
#' @keywords internal
#' @importFrom stats approx
#'
linear_interp <- function(x) {
  # If all values are NA, return as-is
  if (all(is.na(x))) {
    return(x)
  }

  # Identify non-NA indices
  idx <- which(!is.na(x))

  # If fewer than 2 non-NA values, cannot interpolate meaningfully
  if (length(idx) < 2) {
    return(x)
  }

  # Perform linear interpolation using approx
  # rule = 2 => extend boundary values if the first or last are NA
  approx(
    x      = idx,
    y      = x[idx],
    xout   = seq_along(x),
    method = "linear",
    rule   = 2
  )$y
}
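# Illustrative behaviour (not run): linear_interp(c(NA, 2, NA, 4, NA))
# returns c(2, 2, 3, 4, 4): the interior NA is interpolated and the
# boundary NAs take the nearest observed value (rule = 2).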

# ------------------------------------------------------------------------------

#' Read Variables from Database
#'
#' Reads the specified variables from the SQLite database.
#' @param path [mandatory] (character) Path to the root directory of the
#'   generated database.
#' @param variables [mandatory] (character) A vector of variable names to read.
#' @param freq [mandatory] (character or \code{NULL}) Specifies the frequency
#'   to aggregate the data; \code{NULL} returns the raw, unaggregated series.
#' @param prep_fun [mandatory] (function) Function used for pre-processing.
#' @param aggr_funs [mandatory] (function or list) Aggregation function(s).
#' @param postp_funs [mandatory] (function or list) Post-processing function(s).
#' @return A list of variables read from the database.
#' @keywords internal
#' @importFrom sf st_read
#' @importFrom magrittr %>%
#' @importFrom dplyr rename select filter
#' @importFrom RSQLite dbDisconnect dbReadTable SQLite
#'
read_variables <- function(path, variables, freq, prep_fun,
                           aggr_funs, postp_funs) {

  # To avoid 'no visible binding for global variable' messages (CRAN test)
  id <- band <- GEOMETRY <- zonal_stat <- aggregation <- preprocess <- NULL
  postprocess <- NULL

  # Connect to the SQLite database
  db_path <- file.path(path, "data/geelite.db")
  if (!file.exists(db_path)) {
    stop(paste0("Database file does not exist at: ", db_path))
  }

  # Establish a connection to the SQLite database
  con <- db_connect(db_path = db_path)

  # Ensure the connection is closed when the function exits
  on.exit(dbDisconnect(con), add = TRUE)

  # Read the 'grid' table as an sf object
  grid_sf <- tryCatch(
    sf::st_read(con, "grid", quiet = TRUE) %>%
      select(-1) %>% rename(geometry = GEOMETRY),
    error = function(e) {
      stop("Failed to read the 'grid' table: ", e$message)
    }
  )

  # Initialize the list to store grid and variable data
  db_list <- list(grid = grid_sf)

  # Retrieve all available variables from the database
  variables_all <- fetch_vars(path, format = "data.frame")

  # Match the requested variables with the available ones
  variables_info <- variables_all[match(variables, variables_all$Variable), ]

  # Initialize a list to store data for each variable
  variables_data <- vector("list", length(variables))

  last_table_read <- NULL # Track the last table read to optimize reads
  table_data_cache <- list() # Cache tables to avoid redundant reads

  # Iterate over each variable to extract and process data
  for (i in seq_len(nrow(variables_info))) {
    var_info <- variables_info[i, ]
    variable_name <- var_info$Variable
    table_name <- var_info$Table
    band_name <- var_info$Band
    stat_name <- var_info$Stat

    # Read the table data if it hasn't been read already
    if (is.null(last_table_read) || last_table_read != table_name) {
      if (!table_name %in% names(table_data_cache)) {
        table_data <- tryCatch(
          dbReadTable(con, table_name, check.names = FALSE),
          error = function(e) {
            warning(paste("Failed to read table:", table_name, "-", e$message))
            return(NULL)
          }
        )
        table_data_cache[[table_name]] <- table_data
      } else {
        table_data <- table_data_cache[[table_name]]
      }
      last_table_read <- table_name # Update the last table read
    } else {
      table_data <- table_data_cache[[table_name]]
    }

    # Skip processing if the table data couldn't be read
    if (is.null(table_data)) {
      warning(paste("Skipping variable due to missing table:", table_name))
      next
    }

    # Filter the table data for the current variable's band and stat
    table_data_subset <- table_data %>%
      filter(band == band_name & zonal_stat == stat_name)

    # Skip if no data is found for the current variable
    if (nrow(table_data_subset) == 0) {
      warning(paste("No data found for variable", variable_name))
      next
    }

    if (is.null(freq)) {

      # If no aggregation is specified, prepare the data accordingly
      variable_data <- table_data_subset %>%
        select(-band, -zonal_stat) %>%
        transform(preprocess = NA_character_,
                  aggregation = NA_character_,
                  postprocess = NA_character_) %>%
        subset(select = c("id", "aggregation", "preprocess", "postprocess",
                          setdiff(names(table_data_subset),
                                  c("band", "zonal_stat"))))

    } else {

      # If aggregation is specified, proceed with aggregating the data
      # Capture the body of the pre-processing function as a string
      preprocess_body <- paste(deparse(body(prep_fun)), collapse = "\n")

      # Perform aggregation based on the specified frequency and functions
      variable_data <- aggr_by_freq(
        table = table_data_subset,
        freq = freq,
        prep_fun = prep_fun,
        aggr_funs = aggr_funs,
        postp_funs = postp_funs,
        variable_name = variable_name,
        preprocess_body = preprocess_body
      )

    }

    # Store the processed data for the current variable
    variables_data[[i]] <- variable_data
  }

  # Assign names to the list elements based on variable names
  names(variables_data) <- variables

  # Combine the grid data with all variable data into a single list
  db_list <- c(db_list, variables_data)

  return(db_list)
}
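# Illustrative shape of the returned list (names depend on the database):
#   $grid                     -> sf object read from the 'grid' table
#   $`<table>/<band>/<stat>`  -> wide data frame with one row per 'id' (and
#                                per aggregation/post-processing variant)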

# ------------------------------------------------------------------------------

#' Aggregate Data by Frequency
#'
#' Aggregates data from a wide-format data frame according to a specified
#' frequency and applies aggregation and post-processing functions.
#' @param table [mandatory] (data.frame) A wide-format data frame.
#' @param freq [mandatory] (character) Specifies the frequency to aggregate the
#'   data.
#' @param prep_fun [mandatory] (function) Function used for pre-processing.
#' @param aggr_funs [mandatory] (function or list) Aggregation function(s).
#' @param postp_funs [mandatory] (function or list) Post-processing function(s).
#' @param variable_name [mandatory] (character) Name of the current variable.
#' @param preprocess_body [mandatory] (character) Body of the \code{prep_fun}
#'   function.
#' @return A data frame in wide format with aggregated values.
#' @keywords internal
#' @importFrom magrittr %>%
#' @importFrom reshape2 dcast melt
#' @importFrom lubridate floor_date ymd
#' @importFrom tidyr all_of pivot_longer pivot_wider
#' @importFrom dplyr arrange bind_rows group_by mutate summarise
#'
aggr_by_freq <- function(table, freq, prep_fun, aggr_funs,
                         postp_funs, variable_name, preprocess_body) {

  # To avoid 'no visible binding for global variable' messages (CRAN test)
  . <- id <- value <- freq_date <- aggregation <- postprocess <- NULL

  # Identify columns that represent dates (excluding 'zonal_stat' if present)
  date_cols <- grep("_", names(table), value = TRUE)
  date_cols <- setdiff(date_cols, grep("zonal_stat", date_cols, value = TRUE))

  # Convert the wide-format data to long format for easier manipulation
  df_long <- melt(table, id.vars = c("id", "band", "zonal_stat"),
                  measure.vars = date_cols,
                  variable.name = "date", value.name = "value")

  # Convert the 'date' column from character to Date object
  df_long$date <- as.Date(gsub("_", "-", df_long$date), format = "%Y-%m-%d")

  # Expand the data to daily frequency, filling in missing dates
  df_exp <- expand_to_daily(df_long, prep_fun)

  # Create a new column 'freq_date' that floors the date to the specified 'freq'
  df_exp <- df_exp %>%
    mutate(freq_date = floor_date(date, freq))

  # Retrieve the aggregation function(s) for the current variable
  funcs <- aggr_funs[[variable_name]]
  if (is.null(funcs)) {
    funcs <- aggr_funs[["default"]]
  }
  if (is.null(funcs)) {
    stop(paste("No aggregation function specified for variable",
               variable_name))
  }
  if (is.function(funcs)) {
    funcs <- list(funcs)
  }

  # Initialize a list to store aggregation results
  aggr_results <- list()

  # Iterate over each aggregation function to process the data
  for (i in seq_along(funcs)) {
    aggr_fun <- funcs[[i]]
    # Capture the body of the aggregation function as a string
    aggregation_body <- paste(deparse(body(aggr_fun)), collapse = "\n")

    # Perform grouping and aggregation
    df_group <- df_exp %>%
      group_by(id, freq_date) %>%
      summarise(value = aggr_fun(value), .groups = "drop") %>%
      mutate(aggregation = aggregation_body,
             preprocess = preprocess_body)

    # Append the aggregated data to the results list
    aggr_results[[i]] <- df_group
  }

  # Combine all aggregation results into a single data frame
  df_combined_aggr <- bind_rows(aggr_results)

  # Retrieve post-processing functions for the current variable
  postp_funcs_var <- postp_funs[[variable_name]]
  if (is.null(postp_funcs_var)) {
    postp_funcs_var <- postp_funs[["default"]]
  }

  if (is.null(postp_funcs_var)) {
    # If no post-processing functions, assign NA to 'postprocess' column
    df_final <- transform(df_combined_aggr, postprocess = NA_character_)
  } else {
    # If post-processing functions are provided, apply each function
    if (is.function(postp_funcs_var)) {
      postp_funcs_var <- list(postp_funcs_var)
    }
    postp_results <- list()
    for (i in seq_along(postp_funcs_var)) {
      postp_fun <- postp_funcs_var[[i]]
      # Capture the body of the post-processing function as a string
      postprocess_body <- paste(deparse(body(postp_fun)), collapse = "\n")

      # Apply the post-processing function to the 'value' column
      df_postp <- transform(df_combined_aggr,
                            value = postp_fun(value),
                            postprocess = postprocess_body)

      # Append the post-processed data to the results list
      postp_results[[i]] <- df_postp
    }
    # Combine all post-processed data into a single data frame
    df_final <- bind_rows(postp_results)
  }

  # Convert the long-format aggregated data back to wide format
  df_wide <- dcast(df_final, id + aggregation + postprocess ~ freq_date,
                   value.var = "value")

  # Order the data frame by 'id', 'aggregation', and 'postprocess' columns
  df_wide <- df_wide[order(df_wide$id,
                           df_wide$aggregation,
                           df_wide$postprocess), ]

  return(df_wide) # Return the aggregated wide-format data frame
}
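# Illustrative behaviour: with freq = "month" and the default mean, the daily
# (interpolated) values of each 'id' are grouped by floor_date() (e.g. every
# day in January 2020 maps to "2020-01-01") and collapsed into one column per
# month in the returned wide data frame.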

# ------------------------------------------------------------------------------

#' Expand Data to Daily Frequency
#'
#' Expands the input data frame to a daily frequency, filling in any missing
#' dates within the observed range.
#' @param df_long [mandatory] (data.frame) A long-format data frame with at
#'   least the columns \code{id} and \code{date}.
#' @param prep_fun [mandatory] (function) Function used for pre-processing.
#' @return A data frame expanded to daily dates, with the \code{value} column
#'   pre-processed by \code{prep_fun}.
#' @keywords internal
#' @importFrom dplyr group_by mutate ungroup
#' @importFrom tidyr complete
#'
expand_to_daily <- function(df_long, prep_fun) {

  # To avoid 'no visible binding for global variable' messages (CRAN test)
  id <- value <- NULL

  # Validate that 'df_long' contains the required columns
  required_cols <- c("id", "date", "value")
  if (!all(required_cols %in% names(df_long))) {
    stop("df_long must contain 'id', 'date', and 'value' columns.")
  }

  # Generate a sequence of all dates within the range of the data
  date_range <- seq(min(df_long$date, na.rm = TRUE),
                    max(df_long$date, na.rm = TRUE), by = "day")

  # Expand the data to include all dates for each 'id' and apply pre-processing
  df_exp <- df_long %>%
    group_by(id) %>%
    complete(date = date_range) %>% # Fill in missing dates
    mutate(value = prep_fun(value)) %>% # Apply the pre-processing function
    ungroup()

  return(df_exp)
}
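# Illustrative behaviour: for an 'id' observed on 2020-01-01 and 2020-01-09,
# complete() inserts the seven missing days with NA values, which prep_fun
# (linear_interp when read_db() is called with prep_fun = NULL) then fills in.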

# ------------------------------------------------------------------------------

#' Source an R Script with Notifications About Functions Loaded
#'
#' Sources an R script into a dedicated environment and lists the functions
#' that have been loaded.
#' @param file [mandatory] (character) A character string specifying the path
#'   to the R script to be sourced.
#' @return An environment containing the functions loaded from the sourced file.
#' @keywords internal
#'
source_with_notification <- function(file) {

  # Validate 'file' parameter
  if (!is.character(file) || length(file) != 1) {
    stop("'file' must be a single character string.")
  }

  # Check if the file exists
  if (!file.exists(file)) {
    stop(paste0("The specified file does not exist: ", file))
  }

  # Create a new environment for sourcing
  source_env <- new.env()

  # Source the file into the new environment
  tryCatch(
    sys.source(file, envir = source_env),
    error = function(e) {
      stop("Failed to source the file: ", e$message)
    }
  )

  # List the objects loaded from the sourced file and notify the user
  loaded_functions <- ls(envir = source_env, all.names = TRUE)
  if (length(loaded_functions) > 0) {
    message("Loaded functions: ", paste(loaded_functions, collapse = ", "))
  }

  return(source_env)
}

# ------------------------------------------------------------------------------

#' Load External Post-Processing Functions
#'
#' Loads post-processing functions and their configuration from an external
#' folder named \code{postp}, located in the root directory of the database.
#' The folder must contain two files: \code{structure.json} (which defines the
#' post-processing configuration) and \code{functions.R} (which contains the R
#' function definitions to be used for post-processing). The function checks
#' for these files, loads the JSON configuration, and sources the R script.
#' If the required files are missing, it stops execution and notifies the user
#' with instructions on how to set up the files correctly.
#' @param path [mandatory] (character) The path to the root directory
#'   where the database is located.
#' @return A named list of post-processing functions, with one entry per
#'   variable listed in \code{structure.json} and the function objects taken
#'   from the sourced \code{functions.R} script.
#' @note The \code{postp} folder must contain two files: \code{structure.json}
#'   and \code{functions.R}. The \code{structure.json} file contains mappings of
#'   variables to the post-processing functions, while \code{functions.R}
#'   contains the actual function definitions that will be used for
#'   post-processing.
#' @keywords internal
#' @importFrom jsonlite fromJSON
#'
load_external_postp <- function(path) {

  # Define the paths to the 'postp' folder and its required files
  postp_folder <- file.path(path, "postp")
  json_file <- file.path(postp_folder, "structure.json")
  r_script_file <- file.path(postp_folder, "functions.R")

  # Check if the 'postp' folder exists
  if (!dir.exists(postp_folder)) {
    stop(paste0(
      "The 'postp' folder is missing in the database root directory.\n",
      "Run 'init_postp()' to create the folder and initialize ",
      "'structure.json' and 'functions.R'."
    ))
  }

  # Check if the 'structure.json' file exists within the 'postp' folder
  if (!file.exists(json_file)) {
    stop(paste0(
      "The 'structure.json' file is missing in the 'postp' folder.\n",
      "To use 'external', make sure to include a valid 'structure.json' file ",
      "in the 'postp' folder."
    ))
  }

  # Check if the 'functions.R' file exists within the 'postp' folder
  if (!file.exists(r_script_file)) {
    stop(paste0(
      "The 'functions.R' file is missing in the 'postp' folder.\n",
      "To use 'external', make sure to include a valid 'functions.R' file ",
      "in the 'postp' folder."
    ))
  }

  # Load the post-processing functions configuration from the JSON file
  postp_funs_json <- tryCatch(
    jsonlite::fromJSON(json_file),
    error = function(e) {
      stop("Failed to parse 'structure.json': ", e$message)
    }
  )

  # Source the R script to load function definitions into a new environment
  source_env <- source_with_notification(r_script_file)

  # Convert function names from JSON to actual function objects in R
  postp_funs <- lapply(postp_funs_json, function(fun_list) {
    if (is.null(fun_list)) {
      return(NULL) # Return NULL if no functions are specified
    }
    # Flatten and map each function name to its corresponding R function object
    unlist(lapply(fun_list, function(fun_names) {
      lapply(fun_names, function(fun_name) {
        if (!exists(fun_name, envir = source_env)) {
          stop(paste0("Function '", fun_name,
                      "' not found in the sourced environment."))
        }
        fun_obj <- get(fun_name, envir = source_env)
        if (!is.function(fun_obj)) {
          stop(paste0("'", fun_name, "' is not a valid function."))
        }
        return(fun_obj)
      })
    }), recursive = FALSE)
  })

  # Return the loaded post-processing functions
  return(postp_funs)
}
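# Illustrative result: for a 'structure.json' such as
#   { "default": null, "chirps/precipitation/mean": ["scale_mm"] }
# the returned list keeps 'default' as NULL and maps the variable name to the
# function objects defined in 'functions.R' (here, a list containing scale_mm).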
