#' Calculate dataset statistics (per-column mean and sd) for scaling
#'
#' @importFrom magrittr %>%
#' @importFrom purrr %||%
#' @importFrom stats runif setNames
#' @keywords internal
calculate_dataset_stats <- function(numeric_dataset) {
list(
mean = numeric_dataset %>% dplyr::summarise(dplyr::across(dplyr::everything(), ~ round(mean(.), 4))),
sd = numeric_dataset %>% dplyr::summarise(dplyr::across(dplyr::everything(), ~ round(stats::sd(.), 4)))
)
}
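# Illustration (not run when sourced): a minimal sketch of what
# calculate_dataset_stats() returns, using the built-in mtcars data as a
# stand-in for a numeric training dataset. The result is a list of one-row
# data frames with per-column means and standard deviations, rounded to 4 dp,
# intended for scaling (e.g. (x - mean) / sd).
if (FALSE) {
  stats_demo <- calculate_dataset_stats(mtcars[, c("mpg", "hp", "wt")])
  stats_demo$mean  # one row: column means
  stats_demo$sd    # one row: column standard deviations
}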
# Pre-compute HVT models sequentially to ensure consistency
precompute_hvt_models <- function(entire_dataset, ncell_range, hvt_params,
verbose = FALSE, time_column) {
# Pre-prepare dataset once
numeric_dataset <- entire_dataset %>% dplyr::select(-dplyr::all_of(time_column))
# Prepare HVT training dataset
hvt_train_dataset <- numeric_dataset
# Train models using lapply
hvt_models <- lapply(ncell_range, function(nclust) {
tryCatch({
set.seed(279)
hvt_args <- c(list(dataset = hvt_train_dataset, n_cells = nclust), hvt_params)
hvt_results <- do.call(trainHVT, hvt_args)
if (is.null(hvt_results) || length(hvt_results) == 0) {
stop("trainHVT returned NULL or empty results")
}
return(hvt_results)
}, error = function(e) {
  if (verbose) cat("  trainHVT failed for n_cells =", nclust, ":", e$message, "\n")
  NULL
})
})
# Name the list elements
names(hvt_models) <- as.character(ncell_range)
return(hvt_models)
}
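# Illustration (not run when sourced): the sequential "train over a range,
# keep NULL on failure" pattern used above, with a hypothetical stand-in
# trainer (`toy_trainer`) instead of trainHVT so the sketch is self-contained.
if (FALSE) {
  toy_trainer <- function(n_cells) {
    if (n_cells > 5) stop("too many cells for this toy example")
    list(n_cells = n_cells)
  }
  ncell_range <- 3:7
  models <- lapply(ncell_range, function(nclust) {
    tryCatch(toy_trainer(nclust), error = function(e) NULL)
  })
  names(models) <- as.character(ncell_range)
  # models[["6"]] and models[["7"]] are NULL; the rest hold results.
}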
# Process a single nclust value using pre-computed HVT model
process_single_nclust_with_precomputed_model <- function(nclust, hvt_model, numeric_dataset, entire_dataset, expost_forecasting,
time_column, k_range, nn_range, num_simulations,
time_values, n_ahead, mae_metric,
raw_dataset_stats, parallel, verbose) {
tryCatch({
if (verbose) cat("Processing nclust =", nclust, "\n")
# Check if HVT model exists
if (is.null(hvt_model)) {
stop("Pre-computed HVT model for nclust = ", nclust, " is NULL")
}
# Score complete dataset using pre-computed model
complete_dataset <- dplyr::bind_rows(entire_dataset, expost_forecasting)
scoring <- score_hvt_model(complete_dataset, hvt_model)
# Prepare temporal data
temporal_data_all <- prepare_temporal_data(scoring, complete_dataset, time_column)
# Create training-only temporal data for transition probabilities
max_train_t <- max(entire_dataset[[time_column]], na.rm = TRUE)
temporal_data_train <- temporal_data_all %>%
dplyr::filter(.data[[time_column]] <= max_train_t)
# Get transition probabilities using ONLY training data
prob_trans_matx <- get_transition_probabilities(temporal_data_train, time_column)
# Get initial state
initial_state <- get_initial_state(temporal_data_train, time_column)
# Prepare centroid data
centroid_2d_points <- scoring$cellID_coordinates
centroid_data_for_mae <- prepare_centroid_data_for_mae(
hvt_model, numeric_dataset, scoring,
complete_dataset, raw_dataset_stats, mae_metric, centroid_2d_points
)
# Detect problematic states
scored_cells <- unique(scoring$scoredPredictedData$Cell.ID)
problematic_detection <- detect_problematic_states(prob_trans_matx, scored_cells)
problematic_states <- problematic_detection$all_problematic
problematic_states <- intersect(problematic_states, scored_cells)
# Filter to training cells only
training_cells <- unique(temporal_data_train$Cell.ID)
problematic_states <- problematic_states[problematic_states %in% training_cells]
if (verbose && length(problematic_states) > 0) {
cat(" Found", length(problematic_states), "problematic states\n")
}
# Initialize attempts storage
all_attempts_by_metric <- initialize_attempts_storage(mae_metric)
# Check if there are problematic states
if (length(problematic_states) == 0) {
# No problematic states - run baseline simulation only
baseline_mae <- run_baseline_simulation(
transition_matrix = prob_trans_matx,
temporal_data_train = temporal_data_train,
scoring = scoring,
hvt_results = hvt_model,
initial_state = initial_state,
centroid_data_for_mae = centroid_data_for_mae,
mae_metrics = mae_metric,
raw_dataset_stats = raw_dataset_stats,
time_column = time_column,
entire_dataset = entire_dataset,
expost_forecasting = expost_forecasting,
num_simulations = num_simulations,
time_values = time_values,
n_ahead = n_ahead
)
# Store baseline results
for (metric in mae_metric) {
status_msg <- if (is.null(baseline_mae[[metric]]) || is.na(baseline_mae[[metric]])) {
"ERROR: Data structure error in MSM baseline"
} else {
"Successful Iteration without Problematic state"
}
all_attempts_by_metric[[metric]][[1]] <- data.frame(
`Number of Cells` = nclust,
k = "-",
`Number of nearest neighbors` = "-",
mae = baseline_mae[[metric]],
status = status_msg,
stringsAsFactors = FALSE,
check.names = FALSE
)
}
comprehensive_results <- list(
simulation_results = list(),
all_attempts = all_attempts_by_metric
)
} else {
# Problematic states exist - run full k-nn combinations
clustering_data <- centroid_2d_points %>% dplyr::select(-.data$Cell.ID)
comprehensive_results <- process_all_k_nn_combinations(
k_local_range = k_range,
nn_range = nn_range,
problematic_states = problematic_states,
centroid_2d_points = centroid_2d_points,
clustering_data = clustering_data,
hvt_results = hvt_model,
scoring = scoring,
nclust = nclust,
mae_metric = mae_metric,
all_attempts_by_metric = all_attempts_by_metric,
prob_trans_matx = prob_trans_matx,
temporal_data_complete = temporal_data_all,
initial_state = initial_state,
centroid_data_for_mae = centroid_data_for_mae,
raw_dataset_stats = raw_dataset_stats,
time_column = time_column,
entire_dataset = entire_dataset,
expost_forecasting = expost_forecasting,
num_simulations = num_simulations,
time_values = time_values,
n_ahead = n_ahead,
parallel = FALSE, # k-nn combinations are run sequentially within each nclust
verbose = verbose
)
}
# Process and format results
processed_results <- process_simulation_results(
simulation_results = comprehensive_results$simulation_results,
mae_metric = mae_metric,
nclust = nclust,
all_attempts_by_metric = comprehensive_results$all_attempts
)
return(list(
successful_results = processed_results$successful_results,
best_result = processed_results$best_result,
all_attempts = processed_results$all_attempts,
success = TRUE,
nclust = nclust
))
}, error = function(e) {
if (verbose) cat(" ERROR with nclust", nclust, ":", e$message, "\n")
all_config_failure <- list()
for (metric in mae_metric) {
all_config_failure[[metric]] <- data.frame(
`Number of Cells` = nclust, k = "-", `Number of nearest neighbors` = "-",
mae = NA, status = paste0("ERROR: High-level error: ", e$message),
stringsAsFactors = FALSE, check.names = FALSE
)
}
return(list(
successful_results = NULL,
best_result = NULL,
all_attempts = all_config_failure,
success = FALSE,
error = e$message,
nclust = nclust
))
})
}
# Collect and format final optimization results
collect_final_results <- function(optimization_results, mae_metric, verbose) {
# Efficient result categorization
successful_results <- optimization_results[purrr::map_lgl(optimization_results, ~ .x$success %||% FALSE)]
failed_results <- optimization_results[purrr::map_lgl(optimization_results, ~ !(.x$success %||% FALSE))]
final_output <- list()
if (length(optimization_results) > 0) {
# Process each metric efficiently
for (metric in mae_metric) {
metric_key <- paste0(metric, "_mae")
# Collect successful results with consistent data types (5 columns only)
all_successful_metric <- purrr::map_dfr(optimization_results, function(x) {
# Get k,nn simulation results
sim_results <- if (!is.null(x$successful_results) && !is.null(x$successful_results[[metric]])) {
result <- x$successful_results[[metric]]
result$k <- as.character(result$k)
result$`Number of nearest neighbors` <- as.character(result$`Number of nearest neighbors`)
req <- c("Number of Cells","k","Number of nearest neighbors","mae","status")
result <- result[, intersect(names(result), req), drop = FALSE]
for (nm in setdiff(req, names(result))) {
if (nrow(result) > 0) {
result[[nm]] <- NA
} else {
result[[nm]] <- if (nm %in% c("Number of Cells")) integer(0) else if (nm %in% c("k", "Number of nearest neighbors", "status")) character(0) else numeric(0)
}
}
result[req]
} else {
data.frame(
`Number of Cells` = integer(0),
k = character(0),
`Number of nearest neighbors` = character(0),
mae = numeric(0),
status = character(0),
check.names = FALSE
)
}
# Get baseline results from all_attempts
baseline_results <- if (!is.null(x$all_attempts) && !is.null(x$all_attempts[[metric]])) {
tryCatch({
if (is.list(x$all_attempts[[metric]]) && !is.data.frame(x$all_attempts[[metric]])) {
attempts_list <- lapply(x$all_attempts[[metric]], function(df) {
if (is.data.frame(df) && nrow(df) > 0) {
df$k <- as.character(df$k)
df$`Number of nearest neighbors` <- as.character(df$`Number of nearest neighbors`)
}
df
})
attempts_df <- do.call(rbind, attempts_list)
} else {
attempts_df <- x$all_attempts[[metric]]
}
if (is.data.frame(attempts_df) && "status" %in% colnames(attempts_df)) {
success_rows <- attempts_df[grepl("^Successful Iteration", attempts_df$status), ]
if (nrow(success_rows) > 0) {
success_rows$k <- as.character(success_rows$k)
success_rows$`Number of nearest neighbors` <- as.character(success_rows$`Number of nearest neighbors`)
}
req <- c("Number of Cells","k","Number of nearest neighbors","mae","status")
success_rows <- success_rows[, intersect(names(success_rows), req), drop = FALSE]
for (nm in setdiff(req, names(success_rows))) success_rows[[nm]] <- NA
success_rows[req]
} else {
data.frame(
`Number of Cells` = integer(0),
k = character(0),
`Number of nearest neighbors` = character(0),
mae = numeric(0),
status = character(0),
check.names = FALSE
)
}
}, error = function(e) {
data.frame(
`Number of Cells` = integer(0),
k = character(0),
`Number of nearest neighbors` = character(0),
mae = numeric(0),
status = character(0),
check.names = FALSE
)
})
} else {
data.frame(
`Number of Cells` = integer(0),
k = character(0),
`Number of nearest neighbors` = character(0),
mae = numeric(0),
status = character(0),
check.names = FALSE
)
}
# Ensure both sim_results and baseline_results have consistent column types
if (nrow(sim_results) > 0) {
sim_results$k <- as.character(sim_results$k)
sim_results$`Number of nearest neighbors` <- as.character(sim_results$`Number of nearest neighbors`)
}
if (nrow(baseline_results) > 0) {
baseline_results$k <- as.character(baseline_results$k)
baseline_results$`Number of nearest neighbors` <- as.character(baseline_results$`Number of nearest neighbors`)
}
# Safe combine with consistent structures and drop duplicates
combined <- dplyr::bind_rows(sim_results, baseline_results)
combined <- dplyr::distinct(
combined,
.data$`Number of Cells`, .data$k, .data$`Number of nearest neighbors`, .data$mae, .data$status,
.keep_all = FALSE
)
combined
})
# Sort all_successful_metric by Number of Cells, k, and nn
if (nrow(all_successful_metric) > 0) {
# Convert to numeric for proper sorting, handling special cases
all_successful_metric$k_sort <- suppressWarnings({
ifelse(is.na(all_successful_metric$k) | all_successful_metric$k == "-", 999, as.numeric(all_successful_metric$k))
})
all_successful_metric$nn_sort <- suppressWarnings({
ifelse(is.na(all_successful_metric$`Number of nearest neighbors`) |
all_successful_metric$`Number of nearest neighbors` == "-", 999,
as.numeric(all_successful_metric$`Number of nearest neighbors`))
})
# Replace any NAs from conversion with 999
all_successful_metric$k_sort[is.na(all_successful_metric$k_sort)] <- 999
all_successful_metric$nn_sort[is.na(all_successful_metric$nn_sort)] <- 999
all_successful_metric <- all_successful_metric %>%
dplyr::arrange(.data$`Number of Cells`, .data$k_sort, .data$nn_sort) %>%
dplyr::select(-.data$k_sort, -.data$nn_sort)
}
# Collect best results per nclust
best_results_metric <- purrr::map_dfr(successful_results, function(x) {
if (!is.null(x$best_result) && !is.null(x$best_result[[metric]])) {
result <- x$best_result[[metric]]
result$k <- as.character(result$k)
result$`Number of nearest neighbors` <- as.character(result$`Number of nearest neighbors`)
result
} else {
data.frame()
}
})
# Enhanced all attempts collection with error handling
all_attempts_metric <- tryCatch({
purrr::map_dfr(optimization_results, function(x) {
if (!is.null(x$all_attempts) && !is.null(x$all_attempts[[metric]])) {
if (length(x$all_attempts[[metric]]) > 0) {
tryCatch({
if (is.list(x$all_attempts[[metric]]) && !is.data.frame(x$all_attempts[[metric]])) {
attempts_df <- do.call(rbind, x$all_attempts[[metric]])
} else {
attempts_df <- x$all_attempts[[metric]]
}
if (nrow(attempts_df) > 0) {
attempts_df$k <- as.character(attempts_df$k)
attempts_df$`Number of nearest neighbors` <- as.character(attempts_df$`Number of nearest neighbors`)
}
attempts_df
}, error = function(e) {
data.frame()
})
} else {
data.frame()
}
} else {
data.frame()
}
})
}, error = function(e) {
data.frame()
})
# Sort all results tables by Number of Cells, k, and nn
if (nrow(all_attempts_metric) > 0) {
# Convert to numeric for proper sorting, handling special cases
all_attempts_metric$k_sort <- suppressWarnings({
ifelse(is.na(all_attempts_metric$k) | all_attempts_metric$k == "-", 999, as.numeric(all_attempts_metric$k))
})
all_attempts_metric$nn_sort <- suppressWarnings({
ifelse(is.na(all_attempts_metric$`Number of nearest neighbors`) |
all_attempts_metric$`Number of nearest neighbors` == "-", 999,
as.numeric(all_attempts_metric$`Number of nearest neighbors`))
})
# Replace any NAs from conversion with 999
all_attempts_metric$k_sort[is.na(all_attempts_metric$k_sort)] <- 999
all_attempts_metric$nn_sort[is.na(all_attempts_metric$nn_sort)] <- 999
all_attempts_metric <- all_attempts_metric %>%
dplyr::arrange(.data$`Number of Cells`, .data$k_sort, .data$nn_sort) %>%
dplyr::select(-.data$k_sort, -.data$nn_sort)
}
if (nrow(best_results_metric) > 0) {
# Convert to numeric for proper sorting, handling special cases
best_results_metric$k_sort <- suppressWarnings({
ifelse(is.na(best_results_metric$k) | best_results_metric$k == "-", 999, as.numeric(best_results_metric$k))
})
best_results_metric$nn_sort <- suppressWarnings({
ifelse(is.na(best_results_metric$`Number of nearest neighbors`) |
best_results_metric$`Number of nearest neighbors` == "-", 999,
as.numeric(best_results_metric$`Number of nearest neighbors`))
})
# Replace any NAs from conversion with 999
best_results_metric$k_sort[is.na(best_results_metric$k_sort)] <- 999
best_results_metric$nn_sort[is.na(best_results_metric$nn_sort)] <- 999
best_results_metric <- best_results_metric %>%
dplyr::arrange(.data$`Number of Cells`, .data$k_sort, .data$nn_sort) %>%
dplyr::select(-.data$k_sort, -.data$nn_sort)
req <- c("Number of Cells","k","Number of nearest neighbors","mae","status")
best_results_metric <- best_results_metric[, intersect(names(best_results_metric), req), drop = FALSE]
for (nm in setdiff(req, names(best_results_metric))) best_results_metric[[nm]] <- NA
best_results_metric <- best_results_metric[req]
}
# Find overall best
overall_best_metric <- if (nrow(all_successful_metric) > 0) {
tryCatch({
best_row <- all_successful_metric %>%
dplyr::arrange(.data$mae) %>%
dplyr::slice(1)
best_row
}, error = function(e) {
data.frame()
})
} else {
data.frame()
}
# enforce 5-column shape everywhere
req <- c("Number of Cells","k","Number of nearest neighbors","mae","status")
shape <- function(df) {
  if (is.null(df) || !nrow(df)) {
    return(data.frame(
      `Number of Cells` = integer(0), k = character(0),
      `Number of nearest neighbors` = character(0),
      mae = numeric(0), status = character(0),
      check.names = FALSE
    ))
  }
  df <- df[, intersect(names(df), req), drop = FALSE]
  for (nm in setdiff(req, names(df))) df[[nm]] <- NA
  df[req]
}
final_output[[metric_key]] <- list(
successful_results = shape(all_successful_metric),
nclust_best_results = shape(best_results_metric),
overall_best = shape(overall_best_metric),
all_results = shape(all_attempts_metric)
)
}
}
# Print summary if verbose
if (verbose) {
cat("Optimization Summary:\n")
cat(" Total configurations tested:", length(optimization_results), "\n")
cat(" Successfully processed:", length(successful_results), "\n")
cat(" Failed:", length(failed_results), "\n")
for (metric in mae_metric) {
metric_key <- paste0(metric, "_mae")
all_results <- final_output[[metric_key]]$all_results
if (nrow(all_results) > 0) {
successful_count <- sum(grepl("^Successful Iteration", all_results$status), na.rm = TRUE)
total_count <- nrow(all_results)
success_rate <- round(100 * successful_count / total_count, 1)
cat(" ", metric, "success rate:", success_rate, "% (", successful_count, "/", total_count, ")\n")
if (nrow(final_output[[metric_key]]$overall_best) > 0) {
best <- final_output[[metric_key]]$overall_best
cat(" Best", metric, "MAE:", round(best$mae[1], 4),
"with", best$`Number of Cells`[1], "cells, k =", best$k[1], ", nn =", best$`Number of nearest neighbors`[1], "\n")
}
}
}
}
return(final_output)
}
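# Illustration (not run when sourced): the sentinel-sort used repeatedly above.
# Baseline rows store k and nn as "-", so both columns are coerced to numeric
# with 999 standing in for "-"/NA, rows are sorted, and the helper column dropped.
if (FALSE) {
  df <- data.frame(
    `Number of Cells` = c(10, 10, 10),
    k = c("-", "3", "2"),
    check.names = FALSE
  )
  df$k_sort <- suppressWarnings(ifelse(is.na(df$k) | df$k == "-", 999, as.numeric(df$k)))
  df[order(df$`Number of Cells`, df$k_sort), setdiff(names(df), "k_sort")]
  # Numeric k values come first; baseline "-" rows sort last.
}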
# ============================================================================
# HVT PROCESSING FUNCTIONS
# ============================================================================
# Score the complete dataset against a pre-computed HVT model
score_hvt_model <- function(complete_dataset, hvt_results) {
scoring <- scoreHVT(
complete_dataset, hvt_results,
analysis.plots = FALSE,
names.column = complete_dataset[, 1]
)
if (is.null(scoring) || is.null(scoring$scoredPredictedData)) {
stop("scoreHVT returned NULL or missing scoredPredictedData")
}
return(scoring)
}
# Prepare temporal data for MSM
prepare_temporal_data <- function(scoring, complete_dataset, time_column) {
scored_data <- scoring$scoredPredictedData
scored_data[[time_column]] <- as.POSIXct(complete_dataset[[time_column]])
# Reorder columns to put time first
time_col_pos <- which(names(scored_data) == time_column)
if (length(time_col_pos) == 1) {
other_cols <- names(scored_data)[-time_col_pos]
scored_data <- scored_data[, c(time_column, other_cols), drop = FALSE]
}
temporal_data_all <- scored_data %>% dplyr::select(dplyr::all_of(c(time_column, "Cell.ID")))
if (any(is.na(temporal_data_all[[time_column]]))) {
stop("Failed to parse time column")
}
return(temporal_data_all)
}
# Get transition probabilities from training data ONLY
get_transition_probabilities <- function(temporal_data_train, time_column) {
prob_trans_matx <- getTransitionProbability(
df = temporal_data_train, # ONLY training data
cellid_column = "Cell.ID",
time_column = time_column,
type = "with_self_state"
)
if (is.null(prob_trans_matx) || nrow(prob_trans_matx) == 0) {
stop("getTransitionProbability (train window) returned NULL or empty matrix")
}
return(prob_trans_matx)
}
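# Illustration (not run when sourced): the helpers in this file only reference
# the Current_State, Next_State and Transition_Probability columns of the
# transition table. A hand-rolled sketch of that shape from a toy Cell.ID
# series (the real computation is done by getTransitionProbability(), whose
# internals may differ):
if (FALSE) {
  states <- data.frame(t = 1:6, Cell.ID = c(1, 2, 2, 1, 3, 1))
  trans <- data.frame(
    Current_State = head(states$Cell.ID, -1),
    Next_State    = tail(states$Cell.ID, -1)
  )
  trans %>%
    dplyr::count(Current_State, Next_State, name = "n") %>%
    dplyr::group_by(Current_State) %>%
    dplyr::mutate(Transition_Probability = n / sum(n)) %>%
    dplyr::ungroup() %>%
    dplyr::select(Current_State, Next_State, Transition_Probability)
}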
# Get initial state for simulation
get_initial_state <- function(temporal_data_train, time_column) {
last_t_train <- max(temporal_data_train[[time_column]], na.rm = TRUE)
initial_state <- temporal_data_train$Cell.ID[temporal_data_train[[time_column]] == last_t_train][1]
return(initial_state)
}
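# Illustration (not run when sourced): the initial state is simply the Cell.ID
# observed at the last training timestamp (first match if there are ties).
if (FALSE) {
  toy <- data.frame(
    ts = as.POSIXct(c("2021-01-01", "2021-01-02", "2021-01-03")),
    Cell.ID = c(4, 7, 2)
  )
  get_initial_state(toy, time_column = "ts")  # returns 2
}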
# Commented-out legacy variant of prepare_centroid_data_for_mae() that also
# handled dependent variables (DV flow), kept for reference:
# prepare_centroid_data_for_mae <- function(dependent_variables, hvt_results, numeric_dataset,
# scoring, complete_dataset, raw_dataset_stats,
# mae_metric, centroid_2d_points) {
# if (is.null(dependent_variables)) {
# # Normal flow: align to training variables only
# centroid_data_for_mae <- hvt_results[[3]]$summary %>%
# dplyr::select(dplyr::any_of(colnames(numeric_dataset)))
# } else {
# # DV flow: build normalized per-state centroids for DVs
# assign_df <- scoring$scoredPredictedData %>% dplyr::select("Cell.ID")
# dv_df <- cbind(assign_df, complete_dataset[, dependent_variables, drop = FALSE])
#
# mu_vec <- as.numeric(raw_dataset_stats$mean[1, dependent_variables, drop = TRUE])
# sd_vec <- as.numeric(raw_dataset_stats$sd[1, dependent_variables, drop = TRUE])
# sd_vec[is.na(sd_vec) | sd_vec == 0] <- 1
#
# dv_norm <- as.data.frame(Map(function(col, m, s) (col - m)/s,
# dv_df[dependent_variables], mu_vec, sd_vec))
# dv_norm$Cell.ID <- dv_df[["Cell.ID"]]
#
# agg_fun <- switch(mae_metric[1],
# mean = function(x) mean(x, na.rm = TRUE),
# median = function(x) median(x, na.rm = TRUE),
# mode = function(x) {
# tb <- table(x);
# as.numeric(names(tb)[which.max(tb)])
# })
#
# dv_state_map_scaled <- dv_norm %>%
# dplyr::group_by(.data$Cell.ID) %>%
# dplyr::summarise(dplyr::across(dplyr::all_of(dependent_variables), ~ agg_fun(.x)), .groups = "drop")
#
# all_cells <- data.frame(Cell.ID = centroid_2d_points[["Cell.ID"]])
# dv_state_map_scaled <- dplyr::left_join(all_cells, dv_state_map_scaled, by = "Cell.ID")
#
# for (nm in dependent_variables) {
# dv_state_map_scaled[[nm]][is.na(dv_state_map_scaled[[nm]])] <- 0
# }
#
# centroid_data_for_mae <- dv_state_map_scaled %>% dplyr::select(-"Cell.ID")
# }
#
# return(centroid_data_for_mae)
# }
# Prepare centroid data for MAE calculation
prepare_centroid_data_for_mae <- function(hvt_results, numeric_dataset,
scoring, complete_dataset, raw_dataset_stats,
mae_metric, centroid_2d_points) {
# Normal flow: align to training variables only
centroid_data_for_mae <- hvt_results[[3]]$summary %>%
dplyr::select(dplyr::any_of(colnames(numeric_dataset)))
return(centroid_data_for_mae)
}
# Initialize attempts storage for each metric
initialize_attempts_storage <- function(mae_metric) {
all_attempts_by_metric <- list()
for (metric in mae_metric) {
all_attempts_by_metric[[metric]] <- list()
}
return(all_attempts_by_metric)
}
# ============================================================================
# SIMULATION FUNCTIONS
# ============================================================================
# Run MSM simulation directly for optimization - MSM handles all validation
run_single_simulation <- function(k_val, n_nn, cluster_data, transition_matrix,
temporal_data_train, scoring, hvt_results, initial_state,
problematic_states, centroid_data_for_mae, mae_metrics,
raw_dataset_stats, time_column, entire_dataset,
expost_forecasting, num_simulations, time_values, n_ahead) {
# Create temporal data properly for MSM
scored_data <- scoring$scoredPredictedData
complete_dataset <- rbind(entire_dataset, expost_forecasting)
scored_data[[time_column]] <- as.POSIXct(complete_dataset[[time_column]])
temporal_data_all <- scored_data %>% dplyr::select(dplyr::all_of(c(time_column, "Cell.ID")))
tryCatch({
# Set optimization flag for hvt_msm_opt
options(hvt.msm.optimization = TRUE)
# Let MSM handle ALL validation and clustering - no pre-validation
msm_result <- msm(
state_time_data = temporal_data_all,
forecast_type = "ex-post",
transition_probability_matrix = transition_matrix,
initial_state = initial_state,
num_simulations = num_simulations,
scoreHVT_results = scoring,
trainHVT_results = hvt_results,
actual_data = expost_forecasting,
raw_dataset = complete_dataset,
k = k_val,
handle_problematic_states = length(problematic_states) > 0,
n_nearest_neighbor = n_nn,
mae_metric = mae_metrics[1],
show_simulation = FALSE,
time_column = time_column,
precomputed_problematic_states = problematic_states,
plot_mode = "mae-only"
)
# Extract MAE from MSM results
mae_results <- extract_mae_from_msm_results(msm_result, mae_metrics)
# Cleanup heavy objects
rm(msm_result, temporal_data_all, scored_data)
gc(verbose = FALSE)
# Return success with models
if (!is.null(mae_results) && !is.na(mae_results[[mae_metrics[1]]])) {
return(list(
success = TRUE,
k = k_val,
nn = n_nn,
mae_results = mae_results
))
} else {
return(list(success = FALSE, k = k_val, nn = n_nn, error = "MAE extraction failed"))
}
}, error = function(e) {
# MSM failed (could be validation, clustering, or simulation failure)
gc(verbose = FALSE)
return(list(success = FALSE, k = k_val, nn = n_nn, error = as.character(e$message)))
})
}
# Extract MAE values from MSM results
extract_mae_from_msm_results <- function(msm_result, mae_metrics) {
mae_results <- list()
# New compact structure from mae-only path
if (!is.null(msm_result$mae_by_metric)) {
for (metric in mae_metrics) {
mm <- msm_result$mae_by_metric[[metric]]
if (!is.null(mm)) {
centroid_maes <- sapply(mm$centroid_mae_list, function(x) x[["mae"]])
mae_results[[metric]] <- mean(centroid_maes, na.rm = TRUE)
} else {
mae_results[[metric]] <- NA
}
}
return(mae_results)
}
# Backward-compatible path (plots-based)
for (metric in mae_metrics) {
if (!is.null(msm_result$plots) && length(msm_result$plots) >= 2) {
centroid_maes <- sapply(msm_result$plots[[1]], function(x) x[["mae"]])
mae_results[[metric]] <- mean(centroid_maes, na.rm = TRUE)
} else {
mae_results[[metric]] <- NA
}
}
mae_results
}
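# Illustration (not run when sourced): the compact result path assumes
# msm_result$mae_by_metric[[metric]]$centroid_mae_list is a list whose elements
# each carry an "mae" entry; the reported value is their mean. A mocked
# structure of that assumed shape:
if (FALSE) {
  msm_mock <- list(mae_by_metric = list(
    mean = list(centroid_mae_list = list(list(mae = 0.12), list(mae = 0.18)))
  ))
  extract_mae_from_msm_results(msm_mock, mae_metrics = "mean")
  # $mean is 0.15, the average of the per-centroid MAEs
}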
# Run baseline simulation using MSM without problematic state handling
run_baseline_simulation <- function(transition_matrix, temporal_data_train, scoring, hvt_results,
initial_state, centroid_data_for_mae, mae_metrics, raw_dataset_stats,
time_column, entire_dataset, expost_forecasting, num_simulations,
time_values, n_ahead) {
# Create temporal data for MSM
scored_data <- scoring$scoredPredictedData
complete_dataset <- rbind(entire_dataset, expost_forecasting)
scored_data[[time_column]] <- as.POSIXct(complete_dataset[[time_column]])
temporal_data_all <- scored_data %>%
dplyr::select(dplyr::all_of(c(time_column, "Cell.ID")))
tryCatch({
# Set optimization flag for hvt_msm_opt
options(hvt.msm.optimization = TRUE)
# Baseline MSM call: no problematic state handling
msm_result <- msm(
state_time_data = temporal_data_all,
forecast_type = "ex-post",
transition_probability_matrix = transition_matrix,
initial_state = initial_state,
num_simulations = num_simulations,
scoreHVT_results = scoring,
trainHVT_results = hvt_results,
actual_data = expost_forecasting,
raw_dataset = complete_dataset,
handle_problematic_states = FALSE, # Baseline = no problematic state handling
mae_metric = mae_metrics[1],
show_simulation = FALSE,
time_column = time_column,
plot_mode = "mae-only"
)
# Extract MAE from MSM results
mae_results <- extract_mae_from_msm_results(msm_result, mae_metrics)
# Cleanup
rm(msm_result, temporal_data_all, scored_data)
gc(verbose = FALSE)
return(mae_results)
}, error = function(e) {
# Return NA for all metrics if baseline fails
mae_results <- list()
for (metric in mae_metrics) {
mae_results[[metric]] <- NA
}
return(mae_results)
})
}
# Process all k-nn combinations with pre-filtering checkpoint for invalid combinations
process_all_k_nn_combinations <- function(k_local_range, nn_range, problematic_states,
centroid_2d_points, clustering_data, hvt_results,
scoring, nclust, mae_metric, all_attempts_by_metric,
prob_trans_matx, temporal_data_complete, initial_state,
centroid_data_for_mae, raw_dataset_stats, time_column,
entire_dataset, expost_forecasting, num_simulations,
time_values, n_ahead, parallel,
verbose = FALSE) {
# Generate ALL possible k-nn combinations
all_combinations <- expand.grid(k = k_local_range, nn = nn_range)
if (verbose) {
cat(" Total k-nn combinations:", nrow(all_combinations), "\n")
}
# CHECKPOINT: Pre-filter mathematically invalid combinations
valid_combinations <- list()
invalid_combinations <- list()
for (i in seq_len(nrow(all_combinations))) {
k_val <- all_combinations$k[i]
nn_val <- all_combinations$nn[i]
# Check 1: k must be less than or equal to number of cells
if (k_val > nclust) {
invalid_combinations[[length(invalid_combinations) + 1]] <- list(
k = k_val,
nn = nn_val,
status = paste0("Mathematical bound reached - Invalid k (", k_val, ") for given n_cells (", nclust, ")")
)
next
}
# Check 2: nn must be <= (n_cells - k), the mathematical upper bound;
# e.g. with n_cells = 10 and k = 4, nn can be at most 6 (see the sketch after this function)
max_possible_nn <- nclust - k_val
if (nn_val > max_possible_nn) {
invalid_combinations[[length(invalid_combinations) + 1]] <- list(
k = k_val,
nn = nn_val,
status = paste0("Mathematical bound reached - Invalid nn (", nn_val, ") for given n_cells (", nclust, ") and k (", k_val, ")")
)
next
}
# Passed both checks - valid combination
valid_combinations[[length(valid_combinations) + 1]] <- list(
k = k_val,
nn = nn_val,
status = "ready_for_msm",
valid = TRUE
)
}
# Store invalid combinations in all_attempts_by_metric
if (length(invalid_combinations) > 0) {
for (combo in invalid_combinations) {
for (metric in mae_metric) {
idx <- length(all_attempts_by_metric[[metric]]) + 1
all_attempts_by_metric[[metric]][[idx]] <- data.frame(
`Number of Cells` = nclust,
k = as.character(combo$k),
`Number of nearest neighbors` = as.character(combo$nn),
mae = NA,
status = combo$status, # "Mathematical bound reached ..." message from the pre-filter above
stringsAsFactors = FALSE,
check.names = FALSE
)
}
}
}
if (verbose) {
cat(" Valid combinations:", length(valid_combinations), "\n")
cat(" Pre-filtered (invalid):", length(invalid_combinations), "\n")
}
# Run MSM simulations for VALID combinations only (invalid ones already filtered)
simulation_results <- list()
if (length(valid_combinations) > 0) {
# Simulation function for valid combinations
simulation_function <- function(combo) {
# Call run_single_simulation which calls MSM
result <- run_single_simulation(
k_val = combo$k, n_nn = combo$nn, cluster_data = NULL,
transition_matrix = prob_trans_matx, temporal_data_train = NULL,
scoring = scoring, hvt_results = hvt_results, initial_state = initial_state,
problematic_states = problematic_states, centroid_data_for_mae = NULL,
mae_metrics = mae_metric, raw_dataset_stats = NULL,
time_column = time_column, entire_dataset = entire_dataset,
expost_forecasting = expost_forecasting, num_simulations = num_simulations,
time_values = NULL, n_ahead = NULL
)
return(result)
}
# Execute ALL combinations
if (parallel) {
if (!requireNamespace("furrr", quietly = TRUE) || !requireNamespace("future", quietly = TRUE)) {
stop("Packages 'furrr' and 'future' are required for parallel processing. ",
"Please install them with: install.packages(c('furrr', 'future'))")
}
simulation_results <- furrr::future_map(valid_combinations, simulation_function,
.options = furrr::furrr_options(seed = TRUE, packages = c("dplyr", "data.table")))
} else {
simulation_results <- purrr::map(valid_combinations, simulation_function)
}
if (verbose) {
successful_count <- sum(sapply(simulation_results, function(x) x$success %||% FALSE))
cat(" Completed:", length(simulation_results), "simulations,", successful_count, "successful\n")
}
}
return(list(simulation_results = simulation_results, all_attempts = all_attempts_by_metric))
}
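# Illustration (not run when sourced): the pre-filter above keeps only
# combinations with k <= n_cells and nn <= n_cells - k. For example, with
# n_cells = 10 and k = 4, nn can be at most 6.
if (FALSE) {
  nclust <- 10
  combos <- expand.grid(k = 2:6, nn = 4:8)
  keep <- combos$k <= nclust & combos$nn <= (nclust - combos$k)
  valid <- combos[keep, ]
  invalid <- combos[!keep, ]
  nrow(valid); nrow(invalid)  # e.g. (k = 6, nn = 8) is invalid because 8 > 10 - 6
}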
# Process simulation results - lightweight version
process_simulation_results <- function(simulation_results, mae_metric, nclust, all_attempts_by_metric) {
if (length(simulation_results) == 0) {
formatted_results <- list()
best_results <- list()
for (metric in mae_metric) {
formatted_results[[metric]] <- data.frame()
best_results[[metric]] <- data.frame()
}
return(list(
successful_results = formatted_results,
best_result = best_results,
all_attempts = all_attempts_by_metric
))
}
formatted_results <- list()
best_results <- list()
successful_results_list <- list()
# Initialize lists for each metric
for (metric in mae_metric) {
successful_results_list[[metric]] <- list()
}
# Process each simulation result
for (result in simulation_results) {
if (result$success && !is.null(result$mae_results)) {
# Process successful results
for (metric in mae_metric) {
# Add to all attempts
idx <- length(all_attempts_by_metric[[metric]]) + 1
all_attempts_by_metric[[metric]][[idx]] <- data.frame(
`Number of Cells` = nclust,
k = as.character(result$k),
`Number of nearest neighbors` = as.character(result$nn),
mae = round(result$mae_results[[metric]], 4),
status = "Successful Iteration by Handling Problematic state",
stringsAsFactors = FALSE,
check.names = FALSE
)
# Add to successful results WITHOUT models
success_idx <- length(successful_results_list[[metric]]) + 1
successful_results_list[[metric]][[success_idx]] <- data.frame(
`Number of Cells` = nclust,
k = as.character(result$k),
`Number of nearest neighbors` = as.character(result$nn),
mae = round(result$mae_results[[metric]], 4),
status = "Successful Iteration by Handling Problematic state",
stringsAsFactors = FALSE,
check.names = FALSE
)
}
} else {
# Process failed results
for (metric in mae_metric) {
idx <- length(all_attempts_by_metric[[metric]]) + 1
status_msg <- if (!is.null(result$error)) {
result$error # Message comes as-is from MSM (already formatted)
} else {
"ERROR: MSM failed"
}
all_attempts_by_metric[[metric]][[idx]] <- data.frame(
`Number of Cells` = nclust,
k = as.character(result$k),
`Number of nearest neighbors` = as.character(result$nn),
mae = NA,
status = status_msg,
stringsAsFactors = FALSE,
check.names = FALSE
)
}
}
}
# Format successful results and find best combinations WITHOUT models
for (metric in mae_metric) {
if (length(successful_results_list[[metric]]) > 0) {
# Extract configs and de-duplicate strictly on the 5 columns
configs <- purrr::map_dfr(successful_results_list[[metric]], ~ .x)
configs <- dplyr::distinct(configs, `Number of Cells`, k, `Number of nearest neighbors`, mae, status, .keep_all = FALSE)
# Ensure numeric conversion for sorting
configs$k_numeric <- as.numeric(configs$k)
configs$nn_numeric <- as.numeric(configs$`Number of nearest neighbors`)
configs$mae_numeric <- as.numeric(configs$mae)
# Deduplicate and sort, keep only required 5 cols
configs <- dplyr::distinct(configs, .data$`Number of Cells`, .data$k, .data$`Number of nearest neighbors`, .data$mae, .keep_all = TRUE)
sort_order <- order(configs$`Number of Cells`, configs$k_numeric, configs$nn_numeric)
formatted <- configs[sort_order, c("Number of Cells","k","Number of nearest neighbors","mae","status")]
formatted_results[[metric]] <- formatted
# Find best combination WITHOUT storing models
best_idx <- which.min(configs$mae_numeric)
best_combination <- configs[best_idx, ] %>%
dplyr::select(-.data$k_numeric, -.data$nn_numeric, -.data$mae_numeric)
best_results[[metric]] <- best_combination[, c("Number of Cells","k","Number of nearest neighbors","mae","status")]
} else {
formatted_results[[metric]] <- data.frame()
best_results[[metric]] <- data.frame()
}
}
# Sort all attempts
for (metric in mae_metric) {
if (length(all_attempts_by_metric[[metric]]) > 0) {
combined_attempts <- do.call(rbind, all_attempts_by_metric[[metric]])
# Sort by cells, k, nn
combined_attempts$k_sort <- suppressWarnings(
  ifelse(combined_attempts$k == "-", 999, as.numeric(combined_attempts$k))
)
combined_attempts$nn_sort <- suppressWarnings(
  ifelse(combined_attempts$`Number of nearest neighbors` == "-", 999,
         as.numeric(combined_attempts$`Number of nearest neighbors`))
)
combined_attempts <- combined_attempts %>%
dplyr::arrange(.data$`Number of Cells`, .data$k_sort, .data$nn_sort) %>%
dplyr::select(-.data$k_sort, -.data$nn_sort)
# Convert back to list format
all_attempts_by_metric[[metric]] <- split(combined_attempts, seq_len(nrow(combined_attempts)))
all_attempts_by_metric[[metric]] <- lapply(all_attempts_by_metric[[metric]], as.data.frame)
}
}
gc(verbose = FALSE)
return(list(
successful_results = formatted_results,
best_result = best_results,
all_attempts = all_attempts_by_metric
))
}
# ============================================================================
# PROBLEMATIC STATE DETECTION FUNCTIONS
# ============================================================================
# Detect all types of problematic states in a transition matrix
detect_problematic_states <- function(prob_trans_matx, scored_cells) {
# Missing states: states in scored_cells but not in transition matrix
transition_states <- unique(prob_trans_matx$Current_State)
missing_states <- setdiff(scored_cells, transition_states)
# Self-transition states (100% self-transition)
self_transition_states <- prob_trans_matx %>%
dplyr::group_by(.data$Current_State) %>%
dplyr::summarize(
is_self_only = dplyr::n() == 1 && all(.data$Current_State == .data$Next_State),
.groups = 'drop'
) %>%
dplyr::filter(.data$is_self_only == TRUE) %>%
dplyr::pull(.data$Current_State)
# Cyclic states detection
cyclic_states <- find_cyclic_states(prob_trans_matx)
# Combine all problematic states
all_problematic <- unique(c(missing_states, self_transition_states, cyclic_states))
return(list(
missing_states = missing_states,
self_transition_states = self_transition_states,
cyclic_states = cyclic_states,
all_problematic = all_problematic
))
}
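# Illustration (not run when sourced): a toy transition table where state 3
# only ever transitions to itself (self-transition state) and state 5 was
# scored but never appears as a Current_State (missing state).
if (FALSE) {
  toy_trans <- data.frame(
    Current_State = c(1, 1, 2, 3),
    Next_State    = c(2, 3, 1, 3),
    Transition_Probability = c(0.5, 0.5, 1, 1)
  )
  detect_problematic_states(toy_trans, scored_cells = c(1, 2, 3, 5))
  # missing_states = 5, self_transition_states = 3, no cyclic states
}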
# Detect cyclic states in a transition matrix
find_cyclic_states <- function(trans_table) {
all_states <- unique(c(trans_table$Current_State, trans_table$Next_State))
state_info <- trans_table %>%
dplyr::group_by(.data$Current_State) %>%
dplyr::summarize(next_states = list(unique(.data$Next_State)), .groups = 'drop')
adjacency <- setNames(lapply(state_info$next_states, unlist), as.character(state_info$Current_State))
two_way_cycles <- c()
one_way_traps <- c()
processed_pairs <- character(0)
for (state_a in all_states) {
if (!as.character(state_a) %in% names(adjacency)) next
next_states_a <- adjacency[[as.character(state_a)]]
other_states_a <- next_states_a[next_states_a != state_a]
for (state_b in other_states_a) {
pair_key <- paste(sort(c(state_a, state_b)), collapse = "-")
if (pair_key %in% processed_pairs) next
processed_pairs <- c(processed_pairs, pair_key)
if (!as.character(state_b) %in% names(adjacency)) next
next_states_b <- adjacency[[as.character(state_b)]]
states_in_system <- c(state_a, state_b)
reaches_outside_a <- setdiff(next_states_a, states_in_system)
reaches_outside_b <- setdiff(next_states_b, states_in_system)
if (length(reaches_outside_a) == 0 && length(reaches_outside_b) == 0) {
a_can_reach_b <- state_b %in% next_states_a
b_can_reach_a <- state_a %in% next_states_b
if (a_can_reach_b && b_can_reach_a) two_way_cycles <- c(two_way_cycles, state_a, state_b)
else if (a_can_reach_b && !b_can_reach_a) {
if (length(next_states_b) == 1 && next_states_b[1] == state_b) one_way_traps <- c(one_way_traps, state_a, state_b)
} else if (!a_can_reach_b && b_can_reach_a) {
if (length(next_states_a) == 1 && next_states_a[1] == state_a) one_way_traps <- c(one_way_traps, state_a, state_b)
}
}
}
}
unique(c(two_way_cycles, one_way_traps))
}
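# Illustration (not run when sourced): states 4 and 5 form a closed two-way
# cycle (each reaches only the other), so both are flagged; state 1 also
# reaches state 2 outside the pair, so it is not.
if (FALSE) {
  cyc <- data.frame(
    Current_State = c(1, 1, 4, 5),
    Next_State    = c(2, 4, 5, 4)
  )
  find_cyclic_states(cyc)  # returns 4 and 5
}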
# ============================================================================
# MISSING MSM HELPER FUNCTIONS
# ============================================================================
# Validate MSM clustering for problematic states
validate_msm_clustering <- function(cluster_data, problematic_states, n_nearest_neighbor,
centroid_2d_points, verbose = TRUE) {
if (length(problematic_states) == 0) {
return(list(is_valid = TRUE, message = "No problematic states found"))
}
validation_issues <- c()
for (prob_state in problematic_states) {
prob_cluster <- cluster_data$cluster[cluster_data$Cell.ID == prob_state]
cluster_members <- cluster_data$Cell.ID[cluster_data$cluster == prob_cluster]
valid_neighbors <- setdiff(cluster_members, c(prob_state, problematic_states))
if (verbose) {
cat("Problematic state", prob_state, "in cluster", prob_cluster,
"- Valid neighbors:", length(valid_neighbors), "\n")
}
if (length(valid_neighbors) == 0) {
validation_issues <- c(validation_issues,
paste0("State ", prob_state, " has no valid neighbors in cluster ", prob_cluster))
} else if (length(valid_neighbors) < n_nearest_neighbor) {
validation_issues <- c(validation_issues,
paste0("State ", prob_state, " has only ", length(valid_neighbors),
" valid neighbors, but ", n_nearest_neighbor, " requested"))
}
}
if (length(validation_issues) > 0) {
return(list(
is_valid = FALSE,
message = paste(validation_issues, collapse = "; ")
))
}
list(is_valid = TRUE, message = "MSM clustering validation passed")
}
# Choose a nearest non-problematic neighbor for a problematic state
find_nearest_neighbor_enhanced <- function(problematic_state,
centroid_2d_points,
cluster_data,
problematic_states,
n_nearest_neighbor,
scored_cells,
initial_state) {
if (is.null(cluster_data)) {
valid_states <- setdiff(scored_cells, problematic_states)
# Index into valid_states so sample() does not fall back to sampling 1:n when only one state remains
if (length(valid_states) > 0) return(valid_states[sample(length(valid_states), 1)])
warning("No valid fallback states available, returning initial state")
return(initial_state)
}
current_cluster <- cluster_data$cluster[cluster_data$Cell.ID == problematic_state]
coords <- centroid_2d_points[centroid_2d_points$Cell.ID %in%
cluster_data$Cell.ID[cluster_data$cluster == current_cluster], ]
current_coords <- coords[coords$Cell.ID == problematic_state, c("x", "y")]
distances <- sqrt((coords$x - current_coords$x)^2 + (coords$y - current_coords$y)^2)
valid_neighbors <- coords$Cell.ID[!coords$Cell.ID %in% c(problematic_state, problematic_states)]
if (length(valid_neighbors) == 0) {
warning(paste0("No valid neighbors for problematic state ", problematic_state,
" in cluster ", current_cluster, ". Using global fallback."))
all_coords <- centroid_2d_points
all_distances <- sqrt((all_coords$x - current_coords$x)^2 +
(all_coords$y - current_coords$y)^2)
global_valid <- setdiff(all_coords$Cell.ID, c(problematic_state, problematic_states))
if (length(global_valid) > 0) {
global_neighbor_distances <- all_distances[all_coords$Cell.ID %in% global_valid]
closest_global <- global_valid[which.min(global_neighbor_distances)]
return(closest_global)
} else {
warning("No valid neighbors globally available, returning initial state")
return(initial_state)
}
}
neighbor_distances <- distances[coords$Cell.ID %in% valid_neighbors]
num_neighbors <- min(length(valid_neighbors), n_nearest_neighbor)
nearest_n <- valid_neighbors[order(neighbor_distances)][1:num_neighbors]
neighbor_probs <- 1/neighbor_distances[order(neighbor_distances)][1:num_neighbors]
neighbor_probs <- neighbor_probs/sum(neighbor_probs)
random_shock <- round(runif(1, min = 0, max = 1), 4)
cumulative_probs <- cumsum(neighbor_probs)
nearest_n[which.max(random_shock <= cumulative_probs)]
}
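# Illustration (not run when sourced): the neighbor replacement above draws one
# of the n nearest valid neighbours with probability proportional to inverse
# distance. A standalone sketch of that weighting and draw:
if (FALSE) {
  set.seed(1)
  dists <- c(a = 1, b = 2, c = 4)          # distances to candidate neighbours
  probs <- (1 / dists) / sum(1 / dists)    # inverse-distance weights
  cum_p <- cumsum(probs)
  shock <- round(runif(1), 4)              # uniform random shock
  names(dists)[which.max(shock <= cum_p)]  # closer neighbours are more likely
}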
# Single simulation step for MSM
simulate_step_enhanced <- function(i, current_state, random_val,
transition_probability_matrix,
centroid_2d_points,
cluster_data,
problematic_states,
n_nearest_neighbor,
scored_cells,
initial_state) {
random_shock <- if (is.null(random_val)) round(runif(1, min = 0, max = 1), 4) else random_val
if (i == 1) return(initial_state)
# Handle problematic states only if handle_problematic_states = TRUE (indicated by cluster_data being non-NULL)
# If cluster_data is NULL, just follow the transition matrix normally, even for problematic states
if (!is.null(cluster_data) && current_state %in% problematic_states) {
return(find_nearest_neighbor_enhanced(current_state, centroid_2d_points, cluster_data,
problematic_states, n_nearest_neighbor, scored_cells, initial_state))
}
transition <- subset(transition_probability_matrix, Current_State == current_state)
if (nrow(transition) == 0) {
if (current_state %in% scored_cells) return(current_state)
else return(find_nearest_neighbor_enhanced(current_state, centroid_2d_points, cluster_data,
problematic_states, n_nearest_neighbor, scored_cells, initial_state))
}
transition <- transition[order(-transition$Transition_Probability), ]
transition$cumulative_prob <- cumsum(transition$Transition_Probability)
next_state <- transition$Next_State[which.max(random_shock <= transition$cumulative_prob)]
next_state <- as.numeric(next_state)
# Replace problematic states only if handle_problematic_states = TRUE (cluster_data is non-NULL)
# If handle_problematic_states = FALSE, follow transition matrix even if it leads to problematic states
if (!is.null(cluster_data) && next_state %in% problematic_states) {
return(find_nearest_neighbor_enhanced(next_state, centroid_2d_points, cluster_data,
problematic_states, n_nearest_neighbor, scored_cells, initial_state))
}
# Always replace invalid states (not in scored_cells)
if (!next_state %in% scored_cells) {
return(find_nearest_neighbor_enhanced(next_state, centroid_2d_points, cluster_data,
problematic_states, n_nearest_neighbor, scored_cells, initial_state))
}
next_state
}
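# Illustration (not run when sourced): the ordinary (non-problematic) step is a
# cumulative-probability draw over the current state's transition rows, sorted
# by decreasing probability.
if (FALSE) {
  row_set <- data.frame(
    Next_State = c(7, 3, 9),
    Transition_Probability = c(0.2, 0.5, 0.3)
  )
  row_set <- row_set[order(-row_set$Transition_Probability), ]
  row_set$cumulative_prob <- cumsum(row_set$Transition_Probability)
  shock <- 0.65
  row_set$Next_State[which.max(shock <= row_set$cumulative_prob)]
  # 0.65 falls in (0.5, 0.8], so the draw lands on the second-most-likely state (9)
}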
# Simulate a full MSM sequence
simulate_sequence_enhanced <- function(sim_index, n_ahead,
initial_state, transition_probability_matrix,
centroid_2d_points, cluster_data, problematic_states,
n_nearest_neighbor, scored_cells) {
current_state <- initial_state
simulated_values <- sapply(1:n_ahead, function(i) {
random_val <- NULL
current_state <<- simulate_step_enhanced(i, current_state, random_val,
transition_probability_matrix,
centroid_2d_points, cluster_data,
problematic_states, n_nearest_neighbor,
scored_cells, initial_state)
current_state
})
as.numeric(simulated_values)
}
# Adjust invalid initial state using neighbors
adjust_initial_state <- function(initial_state, transition_cells, problematic_states,
scored_cells, centroid_2d_points, cluster_data,
n_nearest_neighbor) {
if (!(initial_state %in% transition_cells) || initial_state %in% problematic_states) {
return(find_nearest_neighbor_enhanced(initial_state, centroid_2d_points, cluster_data,
problematic_states, n_nearest_neighbor,
scored_cells, initial_state))
}
initial_state
}