Nothing
# Copyright (c) 2024 Omid Arhami omid.arhami@uga.edu
# R/core.R
#' TopoLow Core Functions
#' Vectorized Processing of Dissimilarity Matrix for Convergence Error Calculations
#'
#' @description
#' Efficiently processes elements of the dissimilarity matrix for calculating convergence error
#' using pre-processed numeric representations of thresholds. This optimized version
#' eliminates expensive string operations during optimization.
#'
#' @details
#' This function handles threshold logic for convergence error calculation by using
#' pre-processed numeric matrices:
#'
#' - For "greater than" thresholds (threshold_mask = 1): Returns the numeric value if the
#' calculated distance is less than the threshold, otherwise returns NA
#'
#' - For "less than" thresholds (threshold_mask = -1): Returns the numeric value if the
#' calculated distance is greater than the threshold, otherwise returns NA
#'
#' - For regular values (threshold_mask = 0): Returns the numeric value
#'
#' This function operates on entire matrices at once using vectorized operations,
#' which is significantly faster than processing each element individually.
#'
#' @param distances_numeric Numeric matrix. The numeric dissimilarity values (without threshold
#' indicators)
#' @param threshold_mask Integer matrix. Codes representing threshold types:
#' 1 for "greater than" (>), -1 for "less than" (<), or 0 for exact values
#' @param p_dist_mat Numeric matrix. The calculated distance matrix to compare against
#'
#' @return Numeric matrix with processed distance values. Elements where threshold
#' conditions are not satisfied will contain NA.
#'
#' @keywords internal
vectorized_process_distance_matrix <- function(distances_numeric, threshold_mask, p_dist_mat) {
# Create result matrix (defaults to NA)
result <- matrix(NA, nrow = nrow(distances_numeric), ncol = ncol(distances_numeric))
# Find indices for each threshold type
gt_indices <- which(threshold_mask == 1) # Greater than (>)
lt_indices <- which(threshold_mask == -1) # Less than (<)
normal_indices <- which(threshold_mask == 0) # Normal values
# Process greater than thresholds
if (length(gt_indices) > 0) {
valid <- !is.na(distances_numeric[gt_indices]) &
!is.na(p_dist_mat[gt_indices]) &
p_dist_mat[gt_indices] < distances_numeric[gt_indices]
if (any(valid)) {
result[gt_indices[valid]] <- distances_numeric[gt_indices[valid]]
}
}
# Process less than thresholds
if (length(lt_indices) > 0) {
valid <- !is.na(distances_numeric[lt_indices]) &
!is.na(p_dist_mat[lt_indices]) &
p_dist_mat[lt_indices] > distances_numeric[lt_indices]
if (any(valid)) {
result[lt_indices[valid]] <- distances_numeric[lt_indices[valid]]
}
}
# Process normal values
if (length(normal_indices) > 0) {
result[normal_indices] <- distances_numeric[normal_indices]
result[is.infinite(result)] <- NA
}
return(result)
}
#' Main topolow algorithm implementation
#'
#' @description
#' `r lifecycle::badge("stable")`
#'
#' topolow (topological stochastic pairwise reconstruction for Euclidean embedding) optimizes
#' point positions in an N-dimensional space to match a target dissimilarity matrix.
#' The algorithm uses a physics-inspired approach with spring and repulsive forces
#' to find optimal point configurations while handling missing and thresholded measurements.
#'
#' @details
#' The algorithm iteratively updates point positions using:
#' * Spring forces between points with measured dissimilarities.
#' * Repulsive forces between points without measurements.
#' * Conditional forces for thresholded measurements (< or >).
#' * An adaptive spring constant that decays over iterations.
#' * Convergence monitoring based on relative error change.
#' * Automatic matrix reordering to optimize convergence.
#' Consider if downstream analyses depend on specific point ordering: The order of points in
#' the output is adjusted to put high-dissimilarity points in the opposing ends.
#'
#' This function replaces the deprecated [create_topolow_map()]. The core algorithm
#' is identical, but includes performance improvements and enhanced validation.
#'
#' @param dissimilarity_matrix Matrix. A square, symmetric dissimilarity matrix. Can contain
#' NA values for missing measurements and character strings with < or > prefixes for
#' thresholded measurements.
#' @param ndim Integer. Number of dimensions for the embedding space.
#' @param mapping_max_iter Integer. Maximum number of map optimization iterations.
#' @param k0 Numeric. Initial spring constant controlling spring forces.
#' @param cooling_rate Numeric. Rate of spring constant decay per iteration (0 < cooling_rate < 1).
#' @param c_repulsion Numeric. Repulsion constant controlling repulsive forces.
#' @param relative_epsilon Numeric. Convergence threshold for relative change in error.
#' Default is 1e-4.
#' @param convergence_counter Integer. Number of iterations below threshold before declaring
#' convergence. Default is 5.
#' @param initial_positions Matrix or NULL. Optional starting coordinates. If NULL,
#' random initialization is used. Matrix should have nrow = nrow(dissimilarity_matrix)
#' and ncol = ndim.
#' @param write_positions_to_csv Logical. Whether to save point positions to a CSV file.
#' Default is FALSE.
#' @param output_dir Character. Directory to save the CSV file. Required if
#' `write_positions_to_csv` is TRUE.
#' @param verbose Logical. Whether to print progress messages. Default is FALSE.
#'
#' @return A `list` object of class `topolow`. This list contains the results of the
#' optimization and includes the following components:
#' \itemize{
#' \item `positions`: A `matrix` of the optimized point coordinates in the N-dimensional space.
#' \item `est_distances`: A `matrix` of the Euclidean distances between points in the final optimized configuration.
#' \item `mae`: The final Mean Absolute Error between the target dissimilarities and the estimated distances.
#' \item `iter`: The total number of iterations performed before the algorithm terminated.
#' \item `parameters`: A `list` containing the input parameters used for the optimization run.
#' \item `convergence`: A `list` containing the final convergence status, including a logical `achieved` flag and the final `error` value.
#' }
#'
#' @examples
#' # Create a simple dissimilarity matrix
#' dist_mat <- matrix(c(0, 2, 3, 2, 0, 4, 3, 4, 0), nrow=3)
#'
#' # Run topolow in 2D
#' result <- euclidean_embedding(
#' dissimilarity_matrix = dist_mat,
#' ndim = 2,
#' mapping_max_iter = 100,
#' k0 = 1.0,
#' cooling_rate = 0.001,
#' c_repulsion = 0.01,
#' verbose = FALSE
#' )
#'
#' # View results
#' head(result$positions)
#' print(result$mae)
#'
#' # Example with thresholded measurements
#' thresh_mat <- matrix(c(0, ">2", 3, ">2", 0, "<5", 3, "<5", 0), nrow=3)
#' result_thresh <- euclidean_embedding(
#' dissimilarity_matrix = thresh_mat,
#' ndim = 2,
#' mapping_max_iter = 50,
#' k0 = 0.5,
#' cooling_rate = 0.01,
#' c_repulsion = 0.001
#' )
#'
#' @seealso [create_topolow_map()] for the deprecated predecessor function.
#'
#' @importFrom stats runif dist hclust
#' @importFrom utils write.csv
#' @export
euclidean_embedding <- function(dissimilarity_matrix,
ndim,
mapping_max_iter = 1000,
k0,
cooling_rate,
c_repulsion,
relative_epsilon = 1e-4,
convergence_counter = 5,
initial_positions = NULL,
write_positions_to_csv = FALSE,
output_dir,
verbose = FALSE) {
# Input validation with informative messages
if (!is.matrix(dissimilarity_matrix)) {
stop("dissimilarity_matrix must be a matrix")
}
if (nrow(dissimilarity_matrix) != ncol(dissimilarity_matrix)) {
stop("dissimilarity_matrix must be square")
}
# Check if there are any finite non-zero dissimilarities
finite_dissim <- suppressWarnings(as.numeric(dissimilarity_matrix))
finite_dissim[is.infinite(finite_dissim)] <- NA
if (sum(!is.na(finite_dissim) & finite_dissim != 0) == 0) {
warning("No finite non-zero dissimilarities found. Results may be unreliable.")
}
if (!is.numeric(ndim) || ndim < 1 || ndim != round(ndim)) {
stop("ndim must be a positive integer")
}
if (!is.numeric(mapping_max_iter) || mapping_max_iter < 1 || mapping_max_iter != round(mapping_max_iter)) {
stop("mapping_max_iter must be a positive integer")
}
if (!is.numeric(k0) || k0 <= 0) {
stop("k0 must be a positive number")
}
if (k0 > 30) {
warning("High k0 value (> 30) may lead to instability")
}
if (!is.numeric(cooling_rate) || cooling_rate <= 0 || cooling_rate >= 1) {
stop("cooling_rate must be between 0 and 1")
}
if (!is.numeric(c_repulsion) || c_repulsion <= 0) {
stop("c_repulsion must be a positive number")
}
if (!is.numeric(relative_epsilon) || relative_epsilon <= 0) {
stop("relative_epsilon must be a positive number")
}
if (!is.numeric(convergence_counter) ||
convergence_counter < 1 ||
convergence_counter != round(convergence_counter)) {
stop("convergence_counter must be a positive integer")
}
# Validate initial positions if provided
if (!is.null(initial_positions)) {
if (!is.matrix(initial_positions)) {
stop("initial_positions must be a matrix")
}
if (nrow(initial_positions) != nrow(dissimilarity_matrix)) {
stop("initial_positions must have same number of rows as dissimilarity_matrix")
}
if (ncol(initial_positions) != ndim) {
stop("initial_positions must have ndim columns")
}
}
# Initialize variables
n <- nrow(dissimilarity_matrix)
if (n < 2) {
stop("dissimilarity_matrix must have at least 2 rows/columns")
}
###=== Reorder rows and columns so largest values are furthest from diagonal
if (n > 1) {
# Extract numeric values for clustering, handling threshold indicators
numeric_matrix <- matrix(NA, n, n)
for(i in 1:n) {
for(j in 1:n) {
val <- dissimilarity_matrix[i,j]
if(!is.na(val)) {
if(is.character(val)) {
# Remove < or > prefix and extract numeric value
numeric_matrix[i,j] <- as.numeric(gsub("^[<>]", "", val))
} else {
numeric_matrix[i,j] <- as.numeric(val)
}
}
}
}
# Create spectral ordering to concentrate largest values in corners
tryCatch({
# Calculate average dissimilarity for each point using only non-NA values
avg_dissim <- numeric(n)
for(i in 1:n) {
# Get all values for this point (excluding diagonal)
row_vals <- numeric_matrix[i, -i]
col_vals <- numeric_matrix[-i, i]
all_vals <- c(row_vals, col_vals)
# Calculate average using only non-NA values
non_na_vals <- all_vals[!is.na(all_vals)]
if(length(non_na_vals) > 0) {
avg_dissim[i] <- mean(non_na_vals)
} else {
avg_dissim[i] <- 0 # If no measurements, put in middle
}
}
# Check if we have meaningful averages (not all zeros)
if(sum(avg_dissim > 0) > 1) {
# Order points by average dissimilarity (descending)
# This puts high-dissimilarity points at extremes, low at center
new_order <- order(avg_dissim)
# Apply ordering to the original matrix (with NAs and thresholds intact)
dissimilarity_matrix <- dissimilarity_matrix[new_order, new_order]
if (verbose) cat("Matrix reordered for spectral pattern (largest values in corners)\n")
} else {
if (verbose) cat("Insufficient data for meaningful spectral ordering\n")
}
}, error = function(e) {
# If ordering fails, skip reordering
if (verbose) cat("Could not reorder matrix:", e$message, "\n")
})
}
# If initial positions provided, ensure they match the reordered matrix
if (!is.null(initial_positions)) {
if (!is.null(rownames(initial_positions)) &&
!is.null(rownames(dissimilarity_matrix))) {
if (!identical(rownames(initial_positions), rownames(dissimilarity_matrix))) {
# Reorder initial_positions to match
initial_positions <- initial_positions[rownames(dissimilarity_matrix), ]
}
}
}
#====
# Pre-compute NA status once
is_na_matrix <- is.na(dissimilarity_matrix)
node_degrees <- rowSums(!is_na_matrix)
node_degrees_1 <- node_degrees + 1
distances_numeric <- matrix(Inf, n, n)
threshold_mask <- matrix(0, n, n) # 0 = number, 1 = >, -1 = <
for(i in 1:n) {
for(j in 1:n) {
if(!is.na(dissimilarity_matrix[i,j])) {
if(is.character(dissimilarity_matrix[i,j])) {
if(startsWith(dissimilarity_matrix[i,j], ">")) {
distances_numeric[i,j] <- as.numeric(sub(">", "", dissimilarity_matrix[i,j]))
threshold_mask[i,j] <- 1
} else if(startsWith(dissimilarity_matrix[i,j], "<")) {
distances_numeric[i,j] <- as.numeric(sub("<", "", dissimilarity_matrix[i,j]))
threshold_mask[i,j] <- -1
} else {
distances_numeric[i,j] <- as.numeric(dissimilarity_matrix[i,j])
}
} else {
distances_numeric[i,j] <- dissimilarity_matrix[i,j]
}
}
}
}
# Initialize positions
positions <- if(is.null(initial_positions)) {
dissimilarity_matrix_numeric <- suppressWarnings(as.numeric(as.character(dissimilarity_matrix)))
dissimilarity_matrix_numeric[is.na(dissimilarity_matrix_numeric)] <- NA
init_step <- max(dissimilarity_matrix_numeric, na.rm=TRUE) / n
# random initialization
random_steps <- matrix(stats::runif((n-1) * ndim, 0, 2*init_step), nrow=n-1, ncol=ndim)
# First row stays zero, subsequent rows are cumulative sums
pos <- rbind(matrix(0, 1, ndim), apply(random_steps, 2, cumsum))
pos
} else {
initial_positions
}
rownames(positions) <- rownames(dissimilarity_matrix)
# Initialize convergence tracking
convergence_error0 <- 1e12
prev_convergence_error <- convergence_error0
k <- k0
# OPTIMIZATION 3: Create point pairs once
point_pairs <- matrix(0, n*(n-1)/2, 2)
pair_idx <- 1
for(i in 1:(n-1)) {
for(j in (i+1):n) {
point_pairs[pair_idx, ] <- c(i, j)
pair_idx <- pair_idx + 1
}
}
# OPTIMIZATION 4: Pre-compute which pairs have measured dissimilarities
has_measurement <- matrix(FALSE, nrow(point_pairs), 1)
for(idx in 1:nrow(point_pairs)) {
i <- point_pairs[idx, 1]
j <- point_pairs[idx, 2]
has_measurement[idx] <- distances_numeric[i, j] != Inf
}
################## Main optimization loop
for(iter in 1:mapping_max_iter) {
k_2 <- k / 2
stop <- FALSE
# Randomize point pair ordering for this iteration
random_order <- sample(nrow(point_pairs))
shuffled_pairs <- point_pairs[random_order,]
shuffled_has_measurement <- has_measurement[random_order]
# Calculate forces between pairs in random order
for(pair_idx in 1:nrow(shuffled_pairs)) {
i <- shuffled_pairs[pair_idx, 1]
j <- shuffled_pairs[pair_idx, 2]
delta <- positions[j,] - positions[i,]
distance <- sqrt(sum(delta^2))
distance_01 <- distance + 0.01
# Use pre-computed measurement status
if(shuffled_has_measurement[pair_idx]) {
# Inline logic for processing ideal distance based on threshold mask
if(threshold_mask[i, j] == 1) { # ">" case
ideal_distance_processed <- if(distance < distances_numeric[i, j]) distances_numeric[i, j] else NULL
} else if(threshold_mask[i, j] == -1) { # "<" case
ideal_distance_processed <- if(distance > distances_numeric[i, j]) distances_numeric[i, j] else NULL
} else { # normal value
ideal_distance_processed <- distances_numeric[i, j]
}
if (!is.null(ideal_distance_processed)) {
# Spring force
adjustment_factor <- 2*k*(ideal_distance_processed-distance)*delta/distance_01
positions[i,] <- positions[i,] - adjustment_factor/(4*node_degrees_1[i]+k)
positions[j,] <- positions[j,] + adjustment_factor/(4*node_degrees_1[j]+k)
} else {
# Repulsive force for >threshold measurements
force <- c_repulsion/(2*distance_01^2)*(delta/distance_01)
positions[i,] <- positions[i,] - force/node_degrees_1[i]
positions[j,] <- positions[j,] + force/node_degrees_1[j]
}
} else {
# Repulsive force for missing measurements
force <- c_repulsion/(2*distance_01^2)*(delta/distance_01)
positions[i,] <- positions[i,] - force/node_degrees_1[i]
positions[j,] <- positions[j,] + force/node_degrees_1[j]
}
}
# Check for numerical stability less frequently
if(iter %% 5 == 0) {
if(any(!is.finite(positions))) {
warning("Numerical instability detected at iteration ", iter)
stop <- TRUE
break
}
}
if(stop) break
# Calculate current distances and convergence error
p_dist_mat <- as.matrix(stats::dist(positions))
rownames(p_dist_mat) <- rownames(positions)
colnames(p_dist_mat) <- rownames(positions)
dissimilarity_matrix_convergence_error <- vectorized_process_distance_matrix(distances_numeric, threshold_mask, p_dist_mat)
# Vectorized convergence error calculation
valid_mask <- !is.na(dissimilarity_matrix_convergence_error)
convergence_error <- mean(abs(dissimilarity_matrix_convergence_error[valid_mask] - p_dist_mat[valid_mask]))
# Calculate evaluation metrics similarly
dissimilarity_matrix_raw <- suppressWarnings(as.numeric(dissimilarity_matrix))
valid_indices_raw <- !is.na(dissimilarity_matrix_raw)
mae <- mean(abs(dissimilarity_matrix_raw[valid_indices_raw] - p_dist_mat[valid_indices_raw]))
if (verbose) {
cat(sprintf(
"Iteration=%d, MAE=%.4f, convergence_error=%.4f\n",
iter, mae, convergence_error
))
}
# Check convergence
if (iter > 1) {
if (is.na(prev_convergence_error) ||
is.na(convergence_error) ||
convergence_error > convergence_error0 ||
is.na((prev_convergence_error - convergence_error)/prev_convergence_error)) {
if (verbose) {
cat(paste(
"! Skipping convergence check for this iteration.",
"Please check model parameters k, cooling_rate, and c_repulsion.\n"
))
cat(sprintf(
"ndim=%d, k0=%.4f, cooling_rate=%.4f, c_repulsion=%.4f, MAE=%.4f, convergence_error=%.4f\n",
ndim, k0, cooling_rate, c_repulsion, mae, convergence_error
))
}
} else {
if ((prev_convergence_error - convergence_error)/prev_convergence_error <
relative_epsilon) {
convergence_counter <- convergence_counter - 1
if(convergence_counter <= 0) {
if(verbose) cat("Convergence achieved\n")
break
}
}
prev_convergence_error <- convergence_error
}
}
# Update spring constant
k <- k * (1 - cooling_rate)
}
# Save positions if requested
if(write_positions_to_csv) {
if (!dir.exists(output_dir)) {
dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
}
if (missing(output_dir)) {
stop("An 'output_dir' must be provided when 'write_positions_to_csv' is TRUE.", call. = FALSE)
}
csv_filename <- sprintf(
"Positions_dim_%d_k0_%.4f_cooling_%.4f_c_repulsion_%.4f.csv",
ndim, k0, cooling_rate, c_repulsion
)
# Write to the SPECIFIED directory
full_path <- file.path(output_dir, csv_filename)
utils::write.csv(positions, file = full_path, row.names = TRUE)
}
# Create result object
result <- structure(
list(
positions = positions,
est_distances = p_dist_mat,
mae = mae,
iter = iter,
parameters = list(
ndim = ndim,
k0 = k0,
cooling_rate = cooling_rate,
c_repulsion = c_repulsion
),
convergence = list(
achieved = convergence_counter <= 0,
error = convergence_error
)
),
class = "topolow"
)
return(result)
}
#' Main TopoLow algorithm implementation (DEPRECATED)
#'
#' @description
#' `r lifecycle::badge("deprecated")`
#'
#' `create_topolow_map()` was deprecated in version 2.0.0 and will be removed in
#' a future version. Please use [euclidean_embedding()] instead, which provides
#' the same functionality with improved performance and additional features.
#'
#' @details
#' This function has been superseded by [euclidean_embedding()], which offers:
#' * Enhanced matrix reordering for better optimization
#' * Improved parameter validation with informative warnings
#' * Consistent naming convention (dissimilarity vs distance)
#' * Better documentation and examples
#'
#' The core algorithm remains identical, ensuring your results will be equivalent.
#' The main changes are:
#' * Parameter name: `distance_matrix` --> `dissimilarity_matrix`
#' * Function name: `create_topolow_map()` --> `euclidean_embedding()`
#'
#' @param distance_matrix Matrix. Square, symmetric distance matrix. Can contain NA values
#' for missing measurements and character strings with < or > prefixes for thresholded
#' measurements.
#' @param ndim Integer. Number of dimensions for the embedding space.
#' @param mapping_max_iter Integer. Maximum number of map optimization iterations.
#' @param k0 Numeric. Initial spring constant controlling spring forces.
#' @param cooling_rate Numeric. Rate of spring constant decay per iteration (0 < cooling_rate < 1).
#' @param c_repulsion Numeric. Repulsion constant controlling repulsive forces.
#' @param relative_epsilon Numeric. Convergence threshold for relative change in error.
#' Default is 1e-4.
#' @param convergence_counter Integer. Number of iterations below threshold before declaring
#' convergence. Default is 5.
#' @param initial_positions Matrix or NULL. Optional starting coordinates. If NULL,
#' random initialization is used. Matrix should have nrow = nrow(distance_matrix)
#' and ncol = ndim.
#' @param write_positions_to_csv Logical. Whether to save point positions to CSV file.
#' Default is FALSE.
#' @param output_dir Character. Directory to save CSV file. Required if
#' `write_positions_to_csv` is TRUE.
#' @param verbose Logical. Whether to print progress messages. Default is FALSE.
#'
#' @return A `list` object of class `topolow`. This list contains the results of the
#' optimization and includes the following components:
#' \itemize{
#' \item `positions`: A `matrix` of the optimized point coordinates in the n-dimensional space.
#' \item `est_distances`: A `matrix` of the Euclidean distances between points in the final optimized configuration.
#' \item `mae`: The final Mean Absolute Error between the target distances and the estimated distances.
#' \item `iter`: The total number of iterations performed before the algorithm terminated.
#' \item `parameters`: A `list` containing the input parameters used for the optimization run.
#' \item `convergence`: A `list` containing the final convergence status, including a logical `achieved` flag and the final `error` value.
#' }
#'
#' @examples
#' \donttest{
#' # Simple example (deprecated - use euclidean_embedding() instead)
#' dist_mat <- matrix(c(0, 2, 3, 2, 0, 4, 3, 4, 0), nrow=3)
#'
#' # This will generate a deprecation warning
#' result <- create_topolow_map(
#' dist_mat,
#' ndim = 2,
#' mapping_max_iter = 100,
#' k0 = 1.0,
#' cooling_rate = 0.001,
#' c_repulsion = 0.01,
#' verbose = FALSE
#' )
#'
#' # Recommended approach with new function:
#' result_new <- euclidean_embedding(
#' dissimilarity_matrix = dist_mat,
#' ndim = 2,
#' mapping_max_iter = 100,
#' k0 = 1.0,
#' cooling_rate = 0.001,
#' c_repulsion = 0.01,
#' verbose = FALSE
#' )
#' }
#'
#' @seealso [euclidean_embedding()] for the replacement function.
#'
#' @keywords internal
#' @export
create_topolow_map <- function(distance_matrix,
ndim,
mapping_max_iter=1000,
k0,
cooling_rate,
c_repulsion,
relative_epsilon = 1e-4,
convergence_counter = 3,
initial_positions = NULL,
write_positions_to_csv = FALSE,
output_dir,
verbose = FALSE) {
# Issue deprecation warning
lifecycle::deprecate_warn(
when = "2.0.0",
what = "create_topolow_map()",
with = "euclidean_embedding()",
details = c(
"i" = "The new function provides the same functionality with improvements:",
"*" = "Parameter name: 'distance_matrix' --> 'dissimilarity_matrix'",
"*" = "Enhanced matrix reordering for better optimization"
)
)
# Handle missing output_dir parameter for backward compatibility
if (write_positions_to_csv && missing(output_dir)) {
output_dir <- getwd()
warning("output_dir not specified, using current working directory")
}
# Call the new function with parameter mapping
result <- euclidean_embedding(
dissimilarity_matrix = distance_matrix,
ndim = ndim,
mapping_max_iter = mapping_max_iter,
k0 = k0,
cooling_rate = cooling_rate,
c_repulsion = c_repulsion,
relative_epsilon = relative_epsilon,
convergence_counter = convergence_counter,
initial_positions = initial_positions,
write_positions_to_csv = write_positions_to_csv,
output_dir = if(write_positions_to_csv) output_dir else NULL,
verbose = verbose
)
return(result)
}
#' Print method for topolow objects
#'
#' Provides a concise display of key optimization results from `euclidean_embedding`.
#'
#' @param x A `topolow` object returned by `euclidean_embedding()`.
#' @param ... Additional arguments passed to print (not used).
#' @return The original `topolow` object (invisibly). This function is called for its
#' side effect of printing a summary to the console.
#' @examples
#' # Create a simple dissimilarity matrix and run the optimization
#' dist_mat <- matrix(c(0, 2, 3, 2, 0, 4, 3, 4, 0), nrow=3)
#' result <- euclidean_embedding(dist_mat, ndim=2, mapping_max_iter=50,
#' k0=1.0, cooling_rate=0.001, c_repulsion=0.1,
#' verbose = FALSE)
#' # Print the result object
#' print(result)
#' @export
print.topolow <- function(x, ...) {
cat("topolow optimization result:\n")
cat(sprintf("Dimensions: %d\n", x$parameters$ndim))
cat(sprintf("Iterations: %d\n", x$iter))
cat(sprintf("MAE: %.4f\n", x$mae))
cat(sprintf("Convergence achieved: %s\n", x$convergence$achieved))
cat(sprintf("Final convergence error: %.4f\n", x$convergence$error))
invisible(x)
}
#' Summary method for topolow objects
#'
#' Provides a more detailed summary of the optimization results from `euclidean_embedding`,
#' including parameters, convergence, and performance metrics.
#'
#' @param object A `topolow` object returned by `euclidean_embedding()`.
#' @param ... Additional arguments passed to summary (not used).
#' @return No return value. This function is called for its side effect of
#' printing a detailed summary to the console.
#' @examples
#' # Create a simple dissimilarity matrix and run the optimization
#' dist_mat <- matrix(c(0, 2, 3, 2, 0, 4, 3, 4, 0), nrow=3)
#' result <- euclidean_embedding(dist_mat, ndim=2, mapping_max_iter=50,
#' k0=1.0, cooling_rate=0.001, c_repulsion=0.1,
#' verbose = FALSE)
#' # Summarize the result object
#' summary(result)
#' @export
summary.topolow <- function(object, ...) {
print(object)
cat("\nParameters:\n")
cat(sprintf("k0: %.4f\n", object$parameters$k0))
cat(sprintf("cooling_rate: %.4f\n", object$parameters$cooling_rate))
cat(sprintf("c_repulsion: %.4f\n", object$parameters$c_repulsion))
}
#' Automatic Euclidean Embedding with Parameter Optimization
#'
#' @description
#' A user-friendly wrapper function that automatically optimizes parameters and
#' performs Euclidean embedding on a dissimilarity matrix. This function handles
#' the entire workflow from parameter optimization to final embedding.
#'
#' @param dissimilarity_matrix Square symmetric dissimilarity matrix. Can contain
#' NA values for missing measurements and threshold indicators (< or >).
#' @param output_dir Character. Directory for saving optimization files and results.
#' Required - no default.
#' @param ndim_range Integer vector of length 2. Range for number of dimensions
#' (minimum, maximum). Default: c(2, 10)
#' @param k0_range Numeric vector of length 2. Range for initial spring constant
#' (minimum, maximum). Default: c(0.1, 15)
#' @param cooling_rate_range Numeric vector of length 2. Range for cooling rate
#' (minimum, maximum). Default: c(0.001, 0.07)
#' @param c_repulsion_range Numeric vector of length 2. Range for repulsion constant
#' (minimum, maximum). Default: c(0.001, 0.4)
#' @param n_initial_samples Integer. Number of samples for initial parameter
#' optimization. Default: 100
#' @param n_adaptive_samples Integer. Number of samples for adaptive refinement.
#' Default: 250
#' @param max_cores Integer. Maximum number of cores to use. Default: NULL (auto-detect)
#' @param folds Integer. Number of cross-validation folds. Default: 20
#' @param mapping_max_iter Integer. Maximum iterations for final embedding.
#' Half this value is used for parameter search. Default: 1000
#' @param clean_intermediate Logical. Whether to remove intermediate files. Default: TRUE
#' @param verbose Character. Verbosity level: "off" (no output), "standard" (progress updates),
#' or "full" (detailed output including from internal functions). Default: "standard"
#' @param fallback_to_defaults Logical. Whether to use default parameters if
#' optimization fails. Default: TRUE
#' @param save_results Logical. Whether to save the final positions as CSV. Default: FALSE
#'
#' @return A list containing:
#' \item{positions}{Matrix of optimized coordinates}
#' \item{est_distances}{Matrix of estimated distances}
#' \item{mae}{Mean absolute error}
#' \item{optimal_params}{List of optimal parameters found, including cross-validation MAE during optimization}
#' \item{optimization_summary}{Summary of the optimization process}
#' \item{data_characteristics}{Summary of input data characteristics}
#' \item{runtime}{Total runtime in seconds}
#'
#' @examples
#' # Example 1: Basic usage with small matrix
#' test_data <- data.frame(
#' object = rep(paste0("Obj", 1:4), each = 4),
#' reference = rep(paste0("Ref", 1:4), 4),
#' score = sample(c(1, 2, 4, 8, 16, 32, 64, "<1", ">12"), 16, replace = TRUE)
#' )
#' dist_mat <- list_to_matrix(
#' data = test_data, # Pass the data frame, not file path
#' object_col = "object",
#' reference_col = "reference",
#' value_col = "score",
#' is_similarity = TRUE
#' )
#' \dontrun{
#' # Note: output_dir is required for actual use
#' result <- Euclidify(
#' dissimilarity_matrix = dist_mat,
#' output_dir = tempdir() # Use temp directory for example
#' )
#' coordinates <- result$positions
#' }
#'
#' # Example 2: Using custom parameter ranges
#' \dontrun{
#' result <- Euclidify(
#' dissimilarity_matrix = dist_mat,
#' output_dir = tempdir(),
#' n_initial_samples = 10,
#' n_adaptive_samples = 7,
#' verbose = "off"
#' )
#' }
#'
#' # Example 3: Handling missing data
#' dist_mat_missing <- dist_mat
#' dist_mat_missing[1, 3] <- dist_mat_missing[3, 1] <- NA
#' \dontrun{
#' result <- Euclidify(
#' dissimilarity_matrix = dist_mat_missing,
#' output_dir = tempdir(),
#' n_initial_samples = 10,
#' n_adaptive_samples = 7,
#' verbose = "off"
#' )
#' }
#'
#' # Example 4: Using threshold indicators
#' dist_mat_threshold <- dist_mat
#' dist_mat_threshold[1, 2] <- ">2"
#' dist_mat_threshold[2, 1] <- ">2"
#' \dontrun{
#' result <- Euclidify(
#' dissimilarity_matrix = dist_mat_threshold,
#' output_dir = tempdir(),
#' n_initial_samples = 10,
#' n_adaptive_samples = 7,
#' verbose = "off"
#' )
#' }
#'
#' # Example 5: Parallel processing with custom cores
#' \dontrun{
#' result <- Euclidify(
#' dissimilarity_matrix = dist_mat,
#' output_dir = tempdir(),
#' max_cores = 4,
#' n_adaptive_samples = 100,
#' save_results = TRUE # Save positions to CSV
#' )
#' }
#'
#' @export
Euclidify <- function(dissimilarity_matrix,
output_dir,
ndim_range = c(2, 10),
k0_range = c(0.1, 20),
cooling_rate_range = c(0.0001, 0.1),
c_repulsion_range = c(0.0001, 1),
n_initial_samples = 50,
n_adaptive_samples = 150,
max_cores = NULL,
folds = 20,
mapping_max_iter = 500,
clean_intermediate = TRUE,
verbose = "standard",
fallback_to_defaults = FALSE,
save_results = FALSE) {
# Start timing
start_time <- Sys.time()
# Input validation
if (missing(output_dir)) {
stop("output_dir must be specified.")
}
if (!is.matrix(dissimilarity_matrix)) {
stop("dissimilarity_matrix must be a matrix")
}
if (nrow(dissimilarity_matrix) != ncol(dissimilarity_matrix)) {
stop("dissimilarity_matrix must be square")
}
if (length(ndim_range) != 2 || ndim_range[1] > ndim_range[2]) {
stop("ndim_range must be a vector of length 2 with min <= max")
}
if (length(k0_range) != 2 || k0_range[1] > k0_range[2]) {
stop("k0_range must be a vector of length 2 with min <= max")
}
if (length(cooling_rate_range) != 2 || cooling_rate_range[1] < 0 || cooling_rate_range[2] > 1 ||
cooling_rate_range[1] > cooling_rate_range[2]) {
stop("cooling_rate_range must be a vector of length 2 with 0 <= min <= max <= 1")
}
if (length(c_repulsion_range) != 2 || c_repulsion_range[1] < 0 || c_repulsion_range[2] < c_repulsion_range[1]) {
stop("c_repulsion_range must be a vector of length 2 with min >= 0 and min <= max")
}
if (!verbose %in% c("off", "standard", "full")) {
stop("verbose must be 'off', 'standard', or 'full'")
}
# Set verbose flags for internal functions
verbose_internal <- (verbose == "full")
verbose_main <- (verbose %in% c("standard", "full"))
# Calculate mapping_max_iter for search (half of final)
mapping_max_iter_search <- round(mapping_max_iter / 2)
# Determine cores
if (is.null(max_cores)) {
available_cores <- future::availableCores()
max_cores <- max(1, available_cores - 1)
}
if (verbose_main) {
cat("=== TOPOLOW AUTOMATIC EUCLIDEAN EMBEDDING ===\n")
cat("Dataset size:", nrow(dissimilarity_matrix), "x", ncol(dissimilarity_matrix), "\n")
cat("Missing values:", sum(is.na(dissimilarity_matrix)),
"(", round(sum(is.na(dissimilarity_matrix)) / (nrow(dissimilarity_matrix)^2) * 100, 1), "%)\n")
cat("Using", max_cores, "cores\n")
cat("Output directory:", output_dir, "\n\n")
}
# Create output directory structure
if (!dir.exists(output_dir)) {
dir.create(output_dir, recursive = TRUE)
}
optimization_dir <- file.path(output_dir, "optimization")
if (dir.exists(optimization_dir) && clean_intermediate) {
unlink(optimization_dir, recursive = TRUE)
}
if (!dir.exists(optimization_dir)) {
dir.create(optimization_dir, recursive = TRUE)
}
# Initialize results storage
optimization_summary <- list()
optimal_params <- NULL
# Step 1: Assess data characteristics
if (verbose_main) cat("Step 1: Assessing data characteristics...\n")
# Calculate non-Euclidean character
n <- nrow(dissimilarity_matrix)
# Convert to numeric matrix properly (fixing the bug)
dissim_numeric <- matrix(suppressWarnings(as.numeric(as.vector(dissimilarity_matrix))),
nrow = n, ncol = n)
dissim_numeric[is.na(dissim_numeric)] <- median(dissim_numeric, na.rm = TRUE)
D_squared <- dissim_numeric^2
J <- diag(n) - (1/n) * matrix(1, n, n)
B <- -0.5 * J %*% D_squared %*% J
eigenvals <- eigen(B, symmetric = TRUE)$values
positive_eigenvals <- eigenvals[eigenvals > 1e-12]
negative_eigenvals <- eigenvals[eigenvals < -1e-12]
if (length(positive_eigenvals) > 0) {
deviation_score <- sum(abs(negative_eigenvals)) / sum(positive_eigenvals)
} else {
deviation_score <- 1.0
}
# Determine optimal dimensions based on eigenvalues
cumulative_variance <- cumsum(positive_eigenvals) / sum(positive_eigenvals)
dims_90_percent <- which(cumulative_variance >= 0.90)[1]
if (is.na(dims_90_percent)) dims_90_percent <- length(positive_eigenvals)
dims_90_percent <- max(1, dims_90_percent) # Ensure at least 1 dimension
if (verbose_main) {
cat(" Non-Euclidean deviation score:", round(deviation_score, 4), "\n")
cat(" (Higher values indicate greater deviation from Euclidean geometry.\n")
cat(" Values > 0.1 suggest significant non-Euclidean structure.)\n")
cat(" Dimensions for 90% variance:", dims_90_percent, "\n")
}
# Adjust ndim_range based on data characteristics (improved logic)
original_max <- ndim_range[2]
suggested_max <- dims_90_percent + 2
if (suggested_max < ndim_range[1]) {
if (verbose_main) {
cat(" WARNING: Suggested max dimensions (", suggested_max,
") is less than your minimum (", ndim_range[1], ").\n", sep = "")
cat(" Consider lowering ndim_range minimum for better results.\n")
}
} else if (suggested_max < ndim_range[2]) {
ndim_range[2] <- suggested_max
if (verbose_main) {
cat(" Adjusted max dimensions from", original_max, "to", ndim_range[2],
"based on data characteristics.\n")
}
}
# Step 2: Initial parameter optimization (returns log-transformed parameters)
if (verbose_main) cat("\nStep 2: Initial parameter optimization...\n")
tryCatch({
initial_results <- initial_parameter_optimization(
dissimilarity_matrix = dissimilarity_matrix,
mapping_max_iter = mapping_max_iter_search,
relative_epsilon = 1e-4,
convergence_counter = 3,
scenario_name = "auto_optimization",
N_min = ndim_range[1],
N_max = ndim_range[2],
k0_min = k0_range[1],
k0_max = k0_range[2],
cooling_rate_min = cooling_rate_range[1],
cooling_rate_max = cooling_rate_range[2],
c_repulsion_min = c_repulsion_range[1],
c_repulsion_max = c_repulsion_range[2],
num_samples = n_initial_samples,
max_cores = max_cores,
folds = folds,
verbose = verbose_internal,
write_files = TRUE,
output_dir = optimization_dir
)
optimization_summary$initial_results <- initial_results
if (nrow(initial_results) > 0) {
if (verbose_main) {
cat(" Initial optimization completed successfully\n")
cat(" Best initial MAE:", round(min(initial_results$Holdout_MAE, na.rm = TRUE), 4), "\n")
cat(" Parameters are in log scale for adaptive sampling\n")
}
# Step 3: Adaptive sampling refinement (parameters are already log-transformed)
samples_file <- file.path(optimization_dir, "model_parameters",
"auto_optimization_model_parameters.csv")
if (file.exists(samples_file)) {
if (verbose_main) cat("\nStep 3: Adaptive parameter refinement...\n")
tryCatch({
run_adaptive_sampling(
initial_samples_file = samples_file,
scenario_name = "auto_optimization",
dissimilarity_matrix = dissimilarity_matrix,
max_cores = max_cores,
num_samples = n_adaptive_samples,
mapping_max_iter = mapping_max_iter,
relative_epsilon = 1e-5,
folds = folds,
output_dir = optimization_dir,
verbose = verbose_internal
)
}, error = function(e) {
if (verbose_main) cat(" WARNING: Adaptive sampling failed:", e$message, "\n")
if (verbose_main) cat(" Continuing with initial optimization results\n")
optimization_summary$adaptive_error <- e$message
})
# Step 4: Extract optimal parameters (improved handling)
final_params_file <- file.path(optimization_dir, "model_parameters",
"auto_optimization_model_parameters.csv")
if (file.exists(final_params_file)) {
final_params <- tryCatch({
results <- read.csv(final_params_file, stringsAsFactors = FALSE)
# More robust cleaning
results <- results %>%
filter(!is.na(.data$NLL) & !is.na(.data$Holdout_MAE) &
is.finite(.data$NLL) & is.finite(.data$Holdout_MAE)) %>%
na.omit()
if (nrow(results) > 5) { # Only apply outlier cleaning if we have enough data
results <- as.data.frame(lapply(results, clean_data, k = 3))
results <- na.omit(results)
}
results
}, error = function(e) {
if (verbose_main) cat(" WARNING: Error reading final results:", e$message, "\n")
data.frame() # Return empty data frame
})
if (nrow(final_params) > 0) {
# Find best parameters (they are already in log scale)
best_idx <- which.min(final_params$Holdout_MAE)
optimal_params <- list(
ndim = round(exp(final_params$log_N[best_idx])),
k0 = exp(final_params$log_k0[best_idx]),
cooling_rate = exp(final_params$log_cooling_rate[best_idx]),
c_repulsion = exp(final_params$log_c_repulsion[best_idx]),
CV_MAE = final_params$Holdout_MAE[best_idx],
nll = final_params$NLL[best_idx]
)
# Validate optimal parameters are reasonable
if (optimal_params$ndim < 1 || optimal_params$ndim > 50 ||
optimal_params$k0 <= 0 || optimal_params$k0 > 100 ||
optimal_params$cooling_rate <= 0 || optimal_params$cooling_rate >= 1 ||
optimal_params$c_repulsion <= 0 || optimal_params$c_repulsion > 10) {
if (verbose_main) cat(" WARNING: Optimal parameters are unreasonable, using fallback\n")
optimal_params <- NULL
} else {
if (verbose_main) {
cat(" Adaptive refinement completed successfully\n")
cat(" Optimal parameters found:\n")
cat(" - Dimensions:", optimal_params$ndim, "\n")
cat(" - k0:", round(optimal_params$k0, 4), "\n")
cat(" - cooling_rate:", round(optimal_params$cooling_rate, 6), "\n")
cat(" - c_repulsion:", round(optimal_params$c_repulsion, 6), "\n")
cat(" - Cross-validation MAE:", round(optimal_params$CV_MAE, 4), "\n")
cat(" We recommend saving these parameters to set better ranges and \n")
cat(" reduced iterations in future runs. It saves time.\n")
# Issue warning if parameters are too close (0.05 of the corresponding range) to their range limits
if (abs(optimal_params$ndim - ndim_range[1]) < 0.05 * (ndim_range[2] - ndim_range[1]) ||
abs(optimal_params$k0 - k0_range[1]) < 0.05 * (k0_range[2] - k0_range[1]) ||
abs(optimal_params$cooling_rate - cooling_rate_range[1]) < 0.05 * (cooling_rate_range[2] - cooling_rate_range[1]) ||
abs(optimal_params$c_repulsion - c_repulsion_range[1]) < 0.05 * (c_repulsion_range[2] - c_repulsion_range[1]) ||
abs(optimal_params$ndim - ndim_range[2]) < 0.05 * (ndim_range[2] - ndim_range[1]) ||
abs(optimal_params$k0 - k0_range[2]) < 0.05 * (k0_range[2] - k0_range[1]) ||
abs(optimal_params$cooling_rate - cooling_rate_range[2]) < 0.05 * (cooling_rate_range[2] - cooling_rate_range[1]) ||
abs(optimal_params$c_repulsion - c_repulsion_range[2]) < 0.05 * (c_repulsion_range[2] - c_repulsion_range[1])) {
cat(" WARNING: Some optimal parameters are very close to their range limits.\n")
}
cat(" This may indicate the need for wider parameter ranges in future runs.\n")
}
optimization_summary$final_params <- final_params
}
}
}
}
}
}, error = function(e) {
if (verbose_main) cat(" WARNING: Parameter optimization failed:", e$message, "\n")
optimization_summary$error <- e$message
})
# Fallback to initial results if adaptive failed
if (is.null(optimal_params) && !is.null(optimization_summary$initial_results)) {
initial_results <- optimization_summary$initial_results
if (nrow(initial_results) > 0) {
# Initial results are now in log scale, so we need to extract them properly
valid_results <- initial_results %>%
filter(!is.na(.data$Holdout_MAE) & !is.na(.data$log_N) & !is.na(.data$log_k0) &
!is.na(.data$log_cooling_rate) & !is.na(.data$log_c_repulsion) &
is.finite(.data$Holdout_MAE))
if (nrow(valid_results) > 0) {
best_idx <- which.min(valid_results[["Holdout_MAE"]])
optimal_params <- list(
ndim = round(exp(valid_results$log_N[best_idx])),
k0 = exp(valid_results$log_k0[best_idx]),
cooling_rate = exp(valid_results$log_cooling_rate[best_idx]),
c_repulsion = exp(valid_results$log_c_repulsion[best_idx]),
CV_MAE = valid_results$Holdout_MAE[best_idx]
)
if (verbose_main) cat(" Using initial optimization results\n")
}
}
}
# Warn if no optimal parameters found and stop the operation
if (is.null(optimal_params) && !fallback_to_defaults) {
if (verbose_main) {
cat(" WARNING: No optimal parameters found from initial or adaptive optimization.\n")
cat(" Consider adjusting parameter ranges or if you just want to get a less\n")
cat(" accurate map with non-optimized parameters, set fallback_to_defaults = TRUE.\n")
}
stop(" STOP: Parameter optimization failed and fallback is disabled")
}
# Enhanced fallback to default parameters with better data-driven selection
if (is.null(optimal_params) && fallback_to_defaults) {
# Respect user's ndim_range while considering data characteristics
fallback_ndim <- max(ndim_range[1], min(ndim_range[2], max(2, dims_90_percent)))
# Scale default parameters based on matrix size and characteristics
matrix_size <- nrow(dissimilarity_matrix)
sparsity <- sum(is.na(dissimilarity_matrix)) / (matrix_size^2)
# Adjust parameters based on data characteristics
base_k0 <- if (sparsity > 0.5) 8.0 else 6.0 # Higher k0 for sparse data
base_cooling <- if (sparsity > 0.5) 0.005 else 0.01 # Slower cooling for sparse data
base_repulsion <- if (deviation_score > 0.2) 0.01 else 0.02 # Higher repulsion for non-Euclidean data
optimal_params <- list(
ndim = fallback_ndim,
k0 = base_k0,
cooling_rate = base_cooling,
c_repulsion = base_repulsion
)
if (verbose_main) {
cat("\n Using data-driven default parameters:\n")
cat(" - Dimensions:", optimal_params$ndim, "(based on eigenvalue analysis)\n")
cat(" - k0:", optimal_params$k0, "(adjusted for sparsity:", round(sparsity*100, 1), "%)\n")
cat(" - cooling_rate:", optimal_params$cooling_rate, "\n")
cat(" - c_repulsion:", optimal_params$c_repulsion, "(adjusted for non-Euclidean score:", round(deviation_score, 3), ")\n")
}
optimization_summary$used_defaults <- TRUE
optimization_summary$default_reasoning <- list(
sparsity = sparsity,
deviation_score = deviation_score,
dims_90_percent = dims_90_percent
)
}
# Step 4: Final embedding with optimal parameters
if (verbose_main) cat("\nStep 4: Running final embedding with optimal parameters...\n")
embedding_result <- tryCatch({
euclidean_embedding(
dissimilarity_matrix = dissimilarity_matrix,
ndim = optimal_params$ndim,
mapping_max_iter = mapping_max_iter * 2, # Extra iterations for final
k0 = optimal_params$k0,
cooling_rate = optimal_params$cooling_rate,
c_repulsion = optimal_params$c_repulsion,
relative_epsilon = 1e-6,
convergence_counter = 5,
verbose = verbose_internal,
write_positions_to_csv = save_results,
output_dir = output_dir
)
}, error = function(e) {
if (verbose_main) cat(" ERROR: Final embedding failed:", e$message, "\n")
NULL
})
if (is.null(embedding_result)) {
stop("Final embedding failed")
}
if (verbose_main) {
cat(" Embedding completed successfully\n")
cat(" Final MAE:", round(embedding_result$mae, 4), "\n")
cat(" Iterations:", embedding_result$iter, "\n")
cat(" Convergence achieved:", embedding_result$convergence$achieved, "\n")
}
# Clean up intermediate files
if (clean_intermediate) {
unlink(optimization_dir, recursive = TRUE)
if (verbose_main) cat("\nCleaned up intermediate files\n")
}
# Prepare final results (without redundant embedding_result)
results <- list(
positions = embedding_result$positions,
est_distances = embedding_result$est_distances,
mae = embedding_result$mae,
optimal_params = optimal_params,
optimization_summary = optimization_summary,
data_characteristics = list(
n_objects = nrow(dissimilarity_matrix),
missing_percentage = sum(is.na(dissimilarity_matrix)) / (nrow(dissimilarity_matrix)^2) * 100,
non_euclidean_score = deviation_score,
eigenvalue_dims_90 = dims_90_percent
),
runtime = difftime(Sys.time(), start_time, units = "secs")
)
# Save positions as CSV if requested
if (save_results) {
positions_file <- file.path(output_dir, "euclidify_positions.csv")
write.csv(embedding_result$positions, positions_file, row.names = TRUE)
if (verbose_main) cat("\nPositions saved to:", positions_file, "\n")
}
if (verbose_main) {
cat("\n=== EUCLIDIFY COMPLETED ===\n")
cat("Total runtime:", round(as.numeric(results$runtime), 1), "seconds\n")
cat("Final embedding dimensions:", optimal_params$ndim, "\n")
cat("Final MAE:", round(embedding_result$mae, 4), "\n")
}
return(results)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.