R/surreal-image.R
In surreal: Create Datasets with Hidden Images in Residual Plots

Documented in surreal_image

#' Check if a string is a URL
#'
#' @param x Character string to check
#'
#' @return Logical
#'
#' @noRd
is_url <- function(x) {

  grepl("^https?://", x, ignore.case = TRUE)
}

#' Download a URL to a temporary file
#'
#' @param url URL to download
#'
#' @return Path to the temporary file
#'
#' @noRd
download_to_temp <- function(url) {
  ext <- tolower(tools::file_ext(url))
  # Handle URLs with query strings

ext <- sub("\\?.*$", "", ext)
  if (!nzchar(ext)) ext <- "png"

  temp_file <- tempfile(fileext = paste0(".", ext))

  tryCatch(
    {
      utils::download.file(url, temp_file, mode = "wb", quiet = TRUE)
    },
    error = function(e) {
      cli::cli_abort(c(
        "Failed to download image from URL: {.url {url}}",
        "x" = conditionMessage(e)
      ))
    }
  )

  temp_file
}

#' Load an image file in various formats
#'
#' @param image_path Path to the image file or URL
#'
#' @return A numeric array (2D for grayscale, 3D for RGB/RGBA)
#'
#' @noRd
load_image_file <- function(image_path) {
  # Handle URLs by downloading to temp file
 if (is_url(image_path)) {
    image_path <- download_to_temp(image_path)
    on.exit(unlink(image_path), add = TRUE)
  } else if (!file.exists(image_path)) {
    cli::cli_abort(c(
      "Image file not found: {.file {image_path}}",
      "i" = "Check that the file path is correct."
    ))
  }

  ext <- tolower(tools::file_ext(image_path))
  # Handle URLs with query strings
  ext <- sub("\\?.*$", "", ext)

  image <- switch(ext,
    "png" = {
      png::readPNG(image_path)
    },
    "jpg" = ,
    "jpeg" = {
      require_packages("jpeg")
      jpeg::readJPEG(image_path)
    },
    "bmp" = {
      require_packages("bmp")
      bmp::read.bmp(image_path)
    },
    "tif" = ,
    "tiff" = {
      require_packages("tiff")
      tiff::readTIFF(image_path)
    },
    "svg" = {
      require_packages("rsvg")
      # Render SVG to bitmap (returns raw array)
      rsvg::rsvg(image_path) / 255
    },
    cli::cli_abort(c(
      "Unsupported image format: {.val {ext}}",
      "i" = "Supported formats: PNG, JPEG, BMP, TIFF, SVG"
    ))
  )

  image
}

#' Convert image array to grayscale
#'
#' @param image Numeric array from image loading
#'
#' @return 2D numeric matrix with values 0-1
#'
#' @noRd
image_to_grayscale <- function(image) {
  if (length(dim(image)) == 2) {
    # Already grayscale
    return(image)
  }

  if (length(dim(image)) == 3) {
    n_channels <- dim(image)[3]

    if (n_channels >= 3) {
      # RGB or RGBA - use luminance formula (ITU-R BT.601)
      gray <- 0.299 * image[, , 1] + 0.587 * image[, , 2] + 0.114 * image[, , 3]
    } else {
      # Single channel or grayscale with alpha
      gray <- image[, , 1]
    }
    return(gray)
  }

  cli::cli_abort(c(
    "Unexpected image dimensions: {.val {paste(dim(image), collapse = ' x ')}}",
    "i" = "Expected a 2D or 3D array."
  ))
}

#' Auto-detect mode (dark or light) based on image histogram
#'
#' @param gray_image 2D grayscale matrix (values 0-1)
#'
#' @return Character: "dark" or "light"
#'
#' @noRd
auto_detect_mode <- function(gray_image) {
  median_val <- stats::median(as.vector(gray_image), na.rm = TRUE)
  # If image is mostly light (median > 0.5), subject is likely dark
  if (median_val > 0.5) "dark" else "light"
}

#' Calculate optimal threshold using Otsu's method
#'
#' Finds the threshold that maximizes between-class variance,
#' effectively separating foreground from background.
#'
#' @param gray_image 2D grayscale matrix (values 0-1)
#'
#' @return Numeric threshold value between 0 and 1
#'
#' @noRd
otsu_threshold <- function(gray_image) {
  vals <- as.vector(gray_image)

  # Create histogram with 256 bins
  breaks <- seq(0, 1, length.out = 257)
  h <- graphics::hist(vals, breaks = breaks, plot = FALSE)
  counts <- h$counts
  mids <- h$mids

  total <- sum(counts)
  sum_all <- sum(mids * counts)

  sum_bg <- 0
  weight_bg <- 0
 max_variance <- 0
  best_threshold <- 0.5

  for (i in seq_along(counts)) {
    weight_bg <- weight_bg + counts[i]
    if (weight_bg == 0) next

    weight_fg <- total - weight_bg
    if (weight_fg == 0) break

    sum_bg <- sum_bg + mids[i] * counts[i]

    mean_bg <- sum_bg / weight_bg
    mean_fg <- (sum_all - sum_bg) / weight_fg

    # Between-class variance
    variance <- weight_bg * weight_fg * (mean_bg - mean_fg)^2

    if (variance > max_variance) {
      max_variance <- variance
      best_threshold <- mids[i]
    }
  }

  best_threshold
}

#' Auto-estimate max_points based on image size
#'
#' @param n_extracted Number of points extracted from image
#' @param img_area Total image area (width * height)
#'
#' @return Integer max_points or NULL if no downsampling needed
#'
#' @noRd
auto_max_points <- function(n_extracted, img_area) {
  # Target: 2000-5000 points for good quality without being too slow
  target <- min(5000L, max(2000L, as.integer(sqrt(img_area) * 5)))

  # Only downsample if needed
  if (n_extracted <= target) NULL else target
}

#' Extract x,y coordinates from grayscale image based on threshold
#'
#' @param gray_image 2D grayscale matrix (values 0-1)
#' @param mode "dark" or "light"
#' @param threshold Numeric threshold value (0-1)
#' @param invert_y Flip y coordinates
#'
#' @return List with x and y coordinate vectors
#'
#' @noRd
extract_points_from_image <- function(gray_image, mode, threshold, invert_y) {
  if (mode == "dark") {
    activated <- which(gray_image < threshold, arr.ind = TRUE)
  } else {
    activated <- which(gray_image > threshold, arr.ind = TRUE)
  }

  if (nrow(activated) == 0) {
    cli::cli_abort(c(
      "No points found with threshold {.val {threshold}} and mode {.val {mode}}.",
      "i" = "Try adjusting the {.arg threshold} value.",
      "i" = "For {.val dark} mode, pixels below threshold are selected.",
      "i" = "For {.val light} mode, pixels above threshold are selected."
    ))
  }

  x <- activated[, 2]
  y <- if (invert_y) {
    nrow(gray_image) - activated[, 1] + 1
  } else {
    activated[, 1]
  }

  list(x = x, y = y)
}

#' Downsample points if they exceed max_points
#'
#' @param coords List with x and y vectors
#' @param max_points Maximum number of points
#' @param verbose Print info about downsampling
#'
#' @return List with (potentially downsampled) x and y vectors
#'
#' @noRd
downsample_points <- function(coords, max_points, verbose = FALSE) {
  n_points <- length(coords$x)

  if (is.null(max_points) || n_points <= max_points) {
    if (verbose) {
      cli::cli_alert_info("Using {.val {n_points}} points from image.")
    }
    return(coords)
  }

  if (verbose) {
    cli::cli_alert_info(
      "Downsampling from {.val {n_points}} to {.val {max_points}} points."
    )
  }

  idx <- sample(seq_len(n_points), max_points)

  list(
    x = coords$x[idx],
    y = coords$y[idx]
  )
}

#' Apply the surreal method to an image file
#'
#' This function loads an image file, extracts pixel coordinates based on
#' a brightness threshold, and applies the surreal method to create a dataset
#' where the image appears in the residual plot.
#'
#' @param image_path Character. Path to an image file or a URL (PNG, JPEG, BMP, TIFF, or SVG).
#' @param mode Character. Either `"auto"` (default) to automatically detect,
#'   `"dark"` to select dark pixels, or `"light"` to select light pixels.
#' @param threshold Numeric or `NULL`. Value between 0 and 1 for grayscale
#'   threshold. If `NULL` (default), automatically calculated using Otsu's method.
#'   For `"dark"` mode, pixels below threshold are selected.
#'   For `"light"` mode, pixels above threshold are selected.
#' @param max_points Integer or `NULL`. Maximum number of points to use. If
#'   `NULL` (default), automatically estimated based on image size (typically
#'   2000-5000 points). Set to `Inf` to use all points without downsampling.
#' @param invert_y Logical. If `TRUE`, flip y-coordinates so image appears
#'   right-side up in residual plot. Default is `TRUE`.
#' @inheritParams surreal
#'
#' @return A `data.frame` containing the results of the surreal method
#'   application with columns `y`, `X1`, `X2`, ..., `Xp`.
#'
#' @details
#' By default, all parameters are automatically detected:
#' - **mode**: Detected from image histogram (dark subject on light background or vice versa)
#' - **threshold**: Calculated using Otsu's method to optimally separate foreground/background
#' - **max_points**: Estimated based on image dimensions (2000-5000 points)
#'
#' You can override any of these by specifying explicit values.
#'
#' **Input Support:**
#' - Local file paths
#' - URLs (http:// or https://) - images are downloaded to a temporary file
#'
#' **Format Support:**
#' - PNG: Supported via the `png` package (included)
#' - JPEG: Requires the `jpeg` package
#' - BMP: Requires the `bmp` package
#' - TIFF: Requires the `tiff` package
#' - SVG: Requires the `rsvg` package (renders vector graphics to bitmap)
#'
#' @examples
#' \dontrun{
#' # Simplest usage - everything auto-detected
#' result <- surreal_image("https://www.r-project.org/logo/Rlogo.png")
#' model <- lm(y ~ ., data = result)
#' plot(model$fitted, model$residuals, pch = 16)
#'
#' # Override specific parameters
#' result <- surreal_image("image.png", mode = "dark", threshold = 0.3)
#'
#' # Use all points (no downsampling)
#' result <- surreal_image("image.png", max_points = Inf)
#' }
#'
#' @seealso
#' [surreal()] for details on the surreal method parameters.
#' [surreal_text()] for embedding text instead of images.
#'
#' @export
surreal_image <- function(
    image_path,
    mode = "auto",
    threshold = NULL,
    max_points = NULL,
    invert_y = TRUE,
    R_squared = 0.3,
    p = 5,
    n_add_points = 40,
    max_iter = 100,
    tolerance = 0.01,
    verbose = FALSE) {

  mode <- match.arg(mode, c("auto", "dark", "light"))

  if (!is.character(image_path) || length(image_path) != 1) {
    cli::cli_abort("{.arg image_path} must be a single character string.")
  }

  if (!is.null(threshold)) {
    if (!is.numeric(threshold) || threshold < 0 || threshold > 1) {
      cli::cli_abort(
        "{.arg threshold} must be a numeric value between 0 and 1, or NULL for auto-detection (got {.val {threshold}})."
      )
    }
  }

  # Handle max_points: NULL = auto, Inf = no limit, positive integer = explicit
  user_max_points <- max_points
  if (!is.null(max_points) && !is.infinite(max_points)) {
    if (!is.numeric(max_points) || max_points < 1) {
      cli::cli_abort(
        "{.arg max_points} must be a positive integer, Inf, or NULL for auto-detection (got {.val {max_points}})."
      )
    }
    max_points <- as.integer(max_points)
  }

  # Step 1: Load the image
  if (verbose) cli::cli_alert_info("Loading image: {.file {image_path}}")
  image <- load_image_file(image_path)

  # Step 2: Convert to grayscale
  if (verbose) cli::cli_alert_info("Converting to grayscale.")
  gray <- image_to_grayscale(image)

  if (verbose) {
    cli::cli_alert_info(
      "Image dimensions: {.val {nrow(gray)}} x {.val {ncol(gray)}}"
    )
  }

  # Step 3: Auto-detect mode if needed
  if (mode == "auto") {
    mode <- auto_detect_mode(gray)
    if (verbose) {
      cli::cli_alert_success("Auto-detected mode: {.val {mode}}")
    }
  }

  # Step 4: Auto-detect threshold if needed
  if (is.null(threshold)) {
    threshold <- otsu_threshold(gray)
    if (verbose) {
      cli::cli_alert_success(
        "Auto-detected threshold (Otsu): {.val {round(threshold, 3)}}"
      )
    }
  }

  # Step 5: Extract coordinates based on threshold and mode
  if (verbose) {
    cli::cli_alert_info(
      "Extracting points with mode = {.val {mode}}, threshold = {.val {round(threshold, 3)}}."
    )
  }
  coords <- extract_points_from_image(gray, mode, threshold, invert_y)

  # Step 6: Auto-estimate max_points if needed
  if (is.null(user_max_points)) {
    img_area <- nrow(gray) * ncol(gray)
    max_points <- auto_max_points(length(coords$x), img_area)
    if (verbose && !is.null(max_points)) {
      cli::cli_alert_success("Auto-estimated max_points: {.val {max_points}}")
    }
  } else if (is.infinite(user_max_points)) {
    max_points <- NULL
  }

  # Step 7: Downsample if necessary
  coords <- downsample_points(coords, max_points, verbose)

  # Step 8: Apply the surreal method
  if (verbose) cli::cli_alert_info("Applying surreal method.")
  result <- surreal(
    y_hat = coords$x,
    R_0 = coords$y,
    R_squared = R_squared,
    p = p,
    n_add_points = n_add_points,
    max_iter = max_iter,
    tolerance = tolerance,
    verbose = verbose
  )

  result
}

Any scripts or data that you put into this service are public.

surreal documentation built on Jan. 11, 2026, 9:07 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

surreal
Create Datasets with Hidden Images in Residual Plots

R/surreal-image.R
In surreal: Create Datasets with Hidden Images in Residual Plots

Defines functions surreal_image downsample_points extract_points_from_image auto_max_points otsu_threshold auto_detect_mode image_to_grayscale load_image_file download_to_temp is_url

Documented in surreal_image

Try the surreal package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

surreal Create Datasets with Hidden Images in Residual Plots

R/surreal-image.R In surreal: Create Datasets with Hidden Images in Residual Plots

Defines functions surreal_image downsample_points extract_points_from_image auto_max_points otsu_threshold auto_detect_mode image_to_grayscale load_image_file download_to_temp is_url

Documented in surreal_image

Try the surreal package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

surreal
Create Datasets with Hidden Images in Residual Plots

R/surreal-image.R
In surreal: Create Datasets with Hidden Images in Residual Plots