R/2d.R

Defines functions find_non_empty_bins update_trimesh_index gen_edges calc_2d_dist tri_bin_centroids extract_hexbin_mean extract_hexbin_centroids hex_binning find_pts compute_std_counts assign_data gen_hex_coord gen_centroids

Documented in assign_data calc_2d_dist compute_std_counts extract_hexbin_centroids extract_hexbin_mean find_non_empty_bins find_pts gen_centroids gen_edges gen_hex_coord hex_binning tri_bin_centroids update_trimesh_index

#' Generate centroid coordinate
#'
#' This function generates all possible centroids in the hexagonal grid.
#'
#' @param nldr_obj A list of a tibble contains scaled first and second columns
#' of NLDR data, and numeric vectors representing the limits of the original NLDR data.
#' @param b1 Number of bins along the x axis.
#' @param q The buffer amount as proportion of data range.
#'
#' @return A tibble contains hexIDs (\code{h}),
#' x and y coordinates (\code{c_x}, \code{c_y} respectively)
#' of all hexagon bin centroids.
#'
#' @examples
#' gen_centroids(nldr_obj = scurve_model_obj$nldr_obj, b1 = 5, q = 0.1)
#'
#' @export
gen_centroids <- function(nldr_obj, b1 = 5, q = 0.1){

  ## To check whether b2 greater than 2
  if (b1 < 2) {
    cli::cli_abort("Number of bins along the x-axis at least should be 2.")
  }

  ## To check whether q is between a specific range
  if (!dplyr::between(q, 0.05, 0.2)) {
    cli::cli_abort("The buffer should be within 0.05 and 0.2.")
  }

  ## To compute the range
  lim1 <- nldr_obj$lim1
  lim2 <- nldr_obj$lim2
  r2 <- diff(lim2)/diff(lim1)

  ## To compute hexagonal configurations
  bin_obj <- calc_bins_y(nldr_obj = nldr_obj, b1 = b1, q = q)

  # To obtain the bins along the y-axis
  b2 <- bin_obj$b2

  # To obtain the width of the hexagon
  a1 <- bin_obj$a1

  ## To initialise starting point coordinates
  s1 <- -q
  s2 <- -q * r2

  # Generate x-coordinate of centroids for odd rows
  c_x_vec_odd <- seq(s1, by = a1, length.out = b1) ## since x range is 0-1

  # Generate x-coordinate of centroids for even rows
  c_x_vec_even <- c_x_vec_odd + a1/2
  c_x_vec <- c(c_x_vec_odd, c_x_vec_even)

  # To compute vertical spacing
  a2 <- sqrt(3) * a1/2

  # Generate y-coordinate of centroids
  c_y_vec <- seq(s2, by = a2, length.out = b2)
  c_y <- rep(c_y_vec, each = b1)

  ## Do the number of belongs y axis is even or odd and adjust the x-coordinates
  if ((b2 %% 2) == 0) {

    c_x <- rep(c_x_vec, b2/2)

  } else {

    c_x <- append(rep(c_x_vec, floor(b2/2)), c_x_vec_odd)

  }

  centroids_data <- tibble::tibble(h = 1:length(c_x), c_x = c_x, c_y = c_y)

  return(centroids_data)

}

#' Generate hexagonal polygon coordinates
#'
#' This function generates the coordinates of hexagons after passing
#' hexagonal centroids.
#'
#' @param centroids_data The dataset with all hexagonal bin IDs
#' and centroid coordinates.
#' @param a1 The width of the hexagon.
#'
#' @return A tibble contains hexagon id (\code{h}),
#' x and y coordinates (\code{x}, \code{y}) of hexagons.
#'
#' @examples
#' width <- scurve_model_obj$hb_obj$a1
#' all_centroids_df <- scurve_model_obj$hb_obj$centroids
#' gen_hex_coord(centroids_data = all_centroids_df, a1 = width)
#'
#' @export
gen_hex_coord <- function(centroids_data, a1) {
  if (missing(a1)) cli::cli_abort("Need to initialize the width of the hexagon.")
  gen_hex_coord_cpp(centroids_data$h, centroids_data$c_x, centroids_data$c_y, a1)
}

#' Assign data to hexagons
#'
#' This function assigns the data to hexagons.
#'
#' @param nldr_obj A list of a tibble contains scaled first and second columns
#' of NLDR data, and numeric vectors representing the limits of the original NLDR data.
#' @param centroids_data The dataset with centroid coordinates.
#'
#' @return A tibble contains embedding components (\code{emb1}, \code{emb2})
#' and corresponding hexagon ID (\code{h}).
#'
#' @examples
#' all_centroids_df <- scurve_model_obj$hb_obj$centroids
#' assign_data(nldr_obj = scurve_model_obj$nldr_obj,
#' centroids_data = all_centroids_df)
#'
#' @export
assign_data <- function(nldr_obj, centroids_data) {

  scaled_nldr_df <- nldr_obj$scaled_nldr

  # Select embedding dimensions
  matrix_nldr <- as.matrix(scaled_nldr_df[, 1:2])
  centroid_matrix <- as.matrix(centroids_data[, 2:3])

  # Use C++ function to find nearest centroid indices
  min_column <- compute_highd_dist(matrix_nldr, centroid_matrix)

  # Map to hex bin IDs
  hb_ids <- centroids_data$h[min_column]

  # Add hex bin ID column
  scaled_nldr_df$h <- hb_ids

  return(scaled_nldr_df)
}

#' Compute standardise counts in hexagons
#'
#' This function computes the standardize number of points within each hexagon.
#'
#' @param scaled_nldr_hexid A tibble that contains the scaled embedding with hexagonal bin IDs.
#'
#' @return A tibble that contains hexagon IDs (\code{h}), bin counts (\code{n_h}),
#' and standardize counts (\code{w_h}).
#' @importFrom dplyr count mutate rename
#'
#' @examples
#' umap_with_hb_id <- scurve_model_obj$hb_obj$data_hb_id
#' compute_std_counts(scaled_nldr_hexid = umap_with_hb_id)
#'
#' @export
compute_std_counts <- function(scaled_nldr_hexid) {

  ## Group by hexagon IDs
  std_df <- scaled_nldr_hexid |>
    count(h) |>
    mutate(w_h = n/sum(n)) |>
    rename(n_h = n)

  return(std_df)

}

#' Find points in hexagonal bins
#'
#' This function maps points to their corresponding hexagonal bins.
#'
#' @param scaled_nldr_hexid A tibble that contains the scaled embedding with hexagonal bin IDs.
#'
#' @return A tibble with hexagonal bin IDs (\code{h}) and the corresponding points.
#' @importFrom dplyr filter pull summarize
#' @importFrom tibble tibble
#'
#' @examples
#' umap_with_hb_id <- scurve_model_obj$hb_obj$data_hb_id
#' find_pts(scaled_nldr_hexid = umap_with_hb_id)
#'
#' @export
find_pts <- function(scaled_nldr_hexid) {

  pts_df <- scaled_nldr_hexid |>
    group_by(h) |>
    summarize(pts_list = list(ID), .groups = "drop")

  return(pts_df)

}


#' Hexagonal binning
#'
#' This function generates the hexagonal object.
#'
#' @param nldr_obj A list of a tibble contains scaled first and second columns
#' of NLDR data, and numeric vectors representing the limits of the original NLDR data.
#' @param b1 Number of bins along the x axis.
#' @param q The buffer amount as proportion of data range.
#'
#' @return A object that contains numeric vector that contains binwidths (\code{a1}),
#' vertical distance (\code{a2}), bins along the x and y axes respectively (\code{bins}),
#' numeric vector that contains hexagonal
#' starting point coordinates all hexagonal bin centroids (\code{centroids}),
#' hexagonal coordinates of the full grid (\code{hex_poly}),
#' embedding components with their corresponding hexagon IDs (\code{data_hb_id}),
#' hex bins with their corresponding standardise counts (\code{std_cts}),
#' total number of hex bins (\code{b}), number of non-empty hex bins (\code{m})
#' and points within each hexagon (\code{pts_bins}).
#'
#'
#' @examples
#' hex_binning(nldr_obj = scurve_model_obj$nldr_obj, b1 = 5, q = 0.1)
#'
#' @export
hex_binning <- function(nldr_obj, b1 = 5, q = 0.1) {

  scaled_nldr <- nldr_obj$scaled_nldr

  ## To compute the range
  lim1 <- nldr_obj$lim1
  lim2 <- nldr_obj$lim2
  r2 <- diff(lim2)/diff(lim1)

  ## To compute the number of bins along the y-axis
  bin_obj <- calc_bins_y(nldr_obj = nldr_obj, b1 = b1, q = q)
  b2 <- bin_obj$b2

  ## To obtain the width of the hexagon
  a1 <- bin_obj$a1

  # To compute vertical spacing
  a2 <- sqrt(3) * a1/2

  ## To initialise starting point coordinates
  s1 <- -q
  s2 <- -q * r2

  ## To generate all the centroids of the grid
  all_centroids_df <- gen_centroids(nldr_obj = nldr_obj, b1 = b1, q = q)

  ## To generate the hexagon coordinates
  all_hex_coord <- gen_hex_coord(centroids_data = all_centroids_df, a1 = a1)

  ## To find which 2-D embedding assigned to which hexagon
  nldr_hex_id <- assign_data(nldr_obj = nldr_obj, centroids_data = all_centroids_df)

  ## To generate standardize counts of each hexagon
  std_df <- compute_std_counts(scaled_nldr_hexid = nldr_hex_id)

  ## To find which points are within each hexagon
  pts_df <- find_pts(scaled_nldr_hexid = nldr_hex_id)

  ## To generate the object of hexagon info
  hex_bin_obj <- list(a1 = a1,
                      a2 = a2,
                      bins = c(b1, b2),
                      start_point = c(s1, s2),
                      centroids = all_centroids_df,
                      hex_poly = all_hex_coord,
                      data_hb_id = nldr_hex_id,
                      std_cts = std_df,
                      b = NROW(all_centroids_df),
                      m = length(std_df$h),
                      pts_bins = pts_df
  )
  class(hex_bin_obj) <- "hex_bin_obj"

  return(hex_bin_obj)

}


#' Extract hexagonal bin centroids coordinates and the corresponding standardise counts.
#'
#' @param centroids_data A tibble that contains all hexagonal bin centroid
#' coordinates with hexagon IDs.
#' @param counts_data A tibble that contains hexagon IDs with the standardise
#' number of points within each hexagon.
#'
#' @return A tibble contains hexagon ID (\code{h}), centroid coordinates (\code{c_x}, \code{c_y}),
#' bin counts (\code{n_h}), and standardise counts (\code{w_h}).
#'
#' @examples
#' all_centroids_df <- scurve_model_obj$hb_obj$centroids
#' counts_data <- scurve_model_obj$hb_obj$std_cts
#' extract_hexbin_centroids(centroids_data = all_centroids_df,
#' counts_data = counts_data)
#'
#' @export
extract_hexbin_centroids <- function(centroids_data, counts_data) {

  # Fast merge without unnecessary sorting
  merged_data <- merge(centroids_data, counts_data, by = "h", all = TRUE)

  # Replace NA values with 0 for both columns
  merged_data$w_h[is.na(merged_data$w_h)] <- 0
  merged_data$n_h[is.na(merged_data$n_h)] <- 0

  return(merged_data)
}

#' Extract hexagonal bin mean coordinates and the corresponding standardize counts.
#'
#' @param data_hb A tibble with embedding components and hexagonal bin IDs.
#' @param counts_data A tibble that contains hexagon IDs with the standardise
#' number of points within each hexagon.
#' @param centroids_data A tibble that contains all hexagonal bin centroid
#' coordinates with hexagon IDs.
#'
#' @return A tibble contains hexagon ID (\code{h}), bin means (\code{c_x}, \code{c_y}),
#' bin counts (\code{n_h}), and standardise counts (\code{w_h}).
#'
#' @examples
#' all_centroids_df <- scurve_model_obj$hb_obj$centroids
#' counts_data <- scurve_model_obj$hb_obj$std_cts
#' umap_with_hb_id <- scurve_model_obj$hb_obj$data_hb_id
#' extract_hexbin_mean(data_hb = umap_with_hb_id, counts_data = counts_data,
#' centroids_data = all_centroids_df)
#'
#' @export
extract_hexbin_mean <- function(data_hb, counts_data, centroids_data) {

  ## To arrange the hexagon IDs
  counts_data <- counts_data |>
    dplyr::arrange(h)

  ## To join the datasets
  centroids_data <- dplyr::full_join(centroids_data, counts_data, by = "h") |>
    dplyr::select(-c(c_x, c_y))

  ## To compute hexagonal bin means
  hex_mean_df <- data_hb |>
    dplyr::select(-ID) |>
    dplyr::group_by(h) |>
    dplyr::summarise(dplyr::across(tidyselect::everything(), mean)) |>
    dplyr::arrange(h)

  ## Rename columns
  names(hex_mean_df) <- c("h", "c_x", "c_y")

  centroids_data <- dplyr::right_join(centroids_data, hex_mean_df, by = "h") |>
    dplyr::select(h, c_x, c_y, n_h, w_h)

  # Replace NA values with 0 for both columns
  centroids_data$w_h[is.na(centroids_data$w_h)] <- 0
  centroids_data$n_h[is.na(centroids_data$n_h)] <- 0

  return(centroids_data)
}

#' Triangulate bin centroids
#'
#' This function triangulates the bin centroids using the x and y coordinates
#' provided in the input data frame and returns the triangular object.
#'
#' @param centroids_data The tibble containing the all the bin centroids.
#'
#' @return A triangular object representing the triangulated bin centroids.
#' @importFrom interp tri.mesh
#'
#' @examples
#' all_centroids_df <- scurve_model_obj$hb_obj$centroids
#' counts_data <- scurve_model_obj$hb_obj$std_cts
#' umap_with_hb_id <- scurve_model_obj$hb_obj$data_hb_id
#' df_bin_centroids <- extract_hexbin_mean(data_hb = umap_with_hb_id,
#' counts_data = counts_data, centroids_data = all_centroids_df)
#' tri_bin_centroids(centroids_data = df_bin_centroids)
#'
#' @export
tri_bin_centroids <- function(centroids_data){
  tr <- tri.mesh(centroids_data[["c_x"]], centroids_data[["c_y"]])

  # Create a list to store the triangulation object and the counts
  result <- list(
    trimesh_object = tr,
    n_h = centroids_data[["n_h"]]
  )

  return(result)
}

#' Calculate 2-D Euclidean distances between vertices
#'
#' This function calculates the 2-D distances between pairs of points in a data frame.
#'
#' @param trimesh_data A tibble that contains the x and y coordinates of start
#' and end points.
#' @param select_vars selected columns in the resulting data frame.
#'
#' @return A tibble with columns for the starting point, ending point,
#' and calculated distances.
#' @importFrom dplyr select mutate
#' @importFrom tidyselect all_of
#'
#' @examples
#' tr_from_to_df <- scurve_model_obj$trimesh_data
#' calc_2d_dist(trimesh_data = tr_from_to_df)
#'
#' @export
calc_2d_dist <- function(trimesh_data,
                         select_vars = c("from", "to", "x_from", "y_from", "x_to",
                                         "y_to", "from_count", "to_count", "distance")) {

  # Calculate distances using Rcpp
  dist <- calc_2d_dist_cpp(
    trimesh_data$x_from,
    trimesh_data$y_from,
    trimesh_data$x_to,
    trimesh_data$y_to
  )

  # Add distances and return selected columns
  trimesh_data <- trimesh_data |>
    dplyr::mutate(distance = dist) |>
    dplyr::select(dplyr::all_of(select_vars))

  return(trimesh_data)
}

#' Generate edge information
#'
#' This function generates edge information from a given triangular object,
#' including the coordinates of the vertices and the from-to relationships
#' between the vertices.
#'
#' @param tri_object The triangular object from which to generate edge information.
#' @param a1 A numeric value for bin width.
#'
#' @return A tibble that contains the edge information, including the from-to
#' relationships and the corresponding x and y coordinates.
#' @importFrom tibble tibble as_tibble
#' @importFrom dplyr mutate filter rename distinct left_join select
#' @importFrom interp triangles
#'
#' @examples
#' all_centroids_df <- scurve_model_obj$hb_obj$centroids
#' counts_data <- scurve_model_obj$hb_obj$std_cts
#' umap_with_hb_id <- scurve_model_obj$hb_obj$data_hb_id
#' df_bin_centroids <- extract_hexbin_centroids(counts_data = counts_data,
#' centroids_data = all_centroids_df)
#' tr1_object <- tri_bin_centroids(centroids_data = df_bin_centroids)
#' gen_edges(tri_object = tr1_object, a1 = scurve_model_obj$hb_obj$a1)
#'
#' @export
gen_edges <- function(tri_object, a1) { #centroids_data
  tri <- tri_object$trimesh_object
  counts <- tri_object$n_h

  # Create tr_df with coordinates and bin counts
  n <- length(tri$x)
  tr_df <- data.frame(
    ID = seq_len(n),
    x = tri$x,
    y = tri$y,
    n_obs = counts
  )

  # Get triangles
  trang <- triangles(tri)

  # Build edges: from-to
  tr_arcs_df <- data.frame(
    from = c(trang[, 1], trang[, 1], trang[, 2]),
    to = c(trang[, 2], trang[, 3], trang[, 3])
  )

  # Canonical edge representation: lower ID first
  from <- pmin(tr_arcs_df$from, tr_arcs_df$to)
  to   <- pmax(tr_arcs_df$from, tr_arcs_df$to)
  edges_all <- unique(data.frame(from, to))

  # Vectorized coordinate and count lookup
  x_from <- tri$x[edges_all$from]
  y_from <- tri$y[edges_all$from]
  x_to   <- tri$x[edges_all$to]
  y_to   <- tri$y[edges_all$to]
  from_count <- counts[edges_all$from]
  to_count   <- counts[edges_all$to]

  # Compute Euclidean distance directly (vectorized)
  dx <- x_from - x_to
  dy <- y_from - y_to
  dist <- sqrt(dx^2 + dy^2)

  # Keep only close-enough edges
  max_dist <- sqrt(a1^2 + (sqrt(3) * a1 / 2)^2)
  keep <- dist <= max_dist

  # Final output
  edge_data <- tibble::tibble(
    from = edges_all$from[keep],
    to = edges_all$to[keep],
    x_from = x_from[keep],
    y_from = y_from[keep],
    x_to = x_to[keep],
    y_to = y_to[keep],
    from_count = from_count[keep],
    to_count = to_count[keep]
  )

  return(edge_data)
}

#' Update from and to values in trimesh data
#'
#' This function update the from and to indexes.
#'
#' @param trimesh_data A tibble that contains wireframe data.
#'
#' @return A tibble that contains the updated edge information.
#'
#' @importFrom dplyr left_join mutate select
#'
#' @examples
#' tr_from_to_df <- scurve_model_obj$trimesh_data
#' update_trimesh_index(trimesh_data = tr_from_to_df)
#'
#' @export
update_trimesh_index <- function(trimesh_data) {

  # Create unique sorted vector of node IDs
  unique_values <- sort(unique(c(trimesh_data$from, trimesh_data$to)))

  # Create named vector mapping old IDs to new ones
  id_map <- setNames(seq_along(unique_values), unique_values)

  # Vectorized replacement using the map
  trimesh_data$from <- id_map[as.character(trimesh_data$from)]
  trimesh_data$to   <- id_map[as.character(trimesh_data$to)]

  return(trimesh_data)

}

#' Find the number of bins required to achieve required number of non-empty bins.
#'
#' This function determines the number of bins along the x and y axes
#' to obtain a specific number of non-empty bins.
#'
#' @param nldr_obj A list of a tibble contains scaled first and second columns
#' of NLDR data, and numeric vectors representing the limits of the original NLDR data.
#' @param m The desired number of non-empty bins.
#' @param q The buffer amount as proportion of data range.
#'
#' @return The number of bins along the x and y axes
#' needed to achieve a specific number of non-empty bins.
#'
#' @examples
#' find_non_empty_bins(nldr_obj = scurve_model_obj$nldr_obj, m = 5)
#'
#' @export
find_non_empty_bins <- function(nldr_obj, m = 2, q = 0.1) {

  ## To check whether q is between a specific range
  if (!dplyr::between(q, 0.05, 0.2)) {
    cli::cli_abort("The buffer should be within 0.05 and 0.2.")
  }

  scaled_nldr_data <- nldr_obj$scaled_nldr

  max_bins_along_axis <- ceiling(sqrt(NROW(scaled_nldr_data)))

  ## Since having 1 bin along x or y-axis is not obvious therefore started from 2
  num_bins_x_vec <- 2:max_bins_along_axis

  ## To initialise the number of bins along the x-axis
  b1 <- num_bins_x_vec[1]

  ### Generate the full grid
  hb_obj <- hex_binning(nldr_obj = nldr_obj, b1 = b1, q = q)

  ## To compute the number of bins along the y-axis
  b2 <- hb_obj$bins[2]

  num_of_non_empty_bins <- hb_obj$m

  i <- 1

  while (num_of_non_empty_bins < m) {

    i <- i + 1

    if (length(num_bins_x_vec) < i) {
      stop("There is no matching number of bins along the x and y axes
           for the required number of non empty bins.")
    }

    b1 <- num_bins_x_vec[2]

    ### Generate the full grid
    hb_obj <- hex_binning(nldr_obj = nldr_obj, b1 = b1, q = q)

    ## To compute the number of bins along the y-axis
    b2 <- hb_obj$bins[2]

    num_of_non_empty_bins <- hb_obj$m

    if (num_of_non_empty_bins >= m) {
      return(list(b1 = b1, b2 = b2))
      break
    } else {
      next
    }
  }
}

Try the quollr package in your browser

Any scripts or data that you put into this service are public.

quollr documentation built on Aug. 8, 2025, 6:08 p.m.