Nothing
#' Generate Gaussian cluster with the Mobius Cluster
#'
#' This function generates a dataset consisting of a mobius cluster and Gaussian cluster.
#'
#' @param n A numeric vector (default: c(200, 100)) representing the sample sizes.
#' @param p A numeric value (default: 4) representing the number of dimensions.
#' @return A data containing the mobius cluster and Gaussian cluster.
#' @export
#'
#' @examples
#' mobgau <- make_mobiusgau(n = c(200, 100), p = 4)
make_mobiusgau <- function(n = c(200, 100), p = 4) {
if (p < 3) {
cli::cli_abort("p should be greater than 3.")
}
if (length(n) != 2) {
cli::cli_abort("n should contain exactly two values.")
}
if (any(n < 0)) {
cli::cli_abort("Values in n should be positive.")
}
## To generate data
df <- gen_multicluster(n = n, p = p, k = 2,
loc = matrix(c(
rep(0, p),
rep(0, p)
), nrow = 2, byrow = TRUE),
scale = c(1, 0.3),
shape = c("mobius", "gaussian"),
rotation = NULL,
is_bkg = FALSE)
return(df)
}
#' Generate Multiple Gaussian Clusters
#'
#' This function generates a dataset consisting of multiple Gaussian clusters.
#'
#' @param n A numeric vector (default: c(300, 200, 500)) representing the sample sizes.
#' @param p A numeric value (default: 4) representing the number of dimensions.
#' @param k A numeric value (default: 5) representing the number of clusters.
#' @param loc A numeric matrix (default: NULL) representing the locations/centroids of clusters.
#' @param scale A numeric vector (default: NULL) representing the scaling factors of clusters.
#' @return A data containing the Gaussian clusters.
#' @export
#'
#' @examples
#' loc_matrix <- matrix(c(0, 0, 0, 0,
#' 5, 9, 0, 0,
#' 3, 4, 10, 7
#' ), nrow = 3, byrow = TRUE)
#' multigau <- make_multigau(n = c(300, 200, 500),
#' p = 4, k = 3,
#' loc = loc_matrix, scale = c(0.2, 1.5, 0.5))
make_multigau <- function(n = c(300, 200, 500), p = 4, k = 3, loc = NULL, scale = NULL) {
if (p < 2) {
cli::cli_abort("p should be greater than 2.")
}
if (k < 1) {
cli::cli_abort("k should be greater than 1.")
}
if (length(n) != k) {
cli::cli_abort("n should contain exactly {.val {k}} values.")
}
if (any(n < 0)) {
cli::cli_abort("Values in n should be positive.")
}
if (length(scale) != k) {
cli::cli_abort("scale should contain exactly {.val {k}} values.")
}
if (any(scale < 0)) {
cli::cli_abort("Values in scale should be positive.")
}
if (!is.matrix(loc)) {
cli::cli_abort("loc should be a matrix.")
}
if (NROW(loc) != k) {
cli::cli_abort("Number of rows in loc should be {.val {p}}.")
}
if (NCOL(loc) != p) {
cli::cli_abort("Number of rows in loc should be {.val {k}}.")
}
if (is.null(loc)) {
loc <- gen_clustloc(p = p, k = k)
}
if(is.null(scale)) {
scale <- sample(seq(0.5, 2.3), k, replace = TRUE)
}
## To generate data
df <- gen_multicluster(n = n, p = p, k = k,
loc = loc,
scale = scale,
shape = rep("gaussian", k),
rotation = NULL,
is_bkg = FALSE)
return(df)
}
#' Generate Curvy Quadratic and Gaussian Clusters
#'
#' This function generates synthetic high-dimensional data containing
#' two clusters: one quadratic-shaped cluster and one Gaussian-shaped
#' cluster. The clusters are positioned apart in feature space with
#' different scaling factors.
#'
#' @param n A numeric vector of length 2, specifying the number of
#' observations in each cluster. All values must be positive.
#' @param p Integer. Number of dimensions. Must be at least 3.
#'
#'
#' @return A tibble containing \eqn{n[1] + n[2]} rows and \eqn{p} columns,
#' with generated features (\code{x1, x2, ..., xp}) and a
#' \code{cluster} label.
#'
#' @examples
#' # Generate 2 clusters in 4D: one quadratic, one Gaussian
#' curvygau <- make_curvygau()
#'
#' @export
make_curvygau <- function(n = c(200, 100), p = 4) {
if (p < 3) {
cli::cli_abort("p should be greater than 3.")
}
if (length(n) != 2) {
cli::cli_abort("n should contain exactly two values.")
}
if (any(n < 0)) {
cli::cli_abort("Values in n should be positive.")
}
## To generate data
df <- gen_multicluster(n = n, p = p, k = 2,
loc = matrix(c(
-0.7, -0.7, 0, 0,
0, 0, 0.7, 0.7
), nrow = 2, byrow = TRUE),
scale = c(1, 0.1),
shape = c("quadratic", "gaussian"),
rotation = NULL,
is_bkg = FALSE)
return(df)
}
#' Generate Multiple Interlocked Circles in High-Dimensional Space
#'
#' This function generates \eqn{k} interlocked circular clusters in a
#' \eqn{p}-dimensional space. The circles are constructed using
#' \code{gen_multicluster()}, with each circle positioned in a different
#' coordinate plane and slightly offset so that they interlock with a
#' central circle (hub-like structure).
#'
#' @param n An integer vector of length \eqn{k} giving the number of points
#' in each circle. Default is \code{c(200, 100)}.
#' @param p Integer, the dimensionality of the embedding space. Must be
#' at least 3. Default is \code{4}.
#' @param k Integer, the number of circles to generate. Default is \code{2}.
#' @param offset Numeric, the amount of positional shift applied to each
#' circle along the second coordinate axis to prevent complete overlap.
#' Default is \code{0.5}.
#'
#'
#' @return A data frame (or tibble, depending on \code{gen_multicluster()})
#' containing the generated points and cluster assignments.
#' @export
#'
#' @examples
#' # Generate two interlocked circles in 4-D
#' twolink_circles <- make_klink_circles()
make_klink_circles <- function(n = c(200, 100), p = 4, k = 2, offset = 0.5) {
if (length(n) != k) {
stop("Length of n must equal k (number of circles).")
}
# Locations: each circle gets a small offset along axis 2
loc <- matrix(0, nrow = k, ncol = p)
loc[, 2] <- seq(0, by = offset, length.out = k)
# Build rotations:
# Circle 1 stays in (1,2) plane; subsequent ones rotated into (2,3), (3,4), etc.
rotations <- vector("list", k)
for (i in seq_len(k)) {
if (i == 1) {
rotations[[i]] <- list(
list(plane = c(1, 2), angle = 0) # no rotation
)
} else {
# Rotate each new circle into a different orthogonal plane
rotations[[i]] <- list(
list(plane = c(i, i + 1), angle = 90 %% 360)
)
}
}
names(rotations) <- paste0("cluster", seq_len(k))
# Generate with gen_multicluster
df <- gen_multicluster(
n = n, p = p, k = k,
loc = loc,
scale = rep(1, k),
shape = rep("circle", k),
rotation = rotations,
is_bkg = FALSE
)
return(df)
}
#' Generate a Chain of Interlocked Circles in High-Dimensional Space
#'
#' This function generates \eqn{k} interlocked circular clusters in a
#' \eqn{p}-dimensional space. Unlike \code{make_klink_circles()}, the circles
#' are arranged in a **chain-like structure**, where each circle interlocks
#' only with its immediate neighbor, resembling links in a chain.
#'
#' @param n An integer vector of length \eqn{k} giving the number of points
#' in each circle. Default is \code{c(200, 100)}.
#' @param p Integer, the dimensionality of the embedding space. Must be
#' at least 3. Default is \code{4}.
#' @param k Integer, the number of circles to generate. Default is \code{2}.
#' @param offset Numeric, the positional shift applied to each circle along
#' its linking axis to ensure interlocking instead of overlap. Default is
#' \code{0.5}.
#' @param angle Numeric, the rotation angle (in degrees) used when placing
#' each subsequent circle into its respective plane. Default is \code{90}.
#'
#'
#' @return A data frame (or tibble, depending on \code{gen_multicluster()})
#' containing the generated points and cluster assignments.
#' @export
#'
#' @examples
#' # Generate two chain-linked circles in 4-D
#' twochain_circles <- make_chain_circles()
make_chain_circles <- function(n = c(200, 100), p = 4, k = 2, offset = 0.5, angle = 90) {
if (length(n) != k) {
stop("Length of n must equal k (number of circles).")
}
if (p < 3) {
stop("Need at least 3 dimensions to interlock circles.")
}
# Initialize locations: each circle offset on its linking axis
loc <- matrix(0, nrow = k, ncol = p)
# Example: cluster i is shifted along axis (i+1)
for (i in seq_len(k)) {
shift_axis <- (i %% (p - 1)) + 2 # use axes 2..p cyclically
loc[i, shift_axis] <- offset
}
# Build rotations
rotations <- vector("list", k)
for (i in seq_len(k)) {
if (i == 1) {
rotations[[i]] <- list(
list(plane = c(1, 2), angle = 0) # Circle 1 in x1-x2 plane
)
} else {
# Circle i in (i, i+1) plane, wrapping if needed
plane <- c(i, i + 1)
plane[plane > p] <- plane[plane > p] - (p - 1)
rotations[[i]] <- list(
list(plane = plane, angle = angle)
)
}
}
names(rotations) <- paste0("cluster", seq_len(k))
# Generate circles
df <- gen_multicluster(
n = n, p = p, k = k,
loc = loc,
scale = rep(1, k),
shape = rep("circle", k),
rotation = rotations,
is_bkg = FALSE
)
return(df)
}
#' Generate Multiple Interlocked curvycycle in High-Dimensional Space
#'
#' This function generates \eqn{k} interlocked circular clusters in a
#' \eqn{p}-dimensional space. The curvycycle are constructed using
#' \code{gen_multicluster()}, with each curvycycle positioned in a different
#' coordinate plane and slightly offset so that they interlock with a
#' central curvycycle (hub-like structure).
#'
#' @param n An integer vector of length \eqn{k} giving the number of points
#' in each curvycycle. Default is \code{c(200, 100)}.
#' @param p Integer, the dimensionality of the embedding space. Must be
#' at least 3. Default is \code{4}.
#' @param k Integer, the number of curvycycle to generate. Default is \code{2}.
#' @param offset Numeric, the amount of positional shift applied to each
#' curvycycle along the second coordinate axis to prevent complete overlap.
#' Default is \code{0.5}.
#'
#'
#' @return A data frame (or tibble, depending on \code{gen_multicluster()})
#' containing the generated points and cluster assignments.
#' @export
#'
#' @examples
#' # Generate two interlocked curvycycle in 4-D
#' twolink_curvycycle <- make_klink_curvycycle()
make_klink_curvycycle <- function(n = c(200, 100), p = 4, k = 2, offset = 0.5) {
if (length(n) != k) {
stop("Length of n must equal k (number of curvycycle).")
}
# Locations: each curvycycle gets a small offset along axis 2
loc <- matrix(0, nrow = k, ncol = p)
loc[, 2] <- seq(0, by = offset, length.out = k)
# Build rotations:
# curvycycle 1 stays in (1,2) plane; subsequent ones rotated into (2,3), (3,4), etc.
rotations <- vector("list", k)
for (i in seq_len(k)) {
if (i == 1) {
rotations[[i]] <- list(
list(plane = c(1, 2), angle = 0) # no rotation
)
} else {
# Rotate each new curvycycle into a different orthogonal plane
rotations[[i]] <- list(
list(plane = c(i, i + 1), angle = 90 %% 360)
)
}
}
names(rotations) <- paste0("cluster", seq_len(k))
# Generate with gen_multicluster
df <- gen_multicluster(
n = n, p = p, k = k,
loc = loc,
scale = rep(1, k),
shape = rep("curvycycle", k),
rotation = rotations,
is_bkg = FALSE
)
return(df)
}
#' Generate a Chain of Interlocked curvycycle in High-Dimensional Space
#'
#' This function generates \eqn{k} interlocked circular clusters in a
#' \eqn{p}-dimensional space. Unlike \code{make_klink_curvycycle()}, the curvycycle
#' are arranged in a **chain-like structure**, where each curvycycle interlocks
#' only with its immediate neighbor, resembling links in a chain.
#'
#' @param n An integer vector of length \eqn{k} giving the number of points
#' in each curvycycle. Default is \code{c(200, 100)}.
#' @param p Integer, the dimensionality of the embedding space. Must be
#' at least 3. Default is \code{4}.
#' @param k Integer, the number of curvycycle to generate. Default is \code{2}.
#' @param offset Numeric, the positional shift applied to each curvycycle along
#' its linking axis to ensure interlocking instead of overlap. Default is
#' \code{0.5}.
#' @param angle Numeric, the rotation angle (in degrees) used when placing
#' each subsequent curvycycle into its respective plane. Default is \code{90}.
#'
#'
#' @return A data frame (or tibble, depending on \code{gen_multicluster()})
#' containing the generated points and cluster assignments.
#' @export
#'
#' @examples
#' # Generate two chain-linked curvycycle in 4-D
#' twochain_curvycycle <- make_chain_curvycycle()
make_chain_curvycycle <- function(n = c(200, 100), p = 4, k = 2, offset = 0.5, angle = 90) {
if (length(n) != k) {
stop("Length of n must equal k (number of curvycycle).")
}
if (p < 3) {
stop("Need at least 3 dimensions to interlock curvycycle.")
}
# Initialize locations: each curvycycle offset on its linking axis
loc <- matrix(0, nrow = k, ncol = p)
# Example: cluster i is shifted along axis (i+1)
for (i in seq_len(k)) {
shift_axis <- (i %% (p - 1)) + 2 # use axes 2..p cyclically
loc[i, shift_axis] <- offset
}
# Build rotations
rotations <- vector("list", k)
for (i in seq_len(k)) {
if (i == 1) {
rotations[[i]] <- list(
list(plane = c(1, 2), angle = 0) # curvycycle 1 in x1-x2 plane
)
} else {
# curvycycle i in (i, i+1) plane, wrapping if needed
plane <- c(i, i + 1)
plane[plane > p] <- plane[plane > p] - (p - 1)
rotations[[i]] <- list(
list(plane = plane, angle = angle)
)
}
}
names(rotations) <- paste0("cluster", seq_len(k))
# Generate curvycycle
df <- gen_multicluster(
n = n, p = p, k = k,
loc = loc,
scale = rep(1, k),
shape = rep("curvycycle", k),
rotation = rotations,
is_bkg = FALSE
)
return(df)
}
#' Generate Concentric Circles with a Gaussian Cluster in High Dimensions
#'
#' This function generates a dataset consisting of multiple circular clusters
#' together with a single Gaussian cluster in a \eqn{p}-dimensional space.
#' The circles are placed concentrically at the origin with varying scales,
#' while the Gaussian cluster serves as an additional background or center cluster.
#'
#' @param n An integer vector of length \code{num_circles + 1}, giving the
#' number of points in each cluster (circles first, followed by the Gaussian).
#' Default is \code{c(200, 100, 100)}.
#' @param p Integer, the dimensionality of the embedding space. Must be at least 3.
#' Default is \code{4}.
#' @param num_circles Integer, the number of circular clusters to generate.
#' Default is \code{2}.
#' @param scale_circles Numeric vector of length \code{num_circles}, giving
#' the scale (radius) of each circular cluster. Default is \code{c(1, 2)}.
#'
#' @return A data frame (or tibble, depending on \code{gen_multicluster()})
#' containing the generated dataset with cluster assignments.
#'
#' @examples
#' # Two circles (radii 1 and 2) plus one Gaussian cluster in 4-D
#' gaucircles <- make_gaucircles()
#'
#'
#' @export
make_gaucircles <- function(n = c(200, 100, 100), p = 4, num_circles = 2, scale_circles = c(1, 2)) {
k <- num_circles + 1
if (p < 3) {
cli::cli_abort("p should be greater than 3.")
}
if (length(n) != k) {
cli::cli_abort("n should contain exactly {.val {k}} values.")
}
if (length(scale_circles) != num_circles) {
cli::cli_abort("scale_circles should contain exactly {.val {num_circles}} values.")
}
if (any(n < 0)) {
cli::cli_abort("Values in n should be positive.")
}
## To generate data
df <- gen_multicluster(n = n, p = p, k = k,
loc = matrix(rep(rep(0, p), k),
nrow = k, byrow = TRUE),
scale = c(scale_circles, 0.1),
shape = c(rep( "circle", num_circles), "gaussian"),
rotation = NULL,
is_bkg = FALSE)
return(df)
}
#' Generate Concentric Curvycycles with a Gaussian Cluster in High Dimensions
#'
#' This function generates a dataset consisting of multiple circular clusters
#' together with a single Gaussian cluster in a \eqn{p}-dimensional space.
#' The curvycycle are placed concentrically at the origin with varying scales,
#' while the Gaussian cluster serves as an additional background or center cluster.
#'
#' @param n An integer vector of length \code{num_curvycycle + 1}, giving the
#' number of points in each cluster (curvycycle first, followed by the Gaussian).
#' Default is \code{c(200, 100, 100)}.
#' @param p Integer, the dimensionality of the embedding space. Must be at least 3.
#' Default is \code{4}.
#' @param num_curvycycle Integer, the number of circular clusters to generate.
#' Default is \code{2}.
#' @param scale_curvycycle Numeric vector of length \code{num_curvycycle}, giving
#' the scale (radius) of each circular cluster. Default is \code{c(1, 2)}.
#'
#' @return A data frame (or tibble, depending on \code{gen_multicluster()})
#' containing the generated dataset with cluster assignments.
#'
#' @examples
#' # Two curvycycle (radii 1 and 2) plus one Gaussian cluster in 4-D
#' gaucurvycycle <- make_gaucurvycycle()
#'
#'
#' @export
make_gaucurvycycle <- function(n = c(200, 100, 100), p = 4, num_curvycycle = 2, scale_curvycycle = c(1, 2)) {
k <- num_curvycycle + 1
if (p < 3) {
cli::cli_abort("p should be greater than 3.")
}
if (length(n) != k) {
cli::cli_abort("n should contain exactly {.val {k}} values.")
}
if (length(scale_curvycycle) != num_curvycycle) {
cli::cli_abort("scale_curvycycle should contain exactly {.val {num_curvycycle}} values.")
}
if (any(n < 0)) {
cli::cli_abort("Values in n should be positive.")
}
## To generate data
df <- gen_multicluster(n = n, p = p, k = k,
loc = matrix(rep(rep(0, p), k),
nrow = k, byrow = TRUE),
scale = c(scale_curvycycle, 0.1),
shape = c(rep( "curvycycle", num_curvycycle), "gaussian"),
rotation = NULL,
is_bkg = FALSE)
return(df)
}
#' Generate a Single Grid Cluster in High Dimensions
#'
#' This function generates a dataset consisting of one grid-like cluster
#' (a structured cube grid) in 2D, with optional Gaussian noise dimensions
#' added to extend the dataset into higher dimensions.
#'
#' @param n Integer, the number of points in the grid cluster. Must be positive.
#' Default is \code{500}.
#' @param p Integer, the dimensionality of the embedding space. Must be at least 2.
#' Default is \code{4}.
#'
#'
#' @return A tibble containing the generated dataset with columns:
#' \itemize{
#' \item \code{x1, x2, ..., xp} — coordinates of the data points.
#' \item \code{cluster} — cluster assignment (always 1 for the grid).
#' }
#'
#'
#' @examples
#' # Default: 500 points, 4D space (grid in 2D + 2 noise dimensions)
#' onegrid <- make_onegrid()
#'
#'
#' @export
make_onegrid <- function(n = 500, p = 4){
if (p < 2) {
cli::cli_abort("p should be greater than 2.")
}
if (length(n) != 1) {
cli::cli_abort("n should contain exactly one values.")
}
if (any(n < 0)) {
cli::cli_abort("Values in n should be positive.")
}
## To generate data
df <- gen_multicluster(n = n, p = 2, k = 1,
loc = NULL,
scale = c(1),
shape = c("gridcube"),
rotation = NULL,
is_bkg = FALSE)
if (p > 2) {
noise_df <- gen_noisedims(n = NROW(df), p = (p-2), m = rep(0, p-2), s = rep(0.01, p-2)) |>
as.matrix()
colnames(noise_df) <- paste0("x", 3:p)
df <- cbind(df, noise_df)
}
# Create the tibble
df <- tibble::as_tibble(df, .name_repair = "minimal")
names(df[-3]) <- paste0("x", 1:p)
df <- df |>
dplyr::select(dplyr::starts_with("x"), "cluster")
return(df)
}
#' Generate Two Overlapping Grid Clusters in High Dimensions
#'
#' This function generates a dataset consisting of two overlapping
#' grid-like clusters in a 2D space, with optional noise dimensions
#' added to reach higher-dimensional spaces. The overlap is controlled
#' by scaling factors for the grids.
#'
#' @param n A numeric vector of length 2 specifying the number of points
#' in each grid cluster.
#' @param p An integer specifying the total number of dimensions.
#' Must be greater than or equal to 2. If \code{p > 2}, additional
#' noise dimensions are appended.
#'
#' @return A tibble with \code{n[1] + n[2]} rows and \code{p + 1} columns:
#' \itemize{
#' \item \code{x1, ..., xp} — coordinates of the generated points.
#' \item \code{cluster} — cluster membership label.
#' }
#'
#'
#' @examples
#' # Generate two overlapping grid clusters in 4-D
#' df <- make_twogrid_overlap()
#'
#' @export
make_twogrid_overlap <- function(n = c(500, 500), p = 4){
if (p < 2) {
cli::cli_abort("p should be greater than 2.")
}
if (length(n) != 2) {
cli::cli_abort("n should contain exactly one values.")
}
if (any(n < 0)) {
cli::cli_abort("Values in n should be positive.")
}
## To generate data
df <- gen_multicluster(n = n, p = 2, k = 2,
loc = NULL,
scale = c(1, 3),
shape = rep("gridcube", 2),
rotation = NULL,
is_bkg = FALSE)
if (p > 2) {
noise_df <- gen_noisedims(n = NROW(df), p = (p-2), m = rep(0, p-2), s = rep(0.01, p-2)) |>
as.matrix()
colnames(noise_df) <- paste0("x", 3:p)
df <- cbind(df, noise_df)
}
# Create the tibble
df <- tibble::as_tibble(df, .name_repair = "minimal")
names(df[-3]) <- paste0("x", 1:p)
df <- df |>
dplyr::select(dplyr::starts_with("x"), "cluster")
return(df)
}
#' Generate Two Shifted Grid Clusters in High Dimensions
#'
#' This function generates two grid-shaped clusters in a 2D space, where one grid
#' is shifted relative to the other. Optionally, additional noise dimensions
#' can be added to embed the structure in a higher-dimensional space.
#'
#' @param n A numeric vector of length 2 specifying the number of points in each cluster.
#' Default is \code{c(500, 500)}.
#' @param p An integer specifying the total number of dimensions for the output dataset.
#' Must be at least 2. If \code{p > 2}, additional noise dimensions will be added.
#' Default is 4.
#'
#'
#' @return A tibble with \code{n[1] + n[2]} rows and \code{p + 1} columns:
#' \itemize{
#' \item \code{x1, x2, ..., xp}: Numeric coordinates of the points.
#' \item \code{cluster}: Cluster membership label (factor with 2 levels).
#' }
#'
#' @examples
#' # Generate 2 shifted grid clusters in 4-D
#' make_twogrid_shift <- make_twogrid_shift()
#'
#'
#' @export
make_twogrid_shift <- function(n = c(500, 500), p = 4){
if (p < 2) {
cli::cli_abort("p should be greater than 2.")
}
if (length(n) != 2) {
cli::cli_abort("n should contain exactly one values.")
}
if (any(n < 0)) {
cli::cli_abort("Values in n should be positive.")
}
## To generate data
df <- gen_multicluster(n = n, p = 2, k = 2,
loc = matrix(c(0, 0, 0.5, 0.5), nrow = 2, byrow = TRUE),
scale = rep(1, 2),
shape = rep("gridcube", 2),
rotation = NULL,
is_bkg = FALSE)
if (p > 2) {
noise_df <- gen_noisedims(n = NROW(df), p = (p-2), m = rep(0, p-2), s = rep(0.01, p-2)) |>
as.matrix()
colnames(noise_df) <- paste0("x", 3:p)
df <- cbind(df, noise_df)
}
# Create the tibble
df <- tibble::as_tibble(df, .name_repair = "minimal")
names(df[-3]) <- paste0("x", 1:p)
df <- df |>
dplyr::select(dplyr::starts_with("x"), "cluster")
return(df)
}
#' Generate Parallel Multi-Shape Clusters
#'
#' This function generates synthetic high-dimensional data consisting of
#' \eqn{k} clusters of a specified shape (e.g., crescents), arranged in
#' parallel along alternating dimensions. The first cluster is shifted
#' along the first dimension, the second along the third dimension,
#' the third along the first dimension again, and so on.
#'
#' @param n A numeric vector of length \eqn{k}, specifying the number of
#' observations in each cluster. All values must be positive.
#' @param k Integer. Number of clusters to generate. Must be greater than 1.
#' @param p Integer. Number of dimensions. Must be at least 3.
#' @param shift Numeric. The distance between cluster centers along the
#' alternating dimensions (default is `0.4`).
#' @param shape Character string. Shape of the clusters to generate (e.g.,
#' `"crescent"`, `"gridcube"`, etc.). Must be a single value.
#'
#'
#' @return A tibble containing \eqn{\sum n} rows and \eqn{p} columns, with
#' the generated features (`x1, x2, ..., xp`) and a `cluster` label.
#'
#' @examples
#' # Generate 2 crescent-shaped clusters in 4D
#' twocrescent <- make_shape_para(n = c(500, 300), k = 2, p = 4, shape = "crescent")
#'
#'
#' @export
make_shape_para <- function(n = c(500, 300), k = 2, p = 4, shift = 0.4, shape = "crescent") {
if (p < 3) {
cli::cli_abort("p should be greater than 3.")
}
if (k < 1) {
cli::cli_abort("k should be greater than 1.")
}
if (length(shape) != 1) {
cli::cli_abort("shape should contain exactly one value.")
}
if (length(n) != k) {
cli::cli_abort("n should contain exactly {.val {k}} values.")
}
if (any(n < 0)) {
cli::cli_abort("Values in n should be positive.")
}
## Construct location matrix
loc <- matrix(0, nrow = k, ncol = p)
for (i in seq_len(k)) {
if (i %% 2 == 1) {
loc[i, 1] <- shift * ((i - 1) %/% 2 + 1) # shift along x1
} else {
loc[i, 3] <- shift * (i %/% 2) # shift along x3
}
}
## To generate data
df <- gen_multicluster(n = n, p = p, k = k,
loc = loc,
scale = rep(1, k),
shape = rep(shape, k),
rotation = NULL,
is_bkg = FALSE)
return(df)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.