# Nothing
#' Generate data with three branches
#'
#' Simulates three curved branches in the (`x1`, `x2`) plane. In every branch
#' `x2` is a cubic function of `x1` plus uniform jitter, while `x3` and `x4`
#' are drawn from a normal distribution with mean 10 and standard deviation
#' 0.1. When `p > 4`, noise columns produced by `gen_noise_dims()` are
#' appended as `x5`, ..., `xp`.
#'
#' @param n A numeric vector of length 3 (default: c(200, 500, 300)) giving
#'   the sample size of each branch. Negative values are rejected.
#' @param p A numeric value (default: 4) representing the number of
#'   dimensions. Must be 4 or greater.
#' @return A tibble with one row per generated point and columns `x1` to
#'   `x4`, plus the noise columns `x5`, ..., `xp` when `p > 4`.
#' @export
#'
#' @examples
#' set.seed(20240412)
#' tree_data <- gen_three_branch_data(n = c(200, 500, 300), p = 4)
gen_three_branch_data <- function(n = c(200, 500, 300), p = 4) {
  # cli_abort() signals an error that carries the message itself;
  # cli_alert_*() returns NULL invisibly, so wrapping it in stop() would
  # raise an error with an empty message.
  if (p < 4) {
    cli::cli_abort("p should be 4 or greater.")
  }
  if (length(n) != 3) {
    cli::cli_abort("n should contain exactly 3 values.")
  }
  if (any(n < 0)) {
    cli::cli_abort("Values in n should be positive.")
  }

  # Branch 1: x1 ~ U(-2, 2), x2 = -(x1^3 + U(0, 6)) + U(0, 0.2).
  x1 <- stats::runif(n[1], -2, 2)
  df1 <- tibble::tibble(
    x1 = x1,
    x2 = -(x1^3 + stats::runif(n[1], 0, 6)) + stats::runif(n[1], 0, 0.2),
    x3 = stats::rnorm(n[1], 10, 0.1),
    x4 = stats::rnorm(n[1], 10, 0.1)
  )

  # Branch 2: x1 ~ U(0, 2), x2 = (x1^3 + U(0, 6)) + U(0, 0.2).
  x1 <- stats::runif(n[2], 0, 2)
  df2 <- tibble::tibble(
    x1 = x1,
    x2 = (x1^3 + stats::runif(n[2], 0, 6)) + stats::runif(n[2], 0, 0.2),
    x3 = stats::rnorm(n[2], 10, 0.1),
    x4 = stats::rnorm(n[2], 10, 0.1)
  )

  # Branch 3: same shape as branch 1 on x1 ~ U(-2, 0), shifted up by 10.
  x1 <- stats::runif(n[3], -2, 0)
  df3 <- tibble::tibble(
    x1 = x1,
    x2 = -(x1^3 + stats::runif(n[3], 0, 6)) + stats::runif(n[3], 0, 0.2) + 10,
    x3 = stats::rnorm(n[3], 10, 0.1),
    x4 = stats::rnorm(n[3], 10, 0.1)
  )

  df <- dplyr::bind_rows(df1, df2, df3)

  if (p > 4) {
    cli::cli_alert_info("Adding noise dimensions to reach the desired dimensionality.")
    noise_mat <- gen_noise_dims(
      n = NROW(df), num_noise = p - 4,
      min_n = -0.5, max_n = 0.5
    )
    colnames(noise_mat) <- paste0("x", 5:p)
    df <- dplyr::bind_cols(df, noise_mat)
  }

  cli::cli_alert_success("Data generation completed successfully! 🎉")
  df
}
#' Generate data with five branches
#'
#' Simulates five piecewise-linear branches in the (`x1`, `x2`) plane: two
#' upward "V" shapes, two downward "V" shapes and one straight diagonal.
#' `x3` and `x4` are drawn from a normal distribution with mean 10 and
#' standard deviation 0.03. When `p > 4`, noise columns produced by
#' `gen_noise_dims()` are appended as `x5`, ..., `xp`.
#'
#' @param n A numeric vector of length 5 (default: c(200, 100, 300, 400, 300))
#'   giving the sample size of each branch. Negative values are rejected.
#' @param p A numeric value (default: 4) representing the number of
#'   dimensions. Must be 4 or greater.
#' @return A tibble with one row per generated point and columns `x1` to
#'   `x4`, plus the noise columns `x5`, ..., `xp` when `p > 4`.
#' @export
#'
#' @examples
#' set.seed(20240412)
#' tree_data <- gen_five_branch_data(n = c(200, 100, 300, 400, 300), p = 4)
gen_five_branch_data <- function(n = c(200, 100, 300, 400, 300), p = 4) {
  # cli_abort() signals an error that carries the message itself;
  # cli_alert_*() returns NULL invisibly, so wrapping it in stop() would
  # raise an error with an empty message.
  if (p < 4) {
    cli::cli_abort("p should be 4 or greater.")
  }
  if (length(n) != 5) {
    cli::cli_abort("n should contain exactly 5 values.")
  }
  if (any(n < 0)) {
    cli::cli_abort("Values in n should be positive.")
  }

  # Branch 1: wide, shallow "V": x1 ~ U(-3, 3), x2 = |0.5 * x1|.
  x1 <- stats::runif(n[1], -3, 3)
  df1 <- tibble::tibble(
    x1 = x1,
    x2 = abs(0.5 * x1),
    x3 = stats::rnorm(n[1], 10, 0.03),
    x4 = stats::rnorm(n[1], 10, 0.03)
  )

  # Branch 2: narrow, steep "V": x1 ~ U(-0.5, 0.5), x2 = |10 * x1|.
  x1 <- stats::runif(n[2], -0.5, 0.5)
  df2 <- tibble::tibble(
    x1 = x1,
    x2 = abs(10 * x1),
    x3 = stats::rnorm(n[2], 10, 0.03),
    x4 = stats::rnorm(n[2], 10, 0.03)
  )

  # Branch 3: inverted shallow "V": x1 ~ U(-6, 3), x2 = -|0.5 * x1 + 5|.
  x1 <- stats::runif(n[3], -6, 3)
  df3 <- tibble::tibble(
    x1 = x1,
    x2 = (-1) * abs(0.5 * x1 + 5),
    x3 = stats::rnorm(n[3], 10, 0.03),
    x4 = stats::rnorm(n[3], 10, 0.03)
  )

  # Branch 4: inverted steep "V" lowered by 5: x2 = -|10 * x1| - 5.
  x1 <- stats::runif(n[4], -0.5, 0.5)
  df4 <- tibble::tibble(
    x1 = x1,
    x2 = (-1) * abs(10 * x1) - 5,
    x3 = stats::rnorm(n[4], 10, 0.03),
    x4 = stats::rnorm(n[4], 10, 0.03)
  )

  # Branch 5: straight diagonal: x1 ~ U(-5, 5), x2 = x1.
  x1 <- stats::runif(n[5], -5, 5)
  df5 <- tibble::tibble(
    x1 = x1,
    x2 = x1,
    x3 = stats::rnorm(n[5], 10, 0.03),
    x4 = stats::rnorm(n[5], 10, 0.03)
  )

  df <- dplyr::bind_rows(df1, df2, df3, df4, df5)

  if (p > 4) {
    cli::cli_alert_info("Adding noise dimensions to reach the desired dimensionality.")
    noise_mat <- gen_noise_dims(
      n = NROW(df), num_noise = p - 4,
      min_n = -0.5, max_n = 0.5
    )
    colnames(noise_mat) <- paste0("x", 5:p)
    df <- dplyr::bind_cols(df, noise_mat)
  }

  cli::cli_alert_success("Data generation completed successfully! 🎉")
  df
}
#' Generate data with seven branches
#'
#' Simulates seven polynomial branches (cubic and quadratic curves with
#' uniform jitter) in the (`x1`, `x2`) plane. `x3` is drawn from a normal
#' distribution with mean 10 and standard deviation 0.03, and `x4` is the
#' negative of such a draw (so it is centred on -10). When `p > 4`, noise
#' columns produced by `gen_noise_dims()` are appended as `x5`, ..., `xp`.
#'
#' @param n A numeric vector of length 7
#'   (default: c(200, 100, 250, 300, 150, 400, 50)) giving the sample size of
#'   each branch. Negative values are rejected.
#' @param p A numeric value (default: 4) representing the number of
#'   dimensions. Must be 4 or greater.
#' @return A tibble with one row per generated point and columns `x1` to
#'   `x4`, plus the noise columns `x5`, ..., `xp` when `p > 4`.
#' @export
#'
#' @examples
#' set.seed(20240412)
#' seven_branching_data <- gen_seven_branch_data(
#' n = c(200, 100, 250, 300, 150, 400, 50), p = 4)
gen_seven_branch_data <- function(n = c(200, 100, 250, 300, 150, 400, 50), p = 4) {
  # cli_abort() signals an error that carries the message itself;
  # cli_alert_*() returns NULL invisibly, so wrapping it in stop() would
  # raise an error with an empty message.
  if (p < 4) {
    cli::cli_abort("p should be 4 or greater.")
  }
  if (length(n) != 7) {
    cli::cli_abort("n should contain exactly 7 values.")
  }
  if (any(n < 0)) {
    cli::cli_abort("Values in n should be positive.")
  }

  # Branch 1: descending cubic, x1 ~ U(-2, 2).
  x1 <- stats::runif(n[1], -2, 2)
  df1 <- tibble::tibble(
    x1 = x1,
    x2 = -(x1^3 + stats::runif(n[1], 0, 1)) + stats::runif(n[1], 0, 0.2),
    x3 = stats::rnorm(n[1], 10, 0.03),
    x4 = -stats::rnorm(n[1], 10, 0.03)
  )

  # Branch 2: ascending cubic, x1 ~ U(-2, 1.5).
  x1 <- stats::runif(n[2], -2, 1.5)
  df2 <- tibble::tibble(
    x1 = x1,
    x2 = (x1^3 + stats::runif(n[2], 0, 1)) + stats::runif(n[2], 0, 0.2),
    x3 = stats::rnorm(n[2], 10, 0.03),
    x4 = -stats::rnorm(n[2], 10, 0.03)
  )

  # Branch 3: upward parabola centred at x1 = 3, sampled on x1 ~ U(-2, 1.5).
  x1 <- stats::runif(n[3], -2, 1.5)
  df3 <- tibble::tibble(
    x1 = x1,
    x2 = (1 + (x1 - 3)^2 + stats::runif(n[3], 0, 1)) + stats::runif(n[3], 0, 0.1),
    x3 = stats::rnorm(n[3], 10, 0.03),
    x4 = -stats::rnorm(n[3], 10, 0.03)
  )

  # Branch 4: downward parabola centred at x1 = 3, sampled on x1 ~ U(-0.5, 3).
  x1 <- stats::runif(n[4], -0.5, 3)
  df4 <- tibble::tibble(
    x1 = x1,
    x2 = (1 + -(x1 - 3)^2 + stats::runif(n[4], 0, 1)) + stats::runif(n[4], 0, 0.1),
    x3 = stats::rnorm(n[4], 10, 0.03),
    x4 = -stats::rnorm(n[4], 10, 0.03)
  )

  # Branch 5: tight cubic lifted to x2 around 20, x1 ~ U(-1, 1).
  x1 <- stats::runif(n[5], -1, 1)
  df5 <- tibble::tibble(
    x1 = x1,
    x2 = (20 + x1^3 + stats::runif(n[5], 0, 0.1)) + stats::runif(n[5], 0, 0.01),
    x3 = stats::rnorm(n[5], 10, 0.03),
    x4 = -stats::rnorm(n[5], 10, 0.03)
  )

  # Branch 6: parabola shifted up by 10, x1 ~ U(-2, 2).
  x1 <- stats::runif(n[6], -2, 2)
  df6 <- tibble::tibble(
    x1 = x1,
    x2 = (x1^2 + stats::runif(n[6], 0, 0.1)) + stats::runif(n[6], 0, 0.01) + 10,
    x3 = stats::rnorm(n[6], 10, 0.03),
    x4 = -stats::rnorm(n[6], 10, 0.03)
  )

  # Branch 7: parabola shifted up by 15, x1 ~ U(-2, 2).
  x1 <- stats::runif(n[7], -2, 2)
  df7 <- tibble::tibble(
    x1 = x1,
    x2 = (x1^2 + stats::runif(n[7], 0, 0.2)) + stats::runif(n[7], 0, 0.01) + 15,
    x3 = stats::rnorm(n[7], 10, 0.03),
    x4 = -stats::rnorm(n[7], 10, 0.03)
  )

  df <- dplyr::bind_rows(df1, df2, df3, df4, df5, df6, df7)

  if (p > 4) {
    cli::cli_alert_info("Adding noise dimensions to reach the desired dimensionality.")
    noise_mat <- gen_noise_dims(
      n = NROW(df), num_noise = p - 4,
      min_n = -0.5, max_n = 0.5
    )
    colnames(noise_mat) <- paste0("x", 5:p)
    df <- dplyr::bind_cols(df, noise_mat)
  }

  cli::cli_alert_success("Data generation completed successfully! 🎉")
  df
}
#' Generate data with four branches
#'
#' Simulates four branches in the (`x1`, `x2`) plane that follow `exp(x1)`,
#' `exp(-x1)`, `log(x1)` and `log(-x1)`, each with uniform jitter. In
#' addition to the four branches, a diffuse band of `floor(0.1 * sum(n))`
#' extra points is appended (`x1` uniform on (-5, 0), `x2` the sum of two
#' uniform(0, 0.8) draws), so the result has more than `sum(n)` rows.
#' `x3` is drawn from a normal distribution with mean 10 and standard
#' deviation 0.03, and `x4` is the negative of such a draw. When `p > 4`,
#' noise columns produced by `gen_noise_dims()` are appended as
#' `x5`, ..., `xp`.
#'
#' @param n A numeric vector of length 4 (default: c(200, 300, 150, 250))
#'   giving the sample size of each branch. Negative values are rejected.
#' @param p A numeric value (default: 4) representing the number of
#'   dimensions. Must be 4 or greater.
#' @return A tibble with `sum(n) + floor(0.1 * sum(n))` rows and columns `x1`
#'   to `x4`, plus the noise columns `x5`, ..., `xp` when `p > 4`.
#' @export
#'
#' @examples
#' set.seed(20240412)
#' four_branching_data <- gen_four_branch_data(n = c(200, 300, 150, 250), p = 4)
gen_four_branch_data <- function(n = c(200, 300, 150, 250), p = 4) {
  # cli_abort() signals an error that carries the message itself;
  # cli_alert_*() returns NULL invisibly, so wrapping it in stop() would
  # raise an error with an empty message.
  if (p < 4) {
    cli::cli_abort("p should be 4 or greater.")
  }
  if (length(n) != 4) {
    cli::cli_abort("n should contain exactly 4 values.")
  }
  if (any(n < 0)) {
    cli::cli_abort("Values in n should be positive.")
  }

  # Branch 1: x2 follows exp(x1), x1 ~ U(-5, 1).
  x1 <- stats::runif(n[1], -5, 1)
  df1 <- tibble::tibble(
    x1 = x1,
    x2 = (exp(x1) + stats::runif(n[1], 0, 0.1)) + stats::runif(n[1], 0, 0.2),
    x3 = stats::rnorm(n[1], 10, 0.03),
    x4 = -stats::rnorm(n[1], 10, 0.03)
  )

  # Branch 2: x2 follows exp(-x1), x1 ~ U(-1, 5).
  x1 <- stats::runif(n[2], -1, 5)
  df2 <- tibble::tibble(
    x1 = x1,
    x2 = (exp(-x1) + stats::runif(n[2], 0, 0.1)) + stats::runif(n[2], 0, 0.2),
    x3 = stats::rnorm(n[2], 10, 0.03),
    x4 = -stats::rnorm(n[2], 10, 0.03)
  )

  # Branch 3: x2 follows log(x1), x1 ~ U(0, 5).
  x1 <- stats::runif(n[3], 0, 5)
  df3 <- tibble::tibble(
    x1 = x1,
    x2 = (log(x1) + stats::runif(n[3], 0, 0.1)) + stats::runif(n[3], 0, 0.2),
    x3 = stats::rnorm(n[3], 10, 0.03),
    x4 = -stats::rnorm(n[3], 10, 0.03)
  )

  # Branch 4: x2 follows log(-x1), x1 ~ U(-5, 0).
  x1 <- stats::runif(n[4], -5, 0)
  df4 <- tibble::tibble(
    x1 = x1,
    x2 = (log(-x1) + stats::runif(n[4], 0, 0.1)) + stats::runif(n[4], 0, 0.2),
    x3 = stats::rnorm(n[4], 10, 0.03),
    x4 = -stats::rnorm(n[4], 10, 0.03)
  )

  # Extra diffuse band sized at 10% of the requested points. floor() makes
  # explicit the truncation the random generators apply to a fractional count.
  n_bkg <- floor(sum(n) * 0.1)
  df5 <- tibble::tibble(
    x1 = stats::runif(n_bkg, -5, 0),
    x2 = stats::runif(n_bkg, 0, 0.8) + stats::runif(n_bkg, 0, 0.8),
    x3 = stats::rnorm(n_bkg, 10, 0.03),
    x4 = -stats::rnorm(n_bkg, 10, 0.03)
  )

  df <- dplyr::bind_rows(df1, df2, df3, df4, df5)

  if (p > 4) {
    cli::cli_alert_info("Adding noise dimensions to reach the desired dimensionality.")
    noise_mat <- gen_noise_dims(
      n = NROW(df), num_noise = p - 4,
      min_n = -0.5, max_n = 0.5
    )
    colnames(noise_mat) <- paste0("x", 5:p)
    df <- dplyr::bind_cols(df, noise_mat)
  }

  cli::cli_alert_success("Data generation completed successfully! 🎉")
  df
}
#' Generate data with eight branches
#'
#' Simulates eight exponential branches in the (`x1`, `x2`) plane: four
#' growing curves `exp(k * x1)` and four decaying curves `exp(-k * x1)` for
#' k = 1, 2, 3 and 0.5, each with uniform jitter. `x3` is drawn from a normal
#' distribution with mean 10 and standard deviation 0.03, and `x4` is the
#' negative of such a draw. When `p > 4`, noise columns produced by
#' `gen_noise_dims()` are appended as `x5`, ..., `xp`.
#'
#' @param n A numeric vector of length 8
#'   (default: c(200, 300, 150, 250, 100, 100, 100, 100)) giving the sample
#'   size of each branch. Negative values are rejected.
#' @param p A numeric value (default: 4) representing the number of
#'   dimensions. Must be 4 or greater.
#' @return A tibble with one row per generated point and columns `x1` to
#'   `x4`, plus the noise columns `x5`, ..., `xp` when `p > 4`.
#' @export
#'
#' @examples
#' set.seed(20240412)
#' branching_data <- gen_eight_branch_data(
#'   n = c(200, 300, 150, 250, 100, 100, 100, 100), p = 4)
gen_eight_branch_data <- function(n = c(200, 300, 150, 250, 100, 100, 100, 100), p = 4) {
  # cli_abort() signals an error that carries the message itself;
  # cli_alert_*() returns NULL invisibly, so wrapping it in stop() would
  # raise an error with an empty message.
  if (p < 4) {
    cli::cli_abort("p should be 4 or greater.")
  }
  if (length(n) != 8) {
    cli::cli_abort("n should contain exactly 8 values.")
  }
  if (any(n < 0)) {
    cli::cli_abort("Values in n should be positive.")
  }

  # Branch 1: exp(x1), x1 ~ U(-1, 2). The second jitter term uses runif()'s
  # default range (0, 1), unlike the (0, 0.2) used by the other branches.
  x1 <- stats::runif(n[1], -1, 2)
  df1 <- tibble::tibble(
    x1 = x1,
    x2 = (exp(x1) + stats::runif(n[1], 0, 0.1)) + stats::runif(n[1]),
    x3 = stats::rnorm(n[1], 10, 0.03),
    x4 = -stats::rnorm(n[1], 10, 0.03)
  )

  # Branch 2: exp(2 * x1), x1 ~ U(-1, 1).
  x1 <- stats::runif(n[2], -1, 1)
  df2 <- tibble::tibble(
    x1 = x1,
    x2 = (exp(2 * x1) + stats::runif(n[2], 0, 0.1)) + stats::runif(n[2], 0, 0.2),
    x3 = stats::rnorm(n[2], 10, 0.03),
    x4 = -stats::rnorm(n[2], 10, 0.03)
  )

  # Branch 3: exp(3 * x1), x1 ~ U(-1, 0.6).
  x1 <- stats::runif(n[3], -1, 0.6)
  df3 <- tibble::tibble(
    x1 = x1,
    x2 = (exp(3 * x1) + stats::runif(n[3], 0, 0.1)) + stats::runif(n[3], 0, 0.2),
    x3 = stats::rnorm(n[3], 10, 0.03),
    x4 = -stats::rnorm(n[3], 10, 0.03)
  )

  # Branch 4: exp(0.5 * x1), x1 ~ U(-1, 3).
  x1 <- stats::runif(n[4], -1, 3)
  df4 <- tibble::tibble(
    x1 = x1,
    x2 = (exp(0.5 * x1) + stats::runif(n[4], 0, 0.1)) + stats::runif(n[4], 0, 0.2),
    x3 = stats::rnorm(n[4], 10, 0.03),
    x4 = -stats::rnorm(n[4], 10, 0.03)
  )

  # Branch 5: exp(-x1), x1 ~ U(-2, 1).
  x1 <- stats::runif(n[5], -2, 1)
  df5 <- tibble::tibble(
    x1 = x1,
    x2 = (exp(-x1) + stats::runif(n[5], 0, 0.1)) + stats::runif(n[5], 0, 0.2),
    x3 = stats::rnorm(n[5], 10, 0.03),
    x4 = -stats::rnorm(n[5], 10, 0.03)
  )

  # Branch 6: exp(-2 * x1), x1 ~ U(-1, 1).
  x1 <- stats::runif(n[6], -1, 1)
  df6 <- tibble::tibble(
    x1 = x1,
    x2 = (exp(2 * -x1) + stats::runif(n[6], 0, 0.1)) + stats::runif(n[6], 0, 0.2),
    x3 = stats::rnorm(n[6], 10, 0.03),
    x4 = -stats::rnorm(n[6], 10, 0.03)
  )

  # Branch 7: exp(-3 * x1), x1 ~ U(-0.6, 1).
  x1 <- stats::runif(n[7], -0.6, 1)
  df7 <- tibble::tibble(
    x1 = x1,
    x2 = (exp(3 * -x1) + stats::runif(n[7], 0, 0.1)) + stats::runif(n[7], 0, 0.2),
    x3 = stats::rnorm(n[7], 10, 0.03),
    x4 = -stats::rnorm(n[7], 10, 0.03)
  )

  # Branch 8: exp(-0.5 * x1), x1 ~ U(-3, 1).
  x1 <- stats::runif(n[8], -3, 1)
  df8 <- tibble::tibble(
    x1 = x1,
    x2 = (exp(0.5 * -x1) + stats::runif(n[8], 0, 0.1)) + stats::runif(n[8], 0, 0.2),
    x3 = stats::rnorm(n[8], 10, 0.03),
    x4 = -stats::rnorm(n[8], 10, 0.03)
  )

  df <- dplyr::bind_rows(df1, df2, df3, df4, df5, df6, df7, df8)

  if (p > 4) {
    cli::cli_alert_info("Adding noise dimensions to reach the desired dimensionality.")
    noise_mat <- gen_noise_dims(
      n = NROW(df), num_noise = p - 4,
      min_n = -0.5, max_n = 0.5
    )
    colnames(noise_mat) <- paste0("x", 5:p)
    df <- dplyr::bind_cols(df, noise_mat)
  }

  cli::cli_alert_success("Data generation completed successfully! 🎉")
  df
}
#' Generate Curvy Branching Cluster Data
#'
#' This function generates two curvy clusters and one Gaussian cluster in the
#' middle. Each curvy cluster is an arc: an angle is drawn uniformly from
#' (0.20, 0.90 * pi), `x1` and `x3` are its cosine and `x2` and `x4` its sine
#' (the second cluster uses the negated angle), each plus normal noise with
#' mean 1 and standard deviation 0.06. The Gaussian cluster draws every
#' coordinate from a normal distribution with mean 1 and standard deviation
#' 0.08. When `p > 4`, noise columns produced by `gen_noise_dims()` are
#' appended as `x5`, ..., `xp`.
#'
#' @param n A numeric vector of length 3 (default: c(200, 200, 100)) giving
#'   the sample sizes of the first curvy cluster, the second curvy cluster
#'   and the Gaussian cluster. Negative values are rejected.
#' @param p A numeric value (default: 4) representing the number of
#'   dimensions. Must be 4 or greater.
#' @return A tibble with one row per generated point and columns `x1` to
#'   `x4`, plus the noise columns `x5`, ..., `xp` when `p > 4`.
#' @export
#'
#' @examples
#'
#' # Generate curvy branching cluster data with custom parameters
#' set.seed(20240412)
#' data <- gen_curvy_branch_clust(n = c(200, 200, 100), p = 4)
gen_curvy_branch_clust <- function(n = c(200, 200, 100), p = 4) {
  # cli_abort() signals an error that carries the message itself;
  # cli_alert_*() returns NULL invisibly, so wrapping it in stop() would
  # raise an error with an empty message.
  if (p < 4) {
    cli::cli_abort("p should be 4 or greater.")
  }
  if (length(n) != 3) {
    cli::cli_abort("n should contain exactly 3 values.")
  }
  if (any(n < 0)) {
    cli::cli_abort("Values in n should be positive.")
  }

  # Curvy cluster 1: arc traced by a positive angle.
  theta <- stats::runif(n[1], 0.20, 0.90 * pi)
  df1 <- tibble::tibble(
    x1 = cos(theta) + stats::rnorm(n[1], 1, 0.06),
    x2 = sin(theta) + stats::rnorm(n[1], 1, 0.06),
    x3 = cos(theta) + stats::rnorm(n[1], 1, 0.06),
    x4 = sin(theta) + stats::rnorm(n[1], 1, 0.06)
  )

  # Curvy cluster 2: same arc with the angle negated.
  theta1 <- stats::runif(n[2], 0.20, 0.90 * pi)
  df2 <- tibble::tibble(
    x1 = cos(-theta1) + stats::rnorm(n[2], 1, 0.06),
    x2 = sin(-theta1) + stats::rnorm(n[2], 1, 0.06),
    x3 = cos(-theta1) + stats::rnorm(n[2], 1, 0.06),
    x4 = sin(-theta1) + stats::rnorm(n[2], 1, 0.06)
  )

  # Gaussian cluster centred at (1, 1, 1, 1).
  df3 <- tibble::tibble(
    x1 = stats::rnorm(n[3], mean = 1, sd = 0.08),
    x2 = stats::rnorm(n[3], mean = 1, sd = 0.08),
    x3 = stats::rnorm(n[3], mean = 1, sd = 0.08),
    x4 = stats::rnorm(n[3], mean = 1, sd = 0.08)
  )

  df <- dplyr::bind_rows(df1, df2, df3)

  if (p > 4) {
    cli::cli_alert_info("Adding noise dimensions to reach the desired dimensionality.")
    noise_mat <- gen_noise_dims(
      n = NROW(df), num_noise = p - 4,
      min_n = -0.5, max_n = 0.5
    )
    colnames(noise_mat) <- paste0("x", 5:p)
    df <- dplyr::bind_cols(df, noise_mat)
  }

  cli::cli_alert_success("Data generation completed successfully! 🎉")
  df
}
#' Generate Curvy Branching Cluster Data with Background Noise
#'
#' This function generates data with two curvy clusters and one Gaussian
#' cluster with background noise. Each curvy cluster is an arc: an angle is
#' drawn uniformly from (0.20, 0.90 * pi), `x1` and `x3` are its cosine and
#' `x2` and `x4` its sine (the second cluster uses the negated angle), each
#' plus normal noise with mean 1 and standard deviation 0.06. The Gaussian
#' cluster uses mean 1 and standard deviation 0.08 in every coordinate; the
#' background points use mean 1 and standard deviation 1. When `p > 4`, noise
#' columns produced by `gen_noise_dims()` are appended as `x5`, ..., `xp`.
#'
#' @param n A numeric vector of length 4 (default: c(200, 200, 100, 50))
#'   giving the sample sizes of the first curvy cluster, the second curvy
#'   cluster, the Gaussian cluster and the background noise. Negative values
#'   are rejected.
#' @param p A numeric value (default: 4) representing the number of
#'   dimensions. Must be 4 or greater.
#' @return A tibble with one row per generated point and columns `x1` to
#'   `x4`, plus the noise columns `x5`, ..., `xp` when `p > 4`.
#' @export
#'
#' @examples
#' # Generate curvy branching cluster data with background noise with custom parameters
#' set.seed(20240412)
#' data <- gen_curvy_branch_clust_bkg(n = c(200, 200, 100, 50), p = 4)
gen_curvy_branch_clust_bkg <- function(n = c(200, 200, 100, 50), p = 4) {
  # cli_abort() signals an error that carries the message itself;
  # cli_alert_*() returns NULL invisibly, so wrapping it in stop() would
  # raise an error with an empty message.
  if (p < 4) {
    cli::cli_abort("p should be 4 or greater.")
  }
  if (length(n) != 4) {
    cli::cli_abort("n should contain exactly 4 values.")
  }
  if (any(n < 0)) {
    cli::cli_abort("Values in n should be positive.")
  }

  # Curvy cluster 1: arc traced by a positive angle.
  theta <- stats::runif(n[1], 0.20, 0.90 * pi)
  df1 <- tibble::tibble(
    x1 = cos(theta) + stats::rnorm(n[1], 1, 0.06),
    x2 = sin(theta) + stats::rnorm(n[1], 1, 0.06),
    x3 = cos(theta) + stats::rnorm(n[1], 1, 0.06),
    x4 = sin(theta) + stats::rnorm(n[1], 1, 0.06)
  )

  # Curvy cluster 2: same arc with the angle negated.
  theta1 <- stats::runif(n[2], 0.20, 0.90 * pi)
  df2 <- tibble::tibble(
    x1 = cos(-theta1) + stats::rnorm(n[2], 1, 0.06),
    x2 = sin(-theta1) + stats::rnorm(n[2], 1, 0.06),
    x3 = cos(-theta1) + stats::rnorm(n[2], 1, 0.06),
    x4 = sin(-theta1) + stats::rnorm(n[2], 1, 0.06)
  )

  # Tight Gaussian cluster centred at (1, 1, 1, 1).
  df3 <- tibble::tibble(
    x1 = stats::rnorm(n[3], mean = 1, sd = 0.08),
    x2 = stats::rnorm(n[3], mean = 1, sd = 0.08),
    x3 = stats::rnorm(n[3], mean = 1, sd = 0.08),
    x4 = stats::rnorm(n[3], mean = 1, sd = 0.08)
  )

  # Background noise: same centre, much wider spread (sd = 1).
  df4 <- tibble::tibble(
    x1 = stats::rnorm(n[4], mean = 1, sd = 1),
    x2 = stats::rnorm(n[4], mean = 1, sd = 1),
    x3 = stats::rnorm(n[4], mean = 1, sd = 1),
    x4 = stats::rnorm(n[4], mean = 1, sd = 1)
  )

  df <- dplyr::bind_rows(df1, df2, df3, df4)

  if (p > 4) {
    cli::cli_alert_info("Adding noise dimensions to reach the desired dimensionality.")
    noise_mat <- gen_noise_dims(
      n = NROW(df), num_noise = p - 4,
      min_n = -0.5, max_n = 0.5
    )
    colnames(noise_mat) <- paste0("x", 5:p)
    df <- dplyr::bind_cols(df, noise_mat)
  }

  cli::cli_alert_success("Data generation completed successfully! 🎉")
  df
}
#' Generate Two Curvy Branching Clusters
#'
#' This function generates data with two curvy branching clusters. Each
#' cluster is an arc: an angle is drawn uniformly from (0.20, 0.90 * pi),
#' `x1` and `x3` are its cosine and `x2` and `x4` its sine (the second
#' cluster uses the negated angle), each plus normal noise with mean 1 and
#' standard deviation 0.06. When `p > 4`, noise columns produced by
#' `gen_noise_dims()` are appended as `x5`, ..., `xp`.
#'
#' @param n A numeric vector of length 2 (default: c(300, 200)) giving the
#'   sample sizes of the two curvy clusters. Negative values are rejected.
#' @param p A numeric value (default: 4) representing the number of
#'   dimensions. Must be 4 or greater.
#'
#' @return A tibble with one row per generated point and columns `x1` to
#'   `x4`, plus the noise columns `x5`, ..., `xp` when `p > 4`.
#' @export
#'
#' @examples
#'
#' # Generate two curvy branching clusters with custom parameters
#' set.seed(20240412)
#' data <- gen_two_curvy(n = c(300, 200), p = 4)
gen_two_curvy <- function(n = c(300, 200), p = 4) {
  # cli_abort() signals an error that carries the message itself;
  # cli_alert_*() returns NULL invisibly, so wrapping it in stop() would
  # raise an error with an empty message.
  if (p < 4) {
    cli::cli_abort("p should be 4 or greater.")
  }
  if (length(n) != 2) {
    cli::cli_abort("n should contain exactly 2 values.")
  }
  if (any(n < 0)) {
    cli::cli_abort("Values in n should be positive.")
  }

  # Cluster 1: arc traced by a positive angle.
  theta1 <- stats::runif(n[1], 0.20, 0.90 * pi)
  df1 <- tibble::tibble(
    x1 = cos(theta1) + stats::rnorm(n[1], 1, 0.06),
    x2 = sin(theta1) + stats::rnorm(n[1], 1, 0.06),
    x3 = cos(theta1) + stats::rnorm(n[1], 1, 0.06),
    x4 = sin(theta1) + stats::rnorm(n[1], 1, 0.06)
  )

  # Cluster 2: same arc with the angle negated.
  theta2 <- stats::runif(n[2], 0.20, 0.90 * pi)
  df2 <- tibble::tibble(
    x1 = cos(-theta2) + stats::rnorm(n[2], 1, 0.06),
    x2 = sin(-theta2) + stats::rnorm(n[2], 1, 0.06),
    x3 = cos(-theta2) + stats::rnorm(n[2], 1, 0.06),
    x4 = sin(-theta2) + stats::rnorm(n[2], 1, 0.06)
  )

  df <- dplyr::bind_rows(df1, df2)

  if (p > 4) {
    cli::cli_alert_info("Adding noise dimensions to reach the desired dimensionality.")
    noise_mat <- gen_noise_dims(
      n = NROW(df), num_noise = p - 4,
      min_n = -0.5, max_n = 0.5
    )
    colnames(noise_mat) <- paste0("x", 5:p)
    df <- dplyr::bind_cols(df, noise_mat)
  }

  cli::cli_alert_success("Data generation completed successfully! 🎉")
  df
}
#' Extend a cubic branch into `d` dimensions
#'
#' Draws `n` values of `x1` uniformly on (-2, 2), builds a second coordinate
#' as a jittered negative cubic of `x1`, and fills every further coordinate
#' with a randomly scaled power of `x1` plus uniform noise.
#'
#' @param n A single number giving how many points to generate.
#' @param d The number of dimensions (columns) of the result; at least 2.
#' @return A numeric matrix with `n` rows and `d` columns.
#' @noRd
extend_nonlinear <- function(n, d) {
  if (length(n) != 1) {
    stop(
      "n should be a single integer specifying the number of points",
      call. = FALSE
    )
  }
  if (d < 2) {
    stop("d must be at least 2", call. = FALSE)
  }

  x1 <- stats::runif(n, -2, 2)
  coords <- matrix(0, nrow = n, ncol = d)
  coords[, 1] <- x1
  # Second dimension: negative cubic of x1 with uniform jitter.
  coords[, 2] <- -(x1^3 + stats::runif(n, 0, 6)) + stats::runif(n, 0, 0.2)

  # Higher dimensions: each is a random polynomial of x1 plus noise.
  if (d > 2) {
    for (i in 3:d) {
      power <- sample(2:5, 1) # random polynomial degree in 2..5
      scale_factor <- stats::runif(1, 0.5, 2) # random scaling
      noise_level <- stats::runif(1, 0, 1)
      # The sign alternates every two dimensions (i = 3: -1; i = 4, 5: +1; ...).
      sign_i <- (-1)^(i %/% 2)
      # The noise interval is asymmetric: (-noise_level, 2 * noise_level).
      coords[, i] <- scale_factor * sign_i * (x1^power) +
        stats::runif(n, -noise_level, noise_level * 2)
    }
  }

  coords
}
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.