inst/extra/nonlinear.R

#' Generate points on a nonlinear 2D manifold
#'
#' This function generates points on a nonlinear 2D manifold based on a given equation.
#'
#' @param n The number of points to generate.
#' @param num_noise The number of noise dimensions to add to the generated points.
#' @param min_n The minimum value for the noise dimensions.
#' @param max_n The maximum value for the noise dimensions.
#'
#' @return A matrix containing the generated points on the nonlinear 2D manifold.
#'
#' @examples
#' set.seed(20240412)
#' nonlinear_points <- nonlinear_2d(
#'   n = 100, num_noise = 2, min_n = -0.01,
#'   max_n = 0.01
#' )
#'
#' @export
nonlinear_2d <- function(n, num_noise, min_n, max_n) {
  if (n <= 0) {
    stop("Number of points should be a positive number.")
  }

  if (num_noise < 0) {
    stop("Number of noise dimensions should be a positive number.")
  }

  if (missing(n)) {
    stop("Missing n.")
  }

  if (missing(num_noise)) {
    stop("Missing num_noise.")
  }

  theta <- stats::runif(n, 0.2, 0.6 * pi)
  x <- cos(theta) + stats::rnorm(n, 10, 0.03)
  y <- sin(theta) + stats::rnorm(n, 10, 0.03)

  nonlinear_mat <- matrix(c(x, y), ncol = 2)

  if (num_noise != 0) {
    if (missing(min_n)) {
      stop("Missing min_n.")
    }

    if (missing(max_n)) {
      stop("Missing max_n.")
    }

    noise_mat <- gen_noise_dims(
      n = dim(nonlinear_mat)[1], num_noise = num_noise,
      min_n = min_n, max_n = max_n
    )
    nonlinear_mat <- cbind(nonlinear_mat, noise_mat)

    nonlinear_mat
  } else {
    nonlinear_mat
  }
}

#' Generate Sine Curve Data with Noise
#'
#' This function generates a dataset representing a sine curve with added noise.
#'
#' @param n The number of samples to generate.
#' @param num_noise The number of additional noise dimensions to add to the data.
#' @param min_n The minimum value for the noise dimensions.
#' @param max_n The maximum value for the noise dimensions.
#' @return A matrix containing the sine curve data with noise.
#' @export
#'
#' @examples
#' set.seed(20240412)
#' sine_curve <- sine_curve(n = 100, num_noise = 2, min_n = -0.05, max_n = 0.05)
sine_curve <- function(n, num_noise, min_n, max_n) {
  if (n <= 0) {
    stop("Number of points should be a positive number.")
  }

  if (num_noise < 0) {
    stop("Number of noise dimensions should be a positive number.")
  }

  if (missing(n)) {
    stop("Missing n.")
  }

  if (missing(num_noise)) {
    stop("Missing num_noise.")
  }

  theta <- stats::runif(n, 0, 1.80 * pi)
  x <- theta
  y <- sin(theta)
  df <- matrix(c(x, y), ncol = 2)

  if (num_noise != 0) {
    if (missing(min_n)) {
      stop("Missing min_n.")
    }

    if (missing(max_n)) {
      stop("Missing max_n.")
    }

    noise_mat <- gen_noise_dims(
      n = dim(df)[1], num_noise = num_noise,
      min_n = min_n, max_n = max_n
    )
    df <- cbind(df, noise_mat)

    df
  } else {
    df
  }
}

#' Generate Nonlinear Connected Data with Noise
#'
#' This function generates a dataset representing nonlinear connected clusters with added noise.
#'
#' @param n The total number of samples to generate.
#' @param num_noise The number of additional noise dimensions to add to the data.
#' @param min_n The minimum value for the noise dimensions.
#' @param max_n The maximum value for the noise dimensions.
#' @return A matrix containing the nonlinear connected data with noise.
#' @export
#'
#' @examples
#' set.seed(20240412)
#' nonlinear_connect <- nonlinear_connect(
#'   n = 400, num_noise = 2, min_n = -0.05,
#'   max_n = 0.05
#' )
nonlinear_connect <- function(n, num_noise, min_n, max_n) {
  if (n <= 0) {
    stop("Number of points should be a positive number.")
  }

  if (num_noise < 0) {
    stop("Number of noise dimensions should be a positive number.")
  }

  if (missing(n)) {
    stop("Missing n.")
  }

  if (missing(num_noise)) {
    stop("Missing num_noise.")
  }

  # To check that the assigned n is divided by three
  if ((n %% 4) != 0) {
    warning("The sample size should be a product of four.")
    cluster_size <- floor(n / 4)
  } else {
    cluster_size <- n / 4
  }

  theta <- stats::runif(cluster_size, 0, 0.80 * pi)
  x <- cos(theta) + stats::rnorm(cluster_size, 10, 0.03)
  y <- sin(theta) + stats::rnorm(cluster_size, 10, 0.03)
  z <- rep(0, cluster_size) + stats::rnorm(cluster_size, 10, 0.03)
  w <- rep(0, cluster_size) - stats::rnorm(cluster_size, 10, 0.03)

  df1 <- matrix(c(x, y, z, w), ncol = 4)

  x <- cos(-theta) + stats::rnorm(cluster_size, 10, 0.03) + stats::rnorm(cluster_size, 0.1, 0)
  y <- sin(-theta) + stats::rnorm(cluster_size, 10, 0.03) + stats::rnorm(cluster_size, 0.1, 0)
  z <- rep(0, cluster_size) + stats::rnorm(cluster_size, 10, 0.03)
  w <- rep(0, cluster_size) - stats::rnorm(cluster_size, 10, 0.03)

  df2 <- matrix(c(x, y, z, w), ncol = 4)

  x <- cos(-theta) + stats::rnorm(cluster_size, 10, 0.03) + stats::rnorm(cluster_size, 0.1, 0)
  z <- sin(-theta) + stats::rnorm(cluster_size, 10, 0.03) + stats::rnorm(cluster_size, 0.1, 0)
  y <- rep(0, cluster_size) + stats::rnorm(cluster_size, 10, 0.03)
  w <- rep(0, cluster_size) - stats::rnorm(cluster_size, 10, 0.03)

  df3 <- matrix(c(x, y, z, w), ncol = 4)

  x <- cos(theta) + stats::rnorm(cluster_size, 10, 0.03) + stats::rnorm(cluster_size, 0.1, 0)
  z <- sin(theta) + stats::rnorm(cluster_size, 10, 0.03) + stats::rnorm(cluster_size, 0.1, 0)
  y <- rep(0, cluster_size) + stats::rnorm(cluster_size, 10, 0.03)
  w <- rep(0, cluster_size) - stats::rnorm(cluster_size, 10, 0.03)

  df4 <- matrix(c(x, y, z, w), ncol = 4)

  df <- rbind(df1, df2, df3, df4)

  if (num_noise != 0) {
    if (missing(min_n)) {
      stop("Missing min_n.")
    }

    if (missing(max_n)) {
      stop("Missing max_n.")
    }

    noise_mat <- gen_noise_dims(
      n = dim(df)[1], num_noise = num_noise,
      min_n = min_n, max_n = max_n
    )
    df <- cbind(df, noise_mat)

    df
  } else {
    df
  }
}


#' Generate Nonlinear Mirror Data with Noise
#'
#' This function generates a dataset representing two mirror-image clusters with added noise.
#'
#' @param n The total number of samples to generate.
#' @param num_noise The number of additional noise dimensions to add to the data.
#' @param min_n The minimum value for the noise dimensions.
#' @param max_n The maximum value for the noise dimensions.
#' @return A matrix containing the nonlinear mirror data with noise.
#' @export
#'
#' @examples
#' set.seed(20240412)
#' nonlinear_mirror <- nonlinear_mirror(
#'   n = 400, num_noise = 2, min_n = -0.05,
#'   max_n = 0.05
#' )
nonlinear_mirror <- function(n, num_noise, min_n, max_n) {
  if (n <= 0) {
    stop("Number of points should be a positive number.")
  }

  if (num_noise < 0) {
    stop("Number of noise dimensions should be a positive number.")
  }

  if (missing(n)) {
    stop("Missing n.")
  }

  if (missing(num_noise)) {
    stop("Missing num_noise.")
  }

  # To check that the assigned n is divided by two
  if ((n %% 2) != 0) {
    warning("The sample size should be a product of two.")
    cluster_size <- floor(n / 2)
  } else {
    cluster_size <- n / 2
  }

  x <- stats::runif(cluster_size, -8, 1.5)
  y <- -(exp(x) + stats::runif(cluster_size, 0, 1)) + stats::runif(cluster_size, 0, 0.7)
  z <- rep(0, cluster_size) + stats::rnorm(cluster_size, 10, 0.03)
  w <- rep(0, cluster_size) - stats::rnorm(cluster_size, 10, 0.03)

  df1 <- matrix(c(x, y, z, w), ncol = 4)

  x <- stats::runif(cluster_size, -8, 1.5)
  y <- (exp(x) + stats::runif(cluster_size, 0, 1)) + stats::runif(cluster_size, 0, 0.7)
  z <- rep(0, cluster_size) + stats::rnorm(cluster_size, 10, 0.03)
  w <- rep(0, cluster_size) - stats::rnorm(cluster_size, 10, 0.03)

  df2 <- matrix(c(x, y, z, w), ncol = 4)

  df <- rbind(df1, df2)

  if (num_noise != 0) {
    if (missing(min_n)) {
      stop("Missing min_n.")
    }

    if (missing(max_n)) {
      stop("Missing max_n.")
    }

    noise_mat <- gen_noise_dims(
      n = dim(df)[1], num_noise = num_noise,
      min_n = min_n, max_n = max_n
    )
    df <- cbind(df, noise_mat)

    df
  } else {
    df
  }
}

#' Generate Two Curvy Pancakes with Noise
#'
#' This function generates a dataset representing two curvy pancake-shaped clusters with added noise.
#'
#' @param n The total number of samples to generate.
#' @param num_noise The number of additional noise dimensions to add to the data.
#' @param min_n The minimum value for the noise dimensions.
#' @param max_n The maximum value for the noise dimensions.
#' @return A matrix containing the two curvy pancakes data with noise.
#' @export
#'
#' @examples
#' set.seed(20240412)
#' two_curvy_pancakes <- two_curvy_pancakes(
#'   n = 300, num_noise = 2,
#'   min_n = -0.05, max_n = 0.05
#' )
two_curvy_pancakes <- function(n, num_noise, min_n, max_n) {
  if (n <= 0) {
    stop("Number of points should be a positive number.")
  }

  if (num_noise < 0) {
    stop("Number of noise dimensions should be a positive number.")
  }

  if (missing(n)) {
    stop("Missing n.")
  }

  if (missing(num_noise)) {
    stop("Missing num_noise.")
  }

  # To check that the assigned n is divided by two
  if ((n %% 2) != 0) {
    warning("The sample size should be a product of two.")
    cluster_size <- floor(n / 2)
  } else {
    cluster_size <- n / 2
  }

  phi <- stats::runif(cluster_size, max = 2 * pi)
  rho <- sqrt(stats::runif(cluster_size))

  theta <- stats::runif(cluster_size, 0, 1.80 * pi)
  x <- theta
  y <- sin(theta)
  df1 <- matrix(c(x, y, sqrt(1) * rho * cos(phi) + 4, sqrt(1) * rho * sin(phi) + 4), ncol = 4)
  df2 <- matrix(c(x + 1, y + 1, sqrt(1) * rho * cos(phi) + 6, sqrt(1) * rho * sin(phi) + 6), ncol = 4)

  df <- rbind(df1, df2)

  if (num_noise != 0) {
    if (missing(min_n)) {
      stop("Missing min_n.")
    }

    if (missing(max_n)) {
      stop("Missing max_n.")
    }

    noise_mat <- gen_noise_dims(
      n = dim(df)[1], num_noise = num_noise,
      min_n = min_n, max_n = max_n
    )
    df <- cbind(df, noise_mat)

    df
  } else {
    df
  }
}

#' Generate Two Curvilinear Data with Noise
#'
#' This function generates a dataset representing two curvilinear clusters with added noise.
#'
#' @param n The total number of samples to generate.
#' @param num_noise The number of additional noise dimensions to add to the data.
#' @param min_n The minimum value for the noise dimensions.
#' @param max_n The maximum value for the noise dimensions.
#' @return A matrix containing the two curvilinear data with noise.
#' @export
#'
#' @examples
#' set.seed(20240412)
#' two_curvilinear <- two_curvilinear(
#'   n = 250, num_noise = 2, min_n = -0.05,
#'   max_n = 0.05
#' )
two_curvilinear <- function(n, num_noise, min_n, max_n) {
  if (n <= 0) {
    stop("Number of points should be a positive number.")
  }

  if (num_noise < 0) {
    stop("Number of noise dimensions should be a positive number.")
  }

  if (missing(n)) {
    stop("Missing n.")
  }

  if (missing(num_noise)) {
    stop("Missing num_noise.")
  }

  # To check that the assigned n is divided by two
  if (((n - n * 0.2) %% 2) != 0) {
    warning("The sample size should be a product of two.")
    cluster_size <- floor((n - n * 0.2) / 2)
  } else {
    cluster_size <- (n - n * 0.2) / 2
  }

  x <- stats::runif(cluster_size, -2, -0.5)
  y <- (x^2 + stats::runif(cluster_size, 0, 0.1)) + stats::runif(cluster_size, 0, 0.2)
  z <- rep(0, cluster_size) + stats::rnorm(cluster_size, 10, 0.03)
  w <- rep(0, cluster_size) - stats::rnorm(cluster_size, 10, 0.03)

  df1 <- matrix(c(x, y, z, w), ncol = 4)

  x <- stats::runif(cluster_size, 0.5, 2)
  y <- (x^2 + stats::runif(cluster_size, 0, 0.1)) + stats::runif(cluster_size, 0, 0.2)
  z <- rep(0, cluster_size) + stats::rnorm(cluster_size, 10, 0.03)
  w <- rep(0, cluster_size) - stats::rnorm(cluster_size, 10, 0.03)

  df2 <- matrix(c(x, y, z, w), ncol = 4)

  x <- stats::rnorm(n * 0.2, mean = 0, sd = 0.4)
  y <- stats::rnorm(n * 0.2, mean = 1.5, sd = 0.5)
  z <- rep(0, n * 0.2) + stats::rnorm(n * 0.2, 10, 0.03)
  w <- rep(0, n * 0.2) - stats::rnorm(n * 0.2, 10, 0.03)

  df3 <- matrix(c(x, y, z, w), ncol = 4)

  df <- rbind(df1, df2, df3)

  if (num_noise != 0) {
    if (missing(min_n)) {
      stop("Missing min_n.")
    }

    if (missing(max_n)) {
      stop("Missing max_n.")
    }

    noise_mat <- gen_noise_dims(
      n = dim(df)[1], num_noise = num_noise,
      min_n = min_n, max_n = max_n
    )
    df <- cbind(df, noise_mat)

    df
  } else {
    df
  }
}

#' Generate Swiss Roll Data
#'
#' This function generates data points in the shape of a Swiss roll.
#'
#' @param n The total number of data points to be generated.
#' @param num_noise The number of additional noise dimensions to be generated.
#' @param min_n The minimum value for the noise added to the data points.
#' @param max_n The maximum value for the noise added to the data points.
#'
#' @return A matrix containing the generated Swiss roll data points.
#' @export
#'
#' @examples
#'
#' # Generate Swiss roll data with noise with custom parameters
#' set.seed(20240412)
#' data <- swiss_roll(n = 200, num_noise = 2, min_n = -0.05, max_n = 0.05)
swiss_roll <- function(n, num_noise, min_n, max_n) {
  if (n <= 0) {
    stop("Number of points should be a positive number.")
  }

  if (num_noise < 0) {
    stop("Number of noise dimensions should be a positive number.")
  }

  if (missing(n)) {
    stop("Missing n.")
  }

  if (missing(num_noise)) {
    stop("Missing num_noise.")
  }

  phi <- stats::runif(n, min = 1.5 * pi, max = 4.5 * pi)
  x <- phi * cos(phi)
  y <- phi * sin(phi)
  z <- stats::runif(n, max = 10)
  df <- matrix(c(x, y, z), ncol = 3)

  if (num_noise != 0) {
    if (missing(min_n)) {
      stop("Missing min_n.")
    }

    if (missing(max_n)) {
      stop("Missing max_n.")
    }

    noise_mat <- gen_noise_dims(
      n = dim(df)[1], num_noise = num_noise,
      min_n = min_n, max_n = max_n
    )
    df <- cbind(df, noise_mat)

    df
  } else {
    df
  }
}


generate_swiss_roll <- function(n, p, noise_sd = 0) {
  t <- runif(n, min = 0, max = 3 * pi)  # Control parameter
  x <- t * cos(t)
  y <- t * sin(t)
  z <- runif(n, min = -1, max = 1)  # Adding some vertical variation

  data <- matrix(0, nrow = n, ncol = p)
  data[,1] <- x
  data[,2] <- y
  if (p > 2) {
    data[,3] <- z
  }
  if (p > 3) {
    for (i in 4:p) {
      data[,i] <- sin(i * t) / i  # Additional non-linearity
    }
  }

  if (noise_sd > 0) {
    data <- data + matrix(rnorm(n * p, sd = noise_sd), nrow = n, ncol = p)
  }

  as.data.frame(data)
}

generate_sine_highd <- function(n, p) {
  t <- seq(0, 2 * pi, length.out = n)  # Parameter controlling the sine wave
  data <- matrix(0, nrow = n, ncol = p)  # Initialize data

  data[,1] <- sin(t)  # First dimension: sine wave
  if (p > 1) {
    data[,2] <- cos(t)  # Second dimension: cosine wave
  }
  if (p > 2) {
    for (i in 3:p) {
      data[,i] <- sin(i * t) / i  # Higher dimensions: scaled sine variations
    }
  }

  as.data.frame(data)
}

generate_spiral_highd <- function(n, p, noise_sd = 0) {
  t <- seq(0, 4 * pi, length.out = n)  # Spiral parameter
  data <- matrix(0, nrow = n, ncol = p)  # Initialize matrix

  data[,1] <- sin(t)  # First dimension
  data[,2] <- cos(t)  # Second dimension
  if (p > 2) {
    data[,3] <- t / (4 * pi)  # Smooth elevation in the third dimension
  }
  if (p > 3) {
    for (i in 4:p) {
      data[,i] <- sin(i * t) / i  # Adding non-linearity
    }
  }

  # Add optional Gaussian noise
  if (noise_sd > 0) {
    data <- data + matrix(rnorm(n * p, sd = noise_sd), nrow = n, ncol = p)
  }

  as.data.frame(data)
}

Try the cardinalR package in your browser

Any scripts or data that you put into this service are public.

cardinalR documentation built on Aug. 21, 2025, 5:27 p.m.