# Nothing
#' Generate Normal Data for Conditional Independence Testing
#'
#' Draws two independent standard-normal conditioning variables, `Z1` and
#' `Z2`, then draws `X` and `Y` separately from a normal distribution with
#' mean `Z1 + Z2` and unit standard deviation.
#'
#' @param N Integer. Sample size.
#'
#' @return A data frame with columns Z1, Z2, X, and Y.
#' @importFrom stats rnorm
#' @export
#'
NormalData <- function(N) {
  z1 <- stats::rnorm(N, mean = 0, sd = 1)
  z2 <- stats::rnorm(N, mean = 0, sd = 1)
  # X and Y share the same conditional mean but use separate noise draws.
  shared_mean <- z1 + z2
  x <- stats::rnorm(N, mean = shared_mean, sd = 1)
  y <- stats::rnorm(N, mean = shared_mean, sd = 1)
  data.frame(Z1 = z1, Z2 = z2, X = x, Y = y)
}
#' Generate Sine-Gaussian Data (Univariate)
#'
#' Draws a standard-normal `Z` and builds `X` and `Y` from the same
#' Gaussian-damped sine wave, `exp(-Z^2 / 2) * sin(a * Z)`, each with its own
#' small additive normal noise term. `Y` additionally receives `d * X`, so
#' with `d = 0` the two variables are linked only through `Z`.
#'
#' @param N Integer. Sample size.
#' @param a Numeric. Frequency parameter of the sine function. Default is 1.
#' @param d Numeric. Strength of dependency between X and Y. Default is 0.
#'
#' @importFrom stats rnorm
#' @return A data frame with columns Z, X, and Y.
#' @export
SineGaussian <- function(N, a = 1, d = 0) {
  z <- stats::rnorm(N, mean = 0, sd = 1)
  # Signal shared by X and Y: a sine wave under a Gaussian envelope.
  envelope <- exp(-z^2 / 2)
  signal <- envelope * sin(a * z)
  x <- signal + 0.3 * stats::rnorm(N, mean = 0, sd = 0.1)
  y <- signal + d * x + 0.3 * stats::rnorm(N, mean = 0, sd = 0.1)
  data.frame(Z = z, X = x, Y = y)
}
#' Generate Sine-Gaussian Data (Bivariate)
#'
#' Draws two standard-normal conditioning variables and applies the
#' Gaussian-damped sine wave `exp(-z^2 / 2) * sin(a * z)` to each. `X` is the
#' difference of the two waves and `Y` is their sum, each with its own small
#' additive normal noise term; `Y` additionally receives `d * X`.
#'
#' @param N Integer. Sample size.
#' @param a Numeric. Frequency parameter for the sine function. Default is 1.
#' @param d Numeric. Strength of dependency between X and Y. Default is 0.
#' @importFrom stats rnorm
#' @return A data frame with columns Z1, Z2, X, and Y.
#' @export
SineGaussianBiv <- function(N, a = 1, d = 0) {
  z1 <- stats::rnorm(N, mean = 0, sd = 1)
  z2 <- stats::rnorm(N, mean = 0, sd = 1)
  # Damped sine wave applied to each conditioning variable in turn.
  damped_sine <- function(z) exp(-z^2 / 2) * sin(a * z)
  wave1 <- damped_sine(z1)
  wave2 <- damped_sine(z2)
  x <- wave1 - wave2 + 0.3 * stats::rnorm(N, mean = 0, sd = 0.1)
  y <- wave1 + wave2 + 0.3 * stats::rnorm(N, mean = 0, sd = 0.1) + d * x
  data.frame(Z1 = z1, Z2 = z2, X = x, Y = y)
}
#' Generate Sine-Gaussian Data with Multiplicative Noise (Univariate)
#'
#' Draws a single standard-normal `Z` and scales independent standard-normal
#' noise by the Gaussian-damped sine wave `exp(-Z^2 / 2) * sin(a * Z)`. Unlike
#' the additive-noise generators, `Z` enters `X` and `Y` only through the
#' scale of the noise. `Y` additionally receives `d * X`.
#'
#' @param N Integer. Sample size.
#' @param a Numeric. Frequency parameter for the sine function. Default is 1.
#' @param d Numeric. Strength of dependency between X and Y. Default is 0.
#'
#' @importFrom stats rnorm
#' @return A data frame with columns Z, X, and Y.
#' @export
#'
SineGaussianNoise <- function(N, a = 1, d = 0) {
  Z <- stats::rnorm(N, 0, 1)
  # Z-dependent factor that multiplies the noise of both X and Y.
  noise_scale <- exp(-Z^2 / 2) * sin(a * Z)
  X <- noise_scale * stats::rnorm(N, 0, 1)
  Y <- noise_scale * stats::rnorm(N, 0, 1) + d * X
  data.frame(Z, X, Y)
}
#' Generate Nonlinear Categorical Data (Univariate)
#'
#' Generates a dataset with a single uniform `Z` on (-1, 1), a continuous `X`
#' drawn from a normal distribution centred on `Z`, and an ordered categorical
#' `Y` obtained by thresholding the score `cos(Z * pi) + Z + d * X` at
#' 0, 0.5 and 1.
#'
#' @param N Integer. Sample size. `N = 0` returns an empty data frame.
#' @param d Numeric. Dependency strength. Default is 0.
#'
#' @importFrom stats runif rnorm
#' @return A data frame with columns Z, X, and Y, where Y is a factor with
#'   levels "Low", "Medium", "High", "Very High".
#' @export
#'
NonLinearCategorization <- function(N, d = 0) {
  Z <- stats::runif(N, -1, 1)
  X <- stats::rnorm(N, mean = Z, sd = 1)
  score_y <- cos(Z * pi) + Z + d * X
  y_levels <- c("Low", "Medium", "High", "Very High")
  # left.open = TRUE gives bins (-Inf, 0], (0, 0.5], (0.5, 1], (1, Inf), i.e.
  # the strict ">" comparisons of the threshold chain.
  bin <- findInterval(score_y, c(0, 0.5, 1), left.open = TRUE)
  data.frame(Z, X, Y = factor(y_levels[bin + 1L], levels = y_levels))
}
#' Generate Bivariate Nonlinear Categorical Data
#'
#' Generates categorical variables X and Y based on nonlinear combinations of
#' Z1 and Z2, both uniform on (-2, 2). X bins the score `sin(Z2 * pi) + Z1`
#' and Y bins the score `cos(Z1 * pi) + Z2`, each at thresholds 0, 0.5 and 1.
#' The two variables deliberately attach the same label strings to different
#' bins.
#'
#' @param N Integer. Sample size. `N = 0` returns an empty data frame.
#' @importFrom stats runif
#' @return A data frame with columns Z1, Z2, X, and Y.
#' @export
#'
BivNonLinearCategorization <- function(N) {
  Z1 <- stats::runif(N, -2, 2)
  Z2 <- stats::runif(N, -2, 2)
  cuts <- c(0, 0.5, 1)
  score_x <- sin(Z2 * pi) + Z1
  score_y <- cos(Z1 * pi) + Z2
  # Labels are listed from the lowest bin (score <= 0) to the highest
  # (score > 1); left.open = TRUE keeps the strict ">" comparisons.
  x_labels <- c("Category 99", "Category X", "Category A", "Category F")
  y_labels <- c("Category X", "Category F", "Category 99", "Category A")
  X <- x_labels[findInterval(score_x, cuts, left.open = TRUE) + 1L]
  Y <- y_labels[findInterval(score_y, cuts, left.open = TRUE) + 1L]
  data.frame(Z1, Z2, X, Y)
}
#' Generate Bivariate Multinomial Categorical Data
#'
#' Creates a multinomial dataset where the class probabilities are nonlinear
#' functions of Z1 and Z2. For each of X and Y, two linear predictors are
#' turned into three class probabilities with class 0 as the reference, and
#' the class is sampled by comparing a uniform draw with the cumulative
#' probabilities.
#'
#' @param N Integer. Sample size. `N = 0` returns an empty data frame.
#' @param zeta Numeric. Strength of interaction. Default is 1.5.
#' @importFrom stats rnorm runif
#' @return A data frame with columns Z1, Z2, X, and Y, where X and Y are
#'   numeric class codes 0, 1 or 2.
#' @export
#'
BivMultinominal <- function(N, zeta = 1.5) {
  Z1 <- stats::rnorm(N)
  Z2 <- stats::rnorm(N)

  # Sample a class in {0, 1, 2} from two linear predictors.
  sample_class <- function(eta1, eta2) {
    w1 <- exp(eta1)
    w2 <- exp(eta2)
    total <- 1 + w1 + w2
    p0 <- 1 / total
    p1 <- w1 / total
    u <- stats::runif(N, 0, 1)
    # Count how many cumulative cut points the draw has passed. Unlike
    # ifelse(), this is numeric even for empty input.
    as.numeric(u >= p0) + as.numeric(u >= p0 + p1)
  }

  X <- sample_class(Z2 + zeta * Z1 * Z2 + Z1, Z2 - Z1)
  Y <- sample_class(zeta * Z1 * Z2, exp(Z2) + Z1)
  data.frame(Z1, Z2, X, Y)
}
#' Generate Categorical Data Based on Interactions
#'
#' Creates categorical X and Y variables based on the interaction of signs and
#' sums of Z1 and Z2. X labels the sign quadrant of (Z1, Z2); Y bins the sum
#' `Z1 + Z2` at -1, 0 and 1.
#'
#' @param N Integer. Sample size. `N = 0` returns an empty data frame.
#' @importFrom stats rnorm
#' @return A data frame with columns Z1, Z2, X, and Y.
#' @export
InteractiondData <- function(N) {
  Z1 <- stats::rnorm(N)
  Z2 <- stats::rnorm(N)
  # Quadrant index: 1 = (-, -), 2 = (-, +), 3 = (+, -), 4 = (+, +), where
  # "+" means >= 0.
  quadrant <- 1L + 2L * (Z1 >= 0) + (Z2 >= 0)
  X <- c("V", "W", "G", "U")[quadrant]
  # Default findInterval() bins are closed on the left, matching the "<"
  # comparisons: (-Inf, -1), [-1, 0), [0, 1), [1, Inf).
  sum_bin <- findInterval(Z1 + Z2, c(-1, 0, 1))
  Y <- c("W", "V", "G", "U")[sum_bin + 1L]
  data.frame(Z1, Z2, X, Y)
}
#' Generate Categorical Data Based on Exponential and Logarithmic Functions
#'
#' Categorizes based on thresholds of exponential and logarithmic
#' transformations of Z1 and Z2. X is driven by `exp(Z1) + Z2` and Y by
#' `log(abs(Z1) + 1) + Z2`.
#'
#' @param N Integer. Sample size. `N = 0` returns an empty data frame.
#' @importFrom stats rnorm
#' @return A data frame with columns Z1, Z2, X, and Y.
#' @export
ExpLogData <- function(N) {
  Z1 <- stats::rnorm(N)
  Z2 <- stats::rnorm(N)
  n_obs <- length(Z1)

  # Labels are assigned from lowest to highest priority so that later
  # assignments overwrite earlier ones, reproducing an if / else-if chain.
  x_score <- exp(Z1) + Z2
  X <- rep("Category B", n_obs)
  # NOTE(review): exp(Z1) > 0 holds for every realistic draw, so
  # "Category B" is effectively unreachable; kept to preserve the output.
  X[exp(Z1) > 0] <- "Category S"
  X[x_score > 0.5] <- "Category C"
  X[x_score > 1.5] <- "Category A"

  log_term <- log(abs(Z1) + 1)
  y_score <- log_term + Z2
  Y <- rep("Category B", n_obs)
  # NOTE(review): log(abs(Z1) + 1) is never negative, so this condition is
  # always TRUE and "Category B" never appears in Y.
  Y[log_term > -0.5] <- "Category S"
  Y[y_score > 0] <- "Category A"
  Y[y_score > 0.5] <- "Category C"

  data.frame(Z1, Z2, X, Y)
}
#' Generate Categorical Trigonometric Data
#'
#' Uses sine and cosine functions of Z1 and Z2 to generate categorical
#' outcomes. Z1 is uniform on (-pi, pi) and Z2 is standard normal. X is driven
#' by `sin(Z1) + cos(Z2)` and Y by `cos(Z1) - sin(Z2)`.
#'
#' @param N Integer. Sample size. `N = 0` returns an empty data frame.
#' @importFrom stats rnorm runif
#' @return A data frame with columns Z1, Z2, X, and Y.
#' @export
TrigData <- function(N) {
  Z1 <- stats::runif(N, -pi, pi)
  Z2 <- stats::rnorm(N)
  n_obs <- length(Z1)

  # Labels are assigned from lowest to highest priority so that later
  # assignments overwrite earlier ones, reproducing an if / else-if chain.
  x_score <- sin(Z1) + cos(Z2)
  X <- rep("Category C", n_obs)
  # NOTE(review): sin(Z1) > -1 fails only when sin(Z1) is exactly -1, so
  # "Category C" is practically never produced.
  X[sin(Z1) > -1] <- "Category D"
  X[x_score > 0] <- "Category B"
  X[x_score > 1] <- "Category A"

  y_score <- cos(Z1) - sin(Z2)
  Y <- rep("Red", n_obs)
  # NOTE(review): likewise cos(Z1) > -1 is almost always TRUE, so "Red" is
  # practically never produced.
  Y[cos(Z1) > -1] <- "Yellow"
  Y[y_score > 0] <- "Green"
  Y[y_score > 1] <- "Blue"

  data.frame(Z1, Z2, X, Y)
}
#' Generate Categorical Polynomial Data
#'
#' Generates X and Y categories based on polynomial combinations of Z1 and Z2.
#' Each variable is the first matching label in a chain of three polynomial
#' conditions, with a fallback label when none holds.
#'
#' @param N Integer. Sample size. `N = 0` returns an empty data frame.
#' @importFrom stats rnorm
#' @return A data frame with columns Z1, Z2, X, and Y.
#' @export
PolyData <- function(N) {
  Z1 <- stats::rnorm(N)
  Z2 <- stats::rnorm(N)
  n_obs <- length(Z1)

  # Labels are assigned from lowest to highest priority so that later
  # assignments overwrite earlier ones, reproducing an if / else-if chain.
  X <- rep("Right", n_obs)
  X[Z1 + Z2^2 > 0] <- "Left"
  X[Z1^2 + Z2 > 0.5] <- "Up"
  X[Z1^2 + Z2^2 > 2] <- "Down"

  Y <- rep("South", n_obs)
  Y[Z1 - Z2^3 > -1] <- "North"
  Y[Z1^2 - Z2^2 > 0] <- "West"
  Y[Z1^3 + Z2 > 1] <- "East"

  data.frame(Z1, Z2, X, Y)
}
#' Generate Nonlinear Categorical Data (Bivariate)
#'
#' Creates categorical X and Y variables based on sinusoidal and cosine
#' functions of Z1 and Z2, both uniform on (-1, 1). X bins
#' `sin(Z1 * pi) + Z2` and Y bins `cos(Z1 * pi) + Z2`, each at thresholds
#' 0, 0.5 and 1.
#'
#' @param N Integer. Sample size. `N = 0` returns an empty data frame.
#' @importFrom stats runif
#' @return A data frame with columns Z1, Z2, X, and Y.
#' @export
#'
NonLinearData <- function(N) {
  Z1 <- stats::runif(N, -1, 1)
  Z2 <- stats::runif(N, -1, 1)
  cuts <- c(0, 0.5, 1)
  # Labels run from the lowest bin (score <= 0) to the highest (score > 1);
  # left.open = TRUE keeps the strict ">" comparisons.
  x_labels <- c("Low", "Medium", "High", "Very High")
  y_labels <- c("Class D", "Class C", "Class B", "Class A")
  x_bin <- findInterval(sin(Z1 * pi) + Z2, cuts, left.open = TRUE)
  y_bin <- findInterval(cos(Z1 * pi) + Z2, cuts, left.open = TRUE)
  X <- x_labels[x_bin + 1L]
  Y <- y_labels[y_bin + 1L]
  data.frame(Z1, Z2, X, Y)
}
#' Generate Complex Categorical Data
#'
#' A more intricate categorization based on combinations of Z1 and Z2. X
#' labels the quadrant of (Z1, Z2) with compass names, and Y bins the sum
#' `Z1 + Z2` at -1, 0 and 1 into four risk labels.
#'
#' @param N Integer. Sample size. `N = 0` returns an empty data frame.
#'
#' @importFrom stats rnorm
#' @return A data frame with columns Z1, Z2, X, and Y.
#' @export
#'
#' @examples
#' head(ComplexCategorization(100))
#'
ComplexCategorization <- function(N) {
  Z1 <- stats::rnorm(N)
  Z2 <- stats::rnorm(N)
  # Quadrant index, where "positive" means strictly > 0:
  # 1 = (Z1 <= 0, Z2 <= 0), 2 = (Z1 <= 0, Z2 > 0),
  # 3 = (Z1 > 0, Z2 <= 0),  4 = (Z1 > 0, Z2 > 0).
  quadrant <- 1L + 2L * (Z1 > 0) + (Z2 > 0)
  X <- c("Southwest", "Northwest", "Southeast", "Northeast")[quadrant]
  # left.open = TRUE gives bins (-Inf, -1], (-1, 0], (0, 1], (1, Inf), i.e.
  # the strict ">" comparisons on the score.
  risk_bin <- findInterval(Z1 + Z2, c(-1, 0, 1), left.open = TRUE)
  risk_labels <- c("Minimal Risk", "Low Risk", "Moderate Risk", "High Risk")
  Y <- risk_labels[risk_bin + 1L]
  data.frame(Z1, Z2, X, Y)
}
#' Generate Binary Data
#'
#' Creates binary data based on a nonlinear interaction of Z1 and Z2. For each
#' of X and Y a latent value is drawn from a normal distribution with mean
#' `Z1 + Z2 + Z1 * Z2` and unit standard deviation; the indicator is 1 when
#' that latent value is below `threshold` and 0 otherwise.
#'
#' @param N Integer. Sample size. `N = 0` returns an empty data frame.
#' @param threshold Numeric. Threshold for binary classification. Default is 0.
#'
#' @importFrom stats rnorm
#' @return A data frame with columns Z1, Z2, X, and Y, where X and Y are
#'   numeric 0/1 indicators.
#' @export
#'
#' @examples
#' head(BinaryData(100))
#'
BinaryData <- function(N, threshold = 0) {
  Z1 <- stats::rnorm(N)
  Z2 <- stats::rnorm(N)
  latent_mean <- Z1 + Z2 + Z1 * Z2
  # as.numeric() on the comparison yields 0/1 and, unlike ifelse(), remains
  # numeric for empty input.
  X <- as.numeric(stats::rnorm(N, latent_mean, 1) < threshold)
  Y <- as.numeric(stats::rnorm(N, latent_mean, 1) < threshold)
  data.frame(Z1, Z2, X, Y)
}
#' Generate Nonlinear Normal Data
#'
#' Creates nonlinear continuous data from the product of two standard-normal
#' variables: `X` is `Z1 * Z2` plus standard-normal noise, and `Y` is
#' `exp(Z1 * Z2)` plus separate standard-normal noise.
#'
#' @param N Integer. Sample size.
#' @return A data frame with columns Z1, Z2, X, and Y.
#' @export
#' @importFrom stats rnorm
#' @examples
#' head(NonLinNormal(N = 100))
#'
NonLinNormal <- function(N) {
  z1 <- stats::rnorm(N, mean = 0, sd = 1)
  z2 <- stats::rnorm(N, mean = 0, sd = 1)
  # The product term feeds X linearly and Y through the exponential.
  z_product <- z1 * z2
  x <- z_product + stats::rnorm(N, mean = 0, sd = 1)
  y <- exp(z_product) + stats::rnorm(N, mean = 0, sd = 1)
  data.frame(Z1 = z1, Z2 = z2, X = x, Y = y)
}
#' Generate Data with Uniform Noise
#'
#' Adds uniform noise on (-2, 2) to a nonlinear combination of Z1 and Z2:
#' `X` is built from `Z2 - Z1 - Z2 * Z1` and `Y` from `Z2 + Z1 + Z2 * Z1`,
#' each with its own noise draw.
#'
#' @param N Integer. Sample size.
#' @return A data frame with columns Z1, Z2, X, and Y.
#' @export
#' @importFrom stats rnorm runif
#' @examples
#' head(UniformNoise(100))
#'
UniformNoise <- function(N) {
  z1 <- stats::rnorm(N, mean = 0, sd = 1)
  z2 <- stats::rnorm(N, mean = 0, sd = 1)
  # Interaction term, subtracted in X and added in Y.
  cross <- z2 * z1
  x <- z2 - z1 - cross + stats::runif(N, min = -2, max = 2)
  y <- z2 + z1 + cross + stats::runif(N, min = -2, max = 2)
  data.frame(Z1 = z1, Z2 = z2, X = x, Y = y)
}
#' Generate Data with Exponential Noise
#'
#' Adds exponential noise to a nonlinear combination of Z1 and Z2. Each noise
#' draw has `1 / rate_param` (the mean of the exponential distribution)
#' subtracted from it, so the added noise has mean zero.
#'
#' @param N Integer. Sample size.
#' @param rate_param Numeric. Rate parameter for the exponential distribution. Default is 1.
#' @return A data frame with columns Z1, Z2, X, and Y.
#' @export
#' @importFrom stats rnorm rexp
#' @examples
#' head(ExponentialNoise(100))
#'
ExponentialNoise <- function(N, rate_param = 1) {
  z1 <- stats::rnorm(N, mean = 0, sd = 1)
  z2 <- stats::rnorm(N, mean = 0, sd = 1)
  # Mean of the exponential noise, removed from every draw below.
  noise_mean <- 1 / rate_param
  cross <- z2 * z1
  x <- z2 - z1 - cross + stats::rexp(N, rate = rate_param) - noise_mean
  y <- z2 + z1 + cross + stats::rexp(N, rate = rate_param) - noise_mean
  data.frame(Z1 = z1, Z2 = z2, X = x, Y = y)
}
#' Generate Data with Poisson Noise
#'
#' Adds Poisson noise to a nonlinear combination of Z1 and Z2. Each noise draw
#' has `lambda` (the mean of the Poisson distribution) subtracted from it, so
#' the added noise has mean zero for every `lambda`. With the default
#' `lambda = 1` this equals subtracting 1.
#'
#' @param N Integer. Sample size.
#' @param lambda Numeric. Rate parameter for the Poisson distribution. Default is 1.
#'
#' @return A data frame with columns Z1, Z2, X, and Y.
#' @importFrom stats rnorm rpois
#' @export
#'
#' @examples
#' head(PoissonNoise(100))
#'
PoissonNoise <- function(N, lambda = 1) {
  Z1 <- stats::rnorm(N, 0, 1)
  Z2 <- stats::rnorm(N, 0, 1)
  signal <- Z2 * Z1
  # Centre the noise on its own mean rather than on a hard-coded 1, which is
  # only correct when lambda is 1.
  X <- signal + (stats::rpois(N, lambda = lambda) - lambda)
  Y <- signal + (stats::rpois(N, lambda = lambda) - lambda)
  data.frame(Z1, Z2, X, Y)
}
#' Generate High-dimensional Nonlinear Normal Data
#'
#' Creates a dataset with `Zs` independent standard-normal conditioning
#' variables. X depends on Z1 to Z5 and Y on Z1, Z2 and Z6 to Z8 through
#' products, sine/cosine and absolute values; any further Z columns are pure
#' noise. Y additionally receives `d * X`.
#'
#' @param N Integer. Sample size. `N = 0` and `N = 1` are supported.
#' @param d Numeric. Dependency strength. Default is 0.
#' @param Zs Integer. Number of Z variables; must be at least 8 because the
#'   formulas use Z1 to Z8. Default is 20.
#'
#' @return A data frame with columns Z1 to Z<Zs>, X, and Y.
#' @importFrom stats rnorm
#' @export
#'
#' @examples
#' head(NonLinNormalZs(N = 100, Zs = 20))
#'
NonLinNormalZs <- function(N, d = 0, Zs = 20) {
  if (length(Zs) != 1L || is.na(Zs) || Zs < 8) {
    stop("`Zs` must be a single number >= 8 because X and Y use Z1 to Z8.",
         call. = FALSE)
  }
  # Draw the columns one at a time into a list. A list keeps the column
  # structure for any N, whereas replicate() simplifies to a plain vector
  # when N is 1.
  z <- lapply(seq_len(Zs), function(j) stats::rnorm(N, 0, 1))
  names(z) <- paste0("Z", seq_len(Zs))
  Z_df <- as.data.frame(z)
  X <- z[[1]] * z[[2]] + sin(z[[3]] * z[[4]]) + abs(z[[5]]) +
    stats::rnorm(N, 0, 1)
  Y <- z[[1]] * z[[2]] + cos(z[[6]] * z[[7]]) - abs(z[[8]]) +
    stats::rnorm(N, 0, 1) + d * X
  cbind(Z_df, X = X, Y = Y)
}
#' Generate Quadratic Threshold Data
#'
#' Generates data with a threshold effect based on Z1 and Z2. X is a noisy
#' linear combination `Z1 + 2 * Z2`, and Y is a four-level label obtained by
#' binning `Z1 + Z2` at -1, 0 and 1.
#'
#' @param N Integer. Sample size.
#'
#' @return A data frame with columns Z1, Z2, X, and Y. Y is always a character
#'   vector with labels "Strong", "Weak", "Medium" and "0".
#' @export
#' @importFrom stats rnorm
#' @examples
#' head(QuadThresh(100))
QuadThresh <- function(N) {
  Z1 <- stats::rnorm(N)
  Z2 <- stats::rnorm(N)
  X <- Z1 + 2 * Z2 + stats::rnorm(N, 0, 0.2) # continuous linear combination
  # The lowest bin keeps its "0" label, but as a string: a numeric 0 fallback
  # inside ifelse() turns Y numeric whenever every row lands in that bin.
  # Labels run from the lowest bin (sum <= -1) to the highest (sum > 1).
  y_labels <- c("0", "Medium", "Weak", "Strong")
  y_bin <- findInterval(Z1 + Z2, c(-1, 0, 1), left.open = TRUE)
  Y <- y_labels[y_bin + 1L]
  data.frame(Z1, Z2, X, Y)
}
#' Generate Grid Partitioned Data
#'
#' Generates data with a partitioning effect based on Z1 and Z2. X is the
#' noisy nonlinear combination `sin(pi * Z1) + cos(pi * Z2)`, and Y is a
#' four-level label obtained by binning `Z1 + Z2` at -1, 0 and 1.
#'
#' @param N Integer. Sample size. `N = 0` returns an empty data frame with a
#'   character Y column.
#'
#' @return A data frame with columns Z1, Z2, X, and Y.
#' @export
#' @importFrom stats rnorm
#' @examples
#' head(GridPartition(100))
#'
GridPartition <- function(N) {
  Z1 <- stats::rnorm(N)
  Z2 <- stats::rnorm(N)
  X <- sin(pi * Z1) + cos(pi * Z2) + stats::rnorm(N, 0, 0.2) # continuous nonlinear combo
  # Default findInterval() bins are closed on the left, matching the "<"
  # comparisons: (-Inf, -1), [-1, 0), [0, 1), [1, Inf).
  y_labels <- c("High", "Low", "Medium", "No opinion")
  Y <- y_labels[findInterval(Z1 + Z2, c(-1, 0, 1)) + 1L]
  data.frame(Z1, Z2, X, Y)
}
#' Generate Polynomial Decision Boundary Data
#'
#' Generates data with a polynomial decision boundary based on Z1 and Z2. X is
#' `Z1^2 + Z2^2` plus standard-normal noise, and Y is the first matching label
#' in a chain of three polynomial conditions, with "Red" as the fallback.
#'
#' @param N Integer. Sample size. `N = 0` returns an empty data frame with a
#'   character Y column.
#'
#' @return A data frame with columns Z1, Z2, X, and Y.
#' @export
#' @importFrom stats rnorm
#' @examples
#' head(PolyDecision(100))
#'
PolyDecision <- function(N) {
  Z1 <- stats::rnorm(N)
  Z2 <- stats::rnorm(N)
  X <- Z1^2 + Z2^2 + stats::rnorm(N, 0, 1)
  # Labels are assigned from lowest to highest priority so that later
  # assignments overwrite earlier ones, reproducing an if / else-if chain.
  Y <- rep("Red", length(Z1))
  Y[Z1 - Z2^3 > -1] <- "Black"
  Y[Z1^2 - Z2^2 > 0] <- "White"
  Y[Z1^3 + Z2 > 1] <- "Blue"
  data.frame(Z1, Z2, X, Y)
}
#' Generate Sinusoidal and Cosine Data
#'
#' Generates data with sinusoidal and cosine dependencies based on Z1 and Z2,
#' both uniform on (-1, 1). X is `sin(Z1 * pi) + Z2` plus small normal noise,
#' and Y is a four-level label obtained by binning `cos(Z1 * pi) + Z2` at
#' 0, 0.5 and 1.
#'
#' @param N Integer. Sample size. `N = 0` returns an empty data frame with a
#'   character Y column.
#'
#' @return A data frame with columns Z1, Z2, X, and Y.
#'
#' @importFrom stats runif rnorm
#' @export
#'
#' @examples
#' head(SinCosThreshold(100))
SinCosThreshold <- function(N) {
  Z1 <- stats::runif(N, -1, 1)
  Z2 <- stats::runif(N, -1, 1)
  X <- sin(Z1 * pi) + Z2 + stats::rnorm(N, 0, 0.1)
  # Labels run from the lowest bin (score <= 0) to the highest (score > 1);
  # left.open = TRUE keeps the strict ">" comparisons.
  y_labels <- c("Phone", "GamePad", "Desktop", "Laptop")
  y_bin <- findInterval(cos(Z1 * pi) + Z2, c(0, 0.5, 1), left.open = TRUE)
  Y <- y_labels[y_bin + 1L]
  data.frame(Z1, Z2, X, Y)
}
#' Generate Exponential and Logarithmic Data
#'
#' Generates data with exponential and logarithmic dependencies based on Z1
#' and Z2. X is `exp(Z1) + Z2` plus small normal noise, and Y is a label
#' derived from thresholds on `log(abs(Z1) + 1) + Z2`.
#'
#' @param N Integer. Sample size. `N = 0` returns an empty data frame with a
#'   character Y column.
#'
#' @return A data frame with columns Z1, Z2, X, and Y.
#' @importFrom stats rnorm
#' @export
#'
#' @examples
#' head(ExpLogThreshold(100))
ExpLogThreshold <- function(N) {
  Z1 <- stats::rnorm(N)
  Z2 <- stats::rnorm(N)
  X <- exp(Z1) + Z2 + stats::rnorm(N, 0, 0.2)
  log_term <- log(abs(Z1) + 1)
  y_score <- log_term + Z2
  # Labels are assigned from lowest to highest priority so that later
  # assignments overwrite earlier ones, reproducing an if / else-if chain.
  Y <- rep("Elf", length(Z1))
  # NOTE(review): log(abs(Z1) + 1) is never negative, so this condition is
  # always TRUE and "Elf" never appears; kept to preserve the output.
  Y[log_term > -0.5] <- "Troll"
  Y[y_score > 0] <- "Orc"
  Y[y_score > 0.5] <- "Goblin"
  data.frame(Z1, Z2, X, Y)
}
#' Generate Hard Case Data with Two Z Variables
#'
#' Generates data in which X and Y both depend on the nonlinear function
#' `sin(Z1) * cos(Z2)` of two uniform variables on (-2, 2): X uses it
#' directly and Y uses its square, each with separate normal noise scaled
#' by 0.2.
#'
#' @param N Integer. Sample size.
#'
#' @return A data frame with columns X, Y, Z1, and Z2.
#' @export
#' @importFrom stats runif rnorm
#'
#' @examples
#' head(HardCase(100))
#'
HardCase <- function(N) {
  z1 <- stats::runif(N, min = -2, max = 2)
  z2 <- stats::runif(N, min = -2, max = 2)
  # Function of (Z1, Z2) shared by X and Y.
  shared <- sin(z1) * cos(z2)
  x <- shared + 0.2 * stats::rnorm(N)
  y <- shared^2 + 0.2 * stats::rnorm(N)
  data.frame(X = x, Y = y, Z1 = z1, Z2 = z2)
}
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.