## ---- Class: Simrel ----
#' @title Simulation of Linear Model Data
#' @description Simulates univariate, bivariate and multivariate linear model data where users can specify few parameters for simulating data with wide range of properties.
#' @import R6
#' @param n Number of training samples
#' @param p Number of predictor variables
#' @param q Number of relevant predictor variables
#' @param relpos Position of relevant predictor components
#' @param gamma Decay factor of eigenvalues of predictor variables
#' @param R2 Coefficient of determination
#' @param ntest (Optional) Number of test samples
#' @param muX (Optional) Mean vector of predictor variables
#' @param muY (Optional) Mean vector of response variables
#' @param sim.obj (Optional) Previously fitted simulation object, the parameters will be taken from the object
#' @param lambda.min (Optional) Minimum value that eigenvalues can be
#' @return simrel object (A list)
#' @rdname simrel
#' @export
Simrel <- R6::R6Class(
"Simrel",
private = list(
..parameters = list(
n = 100,
p = 20,
q = 10,
m = 1,
relpos = c(1, 2, 3),
ypos = NULL,
gamma = 0.8,
eta = 0.3,
lambda.min = 1e-5,
rho = NULL,
R2 = 0.9,
ntest = NULL,
muX = NULL,
muY = NULL,
type = "univariate"
),
..properties = list(
relpred = NULL,
eigen_x = NULL,
eigen_y = NULL,
sigma_z = NULL,
sigma_zinv = NULL,
sigma_y = NULL,
sigma_w = NULL,
sigma_zy = NULL,
sigma_zw = NULL,
sigma = NULL,
rotation_x = NULL,
rotation_y = NULL,
beta_z = NULL,
beta = NULL,
beta0 = NULL,
Rsq_y = NULL,
Rsq_w = NULL,
minerror = NULL
),
..get_cov = function(pos, Rsq, eta, p, lambda){
out <- vector("numeric", p)
alph <- runif(length(pos), -1, 1)
out[pos] <- sign(alph) * sqrt(Rsq * abs(alph) / sum(abs(alph)) * lambda[pos] * eta)
return(out)
},
..get_rotate = function(predPos){
n <- length(predPos)
Qmat <- matrix(rnorm(n ^ 2), n)
Qmat <- scale(Qmat, scale = FALSE)
qr.Q(qr(Qmat))
},
..is_pd = function(sigma_mat){
all(eigen(sigma_mat)$values > 0)
},
..get_eigen = function(gamma, p) {
return((exp(-gamma * (1:p))) / (exp(-gamma)))
},
..predpos = function(p, q, relpos) {
relpos_list <- if (!is.list(relpos)) list(relpos) else relpos
irrelpos <- setdiff(seq_len(p), Reduce(union, relpos_list))
out <- lapply(seq_along(relpos_list), function(i){
pos <- relpos_list[[i]]
ret <- c(pos, sample(irrelpos, q[i] - length(pos)))
irrelpos <<- setdiff(irrelpos, ret)
return(ret)
})
if(!is.list(relpos)) out <- unlist(out)
return(out)
},
..get_data = function(n, p, sigma, rotation_x, m = 1,
rotation_y = NULL, muX = NULL, muY = NULL){
rotate_y <- all(exists("rotation_y"), !is.null(rotation_y))
sigma_rot <- chol(sigma)
train_cal <- matrix(rnorm(n * (p + m), 0, 1), nrow = n) %*% sigma_rot
Z <- train_cal[, (m + 1):(m + p), drop = F]
X <- Z %*% t(rotation_x)
W <- train_cal[, 1:m, drop = F]
Y <- if (rotate_y) W %*% t(rotation_y) else W
if (!is.null(muX)) X <- sweep(X, 2, muX, "+")
if (!is.null(muY)) Y <- sweep(Y, 2, muY, "+")
list(y = unname(Y), x = X)
}
),
active = list(
list_properties = function(){
properties <- private$..properties
properties <- properties[!sapply(properties, is.null)]
return(properties)
},
list_parameters = function(){
parameters <- private$..parameters
parameters <- parameters[!sapply(parameters, is.null)]
return(parameters)
}
),
public = list(
initialize = function(...) {
self$set_parameters(...)
},
base_properties = function() {
## Adding Properties to Simrel Object
self$set_properties("relpred", expression({
p <- self$get_parameters("p")
q <- self$get_parameters("q")
relpos <- self$get_parameters("relpos")
out <- private$..predpos(p, q, relpos)
if (!is.list(relpos)) out <- unlist(out)
return(out)
}))
self$set_properties("eigen_x", expression({
p <- self$get_parameters("p")
gamma <- self$get_parameters("gamma")
private$..get_eigen(gamma, p)
}))
self$set_properties("eigen_y", expression({
type <- self$get_parameters("type")
l <- switch(type,
univariate = 1,
bivariate = 2,
multivariate = length(self$get_parameters("ypos")))
if (l == 1) return(1)
eta <- self$get_parameters("eta")
private$..get_eigen(eta, l)
}))
self$set_properties("sigma_z", expression({
diag(self$get_properties("eigen_x"))
}))
self$set_properties("sigma_zinv", expression({
diag(1 / self$get_properties("eigen_x"))
}))
},
get_validated = function() {
errors <- list()
warnings <- list()
params <- self$list_parameters
list2env(params, envir = environment())
switch(
type,
univariate = {
req_params <- c("n", "p", "q", "relpos", "R2", "gamma", "type")
## --- ensure minimum required parameters
if (!all(req_params %in% names(params))) {
stop("Not Enough Parameters to continue")
}
## --- Check muX and muY
if (exists("muX")) {
if (length(muX) != p) errors[[1]] <- "Length of muX is not equals to number of predictors (p)"
}
if (exists("muY")) {
if (length(muY) != m) errors[[1]] <- "Length of muY is not equals to number of responses (m)"
}
## --- ensure length params agree
if (q <= length(relpos)) {
errors[[2]] <- "Number of position of relevant components should be less than the number of relevant predictors."
}
if (q > p) {
errors[[3]] <- "Relevant number of predictor should be less than or equals to total number of predictors."
}
if (!all(c(length(q), length(gamma), length(R2)) == 1)) {
errors[[6]] <- "Length of q, gamma and R2 should be equal to 1."
}
if (!all(relpos %in% 1:p)) {
errors[[7]] <- paste("Position of relevant component must be between 1 and ", p)
}
## --- Ensure range of parameters
if (gamma < 0) {
errors[[4]] <- "Gamma must be greater than zero."
}
if (!all(R2 > 0, R2 < 1)) {
errors[[5]] <- "Coefficient of determination (R2) should lie between 0 and 1."
}
## --- Ensure class of parameters
if (!is.atomic(relpos)) {
errors[[6]] <- "Relpos for uniresponse simulation should be a vector."
}
},
bivariate = {
n_relpos <- sapply(relpos, length)
## --- Ensure minimum required parameters
req_params <- c("n", "p", "q", "relpos", "R2",
"gamma", "type", "rho")
if (any(!req_params %in% names(params))) {
stop("Not Enough Parameters to continue")
}
## --- Check muX and muY
if (exists("muX")) {
if (length(muX) != p)
errors[[1]] <- "Length of muX is not equals to number of predictors (p)"
}
if (exists("muY")) {
if (length(muY) != m)
errors[[2]] <- "Length of muY is not equals to number of responses (m)"
}
## --- Ensure class of parameters
if (!is.list(relpos)) {
stop("Position of relevant predictors must be a list")
}
compos <- sum(relpos[[1]] %in% relpos[[2]])
if (compos > 0) {
cpos1 <- which(relpos[[1]] %in% relpos[[2]])
cpos2 <- which(relpos[[2]] %in% relpos[[1]])
}
if (all(compos == 0, q[3] > 0)) {
warning("The number of common relevant predictors is set to zero due to orthogonal relevant spaces")
private$..parameters$q[3] <- 0
}
if (all(compos == n_relpos[1], compos == n_relpos[2], q[1] != 0)) {
private$..parameters$q[3] <- min(q[1:2])
warnings(paste("The number of common relevant predictors is set to",
q[3], "since the relevant spaces are identical.\n"))
}
if (all(compos > 0, q[3] == 0)) {
private$..parameters$q[3] <- compos
warning(paste("The number of common relevant predictors is set to ",
compos, " since the relevant spaces must share common variable(s)\n"))
}
if (compos == 0) {
rho[2] <- rho[1]/sqrt((1 - R2[1]) * (1 - R2[2]))
if (rho[2] < -0.99999 | rho[2] > 0.9999) {
stop("Non-PD covariance matrix, choose smaller R2-values\n")
} else {
warning(paste("The conditional correlation between y1 and y2 is fixed equal to",
round(rho[2], 2), " due to orthogonal relevant spaces \n"))
private$..parameters$q[3] <- 0
private$..parameters$rho[2] <- rho[2]
}
}
if (!all(c(length(rho), length(relpos), length(R2)) == 2)) {
stop(paste("Length of rho, relpos and R2 should be of length 2"))
}
## Ensure Lengths of parameters
if (length(unlist(sapply(relpos, length))) == 0) {
errors[[3]] <- "You must have minimum one relevant component for each response."
}
if (sum(q) < sum(sapply(relpos, length))) {
errors[[4]] <- "Number of position of relevant components should be less than the number of relevant predictors for each response components."
}
if (sum(q) > p) {
errors[[4]] <- "Total number of relevant predictors should be less than or equals to total number of predictors."
}
if (!all(unlist(relpos) %in% 1:p)) {
errors[[5]] <- paste("Position of relevant predictor component of response must be between 1 and ", p)
}
## --- Ensure range of parameters
if (gamma < 0) {
errors[[7]] <- "gamma must be greater than zero."
}
if (any(sapply(R2, function(x) !all(x > 0, x < 1)))) {
errors[[8]] <- "Coefficients of determination (R2) should lie between 0 and 1."
}
if (any(sapply(rho, function(x) !all(x > -1, x < 1)))) {
errors[[9]] <- "The correlation between responses should lie between -1 and 1."
}
},
multivariate = {
## --- Ensure minimum required parameters
req_params <- c("n", "p", "q", "relpos", "R2",
"gamma", "type", "ypos", "m")
if (any(!req_params %in% names(params))) {
stop("Not Enough Parameters to continue")
}
## --- Check muX and muY
if (exists("muX")) {
if (length(muX) != p) errors[[1]] <- "Length of muX is not equals to number of predictors (p)"
}
if (exists("muY")) {
if (length(muY) != m) errors[[1]] <- "Length of muY is not equals to number of responses (m)"
}
## --- Ensure class of parameters
if (!is.list(relpos) | !is.list(ypos)) {
stop("relpos and ypos must be a list")
}
if (!all(c(length(q), length(relpos), length(R2)) == length(q))) {
stop(paste("Length of q, relpos and R2 should be of same length"))
}
## Ensure Lengths of parameters
if (any(q < sapply(relpos, length))) {
errors[[2]] <- "Number of position of relevant components should be less than the number of relevant predictors for each response components."
}
if (sum(q) > p) {
errors[[3]] <- "Total number of relevant predictors should be less than or equals to total number of predictors."
}
if (!all(unlist(relpos) %in% 1:p)) {
errors[[4]] <- paste("Position of relevant predictor component of response must be between 1 and ", p)
}
if (any(duplicated(unlist(relpos)))) {
errors[[5]] <- "Relevant position of predictor components must be unique"
}
if (!all(unlist(ypos) %in% 1:m)) {
errors[[6]] <- paste("Position of response component must be between 1 and ", m)
}
if (any(duplicated(unlist(ypos)))) {
errors[[7]] <- "Position of response components must be unique"
}
if (length(unlist(ypos)) != m) {
errors[[8]] <- paste("Position of response components (ypos) must contain all response variabels from 1 to ", m)
}
## --- Ensure range of parameters
if (gamma < 0) {
errors[[9]] <- "gamma must be greater than zero."
}
if (eta < 0) {
errors[[9]] <- "eta must be greater than zero."
}
if (any(sapply(R2, function(x) !all(x > 0, x < 1)))) {
errors[[10]] <- "Coefficients of determination (R2) should lie between 0 and 1."
}
}
)
out <- list(errors = errors, warnings = warnings)
return(out)
},
get_parameters = function(key) {
if (length(key) == 1) return(private$..parameters[[key]])
else return(private$..parameters[key])
},
set_parameters = function(...) {
params <- list(...)
for (key in names(params)) {
private$..parameters[[key]] <- params[[key]]
}
},
get_properties = function(key) {
if (length(key) == 1) return(private$..properties[[key]])
else return(private$..properties[key])
},
set_properties = function(key, expression) {
private$..properties[[key]] <- eval(expression)
},
get_data = function(type = c("train", "test", "both")){
sigma <- self$get_properties("sigma")
if (!private$..is_pd(sigma)) stop("\nCorrelation matrix is not positive definite.\n")
type <- match.arg(type)
n <- self$get_parameters("n")
p <- self$get_parameters("p")
m <- self$get_parameters("m")
rotation_x <- self$get_properties("rotation_x")
rotation_y <- self$get_properties("rotation_y")
muX <- self$get_parameters("muX")
muY <- self$get_parameters("muY")
train <- private$..get_data(n, p, sigma, rotation_x, m, rotation_y, muX, muY)
out <- data.frame(y = I(train$y), x = I(train$x))
if (type %in% c("test", "both")) {
ntest <- self$get_parameters("ntest")
if(is.null(ntest)){
warning("ntest is not supplied, so n is used for test data")
ntest <- n
}
test <- private$..get_data(ntest, p, sigma, rotation_x, m, rotation_y, muX, muY)
out <- list(
train = data.frame(y = I(train$y), x = I(train$x)),
test = data.frame(y = I(test$y), x = I(test$x))
)
return(out)
}
return(out)
}
)
)
## ---- Class: UniSimrel ----
#' @title Uni-Response Simulation of Linear Model Data
#' @description Simulates univariate linear model data where users can specify few parameters for simulating data with wide range of properties.
#' @import R6
#' @param n Number of training samples
#' @param p Number of predictor variables
#' @param q Number of relevant predictor variables
#' @param relpos Position of relevant predictor components
#' @param ypos Position of response components while rotation (see details)
#' @param gamma Decay factor of eigenvalues of predictor variables
#' @param R2 Coefficient of determination
#' @param ntest (Optional) Number of test samples
#' @param muX (Optional) Mean vector of predictor variables
#' @param muY (Optional) Mean vector of response variables
#' @param sim.obj (Optional) Previously fitted simulation object, the parameters will be taken from the object
#' @param lambda.min (Optional) Minimum value that eigenvalues can be
#' @return simrel object (A list)
#' @rdname simrel
#' @export
UniSimrel <- R6::R6Class(
"UniSimrel",
inherit = Simrel,
public = list(
initialize = function(...){
super$initialize(...)
super$set_parameters(type = "univariate")
## Validate the parameters
is_valid <- self$get_validated()
is_valid <- lapply(is_valid, function(x) x[!sapply(x, is.null)])
if (length(is_valid$errors)) stop(unlist(is_valid$errors))
if (length(is_valid$warnings)) warning(unlist(is_valid$warnings))
## Adding Properties to Simrel Object
self$compute_properties()
},
add_properties = function(){
self$set_properties("sigma_y", expression({1}))
self$set_properties("sigma_zy", expression({
relpos <- self$get_parameters("relpos")
R2 <- self$get_parameters("R2")
p <- self$get_parameters("p")
lambda <- self$get_properties("eigen_x")
eta <- self$get_properties("eigen_y")
private$..get_cov(pos = relpos, Rsq = R2, p = p, lambda = lambda, eta = eta)
}))
self$set_properties("sigma", expression({
sigma_zy <- self$get_properties("sigma_zy")
sigma_y <- self$get_properties("sigma_y")
sigma_z <- self$get_properties("sigma_z")
out <- rbind(c(sigma_y, t(sigma_zy)), cbind(sigma_zy, sigma_z))
unname(out)
}))
self$set_properties("rotation_x", expression({
relpred <- self$get_properties("relpred")
p <- self$get_parameters("p")
irrelpred <- setdiff(1:p, unlist(relpred))
out <- diag(p)
out[irrelpred, irrelpred] <- private$..get_rotate(irrelpred)
out[relpred, relpred] <- private$..get_rotate(relpred)
return(out)
}))
self$set_properties("beta_z", expression({
sigma_zinv <- self$get_properties("sigma_zinv")
sigma_zy <- self$get_properties("sigma_zy")
return(sigma_zinv %*% sigma_zy)
}))
self$set_properties("beta", expression({
rotation_x <- self$get_properties("rotation_x")
beta_z <- self$get_properties("beta_z")
return(rotation_x %*% beta_z)
}))
self$set_properties("beta0", expression({
beta0 <- rep(0, self$get_parameters("m"))
muX <- self$get_parameters("muX")
muY <- self$get_parameters("muY")
if (!is.null(muX)) beta <- self$get_properties("beta")
if (!is.null(muY)) beta0 <- beta0 + muY
if (!is.null(muX)) beta0 <- beta0 - t(beta) %*% muX
return(c(beta0))
}))
self$set_properties("Rsq_y", expression({
beta_z <- self$get_properties("beta_z")
sigma_zy <- self$get_properties("sigma_zy")
c(t(beta_z) %*% sigma_zy)
}))
self$set_properties("minerror", expression({
sigma_y <- self$get_properties("sigma_y")
Rsq <- self$get_properties("Rsq_y")
c(sigma_y - Rsq)
}))
},
compute_properties = function() {
super$base_properties()
self$add_properties()
}
)
)
## ---- Class: BiSimrel ----
#' @title Bi-Response Simulation of Linear Model Data
#' @description Simulates multivariate linear model data where users can specify few parameters for simulating data with wide range of properties.
#' @import R6
#' @param n Number of training samples
#' @param p Number of predictor variables
#' @param q Number of relevant predictor variables
#' @param relpos Position of relevant predictor components
#' @param ypos Position of response components while rotation (see details)
#' @param gamma Decay factor of eigenvalues of predictor variables
#' @param R2 Coefficient of determination
#' @param ntest (Optional) Number of test samples
#' @param muX (Optional) Mean vector of predictor variables
#' @param muY (Optional) Mean vector of response variables
#' @param sim.obj (Optional) Previously fitted simulation object, the parameters will be taken from the object
#' @param lambda.min (Optional) Minimum value that eigenvalues can be
#' @return simrel object (A list)
#' @rdname simrel
#' @export
BiSimrel <- R6::R6Class(
"BiSimrel",
inherit = Simrel,
private = list(
..aux_var = list(
comn_relpos = NULL,
n_compos = NULL,
ex_relpos = NULL,
n_relpos = NULL,
comn_relpos_idx = NULL
),
..Rfunc = function(alpha1, alpha2, L) {
t(alpha1) %*% solve(diag(L)) %*% alpha2
},
..a21func = function(a11, a12, a22, R12, l1, l2) {
l1 / a11 * (R12 - a22 * a12 / l2)
},
..a22func = function(a11, a12, R2, R12, l1, l2) {
bb <- R12 * a12 / a11^2 * l1 / l2
root <- sqrt(R12^2 * a12^2 / a11^4 * l1^2 / l2^2 -
(1 / l2 + l1 / l2 * a12^2 / a11^2) *
(l1 / a11^2 * R12^2 - R2))
denom <- (1 / l2 + l1 / l2 * a12^2 / a11^2)
w1 <- (bb - root) / denom
w2 <- (bb + root) / denom
return(c(w1, w2))
}
),
public = list(
initialize = function(...){
params <- list(...)
default_params <- list(
q = c(6, 7, 3),
m = 2,
relpos = list(c(1, 2), c(2, 3, 5)),
R2 = c(0.7, 0.9),
rho = c(0.7, 0.8),
eta = 0,
type = "bivariate"
)
if (length(params) != 0) default_params[names(params)] <- params
do.call(super$initialize, default_params)
## Validate the parameters
is_valid <- self$get_validated()
is_valid <- lapply(is_valid, function(x) x[!sapply(x, is.null)])
if (length(is_valid$errors)) stop(unlist(is_valid$errors))
if (length(is_valid$warnings)) warning(unlist(is_valid$warnings))
## Compute auxiliary variables
self$compute_aux_vars()
## Adding Properties to Simrel Object
self$compute_properties()
},
get_aux_var = function(key) private$..aux_var[[key]],
compute_aux_vars = function() {
relpos <- self$get_parameters("relpos")
private$..aux_var <- within(private$..aux_var, {
comn_relpos <- do.call(intersect, relpos)
n_compos <- length(comn_relpos)
ex_relpos <- append(lapply(relpos, setdiff, comn_relpos),
list(comn_relpos))
n_relpos <- sapply(relpos, length)
comn_relpos_idx <- lapply(relpos, function(x) which(x %in% comn_relpos))
})
},
add_properties = function() {
## Adding Properties to Simrel Object
self$set_properties("relpred", expression({
p <- self$get_parameters("p")
q <- self$get_parameters("q")
relpos <- self$get_parameters("relpos")
comn_relpos <- self$get_aux_var("comn_relpos")
ex_relpos <- self$get_aux_var("ex_relpos")
out <- private$..predpos(p, q, ex_relpos)
if (!is.list(relpos)) out <- unlist(out)
return(out)
}))
self$set_properties("sigma_zy", expression({
p <- self$get_parameters("p")
m <- self$get_parameters("m")
relpos <- self$get_parameters("relpos")
R2 <- self$get_parameters("R2")
rho <- self$get_parameters("rho")
p <- self$get_parameters("p")
lambda <- self$get_properties("eigen_x")
eta <- self$get_properties("eigen_y")
sigma_zy <- matrix(0, p, 2)
n_compos <- self$get_aux_var("n_compos")
n_relpos <- self$get_aux_var("n_relpos")
cpos <- self$get_aux_var("comn_relpos_idx")
if (n_compos == 0) {
out <- mapply(private$..get_cov, pos = relpos, Rsq = R2, eta = eta,
MoreArgs = list(p = p, lambda = lambda))
R12 <- sum(out[out[, 1] != 0, 1] * out[out[, 2] != 0, 2]/ lambda[relpos[[1]]])
rho_out <- rho[1] / sqrt(prod(1 - R2 ^ 2))
if (any(rho_out < -1, rho_out > 1)) {
stop(paste("Two Response in orthogonal, but highly relevant spaces must be less correlated.",
"Choose it less than", rho[1], "or reduce the R2 value."))
} else {
rho_out <- c(rho[1], rho_out)
self$set_parameters(rho = rho_out)
}
sigma_zy <- out
}
if (any(n_compos == n_relpos)) {
if (any(n_relpos > n_compos)) {
id <- which(n_relpos == n_compos)
id2 <- (1:2)[-id]
private$..properties$relpred[id] <- vector("list", 1)
warning(paste("\nThe number of relevant predictors for response", id, "is set to",
n_compos, "since its relevant space is a subspace of the other\n"))
cpos.1 <- cpos[[id]]
cpos.2 <- cpos[[id2]]
rho <- self$get_parameters("rho")
R2 <- self$get_parameters("R2")
R12 <- rho[1] - rho[2] * sqrt((1 - R2[1]) * (1 - R2[2]))
alpha <- runif(n_compos, -1, 1)
alpha <- sign(alpha) * sqrt(R2[id] * abs(alpha)/sum(abs(alpha)) * lambda[relpos[[id]]])
alpha2sum <- 1
j <- 1
while (alpha2sum > R2[id2] & j < 1000) {
alpha1 <- alpha
alpha2star <- runif(n_compos - 1, -1, 1)
alphasum <- sum(alpha1[-1] * alpha2star/lambda[relpos[[id]][cpos.1[-1]]])
a21 <- (R12 - alphasum) * lambda[relpos[[id]][cpos.1[1]]]/alpha1[1]
alpha2star <- c(a21, alpha2star)
alpha2sum <- private$..Rfunc(alpha2star, alpha2star, lambda[relpos[[id]]])
j <- j + 1
}
if (j == 1000) {
stop("No PD matrix found in 1000 simulations with current parameter setting")
}
if ((n_relpos[id2] - n_compos) > 1) {
alpha2rest <- runif(n_relpos[id2] - n_compos, 0, 1) *
sample(c(-1, 1), n_relpos[id2] - n_compos, replace = TRUE)
alpha2rest <- sign(alpha2rest) * sqrt((R2[id2] - c(alpha2sum)) * abs(alpha2rest)/sum(abs(alpha2rest)) * lambda[relpos[[id2]][-cpos.2]])
} else {
alpha2rest <- sample(c(-1, 1), 1, replace = FALSE) *
sqrt((R2[id2] - c(alpha2sum)) * lambda[relpos[[id2]][-cpos.2]])
}
alpha_out <- rep(0, p)
alpha_out[relpos[[id2]][cpos.2]] <- alpha2star
alpha_out[relpos[[id2]][-cpos.2]] <- alpha2rest
sigma_zy[relpos[[id]], id] <- alpha1
sigma_zy[, id2] <- alpha_out
} else {
if (n_compos == 1) {
alpha1 <- sqrt(R2[1] * lambda[relpos[[1]]])
alpha2 <- sign(rho[1]) * sqrt(R2[2] * lambda[relpos[[2]]])
rho[2] <- (rho[1] - alpha1 * alpha2/lambda[relpos[[1]]])/
sqrt((1 - R2[1]) * (1 - R2[2]))
private$..properties$rho[2] <- rho[2]
warning(paste(
"\nThe conditional correlation between y1 and y2 is fixed equal to",
round(rho[2], 2),
"due to overlapping and one-dimensional relevant spaces for y1 and y2\n"
))
sigma_zy[relpos[[1]], 1] <- alpha1
sigma_zy[relpos[[2]], 2] <- alpha2
}
if (n_compos == 2) {
R12 <- rho[1] - rho[2] * sqrt((1 - R2[1]) * (1 - R2[2]))
R2try <- 2
tol <- 1
j <- 1
while (tol > 0.001 & (j < 5000)) {
alpha1 <- runif(2, -1, 1)
alpha1 <- sign(alpha1) *
sqrt(R2[1] * abs(alpha1)/
sum(abs(alpha1)) * lambda[relpos[[1]]])
options(warn = -1)
a22 <- private$..a22func(alpha1[1], alpha1[2], R2[2],
R12, lambda[relpos[[2]]][1],
lambda[relpos[[2]]][2])
a22 <- a22[which(abs(a22) == min(abs(a22)))][1]
if (is.na(a22)) next
options(warn = 0)
a21 <- private$..a21func(alpha1[1], alpha1[2], a22, R12,
lambda[relpos[[2]]][1],
lambda[relpos[[2]]][2])
alpha2 <- c(a21, a22)
R2try <- private$..Rfunc(alpha2, alpha2, lambda[relpos[[2]]])
tol <- abs(R2try - R2[2])
j <- j + 1
}
if (j == 5000) {
stop("No PD covariance matrix found with current prameter setting\n ")
}
sigma_zy[relpos[[1]], 1] <- alpha1
sigma_zy[relpos[[2]], 2] <- alpha2
}
if (n_compos > 2) {
R12 <- rho[1] - rho[2] * sqrt((1 - R2[1]) * (1 - R2[2]))
R2try <- 2
tol <- 1
j <- 1
while (tol > 0.001 & (j < 5000)) {
## Sampling all but two positions
R2rest.1 <- 1
R2rest.2 <- 1
R12rest <- 1
k <- 1
R1prop <- runif(1, 0.3, 0.8)
R2prop <- runif(1, 0.3, 0.8)
loop_condition <- any(
R2rest.1 > (n_relpos[1] - 2)/n_relpos[1] * R2[1],
R2rest.2 > (n_relpos[2] - 2)/n_relpos[2] * R2[2],
rho[1] - R12rest < -1,
rho[1] - R12rest > 1
)
while (loop_condition & (k < 1000)) {
alpha1cp <- runif(n_relpos[1] - 2, -1, 1)
alpha1cp <- sign(alpha1cp) *
sqrt(R1prop * R2[1] *
abs(alpha1cp) / sum(abs(alpha1cp)) *
lambda[relpos[[1]][-c(1:2)]])
R2rest.1 <- sum(alpha1cp * alpha1cp / lambda[relpos[[1]][-c(1:2)]])
alpha2cp <- runif(n_relpos[2] - 2, -1, 1)
alpha2cp <- sign(alpha2cp) *
sqrt(R2prop * R2[2] * abs(alpha2cp) / sum(abs(alpha2cp)) *
lambda[relpos[[2]][-c(1:2)]])
R2rest.2 <- sum(alpha2cp * alpha2cp / lambda[relpos[[2]][-c(1:2)]])
R12rest <- sum(alpha1cp * alpha2cp / lambda[relpos[[2]][-c(1:2)]])
k <- k + 1
}
alpha1 <- runif(2, -1, 1)
alpha1 <- sign(alpha1) *
sqrt((R2[1] - R2rest.1) *
abs(alpha1)/sum(abs(alpha1)) *
lambda[relpos[[1]]][1:2])
options(warn = -1)
a22 <- private$..a22func(
alpha1[1], alpha1[2],
R2[2] - R2rest.2,
R12 - R12rest,
lambda[relpos[[2]]][1],
lambda[relpos[[2]]][2]
)
a22 <- a22[which(abs(a22) == min(abs(a22)))][1]
if (is.na(a22)) next
options(warn = 0)
a21 <- private$..a21func(
alpha1[1], alpha1[2], a22,
R12 - R12rest, lambda[relpos[[2]]][1],
lambda[relpos[[2]]][2])
alpha2 <- c(a21, a22, alpha2cp)
alpha1 <- c(alpha1, alpha1cp)
R2try <- private$..Rfunc(alpha2, alpha2, lambda[relpos[[2]]])
tol <- abs(R2try - R2[2])
j <- j + 1
}
if (j == 5000) {
stop("No PD covariance matrix found with current prameter setting\n ")
}
sigma_zy[relpos[[1]], 1] <- alpha1
sigma_zy[relpos[[2]], 2] <- alpha2
}
}
}
if (all(n_compos != 0, all(n_compos < n_relpos))) {
j <- 1
cp1sum <- cp2sum <- 1
while (any(cp1sum >= R2[1], cp2sum >= R2[2]) & (j < 1000)) {
## cp <- which(relpos[[1]] %in% relpos[[2]])
cp <- relpos[[1]][cpos[[1]]]
R12 <- rho[1] - rho[2] * sqrt((1 - R2[1]) * (1 - R2[2]))
alpha1cp <- sign(R12) * runif(n_compos, 0, 1)
alpha2cp <- runif(n_compos, 0, 1)
cp.sum <- sum(alpha1cp * alpha2cp/lambda[cp])
k <- R12/cp.sum
sigma_zy[cp, 1] <- alpha1cp <- sqrt(k) * alpha1cp
sigma_zy[cp, 2] <- alpha2cp <- sqrt(k) * alpha2cp
cp1sum <- sum(alpha1cp * alpha1cp/lambda[cp])
cp2sum <- sum(alpha2cp * alpha2cp/lambda[cp])
j <- j + 1
}
if (j == 1000) {
stop("No PD covariance matrix found with current parameter setting\n")
}
id <- which(relpos[[1]] %in% cp)
ncp <- relpos[[1]][-id]
alpha1 <- runif(length(ncp), -1, 1)
sigma_zy[ncp, 1] <- sign(alpha1) *
sqrt((R2[1] - cp1sum) * abs(alpha1)/
sum(abs(alpha1)) * lambda[ncp])
id <- which(relpos[[2]] %in% cp)
ncp <- relpos[[2]][-id]
alpha2 <- runif(length(ncp), -1, 1)
sigma_zy[ncp, 2] <- alpha2 <- sign(alpha2) *
sqrt((R2[2] - cp2sum) * abs(alpha2)/
sum(abs(alpha2)) * lambda[ncp])
}
return(sigma_zy)
}))
self$set_properties("sigma_y", expression({
eigen_y <- self$get_properties("eigen_y")
rho <- self$get_parameters("rho")
out <- diag(c(eigen_y, rep(1, 2 - length(eigen_y))))
out[out != diag(out)] <- rho[1]
return(out)
}))
self$set_properties("sigma_yinv", expression({
sigma_y <- self$get_properties("sigma_y")
solve(sigma_y)
}))
self$set_properties("sigma", expression({
sigma_zy <- self$get_properties("sigma_zy")
sigma_y <- self$get_properties("sigma_y")
sigma_z <- self$get_properties("sigma_z")
out <- cbind(rbind(sigma_y, sigma_zy), rbind(t(sigma_zy), sigma_z))
unname(out)
}))
self$set_properties("rotation_x", expression({
relpred <- self$get_properties("relpred")
p <- self$get_parameters("p")
irrelpred <- setdiff(1:p, unlist(relpred))
out <- diag(p)
out[irrelpred, irrelpred] <- private$..get_rotate(irrelpred)
for (pos in relpred) {
rotMat <- private$..get_rotate(pos)
out[pos, pos] <- rotMat
}
return(out)
}))
self$set_properties("sigma_xy", expression({
rotation_x <- self$get_properties("rotation_x")
sigma_zy <- self$get_properties("sigma_zy")
t(sigma_zy) %*% t(rotation_x)
}))
self$set_properties("beta_z", expression({
sigma_zinv <- self$get_properties("sigma_zinv")
sigma_zy <- self$get_properties("sigma_zy")
return(sigma_zinv %*% sigma_zy)
}))
self$set_properties("beta", expression({
rotation_x <- self$get_properties("rotation_x")
beta_z <- self$get_properties("beta_z")
return(rotation_x %*% beta_z)
}))
self$set_properties("beta0", expression({
beta0 <- rep(0, self$get_parameters("m"))
muX <- self$get_parameters("muX")
muY <- self$get_parameters("muY")
if (!is.null(muX)) beta <- self$get_properties("beta")
if (!is.null(muY)) beta0 <- beta0 + muY
if (!is.null(muX)) beta0 <- beta0 - t(beta) %*% muX
return(c(beta0))
}))
self$set_properties("Rsq_y", expression({
beta_z <- self$get_properties("beta_z")
sigma_zy <- self$get_properties("sigma_zy")
t(sigma_zy) %*% beta_z
}))
self$set_properties("minerror", expression({
sigma_y <- self$get_properties("sigma_y")
Rsq <- self$get_properties("Rsq_y")
unname(sigma_y - Rsq)
}))
},
compute_properties = function() {
super$base_properties()
self$add_properties()
}
)
)
## ---- Class: MultiSimrel ----
#' @title Multi-Response Simulation of Linear Model Data
#' @description Simulates multivariate linear model data where users can specify few parameters for simulating data with wide range of properties.
#' @import R6
#' @param n Number of training samples
#' @param p Number of predictor variables
#' @param q Number of relevant predictor variables
#' @param relpos Position of relevant predictor components
#' @param ypos Position of response components while rotation (see details)
#' @param gamma Decay factor of eigenvalues of predictor variables
#' @param R2 Coefficient of determination
#' @param ntest (Optional) Number of test samples
#' @param muX (Optional) Mean vector of predictor variables
#' @param muY (Optional) Mean vector of response variables
#' @param sim.obj (Optional) Previously fitted simulation object, the parameters will be taken from the object
#' @param lambda.min (Optional) Minimum value that eigenvalues can be
#' @return simrel object (A list)
#' @rdname simrel
#' @export
MultiSimrel <- R6::R6Class(
"MultiSimrel",
inherit = Simrel,
public = list(
initialize = function(...){
params <- list(...)
default_params <- list(
q = c(6, 7),
m = 3,
relpos = list(c(1, 2), c(3, 4, 5)),
ypos = list(1, c(2, 3)),
R2 = c(0.7, 0.9),
eta = 0,
type = "multivariate"
)
if (length(params) != 0) default_params[names(params)] <- params
do.call(super$initialize, default_params)
## Validate the parameters
is_valid <- self$get_validated()
is_valid <- lapply(is_valid, function(x) x[!sapply(x, is.null)])
if (length(is_valid$errors)) stop(unlist(is_valid$errors))
if (length(is_valid$warnings)) warning(unlist(is_valid$warnings))
## Adding Properties to Simrel Object
self$compute_properties()
},
add_properties = function() {
## Adding Properties to Simrel Object
self$set_properties("sigma_w", expression({
eigen_y <- self$get_properties("eigen_y")
m <- self$get_parameters("m")
diag(c(eigen_y, rep(1, m - length(eigen_y))))
}))
self$set_properties("sigma_zw", expression({
relpos <- self$get_parameters("relpos")
m <- self$get_parameters("m")
R2 <- self$get_parameters("R2")
p <- self$get_parameters("p")
lambda <- self$get_properties("eigen_x")
eta <- self$get_properties("eigen_y")
out <- mapply(private$..get_cov, pos = relpos, Rsq = R2, eta = eta,
MoreArgs = list(p = p, lambda = lambda))
cbind(out, rep(0, m - length(eta)))
}))
self$set_properties("sigma", expression({
sigma_zw <- self$get_properties("sigma_zw")
sigma_w <- self$get_properties("sigma_w")
sigma_z <- self$get_properties("sigma_z")
out <- cbind(rbind(sigma_w, sigma_zw), rbind(t(sigma_zw), sigma_z))
unname(out)
}))
self$set_properties("rotation_x", expression({
relpred <- self$get_properties("relpred")
p <- self$get_parameters("p")
irrelpred <- setdiff(1:p, unlist(relpred))
out <- diag(p)
out[irrelpred, irrelpred] <- private$..get_rotate(irrelpred)
for (pos in relpred) {
rotMat <- private$..get_rotate(pos)
out[pos, pos] <- rotMat
}
return(out)
}))
self$set_properties("rotation_y", expression({
ypos <- self$get_parameters("ypos")
m <- self$get_parameters("m")
out <- diag(m)
for (pos in ypos) {
rotMat <- private$..get_rotate(pos)
out[pos, pos] <- rotMat
}
return(out)
}))
self$set_properties("sigma_y", expression({
rotation_y <- self$get_properties("rotation_y")
sigma_w <- self$get_properties("sigma_w")
t(rotation_y) %*% sigma_w %*% rotation_y
}))
self$set_properties("sigma_zy", expression({
rotation_y <- self$get_properties("rotation_y")
sigma_zw <- self$get_properties("sigma_zw")
rotation_y %*% t(sigma_zw)
}))
self$set_properties("sigma_xy", expression({
rotation_x <- self$get_properties("rotation_x")
rotation_y <- self$get_properties("rotation_y")
sigma_zw <- self$get_properties("sigma_zw")
rotation_y %*% t(sigma_zw) %*% t(rotation_x)
}))
self$set_properties("beta_z", expression({
sigma_zinv <- self$get_properties("sigma_zinv")
sigma_zw <- self$get_properties("sigma_zw")
return(sigma_zinv %*% sigma_zw)
}))
self$set_properties("beta", expression({
rotation_x <- self$get_properties("rotation_x")
rotation_y <- self$get_properties("rotation_y")
beta_z <- self$get_properties("beta_z")
return(rotation_x %*% beta_z %*% t(rotation_y))
}))
self$set_properties("beta0", expression({
beta0 <- rep(0, self$get_parameters("m"))
muX <- self$get_parameters("muX")
muY <- self$get_parameters("muY")
if (!is.null(muX)) beta <- self$get_properties("beta")
if (!is.null(muY)) beta0 <- beta0 + muY
if (!is.null(muX)) beta0 <- beta0 - t(beta) %*% muX
return(c(beta0))
}))
self$set_properties("Rsq_w", expression({
beta_z <- self$get_properties("beta_z")
sigma_zw <- self$get_properties("sigma_zw")
sigma_w <- self$get_properties("sigma_w")
unname(t(beta_z) %*% sigma_zw %*% solve(sigma_w))
}))
self$set_properties("Rsq_y", expression({
rotation_y <- self$get_properties("rotation_y")
Rsq_w <- self$get_properties("Rsq_w")
t(rotation_y) %*% Rsq_w %*% rotation_y
}))
self$set_properties("minerror", expression({
sigma_y <- self$get_properties("sigma_y")
Rsq <- self$get_properties("Rsq_y")
unname(sigma_y - Rsq)
}))
},
compute_properties = function() {
super$base_properties()
self$add_properties()
}
)
)
## ---- Wrapper Function ----
#' @title Multi-Response Simulation of Linear Model Data
#' @description Simulates multivariate linear model data where users can specify few parameters for simulating data with wide range of properties.
#' @import R6
#' @param type Number of training samples
#' @param ... All required arguments for different types of simulation
#' @export
simulater <- function(type, ...){
if (type == "univariate") {
sobj <- UniSimrel$new(...)
class(sobj) <- "simrel"
return(sobj)
}
if (type == "multivariate") {
sobj <- MultiSimrel$new(...)
class(sobj) <- "simrel"
return(sobj)
}
return(paste(type, "is unknown"))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.