Nothing
#' Simulation of Multivariate Linear Model Data
#' @param p Number of variables
#' @param q Vector containing the number of relevant predictor variables for each relevant response components
#' @param m Number of response variables
#' @param relpos A list of position of relevant component for predictor variables. The list contains vectors of position index, one vector or each relevant response components
#' @param gamma A declining (decaying) factor of eigen value of predictors (X). Higher the value of \code{gamma}, the decrease of eigenvalues will be steeper
#' @param R2 Vector of coefficient of determination (proportion of variation explained by predictor variable) for each relevant response components
#' @param eta A declining (decaying) factor of eigenvalues of response (Y). Higher the value of \code{eta}, more will be the declining of eigenvalues of Y. \code{eta = 0} refers that all eigenvalues of responses (Y) are 1.
#' @param muX Vector of average (mean) for each predictor variable
#' @param muY Vector of average (mean) for each response variable
#' @param ypos List of position of relevant response components that are combined to generate response variable during orthogonal rotation
#' @return A simrel object with all the input arguments along with following additional items
#' \item{X}{Simulated predictors}
#' \item{Y}{Simulated responses}
#' \item{W}{Simulated predictor components}
#' \item{Z}{Simulated response components}
#' \item{beta}{True regression coefficients}
#' \item{beta0}{True regression intercept}
#' \item{relpred}{Position of relevant predictors}
#' \item{testX}{Test Predictors}
#' \item{testY}{Test Response}
#' \item{testW}{Test predictor components}
#' \item{testZ}{Test response components}
#' \item{minerror}{Minimum model error}
#' \item{Xrotation}{Rotation matrix of predictor (R)}
#' \item{Yrotation}{Rotation matrix of response (Q)}
#' \item{type}{Type of simrel object \emph{univariate} or \emph{multivariate}}
#' \item{lambda}{Eigenvalues of predictors}
#' \item{SigmaWZ}{Variance-Covariance matrix of components of response and predictors}
#' \item{SigmaWX}{Covariance matrix of response components and predictors}
#' \item{SigmaYZ}{Covariance matrix of response and predictor components}
#' \item{Sigma}{Variance-Covariance matrix of response and predictors}
#' \item{RsqW}{Coefficient of determination corresponding to response components}
#' \item{RsqY}{Coefficient of determination corresponding to response variables}
#' @concept simulation
#' @concept linear model data
#' @keywords datagen
#' @references Sæbø, S., Almøy, T., & Helland, I. S. (2015). simrel—A versatile tool for linear model data simulation based on the concept of a relevant subspace and relevant predictors. Chemometrics and Intelligent Laboratory Systems, 146, 128-135.
#' @references Almøy, T. (1996). A simulation study on comparison of prediction methods when only a few components are relevant. Computational statistics & data analysis, 21(1), 87-107.
#' @export
msim <- function(p = 15, q = c(5, 4, 3), m = 5,
relpos = list(c(1, 2), c(3, 4, 6), c(5, 7)),
gamma = 0.6, R2 = c(0.8, 0.7, 0.8),
eta = 0, muX = NULL, muY = NULL,
ypos = list(c(1), c(3, 4), c(2, 5))) {
## Get all input parameter also for output ---
arg_list <- as.list(environment())
## Validate Inputs ----
if (!all(sapply(list(length(relpos), length(R2)), identical, length(q))))
stop("Length of relpos, R2 and q must be equal\n")
if (!all(sapply(seq_along(q), function(i) q[i] >= sapply(relpos, length)[i])))
stop("Number of relevant predictor is smaller than the number of relevant components\n")
if (!sum(q) <= p)
stop("Number of variables can not be smaller than the number of relevant variables\n")
if (!max(unlist(relpos)) <= p)
stop("Relevant Position can not exceed the number of variables\n")
if (!all(R2 < 1 & R2 > 0))
stop("R2 must be between 0 and 1\n")
if (!is.null(muX) & length(muX) != p) {
stop("Mean of X must have same length as the number of X variables\n")
}
if (!is.null(muY) & length(muY) != m) {
stop("Mean of Y must have same length as the number of Y variables\n")
}
if (any(duplicated(unlist(ypos)))) {
stop("Response Space must have unique combination of response variable.")
}
## Warning Conditions
if (any(unlist(lapply(ypos, identical, 1:2)))) {
warning("Current setting of ypos will produce uninformative response variable.")
}
## Completing Parameter for required response
nW0 <- length(relpos)
nW.extra <- m - length(relpos)
nW.extra.idx <- seq.int(nW0 + 1, length.out = nW.extra)
relpos <- append(relpos, lapply(nW.extra.idx, function(x) integer()))
R2 <- c(R2, rep(0, nW.extra))
q <- c(q, rep(0, nW.extra))
## How many components are relevant for each W_i
n.relpos <- vapply(relpos, length, 0L)
## Irrelevant position of predictors
resample <- function(x, n, ...){
if (n == 0) return(integer(0))
if (length(x) == 1) return(x)
sample(x, n, ...)
}
irrelpos <- setdiff(seq_len(p), Reduce(union, relpos))
n_epos <- q - sapply(relpos, length)
predPos <- lapply(seq_along(relpos), function(i){
pos <- relpos[[i]]
ret <- c(pos, resample(irrelpos, n_epos[i]))
irrelpos <<- setdiff(irrelpos, ret)
return(ret)
})
predPos[[length(relpos) + 1]] <- irrelpos
## Constructing Sigma
lambda <- exp(-gamma * (1:p))/exp(-gamma)
kappa <- exp(-eta * (1:m))/exp(-eta)
SigmaZ <- diag(lambda);
SigmaZinv <- diag(1 / lambda)
SigmaW <- diag(kappa) ## diag(m)
rhoMat <- SigmaW
### Covariance Construction
get_cov <- function(pos, Rsq, kappa = 1, p = p, lambda = lambda){
out <- vector("numeric", p)
alph <- runif(length(pos), -1, 1)
out[pos] <- sign(alph) * sqrt(Rsq * abs(alph) / sum(abs(alph)) * lambda[pos] * kappa)
return(out)
}
get_rho <- function(rhoMat, RsqVec) {
sapply(1:nrow(rhoMat), function(row){
sapply(1:ncol(rhoMat), function(col){
if (row == col) return(1)
rhoMat[row, col] / sqrt((RsqVec[row]) * (RsqVec[col]))
})
})
}
SigmaZW <- mapply(get_cov, pos = relpos, Rsq = R2, kappa = kappa,
MoreArgs = list(p = p, lambda = lambda))
Sigma <- cbind(rbind(SigmaW, SigmaZW), rbind(t(SigmaZW), SigmaZ))
rho.out <- get_rho(rhoMat, R2)
rho.out[is.nan(rho.out)] <- 0
if (any(rho.out < -1 | rho.out > 1))
stop("Two Responses in orthogonal, but highly relevant spaces must be less correlated. Choose rho closer to zero.")
## Rotation Matrix
RotX <- diag(p)
RotY <- diag(m)
getRotate <- function(predPos) {
n <- length(predPos)
Qmat <- matrix(rnorm(n ^ 2), n)
Qmat <- scale(Qmat, scale = FALSE)
qr.Q(qr(Qmat))
}
for (pos in predPos) {
rotMat <- getRotate(pos)
RotX[pos, pos] <- rotMat
}
for (pos in ypos) {
rotMat <- getRotate(pos)
RotY[pos, pos] <- rotMat
}
## Fill remaining irrelevant space with random normal variates
RotX[irrelpos, irrelpos] <- getRotate(irrelpos)
## True Regression Coefficient
betaZ <- SigmaZinv %*% SigmaZW
betaX <- Reduce(`%*%`, list(RotX, betaZ, t(RotY)))
## Geting Coef for Intercept
beta0 <- rep(0, m)
if (!(is.null(muY))) {
beta0 <- beta0 + muY
}
if (!(is.null(muX))) {
beta0 <- beta0 - t(betaX) %*% muX
}
## Rotation was not correct, now it is good, i suppose
SigmaY <- Reduce(`%*%`, list(RotY, SigmaW, t(RotY)))
SigmaX <- Reduce(`%*%`, list(RotX, SigmaZ, t(RotX)))
SigmaYX <- Reduce(`%*%`, list(RotY, t(SigmaZW), t(RotX)))
SigmaYZ <- RotY %*% t(SigmaZW)
SigmaWX <- t(SigmaZW) %*% RotX
SigmaOut <- rbind(
cbind(SigmaY, SigmaYX),
cbind(t(SigmaYX), SigmaX)
)
var_w <- diag(1/sqrt(diag(SigmaW)))
RsqW <- Reduce(`%*%`, list(var_w, t(SigmaZW), SigmaZinv, SigmaZW, var_w))
var_y <- diag(-1/sqrt(diag(SigmaY)))
RsqY <- Reduce(`%*%`, list(var_y, RotY, t(SigmaZW), SigmaZinv, SigmaZW, t(RotY), var_y))
## Minimum Error
## minerror <- SigmaY - SigmaYX, (RotX, SigmaZinv, t(RotX)), t(SigmaYX)
var_expl <- Reduce(`%*%`, list(RotY, t(SigmaZW), SigmaZinv, SigmaZW, t(RotY)))
minerror <- SigmaY - var_expl
## Check for Positive Definite
pd <- all(eigen(Sigma)$values > 0)
if (!pd) stop("No positive definite coveriance matrix found with current parameter settings")
seed <- .Random.seed
get_data <- function(n = 100) {
.Random.seed <- seed
## Simulation of Test and Training Data
SigmaRot <- chol(Sigma)
train_cal <- matrix(rnorm(n * (p + m), 0, 1), nrow = n)
train_cal <- train_cal %*% SigmaRot
W <- train_cal[, 1:m, drop = F]
Z <- train_cal[, (m + 1):(m + p), drop = F]
X <- Z %*% t(RotX)
Y <- W %*% t(RotY)
if (!(is.null(muX))) X <- sweep(X, 2, muX, '+')
if (!(is.null(muY))) Y <- sweep(Y, 2, muY, '+')
colnames(X) <- paste0('X', 1:p)
colnames(Y) <- paste0('Y', 1:m)
return(list(W = W, Z = Z, X = X, Y = Y))
}
## Return List
ret <- list(
call = match.call(),
data = get_data,
beta = betaX,
beta0 = beta0,
relpred = predPos,
minerror = minerror,
Xrotation = RotX,
Yrotation = RotY,
lambda = lambda,
SigmaWZ = Sigma,
SigmaWX = SigmaWX,
SigmaYZ = SigmaYZ,
SigmaYX = SigmaYX,
Sigma = SigmaOut,
rho.out = rho.out,
RsqW = RsqW,
RsqY = RsqY,
type = "multivariate"
)
ret <- `class<-`(append(arg_list, ret), 'simrel')
return(ret)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.