#' Generate data from SLIDE model for two views.
#'
#' Generates two matched datasets from SLIDE model with rank 2 for each type of structure (shared/individual).
#'
#'
#' Generates X_1 (n x p_1) and X_2 (n x p_2) according to the additive noise model \deqn{X_1 = Z_1 + E_1, X_2 = Z_2 + E_2,} where noise matrices \eqn{E_1, E_2} have independent entries from normal distribution with mean zero, and standard deviations sigma1, sigma2 correspondingly, and
#' \deqn{Z_1 = c_1 (U_1 D_0 W_{1,1}' + U_2 D_1 W_{1,2}')}
#' \deqn{Z_2 = c_2 (U_1 D_0 W_{2,1}' + U_3 D_2 W_{2,3}').}
#' Here \eqn{U=[U_1 U_2 U_3]} (n x 6) is generated using uniform distribution on [0,1] with subsequent centering and orthonormalization, \eqn{D_0 = diag(1.5, 1.3)}, \eqn{D_1 = diag(1, 0.8)}, \eqn{D_2 = diag(1, 0.7)}, and the loadings \eqn{W = [W_1 W_2]} (p_1 + p_2 X 6) with \eqn{W_1 = [W_{1,1} W_{1,2} 0]}, \eqn{W_2 = [W_{2,1} 0 W_{2,3}]} are generated using uniform distribution on [0,1] with subsequent orthonormalization of \eqn{W}. The loadings matrix \eqn{V} in SLIDE model is generated by condensing c_1, c_2, D_0, D_1, D_2 and W.
#'
#' @param n An integer specifying the number of matched samples, the default value is 100.
#' @param pvec A vector of integer values p_1, p_2 corresponding to the number of measurements within each data view. The default value is \code{pvec = c(25,25)}.
#' @param cvec A vector of positive values c_1, c_2 corresponding to the scale of each view. The default value is \code{cvec = c(1,1)}, that is both views have the same scale.
#' @param snr A non-negative value specifying the ratio between the Frobenius norm of the signal matrix and the Frobenius norm of the noise matrix. This value determines the variance of the elements in the noise matrix. The default value is 1.
#' @param orthogonalV A logical indicating whether the loading vectors should be generated orthogonal to each other, the default value is TRUE.
#'
#' @return A list with the elements
#' \item{X}{A n x (p_1 + p_2) concatenated data matrix of views X_1, X_2.}
#' \item{pvec}{The supplied values of \code{pvec}.}
#' \item{sigma1}{The standard deviation of the elements of the noise matrix for the 1st dataset.}
#' \item{sigma2}{The standard deviation of the elements of the noise matrix for the 2nd dataset.}
#' \item{U}{The score matrix from the SLIDE model.}
#' \item{V}{The loadings matrix from the SLIDE model.}
#' \item{cvec}{The supplied values of \code{cvec}.}
#' \item{snr}{The supplied value of \code{snr}.}
#' @export
#' @examples
#' data <- generateModel1(n = 50, pvec = c(50,75), snr = 2)
#' data <- generateModel1(n = 100, pvec = c(25,150), snr = 1)
#' data <- generateModel1(n = 100, pvec = c(25,25),cvec = c(0.5,1.5), snr = 1)
generateModel1 <- function(n = 100, pvec = c(25, 25), cvec = c(1, 1), snr = 1, orthogonalV = T) {
# Fix number of datasets and all the ranks
D <- 2
# joint rank
r0 <- 2
# individual ranks
rvec <- c(2, 2)
# Total rank
r_total <- sum(rvec) + r0
# Total number of variables
p_total <- sum(pvec)
# Space for the whole concatenated X
X <- matrix(0, n, p_total)
# Generate all the scores
U = matrix(runif(n * r_total), n, r_total)
# Column-center U so that the overall mean is zero
U = scale(U, scale = F)
# Orthogonalize the scores
U <- qr.Q(qr(U))
# Generate all the loadings
Vtmp <- list()
# Generate dataset-specific loadings
for (d in 1:D) {
Vtmp[[d]] <- matrix(runif(pvec[d] * (r0 + rvec[d])), pvec[d], (r0 + rvec[d]))
# orthogonalize the loadings
Vtmp[[d]] <- qr.Q(qr(Vtmp[[d]]))
}
# Put everything together into one V
V <- matrix(0, nrow = p_total, ncol = r_total)
for (d in 1:D) {
# Row index for dth dataset
if (d == 1) {
index <- c(1:pvec[1])
} else {
index <- c((pvec[1] + 1):sum(pvec))
}
# Column index for dth dataset
if (d == 1) {
col_indexd = c(1:(r0 + rvec[1]))
} else {
col_indexd = c(1:r0, (r0 + rvec[1] + 1):(r0 + sum(rvec)))
}
V[index, col_indexd] = Vtmp[[d]]
}
# Orthogonalize V
if (orthogonalV == T) {
V <- qr.Q(qr(V))
}
# Add the scale between joint and individual by fixing singular values
sjoint <- c(1.5, 1.3)
s1ind <- c(1, 0.8)
s2ind <- c(1, 0.7)
V <- V %*% diag(c(sjoint, s1ind, s2ind))
# Add the dataset-specific scale
for (d in 1:D) {
# Row index
if (d == 1) {
index <- c(1:pvec[1])
} else {
index <- c((pvec[1] + 1):sum(pvec))
}
V[index, ] <- cvec[d] * V[index, ]
}
# Form X
X <- tcrossprod(U, V)
# Add noise
if (snr != 0) {
if ((cvec[1] == cvec[2]) & (pvec[1] == pvec[2])) {
# Datasets on the same scale
sigma <- sqrt(sum(X^2)/(n * sum(pvec) * snr))
# Add the noise part0.3*
X <- X + matrix(rnorm(n * p_total, sd = sigma), n, p_total)
# Combine joint loadings and individual loadings into one matrix V
return(list(X = X, pvec = pvec, sigma1 = sigma,sigma2=sigma, U = U,
V = V, cvec = cvec, snr = snr))
} else {
# Datasets have different scales or different dimensions
index1 <- 1:pvec[1]
sigma1 <- sqrt(sum(X[, index1]^2)/(n * pvec[1] * snr))
X[, index1] <- X[, index1] + matrix(rnorm(n * pvec[1], sd = sigma1), n,
pvec[1])
index2 <- (pvec[1] + 1):sum(pvec)
sigma2 <- sqrt(sum(X[, index2]^2)/(n * pvec[2] * snr))
X[, index2] <- X[, index2] + matrix(rnorm(n * pvec[2], sd = sigma2), n,
pvec[2])
# Combine joint loadings and individual loadings into one matrix V
return(list(X = X, pvec = pvec, sigma1 = sigma1,
sigma2 = sigma2, U = U, V = V, cvec = cvec, snr = snr))
}
} else {
# No noise Combine joint loadings and individual loadings into one matrix V
return(list(X = X, pvec = pvec, sigma1 = 0, sigma2=0, U = U, V = V,
cvec = cvec, snr = snr))
}
}
#' Generate data from SLIDE model for three views.
#'
#' Generates three matched datasets from SLIDE model with rank 2 for each type of structure (shared, partially-shared and individual).
#'
#'Generates X_1 (n x p_1), X_2 (n x p_2) and X_3 (n x p_3) according to the additive noise model \deqn{X_1 = Z_1 + E_1, X_2 = Z_2 + E_2, X_3 = Z_3 + E_3,} where noise matrices \eqn{E_1, E_2, E_3} have independent entries from normal distribution with mean zero, and standard deviations according to \code{sigmavec}, and signal matrix \eqn{Z=[Z_1, Z_2, Z_3]} is generated using SLIDE decomposition
#'\deqn{Z = UV',}
#'where U (n x 14) is generated using uniform distribution on [0,1] with subsequent centering and standardization, and V (p x 14) is generated as a block-sparse matrix with orthogonal columns so that the rank of each structure (shared, partially-shared, individual) is two.
#' @param n An integer specifying the number of matched samples, the default value is 100.
#' @param pvec A vector of integer values p_1, p_2, p_3 corresponding to the number of measurements within each data view. The default value is \code{pvec = c(100,100,100)}.
#' @param snr A non-negative value specifying the ratio between the Frobenius norm of the signal matrix and the Frobenius norm of the noise matrix. This value determines the variance of the elements in the noise matrix. The default value is 1.
#' @param orthogonalV A logical indicating whether the loading vectors should be generated orthogonal to each other, the default value is TRUE.
#'
#' @return A list with the elements
#' \item{X}{A n x (p_1 + p_2 + p_3) concatenated data matrix of views X_1, X_2, X_3.}
#' \item{pvec}{The supplied values of \code{pvec}.}
#' \item{sigmavec}{A vector of values s_1, s_2, s_3 corresponding to standard deviations of the elements of noise matrix for each data view.}
#' \item{U}{The score matrix from the SLIDE model.}
#' \item{V}{The loadings matrix from the SLIDE model.}
#' \item{snr}{The supplied value of \code{snr}.}
#' @export
#' @examples
#' data <- generateModel2(n = 100, pvec = c(100,100,100), snr = 1)
#' data <- generateModel2(n = 50, pvec = c(25,100,40), snr = 2)
generateModel2 <- function(n = 100, pvec = c(100, 100, 100), snr = 1, orthogonalV = T) {
p_total <- sum(pvec)
pcum <- cumsum(pvec)
D <- 3
r0 <- 2
rmat <- matrix(2, 3, 3)
cvec <- c(1, 1, 1)
r_total <- r0 + sum(rmat[upper.tri(rmat, diag = T)])
# generate noise part of X
X <- matrix(0, n, p_total)
# generate all the scores
U <- matrix(runif(n * r_total), n, r_total)
# Column-center U so that the overall mean is zero
U <- scale(U, scale = F)
# orthogonalize the scores
U <- qr.Q(qr(U))
# Generate all the loadings
Vtmp <- list()
# generate dataset-specific Vs
for (d in 1:D) {
Vtmp[[d]] <- matrix(runif(pvec[d] * (r0 + sum(rmat[d, ]))), pvec[d], (r0 +
sum(rmat[d, ])))
# orthogonalize the loadings
Vtmp[[d]] <- qr.Q(qr(Vtmp[[d]]))
}
# Put everything together into one V
V <- matrix(0, nrow = p_total, ncol = r_total)
for (d in 1:D) {
# Row index
if (d == 1) {
index <- c(1:pvec[1])
} else {
index <- c((pcum[d - 1] + 1):pcum[d])
}
# Column index, order is r0, r12, r13, r23, r1, r2, r3
if (d == 1) {
col_indexd = c(1:(r0 + rmat[1, 2] + rmat[1, 3]), (r0 + rmat[1, 2] + rmat[1,
3] + rmat[2, 3] + 1):(r0 + rmat[1, 2] + rmat[1, 3] + rmat[2, 3] +
rmat[1, 1]))
} else if (d == 2) {
col_indexd = c(1:(r0 + rmat[1, 2]), (r0 + rmat[1, 2] + rmat[1, 3] + 1):(r0 +
rmat[1, 2] + rmat[1, 3] + rmat[2, 3]), (r0 + rmat[1, 2] + rmat[1,
3] + rmat[2, 3] + rmat[1, 1] + 1):(r0 + rmat[1, 2] + rmat[1, 3] +
rmat[2, 3] + rmat[1, 1] + rmat[2, 2]))
} else {
col_indexd = c(1:r0, (r0 + rmat[1, 2] + 1):(r0 + rmat[1, 2] + rmat[1,
3] + rmat[2, 3]), (r0 + rmat[1, 2] + rmat[1, 3] + rmat[2, 3] + rmat[1,
1] + rmat[2, 2] + 1):(r0 + rmat[1, 2] + rmat[1, 3] + rmat[2, 3] +
rmat[1, 1] + rmat[2, 2] + rmat[3, 3]))
}
V[index, col_indexd] = Vtmp[[d]]
}
# Orthogonallize V
if (orthogonalV == T) {
V <- qr.Q(qr(V))
}
# Add the scale between joint, semi-joint and individual by fixing singular values
# Column index, order is r0, r12, r13, r23, r1, r2, r3
sjoint <- c(1.5, 1.3)
s12 <- c(1, 0.8)
s13 <- c(1, 0.7)
s23 <- c(1, 0.5)
s1ind <- c(1.2, 0.5)
s2ind <- c(0.9, 0.8)
s3ind <- c(0.5, 0.4)
V <- V %*% diag(c(sjoint, s12, s13, s23, s1ind, s2ind, s3ind))
# Form X
X <- tcrossprod(U, V)
if (snr != 0) {
# Datasets have different scales or different dimensions
sigmavec = rep(0, D)
for (d in 1:D) {
# Row index for dth dataset
if (d == 1) {
index <- c(1:pvec[1])
} else {
index <- c((pcum[d - 1] + 1):pcum[d])
}
sigmavec[d] <- sqrt(sum(X[, index]^2)/(n * pvec[d] * snr))
X[, index] <- X[, index] + matrix(rnorm(n * pvec[d], sd = sigmavec[d]),
n, pvec[d])
}
# Combine joint loadings and individual loadings into one matrix V
return(list(X = X, pvec = pvec, sigmavec = sigmavec,
U = U, V = V, snr = snr))
} else {
# Combine joint loadings and individual loadings into one matrix V
return(list(X = X, pvec = pvec, sigmavec = rep(0,D), U = U,
V = V, snr = snr))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.