R/Models.R

Defines functions generateModel2 generateModel1

Documented in generateModel1 generateModel2

#' Generate data from SLIDE model for two views.
#'
#' Generates two matched datasets from SLIDE model with rank 2 for each type of structure (shared/individual).
#'
#'
#' Generates X_1 (n x p_1) and X_2 (n x p_2) according to the additive noise model \deqn{X_1 = Z_1 + E_1, X_2 = Z_2 + E_2,} where noise matrices \eqn{E_1, E_2} have independent entries from normal distribution with mean zero, and standard deviations sigma1, sigma2 correspondingly, and
#' \deqn{Z_1 = c_1 (U_1 D_0 W_{1,1}' + U_2 D_1 W_{1,2}')}
#' \deqn{Z_2 = c_2 (U_1 D_0 W_{2,1}' + U_3 D_2 W_{2,3}').}
#' Here \eqn{U=[U_1 U_2 U_3]} (n x 6) is generated using uniform distribution on [0,1] with subsequent centering and orthonormalization, \eqn{D_0 = diag(1.5, 1.3)}, \eqn{D_1 = diag(1, 0.8)}, \eqn{D_2 = diag(1, 0.7)}, and the loadings \eqn{W = [W_1 W_2]} (p_1 + p_2 X 6) with \eqn{W_1 = [W_{1,1} W_{1,2} 0]}, \eqn{W_2 = [W_{2,1} 0 W_{2,3}]} are generated using uniform distribution on [0,1] with subsequent orthonormalization of \eqn{W}. The loadings matrix \eqn{V} in SLIDE model is generated by condensing c_1, c_2, D_0, D_1, D_2 and W.
#'
#' @param n An integer specifying the number of matched samples, the default value is 100.
#' @param pvec A vector of integer values p_1, p_2 corresponding to the number of measurements within each data view. The default value is \code{pvec = c(25,25)}.
#' @param cvec A vector of positive values c_1, c_2 corresponding to the scale of each view. The default value is \code{cvec = c(1,1)}, that is both views have the same scale.
#' @param snr A non-negative value specifying the ratio between the Frobenius norm of the signal matrix and the Frobenius norm of the noise matrix. This value determines the variance of the elements in the noise matrix. The default value is 1.
#' @param orthogonalV A logical indicating whether the loading vectors should be generated orthogonal to each other, the default value is TRUE.
#'
#' @return A list with the elements
#'   \item{X}{A n x (p_1 + p_2) concatenated data matrix of views X_1, X_2.}
#'   \item{pvec}{The supplied values of \code{pvec}.}
#'   \item{sigma1}{The standard deviation of the elements of the noise matrix for the 1st dataset.}
#'   \item{sigma2}{The standard deviation of the elements of the noise matrix for the 2nd dataset.}
#'   \item{U}{The score matrix from the SLIDE model.}
#'   \item{V}{The loadings matrix from the SLIDE model.}
#'   \item{cvec}{The supplied values of \code{cvec}.}
#'   \item{snr}{The supplied value of \code{snr}.}
#' @export
#' @examples
#' data <- generateModel1(n = 50, pvec = c(50,75), snr = 2)
#' data <- generateModel1(n = 100, pvec = c(25,150), snr = 1)
#' data <- generateModel1(n = 100, pvec = c(25,25),cvec = c(0.5,1.5), snr = 1)
generateModel1 <- function(n = 100, pvec = c(25, 25), cvec = c(1, 1), snr = 1, orthogonalV = T) {
    # Fix number of datasets and all the ranks
    D <- 2
    # joint rank
    r0 <- 2
    # individual ranks
    rvec <- c(2, 2)
    # Total rank
    r_total <- sum(rvec) + r0
    # Total number of variables
    p_total <- sum(pvec)

    # Space for the whole concatenated X
    X <- matrix(0, n, p_total)

    # Generate all the scores
    U = matrix(runif(n * r_total), n, r_total)
    # Column-center U so that the overall mean is zero
    U = scale(U, scale = F)

    # Orthogonalize the scores
    U <- qr.Q(qr(U))

    # Generate all the loadings
    Vtmp <- list()
    # Generate dataset-specific loadings
    for (d in 1:D) {
        Vtmp[[d]] <- matrix(runif(pvec[d] * (r0 + rvec[d])), pvec[d], (r0 + rvec[d]))
        # orthogonalize the loadings
        Vtmp[[d]] <- qr.Q(qr(Vtmp[[d]]))
    }
    # Put everything together into one V
    V <- matrix(0, nrow = p_total, ncol = r_total)
    for (d in 1:D) {
        # Row index for dth dataset
        if (d == 1) {
            index <- c(1:pvec[1])
        } else {
            index <- c((pvec[1] + 1):sum(pvec))
        }
        # Column index for dth dataset
        if (d == 1) {
            col_indexd = c(1:(r0 + rvec[1]))
        } else {
            col_indexd = c(1:r0, (r0 + rvec[1] + 1):(r0 + sum(rvec)))
        }
        V[index, col_indexd] = Vtmp[[d]]
    }
    # Orthogonalize V
    if (orthogonalV == T) {
        V <- qr.Q(qr(V))
    }
    # Add the scale between joint and individual by fixing singular values
    sjoint <- c(1.5, 1.3)
    s1ind <- c(1, 0.8)
    s2ind <- c(1, 0.7)
    V <- V %*% diag(c(sjoint, s1ind, s2ind))

    # Add the dataset-specific scale
    for (d in 1:D) {
        # Row index
        if (d == 1) {
            index <- c(1:pvec[1])
        } else {
            index <- c((pvec[1] + 1):sum(pvec))
        }
        V[index, ] <- cvec[d] * V[index, ]
    }

    # Form X
    X <- tcrossprod(U, V)

    # Add noise
    if (snr != 0) {
        if ((cvec[1] == cvec[2]) & (pvec[1] == pvec[2])) {
            # Datasets on the same scale
            sigma <- sqrt(sum(X^2)/(n * sum(pvec) * snr))
            # Add the noise part0.3*
            X <- X + matrix(rnorm(n * p_total, sd = sigma), n, p_total)
            # Combine joint loadings and individual loadings into one matrix V
            return(list(X = X, pvec = pvec, sigma1 = sigma,sigma2=sigma, U = U,
                V = V, cvec = cvec, snr = snr))
        } else {
            # Datasets have different scales or different dimensions
            index1 <- 1:pvec[1]
            sigma1 <- sqrt(sum(X[, index1]^2)/(n * pvec[1] * snr))
            X[, index1] <- X[, index1] + matrix(rnorm(n * pvec[1], sd = sigma1), n,
                pvec[1])
            index2 <- (pvec[1] + 1):sum(pvec)
            sigma2 <- sqrt(sum(X[, index2]^2)/(n * pvec[2] * snr))
            X[, index2] <- X[, index2] + matrix(rnorm(n * pvec[2], sd = sigma2), n,
                pvec[2])
            # Combine joint loadings and individual loadings into one matrix V
            return(list(X = X, pvec = pvec, sigma1 = sigma1,
                sigma2 = sigma2, U = U, V = V, cvec = cvec, snr = snr))
        }
    } else {
        # No noise Combine joint loadings and individual loadings into one matrix V
        return(list(X = X, pvec = pvec, sigma1 = 0, sigma2=0, U = U, V = V,
            cvec = cvec, snr = snr))
    }
}



#' Generate data from SLIDE model for three views.
#'
#'   Generates three matched datasets from SLIDE model with rank 2 for each type of structure (shared, partially-shared and individual).
#'
#'Generates X_1 (n x p_1), X_2 (n x p_2) and X_3 (n x p_3) according to the additive noise model \deqn{X_1 = Z_1 + E_1, X_2 = Z_2 + E_2, X_3 = Z_3 + E_3,} where noise matrices \eqn{E_1, E_2, E_3} have independent entries from normal distribution with mean zero, and standard deviations according to \code{sigmavec}, and signal matrix \eqn{Z=[Z_1, Z_2, Z_3]} is generated using SLIDE decomposition
#'\deqn{Z = UV',}
#'where U (n x 14) is generated using uniform distribution on [0,1] with subsequent centering and standardization, and V (p x 14) is generated as a block-sparse matrix with orthogonal columns so that the rank of each structure (shared, partially-shared, individual) is two.
#' @param n An integer specifying the number of matched samples, the default value is 100.
#' @param pvec A vector of integer values p_1, p_2, p_3 corresponding to the number of measurements within each data view. The default value is \code{pvec = c(100,100,100)}.
#' @param snr A non-negative value specifying the ratio between the Frobenius norm of the signal matrix and the Frobenius norm of the noise matrix. This value determines the variance of the elements in the noise matrix. The default value is 1.
#' @param orthogonalV A logical indicating whether the loading vectors should be generated orthogonal to each other, the default value is TRUE.
#'
#' @return A list with the elements
#'   \item{X}{A n x (p_1 + p_2 + p_3) concatenated data matrix of views X_1, X_2, X_3.}
#'   \item{pvec}{The supplied values of \code{pvec}.}
#'   \item{sigmavec}{A vector of values s_1, s_2, s_3 corresponding to standard deviations of the elements of noise matrix for each data view.}
#'   \item{U}{The score matrix from the SLIDE model.}
#'   \item{V}{The loadings matrix from the SLIDE model.}
#'   \item{snr}{The supplied value of \code{snr}.}
#' @export
#' @examples
#' data <- generateModel2(n = 100, pvec = c(100,100,100), snr = 1)
#' data <- generateModel2(n = 50, pvec = c(25,100,40), snr = 2)
generateModel2 <- function(n = 100, pvec = c(100, 100, 100), snr = 1, orthogonalV = T) {
    p_total <- sum(pvec)
    pcum <- cumsum(pvec)
    D <- 3
    r0 <- 2
    rmat <- matrix(2, 3, 3)
    cvec <- c(1, 1, 1)
    r_total <- r0 + sum(rmat[upper.tri(rmat, diag = T)])

    # generate noise part of X
    X <- matrix(0, n, p_total)

    # generate all the scores
    U <- matrix(runif(n * r_total), n, r_total)
    # Column-center U so that the overall mean is zero
    U <- scale(U, scale = F)

    # orthogonalize the scores
    U <- qr.Q(qr(U))

    # Generate all the loadings
    Vtmp <- list()
    # generate dataset-specific Vs
    for (d in 1:D) {
        Vtmp[[d]] <- matrix(runif(pvec[d] * (r0 + sum(rmat[d, ]))), pvec[d], (r0 +
            sum(rmat[d, ])))
        # orthogonalize the loadings
        Vtmp[[d]] <- qr.Q(qr(Vtmp[[d]]))
    }
    # Put everything together into one V
    V <- matrix(0, nrow = p_total, ncol = r_total)
    for (d in 1:D) {
        # Row index
        if (d == 1) {
            index <- c(1:pvec[1])
        } else {
            index <- c((pcum[d - 1] + 1):pcum[d])
        }

        # Column index, order is r0, r12, r13, r23, r1, r2, r3
        if (d == 1) {
            col_indexd = c(1:(r0 + rmat[1, 2] + rmat[1, 3]), (r0 + rmat[1, 2] + rmat[1,
                3] + rmat[2, 3] + 1):(r0 + rmat[1, 2] + rmat[1, 3] + rmat[2, 3] +
                rmat[1, 1]))
        } else if (d == 2) {
            col_indexd = c(1:(r0 + rmat[1, 2]), (r0 + rmat[1, 2] + rmat[1, 3] + 1):(r0 +
                rmat[1, 2] + rmat[1, 3] + rmat[2, 3]), (r0 + rmat[1, 2] + rmat[1,
                3] + rmat[2, 3] + rmat[1, 1] + 1):(r0 + rmat[1, 2] + rmat[1, 3] +
                rmat[2, 3] + rmat[1, 1] + rmat[2, 2]))
        } else {
            col_indexd = c(1:r0, (r0 + rmat[1, 2] + 1):(r0 + rmat[1, 2] + rmat[1,
                3] + rmat[2, 3]), (r0 + rmat[1, 2] + rmat[1, 3] + rmat[2, 3] + rmat[1,
                1] + rmat[2, 2] + 1):(r0 + rmat[1, 2] + rmat[1, 3] + rmat[2, 3] +
                rmat[1, 1] + rmat[2, 2] + rmat[3, 3]))
        }
        V[index, col_indexd] = Vtmp[[d]]
    }
    # Orthogonallize V
    if (orthogonalV == T) {
        V <- qr.Q(qr(V))
    }
    # Add the scale between joint, semi-joint and individual by fixing singular values
    # Column index, order is r0, r12, r13, r23, r1, r2, r3
    sjoint <- c(1.5, 1.3)
    s12 <- c(1, 0.8)
    s13 <- c(1, 0.7)
    s23 <- c(1, 0.5)
    s1ind <- c(1.2, 0.5)
    s2ind <- c(0.9, 0.8)
    s3ind <- c(0.5, 0.4)
    V <- V %*% diag(c(sjoint, s12, s13, s23, s1ind, s2ind, s3ind))

    # Form X
    X <- tcrossprod(U, V)
    if (snr != 0) {
        # Datasets have different scales or different dimensions
        sigmavec = rep(0, D)
        for (d in 1:D) {
            # Row index for dth dataset
            if (d == 1) {
                index <- c(1:pvec[1])
            } else {
                index <- c((pcum[d - 1] + 1):pcum[d])
            }
            sigmavec[d] <- sqrt(sum(X[, index]^2)/(n * pvec[d] * snr))
            X[, index] <- X[, index] + matrix(rnorm(n * pvec[d], sd = sigmavec[d]),
                n, pvec[d])
        }
        # Combine joint loadings and individual loadings into one matrix V
        return(list(X = X, pvec = pvec, sigmavec = sigmavec,
            U = U, V = V, snr = snr))
    } else {
        # Combine joint loadings and individual loadings into one matrix V
        return(list(X = X, pvec = pvec, sigmavec = rep(0,D), U = U,
            V = V, snr = snr))
    }
}
irinagain/SLIDE documentation built on Aug. 14, 2021, 2:56 p.m.