R/MVOPR.R

Defines functions MVOPR3 MVOPR2

Documented in MVOPR2 MVOPR3

#' @title Multi-View Orthogonal Projection Regression for two modalities
#' @description Fit Multi-View Orthogonal Projection Regression for two modalities with Lasso, MCP, SCAD. The function is capable for linear, logistic, and poisson regression.
#'
#' @param M1 A numeric matrix (n x p) for the first modality.
#' @param M2 A numeric matrix (n x q) for the second modality. Assumes `M2` is correlated to `M1` via a low-rank matrix.
#' @param Y A numeric response vector of length `n`, connected to `M1` and `M2`.
#' @param RRR_Control A list to control the fitting for reduced rank regression.
#' \describe{
#'     \item{\code{Sparsity}}{Logical. If `TRUE`, performs Sparse Orthogonal Factor Regression (SOFAR); otherwise, a reduced-rank regression model is fitted.}
#'     \item{\code{nrank}}{Integer. Maximum rank to be searched for the reduced-rank model.}
#'     \item{\code{ic.type}}{Character. Model selection criterion: `"AIC"`, `"BIC"`, or `"GIC"`.}
#'   }
#' @param family Either "gaussian", "binomial", or "poisson", depending on the response.
#' @param penalty The penalty to be applied in the outcome model Y to M1 and M2. Either "MCP" (the default), "SCAD", or "lasso".
#'
#' @return A list containing:
#' \describe{
#'   \item{\code{fitY}}{Results for Outcome regression (Y~M1+M2). A fitted object from `cv.ncvreg`, which contains the penalized regression results for `Y`.}
#'   \item{\code{fitM2}}{Results for reduced-rank regression (M2~M1).The fitted reduced-rank regression model from `rrpack`.}
#'   \item{\code{CoefY}}{A vector of estimated regression coefficients for `M1` and `M2` on `Y`.}
#'   \item{\code{coefM2}}{A matrix of estimated regression coefficients for `M1` on `M2`.}
#'   \item{\code{rank}}{An integer indicates the estimated rank of the reduced-rank regression.}
#'   \item{\code{P}}{A projection matrix used to extract the orthogonal components of `M1`.}
#'   \item{\code{M1s}}{Transformed version of `M1` after projection.}
#'   \item{\code{M2s}}{Transformed version of `M2` after removing the effect of `M1`.}
#' }
#'
#' @references
#' Dai, Z., Huang, Y. J., & Li, G. (2025). Multi-View Orthogonal Projection Regression with Application in Multi-omics Integration. arXiv preprint arXiv:2503.16807. Available at <https://arxiv.org/abs/2503.16807>
#'
#' @export
#'
#' @examples
#'
#' ## Simulation.1
#' p = 100; q = 100; n = 200
#' rank = 3
#'
#' beta = c(rep(c(rep(1,5),rep(0,95)),2))
#' M1 = matrix(rnorm(p*n),n,p)
#'
#' U = matrix(rnorm(rank*p),p,rank)
#' V = matrix(rnorm(rank*q),rank,q)
#' B = U %*% V
#' E = matrix(rnorm(q*n),n,q)
#' M2 = M1 %*% B + E
#' Y = cbind(M1,M2) %*% matrix(beta,p+q,1)
#'
#' Fit = MVOPR2(M1,M2,Y,RRR_Control = list(Sparsity = FALSE))
#'
#' ## Result for variable selection
#' print(data.frame(Truecoef = beta,estimate = Fit$CoefY[2:(p+q+1)]))
#'
#' ## Plot the pathway and cv error in outcome model
#' oldpar <- par(mfrow = c(1, 2))
#' on.exit(par(oldpar))
#' plot(Fit$fitY$fit)
#' plot(Fit$fitY)
#'
#'
MVOPR2 <- function(M1,M2,Y,RRR_Control = list(Sparsity = TRUE, nrank=10,ic.type='GIC'),
                   family = 'gaussian',penalty = 'lasso'){
  ## Dimension of M1,M2
  p = dim(M1)[2]
  q = dim(M2)[2]
  n = length(Y)

  ## Control for reduced rank regression
  r1 = RRR_Control
  Sparsity = r1$Sparsity; nrank = r1$nrank; ic.type = r1$ic.type

  ## Reduced rank regression
  if(Sparsity){
    fit1 = rrpack::sofar(M2, M1, ic.type = ic.type, nrank = nrank)
    D = matrix(0,length(fit1$D),length(fit1$D))
    diag(D) = fit1$D

    coef = fit1$U %*% D %*% t(fit1$V)
  }else{
    fit1 = with(list(M1=M1,M2=M2), rrpack::rrr(M2, M1, maxrank = nrank,ic.type = ic.type,))
    coef = fit1$coef
  }
  rank = fit1$rank

  ## Transform the data
  ### Residualize M2
  M2s = M2 - M1%*%coef

  ### Project M1
  SVD1 = svd(M1%*%coef)
  P = SVD1$u[,1:rank] %*% t(SVD1$u[,1:rank])
  M1s = (diag(1,dim(P)[1]) - P)%*%M1

  ### Nuisance variable
  nus = as.matrix(SVD1$u[,1:rank])

  ## Fit the outcome model
  Xdata = as.matrix(cbind(M1s,M2s,nus))
  penalty.factor = c(rep(1,p+q),rep(0,rank))

  fitY = ncvreg::cv.ncvreg(X = Xdata,y = Y,family = family,
                    penalty = penalty,penalty.factor = penalty.factor)
  CoefY = fitY$fit$beta[,fitY$min]

  ## Output
  result = list(fitY = fitY, fitM2 = fit1,
                CoefY = CoefY, coefM2 = coef,
                rank = rank, P = P,
                M1s = M1s, M2s = M2s)
  return(result)
}


#' @title Multi-View Orthogonal Projection Regression for three modalities
#' @description Fit Multi-View Orthogonal Projection Regression for three modalities with Lasso, MCP, SCAD. The function is capable for linear, logistic, and poisson regression.
#'
#'
#' @param M1 A numeric matrix (n x p1) for the first modality.
#' @param M2 A numeric matrix (n x p2) for the second modality. Assumes `M2` is correlated to `M1` via a low-rank matrix.
#' @param M3 A numeric matrix (n x p3) for the third modality. Assumes `M3` is correlated to `M1` and `M2` via a low-rank matrix.
#' @param Y A numeric response vector of length `n`, connected to `M1`, `M2`, and `M3`.
#' @param RRR_Control A list to control the fitting for reduced rank regression.
#' \describe{
#'     \item{\code{Sparsity}}{Logical. If `TRUE`, performs Sparse Orthogonal Factor Regression (SOFAR); otherwise, a reduced-rank regression model is fitted.}
#'     \item{\code{nrank}}{Integer. Maximum rank to be searched for the reduced-rank model.}
#'     \item{\code{ic.type}}{Character. Model selection criterion: `"AIC"`, `"BIC"`, or `"GIC"`.}
#'   }
#' @param family Either "gaussian", "binomial", or "poisson", depending on the response.
#' @param penalty The penalty to be applied in the outcome model Y to M1 and M2. Either "MCP" (the default), "SCAD", or "lasso".
#'
#' @return A list containing:
#' \describe{
#'   \item{\code{fitY}}{A fitted object from `cv.ncvreg`, containing the penalized regression results for `Y`.}
#'   \item{\code{fitM2}}{The fitted reduced-rank regression (`sofar` or `rrr` object) for `M2` given `M1`.}
#'   \item{\code{fitM3}}{The fitted reduced-rank regression (`sofar` or `rrr` object) for `M3` given `M1` and `M2`.}
#'   \item{\code{CoefY}}{A vector of estimated regression coefficients for `Y`.}
#'   \item{\code{coefM2}}{A matrix of estimated regression coefficients for `M2` given `M1`.}
#'   \item{\code{coefM3}}{A matrix of estimated regression coefficients for `M3` given `M1` and `M2`.}
#'   \item{\code{rank1}}{An integer indicating the estimated rank of the reduced-rank regression for `M2`.}
#'   \item{\code{rank2}}{An integer indicating the estimated rank of the reduced-rank regression for `M3`.}
#'   \item{\code{P1}}{A projection matrix used to extract the orthogonal components of `M1`.}
#'   \item{\code{P2}}{A projection matrix used to extract the orthogonal components of `E2`, which is the error term in the regression for `M2` given `M1`.}
#'   \item{\code{M1s}}{A transformed version of `M1` after projection.}
#'   \item{\code{M2s}}{A transformed version of `M2` after removing the effect of `M1` and projecting to the orthogonal space.}
#'   \item{\code{M3s}}{A transformed version of `M3` after removing the effects of `M1` and `M2`.}
#' }
#'
#'#' @references
#' Dai, Z., Huang, Y. J., & Li, G. (2025). Multi-View Orthogonal Projection Regression with Application in Multi-omics Integration. arXiv preprint arXiv:2503.16807. Available at <https://arxiv.org/abs/2503.16807>
#'
#' @export
#'
#' @examples
#'
#' ## Simulation: three modalities
#' p1 = 50; p2 = 50; p3 = 50; n = 200
#' rank = 2
#'
#' beta = c(rep(c(rep(1,5),rep(0,45)),3))
#' M1 = matrix(rnorm(p1*n),n,p1)
#'
#' U1 = matrix(rnorm(rank*p1),p1,rank)
#' V1 = matrix(runif(rank*p2,-0.1,0.1),rank,p2)
#' B1 = U1 %*% V1
#'
#' U2 = matrix(rnorm(rank*p1),p1,rank)
#' V2 = matrix(runif(rank*p2,-0.1,0.1),rank,p3)
#' B2 = U2 %*% V2
#'
#' U3 = matrix(rnorm(rank*p2),p2,rank)
#' V3 = matrix(runif(rank*p2,-0.1,0.1),rank,p3)
#' B3 = U3 %*% V3
#'
#' E1 = matrix(rnorm(p2*n),n,p2)
#' E2 = matrix(rnorm(p3*n),n,p3)
#'
#' M2 = M1 %*% B1 + E1
#' M3 = M1 %*% B2 + M2 %*% B3 + E2
#' Y = cbind(M1,M2,M3) %*% matrix(beta,p1+p2+p3,1)
#'
#' ## Fit MVOPR with Lasso
#' Fit1 = MVOPR3(M1,M2,M3,Y,RRR_Control = list(Sparsity = FALSE),penalty = 'lasso')
#'
#' ## Fit MVOPR with MCP
#' Fit2 = MVOPR3(M1,M2,M3,Y,RRR_Control = list(Sparsity = FALSE),penalty = 'MCP')
#'
#' ## Fit MVOPR with SCAD
#' Fit3 = MVOPR3(M1,M2,M3,Y,RRR_Control = list(Sparsity = FALSE),penalty = 'SCAD')
#'
#' ## Compare the variable selection between Lasso, MCP, SCAD
#' print(data.frame(Lasso = Fit1$CoefY[2:151],MCP = Fit2$CoefY[2:151],SCAD = Fit3$CoefY[2:151],beta))
#'
MVOPR3 <- function(M1,M2,M3,Y,RRR_Control = list(Sparsity = TRUE, nrank=10,ic.type='GIC'),
                   family = 'gaussian',penalty = 'lasso'){
  ## Dimension of M1,M2
  p1 = dim(M1)[2]
  p2 = dim(M2)[2]
  p3 = dim(M3)[2]
  ps = p1+p2+p3
  n = length(Y)

  ## Control for reduced rank regression
  r1 = RRR_Control
  Sparsity = r1$Sparsity; nrank = r1$nrank; ic.type = r1$ic.type

  ## Reduced rank regression
  if(Sparsity){
    # M2 ~ M1
    fit1 = rrpack::sofar(M2, M1, ic.type = ic.type, nrank = nrank)
    D = matrix(0,length(fit1$D),length(fit1$D))
    diag(D) = fit1$D
    coef1 = fit1$U %*% D %*% t(fit1$V)

    # M3 ~ M2 + M1
    fit2 = rrpack::sofar(M3, cbind(M2,M1), ic.type = ic.type, nrank = nrank)
    D = matrix(0,length(fit2$D),length(fit2$D))
    diag(D) = fit2$D
    coef2 = fit2$U %*% D %*% t(fit2$V)

  }else{
    # M2 ~ M1
    fit1 = with(list(M1=M1,M2=M2), rrpack::rrr(M2, M1, maxrank = nrank,ic.type = ic.type))
    coef1 = fit1$coef

    # M2 ~ M1
    fit2 = with(list(M=cbind(M2,M1),M3=M3), rrpack::rrr(M3, M, maxrank = nrank,ic.type = ic.type))
    coef2 = fit2$coef
  }
  rank1 = fit1$rank
  rank2 = fit2$rank

  ## Transform the data
  ### Residualize M2 and M3
  M = cbind(M2,M1)
  M2s = M2 - M1%*%coef1
  M3s = M3 - M%*%coef2

  ### Project M1
  coefM1 = cbind(coef1,coef2[(p2+1):(p1+p2),])
  SVD1 = svd(M1%*%coefM1)
  P1 = SVD1$u[,1:rank1] %*% t(SVD1$u[,1:rank1])
  M1s = (diag(1,dim(P1)[1]) - P1)%*%M1

  ### Project M2
  coefM2 = coef2[(1):(p2),]
  SVD2 = svd(M2s%*%coefM2)
  P2 = SVD2$u[,1:rank2] %*% t(SVD2$u[,1:rank2])
  M2s = (diag(1,dim(P2)[1]) - P2)%*%M2s

  ### Nuisance variable
  nus1 = as.matrix(SVD1$u[,1:rank1])
  nus2 = as.matrix(SVD2$u[,1:rank2])
  nus = cbind(nus1,nus2)

  ## Fit the outcome model
  Xdata = as.matrix(cbind(M1s,M2s,M3s,nus))
  penalty.factor = c(rep(1,ps),rep(0,rank1+rank2))

  fitY = ncvreg::cv.ncvreg(X = Xdata,y = Y,family = family,
                           penalty = penalty,penalty.factor = penalty.factor)
  CoefY = fitY$fit$beta[,fitY$min]

  ## Output
  result = list(fitY = fitY, fitM2 = fit1, fitM3 = fit2,
                CoefY = CoefY, coefM2 = coef1, coefM3 = coef2,
                rank1 = rank1,rank2 = rank2, P1 = P1,P2 = P2,
                M1s = M1s, M2s = M2s, M3s = M3s)
  return(result)
}

Try the MVOPR package in your browser

Any scripts or data that you put into this service are public.

MVOPR documentation built on April 12, 2025, 1:44 a.m.