#' Targeted Principal Components Regression
#'
#' \code{tpcr} fits a principal components regression by maximizing a joint
#' multivariate normal pseudo-likelihood.
#'
#' This is the only function exported from the package with the same name.
#' The likelihood maximized is that for n independent observations
#' from a normal multivariate response linear regression model where the column space
#' of the p-by-r coefficient matrix is spanned by the k leading eigenvectors (corresponding to
#' the largest eigenvalues) of the predictors' covariance matrix.
#' This covariance matrix is assumed to be spiked, meaning its
#' smallest eigenvalue has multiplicity p - k.
#'
#' @param Y Matrix (n x r) of responses; rows correspond to observations
#' @param X Matrix (n x p) of predictors; rows correspond to observations
#' @param k Integer number of principal components to use; can be a vector
#' @param rho Numeric ridge penalty on alpha in representation beta = L alpha
#' @param tol Numeric tolerance for L-BFGS-B on profile log-likelihood
#' @param maxit Integer maximum number of iterations of L-BFGS-B algorithm
#' @param center_Y If TRUE, responses are centered by their sample average.
#' @param center_X If TRUE, predictors are centered by their sample average.
#' @param scale_Y If TRUE, responses are scaled to have unit sample standard deviation.
#' @param scale_X If TRUE, predictors are scaled to have unit sample standard deviation.
#' @param quiet If TRUE, suppresses information from optim (L-BFGS-B)
#' @param L Matrix (p x k) starting value in L-BFGS-B for the Cholesky root
#' in the decomposition Sigma_X = tau (I + LL')
#' @param m Numeric penalty in information criterion
#' - 2 * log-likelihood + m * n_params; can be a vector
#' @param covmat If TRUE, calculates asymptotic covariance matrix of vec(beta)
#' @param Xnew Matrix of new observations to predict the response for
#' @return If length(k) = 1, returns a list with estimates and information criterion.
#' If scale_X or scale_Y are TRUE, estimates are rescaled to original scale.
#'
#' If length(k) > 1, returns a list of lists of length k + 1 where for
#' j in 1:k the jth element is the list returned by tpcr with k = k[j]
#' and the (k + 1)th element is a vector of the same length as m where
#' the jth element is the k selected by the IC corresponding to m[j].
#' @useDynLib tpcr, .registration = TRUE
#' @importFrom Rcpp sourceCpp
#' @importFrom Rcpp evalCpp
#' @export
tpcr <- function(Y, X, k, rho = 0, tol = 1e-10, maxit = 1e3, center_Y = TRUE,
center_X = TRUE, scale_Y = FALSE, scale_X = FALSE, quiet = TRUE, L, m = 2,
covmat = FALSE, Xnew = X)
{
#############################################################################
# Argument checking and starting values
#############################################################################
stopifnot(is.matrix(X))
k <- sort(k)
num_k <- length(k)
stopifnot(is.numeric(k), is.null(dim(k)), all(k == floor(k)))
p <- ncol(X)
n <- nrow(X)
if(k[num_k] >= min(n, p)) stop("tpcr requires k < min(n, p)")
stopifnot(is.numeric(Y))
if(!is.matrix(Y)){
Y <- matrix(Y, nrow = n)
}
r <- ncol(Y)
stopifnot(is.numeric(rho), length(rho) == 1, rho >= 0)
if(n < p & rho == 0){
warning("The objective function is unbounded when n < p and rho = 0;
do not expect reliable estimates!")
}
stopifnot(is.numeric(tol), length(tol) == 1, tol > 0)
stopifnot(is.numeric(maxit), length(maxit) == 1, maxit > 0, maxit == floor(maxit))
stopifnot(is.logical(center_Y), length(center_Y) == 1)
stopifnot(is.logical(center_X), length(center_X) == 1)
stopifnot(is.logical(scale_Y), length(scale_Y) == 1)
stopifnot(is.logical(scale_X), length(scale_X) == 1)
stopifnot(is.logical(quiet), length(quiet) == 1)
# Get starting value
if(missing(L) | num_k > 1){
e_S <- eigen(crossprod(X) / n, symmetric = TRUE)
}
if(missing(L)){
# Use probabilistic principal components estimates
U <- e_S$vectors[, 1:k[1], drop = F]
tau <- mean(e_S$values[(k[1] + 1):p])
D <- e_S$values[1:k[1]] / tau - 1
L <- chol_spsd(U %*% diag(D, k[1]) %*% t(U) , tol = 1e-8)$L
L <- L[, 1:k[1], drop = F]
rm(D, tau, U)
} else{
stopifnot(is.matrix(L), ncol(L) == k, nrow(L) == p)
}
stopifnot(is.numeric(m), is.null(dim(m)))
stopifnot(is.logical(covmat), length(covmat) == 1)
stopifnot(is.matrix(Xnew), ncol(Xnew) == p)
#############################################################################
# Scale data if required
#############################################################################
ybar <- rep(0, r)
xbar <- rep(0, p)
if(center_Y){
ybar <- colMeans(Y)
Y <- sweep(Y, 2, ybar)
}
if(center_X){
xbar <- colMeans(X)
X <- sweep(X, 2, xbar)
}
sy <- rep(1, r)
if(scale_Y){
sy <- apply(Y, 2, stats::sd)
Y <- sweep(Y, 2, 1 / sy, FUN = "*")
}
sx <- rep(1, p)
if(scale_X){
sx <- apply(X, 2, stats::sd)
X <- sweep(X, 2, 1 / sx, FUN = "*")
}
#############################################################################
# Fit model for each k_i in k
#############################################################################
out_list <- list()
for(ii in 1:num_k){
# Get estimates for possibly scaled data
L <- update_L(L = L, Y = Y, X = X, rho = rho, tol = tol, maxit = maxit,
quiet = quiet)
sL <- svd(L)
U <- sL$u
d <- sL$d^2
Z <- X %*% U
gam <- solve(crossprod(Z) + diag(rho, k[ii]), crossprod(Z, Y))
Sigma <- crossprod(Y - Z %*% gam) / n
tau <- sum(X^2) - sum(t(X) * (L %*% qr.solve(diag(1, k[ii]) +
crossprod(L), t(L)) %*% t(X)))
tau <- tau / (n * p)
d <- d * tau
Psi <- U %*% diag(d, k[ii]) %*% t(U)
beta <- U %*% gam
Sigma_X <- diag(tau, p) + Psi
# Get information criteria for possibly scaled data
n_param <- r * (r + 1) / 2 # Sigma
n_param <- n_param + k[ii] * r # gamma
n_param <- n_param + 1 # tau
n_param <- n_param + k[ii] + k[ii] * p - k[ii] * (k[ii] + 1) / 2 #Psi
if(center_Y) n_param <- n_param + r # Intercept
if(center_X) n_param <- n_param + p # Predictor mean
# k eigenvalues, k orthonormal vectors requires (p - 1) + (p - 2) ... (p - k)
# = k + kp - sum_i^k i = k + kp - k(k + 1) / 2
IC <- sum(mvtnorm::dmvnorm(Y - X %*% beta, sigma = Sigma, log = T))
IC <- IC + sum(mvtnorm::dmvnorm(X, sigma = Sigma_X, log = T))
IC <- -2 * IC + m * n_param
# Get get estimates on original scale for returning
if(scale_Y){
Sigma <- sweep(Sigma, 2, sy, FUN = "*")
Sigma <- sweep(Sigma, 1, sy, FUN = "*")
beta <- sweep(beta, 2, sy, FUN = "*")
}
if(scale_X){
Sigma_X <- sweep(Sigma_X, 2, sx, FUN = "*")
Sigma_X <- sweep(Sigma_X, 1, sx, FUN = "*")
beta <- sweep(beta, 1, 1 / sx, FUN = "*")
}
# Covariance of estimates on original scale
C <- NULL
if(covmat){
if(scale_X){
U <- sweep(U, 1, sx, FUN = "*")
}
C <- kronecker(Sigma, U %*% ((1 / (d + tau)) * t(U)))
}
# Prediction or fitted values on original scale
Yhat <- sweep(sweep(Xnew, 2, xbar) %*% beta, 2, ybar, FUN = "+")
# Return list for k_i
out_list[[ii]] <- list(ybar = ybar, xbar = xbar, b = beta,
Sigma = Sigma, Sigma_X = Sigma_X,
IC = IC, cov_b = C, Yhat = Yhat, L = L)
# Starting value for next iter; use directions of first sample evecs not in span
if(ii < num_k){
u_new <- e_S$vectors[, 1:(k[ii + 1] - k[ii]), drop = F] -
sL$u %*% crossprod(sL$u, e_S$vectors[, 1:(k[ii + 1] - k[ii]), drop = F])
u_new <- scale(u_new, center = F, scale = apply(u_new, 2,
function(x)sqrt(sum(x^2))))
U_hat <- cbind(sL$u, u_new)
L <- chol_spsd(U_hat %*% diag(c(sL$d^2, rep(sL$d[k[ii]]^2,
k[ii + 1] - k[ii])), k[ii + 1]) %*%
t(U_hat), tol = 1e-8)$L[, 1:k[ii + 1]]
}
}
#############################################################################
# Prepare return list
#############################################################################
if(num_k == 1){
out_list <- out_list[[1]]
} else{
all_IC <- do.call(rbind, lapply(out_list, function(x)x$IC))
out_list[[num_k + 1]] <- k[as.vector(apply(all_IC, 2, which.min))]
names(out_list) <- c(paste0("fit_k_", k), "k_star")
}
return(out_list)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.