# R/pfpca.R In javzapata/fgm: Partial Separability and Functional Graphical Models for Multivariate Gaussian Processes

#### Documented in pfpca

# pfpca(data, t=seq(0, 1, m))
#
# Arguments
#
# y: list of length p containing multivariate (p-dimensional) functional data.
# 	y[[j]] is an nxm matrix of functional data for n subjects observed on a
# 	grid of length m
#
# t - grid on which functional data is observed, defaults to seq(0, 1, m)
#        where m = dim(data[[1]])[2]
#
# Value
# A list with 3 components:
#
# phi - Lxm matrix where each row denotes the value of a basis function evaluated
# 	at a grid of length m
# theta - list of length L of functional principal component scores.  theta[[l]]
# 		is an nxp matrix of vector scores corresponding to the basis function
# 		phi[l,]
# FVE - fraction of variability explained by the first L components

# @example
# \describe{
# \item pfpca.fit = pfpca(y)
# \item pfpca.fit = pfpca(y, t=seq(0, 1, m))
# }

#' Partially Separable Karhunen-Loeve Expansion
#'
#' Estimates the Karhunen-Loeve expansion for a partially separable multivariate Gaussian process.
#' @param y  list of length p containing densely observed multivariate (p-dimensional) functional data . \code{y[[j]]} is an nxm matrix of functional data for n subjects observed on a grid of length m
#' @param t  (optional) grid on which functional data is observed, defaults to seq(0, 1, m) where \code{m = dim(data[[1]])[2]}
#' @return A list with three variables:
#' \describe{
#'   \item{\code{phi}}{Lxm matrix where each row denotes the value of a basis function evaluated at a grid of length m}
#'   \item{\code{theta}}{list of length L of functional principal component scores. \code{theta[[l]]} is an nxp matrix of vector scores corresponding to the basis function \code{phi[l,]}}
#'   \item{\code{FVE}}{fraction of variability explained by the first L components}
#' }
#'@examples
#' ## Variables
#' # Omega - list of precision matrices, one per eigenfunction
#' # Sigma - list of covariance matrices, one per eigenfunction
#' # theta - list of functional  principal component scores
#' # phi - list of eigenfunctions densely observed on a time grid
#' # y - list containing densely observed multivariate (p-dimensional) functional data
#'
#' library(mvtnorm)
#' library(fda)
#'
#' ## Generate data y
#'  source(system.file("exec", "getOmegaSigma.R", package = "fgm"))
#'  theta = lapply(1:nbasis, function(b) t(rmvnorm(n = 100, sigma = Sigma[[b]])))
#'  theta.reshaped = lapply( 1:p, function(j){
#'      t(sapply(1:nbasis, function(i) theta[[i]][j,]))
#'  })
#'  phi.basis=create.fourier.basis(rangeval=c(0,1), nbasis=21, period=1)
#'  t = seq(0, 1, length.out = time.grid.length)
#'  chosen.basis = c(2, 3, 6, 7, 10, 11, 16, 17, 20, 21)
#'  phi = t(predict(phi.basis, t))[chosen.basis,]
#'  y = lapply(theta.reshaped, function(th) t(th)%*%phi)
#'
#' ## Solve
#'  pfpca(y)
#'
#' @keywords pfpca fpca pca fda partial separability principal components
#' @export
#' @author Javier Zapata, Sang-Yun Oh and Alexander Petersen
#' @references Zapata, J., Oh, S., and Petersen, A. (2019) Functional Graphical Models for Partially Separable Gaussian Processes. Available at arXiv.org
#' @details
#' This function implements the functional graphical model in Zapata, Oh, and Petersen (2019).
#' This code uses functions from the testing version of fdapace available at: \url{https://github.com/functionaldata/tPACE}.

pfpca = function(y, t=seq(0, 1, length.out=dim(y[[1]])[2])){

# checking inputs
if (missing(y)) stop('y is missing')
if (!is.list(y)) stop('y must be a list of matrices')
if (length(t) != dim(y[[1]])[2]) stop('length of time grid does not match dim(y[[1]])[2])')

# Compute autocovariance surface of each component separately (smoothed first)
p = length(y)
optns = list(dataType = 'Dense', error = FALSE)
mu = t(sapply(y, function(yy) .GetMeanDense(yy, t, optns)$mu)) m = length(t); n = dim(y[[1]])[1] S = array(0, dim = c(m, m, p)) for(j in 1:p){ S[,,j] = .GetCovDense(y[[j]], mu[j,], optns)$smoothCov
}
# Add together and compute eigendecomposition

optns$FVEthreshold = 0.99 optns$maxK = n*p
C = rowSums(S, dims = 2)
E = .GetEigenAnalysisResults(C, t, optns)

# Compute vector scores

y2 = lapply(1:n, function(i){ # Rearrange data by subject
t(sapply(1:p, function(j) y[[j]][i,]))
})

L = dim(E$phi)[2] #if(L > dim(E$phi)[2]){
#  #warning('L is too large, changing to maximum value')
#  L = dim(E$phi)[2] #} theta = lapply(1:L, function(l){ sapply(1:n, function(i){ sapply(1:p, function(j) fdapace::trapzRcpp(t, (y2[[i]][j,] - mu[j,])*(E$phi[,l])))
})
})

# Compute FVE
FVE = E$cumFVE[L] return(list(phi = t(E$phi[,1:L]), theta = theta, FVE= E$cumFVE[1:L], L=L)) #FVE = FVE, #cumFVE=E$cumFVE, L=L))

}

.GetEigenAnalysisResults <- function(smoothCov, regGrid, optns, muWork = NULL) {
# this function is based on <https://github.com/functionaldata/tPACE/blob/master/R/GetEigenAnalysisResults.R>
maxK <- optns$maxK FVEthreshold <- optns$FVEthreshold
verbose <- optns$verbose gridSize <- regGrid[2] - regGrid[1] numGrids <- nrow(smoothCov) eig <- eigen(smoothCov) positiveInd <- eig[['values']] >= 0 if (sum(positiveInd) == 0) { stop('All eigenvalues are negative. The covariance estimate is incorrect.') } d <- eig[['values']][positiveInd] eigenV <- eig[['vectors']][, positiveInd, drop=FALSE] if (maxK < length(d)) { if (optns[['verbose']]) { message(sprintf("At most %d number of PC can be selected, thresholded by maxK = %d. \n", length(d), maxK)) } d <- d[1:maxK] eigenV <- eigenV[, 1:maxK, drop=FALSE] } # thresholding for corresponding FVE option #(not before to avoid not being able to reach the FVEthreshold when pos eigenvalues > maxk) # i.e. default FVE 0.9999 outputs all components remained here. FVE <- cumsum(d) / sum(d) * 100 # cumulative FVE for all available eigenvalues from fitted cov no_opt <- min(which(FVE >= FVEthreshold * 100)) # final number of component chosen based on FVE # normalization if (is.null(muWork)) { muWork = 1:dim(eigenV)[1] } phi <- apply(eigenV, 2, function(x) { x <- x / sqrt(fdapace::trapzRcpp(regGrid, x^2)) #x <- x / sqrt(pracma:::trapz(regGrid, x^2)) if ( 0 <= sum(x*muWork) ) return(x) else return(-x) }) lambda <- gridSize * d; fittedCov <- phi %*% diag(x=lambda, nrow = length(lambda)) %*% t(phi) return(list(lambda = lambda[1:no_opt], phi = phi[,1:no_opt, drop=FALSE], cumFVE = FVE, kChoosen=no_opt, fittedCov=fittedCov)) } .GetMeanDense <- function(ymat, obsGrid, optns){ # This function is based on <https://github.com/functionaldata/tPACE/blob/master/R/GetMeanDense.R> # Check optns if(!(optns$dataType %in% c('Dense', 'DenseWithMV'))){
stop('Cross sectional mean is only applicable for option: dataType = "Dense" or "DenseWithMV"!')
}

if ( is.null(optns$userMu) ){ mu = colMeans(ymat, na.rm = TRUE) # use non-missing data only } else { mu = spline(optns$userMu$t, optns$userMu$mu, xout= obsGrid)$y;
}

if(any(is.na(mu))){
stop('The cross sectional mean is appears to have NaN! Consider setting your dataType to \'Sparse\' manually')
}

ret = list('mu' = mu, 'muDense' = NULL, 'mu_bw' = NULL)
class(ret) = "SMC"
return(ret)
}

.GetCovDense <- function(ymat, mu, optns) {
# This function is based on <https://github.com/functionaldata/tPACE/blob/master/R/GetCovDense.R>
if(!(optns$dataType %in% c('Dense', 'DenseWithMV'))){ stop('Sample Covariance is only applicable for option: dataType = "Dense" or "DenseWithMV"!') } # if( optns$muCovEstMethod == 'cross-sectional' ){
n = nrow(ymat)
m = ncol(ymat)
if( !is.null(optns$userMu) ){ ymat = ymat - matrix(rep(times= nrow(ymat), mu), ncol= ncol(ymat), byrow=TRUE) K = matrix( rep(0,m^2), m) for( i in (1:m)){ for( j in (1:m)){ XcNaNindx = which(is.na(ymat[,i])); YcNaNindx = which(is.na(ymat[,j])); NaNrows = union(XcNaNindx, YcNaNindx); # Find inner product of the columns with no NaN values indx = setdiff( 1:n, NaNrows) K[i,j] = sum(ymat[indx,i] * ymat[indx,j]) * (1/(n-1-length(NaNrows))); } } } else { K = cov(ymat, use = 'pairwise.complete.obs') # sample variance using non-missing data } K = 0.5 * (K + t(K)) # ensure that K is symmetric if (any(is.na(K))) { stop("Data is too sparse to be considered DenseWithMV. Remove sparse observations or specify dataType='Sparse' for FPCA") } if (optns[['error']] == TRUE) { # 2nd order difference method for finding sigma2 if (!is.null(optns[['userSigma2']])) { sigma2 <- optns[['userSigma2']] } else { #browser() ord <- 2 sigma2 <- mean(diff(t(ymat), differences=ord)^2, na.rm=TRUE) / choose(2 * ord, ord) diag(K) <- diag(K) - sigma2 } } else { sigma2 <- NULL } ret = list('rawCov' = NULL, 'smoothCov' = K, 'bwCov' = NULL, 'sigma2' = sigma2, outGrid = NULL) class(ret) = "SmoothCov" return(ret) # } else { # stop('optns$muCovEstMethod is unknown!\n')
# }
}
javzapata/fgm documentation built on Dec. 20, 2021, 10 p.m.