R/PACE.R

Defines functions PACE .PACE

Documented in PACE

.PACE <- function(X, Y, Y.pred = NULL, nbasis = 10, pve = 0.99, npc = NULL, makePD = FALSE, cov.weight.type = "none")
{

  if (is.null(Y.pred))
    Y.pred = Y
  D = NCOL(Y)
  if(D != length(X)) # check if number of observation points in X & Y are identical
    stop("different number of (potential) observation points differs in X and Y!")
  I = NROW(Y)
  I.pred = NROW(Y.pred)
  d.vec = rep(X, each = I) # use given X-values for estimation of mu
  gam0 = mgcv::gam(as.vector(Y) ~ s(d.vec, k = nbasis))
  mu = mgcv::predict.gam(gam0, newdata = data.frame(d.vec = X))
  Y.tilde = Y - matrix(mu, I, D, byrow = TRUE)
  cov.sum = cov.count = cov.mean = matrix(0, D, D)
  for (i in seq_len(I)) {
    obs.points = which(!is.na(Y[i, ]))
    cov.count[obs.points, obs.points] = cov.count[obs.points, obs.points] + 1
    cov.sum[obs.points, obs.points] = cov.sum[obs.points, obs.points] + tcrossprod(Y.tilde[i, obs.points])
  }
  G.0 = ifelse(cov.count == 0, NA, cov.sum/cov.count)
  diag.G0 = diag(G.0)
  diag(G.0) = NA
  row.vec = rep(X, each = D) # use given X-values
  col.vec = rep(X, D) # use given X-values
  cov.weights <- switch(cov.weight.type,
                        none = rep(1, D^2),
                        counts = as.vector(cov.count),
                        stop("cov.weight.type ", cov.weight.type, " unknown in smooth covariance estimation"))

  npc.0 = matrix(mgcv::predict.gam(mgcv::gam(as.vector(G.0)~te(row.vec, col.vec, k = nbasis), weights = cov.weights),
                                   newdata = data.frame(row.vec = row.vec, col.vec = col.vec)), D, D)
  npc.0 = (npc.0 + t(npc.0))/2
  # no extra-option (useSymm) as in fpca.sc-method
  if (makePD) { # see fpca.sc
    npc.0 <- {
      tmp <- Matrix::nearPD(npc.0, corr = FALSE, keepDiag = FALSE,
                            do2eigen = TRUE, trace = options()$verbose)
      as.matrix(tmp$mat)
    }
  }

  ### numerical integration for calculation of eigenvalues (see Ramsay & Silverman, Chapter 8)
  w <- funData::.intWeights(X, method = "trapezoidal")
  Wsqrt <- diag(sqrt(w))
  Winvsqrt <- diag(1/(sqrt(w)))
  V <- Wsqrt %*% npc.0 %*% Wsqrt
  evalues = eigen(V, symmetric = TRUE, only.values = TRUE)$values
  ###
  evalues = replace(evalues, which(evalues <= 0), 0)
  npc = ifelse(is.null(npc), min(which(cumsum(evalues)/sum(evalues) > pve)), npc)
  efunctions = matrix(Winvsqrt%*%eigen(V, symmetric = TRUE)$vectors[, seq(len = npc)], nrow = D, ncol = npc)
  evalues = eigen(V, symmetric = TRUE, only.values = TRUE)$values[seq_len(npc)]  # use correct matrix for eigenvalue problem
  cov.hat = efunctions %*% tcrossprod(diag(evalues, nrow = npc, ncol = npc), efunctions)
  ### numerical integration for estimation of sigma2
  T.len <- X[D] - X[1] # total interval length
  T1.min <- min(which(X >= X[1] + 0.25*T.len)) # left bound of narrower interval T1
  T1.max <- max(which(X <= X[D] - 0.25*T.len)) # right bound of narrower interval T1
  DIAG = (diag.G0 - diag(cov.hat))[T1.min :T1.max] # function values
  # weights
  w <- funData::.intWeights(X[T1.min:T1.max], method = "trapezoidal")
  sigma2 <- max(1/(X[T1.max]-X[T1.min]) * sum(DIAG*w, na.rm = TRUE), 0) #max(1/T.len * sum(DIAG*w), 0)
  ####
  D.inv = diag(1/evalues, nrow = npc, ncol = npc)
  Z = efunctions
  Y.tilde = Y.pred - matrix(mu, I.pred, D, byrow = TRUE)
  fit = matrix(0, nrow = I.pred, ncol = D)
  scores = matrix(NA, nrow = I.pred, ncol = npc)

  # no calculation of confidence bands, no variance matrix
  for (i.subj in seq_len(I.pred)) {
    obs.points = which(!is.na(Y.pred[i.subj, ]))
    if (sigma2 == 0 & length(obs.points) < npc) {
      #stop("Measurement error estimated to be zero and there are fewer observed points than PCs; scores cannot be estimated.")
      scores[i.subj, ] <- NA
      fit[i.subj, ] <- NA
      next()
    }
    Zcur = matrix(Z[obs.points, ], nrow = length(obs.points),
                  ncol = dim(Z)[2])
    ZtZ_sD.inv = solve(crossprod(Zcur) + sigma2 * D.inv)
    scores[i.subj, ] = ZtZ_sD.inv %*% crossprod(Zcur, Y.tilde[i.subj, obs.points])
    fit[i.subj, ] = t(as.matrix(mu)) + tcrossprod(scores[i.subj, ], efunctions)
  }
  ret.objects = c("fit", "scores", "mu", "efunctions", "evalues",
                  "npc", "sigma2") # add sigma2 to output
  ret = lapply(seq_len(length(ret.objects)), function(u) get(ret.objects[u]))
  names(ret) = ret.objects
  ret$estVar <- diag(cov.hat)
  return(ret)
}

#' Univariate functional principal component analysis by smoothed covariance
#'
#' This function calculates a univariate functional principal components
#' analysis by smoothed covariance based on code from
#' \code{\link[refund]{fpca.sc}} (package \pkg{refund}).
#'
#' @section Warning: This function works only for univariate functional data
#'   observed on one-dimensional domains.
#'
#' @param funDataObject An object of class \code{\link[funData]{funData}} or
#'   \code{\link[funData]{irregFunData}} containing the functional data
#'   observed, for which the functional principal component analysis is
#'   calculated. If the data is sampled irregularly (i.e. of class
#'   \code{\link[funData]{irregFunData}}), \code{funDataObject} is transformed
#'   to a \code{\link[funData]{funData}} object first.
#' @param predData  An object of class \code{\link[funData]{funData}}, for which
#'   estimated trajectories based on a truncated Karhunen-Loeve representation
#'   should be estimated. Defaults to \code{NULL}, which implies prediction for
#'   the given data.
#' @param nbasis An integer, representing the number of  B-spline basis
#'   functions used for estimation of the mean function and bivariate smoothing
#'   of the covariance surface. Defaults to \code{10} (cf.
#'   \code{\link[refund]{fpca.sc}}).
#' @param pve A numeric value between 0 and 1, the proportion of variance
#'   explained: used to choose the number of principal components. Defaults to
#'   \code{0.99} (cf. \code{\link[refund]{fpca.sc}}).
#' @param npc An integer, giving a prespecified value for the number of
#'   principal components. Defaults to \code{NULL}. If given, this overrides
#'   \code{pve} (cf. \code{\link[refund]{fpca.sc}}).
#' @param makePD Logical: should positive definiteness be enforced for the
#'   covariance surface estimate? Defaults to \code{FALSE} (cf.
#'   \code{\link[refund]{fpca.sc}}).
#' @param cov.weight.type The type of weighting used for the smooth covariance
#'   estimate. Defaults to \code{"none"}, i.e. no weighting. Alternatively,
#'   \code{"counts"} (corresponds to \code{\link[refund]{fpca.sc}} ) weights the
#'   pointwise estimates of the covariance function by the number of observation
#'   points.
#'
#' @return \item{mu}{A \code{\link[funData]{funData}} object with one
#'   observation, corresponding to the mean function.} \item{values}{A vector
#'   containing the estimated eigenvalues.} \item{functions}{A
#'   \code{\link[funData]{funData}} object containing the estimated functional
#'   principal components.} \item{scores}{An matrix of estimated scores for the
#'   observations in \code{funDataObject}. Each row corresponds to the scores of
#'   one observation.} \item{fit}{A \code{\link[funData]{funData}} object
#'   containing the estimated trajectories based on the truncated Karhunen-Loeve
#'   representation and the estimated scores and functional principal components
#'   for \code{predData} (if this is not \code{NULL}) or \code{funDataObject}
#'   (if \code{predData} is \code{NULL}).} \item{npc}{The number of functional
#'   principal components: either the supplied \code{npc}, or the minimum number
#'   of basis functions needed to explain proportion \code{pve} of the variance
#'   in the observed curves (cf. \code{\link[refund]{fpca.sc}}).}
#'   \item{sigma2}{The estimated measurement error variance (cf.
#'   \code{\link[refund]{fpca.sc}}).} \item{estVar}{The estimated smooth
#'   variance function of the data.}
#'
#' @seealso \code{\link[funData]{funData}}, \code{\link[refund]{fpca.sc}},
#'   \code{\link{fpcaBasis}}, \code{\link{univDecomp}}
#'
#' @export PACE
#'
#' @examples
#' \donttest{
#'   oldPar <- par(no.readonly = TRUE)
#'
#'   # simulate data
#'   sim <- simFunData(argvals = seq(-1,1,0.01), M = 5, eFunType = "Poly",
#'                     eValType = "exponential", N = 100)
#'
#'   # calculate univariate FPCA
#'   pca <- PACE(sim$simData, npc = 5)
#'
#'   # Plot the results
#'   par(mfrow = c(1,2))
#'   plot(sim$trueFuns, lwd = 2, main = "Eigenfunctions")
#'   # flip estimated functions for correct signs
#'   plot(flipFuns(sim$trueFuns,pca$functions), lty = 2, add = TRUE)
#'   legend("bottomright", c("True", "Estimate"), lwd = c(2,1), lty = c(1,2))
#'
#'   plot(sim$simData, lwd = 2, main = "Some Observations", obs = 1:7)
#'   plot(pca$fit, lty = 2, obs = 1:7, add = TRUE) # estimates are almost equal to true values
#'   legend("bottomright", c("True", "Estimate"), lwd = c(2,1), lty = c(1,2))
#'
#'   par(oldPar)
#' }
PACE <- function(funDataObject, predData = NULL, nbasis = 10, pve = 0.99, npc = NULL, makePD = FALSE, cov.weight.type = "none")
{
  # check inputs
  if(! class(funDataObject) %in% c("funData", "irregFunData"))
    stop("Parameter 'funDataObject' must be a funData or irregFunData object.")
  if(dimSupp(funDataObject) != 1)
    stop("PACE: Implemented only for funData objects with one-dimensional support.")
  if(methods::is(funDataObject, "irregFunData")) # for irregular functional data, use funData representation
    funDataObject <- as.funData(funDataObject)

  if(is.null(predData))
    Y.pred = NULL # use only funDataObject
  else
  {
    if(!isTRUE(all.equal(funDataObject@argvals, predData@argvals)))
      stop("PACE: funDataObject and predData must be defined on the same domains!")

    Y.pred = predData@X
  }


  if(!all(is.numeric(nbasis), length(nbasis) == 1, nbasis > 0))
    stop("Parameter 'nbasis' must be passed as a number > 0.")

  if(!all(is.numeric(pve), length(pve) == 1, 0 <= pve, pve <= 1))
    stop("Parameter 'pve' must be passed as a number between 0 and 1.")

  if(!is.null(npc) & !all(is.numeric(npc), length(npc) == 1, npc > 0))
    stop("Parameter 'npc' must be either NULL or passed as a number > 0.")

  if(!is.logical(makePD))
    stop("Parameter 'makePD' must be passed as a logical.")

  if(!is.character(cov.weight.type))
    stop("Parameter 'cov.weight.type' must be passed as a character.")


  res <- .PACE(X = funDataObject@argvals[[1]], funDataObject@X, Y.pred = Y.pred,
               nbasis = nbasis, pve = pve, npc = npc, makePD = makePD,
               cov.weight.type = cov.weight.type)

  return(list(mu = funData(funDataObject@argvals, matrix(res$mu, nrow = 1)),
              values = res$evalues,
              functions = funData(funDataObject@argvals, t(res$efunctions)),
              scores = res$scores,
              fit = funData(funDataObject@argvals, res$fit),
              npc = res$npc,
              sigma2 = res$sigma2,
              estVar = funData(funDataObject@argvals, matrix(res$estVar, nrow = 1))
  ))
}
anthonydevaux/hdlandmark documentation built on Jan. 11, 2023, 8:01 a.m.