R/zeitzeiger_fit.R

Defines functions getCircDiff zeitzeigerLikelihood fx zeitzeigerSpc predictIntensity getSplineBasis getPeriodBsplineKnots zeitzeigerFit

Documented in getCircDiff predictIntensity zeitzeigerFit zeitzeigerSpc

#' @importFrom data.table data.table :=
#' @importFrom foreach foreach %dopar% %do%
NULL


#' Fit a periodic spline for each feature
#'
#' Fit a periodic smoothing spline to the measurements for each feature as a
#' function of the periodic variable.
#'
#' @param x Matrix of measurements, with observations in rows and features in
#'   columns. Missing values are allowed.
#' @param time Vector of values of the periodic variable for the observations,
#'   where 0 corresponds to the lowest possible value and 1 corresponds to the
#'   highest possible value.
#' @param nKnots Number of internal knots to use for the periodic smoothing
#'   spline.
#'
#' @return
#' \item{xFitMean}{Matrix of coefficients, where rows correspond to features and
#'   columns correspond to variables in the fit.}
#' \item{xFitResid}{Vector of root mean square of residuals, same length as
#'   `x`.}
#'
#' @seealso [zeitzeigerSpc()], [zeitzeigerPredict()]
#'
#' @export
zeitzeigerFit = function(x, time, nKnots = 3) {
  knots = seq(0, 1 - 1 / nKnots, length = nKnots)
  basis = getSplineBasis(time, knots)
  fit = limma::lmFit(t(x), basis)
  xFitMean = stats::coef(fit)
  xFitResid = fit$sigma
  return(list(xFitMean = xFitMean, xFitResid = xFitResid))}


# modified from the pbs package
# Jake Hughey, Nov 2018
getPeriodBsplineKnots = function(knots, degree = 3) {
  nKnots = length(knots)

  closeKnots1 = rep(0, degree)
  for (i in 1:degree) {
    closeKnots1[i] = knots[nKnots] + knots[i + 1] - knots[1]}

  closeKnots2 = rep(0, degree)
  for (i in 1:degree) {
    closeKnots2[i] = knots[1] - knots[nKnots] + knots[nKnots - i]}

  closeKnots = c(closeKnots2, knots, closeKnots1)
  return(closeKnots)}


# modified from the pbs package
# Jake Hughey, Nov 2018
getSplineBasis = function(x, knots, period = 1, degree = 3) {
  # basis = cbind(1, cos(x / period * 2 * pi), sin(x / period * 2 * pi))
  x = x %% period
  degree = as.integer(degree)
  knotsFull = getPeriodBsplineKnots(c(0, knots, period), degree)

  basisTmp = splines::spline.des(knotsFull, x, 1 + degree)$design
  basisLeft = basisTmp[, 1:degree, drop = FALSE]
  basisRight = basisTmp[, (ncol(basisTmp) - degree + 1):ncol(basisTmp),
                        drop = FALSE]

  idx = -c(1:degree, (ncol(basisTmp) - degree + 1):ncol(basisTmp))
  basis = cbind(basisTmp[, idx, drop = FALSE], basisLeft + basisRight)
  return(basis)}


#' Calculate time-dependent mean
#'
#' Calculate the expected value of each feature.
#'
#' @param fitCoef Matrix of coefficients from the spline fits, where rows
#'   correspond to features and columns correspond to variables in the model.
#' @param time Vector of values of the periodic variable for the observations,
#'   where 0 corresponds to the lowest possible value and 1 corresponds to the
#'   highest possible value.
#' @param period Period for the periodic variable.
#' @param knots Optional vector of knots. This argument is designed for internal
#'   use.
#'
#' @return Matrix of predicted measurements, where rows correspond to
#'   time-points and columns correspond to features.
#'
#' @seealso [zeitzeigerFit()]
#'
#' @export
predictIntensity = function(fitCoef, time, period = 1, knots = NULL) {
  if (is.null(knots)) {
    nKnots = ncol(fitCoef) - 1
    knots = seq(0, period - period / nKnots, length = nKnots)}
  basis = getSplineBasis(time, knots, period)
  y = basis %*% t(fitCoef)
  return(y)}


#' Calculate sparse principal components of time-dependent variation
#'
#' Calculate the SPCs given the time-dependent means and the residuals from
#' [zeitzeigerFit()].
#'
#' @param xFitMean List of bigsplines, length is number of features.
#' @param xFitResid Matrix of residuals, dimensions are observations by
#'   features.
#' @param nTime Number of time-points by which to discretize the time-dependent
#'   behavior of each feature. Corresponds to the number of rows in the matrix
#'   for which the SPCs will be calculated.
#' @param useSpc Logical indicating whether to use [PMA::SPC()] (default) or
#'   [base::svd()].
#' @param sumabsv L1-constraint on the SPCs, passed to [PMA::SPC()].
#' @param orth Logical indicating whether to require left singular vectors
#'   be orthogonal to each other, passed to [PMA::SPC()].
#' @param ... Other arguments passed to [PMA::SPC()].
#'
#' @return Output of [PMA::SPC()], unless `useSpc` is `FALSE`, then output of
#'   [base::svd()].
#'
#' @seealso [zeitzeigerFit()], [zeitzeigerPredict()]
#'
#' @export
zeitzeigerSpc = function(xFitMean, xFitResid, nTime = 10, useSpc = TRUE,
                         sumabsv = 1, orth = TRUE, ...) {
  timeRange = seq(0, 1 - 1 / nTime, 1 / nTime)
  xMean = predictIntensity(xFitMean, timeRange)
  xMeanScaled = scale(xMean, center = TRUE, scale = FALSE)
  z = xMeanScaled / matrix(rep(xFitResid, nTime), nrow = nTime, byrow = TRUE)
  if (useSpc) {
    spcResult = PMA::SPC(z, sumabsv = sumabsv, K = min(dim(z)), orth = orth,
                         trace = FALSE, compute.pve = FALSE, ...)
  } else {
    spcResult = svd(z)}
  return(spcResult)}


fx = function(x, time, xFitMean, xFitResid, knots, logArg = FALSE) {
  # dim(x): c(n, p) or c(1, p)
  # length(time): n
  if (is.vector(x)) {
    x = matrix(x, nrow = 1)}

  like = matrix(data = 0, nrow = length(time), ncol = ncol(x))
  for (jj in seq_len(ncol(x))) {
    xPredMean = predictIntensity(xFitMean[jj, , drop = FALSE], time, knots = knots)[, 1]
    like[, jj] = stats::dnorm(x[, jj], mean = xPredMean, sd = xFitResid[jj], log = logArg)}
  return(like)}


# #' Calculate time-dependent likelihood
# #'
# #' Given a matrix of test observations, the estimated time-dependent means
# #' and variances of the features, and a vector of times, `zeitzeigerLikelihood`
# #' calculates the likelihood of each time for each test observation. The calculation
# #' assumes that conditioned on the periodic variable, the densities of the features
# #' are normally distributed. The calculation also assumes that the features are
# #' independent.
# #'
# #' @param xTest Matrix of measurements, with observations in rows and features in columns.
# #' @param xFitMean Matrix of coefficients from the spline fits.
# #' @param xFitResid Vector of root mean square residuals.
# #' @param timeRange Vector of values of the periodic variable at which to calculate likelihood.
# #' @param logArg Logical indicating whether to return likelihood (default) or log-likelihood.
# #'
# #' @return Matrix with observations in rows and times in columns.
# #'
# #' @export
zeitzeigerLikelihood = function(
  xTest, xFitMean, xFitResid, knots, timeRange = seq(0, 1 - 0.01, 0.01),
  logArg = FALSE) {
  loglike = matrix(NA, nrow = nrow(xTest), ncol = length(timeRange))
  for (ii in seq_len(nrow(xTest))) {
    xTestNow = xTest[ii, , drop = FALSE]
    loglikeTmp = fx(xTestNow, timeRange, xFitMean, xFitResid, knots, logArg = TRUE)
    loglike[ii, ] = rowSums(loglikeTmp)}
  if (logArg) {
    return(loglike)}
  return(exp(loglike))}


#' Calculate circular difference
#'
#' Calculate circular difference.
#'
#' @param x Numeric vector or matrix.
#' @param y Numeric vector or matrix.
#' @param period Period of the periodic variable.
#' @param towardZero If `TRUE`, returned values will be between `-period / 2`
#' and `period / 2`. If `FALSE`, returned values will be between 0 and `period`.
#'
#' @return Vector or matrix corresponding to `x - y`.
#'
#' @export
getCircDiff = function(x, y, period = 1, towardZero = TRUE) {
  d = (x - y) %% period
  if (towardZero) {
    d = ifelse(d <= (period / 2), d, d - period)}
  return(d)}
hugheylab/zeitzeiger documentation built on Dec. 7, 2022, 2:29 a.m.