R/SpatPCA.R

Defines functions plot.spatpca predict predictEigenfunction spatpca spatpcaCVWithSelectedK

Documented in plot.spatpca predict predictEigenfunction spatpca spatpcaCVWithSelectedK

# This file was generated by Rcpp::compileAttributes
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
#'
#' Internal function: M-fold CV of SpatPCA with selected K
#'
#' @keywords internal
#'
#' @param x Location matrix
#' @param Y Data matrix
#' @param M The number of folds for cross validation; default is 5.
#' @param tau1 Vector of a non-negative smoothness parameter sequence. If NULL, 10 tau1 values in a range are used.
#' @param tau2 Vector of a non-negative sparseness parameter sequence. If NULL, none of tau2 is used.
#' @param gamma Vector of a non-negative hyper parameter sequence for tuning eigenvalues. If NULL, 10 values in a range are used.
#' @param shuffle_split Vector of indices for random splitting Y into training and test sets
#' @param maxit Maximum number of iterations. Default value is 100.
#' @param thr Threshold for convergence. Default value is \eqn{10^{-4}}.
#' @param l2 Vector of a non-negative tuning parameter sequence for ADMM use
#' @return A list of objects including
#' \item{cv_result}{A list of resultant objects produced by `spatpcaCV`}
#' \item{selected_K}{Selected K based on CV.}
#'

spatpcaCVWithSelectedK <-
  function(x,
           Y,
           M,
           tau1,
           tau2,
           gamma,
           shuffle_split,
           maxit,
           thr,
           l2) {
    upper_bound <- fetchUpperBoundNumberEigenfunctions(Y, M)
    prev_cv_selection <- spatpcaCV(x, Y, M, 1, tau1, tau2, gamma, shuffle_split, maxit, thr, l2)

    for (k in 2:upper_bound) {
      cv_selection <-
        spatpcaCV(x, Y, M, k, tau1, tau2, gamma, shuffle_split, maxit, thr, l2)
      difference <-
        prev_cv_selection$selected_gamma - cv_selection$selected_gamma
      prev_cv_selection <- cv_selection
      if (difference <= 0 || abs(difference) <= 1e-8) {
        break
      }
    }
    return(list(cv_result = cv_selection, selected_K = k - 1))
  }

#'
#' @title  Regularized PCA for spatial data
#'
#' @description Produce spatial dominant patterns and spatial predictions at the designated locations according to the specified tuning parameters or the selected tuning parameters by the M-fold cross-validation.
#'
#' @param x Location matrix (\eqn{p \times d}). Each row is a location. \eqn{d} is the dimension of locations
#' @param Y Data matrix (\eqn{n \times p}) stores the values at \eqn{p} locations with sample size \eqn{n}.
#' @param K Optional user-supplied number of eigenfunctions; default is NULL. If K is NULL or is_K_selected is TRUE, K is selected automatically.
#' @param is_K_selected If TRUE, K is selected automatically; otherwise, is_K_selected is set to be user-supplied K. Default depends on user-supplied K.
#' @param tau1 Optional user-supplied numeric vector of a non-negative smoothness parameter sequence. If NULL, 10 tau1 values in a range are used.
#' @param tau2 Optional user-supplied numeric vector of a non-negative sparseness parameter sequence. If NULL, none of tau2 is used.
#' @param gamma Optional user-supplied numeric vector of a non-negative tuning parameter sequence. If NULL, 10 values in a range are used.
#' @param M Optional number of folds for cross validation; default is 5.
#' @param is_Y_detrended If TRUE, center the columns of Y. Default is FALSE.
#' @param maxit Maximum number of iterations. Default value is 100.
#' @param thr Threshold for convergence. Default value is \eqn{10^{-4}}.
#' @param num_cores Number of cores used to parallel computing. Default value is NULL (See `RcppParallel::defaultNumThreads()`)
#'
#' @seealso \link{predict}
#'
#' @return A list of objects including
#' \item{eigenfn}{Estimated eigenfunctions at the new locations, x_new.}
#' \item{selected_K}{Selected K based on CV. Execute the algorithm when `is_K_selected` is `TRUE`.}
#' \item{selected_tau1}{Selected tau1.}
#' \item{selected_tau2}{Selected tau2.}
#' \item{selected_gamma}{Selected gamma.}
#' \item{cv_score_tau1}{cv scores for tau1.}
#' \item{cv_score_tau2}{cv scores for tau2.}
#' \item{cv_score_gamma}{cv scores for gamma.}
#' \item{tau1}{Sequence of tau1-values used in the process.}
#' \item{tau2}{Sequence of tau2-values used in the process.}
#' \item{gamma}{Sequence of gamma-values used in the process.}
#' \item{detrended_Y}{If is_Y_detrended is TRUE, detrended_Y means Y is detrended; else, detrended_Y is equal to Y.}
#' \item{scaled_x}{Input location matrix. Only scale when it is one-dimensional}
#'
#' @details An ADMM form of the proposed objective function is written as
#' \deqn{\min_{\mathbf{\Phi}} \|\mathbf{Y}-\mathbf{Y}\mathbf{\Phi}\mathbf{\Phi}'\|^2_F +\tau_1\mbox{tr}(\mathbf{\Phi}^T\mathbf{\Omega}\mathbf{\Phi})+\tau_2\sum_{k=1}^K\sum_{j=1}^p |\phi_{jk}|,}
#' \eqn{\mbox{subject to $ \mathbf{\Phi}^T\mathbf{\Phi}=\mathbf{I}_K$,}} where \eqn{\mathbf{Y}} is a data matrix, \eqn{{\mathbf{\Omega}}} is a smoothness matrix, and \eqn{\mathbf{\Phi}=\{\phi_{jk}\}}.
#' @export
#' @author Wen-Ting Wang and Hsin-Cheng Huang
#' @references Wang, W.-T. and Huang, H.-C. (2017). Regularized principal component analysis for spatial data. \emph{Journal of Computational and Graphical Statistics} \bold{26} 14-25.
#' @examples
#' # The following examples only use two threads for parallel computing.
#' ## 1D: regular locations
#' x_1D <- as.matrix(seq(-5, 5, length = 50))
#' Phi_1D <- exp(-x_1D^2) / norm(exp(-x_1D^2), "F")
#' set.seed(1234)
#' Y_1D <- rnorm(n = 100, sd = 3) %*% t(Phi_1D) + matrix(rnorm(n = 100 * 50), 100, 50)
#' cv_1D <- spatpca(x = x_1D, Y = Y_1D, num_cores = 2)
#' plot(x_1D, cv_1D$eigenfn[, 1], type = "l", main = "1st eigenfunction")
#' lines(x_1D, svd(Y_1D)$v[, 1], col = "red")
#' legend("topleft", c("SpatPCA", "PCA"), lty = 1:1, col = 1:2)
#'
#' \donttest{
#' ## 2D: Daily 8-hour ozone averages for sites in the Midwest (USA)
#' library(fields)
#' library(pracma)
#' library(maps)
#' data(ozone2)
#' x <- ozone2$lon.lat
#' Y <- ozone2$y
#' date <- as.Date(ozone2$date, format = "%y%m%d")
#' rmna <- !colSums(is.na(Y))
#' YY <- matrix(Y[, rmna], nrow = nrow(Y))
#' YY <- detrend(YY, "linear")
#' xx <- x[rmna, ]
#' cv <- spatpca(x = xx, Y = YY)
#' quilt.plot(xx, cv$eigenfn[, 1])
#' map("state", xlim = range(xx[, 1]), ylim = range(xx[, 2]), add = TRUE)
#' map.text("state", xlim = range(xx[, 1]), ylim = range(xx[, 2]), cex = 2, add = TRUE)
#' plot(date, YY %*% cv$eigenfn[, 1], type = "l", ylab = "1st Principal Component")
#' ### new loactions
#' new_p <- 200
#' x_lon <- seq(min(xx[, 1]), max(xx[, 1]), length = new_p)
#' x_lat <- seq(min(xx[, 2]), max(xx[, 2]), length = new_p)
#' xx_new <- as.matrix(expand.grid(x = x_lon, y = x_lat))
#' eof <- spatpca(x = xx,
#'                Y = YY,
#'                K = cv$selected_K,
#'                tau1 = cv$selected_tau1,
#'                tau2 = cv$selected_tau2)
#' predicted_eof <- predictEigenfunction(eof, xx_new)
#' quilt.plot(xx_new,
#'            predicted_eof[,1],
#'            nx = new_p,
#'            ny = new_p,
#'            xlab = "lon.",
#'            ylab = "lat.")
#' map("state", xlim = range(x_lon), ylim = range(x_lat), add = TRUE)
#' map.text("state", xlim = range(x_lon), ylim = range(x_lat), cex = 2, add = TRUE)
#' ## 3D: regular locations
#' p <- 10
#' x <- y <- z <- as.matrix(seq(-5, 5, length = p))
#' d <- expand.grid(x, y, z)
#' Phi_3D <- rowSums(exp(-d^2)) / norm(as.matrix(rowSums(exp(-d^2))), "F")
#' Y_3D <- rnorm(n = 100, sd = 3) %*% t(Phi_3D) + matrix(rnorm(n = 100 * p^3), 100, p^3)
#' cv_3D <- spatpca(x = d, Y = Y_3D, tau2 = seq(0, 1000, length = 10))
#' library(plot3D)
#' library(RColorBrewer)
#' cols <- colorRampPalette(brewer.pal(9, "Blues"))(p)
#' isosurf3D(x, y, z,
#'          colvar = array(cv_3D$eigenfn[, 1], c(p, p, p)),
#'          level= seq(min(cv_3D$eigenfn[, 1]), max(cv_3D$eigenfn[, 1]), length = p),
#'          ticktype = "detailed",
#'          colkey = list(side = 1),
#'          col = cols)
#' }
spatpca <- function(x,
                    Y,
                    M = 5,
                    K = NULL,
                    is_K_selected = ifelse(is.null(K), TRUE, FALSE),
                    tau1 = NULL,
                    tau2 = NULL,
                    gamma = NULL,
                    is_Y_detrended = FALSE,
                    maxit = 100,
                    thr = 1e-04,
                    num_cores = NULL) {
  call2 <- match.call()
  checkInputData(Y, x, M)
  setCores(num_cores)

  # Transform main objects
  x <- as.matrix(x)
  Y <- detrend(Y, is_Y_detrended)
  K <- setNumberEigenfunctions(K, Y, M)
  p <- ncol(Y)
  n <- nrow(Y)
  scaled_x <- scaleLocation(x)
  shuffle_split <- sample(rep(1:M, length.out = nrow(Y)))

  # Initialize candidates of tuning parameters
  tau1 <- setTau1(tau1, M)
  tau2 <- setTau2(tau2, M)
  l2 <- setL2(tau2)
  gamma <- setGamma(gamma, Y[which(shuffle_split != 1), ])

  if (is_K_selected) {
    cv_with_selected_k <-
      spatpcaCVWithSelectedK(scaled_x,
                             Y,
                             M,
                             tau1,
                             tau2,
                             gamma,
                             shuffle_split,
                             maxit,
                             thr,
                             l2)
    cv_result <- cv_with_selected_k$cv_result
    selected_K <- cv_with_selected_k$selected_K
  }
  else {
    cv_result <-
      spatpcaCV(scaled_x,
                Y,
                M,
                K,
                tau1,
                tau2,
                gamma,
                shuffle_split,
                maxit,
                thr,
                l2)
    selected_K <- K
  }

  selected_tau1 <- cv_result$selected_tau1
  selected_tau2 <- cv_result$selected_tau2
  selected_gamma <- cv_result$selected_gamma
  cv_score_tau1 <- cv_result$cv_score_tau1
  cv_score_tau2 <- cv_result$cv_score_tau2
  cv_score_gamma <- cv_result$cv_score_gamma
  estimated_eigenfn <- cv_result$estimated_eigenfn

  obj.cv <- list(
    call = call2,
    eigenfn = estimated_eigenfn,
    selected_K = K,
    selected_tau1 = selected_tau1,
    selected_tau2 = selected_tau2,
    selected_gamma = selected_gamma,
    cv_score_tau1 = cv_score_tau1,
    cv_score_tau2 = cv_score_tau2,
    cv_score_gamma = cv_score_gamma,
    tau1 = tau1,
    tau2 = tau2,
    gamma = gamma,
    detrended_Y = Y,
    scaled_x = scaled_x
  )
  class(obj.cv) <- "spatpca"
  return(obj.cv)
}

#' @title  Spatial dominant patterns on new locations
#'
#' @description Estimate K eigenfunctions on new locations
#'
#' @param spatpca_object An `spatpca` class object
#' @param x_new New location matrix.
#' @seealso \link{spatpca}
#' @return {A matrix with K Eigenfunction values on new locations.}
#' @examples
#' # 1D: artificial irregular locations
#' x_1D <- as.matrix(seq(-5, 5, length = 10))
#' Phi_1D <- exp(-x_1D^2) / norm(exp(-x_1D^2), "F")
#' set.seed(1234)
#' Y_1D <- rnorm(n = 100, sd = 3) %*% t(Phi_1D) + matrix(rnorm(n = 100 * 10), 100, 10)
#' rm_loc <- sample(1:10, 2)
#' x_1Drm <- x_1D[-rm_loc]
#' Y_1Drm <- Y_1D[, -rm_loc]
#' x_1Dnew <- as.matrix(seq(-5, 5, length = 20))
#' cv_1D <- spatpca(x = x_1Drm, Y = Y_1Drm, tau2 = 1:100, num_cores = 2)
#' dominant_patterns <- predictEigenfunction(cv_1D, x_new = x_1Dnew)
#'
predictEigenfunction <- function(spatpca_object, x_new) {
  checkNewLocationsForSpatpcaObject(spatpca_object, x_new)
  scaled_x_new <- scaleLocation(x_new)
  
  predicted_eigenfn <- eigenFunction(scaled_x_new,
                                     spatpca_object$scaled_x,
                                     spatpca_object$eigenfn)
  return(predicted_eigenfn)
}

#' @title  Spatial predictions on new locations
#'
#' @description Predict the response on new locations with the estimated spatial structures.
#'
#' @param spatpca_object An `spatpca` class object
#' @param x_new New location matrix.
#' @param eigen_patterns_on_new_site Eigen-patterns on x_new
#' @seealso \link{spatpca}
#' @return A prediction matrix of Y at the new locations, x_new.
#' @examples
#' # 1D: artificial irregular locations
#' x_1D <- as.matrix(seq(-5, 5, length = 10))
#' Phi_1D <- exp(-x_1D^2) / norm(exp(-x_1D^2), "F")
#' set.seed(1234)
#' Y_1D <- rnorm(n = 100, sd = 3) %*% t(Phi_1D) + matrix(rnorm(n = 100 * 10), 100, 10)
#' removed_location <- sample(1:10, 3)
#' removed_x_1D <- x_1D[-removed_location]
#' removed_Y_1D <- Y_1D[, -removed_location]
#' new_x_1D <- as.matrix(seq(-5, 5, length = 20))
#' cv_1D <- spatpca(x = removed_x_1D, Y = removed_Y_1D, tau2 = 1:100, num_cores = 2)
#' predictions <- predict(cv_1D, x_new = new_x_1D)
#'
predict <-
  function(spatpca_object,
           x_new,
           eigen_patterns_on_new_site = NULL) {
    checkNewLocationsForSpatpcaObject(spatpca_object, x_new)

    if (is.null(eigen_patterns_on_new_site)) {
      eigen_patterns_on_new_site <-
        predictEigenfunction(spatpca_object, x_new)
    }

    spatial_prediction <- spatialPrediction(
      spatpca_object$eigenfn,
      spatpca_object$detrended_Y,
      spatpca_object$selected_gamma,
      eigen_patterns_on_new_site
    )
    return(spatial_prediction$prediction)
  }

#'
#' @title  Display the cross-validation results
#'
#' @description Display the M-fold cross-validation results
#'
#' @param x An spatpca class object for `plot` method
#' @param ... Not used directly
#' @return `NULL`.
#' @seealso \link{spatpca}
#'
#' @export
#' @method plot spatpca
#' @examples
#' x_1D <- as.matrix(seq(-5, 5, length = 10))
#' Phi_1D <- exp(-x_1D^2) / norm(exp(-x_1D^2), "F")
#' set.seed(1234)
#' Y_1D <- rnorm(n = 100, sd = 3) %*% t(Phi_1D) + matrix(rnorm(n = 100 * 10), 100, 10)
#' cv_1D <- spatpca(x = x_1D, Y = Y_1D, num_cores = 2)
#' plot(cv_1D)
#
plot.spatpca <- function(x, ...) {
  if (!inherits(x, "spatpca")) {
    stop("Invalid object! Please enter a `spatpca` object")
  }

  # Set the default theme
  theme_set(
    theme_bw() +
      theme(
        text = element_text(size = 16),
        legend.position = "none",
        legend.title = element_blank(),
        plot.title = element_text(hjust = 0.5)
      )
  )
  tau1_hat_string = paste(
    c(
      "hat(tau)[1]==",
      formatC(x$selected_tau1, format = "f", digits = 3)
    ),
    collapse = ""
  )
  tau2_hat_string = paste(
    c(
      "hat(tau)[2]==",
     formatC(x$selected_tau2, format = "f", digits = 3)
    ),
    collapse = ""
  )
  parameter_types = c(
    "tau[1]~'|'~tau[2]==0",
    paste(c("tau[2]~'|'~", tau1_hat_string), collapse = ""),
    paste(c("gamma~'|'~list(", tau1_hat_string, ",", tau2_hat_string, ")"), collapse = "")
  )
  cv_dataframe <- rbind(
    data.frame(
      type = parameter_types[1],
      parameter = array(x$tau1),
      cv = array(x$cv_score_tau1)
    ),
    data.frame(
      type = parameter_types[2],
      parameter = array(x$tau2),
      cv = array(x$cv_score_tau2)
    ),
    data.frame(
      type = parameter_types[3],
      parameter = array(x$gamma),
      cv = array(x$cv_score_gamma)
    )
  )
  cv_dataframe$type = factor(cv_dataframe$type, levels = parameter_types)

  result <-
    ggplot(cv_dataframe,
           aes(x = parameter, y = cv, color = type)) +
    geom_line(linewidth = 1.5) +
    facet_grid(scales = "free",
               . ~ type,
               labeller = labeller(type = label_parsed)) +
    ggtitle("Result of K-fold CV")
  return(suppressMessages(print(result)))
}

Try the SpatPCA package in your browser

Any scripts or data that you put into this service are public.

SpatPCA documentation built on Nov. 13, 2023, 5:06 p.m.