R/SpatPCA.R

Defines functions plot.spatpca predict predictEigenfunction spatpca spatpcaCVWithSelectingK

Documented in plot.spatpca predict predictEigenfunction spatpca spatpcaCVWithSelectingK

# This file was generated by Rcpp::compileAttributes
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
#'
#' Internal function: M-fold CV of SpatPCA with selecting K
#'
#' @keywords internal
#' 
#' @param x Location matrix
#' @param Y Data matrix
#' @param M The number of folds for cross validation; default is 5.
#' @param tau1 Vector of a nonnegative smoothness parameter sequence. If NULL, 10 tau1 values in a range are used.
#' @param tau2 Vector of a nonnegative sparseness parameter sequence. If NULL, none of tau2 is used.
#' @param gamma Vector of a nonnegative hyper parameter sequence for tuning eigenvalues. If NULL, 10 values in a range are used.
#' @param shuffle_split Vector of indeces for random splitting Y into training and test sets 
#' @param maxit Maximum number of iterations. Default value is 100.
#' @param thr Threshold for convergence. Default value is \eqn{10^{-4}}.
#' @param l2 Vector of a nonnegative tuning parameter sequence for ADMM use
#' @return A list of objects including
#' \item{cv_result}{A list of resultant objects produced by `spatpcaCV`}
#' \item{selected_K}{Selected K based on CV.}
#'
spatpcaCVWithSelectingK <- function(x, Y, M, tau1, tau2, gamma, shuffle_split, maxit, thr, l2) {
  upper_bound <- fetchUpperBoundNumberEigenfunctions(Y, M)
  prev_cv_selection <- spatpcaCV(x, Y, M, 1, tau1, tau2, gamma, shuffle_split, maxit, thr, l2)
  
  for (k in 2:upper_bound) {
    cv_selection <- spatpcaCV(x, Y, M, k, tau1, tau2, gamma, shuffle_split, maxit, thr, l2)
    difference <- prev_cv_selection$selected_gamma - cv_selection$selected_gamma
    prev_cv_selection <- cv_selection
    if (difference <= 0 || abs(difference) <= 1e-8) {
      break
    }
  }
  return(list(cv_result = cv_selection, selected_K = k - 1))
}

#'
#' @title  Regularized PCA for spatial data
#'
#' @description Produce spatial dominant patterns and spatial predictions at the designated locations according to the specified tuning parameters or the selected tuning parameters by the M-fold cross-validation.
#'
#' @param x Location matrix (\eqn{p \times d}). Each row is a location. \eqn{d} is the dimension of locations
#' @param Y Data matrix (\eqn{n \times p}) stores the values at \eqn{p} locations with sample size \eqn{n}.
#' @param K Optional user-supplied number of eigenfunctions; default is NULL. If K is NULL or is_K_selected is TRUE, K is selected automatically.
#' @param is_K_selected If TRUE, K is selected automatically; otherwise, is_K_selected is set to be user-supplied K. Default depends on user-supplied K.
#' @param tau1 Optional user-supplied numeric vector of a nonnegative smoothness parameter sequence. If NULL, 10 tau1 values in a range are used.
#' @param tau2 Optional user-supplied numeric vector of a nonnegative sparseness parameter sequence. If NULL, none of tau2 is used.
#' @param gamma Optional user-supplied numeric vector of a nonnegative tuning parameter sequence. If NULL, 10 values in a range are used.
#' @param M Optional number of folds for cross validation; default is 5.
#' @param is_Y_detrended If TRUE, center the columns of Y. Default is FALSE.
#' @param maxit Maximum number of iterations. Default value is 100.
#' @param thr Threshold for convergence. Default value is \eqn{10^{-4}}.
#' @param num_cores Number of cores used to parallel computing. Default value is NULL (See `RcppParallel::defaultNumThreads()`)
#' 
#' @seealso \link{predict}
#' 
#' @return A list of objects including
#' \item{eigenfn}{Estimated eigenfunctions at the new locations, x_new.}
#' \item{selected_K}{Selected K based on CV. Execute the algorithm when `is_K_selected` is `TRUE`.}
#' \item{selected_tau1}{Selected tau1.}
#' \item{selected_tau2}{Selected tau2.}
#' \item{selected_gamma}{Selected gamma.}
#' \item{cv_score_tau1}{cv scores for tau1.}
#' \item{cv_score_tau2}{cv scores for tau2.}
#' \item{cv_score_gamma}{cv scores for gamma.}
#' \item{tau1}{Sequence of tau1-values used in the process.}
#' \item{tau2}{Sequence of tau2-values used in the process.}
#' \item{gamma}{Sequence of gamma-values used in the process.}
#' \item{detrended_Y}{If is_Y_detrended is TRUE, detrended_Y means Y is detrended; else, detrended_Y is equal to Y.}
#' \item{scaled_x}{Input location matrix. Only scale when it is one-dimensional}
#'
#' @details An ADMM form of the proposed objective function is written as
#' \deqn{\min_{\mathbf{\Phi}} \|\mathbf{Y}-\mathbf{Y}\mathbf{\Phi}\mathbf{\Phi}'\|^2_F +\tau_1\mbox{tr}(\mathbf{\Phi}^T\mathbf{\Omega}\mathbf{\Phi})+\tau_2\sum_{k=1}^K\sum_{j=1}^p |\phi_{jk}|,}
#' \eqn{\mbox{subject to $ \mathbf{\Phi}^T\mathbf{\Phi}=\mathbf{I}_K$,}} where \eqn{\mathbf{Y}} is a data matrix, \eqn{{\mathbf{\Omega}}} is a smoothness matrix, and \eqn{\mathbf{\Phi}=\{\phi_{jk}\}}.
#' @export
#' @author Wen-Ting Wang and Hsin-Cheng Huang
#' @references Wang, W.-T. and Huang, H.-C. (2017). Regularized principal component analysis for spatial data. \emph{Journal of Computational and Graphical Statistics} \bold{26} 14-25.
#' @examples
#' # The following examples only use two threads for parallel computing.
#' ## 1D: regular locations
#' x_1D <- as.matrix(seq(-5, 5, length = 50))
#' Phi_1D <- exp(-x_1D^2) / norm(exp(-x_1D^2), "F")
#' set.seed(1234)
#' Y_1D <- rnorm(n = 100, sd = 3) %*% t(Phi_1D) + matrix(rnorm(n = 100 * 50), 100, 50)
#' cv_1D <- spatpca(x = x_1D, Y = Y_1D, num_cores = 2)
#' plot(x_1D, cv_1D$eigenfn[, 1], type = "l", main = "1st eigenfunction")
#' lines(x_1D, svd(Y_1D)$v[, 1], col = "red")
#' legend("topleft", c("SpatPCA", "PCA"), lty = 1:1, col = 1:2)
#'
#' \donttest{
#' ## 2D: Daily 8-hour ozone averages for sites in the Midwest (USA)
#' library(fields)
#' library(pracma)
#' library(maps)
#' data(ozone2)
#' x <- ozone2$lon.lat
#' Y <- ozone2$y
#' date <- as.Date(ozone2$date, format = "%y%m%d")
#' rmna <- !colSums(is.na(Y))
#' YY <- matrix(Y[, rmna], nrow = nrow(Y))
#' YY <- detrend(YY, "linear")
#' xx <- x[rmna, ]
#' cv <- spatpca(x = xx, Y = YY)
#' quilt.plot(xx, cv$eigenfn[, 1])
#' map("state", xlim = range(xx[, 1]), ylim = range(xx[, 2]), add = TRUE)
#' map.text("state", xlim = range(xx[, 1]), ylim = range(xx[, 2]), cex = 2, add = TRUE)
#' plot(date, YY %*% cv$eigenfn[, 1], type = "l", ylab = "1st Principal Component")
#' ### new loactions
#' new_p <- 200
#' x_lon <- seq(min(xx[, 1]), max(xx[, 1]), length = new_p)
#' x_lat <- seq(min(xx[, 2]), max(xx[, 2]), length = new_p)
#' xx_new <- as.matrix(expand.grid(x = x_lon, y = x_lat))
#' eof <- spatpca(x = xx,
#'                Y = YY,
#'                K = cv$selected_K, 
#'                tau1 = cv$selected_tau1, 
#'                tau2 = cv$selected_tau2)
#' predicted_eof <- predictEigenfunction(eof, xx_new)              
#' quilt.plot(xx_new,
#'            predicted_eof[,1],
#'            nx = new_p, 
#'            ny = new_p, 
#'            xlab = "lon.", 
#'            ylab = "lat.")
#' map("state", xlim = range(x_lon), ylim = range(x_lat), add = TRUE)
#' map.text("state", xlim = range(x_lon), ylim = range(x_lat), cex = 2, add = TRUE)
#' ## 3D: regular locations
#' p <- 10
#' x <- y <- z <- as.matrix(seq(-5, 5, length = p))
#' d <- expand.grid(x, y, z)
#' Phi_3D <- rowSums(exp(-d^2)) / norm(as.matrix(rowSums(exp(-d^2))), "F")
#' Y_3D <- rnorm(n = 100, sd = 3) %*% t(Phi_3D) + matrix(rnorm(n = 100 * p^3), 100, p^3)
#' cv_3D <- spatpca(x = d, Y = Y_3D, tau2 = seq(0, 1000, length = 10))
#' library(plot3D)
#' library(RColorBrewer)
#' cols <- colorRampPalette(brewer.pal(9, "Blues"))(p)
#' isosurf3D(x, y, z,
#'          colvar = array(cv_3D$eigenfn[, 1], c(p, p, p)),
#'          level= seq(min(cv_3D$eigenfn[, 1]), max(cv_3D$eigenfn[, 1]), length = p),
#'          ticktype = "detailed",
#'          colkey = list(side = 1),
#'          col = cols)
#' }
spatpca <- function(x,
                    Y,
                    M = 5,
                    K = NULL,
                    is_K_selected = ifelse(is.null(K), TRUE, FALSE),
                    tau1 = NULL,
                    tau2 = NULL,
                    gamma = NULL,
                    is_Y_detrended = FALSE,
                    maxit = 100,
                    thr = 1e-04,
                    num_cores = NULL) {
  call2 <- match.call()
  checkInputData(Y, x, M)
  setCores(num_cores)

  # Transform main objects
  x <- as.matrix(x)
  Y <- detrend(Y, is_Y_detrended)
  K <- setNumberEigenfunctions(K, Y, M)
  p <- ncol(Y)
  n <- nrow(Y)
  scaled_x <- scaleLocation(x)
  shuffle_split <- sample(rep(1:M, length.out = nrow(Y)))

  # Initialize candidates of tuning parameters
  tau1 <- setTau1(tau1, M)
  tau2 <- setTau2(tau2, M)
  l2 <- setL2(tau2)
  gamma <- setGamma(gamma, Y[which(shuffle_split != 1), ])


  if (is_K_selected) {
    cv_with_selected_k <- spatpcaCVWithSelectingK(scaled_x, Y, M, tau1, tau2, gamma, shuffle_split, maxit, thr, l2)
    cv_result <- cv_with_selected_k$cv_result
    selected_K <- cv_with_selected_k$selected_K
  }
  else {
    cv_result <- spatpcaCV(scaled_x, Y, M, K, tau1, tau2, gamma, shuffle_split, maxit, thr, l2)
    selected_K <- K
  }

  selected_tau1 <- cv_result$selected_tau1
  selected_tau2 <- cv_result$selected_tau2
  selected_gamma <- cv_result$selected_gamma
  cv_score_tau1 <- cv_result$cv_score_tau1
  cv_score_tau2 <- cv_result$cv_score_tau2
  cv_score_gamma <- cv_result$cv_score_gamma
  estimated_eigenfn <- cv_result$estimated_eigenfn

  obj.cv <- list(
    call = call2,
    eigenfn = estimated_eigenfn,
    selected_K = K,
    selected_tau1 = selected_tau1,
    selected_tau2 = selected_tau2,
    selected_gamma = selected_gamma,
    cv_score_tau1 = cv_score_tau1,
    cv_score_tau2 = cv_score_tau2,
    cv_score_gamma = cv_score_gamma,
    tau1 = tau1,
    tau2 = tau2,
    gamma = gamma,
    detrended_Y = Y,
    scaled_x = scaled_x
  )
  class(obj.cv) <- "spatpca"
  return(obj.cv)
}

#' @title  Spatial dominant patterns on new locations
#'
#' @description Estimate K eigenfunctions on new locations
#' 
#' @param spatpca_object An `spatpca` class object 
#' @param x_new New location matrix.
#' @seealso \link{spatpca}
#' @return {A matrix with K Eigenfunction values on new locations.}
#' @examples
#' # 1D: artificial irregular locations
#' x_1D <- as.matrix(seq(-5, 5, length = 10))
#' Phi_1D <- exp(-x_1D^2) / norm(exp(-x_1D^2), "F")
#' set.seed(1234)
#' Y_1D <- rnorm(n = 100, sd = 3) %*% t(Phi_1D) + matrix(rnorm(n = 100 * 10), 100, 10)
#' rm_loc <- sample(1:50, 20)
#' x_1Drm <- x_1D[-rm_loc]
#' Y_1Drm <- Y_1D[, -rm_loc]
#' x_1Dnew <- as.matrix(seq(-5, 5, length = 20))
#' cv_1D <- spatpca(x = x_1Drm, Y = Y_1Drm, tau2 = 1:100, num_cores = 2)
#' dominant_patterns <- predictEigenfunction(cv_1D, x_new = x_1Dnew)
#' 
predictEigenfunction <- function(spatpca_object, x_new) {
  checkNewLocationsForSpatpcaObject(spatpca_object, x_new)
  scaled_x_new <- scaleLocation(x_new)

  predicted_eigenfn <- eigenFunction(
    scaled_x_new,
    spatpca_object$scaled_x,
    spatpca_object$eigenfn
  )
  return(predicted_eigenfn)
}

#' @title  Spatial predictions on new locations
#'
#' @description Predict on new locations with the estimated spatial structures.
#' 
#' @param spatpca_object An `spatpca` class object 
#' @param x_new New location matrix.
#' @param eigen_patterns_on_new_site Eigen-patterns on x_new
#' @seealso \link{spatpca}
#' @return A prediction matrix of Y at the new locations, x_new.
#' @examples
#' # 1D: artificial irregular locations
#' x_1D <- as.matrix(seq(-5, 5, length = 10))
#' Phi_1D <- exp(-x_1D^2) / norm(exp(-x_1D^2), "F")
#' set.seed(1234)
#' Y_1D <- rnorm(n = 100, sd = 3) %*% t(Phi_1D) + matrix(rnorm(n = 100 * 10), 100, 10)
#' rm_loc <- sample(1:50, 20)
#' x_1Drm <- x_1D[-rm_loc]
#' Y_1Drm <- Y_1D[, -rm_loc]
#' x_1Dnew <- as.matrix(seq(-5, 5, length = 20))
#' cv_1D <- spatpca(x = x_1Drm, Y = Y_1Drm, tau2 = 1:100, num_cores = 2)
#' predictions <- predict(cv_1D, x_new = x_1Dnew)
#' 
predict <- function(spatpca_object, x_new, eigen_patterns_on_new_site = NULL) {
  checkNewLocationsForSpatpcaObject(spatpca_object, x_new)

  if (is.null(eigen_patterns_on_new_site)) {
    eigen_patterns_on_new_site <- predictEigenfunction(spatpca_object, x_new)
  }

  spatial_prediction <- spatialPrediction(
    spatpca_object$eigenfn,
    spatpca_object$detrended_Y,
    spatpca_object$selected_gamma,
    eigen_patterns_on_new_site
  )
  return(spatial_prediction$predict)
}

#'
#' @title  Display the cross-validation results
#'
#' @description Display the M-fold cross-validation results
#'
#' @param x An spatpca class object for `plot` method
#' @param ... Not used directly
#' @seealso \link{spatpca}
#'
#' @export
#' @method plot spatpca
#' @examples
#' x_1D <- as.matrix(seq(-5, 5, length = 10))
#' Phi_1D <- exp(-x_1D^2) / norm(exp(-x_1D^2), "F")
#' set.seed(1234)
#' Y_1D <- rnorm(n = 100, sd = 3) %*% t(Phi_1D) + matrix(rnorm(n = 100 * 10), 100, 10)
#' cv_1D <- spatpca(x = x_1D, Y = Y_1D, num_cores = 2)
#' plot(cv_1D)
#
plot.spatpca <- function(x, ...) {
  if (class(x) != "spatpca") {
    stop("Invalid object! Please enter a `spatpca` object")
  }

  # Set the default theme
  theme_set(
    theme_bw() +
      theme(
        text = element_text(size = 16),
        legend.position = "none",
        legend.title = element_blank(),
        plot.title = element_text(hjust = 0.5)
      )
  )
  cv_dataframe <- rbind(
    data.frame(
      type = "tau1 given tau2 = 0",
      parameter = array(x$tau1),
      cv = array(x$cv_score_tau1)
    ),
    data.frame(
      type = "tau2 given selected tau1",
      parameter = array(x$tau2),
      cv = array(x$cv_score_tau2)
    ),
    data.frame(
      type = "gamma given selected tau1 and tau2",
      parameter = array(x$gamma),
      cv = array(x$cv_score_gamma)
    )
  )
  result <-
    ggplot(
      cv_dataframe,
      aes(x = parameter, y = cv, color = type)
    ) +
    geom_line(size = 1.5) +
    facet_grid(scales = "free", . ~ type)

  return(suppressMessages(print(result)))
}

Try the SpatPCA package in your browser

Any scripts or data that you put into this service are public.

SpatPCA documentation built on Jan. 31, 2021, 5:05 p.m.