# GauPro: Gaussian Process Fitting
#
# Documented in k_Gaussian

#' Gaussian Kernel R6 class
#'
#' @docType class
#' @importFrom R6 R6Class
#' @export
#' @useDynLib GauPro, .registration = TRUE
#' @importFrom Rcpp evalCpp
#' @importFrom stats optim
# @keywords data, kriging, Gaussian process, regression
#' @return Object of \code{\link[R6]{R6Class}} with methods for fitting GP model.
#' @format \code{\link[R6]{R6Class}} object.
#' @examples
#' k1 <- Gaussian$new(beta=0)
#' plot(k1)
#' k1 <- Gaussian$new(beta=c(0,-1, 1))
#' plot(k1)
#'
#'
#' n <- 12
#' x <- matrix(seq(0,1,length.out = n), ncol=1)
#' y <- sin(2*pi*x) + rnorm(n,0,1e-1)
#' gp <- GauPro_kernel_model$new(X=x, Z=y, kernel=Gaussian$new(1),
#'                               parallel=FALSE)
#' gp$predict(.454)
#' gp$plot1D()
#' gp$cool1Dplot()
Gaussian <- R6::R6Class(
  classname = "GauPro_kernel_Gaussian",
  inherit = GauPro_kernel_beta,
  public = list(
    # No initialize() is defined in this class, so Gaussian$new() uses the
    # initializer inherited from GauPro_kernel_beta.

    #' @description Calculate covariance between two points
    #' @param x vector or matrix.
    #' @param y vector or matrix, optional. If excluded, find correlation
    #' of x with itself.
    #' @param beta Correlation parameters.
    #' @param s2 Variance parameter.
    #' @param params parameters to use instead of beta and s2.
    k = function(x, y=NULL, beta=self$beta, s2=self$s2, params=NULL) {
      if (!is.null(params)) {
        # params holds the estimated betas first (if beta is estimated)
        # and log10(s2) last (if s2 is estimated).
        lenparams <- length(params)
        if (self$beta_est) {
          beta <- params[seq_len(self$beta_length)]
        } else {
          beta <- self$beta
        }
        if (self$s2_est) {
          logs2 <- params[lenparams]
        } else {
          logs2 <- self$logs2
        }
        s2 <- 10 ^ logs2
      } else {
        if (is.null(beta)) {beta <- self$beta}
        if (is.null(s2)) {s2 <- self$s2}
      }
      theta <- 10^beta
      if (self$isotropic && length(theta) == self$beta_length) {
        # Isotropic: repeat theta so there is one value per dimension
        theta <- rep(theta, self$D)
      }
      if (is.null(y)) {
        if (is.matrix(x)) {
          # The arma version was tried and was not actually faster
          return(s2 * corr_gauss_matrix_symC(x, theta))
        } else {
          # Covariance of a single point with itself
          return(s2 * 1)
        }
      }
      # Conditions are scalar, so use the short-circuit && operator
      if (is.matrix(x) && is.matrix(y)) {
        # Parallel and arma versions were tried and were not clearly faster
        s2 * corr_gauss_matrixC(x, y, theta)
      } else if (is.matrix(x) && !is.matrix(y)) {
        s2 * corr_gauss_matrixvecC(x, y, theta)
      } else if (is.matrix(y)) {
        s2 * corr_gauss_matrixvecC(y, x, theta)
      } else {
        s2 * exp(-sum(theta * (x-y)^2))
      }
    },
    #' @description Find covariance of two points
    #' @param x vector
    #' @param y vector
    #' @param beta correlation parameters on log scale
    #' @param theta correlation parameters on regular scale
    #' @param s2 Variance parameter
    kone = function(x, y, beta, theta, s2) {
      if (missing(theta)) {theta <- 10^beta}
      s2 * exp(-sum(theta * (x-y)^2))
    },
    #' @description Derivative of covariance with respect to parameters
    #' @param params Kernel parameters
    #' @param X matrix of points in rows
    #' @param C_nonug Covariance without nugget added to diagonal. If
    #' missing, both C_nonug and C are computed from X, params and nug.
    #' @param C Covariance with nugget
    #' @param nug Value of nugget
    dC_dparams = function(params=NULL, X, C_nonug, C, nug) {
      n <- nrow(X)
      lenparams <- length(params)
      if (lenparams > 0) {
        if (self$beta_est) {
          beta <- params[seq_len(self$beta_length)]
        } else {
          beta <- self$beta
        }
        if (self$s2_est) {
          logs2 <- params[lenparams]
        } else {
          logs2 <- self$logs2
        }
      } else {
        beta <- self$beta
        logs2 <- self$logs2
      }

      theta <- 10^beta
      if (self$isotropic && length(theta) == self$beta_length) {
        theta <- rep(theta, self$D)
      }

      # Parameters are on the log10 scale, so the chain rule adds log(10)
      ln10 <- log(10)
      s2 <- 10 ^ logs2

      if (missing(C_nonug)) { # Assume C missing too, must have nug
        C_nonug <- self$k(x=X, params=params)
        C <- C_nonug + diag(nug*s2, nrow(C_nonug))
      }

      # Number of estimated parameters: betas (if estimated) plus s2
      lenparams_D <- self$beta_length*self$beta_est + self$s2_est

      if (!self$useC || self$isotropic) { # R implementation
        dC_dparams <- array(dim=c(lenparams_D, n, n), data=0)
        if (self$s2_est) {
          dC_dparams[lenparams_D,,] <- C * ln10
        }
        if (self$beta_est) {
          # Only off-diagonal entries are written, so the diagonal of each
          # beta slice keeps its initial value of 0.
          for (k in seq_len(self$beta_length)) {
            # seq_len(n - 1) is empty when X has a single row, whereas
            # seq(1, n - 1, 1) would raise an error.
            for (i in seq_len(n - 1)) {
              for (j in seq(i+1, n, 1)) {
                if (!self$isotropic) {
                  dC_dparams[k,i,j] <- - C_nonug[i,j] * (X[i,k] - X[j,k])^2 *
                    theta[k] * ln10
                } else {
                  dC_dparams[k,i,j] <- - C_nonug[i,j] * sum((X[i,] - X[j,])^2) *
                    theta[k] * ln10
                }
                dC_dparams[k,j,i] <- dC_dparams[k,i,j]
              }
            }
          }
        }

      } else {
        # Compiled implementation, written to speed this up
        dC_dparams <- kernel_gauss_dC(X, theta, C_nonug, self$s2_est,
                                      self$beta_est, lenparams_D, s2*nug)
      }
      return(dC_dparams)
    },
    #' @description Calculate covariance matrix and its derivative
    #'  with respect to parameters
    #' @param params Kernel parameters
    #' @param X matrix of points in rows
    #' @param nug Value of nugget
    #' @return List with elements \code{C} (covariance with nugget) and
    #' \code{dC_dparams}.
    C_dC_dparams = function(params=NULL, X, nug) {
      n <- nrow(X)
      lenparams <- length(params)
      if (lenparams > 0) {
        if (self$beta_est) {
          beta <- params[seq_len(self$beta_length)]
        } else {
          beta <- self$beta
        }
        if (self$s2_est) {
          logs2 <- params[lenparams]
        } else {
          logs2 <- self$logs2
        }
      } else {
        beta <- self$beta
        logs2 <- self$logs2
      }

      # theta is not repeated for the isotropic case here: self$k() does
      # its own expansion and the R loop below only indexes
      # theta[1..beta_length].
      theta <- 10^beta

      # Parameters are on the log10 scale, so the chain rule adds log(10)
      ln10 <- log(10)
      s2 <- 10 ^ logs2
      # Calculate C
      C_nonug <- self$k(x=X, beta=beta, s2=s2)
      C <- C_nonug + diag(nug*s2, nrow(C_nonug))

      # Number of estimated parameters: betas (if estimated) plus s2
      lenparams_D <- self$beta_length*self$beta_est + self$s2_est

      if (!self$useC || self$isotropic) { # R implementation
        dC_dparams <- array(dim=c(lenparams_D, n, n), data=0)
        if (self$s2_est) {
          dC_dparams[lenparams_D,,] <- C * ln10
        }
        if (self$beta_est) {
          # Only off-diagonal entries are written (where C equals the
          # no-nugget covariance), so the diagonal of each beta slice keeps
          # its initial value of 0.
          for (k in seq_len(self$beta_length)) {
            # seq_len(n - 1) is empty when X has a single row, whereas
            # seq(1, n - 1, 1) would raise an error.
            for (i in seq_len(n - 1)) {
              for (j in seq(i+1, n, 1)) {
                if (!self$isotropic) {
                  dC_dparams[k,i,j] <- - C[i,j] * (X[i,k] - X[j,k])^2 *
                    theta[k] * ln10
                } else {
                  dC_dparams[k,i,j] <- - C[i,j] * sum((X[i,] - X[j,])^2) *
                    theta[k] * ln10
                }
                dC_dparams[k,j,i] <- dC_dparams[k,i,j]
              }
            }
          }
        }
      } else {
        # Compiled implementation, written to speed this up
        dC_dparams <- kernel_gauss_dC(X, theta, C_nonug, self$s2_est,
                                      self$beta_est, lenparams_D, s2*nug)
      }
      return(list(C = C, dC_dparams = dC_dparams))
    },
    #' @description Derivative of covariance with respect to X
    #' @param XX matrix of points
    #' @param X matrix of points to take derivative with respect to
    #' @param theta Correlation parameters
    #' @param beta log of theta
    #' @param s2 Variance parameter
    dC_dx = function(XX, X, theta, beta=self$beta, s2=self$s2) {
      if (missing(theta)) {theta <- 10^beta}
      if (self$isotropic && length(theta) == self$beta_length) {
        theta <- rep(theta, self$D)
      }
      if (!is.matrix(XX)) {stop("XX must be matrix")}
      if (ncol(X) != ncol(XX)) {stop("XX and X must have same number of cols")}
      # Compiled version; replaced an earlier pure-R triple loop
      corr_gauss_dCdX(XX, X, theta, s2)
    },
    #' @description Second derivative of covariance with respect to X
    #' @param XX matrix of points
    #' @param X matrix of points to take derivative with respect to
    #' @param theta Correlation parameters
    #' @param beta log of theta
    #' @param s2 Variance parameter
    #' @return Array with dimensions
    #' \code{c(nrow(XX), ncol(XX), ncol(XX), nrow(X))}.
    d2C_dx2 = function(XX, X, theta, beta=self$beta, s2=self$s2) {
      if (missing(theta)) {theta <- 10^beta}
      if (self$isotropic && length(theta) == self$beta_length) {
        theta <- rep(theta, self$D)
      }
      if (!is.matrix(XX)) {stop("XX must be matrix")}
      d <- ncol(XX)
      if (ncol(X) != d) {stop("X and XX must have same # of columns")}
      n <- nrow(X)
      nn <- nrow(XX)
      d2C_dx2 <- array(NA, dim=c(nn, d, d, n))
      # seq_len() keeps the loops empty when XX or X has zero rows
      for (i in seq_len(nn)) {
        for (k in seq_len(n)) {
          Cik <- s2 * exp(-sum(theta * (XX[i,] - X[k,]) ^ 2))
          # Mixed partials: fill the upper triangle and mirror it
          if (d > 1) {
            for (j1 in seq_len(d - 1)) {
              for (j2 in (j1+1):d) {
                d2C_dx2[i, j1, j2, k] <- 4 * theta[j1] *
                  (XX[i, j1] - X[k, j1]) * theta[j2] *
                  (XX[i, j2] - X[k, j2]) * Cik
                d2C_dx2[i, j2, j1, k] <- d2C_dx2[i, j1, j2, k]
              }
            }
          }
          # Pure second partials on the diagonal
          for (j in seq_len(d)) {
            d2C_dx2[i, j, j, k] <- -2 * theta[j] * Cik +
              4 * theta[j]^2 * (XX[i, j] - X[k, j])^2 * Cik
          }
        }
      }
      d2C_dx2
    },
    #' @description Second derivative of covariance with respect to
    #' X and XX each once.
    #' @param XX matrix of points
    #' @param X matrix of points to take derivative with respect to
    #' @param theta Correlation parameters
    #' @param beta log of theta
    #' @param s2 Variance parameter
    #' @return Array with dimensions
    #' \code{c(nrow(XX), ncol(XX), ncol(XX), nrow(X))}.
    d2C_dudv = function(XX, X, theta, beta=self$beta, s2=self$s2) {
      if (missing(theta)) {theta <- 10^beta}
      if (self$isotropic && length(theta) == self$beta_length) {
        theta <- rep(theta, self$D)
      }
      if (!is.matrix(XX)) {stop("XX must be matrix")}
      d <- ncol(XX)
      if (ncol(X) != d) {stop("X and XX must have same # of columns")}
      n <- nrow(X)
      nn <- nrow(XX)
      d2C_dx2 <- array(NA, dim=c(nn, d, d, n))
      # seq_len() keeps the loops empty when XX or X has zero rows
      for (i in seq_len(nn)) {
        for (k in seq_len(n)) {
          Cik <- s2 * exp(-sum(theta * (XX[i,] - X[k,]) ^ 2))
          # Every entry has the opposite sign of the one in d2C_dx2()
          if (d > 1) {
            for (j1 in seq_len(d - 1)) {
              for (j2 in (j1+1):d) {
                d2C_dx2[i, j1, j2, k] <- - 4 * theta[j1] *
                  (XX[i, j1] - X[k, j1]) * theta[j2] *
                  (XX[i, j2] - X[k, j2]) * Cik
                d2C_dx2[i, j2, j1, k] <- d2C_dx2[i, j1, j2, k]
              }
            }
          }
          for (j in seq_len(d)) {
            d2C_dx2[i, j, j, k] <- 2 * theta[j] * Cik -
              4 * theta[j]^2 * (XX[i, j] - X[k, j])^2 * Cik
          }
        }
      }
      d2C_dx2
    },
    #' @description Second derivative of covariance with respect to X and XX
    #' when they equal the same value
    #' @param XX matrix of points
    #' @param theta Correlation parameters
    #' @param beta log of theta
    #' @param s2 Variance parameter
    #' @return Array with dimensions \code{c(nrow(XX), ncol(XX), ncol(XX))}.
    d2C_dudv_ueqvrows = function(XX, theta, beta=self$beta, s2=self$s2) {
      # Calculates derivative of C w.r.t. each component evaluated for
      #  both components equal to rows of XX
      # Vectorized version of d2C_dudv for u=v for rows of XX
      # Name is for "u equal v for rows of XX"
      # Much simpler since XX-X terms go to zero when XX=X
      # For m1 matrix, following two are equal, this version 2.5x faster
      # lapply(1:nrow(m1), function(i) {gp$kernel$d2C_dudv(XX = m1[i,,drop=F],
      #                                           X = m1[i,,drop=F])[1,,,1]})
      # gp$kernel$d2C_dudv_ueqvrows(XX = m1)
      if (missing(theta)) {theta <- 10^beta}
      if (self$isotropic && length(theta) == self$beta_length) {
        theta <- rep(theta, self$D)
      }
      if (!is.matrix(XX)) {stop("XX must be matrix")}
      d <- ncol(XX)
      nn <- nrow(XX)
      # Off-diagonal entries stay 0 because the XX-X terms vanish
      d2C_dx2 <- array(0, dim=c(nn, d, d))
      for (j in seq_len(d)) {
        # Not multiplied by C since C=1 when u=v
        d2C_dx2[, j, j] <- 2 * theta[j] * s2
      }
      d2C_dx2
    },
    #' @description Print this object
    #' @return The object itself, invisibly.
    print = function() {
      cat('GauPro kernel: Gaussian\n')
      cat('\tD    =', self$D, '\n')
      cat('\tbeta =', signif(self$beta, 3), '\n')
      cat('\ts2   =', self$s2, '\n')
      # Return self invisibly, the usual convention for R6 print methods
      invisible(self)
    }
  )
)

#' @rdname Gaussian
#' @export
#' @param beta Initial beta value
#' @param s2 Initial variance
#' @param D Number of input dimensions of data
#' @param beta_lower Lower bound for beta
#' @param beta_upper Upper bound for beta
#' @param beta_est Should beta be estimated?
#' @param s2_lower Lower bound for s2
#' @param s2_upper Upper bound for s2
#' @param s2_est Should s2 be estimated?
#' @param useC Should C code be used? Much faster.
#' @param isotropic If isotropic then a single beta/theta is used for all
#' dimensions. If not (anisotropic) then a separate beta/theta is used for
#' each dimension.
k_Gaussian <- function(beta, s2=1, D,
                       beta_lower=-8, beta_upper=6, beta_est=TRUE,
                       s2_lower=1e-8, s2_upper=1e8, s2_est=TRUE,
                       useC=TRUE, isotropic=FALSE) {
  # Convenience constructor: every argument is handed to Gaussian$new()
  # by name, unchanged.
  kernel <- Gaussian$new(
    beta=beta, s2=s2, D=D,
    beta_lower=beta_lower, beta_upper=beta_upper, beta_est=beta_est,
    s2_lower=s2_lower, s2_upper=s2_upper, s2_est=s2_est,
    useC=useC, isotropic=isotropic
  )
  kernel
}