CCKriging: Kriging for Mixed Continuous and Categorical Input Variables

#' @title Get Matrix of Correlations between the Design Points
#'
#' @description
#' Compute correlations between the points of a mixed continuous and categorical design matrix
#' according to a specified correlation function for the continuous variables and a method for
#' constructing cross-correlations between the categorical level-combinations.
#'
#' @param model [\code{\link[=makeCCKriging]{CCKriging}}]\cr
#'   The \code{\link[=makeCCKriging]{CCKriging}} model the correlation matrix
#'   should be computed for.
#' @param par [\code{numeric}]\cr
#'   Optional vector of parameters for the Kriging model. The first values belong to the
#'   continuous variables, the rest to the categorical variables. Defaults to \code{NULL}, which
#'   means the parameters of the \code{model} object are used.
#' @param info [\code{logical(1)}]\cr
#'   Should extra information be printed to the console? Default is \code{TRUE}.
#' @export
getCorrMatrix = function(model, par = NULL, config, info = TRUE) {
  x = model$x
  if (is.null(par))
    par = model$par
  cat.type = model$config$cat.type
  cont.type = model$config$cont.type
  ## FIXME: generalize for other correlation functions
  nu = model$cont.par$nu

  d = ncol(x)
  n = nrow(x)

  cat.inds = which(sapply(x, class) == "factor")
  cont.inds = setdiff(1:d, cat.inds)

  q = length(cont.inds)
  m = length(cat.inds)

  ## compute distances of continuous variables
  points = expand.grid((1:n), (1:n))
  points = cbind(points[, 2], points[, 1])
  points = matrix(points[points[, 2] > points[, 1], ], ncol = 2)

  ## FIXME: par[cont.inds]
  # cont.corr.mat = GPfit::corr_matrix(getContInputs(x),
  #  beta = par[cont.inds], corr = list(type = cont.type, nu = nu))

  X = as.matrix(getContInputs(x))

  out = .C("C_covMatrix", as.double(X), as.integer(n), as.integer(q), 
    as.double(par[cont.inds]), as.double(1), as.character("matern5_2"), 
    ans = double(n * n), PACKAGE = "DiceKriging")
  cont.corr.mat = matrix(out$ans, n, n)

  if (cat.type == "CD") {   # categorical distances
    stop("Method 'CD' is not yet implemented.")
    # dists.quan = abs(x[points[, 2], quan.inds] - x[points[, 1], quan.inds])
    # ## generate distance matrix for categorical variables
    # qual.dists = parToQualDistMat(par = par, d = d)
    # dists.qual = getQualDists(x[points[, 2], qual.inds], x[points[, 1], qual.inds], qual.dists)
    # dists = as.matrix(cbind(dists.quan, dists.qual))
    #
    # if (cont.type == "matern") {
    #   ## compute correlation matrix, Matérn
    #   R = matrix(0, n, n)
    #   beta = par[1:d]
    #   temp = 10^beta
    #   temp = matrix(temp, ncol = d, nrow = (length(dists)/d), byrow = TRUE)
    #   dists = 2 * sqrt(nu) * dists * (temp)
    #   ID = which(dists == 0)
    #   Rtemp = 1/(gamma(nu) * 2^(nu - 1)) * (dists)^nu * besselK(dists, nu)
    #   Rtemp[ID] = 1
    #   Rtemp = apply(Rtemp, 1, prod)
    #   R[points] = Rtemp
    #   R = R + t(R)
    #   diag(R) = 1
    # }
  } else if (cat.type == "GK") {  # Gower Kriging
    stop("Gower Kriging is not yet implemented.")
  } else {  # EC, MC, UC, TMC, GMC
    ## get categorical correlation matrix of design points
    ## FIXME: make this work for more than one categorical inputs
    ## option 1: loop over this with one cat input at a time and change parameter vector beforehand
    ## option 2: do something inside the function
    cat.corr.mat = getCatCorrMatrix(model, config = config, par = par, design.corr = TRUE)

    R = cont.corr.mat * cat.corr.mat
  }
  return(R)
}

#' @title Get Categorical Correlation Matrix
#'
#' @description This function returns a matrix of correlations between the categorical
#'   level-combinations, which is needed for methods that split the correlation function in one
#'   product for the continuous and one for the categorical inputs (i.e., EC, MC, or UC).
#'
#' @param x [\code{data.frame}]\cr
#'   Categorical inputs of the design matrix.
#' @param cat.type [\code{character(1)}]\cr
#'   Which method should be used?
#'   Possible choices:\cr
#'   \tabular{ll}{
#'     \dQuote{EC}: \tab Exchangeable Correlation\cr
#'     \dQuote{MC}: \tab Multiplicative Correlation\cr
#'     \dQuote{UC}: \tab Hypersphere Decomposition-Based Unrestrictive Correlation\cr
#'     \dQuote{TMC}: \tab Toeplitz Matrix Multiplication-Based Correlation
#'   }
#' @param par [\code{numeric}]\cr
#'   Vector of the Kriging model's parameters. The first values belong to the
#'   continuous variables, the rest to the categorical variables.
#' @param q [\code{numeric}]\cr
#'   Number of continuous variables in the original design. This is needed in order to remove
#'   the unnecessary elements of the parameter vector \code{par}.
#' @param design.corr [\code{logical(1)}]\cr
#'   Should the n-by-n categorical correlation matrix of the design points be returned?
#'   If \code{FALSE}, the s-by-s matrix with correlations between the level combinations is
#'   returned, where s denotes the number of level combinations of the categorical variables.
#'   Default is \code{TRUE}.
#' @param info [\code{logical(1)}]\cr
#'   Should extra information be printed to the console? Default is \code{TRUE}.
#' @return [\code{matrix}] The correlation matrix.
getCatCorrMatrix = function(model, config, par = NULL, design.corr = TRUE) {
  getSingleInputCatCorrMatrix = function(x, cat.type, par, design.corr = TRUE, info = TRUE) {
    # if (!is.null(q))
    #   par = par[-(1:q)]
    n = nrow(x)
    m = ncol(x)
    n.levels = sapply(x, nlevels)
    s = prod(n.levels)
    ## FIXME: Add product form
    cat.mat = matrix(1, nrow = s, ncol = s)
    # if (cat.type == "TC") {  # Toeplitz Correlation
    #   if (!testNumeric(par, all.missing = FALSE, len = s - 1)) {
    #     stop(sprintf(
    #       "For method 'TC', par must be of length '# of categorical level-combinations' = %i.",
    #       s))
    #   }
    #   par = c(1, par)
    #   par.order = order(par, decreasing = TRUE)
    #
    #   dec.seq = sort(par, decreasing = TRUE)
    #   R = toeplitz(dec.seq)
    #   order.mat = t(sapply(par.order, unit, n = s))  # generate permutation matrix
    #   cat.mat = order.mat %*% R %*% order.mat  # swap rows and columns, still positive definite
    # }
    if (cat.type == "EC") {  # Exchangeable Correlation
      if (!checkmate::testNumeric(par, any.missing = FALSE, len = 1L)) {
        stop(sprintf("For method 'EC', par must be of length '# of continuous variables' + 1 = %i.",
          q + 1))
      }
      cat.mat[lower.tri(cat.mat) | upper.tri(cat.mat)] = par
    }
    if (cat.type == "MC") {  # Multiplicative Correlation
      if (!checkmate::testNumeric(par, any.missing = FALSE, len = s)) {
        stop(sprintf("For method 'MC', par must be of length '# of continuous variables' + '# of level-combinations' = %i.",
          q + s))
      }
      for (i in 1:(s-1)) {
        cat.mat[(i+1):s, i] = cat.mat[i, (i+1):s] = exp(-par[i]) * exp(-par[(i+1):s])
      }
    }
    if (cat.type == "UC") {  # Hypersphere Decomposition-Based Unrestrictive Correlation
      ## FIXME: To be checked.
      if (!checkmate::testNumeric(par, any.missing = FALSE, len = (s^2 - s)/2)) {
        stop(sprintf("For method 'UC', par must be of length '# of continuous variables' + ('# of lvl combinations'^2 - '# of lvl combinations')/2 = %i.",
          q + (s^2-s)/2))
      }
      ## build 'real' parameters from the parameter vector
      theta = matrix(nrow = s, ncol = s)
      par.rem = par
      for (j in 1:(s-1)) {
        theta[(j+1):s, j] = par.rem[1:(s-j)]
        par.rem = par.rem[(s-j+1):length(par.rem)]
      }

      ## compute the lower triangular matrix of the hypersphere decomposition, L
      L = matrix(0, nrow = s, ncol = s)
      L[1, 1] = 1
      for (r in 2:s) {
        L[r, 1] = cos(theta[r, 1])
        if (r > 2) {
          for (m in 2:(r - 1)) {
            L[r, m] = prod(c(sapply(1:(m - 1), function(j) sin(theta[r, j])), cos(theta[r, m])))
          }
        }
        L[r, r] = prod(sapply(1:(r-1), function(j) sin(theta[r, j])))
      }
      cat.mat = L %*% t(L)
    }
    if (cat.type == "TMC") {  # Toeplitz Matrix Multiplication-based Correlation
      if (!checkmate::testNumeric(par, all.missing = FALSE, len = s)) {
        stop(sprintf("For method 'TMC', par must be of length '# of continuous variables' + '# of level-combinations' = %i.",
                     q + s))
      }
      ## generate Toeplitz matrix from the parameter vector
      A = toeplitz(par)
      ## multiply it with its transpose to obtain a symmetric and positive definite matrix
      A = A %*% t(A)
      ## get the maximum value of all but the diagonal elements of A
      max.A = max(abs(A[lower.tri(A)]))
      ## use this maximum to scale all but the diagonal elements to [-1, 1]
      A = A/max.A

      ## generate transformation matrix B in order to get unit diagonal entries
      B = diag(1/sqrt(diag(A)), nrow = nrow(A))
      cat.mat = B %*% A %*% B
    }
    if (cat.type == "GMC") {  # General Matrix Multiplication-based Correlation
      perm = config$cat.par$perm
      k = nlevels(as.factor(perm))
      if (!checkmate::testCharacter(perm, any.missing = FALSE, len = s^2)) {
        stop(sprintf("Length of permutation vector must be '# of level-combinations'^2 = %i.",
                     s^2))
      }
      if (!checkmate::testNumeric(par, all.missing = FALSE, len = k)) {
        stop(sprintf("For the given permutation vector, the number of parameters must be %i.",
                     q + k))
      }
      for (i in 1:k)
        perm[perm == as.character(i)] = par[i]
      perm = as.numeric(perm)
      A = matrix(perm, byrow = TRUE, nrow = s)
      A = A %*% t(A)
      ## get the maximum value of all but the diagonal elements of A
      max.A = max(abs(A[lower.tri(A)]))
      ## use this maximum to scale all but the diagonal elements to [-1, 1]
      A = A/max.A

      ## generate transformation matrix B in order to get unit diagonal entries
      B = diag(1/sqrt(diag(A)), nrow = nrow(A))
      cat.mat = B %*% A %*% B
    }
    ## check if cat.mat is positive definite (needed for Cholesky decomposition)
    if (any(eigen(cat.mat)$values <= 0)) {
      ## if it’s not pd, try to repair it
      cat.mat = repairCorrMat(cat.mat, info = info)
    }
    ## FIXME: add sensible output for (!design.corr && cat.interaction)
    if (!design.corr)
      return(cat.mat)
    # qual.trafo = expand.grid(as.list(as.data.frame(
    #   sapply(1:ncol(x), function(j) levels(x[, j])))))
    ## FIXME: check this
    #cat.trafo = expand.grid(lapply(as.list(as.data.frame(                    # ?
    #  sapply(1:ncol(x), function(j) levels(x[, j])))), levels))              # ?
    cat.trafo = expand.grid(sapply(1:ncol(x), function(j) levels(x[, j])))    # ? just take cat.lut?
    which.lvl.combi = apply(sapply(1:nrow(x), function(j) sapply(1:nrow(cat.trafo),
      function(i) check.equal(cat.trafo[i, ], x[j, ]))), 2, which)
    cat.corr.matrix = matrix(1, nrow = n, ncol = n)
    for (i in 1:n) {
      for (j in 1:i) {
        cat.corr.matrix[i, j] = cat.mat[which.lvl.combi[i], which.lvl.combi[j]]
        cat.corr.matrix[j, i] = cat.corr.matrix[i, j]
      }
    }
    return(cat.corr.matrix)
  }

  x = getCatInputs(model)
  m = ncol(x)
  if (is.null(par))
    par = model$par
  q = ncol(getContInputs(model))
  n = nrow(x)
  info = model$config$info
  cat.corr.mat = matrix(1, nrow = n, ncol = n)
  par.rem = par[-(1:q)]

  cat.corr.mat = vector("list", length = ncol(x))
  for (i in seq_along(x[1, ])) {
    k = getNumberOfPars(x[, i, drop = FALSE], config)
    par.here = par.rem[1:k]
    par.rem = par.rem[-(1:k)]
    cat.corr.mat[[i]] = getSingleInputCatCorrMatrix(x[, i, drop = FALSE],
      cat.type = config$cat.type, par = par.here, design.corr = design.corr, info = info)
  }
  if (design.corr) {
    cat.corr.matrix = matrix(1, nrow = n, ncol = n)
    for (i in length(cat.corr.mat))
      cat.corr.matrix = cat.corr.matrix * cat.corr.mat[[i]]
  } else {
    if (model$config$cat.interaction || m == 1) {
      return(cat.corr.mat[[1]])
    }
    cat.lut = model$cat.lut
    s = nrow(cat.lut)
    cat.corr.matrix = matrix(1, nrow = s, ncol = s)
    for (i in 1:s) {
      for (j in 1:i) {
        ## multiply the correct values of the l correlation matrices with each other
        cat.corr.matrix[i, j] = do.call(prod, as.list(sapply(1:length(cat.lut), function(l) {
          cat.corr.mat[[l]][which(cat.lut[i, l] == levels(cat.lut[, l])),
                            which(cat.lut[j, l] == levels(cat.lut[, l]))]
        })))
        # cat.corr.matrix[i, j] = cat.mat[which.lvl.combi[i], which.lvl.combi[j]]
        cat.corr.matrix[j, i] = cat.corr.matrix[i, j]
      }
    }
  }
  return(cat.corr.matrix)
}
dominikkirchhoff/CCKriging documentation built on May 19, 2019, 4:05 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
dominikkirchhoff/CCKriging
Kriging for Mixed Continuous and Categorical Input Variables

R/getCorrMatrix.R
In dominikkirchhoff/CCKriging: Kriging for Mixed Continuous and Categorical Input Variables

R Package Documentation

Browse R Packages

We want your feedback!

dominikkirchhoff/CCKriging Kriging for Mixed Continuous and Categorical Input Variables

R/getCorrMatrix.R In dominikkirchhoff/CCKriging: Kriging for Mixed Continuous and Categorical Input Variables

R Package Documentation

Browse R Packages

We want your feedback!

dominikkirchhoff/CCKriging
Kriging for Mixed Continuous and Categorical Input Variables

R/getCorrMatrix.R
In dominikkirchhoff/CCKriging: Kriging for Mixed Continuous and Categorical Input Variables