R/CRcompare.R
In OptimaRegion: Confidence Regions for Optima of Response Surfaces

Documented in CRcompare

#' Confidence interval for the distance between two response surface optima (2 regressors)
#'
#' Computes bootstrapped confidence intervals for the mean and median
#' distance between the optima of two response surface models. Models can be thin
#' plate splines or quadratic polynomials
#' \insertCite{DelCastilloCR}{OptimaRegion}.
#'
#' Computes distribution-free bootstrapped confidence intervals on the mean and
#' median distance between the optima of two different responses. The responses can
#' be Thin Plate Spline models or Quadratic polynomial models. Program calls
#' each response, next computes all pairwise distances between points in each CR,
#' and finally bootstraps the distances to compute bca bootstrapped confidence
#' intervals for the mean and median distance.
#'
#' @param X1 nx2 matrix with the values of the 2 regressors (experimental factors)
#'           corresponding to the first response. Note: can have replicates. They
#'           will be eliminated by the program and the corresponding y-values averaged
#' @param y1 nx1 vector of values for the first response corresponding to X1
#' @param X2 nx2 matrix with the values of the 2 regressors (experimental factors)
#'           corresponding to the second response. Note: can have replicates. They
#'           will be eliminated by the program and the corresponding y-values averaged
#' @param y2 nx1 vector of values for the second response corresponding to X2
#' @param lambda psmoothing penalty if a TPS model is selected (default=0.04)
#' @param responseType use 'TPS' if fitting thin plate spline responses, 'Quad'
#'                     if fitting quadratic polynomials
#' @param nosim1and2 number of simulations(default = 200) used to find each of the two
#'                   confidence regions of optima
#' @param alpha confidence level (0 < alpha < 1; default = 0.05)
#' @param LB1 vector of lower bounds for x (2x1 vector) above which the optimum
#'            is sought for the first response
#' @param LB2 vector of lower bounds for x (2x1 vector) above which the optimum
#'            is sought for the second response
#' @param UB1 vector of upper bounds for x (2x1 vector) below which the optimum
#'            is sought for the first response
#' @param UB2 vector of upper bounds for x (2x1 vector) below which the optimum
#'            is sought for the second response
#' @param triangularRegion1 logical: if TRUE it will constrain the maximum points of
#'  response 1 to lie inside a triangle defined by the coordinates (0,0), and those in
#'  "vertex11", and "vertex21", see below (in addition to being constrained to
#'  lie inside the region defined by LB1 and UB1). NOTE: use TRUE when the treatments
#'  form a triangular experimental region in shape. If FALSE, optima will only be
#'  constrained to lie inside the rectangular region defined by LB1 and UB1.
#'  Default is FALSE.
#' @param vertex11 2 times 1 vector with coordinates defining one of the
#'   3 vertices of the triangular region where the first response is being
#'   optimized. Must be provided if triangularRegion1 is TRUE
#'   (NOTE: vertices numbered clockwise, with vertex0 fixed to (0,0))
#' @param vertex21 2 times 1 vector with coordinates defining a second
#'   vertex of a triangular region where the first response is being
#'   optimized. Must be provided if triangularRegion1 is TRUE
#' @param triangularRegion2 logical: if TRUE it will constrain the maximum points of
#'  response 2 to lie inside a triangle defined by the coordinates (0,0), and those in
#'  "vertex12", and "vertex22", see below (in addition to being constrained to
#'  lie inside the region defined by LB2 and UB2).NOTE: use TRUE when the treatments
#'  form a triangular experimental region in shape. If FALSE, optima will only be
#'  constrained to lie inside the rectangular region defined by LB2 and UB2.
#'  Default is FALSE.
#' @param vertex12 2 times 1 vector with coordinates defining one of the
#'   3 vertices of the triangular region where the second response is being
#'   optimized. Must be provided if triangularRegion2 is TRUE
#'   (NOTE: vertices numbered clockwise, with vertex0 fixed to (0,0))
#' @param vertex22 2 times 1 vector with coordinates defining a second
#'   vertex of a triangular region where the second response is being
#'   optimized. Must be provided if triangularRegion2 is TRUE
#' @param maximization1 logical: if TRUE (default) it maximizes response 1,
#'                      if FALSE it minimizes it
#' @param maximization2 logical: if TRUE (default) it maximizes response 2,
#'                      if FALSE it minimizes it
#' @param xlab1and2 text label for x axis in both confidence region plots
#'                  (default: "Protein eaten (mg)")
#' @param ylab1and2 text label for y axis in both confidence region plots
#'                  (default: "Carbohydrates eaten (mg)")
#' @param outputPDFFile1 name of the PDF file where the CR plot of the
#'                       first response is saved (default: "CR_plot.pdf")
#' @param outputPDFFile2 name of the PDF file where the CR plot of the
#'                       second response is saved (default: "CR_plot.pdf")
#' @param outputOptimaFile1 name of the text file containing the coordinates of
#'                          all the simulated optima of the first response
#' @param outputOptimaFile2 name of the text file containing the coordinates of
#'                          all the simulated optima of the second response
#' @return Upon completion, two PDF files with the CR plots and two text files
#'         with the coordinates of each set of optima are created, and the
#'         function also returns a list consisting of the following 5 components:
#'         \describe{
#'           \item{dist}{vector of distances between pairs of points taken
#'                         from each set of optima}
#'           \item{mean}{mean of dist}
#'           \item{median}{median of dist}
#'           \item{ciMean}{95% confidence interval for the mean of dist using
#'                           bca bootstrapping; it is a vector with 5 columns,
#'                           containing the signicance level, the next two
#'                           containing the indices of the order statistics used
#'                           in the calculations and the final two the calculated
#'                           endpoints of the CI's.}
#'           \item{ciMEdian}{95% confidence interval for the median of dist
#'                             using bca bootstrapping; it is a vector with 5 columns,
#'                             containing the signicance level, the next two
#'                             containing the indices of the order statistics used
#'                             in the calculations and the final two the calculated
#'                             endpoints of the CI's.}
#'         }
#' @inheritSection OptRegionQuad Author(s)
#' @references{
#'  \insertAllCited{}
#' }
#' @examples
#' \dontrun{
#' # Example: two randomly generated data sets, quadratic polynomial responses.
#' X1 <- cbind(runif(100, -2, 2), runif(100, -2, 2))
#' y1 <- as.matrix(72 - 11.78 * X1[, 1] + 0.74 * X1[, 2] - 7.25 * X1[, 1]^2 -
#'   7.55 * X1[, 2]^2 - 4.85 * X1[, 1] * X1[, 2] + rnorm(100, 0, 8))
#' X2 <- cbind(runif(100, -2, 2), runif(100, -2, 2))
#' y2 <- as.matrix(72 - 11.78 * X2[, 1] + 0.74 * X2[, 2] - 7.25 * X2[, 1]^2 -
#'   7.55 * X2[, 2]^2 - 4.85 * X2[, 1] * X2[, 2] + rnorm(100, 0, 8))
#' out <- CRcompare(
#'   X1 = X1, y1 = y1, X2 = X2, y2 = y2, responseType = "Quad", nosim1and2 = 200,
#'   alpha = 0.05, LB1 = c(-2, -2), UB1 = c(2, 2), LB2 = c(-2, -2), UB2 = c(2, 2)
#' )
#' }
#' @importFrom stats median
#' @export
CRcompare <- function(X1, y1, X2, y2, responseType = "TPS",
                      lambda = 0.04, nosim1and2 = 200, alpha = 0.05,
                      LB1, LB2, UB1, UB2,
                      triangularRegion1 = FALSE, vertex11 = NULL, vertex21 = NULL,
                      triangularRegion2 = FALSE, vertex12 = NULL, vertex22 = NULL,
                      maximization1 = TRUE, maximization2 = TRUE,
                      xlab1and2 = "Protein eaten (mg)",
                      ylab1and2 = "Carbohydrates eaten (mg)",
                      outputPDFFile1 = "CR_plot1.pdf", outputOptimaFile1 = "Optima1.txt",
                      outputPDFFile2 = "CR_plot2.pdf", outputOptimaFile2 = "Optima2.txt") {

  # Load required libraries
  # t<-.libPaths()
  # library("spam", lib.loc=t)
  # library("boot", lib.loc=t)
  # library("OptimaRegion", lib.loc=t)

  if (responseType == "TPS") { # fit TPS models
    # Run OptRegionTps (from package OptimaRegion) twice
    out1 <- OptRegionTps(
      X = X1, y = y1, nosim = nosim1and2, lambda = lambda, alpha = alpha, LB = LB1, UB = UB1, triangularRegion = triangularRegion1, vertex1 = vertex11, vertex2 = vertex21, maximization = maximization1, xlab = xlab1and2, ylab = ylab1and2,
      outputPDFFile = outputPDFFile1, outputOptimaFile = outputOptimaFile1
    )
    out2 <- OptRegionTps(
      X = X2, y = y2, nosim = nosim1and2, lambda = lambda, alpha = alpha, LB = LB2, UB = UB2, triangularRegion = triangularRegion2, vertex1 = vertex12, vertex2 = vertex22, maximization = maximization2, xlab = xlab1and2, ylab = ylab1and2,
      outputPDFFile = outputPDFFile2, outputOptimaFile = outputOptimaFile2
    )
  } else if (responseType == "Quad") # fit quadratic polynomails instead
  {
    # Run OptRegionQuad (from package OptimaRegion) twice
    out1 <- OptRegionQuad(
      X = X1, y = y1, nosim = nosim1and2, alpha = alpha, LB = LB1, UB = UB1, triangularRegion = triangularRegion1, vertex1 = vertex11, vertex2 = vertex21, maximization = maximization1, xlab = xlab1and2, ylab = ylab1and2,
      outputPDFFile = outputPDFFile1
    )
    out2 <- OptRegionQuad(
      X = X2, y = y2, nosim = nosim1and2, alpha = alpha, LB = LB2, UB = UB2, triangularRegion = triangularRegion2, vertex1 = vertex12, vertex2 = vertex22, maximization = maximization2, xlab = xlab1and2, ylab = ylab1and2,
      outputPDFFile = outputPDFFile2
    )
  }


  # Form as many pairs of points as in the smallest set and compute their euclidean distances
  Dmatrix <- spam::nearest.dist(x = out1$xin, y = out2$xin, upper = TRUE, delta = 1e20)
  dist <- spam::diag(Dmatrix)
  mean <- mean(dist)
  median <- median(dist)
  # Bootstrap the distances between points in each CR
  bmean <- boot::boot(dist, sampleMeans, R = 1000)
  bmedian <- boot::boot(dist, sampleMedians, R = 1000)
  ciMean <- boot::boot.ci(bmean, type = c("bca"))
  ciMedian <- boot::boot.ci(bmedian, type = c("bca"))
  return(list(dist = dist, mean = mean, median = median, ciMean = ciMean$bca, ciMedian = ciMedian$bca))
}
sampleMeans <- function(W, i) {
  mean(W[i])
}
sampleMedians <- function(W, i) {
  median(W[i])
}