R/intcalibrate.R
In inca: Integer Calibration

#' @title
#' Integer Calibration Function
#'
#' @description
#' This function performs an integer programming algorithm developed for calibrating integer weights, 
#' in order to reduce a specific objective function 
#' 
#' @param weights A numerical vector of real or integer weights to be calibrated. If real values are provided, they will be rounded before applying the calibration algorithm
#' @param formula A formula to express a linear system for hitting the \code{targets}
#' @param targets A numerical vector of point-targets to hit
#' @param objective A character specifying the objective function used for calibration. By default \code{"L1"}. See details for more information
#' @param tgtBnds A two-column matrix containing the bounds for the point-targets
#' @param lower A numerical vector or value defining the lower bounds of the weights
#' @param upper A numerical vector or value defining the upper bounds of the weights
#' @param sparse A logical value denoting if the linear system is sparse or not. By default it is \code{FALSE}
#' @param scale A numerical vector of positive values 
#' @param penalty A character specifying the penalty function. By default \code{"null"} (for backward compatibility). See details for more information
#' @param tuning A positive value denoting the tuning parameter to control the intensity of the penalty function
#' @param data A \code{data.frame} or \code{matrix} object containing the data to be used for calibration
#' 
#' @details 
#' The integer programming algorithm for calibration can be performed by considering one of the following objective functions:
#' \describe{
#'   \item{\code{"L1"}}{for the summation of absolute errors}
#'   \item{\code{"aL1"}}{for the asymmetric summation of absolute errors}
#'   \item{\code{"rL1"}}{for the summation of absolute relative errors}
#'   \item{\code{"LB1"}}{for the summation of absolute errors if outside the boundaries}
#'   \item{\code{"rB1"}}{for the summation of absolute relative errors if outside the boundaries}
#'   \item{\code{"L2"}}{for the summation of square errors}
#'   \item{\code{"aL2"}}{for the asymmetric summation of square errors}
#'   \item{\code{"rL2"}}{for the summation of square relative errors}
#'   \item{\code{"LB2"}}{for the summation of square errors if outside the boundaries}
#'   \item{\code{"rB2"}}{for the summation of square relative errors if outside the boundaries}
#'   \item{\code{"LC"}}{for the summation of the logcosh errors}
#'   \item{\code{"aLC"}}{for the asymmetric summation of the logcosh errors}
#'   \item{\code{"rLC"}}{for the summation of the logcosh relative errors}
#'   \item{\code{"SE"}}{for the summation of the exponential absolute errors}
#'   \item{\code{"aSE"}}{for the asymmetric summation of the exponential absolute errors}
#'   \item{\code{"rSE"}}{for the summation of the exponential absolute relative errors}
#' }
#'
#' The calibrated weights can also be restricted further using one of the following penalty functions:
#' \describe{
#'   \item{\code{"null"}}{does not penalize, and it is used for backward compatibility}
#'   \item{\code{"l0norm"}}{counts the number of non-zero adjustments}
#'   \item{\code{"lasso"}}{sums the absolute values of the adjustments}
#'   \item{\code{"ridge"}}{sums the adjustments squared}
#'   \item{\code{"raking"}}{uses raking ratios}
#'   \item{\code{"minentropy"}}{uses the minimum entropy}
#'   \item{\code{"quadrat"}}{uses a nomalized euclidean distance}
#'   \item{\code{"quadmod"}}{uses a modified normalization in the euclidean distance}
#'   \item{\code{"hellinger"}}{uses the Hellinger's distance}
#'   \item{\code{"mcp"}}{uses a variation of the minimax concave penalty}
#'   \item{\code{"scad"}}{uses a variation of the smoothly clipped absolute deviations}
#'   \item{\code{"relasso"}}{sums the absolute value of the relative adjustments}
#'   \item{\code{"modrelasso"}}{sums the absolute value of the modified relative adjustments}
#'   \item{\code{"rehuber"}}{uses the Huber loss on the relative adjustments}
#'   \item{\code{"modrehuber"}}{uses the Huber loss on the modified relative adjustmnets}
#' }
#' In particular, the adjustments are considered from the initial rounded weights rather than the input vector of weights with real numbers.
#'
#' A two-column matrix must be provided to \code{tgtBnds} when \code{objective = "aL1"}, \code{objective = "LB1"}, \code{objective = "rB1"},
#' \code{objective = "aL2"}, \code{objective = "LB2"}, \code{objective = "rB2"}, \code{objective = "aLC"}, and \code{objective = "aSE"}..
#' 
#' The argument \code{scale} must be specified with a vector of positive real numbers when \code{objective = "rL1"},
#' \code{objective = "rL2"}, \code{objective = "rLC"}, or \code{objective = "rSE"}.
#'
#' @return 
#' A numerical vector of calibrated integer weights. 
#' 
#' @examples
#' library(inca)
#' set.seed(0)
#' w <- rpois(10, 4)
#' data <- matrix(rbinom(1000, 1, .3) * rpois(1000, 4), 100, 10)
#' y <- data %*% w
#' w <- runif(10, 0, 7.5)
#' print(sum(abs(y - data %*% w)))
#' cw <- intcalibrate(w, ~. + 0, y, lower = 1, upper = 7, sparse = TRUE, data = data)
#' print(sum(abs(y - data %*% cw)))
#' qw <- intcalibrate(w, ~. + 0, y, lower = 1, upper = 7, sparse = TRUE, data = data,
#'                    penalty = "quadrat", tuning = 0.7)
#' print(sum(abs(y - data %*% qw)))
#' barplot(table(cw), main = "Calibrated integer weights")
#' barplot(table(qw), main = "Calibrated integer weights")
#'
#' @export

"intcalibrate" <- function(weights, formula, targets, objective = c("L1", "aL1", "rL1", "LB1", "rB1",
                                                "L2", "aL2", "rL2", "LB2", "rB2", "LC", "aLC", "rLC",
                                                "SE", "aSE", "rSE"),
                           tgtBnds = NULL, lower = -Inf, upper = Inf, scale = NULL, sparse = FALSE,
                           penalty = c("null", "l0norm", "lasso", "ridge", "raking", "minentropy",
                                       "quadrat", "quadmod", "hellinger", "mcp", "scad", "relasso", 
                                       "modrelasso", "rehuber", "modrehuber"), tuning = 0.0,
                           data) {
  
  if (is.null(tgtBnds)) {
    if (objective[1] %in% c("LB1", "aL1", "rB1", "LB2", "aL2", "rB2", "aLC", "aSE"))
      stop("\"tgtBnds\" must be specified when \"objective\" is either ", 
           paste("\"LB1\",", "\"aL1\",", "\"rB1\",", "\"LB2\",", "\"aL2\",", "\"rB2\",", "\"aLC\",", "or \"aSE\""))
    tgtBnds <- cbind(rep_len(-Inf, length(targets)),
                     rep_len( Inf, length(targets)))
  }
  else { 
    tgtBnds <- as.matrix(tgtBnds)
    if (ncol(tgtBnds) != 2) 
      stop("\"tgtBnds\" must be a data.frame or a matrix object with two columns")
  }
  if(is.null(scale)) {
    if (objective[1] %in% c("rL1", "rL2", "rLC", "rSE"))
      warning("The argument \"scale\" should be specified when \"objective\" is either ",
              "\"rL1\", \"rL2\", \"rLC\", or \"rSE\"\n",
              "In this case, \"scale\" will be set as a vector of values equal to one by default")
    scale <- rep_len(1, length(targets))
  }
  mylower <- pmax(ceiling(lower), rep_len(-Inf, length(weights)))
  myupper <- pmin(floor(upper), rep_len(Inf, length(weights)))
  wts <- adjWeights(weights, lower = mylower, upper = myupper)
  if(sparse) {
    A <- Matrix::sparse.model.matrix(formula, data = as.data.frame(data), row.names = FALSE)
    wts <- .Call('sparse_ipc', A, targets, wts, mylower, myupper, tgtBnds, scale, objective[1L], penalty[1L], tuning, PACKAGE = 'inca')
  }
  else {
    A <- model.matrix(formula, data = as.data.frame(data))
    wts <- .Call('dense_ipc', A, targets, wts, mylower, myupper, tgtBnds, scale, objective[1L], penalty[1L], tuning, PACKAGE = 'inca')
  }
  return(as.vector(wts))
}