R/WIPF3.R
In WIPF: Weighted Iterative Proportional Fitting

Documented in WIPF3

#' Weighted Iterative Proportional Fitting (WIPF) in three dimensions
#'
#' @description  Implements WIPF in three dimensions. This function updates an
#'               initial 3D-array (referred to as the seed)
#'               using a 3D-array of weights to align with a set of three vectors
#'               (referred to as 1D-margins) and three matrices (referred to as 2D-margins),
#'               where some of them can be missing.
#'               When all provided margins are compatible given the weights, the updated
#'               values ensure that the weighted sums across rows, columns, layers, and
#'               combinations of (row, column), (row, layer), and (column, layer) coincide
#'               with the provided margins.
#'               If the provided margins are incompatible given the weights, the functions `WIPF1`
#'               and `WIPF2` are applied to the initial margins to make the margins compatible with the weights.
#'               In those cases, the margins are updated (are made compatible) in increasing order
#'               of sub-indices and with the second sub-indices running faster.
#'
#' @author Jose M. Pavia, \email{pavia@@uv.es}
#'
#' @param seed A (RxCxL) array of non-negative values with the initial values.
#'
#' @param weights A (RxCxL) array of non-negative values with the weights associated to each entry of the `seed` array.
#'
#' @param margin1 A R-length vector of positive values with the target (weighted) marginal sum across both layers and columns to be fitted.
#'
#' @param margin2 A C-length vector of positive values with the target (weighted) marginal sum across both rows and layers to be fitted.
#'
#' @param margin3 A L-length vector of positive values with the target (weighted) marginal sum across both rows and columns to be fitted.
#'
#' @param margin12 A RxC matrix of positive values with the target (weighted) marginal sum across layers to be fitted.
#'
#' @param margin13 A RxL matrix of positive values with the target (weighted) marginal sum across columns to be fitted.
#'
#' @param margin23 A CxL matrix of positive values with the target (weighted) marginal sum across both rows to be fitted.
#'
#' @param normalize `TRUE`/`FALSE` argument indicating whether the weights should be normalized across all dimensions
#'                  (for either row, column, layer, row-column, row-layer or column-layer weights to sum 1)
#'                  before constructing the weighted sums for comparison with the margin values.
#'                  Default, `TRUE`. Normalization is essential when adjusting a set of indexes where the margins represent
#'                  theoretical convex combinations of the inner indexes.
#'                  This characterizes a typical context where WIPF could be of value.
#'
#' @param tol Stopping criterion. The algorithm stops when the maximum difference between the weighted sum(s) of
#'            the values to be fitted and the margin(s) is lower than the value specified by `tol`.
#'            Default, `0.000001`.
#'
#' @param maxit Stopping criterion. A positive integer number indicating the maximum number of iterations
#'              allowed. Default, `1000`. The algorithm will stop if the values to be fitted still has not
#'              converged after this many iterations.
#'
#' @param full `TRUE`/`FALSE` argument indicating if either only the solution should be shown or
#'                            a more complete output.
#'
#' @param ... Other arguments to be passed to the function. Not currently used.
#'
#' @return
#' When `full = FALSE` an object similar to `seed` with the solution reached when the algorithm stops.
#' When `full = TRUE` a list with the following components:
#'  \item{sol}{ An object similar to `seed` with the solution reached at convergence (or when the maximum number of iterations is reached).}
#'  \item{iter}{ Number of iterations when the algorithm stops.}
#'  \item{margin1}{ A R-length vector of positive values with the actual margin1 object used to reach the solution.
#'                    This coincides with `margin1` even when all the margins are not compatible given the weights.}
#'  \item{margin2}{ A C-length vector of positive values with the actual margin2 object used to reach the solution.
#'                    This coincides with `margin2` when all the margins are compatible given the weights.}
#'  \item{margin3}{ A L-length vector of positive values with the actual margin3 object used to reach the solution.
#'                    This coincides with `margin3` when all the margins are compatible given the weights.}
#'  \item{margin12}{ A RxC matrix of positive values with the actual margin12 object used to reach the solution.
#'                    This coincides with `margin12` when all the margins are compatible given the weights.}
#'  \item{margin13}{ A RxL matrix of positive values with the actual margin13 object used to reach the solution.
#'                    This coincides with `margin13` when all the margins are compatible given the weights.}
#'  \item{margin23}{ A CxL matrix of positive values with the actual margin23 object used to reach the solution.
#'                    This coincides with `margin23` when all the margins are compatible given the weights.}
#'  \item{dev.margins}{ A list with a set of objects similar to the margins with absolute maximum deviations
#'                        between the values in margins and the corresponding weighted sums of the values in `sol`.}
#'  \item{dev.congruence}{A list with a set of objects similar to the margins containing the differences
#'                        between the final margins actually used (after adjusting them,
#'                        if necessary, to be compatible with the weights) and the original margins provided by the user.}
#'  \item{inputs}{ A list containing all the objects with the values used as arguments by the function.}
#'
#' @note Weighted Iterative proportional fitting is an extension of IPF.
#'       WIPF produces the same solutions than IPF with all weights being ones
#'       and when they are not normalized. IPF is also known as RAS in economics,
#'       raking in survey research or matrix scaling in computer science.
#'
#' @export
#'
#' @examples
#' s <- structure(c(0.9297, 0.9446, 0.8763, 0.92, 0.8655, 0.8583, 0.8132,
#'                  0.8679, 0.7968, 0.7834, 0.721, 0.7859, 0.7747, 0.7851, 0.8632,
#'                  1.041, 1.5617, 1.5642, 1.4847, 1.5176, 1.4157, 1.3851, 1.3456,
#'                  1.4012, 1.3017, 1.2626, 1.1904, 1.2668, 1.3203, 1.3181, 1.1965,
#'                  1.1654, 1.2219, 1.3863, 1.306, 1.1963, 1.1376, 1.35, 1.2595,
#'                  1.1289, 1.0456, 1.2863, 1.1274, 1.0208, 1.0542, 1.1272, 1.1594,
#'                  1.1668, 1.1931, 1.1328, 1.1221, 1.1011, 1.1298, 1.0454, 1.0573,
#'                  1.0557, 1.0599, 0.973, 0.9545, 0.9721, 1.0489, 0.9934, 0.9382,
#'                  0.876, 1.339, 1.1939, 1.0229, 1.0378, 1.0402, 0.9554, 0.9794,
#'                  1.0089, 0.9422, 0.8584, 0.8563, 0.9013, 0.9252, 0.8706, 0.8354,
#'                  0.8071, 0.9737, 1.0008, 0.9593, 0.9257, 0.9556, 0.9534, 0.9313,
#'                  0.9151, 0.883, 0.8731, 0.8285, 0.8309, 0.9131, 0.9258, 0.8467,
#'                  0.7785), .Dim = c(4L, 4L, 6L))
#' w <- structure(c(18520.3, 11776.3, 19479.5, 22497.6, 18968.7, 17263.7,
#'                  36494.7, 21707, 13406.3, 13570.4, 37746.1, 20593.2, 6595.6, 25444.6,
#'                  59868.2, 81777.2, 3380.4, 20610.7, 22247.3, 6800.9, 5236.3, 14877.8,
#'                  7205, 5028.4, 1130.7, 6603.2, 4007.4, 2620.5, 374.8, 1624.3,
#'                  4963.7, 9551.3, 31806, 93615.9, 121986.6, 44640.3, 32110.6, 95814.4,
#'                  72827.9, 30922.5, 43197.3, 72050.8, 66673.4, 40370.1, 31488.2,
#'                  55014.9, 69457.2, 80021.2, 17701.7, 8765.2, 11790.9, 3872.8,
#'                  30544.5, 12141.2, 12415.2, 9471.9, 36138.6, 19198.1, 23120.1,
#'                  15597.9, 12140.2, 8058.3, 20948.3, 19380.2, 78543.9, 86503.6,
#'                  28727.8, 29208.7, 26300.6, 42363, 20786.6, 14380.3, 9493.5, 17816.2,
#'                  19844.1, 10898.2, 1419, 4211.5, 20615, 22748.2, 3365.8, 2639.8,
#'                  2433.3, 930.5, 22119.6, 31022.7, 12748.5, 10161.4, 15450.2, 32747.1,
#'                  22596.4, 13228.1, 17289.2, 30189.2, 31476.6, 15338.7),
#'                 .Dim = c(4L, 4L, 6L))
#' m1 <- c(1.025527, 1.018229, 0.969744, 0.994998)
#' m2 <- c(1.111023, 1.030213, 0.935041, 0.906709)
#' m3 <- c(0.810568, 1.375203, 1.07096, 1.044461, 0.949441, 0.915284)
#' m12 <- structure(c(1.061059, 1.120345, 1.097519, 1.188501, 1.017091,
#'                    0.967245, 1.03447, 1.18867, 0.9797, 0.900885, 0.85575, 1.070772,
#'                    1.041953, 1.074653, 0.887316, 0.791906), .Dim = c(4L, 4L))
#' m13 <- structure(c(0.779029, 0.865343, 0.757887, 0.852708, 1.351367,
#'                    1.409585, 1.350907, 1.361528, 1.091867, 1.107661, 0.99364, 1.127478,
#'                    1.13439, 0.948428, 1.075919, 0.916096, 1.031958, 0.835103, 1.006321,
#'                    0.982888, 0.86109, 0.976673, 0.961731, 0.764211), .Dim = c(4L, 6L))
#' m23 <- structure(c(0.962955, 0.880973, 0.798545, 0.714783, 1.547556,
#'                    1.277098, 1.149491, 1.210108, 1.186342, 1.084436, 0.976822, 1.003611,
#'                    1.092564, 1.066306, 1.038601, 0.996779, 0.971751, 1.016173, 0.867197,
#'                    0.803929, 0.831913, 0.933863, 0.857392, 0.960169), .Dim = c(4L, 6L))
#'
#' example <- WIPF3(seed = s, weights = w, margin3 = m3, margin12 = m12, margin13 = m13)
#'

WIPF3 <- function(seed,
                  weights,
                  margin1,
                  margin2,
                  margin3,
                  margin12,
                  margin13,
                  margin23,
                  normalize = TRUE,
                  tol = 10^-6,
                  maxit = 1000,
                  full = FALSE,
                  ...)
{

  # checking margins provided
  if (missing(margin1)) {
    margin1 <- NULL
  }
  if (missing(margin2)) {
    margin2 <- NULL
  }
  if (missing(margin3)) {
    margin3 <- NULL
  }
  if (missing(margin12)) {
    margin12 <- NULL
  }
  if (missing(margin13)) {
    margin13 <- NULL
  }
  if (missing(margin23)) {
    margin23 <- NULL
  }

  # Inputs
  inputs <- c(as.list(environment()), list(...))

  # Compatible margins
  margins <- test_WIPF3(seed = seed,
                       weights = weights,
                       margin1 = margin1,
                       margin2 = margin2,
                       margin3 = margin3,
                       margin12 = margin12,
                       margin13 = margin13,
                       margin23 = margin23,
                       normalize = normalize,
                       tol = tol,
                       maxit = maxit,
                       full = full)

  errors <- updating_errors(seed = seed,
                            weights = weights,
                            margins = margins,
                            normalize = normalize)

  dif <- max(errors$errors)
  iter <- 0L
  # tamanyo <- dim(weights)
  delta <- seed

  while(dif > tol & iter < maxit){
    delta <- updating_delta(delta = delta,
                            weights = weights,
                            margins = margins,
                            normalize = normalize)
    errors <- updating_errors(seed = delta,
                              weights = weights,
                              margins = margins,
                              normalize = normalize)
    dif <- max(errors$errors)
    iter <- iter + 1L
  }

  if(full){
    dif2 <- list("dev1" = margins$margin1 - inputs$margin1,
                 "dev2" = margins$margin2 - inputs$margin2,
                 "dev3" = margins$margin3 - inputs$margin3,
                 "dev12" = margins$margin12 - inputs$margin12,
                 "dev13" = margins$margin13 - inputs$margin13,
                 "dev23" = margins$margin23 - inputs$margin23
                 )
    dif2 <- dif2[sapply(dif2, length) > 0]

    return(list("sol" = delta, "iter" = iter,
                "margin1" = margins$margin1, "margin2" = margins$margin2,
                "margin3" = margins$margin3, "margin12" = margins$margin12,
                "margin13" = margins$margin13, "margin23" = margins$margin23,
                "dev.margins" = errors$dif, "dev.congruence" = dif2,
                "inputs" = inputs))
  } else {
    return(delta)
  }

}