R/predict-methods.R
In maSAE: Mandallaz' Model-Assisted Small Area Estimators

#' Methods for Function \code{predict}
#'
#' Calculate small area predictions and their variances.
#'
#' Based on the structure of the \code{saeObj} given, \code{predict} decides,
#' which
#' predictor to use:\cr
#' If a smallAreaMeans-data.frame covering all fixed effects is given, the
#' exhaustive
#' estimator \eqn{\hat{\tilde{y}}_{g, synth}} is calculated.  \cr
#' If a smallAreaMeans-data.frame not covering all fixed effects is given, the
#' partially
#' exhaustive
#' estimator \eqn{\hat{\tilde{y}}_{g, greg}} is calculated.  \cr
#' If no smallAreaMeans-data.frame but s1 is given, the three-phase
#' estimator \eqn{\hat{\tilde{y}}_{g, g3reg}} is calculated.  \cr
#' If neither smallAreaMeans nor s1 are given, the non-exhaustive
#' estimator \eqn{\hat{\tilde{y}}_{g, psynth}} is calculated.  \cr
#' If a clustering variable is given, the cluster sampling design equivalents
#' of the
#' above estimators are used.\cr
#' If \code{version} is not set to "1.0.0", the (pseudo) small and synthetic
#' estimations and their variances are also calculated (see
#' \code{vignette("A_Taxonomy_of_Estimators", package = "maSAE")})
#'
#' @name predict-methods
#' @aliases predict-methods predict,saeObj-method predict,sadObj-method
#' @docType methods
#' @section Methods: \describe{
#'
#' \item{\code{signature(object = saeObj)}}{Calculate predictions and variances
#' according to the auxiliary information given, see \bold{Details} above.}
#'
#' \item{\code{signature(object = sadObj)}}{Calculate design-based predictions
#' and
#' variances.}
#' }
#' @return A data frame containing predictions and variances for each small
#' area, see \bold{Details} above.
#' @keywords methods
#' @examples
#' ## ## design-based estimation
#' ## load data
#' data("s2", package = "maSAE")
#' ## create object
#' saeO <- maSAE::saObj(data = s2, f = y ~ NULL | g)
#' ## design-based estimation for all small areas given by g
#' maSAE::predict(saeO)
setMethod(
  f = "predict",
  signature(object = "sadObj"),
  function(object) {
    varnames <- all.vars(methods::slot(object, "f"))
    small_area <- varnames[length(varnames)]
    predictand <- varnames[1]
    # recycle comments generated by the constructor function
    com <- comment(object)
    include <- methods::slot(object, "data")[, methods::slot(object, "include")]
    if (!is.null(methods::slot(object, "cluster"))) { # cluster sampling
      m <- tapply(as.numeric(include),
        methods::slot(object, "data")[, methods::slot(object, "cluster")],
        sum,
        na.rm = TRUE
      )
      y_c <- tapply(
        methods::slot(object, "data")[, predictand] * as.numeric(include),
        methods::slot(object, "data")[, methods::slot(object, "cluster")], sum
      ) / m
    } else { # non-cluster sampling
      y <- methods::slot(object, "data")[, predictand]
    }
    out <- data.frame()
    groups <- unique(methods::slot(object, "data")[, small_area])
    for (group in sort(groups[!is.na(groups)])) {
        cluster <- methods::slot(object, "cluster")
      if (!is.null(cluster)) { # cluster sampling
          cluster <- methods::slot(object, "data")[cluster]
          small_area <- methods::slot(object, "data")[, small_area]
        if (is.factor(small_area)) {
          smallarea <- tapply(as.character(small_area), cluster, unique)
        } else {
          smallarea <- tapply(small_area, cluster, unique)
        }
        gc <- smallarea == group
        gc[is.na(gc)] <- FALSE
        n_gc <- sum(as.numeric(gc))
        p_cs <- Reduce("+", mapply("*", m[gc],
          y_c[gc],
          SIMPLIFY = FALSE
        )) / sum(m[gc])
        var_cs <- 1 / sum(m[gc])^2 * n_gc / (n_gc - 1) * sum((y_c[gc] * m[gc]
          - p_cs * m[gc])^2)
        result <- data.frame(
          small_area = group,
          prediction = p_cs,
          variance = var_cs
        )
      } else { # non-cluster sampling
        g <- methods::slot(object, "data")[, small_area] == group
        g[is.na(g)] <- FALSE
        result <- data.frame(
          small_area = group,
          prediction = mean(y[g]),
          variance = stats::var(y[g]) / length(y[g])
        )
      }
      out <- rbind(out, result)
    }
    comment(out) <- com
    attr(out, "type") <- "design-based"
    return(out)
  }
)

#' @name predict-methods
#' @param version set to "1.0.0" or set options(maSAE_version = "1.0.0")
#' to use the functions from maSAE 1.0.0. See NEWS.md for 2.0.0.
#' @param use_lm Rather for internal use, stick with the default.
#' @seealso \code{vignette(package = "maSAE")}
#' @examples
#' ## ## model-assisted estimation
#' ## load  data
#' data("s1", "s2", package = "maSAE")
#' str(s1)
#' s12 <- maSAE::bind_data(s1, s2)
#' ## create object
#' saeO <- maSAE::saObj(data = s12, f = y ~ x1 + x2 + x3 | g, s2 = "phase2")
#' ## small area estimation
#' maSAE::predict(saeO)
setMethod(
  f = "predict",
  signature(object = "saeObj"),
  predict_version
)