R/predict-functions_old.R

Defines functions predict_v1

#' references
#'
#' internal character object to store information about the literature used
#' in the reference-attribute of the objects returned by
#' \code{\link[=predict]{?maSAE::predict}}.
#'
#' @format chr
#'
#' @usage NULL
#' @keywords internal
#' @rdname maSAE-internal
# called only once,
# but created here to enhance comprehensibility of the code.
references <- " I cite 6 manuscripts:

a2 Daniel Mandallaz. Design-based properties of some small-area estimators in
forest inventory with two-phase sampling.
In: Canadian Journal of Forest Research 43.5 (2013), pp. 441-449.
doi: 10.1139/cjfr-2012-0381.

a1 Daniel Mandallaz. Design-based properties of some small-area estimators in
forest inventory with two-phase sampling.
Tech. rep. Eidgenoessische Technische Hochschule Zuerich,
Departement Umweltsystemwissenschaften, 2012.
doi: 10.3929/ethz-a-007318974.

b2 Daniel Mandallaz, Jochen Breschan, and Andreas Hill. New regression
estimators in forest inventories with two-phase sampling and partially
exhaustive information: a design-based Monte Carlo approach with applications
to small-area estimation.
In: Canadian Journal of Forest Research 43.11 (2013), pp. 1023-1031.
doi: 10.1139/cjfr-2013-0181.

b1 Daniel Mandallaz. Regression estimators in forest inventories with twophase
sampling and partially exhaustive information with applications to small-area
estimation.
Tech. rep. Eidgenoessische Technische Hochschule Zuerich,
Departement Umweltsystemwissenschaften, 2013.
doi: 10.3929/ethz-a-007623322.

c2 Daniel Mandallaz. A three-phase sampling extension of the generalized
regression estimator with partially exhaustive information.
In: Canadian Journal of Forest Research 44.4 (2014), pp. 383-388.
doi: 10.1139/cjfr-2013-0449.

c1 Daniel Mandallaz. Regression estimators in forest inventories with threephase
sampling and two multivariate components of auxiliary information.
Tech. rep. Eidgenoessische Technische Hochschule Zuerich,
Departement Umweltsystemwissenschaften, 2013.
doi: 10.3929/ethz-a-009990020.

There's three topics ('a', 'b' and 'c') with two versions each,
(1) a detailed report published via http://e-collection.library.ethz.ch/ and
(2) a paper in CFJR.
I use topicversion to cite them. i.e. a1 is the report on topic a.
I cite equations from the manuscripts using
topicversion.equationnumber[.equationpart][.equationversion]
where 'equationnumber. is the equation number from the manuscript,
for multiline equations 'equationpart' is the line of the equation indexed
by letters
and for equations differing by an index, say k, equationversion gives the
value of the index.
So a2_13_b refers to the second line of equation (13) in
the CJFR-paper 'Design-based properties of some small-area estimators ...'
and b2_5_2 refers to the version for k=2 of equation (5) in the CJFR-paper
'New regression estimators ...'
"

predict_v1 <- function(object) {
    vimfold <- TRUE # I use if (vimfold) {...}-blocks for folding in vim.
                    # They're just TRUE.
    varnames <- all.vars(methods::slot(object, "f"))
    small_area <- varnames[length(varnames)]
    predictors <- varnames[-c(1, which(varnames == small_area))]
    predictand <- varnames[1]

    com <- comment(object) # recycle comments generated by the constructor
    if (vimfold) { # which type of auxiliary data do we have?
      if (is.null(methods::slot(object, "smallAreaMeans"))) {
        if (is.null(methods::slot(object, "s1"))) {
          pred_type <- "non-exhaustive"
        } else {
          pred_type <- "partially"
          threephase <- TRUE
        }
      } else {
        if (
          length(colnames(methods::slot(object, "smallAreaMeans"))) == length(c(predictors, small_area)) &&
            all(sort(colnames(methods::slot(object, "smallAreaMeans"))) == sort(c(predictors, small_area)))
        ) {
          pred_type <- "exhaustive"
        } else {
          pred_type <- "partially"
          threephase <- FALSE
        }
      }
    }

    if (vimfold) { ## cluster and non-cluster commons
      if (pred_type == "partially") {
        if (threephase) {
          ## find those predictors which are not all NA where s1 is FALSE
          predictors1 <- predictors[
            apply(
              !is.na(
                methods::slot(object, "data")[!methods::slot(object, "data")[, methods::slot(object, "s1")], predictors]
              ),
              2,
              all
            )
          ]
        } else {
          predictors1 <- colnames(methods::slot(object, "smallAreaMeans"))[-which(colnames(methods::slot(object, "smallAreaMeans")) == small_area)]
        }
        z1 <- as.list(as.data.frame(t(cbind(1, methods::slot(object, "data")[, predictors1]))))
      }
      if (is.null(methods::slot(object, "cluster")) ||
        pred_type == "partially") {
        if (is.null(methods::slot(object, "s2"))) {
          s2 <- rep(TRUE, nrow(methods::slot(object, "data")))
        } else {
          s2 <- methods::slot(object, "data")[, methods::slot(object, "s2")]
        }
        n_s2 <- sum(as.numeric(s2))
        y <- methods::slot(object, "data")[, predictand]
        z <- as.list(as.data.frame(t(cbind(1, methods::slot(object, "data")[, c(predictors)]))))
      }
    }
    if (pred_type %in% c("exhaustive", "partially", "non-exhaustive")) { ## testing for missing values in samples s2/s1/s0
      ## missing predictand in s2
      if (is.null(methods::slot(object, "s2"))) {
        pred_vals <- methods::slot(object, "data")[, predictand]
      } else {
        pred_vals <- methods::slot(object, "data")[methods::slot(object, "data")[, methods::slot(object, "s2")], predictand]
      }
      !any(is.na(pred_vals)) || stop("can't handle missing values for predictand in s2")
      ## missing predictor in s2
      if (is.null(methods::slot(object, "s2"))) {
        pred_vals <- methods::slot(object, "data")[, predictors]
      } else {
        pred_vals <- methods::slot(object, "data")[methods::slot(object, "data")[, methods::slot(object, "s2")], predictors]
      }
      !any(is.na(pred_vals)) || stop("can't handle missing values for predictors in s2")
      ## missing predictor in s1
      if (pred_type %in% c("non-exhaustive", "partially")) {
        if (is.null(methods::slot(object, "s1"))) {
          pred_vals <- methods::slot(object, "data")[, predictors]
        } else {
          pred_vals <- methods::slot(object, "data")[methods::slot(object, "data")[, methods::slot(object, "s1")], predictors]
        }
        !any(is.na(pred_vals)) || stop("can't handle missing values for predictors in s1")
      }
      ## missing predictor in s0
      ## works with three-phase implementation
    }
    if (!is.null(methods::slot(object, "cluster"))) { # cluster sampling
      if (vimfold) { ## commons
        if (is.null(methods::slot(object, "s1"))) {
          ## s1 is given by s2 %in% c(TRUE,FALSE)
          ## we need to remove 'na'-Values for 'cluster'
          s1c <- rep(
            TRUE,
            length(unique(
              methods::slot(object, "data")[
                !is.na(methods::slot(object, "data")[, methods::slot(object, "cluster")]),
                methods::slot(object, "cluster")
              ]
            ))
          )
          names(s1c) <- unique(
            methods::slot(object, "data")[
              !is.na(methods::slot(object, "data")[, methods::slot(object, "cluster")]),
              methods::slot(object, "cluster")
            ]
          )
        } else {
          s1c <- tapply(methods::slot(object, "data")[, methods::slot(object, "s1")], methods::slot(object, "data")[, methods::slot(object, "cluster")], unique)
        }
        s2c <- tapply(methods::slot(object, "data")[, methods::slot(object, "s2")], methods::slot(object, "data")[, methods::slot(object, "cluster")], unique)
        n_s2c <- sum(as.numeric(s2c))
        include <- methods::slot(object, "data")[, methods::slot(object, "include")]
        m <- tapply(as.numeric(include), methods::slot(object, "data")[, methods::slot(object, "cluster")], sum, na.rm = TRUE)
        y_c <- tapply(
          methods::slot(object, "data")[, predictand] * as.numeric(include),
          methods::slot(object, "data")[, methods::slot(object, "cluster")], sum
        ) / m

        if (length(predictors) == 1) {
          z_c <- mapply(append,
            mapply("/",
              by(methods::slot(object, "data")[, c(predictors)] * as.numeric(include),
                methods::slot(object, "data")[, methods::slot(object, "cluster")], sum,
                simplify = TRUE
              ),
              m,
              SIMPLIFY = FALSE
            ),
            1,
            SIMPLIFY = FALSE,
            after = 0
          )
        } else {
          z_c <- mapply(append,
            mapply("/",
              by(methods::slot(object, "data")[, c(predictors)] * as.numeric(include),
                methods::slot(object, "data")[, methods::slot(object, "cluster")], colSums,
                simplify = TRUE
              ),
              m,
              SIMPLIFY = FALSE
            ),
            1,
            SIMPLIFY = FALSE,
            after = 0
          )
        } ## a2 p.445
        ## set up a_cs2c
        a_cs2c <- (Reduce(
          "+",
          mapply("*", m[s2c],
            lapply(
              z_c[s2c],
              function(x) as.numeric(x) %o% as.numeric(x)
            ),
            SIMPLIFY = FALSE
          )
        )) / n_s2c ## from a2_38
        ia_cs2c <- solve(a_cs2c)
        ## estimate regression coefficients
        a2_38 <- as.vector(ia_cs2c %*% Reduce("+", mapply("*", m[s2c],
          mapply("*", y_c[s2c],
            z_c[s2c],
            SIMPLIFY = FALSE
          ),
          SIMPLIFY = FALSE
        )) / n_s2c)
        ## calculate residuals
        r_c <- y_c - sapply(z_c, function(x) x %*% a2_38) ## a2 p.445
        ## design-based variance-covariance matrix
        a2_39 <- ia_cs2c %*%
          (Reduce(
            "+",
            mapply("*", m[s2c]^2,
              mapply("*",
                r_c[s2c]^2,
                lapply(z_c[s2c], function(x) as.numeric(x) %o% as.numeric(x)),
                SIMPLIFY = FALSE
              ),
              SIMPLIFY = FALSE
            )
          ) / n_s2c^2) %*% ia_cs2c
      }
      switch(pred_type # uncommons
        , "partially" = {
          n_s1c <- sum(as.numeric(s1c))
        },
        "exhaustive" = # same as below
        , "non-exhaustive" = {
        },
        stop(paste("unkown pred_type", pred_type))
      )
    }
    else { # non-cluster sampling
      if (vimfold) { ## commons
        if (is.null(methods::slot(object, "s1"))) {
          s1 <- rep(TRUE, nrow(methods::slot(object, "data")))
        } else {
          s1 <- methods::slot(object, "data")[, methods::slot(object, "s1")]
        }
        ## set up a_s2
        a_s2 <- Reduce("+", lapply(z[s2], function(x) as.numeric(x) %o% as.numeric(x))) / n_s2
        ia_s2 <- solve(a_s2)
        ## estimate regression coefficients
        u_s2 <- Reduce("+", mapply("*", z[s2], y[s2], SIMPLIFY = FALSE)) / n_s2
        a2_13_b <- as.numeric(ia_s2 %*% u_s2)
        ## calculate residuals
        r <- y - sapply(z, function(x) x %*% a2_13_b) # a2 p.443 & b2 p.1025
        ## design-based variance-covariance matrix
        a2_15 <- ia_s2 %*% (Reduce("+", mapply("*", r[s2]^2,
          lapply(z[s2], function(x) as.numeric(x) %o% as.numeric(x)),
          SIMPLIFY = FALSE
        )) / n_s2^2) %*% ia_s2
      }
      switch(pred_type # uncommons
        , "exhaustive" = # same as below
        , "non-exhaustive" = {
        },
        "partially" = {
          ## classical model
          n_s1 <- sum(as.numeric(s1)) ## needed in b2_35
          predictors2 <- predictors[-which(predictors %in% predictors1)]
          a1_s2 <- Reduce("+", lapply(z1[s2], function(x) as.numeric(x) %o% as.numeric(x))) / n_s2 ## from b2_27
          ia1_s2 <- solve(a1_s2)
          u1_s2 <- Reduce("+", mapply("*", z1[s2], y[s2], SIMPLIFY = FALSE)) / n_s2
          b2_5_2 <- as.numeric(ia1_s2 %*% u1_s2)
          r1 <- y - sapply(z1, function(x) x %*% b2_5_2) # b2 p.1025
          b1_29 <- ia1_s2 %*% (Reduce("+", mapply("*", r1[s2]^2,
            lapply(z1[s2], function(x) as.numeric(x) %o% as.numeric(x)),
            SIMPLIFY = FALSE
          )) / n_s2^2) %*% ia1_s2
        },
        stop(paste("unkown pred_type", pred_type))
      )
    }
    out <- data.frame()
    for (group in sort(unique(methods::slot(object, "data")[, small_area])[!is.na(unique(methods::slot(object, "data")[, small_area]))])) {
      if (vimfold) { # cluster and non-cluster commons, temporary objects will be removed for cluster sampling
        if (is.null(methods::slot(object, "cluster")) ||
          pred_type == "partially") {
          g <- methods::slot(object, "data")[, small_area] == group
          g[is.na(g)] <- FALSE
          ez <- mapply(append, z, as.numeric(g), SIMPLIFY = FALSE) ## a2 p.444
          iea_s2 <- solve(1 / n_s2 * Reduce("+", lapply(
            ez[s2],
            function(x) as.numeric(x) %o% as.numeric(x)
          ))) ## a2 p.444
          eu_s2 <- Reduce("+", mapply("*", ez[s2], y[s2], SIMPLIFY = FALSE)) / n_s2 ## a2 p.444
          b2_28 <- iea_s2 %*% eu_s2 ## the only one needed in cluster sampling, see b1 p.22
        }

        switch(pred_type,
          "partially" = {
            ez1 <- mapply(append, z1, as.numeric(g), SIMPLIFY = FALSE) # b2 p.1026
            ea1_s2 <- Reduce("+", lapply(ez1[s2], function(x) as.numeric(x) %o% as.numeric(x))) / n_s2
            iea1_s2 <- solve(ea1_s2)
            eu1_s2 <- Reduce("+", mapply("*", ez1[s2], y[s2], SIMPLIFY = FALSE)) / n_s2
            b2_27 <- iea1_s2 %*% eu1_s2 ## needed in cluster sampling, see b1 p.22
            tmz1_g <- as.numeric(
              cbind(
                1,
                subset(
                  methods::slot(object, "smallAreaMeans")[, predictors1],
                  methods::slot(object, "smallAreaMeans")[, small_area] == group
                )
              )
            ) ## b2 p.1026
            tmez1_g <- c(tmz1_g, 1) ## b2 p.1027. b1 p.18 and p.22 are both in error. needed in cluster sampling b1_50 KLDUGE this may be in error!
          },
          "exhaustive" = ## as below
          , "non-exhaustive" = {
            ## do nothing
          },
          stop(paste("unkown pred_type", pred_type))
        )
      }
      if (!is.null(methods::slot(object, "cluster"))) { # cluster sampling
        if (is.factor(methods::slot(object, "data")[, small_area])) {
          smallarea <- tapply(as.character(methods::slot(object, "data")[, small_area]), methods::slot(object, "data")[, methods::slot(object, "cluster")], unique)
        } else {
          smallarea <- tapply(methods::slot(object, "data")[, small_area], methods::slot(object, "data")[, methods::slot(object, "cluster")], unique)
        }
        gc <- smallarea == group
        gc[is.na(gc)] <- FALSE
        n_s1cgc <- sum(as.numeric(s1c & gc))
        result <- data.frame(small_area = group)
        if (vimfold) { # commons
          ## extended model
          tmp <- methods::slot(object, "data")[, small_area] == group
          tmp[is.na(tmp)] <- FALSE
          i_cgc <- tapply(as.numeric(tmp), methods::slot(object, "data")[, methods::slot(object, "cluster")], sum) / m ## a2 p.445: I_{c, gc}(x) !:= gc
          ez_c <- mapply(append, z_c, i_cgc, SIMPLIFY = FALSE)
          iea_cs2c <- solve(
            Reduce(
              "+",
              mapply("*",
                lapply(ez_c[s2c], function(x) as.numeric(x) %o% as.numeric(x)),
                m[s2c],
                SIMPLIFY = FALSE
              )
            ) / n_s2c # a1 p.24
          )
          a1_63 <- iea_cs2c %*% Reduce("+", mapply("*", ez_c[s2c],
            y_c[s2c] * m[s2c],
            SIMPLIFY = FALSE
          )) / n_s2c
          er_c <- y_c - sapply(ez_c, function(x) x %*% a1_63) # a1 p.25
          a1_64 <- iea_cs2c %*% (Reduce(
            "+",
            mapply("*", m[s2c]^2,
              mapply("*",
                er_c[s2c]^2,
                lapply(ez_c[s2c], function(x) as.numeric(x) %o% as.numeric(x)),
                SIMPLIFY = FALSE
              ),
              SIMPLIFY = FALSE
            )
          ) / n_s2c^2) %*% iea_cs2c
          mez_cs1cgc <- Reduce("+", mapply("*", m[s1c & gc],
            ez_c[s1c & gc],
            SIMPLIFY = FALSE
          )) / sum(m[s1c & gc]) # a1. p.25, b1 p.22 and c1 p.21
        }
        switch(pred_type # uncommons
          , "partially" = {
            rm(g, ez, iea_s2, eu_s2) ## remove temporary objects
            rm(ez1, ea1_s2, iea1_s2, eu1_s2, tmz1_g) ## remove temporary objects
            if (length(predictors1) == 1) {
              ## length 1, we need to tapply sum
              z1_c <- mapply(append,
                mapply("/",
                  tapply(
                    methods::slot(object, "data")[, predictors1] * as.numeric(include),
                    methods::slot(object, "data")[, methods::slot(object, "cluster")], sum
                  ),
                  m,
                  SIMPLIFY = FALSE
                ),
                1,
                SIMPLIFY = FALSE,
                after = 0
              )
            } else {
              ## length > 1, we need to by colSums
              z1_c <- mapply(append,
                mapply("/",
                  by(methods::slot(object, "data")[, predictors1] * as.numeric(include),
                    methods::slot(object, "data")[, methods::slot(object, "cluster")], colSums,
                    simplify = TRUE
                  ),
                  m,
                  SIMPLIFY = FALSE
                ),
                1,
                SIMPLIFY = FALSE,
                after = 0
              )
            }
            ez1_c <- mapply(append, z1_c, i_cgc, SIMPLIFY = FALSE)
            mez1_cs1cgc <- Reduce("+", mapply("*", m[s1c & gc],
              ez1_c[s1c & gc],
              SIMPLIFY = FALSE
            )) / sum(m[s1c & gc]) # b1 p.22
            iea1_cs1c <- solve(
              Reduce(
                "+",
                mapply("*",
                  lapply(ez1_c[s1c], function(x) as.numeric(x) %o% as.numeric(x)),
                  m[s1c],
                  SIMPLIFY = FALSE
                )
              ) / n_s1c # b1 p.22
            )
            iea1_cs2c <- solve(
              Reduce(
                "+",
                mapply("*",
                  lapply(ez1_c[s2c], function(x) as.numeric(x) %o% as.numeric(x)),
                  m[s2c],
                  SIMPLIFY = FALSE
                )
              ) / n_s2c # b1 p.22
            )
            b1_48 <- iea1_cs2c %*% Reduce("+", mapply("*", ez1_c[s2c],
              y_c[s2c] * m[s2c],
              SIMPLIFY = FALSE
            )) / n_s2c
            b1_49 <- a1_63
            if (threephase) { # we now estimate the smallAreaMeans from s0: c1 p.16. This is the essence of c1.
              tmez1_g <- Reduce("+", mapply("*", m[gc],
                ez1_c[gc],
                SIMPLIFY = FALSE
              )) / sum(m[gc]) # a1. p.25, b1 p.22 and c1 p.21
            }
            b1_50 <- (tmez1_g - mez1_cs1cgc) %*% b1_48 + t(b1_49) %*% mez_cs1cgc
            w <- "b1 p.22 gives strange formulae for r_c and R1_c, I
                  replace hat{y} with y, resulting in formulae analogous to b2
                  p.18 with Zeta replaced by Zeta1. Confirmed by D.m., personal
                  communication, 2014-01-08"
            com <- c(com, w)
            er1_c <- y_c - sapply(ez1_c, function(x) x %*% b1_48) # b1 p.22
            w <- "c1 p.21/b1 p.22: In analogy to a1_25 I change
                  hat{er}_{1,c} to use the clustered version of the parameter
                  estimate. Confirmed by D.m., personal communication, 2014-01-08"
            com <- c(com, w)
            b1_51_a <- iea1_cs1c %*% (Reduce(
              "+",
              mapply("*", m[s2c]^2,
                mapply("*",
                  er1_c[s2c]^2,
                  lapply(ez1_c[s2c], function(x) as.numeric(x) %o% as.numeric(x)),
                  SIMPLIFY = FALSE
                ),
                SIMPLIFY = FALSE
              )
            ) / n_s2c^2) %*% iea1_cs1c
            er_c <- y_c - sapply(ez_c, function(x) x %*% b1_49) # b1 p.22
            w <- "c1 p.21/b1 p.22: In analogy to a1_25 I change hat{er}_{c} to use the clustered version of the parameter estimate. Confirmed by D.m., personal communication, 2014-01-08"
            com <- c(com, w)
            b1_51_b <- iea_cs2c %*% (Reduce(
              "+",
              mapply("*", m[s2c]^2,
                mapply("*",
                  er_c[s2c]^2,
                  lapply(ez_c[s2c], function(x) as.numeric(x) %o% as.numeric(x)),
                  SIMPLIFY = FALSE
                ),
                SIMPLIFY = FALSE
              )
            ) / n_s2c^2) %*% iea_cs2c
            w <- "b1_52 is skrewed, I replace hat{bar{z}}_{g,1} with  hat{bar{z}}_{c,g} and bar{z}^(1) with bar{Zeta}^(1)_{g} in analogy with c2_24. Confirmed by D.m."
            com <- c(com, w)
            b1_52 <- n_s2c / n_s1c * tmez1_g %*% b1_51_a %*% tmez1_g + (1 - n_s2c / n_s1c) * mez_cs1cgc %*% b1_51_b %*% mez_cs1cgc
            if (threephase) {
              c1_53 <- b1_50
              n_s0cgc <- sum(as.numeric(gc))
              cov_cs0g <- Reduce(
                "+",
                mapply("*",
                  (m[gc] / mean(m[gc]))^2,
                  lapply(
                    lapply(
                      ez1_c[gc],
                      function(x) x - tmez1_g
                    ),
                    function(x) as.numeric(x) %o% as.numeric(x)
                  ),
                  SIMPLIFY = FALSE
                )
              ) / (n_s0cgc * (n_s0cgc - 1))
              c1_55 <- b1_52 + t(b1_48) %*% cov_cs0g %*% b1_48
              result <- cbind(result,
                prediction = c1_53,
                variance = c1_55
              )
            } else {
              result <- cbind(result,
                prediction = b1_50,
                variance = b1_52
              )
            }
          },
          "exhaustive" = {
            tmz_gc <- as.numeric(
              cbind(
                1,
                subset(
                  methods::slot(object, "smallAreaMeans")[, predictors],
                  methods::slot(object, "smallAreaMeans")[, small_area] == group
                )
              )
            ) 
            ## extended model
            tmez_gc <- c(tmz_gc, 1) # a1 p.18
            a2_48 <- as.numeric(tmez_gc %*% a1_63)
            a2_49 <- as.numeric(tmez_gc %*% a1_64 %*% tmez_gc)
            result <- cbind(result,
              prediction = a2_48,
              variance = a2_49
            )
          },
          "non-exhaustive" = {
            ## ## ## psynth
            ## mean of the clustered y m over gc
            a2_40 <- Reduce("+", mapply("*", m[s1c & gc],
              z_c[s1c & gc],
              SIMPLIFY = FALSE
            )) / sum(m[s1c & gc])
            a2_41 <- Reduce(
              "+",
              mapply("*",
                (m[s1c & gc] / mean(m[s1c & gc]))^2,
                lapply(
                  lapply(
                    z_c[s1c & gc],
                    function(x) x - a2_40
                  ),
                  function(x) as.numeric(x) %o% as.numeric(x)
                ),
                SIMPLIFY = FALSE
              )
            ) / (n_s1cgc * (n_s1cgc - 1))
            a2_46 <- as.numeric(mez_cs1cgc %*% a1_63) # == a1_65
            a1_67 <- Reduce(
              "+",
              mapply("*",
                (m[s1c & gc] / mean(m[s1c & gc]))^2,
                lapply(
                  lapply(
                    ez_c[s1c & gc],
                    function(x) x - mez_cs1cgc
                  ),
                  function(x) as.numeric(x) %o% as.numeric(x)
                ),
                SIMPLIFY = FALSE
              )
            ) / (n_s1cgc * (n_s1cgc - 1))
            a2_47 <- as.numeric(mez_cs1cgc %*% a1_64 %*% mez_cs1cgc + t(a1_63) %*% a1_67 %*% a1_63) # == a1_66
            result <- cbind(result,
              prediction = a2_46,
              variance = a2_47
            )
          },
          stop(paste("unkown pred_type", pred_type))
        )
      }
      else { # non-cluster sampling
        n_s1g <- sum(as.numeric(s1 & g))
        result <- data.frame(small_area = group)
        if (vimfold) { # commons
          ## extended model
          er <- y - sapply(ez, function(x) x %*% b2_28) ## a2 p.444
          a2_30 <- iea_s2 %*% (Reduce("+", mapply("*", er[s2]^2,
            lapply(
              ez[s2],
              function(x) as.numeric(x) %o% as.numeric(x)
            ),
            SIMPLIFY = FALSE
          )) / n_s2^2
          ) %*% iea_s2 ## Sigma_theta_s2
        }
        switch(pred_type # commons to non-exhaustive & partially
          , "exhaustive" = {
            ## do nothing
          },
          "non-exhaustive" = ## same as below
          , "partially" = {
            if (length(predictors) == 1) {
              mz_s1g <- c(1, mean(methods::slot(object, "data")[, c(predictors)][s1 & g])) ## a2 p.443, b1 p.15
            } else {
              mz_s1g <- c(1, colMeans(methods::slot(object, "data")[, c(predictors)][s1 & g, ])) ## a2 p.443, b1 p.15
            }
            a2_34 <- Reduce("+", ez[s1 & g]) / n_s1g ## meZ_s1G
          },
          stop(paste("unkown pred_type", pred_type))
        )
        switch(pred_type # uncommons
          , "exhaustive" = {
            tmz_g <- as.numeric(
              cbind(
                1,
                subset(
                  methods::slot(object, "smallAreaMeans")[, predictors],
                  methods::slot(object, "smallAreaMeans")[, small_area] == group
                )
              )
            ) ## a2 p.443
            tmez_g <- c(tmz_g, 1) ## a1 p.18
            a2_31 <- tmez_g %*% b2_28 ## same as a2_32
            a2_33 <- tmez_g %*% a2_30 %*% tmez_g
            result <- cbind(result,
              prediction = a2_31,
              variance = a2_33
            )
          },
          "non-exhaustive" = {
            a2_35 <- a2_34 %*% b2_28
            a2_37 <- Reduce("+", lapply(
              lapply(ez[s1 & g], function(x) x - a2_34),
              function(x) as.numeric(x) %o% as.numeric(x)
            )) / (n_s1g * (n_s1g - 1))
            a2_36 <- as.numeric(t(a2_34) %*% a2_30 %*% a2_34 + t(b2_28) %*% a2_37 %*% b2_28)
            result <- cbind(result,
              prediction = a2_35,
              variance = a2_36
            )
          },
          "partially" = {
            if (length(predictors2) == 0) { ## possible, if s1 is given.
              mz1_s1g <- mz_s1g
            } else {
              mz1_s1g <- as.vector(mz_s1g[-which(names(mz_s1g) %in% predictors2)])
            }
            b2_29 <- a2_30
            er1 <- y - sapply(ez1, function(x) x %*% b2_27) # b1 p.18
            ## ea1_s1 is never given, but by analogy with a1_s2 (from b2_27), A1_s1c (b1 p.21), eA1_s1c (b1 p.22):
            ea1_s1 <- Reduce("+", lapply(ez1[s1], function(x) as.numeric(x) %o% as.numeric(x))) / n_s1
            iea1_s1 <- solve(ea1_s1)
            b1_40_b <- iea1_s1 %*% (Reduce("+", mapply("*", er1[s2]^2,
              lapply(
                ez1[s2],
                function(x) as.numeric(x) %o% as.numeric(x)
              ),
              SIMPLIFY = FALSE
            )) / n_s2^2
            ) %*% iea1_s1 # Sigma_Gamma_s2 b2 p.1016
            mez1_s1g <- c(mz1_s1g, 1) # b2 p.1027
            if (threephase) { # we now estimate the smallAreaMeans from s0: c1 p.16. This is the essence of c1.
              if (length(predictors1) == 1) {
                tmez1_g <- c(1, mean(methods::slot(object, "data")[g, predictors1]), 1)
              } else {
                tmez1_g <- c(1, colMeans(methods::slot(object, "data")[g, predictors1]), 1)
              }
            }
            b2_30 <- (tmez1_g - mez1_s1g) %*% b2_27 + a2_34 %*% b2_28
            b2_31 <- n_s2 / n_s1 * tmez1_g %*% b1_40_b %*% tmez1_g + (1 - n_s2 / n_s1) * a2_34 %*% b2_29 %*% a2_34
            if (threephase) {
              c2_23 <- b2_30 ## we have replaced tmez1_g  with its estimate over s0!
              n_s0g <- sum(g)
              c2_25 <- Reduce("+", lapply(
                lapply(
                  ez1[g],
                  function(x) x - tmez1_g
                ),
                function(x) as.numeric(x) %o% as.numeric(x)
              )) / (n_s0g * (n_s0g - 1))
              c2_24 <- b2_31 + t(b2_27) %*% c2_25 %*% b2_27
            }
            if (threephase) {
              result <- cbind(result,
                prediction = c2_23,
                variance = c2_24
              )
            } else {
              result <- cbind(result,
                prediction = b2_30,
                variance = b2_31
              )
            }
          },
          stop(paste("unkown pred_type", pred_type))
        )
      }
      out <- rbind(out, result)
    }
    comment(out) <- com
    attr(out, "references") <- c(
      "READ ME using  cat(sep = '\n', attr(NAME, 'references')[2])  ",
      references
    )

    attr(out, "auxilliary.data") <- ifelse(
      pred_type == "non-exhaustive", pred_type,
      ifelse(
        pred_type == "exhaustive", pred_type,
        if (pred_type == "partially" && threephase) {
          "three-phase sampling"
        } else {
          "partially exhaustive"
        }
      )
    )
    attr(out, "clustered") <- ifelse(is.null(methods::slot(object, "cluster")),
      FALSE, TRUE
    )
    attr(out$prediction, "reference") <- switch(as.character(attr(out, "clustered")),
      "TRUE" = switch(attr(out, "auxilliary.data"),
        "exhaustive" = "a2_48",
        "non-exhaustive" = "a2_46",
        "partially exhaustive" = "b1_50",
        "three-phase sampling" = "c1_53"
      ),
      "FALSE" = switch(attr(out, "auxilliary.data"),
        "exhaustive" = "a2_31",
        "non-exhaustive" = "a2_35",
        "partially exhaustive" = "b2_30",
        "three-phase sampling" = "c2_23"
      )
    )
    attr(out$variance, "reference") <- switch(as.character(attr(out, "clustered")),
      "TRUE" = switch(attr(out, "auxilliary.data"),
        "exhaustive" = "a2_49",
        "non-exhaustive" = "a2_47",
        "partially exhaustive" = "b1_52",
        "three-phase sampling" = "c1_55"
      ),
      "FALSE" = switch(attr(out, "auxilliary.data"),
        "exhaustive" = "a2_33",
        "non-exhaustive" = "a2_36",
        "partially exhaustive" = "b2_31",
        "three-phase sampling" = "c2_24"
      )
    )

    return(out)
}

Try the maSAE package in your browser

Any scripts or data that you put into this service are public.

maSAE documentation built on April 12, 2021, 5:06 p.m.