msaeOB: Optimum Benchmarking for Multivariate Small Area Estimation

#' @title Parametric Bootstrap Mean Squared Error Estimators of Optimum Benchmarking for Univariate Small Area Estimation
#'
#' @description Calculates the parametric bootstrap mean squared error estimates of optimum benchmarking for univariate small area estimation
#'
#' @param formula an object of class list of formula describe the fitted model
#' @param vardir vector containing sampling variances of direct estimators
#' @param weight vector containing proportion of units in small areas
#' @param samevar logical. If \code{TRUE}, the varians is same. Default is \code{FALSE}
#' @param B number of bootstrap. Default is 1000
#' @param MAXITER maximum number of iterations for Fisher-scoring. Default is 100
#' @param PRECISION coverage tolerance limit for the Fisher Scoring algorithm. Default value is \code{1e-4}
#' @param data dataframe containing the variables named in formula, vardir, and weight
#'
#' @return
#' \item{mse.eblup}{estimated mean squared errors of the EBLUPs for the small domains based on Prasad Rao}
#' \item{pbmse.eblupOB}{parametric bootstrap mean squared error estimates of the optimum benchmark}
#' \item{running.time}{time for running function}
#'
#' @export mse_saeOB
#'
#' @import abind
#' @importFrom magic adiag
#' @importFrom Matrix forceSymmetric
#' @importFrom stats model.frame na.omit model.matrix median pnorm rnorm
#' @importFrom MASS mvrnorm
#'
#' @examples
#' \donttest{
#' ## load dataset
#' data(datamsaeOB)
#'
#' # Compute MSE EBLUP and Optimum Benchmark
#'
#' ## Using parameter 'data'
#' mse_sae = mse_saeOB(Y1 ~ X1 + X2, v1, w1, data = datamsaeOB)
#'
#' ## Without parameter 'data'
#' mse_sae = mse_saeOB(datamsaeOB$Y1 ~ datamsaeOB$X1 + datamsaeOB$X2, datamsaeOB$v1, datamsaeOB$w1)
#'
#' ## Return
#' mse_sae$pbmse.eblupOB # to see the MSE Optimum Benchmark estimators
#' }
mse_saeOB<-function (formula, vardir, weight, samevar = FALSE, B = 100,
                     MAXITER = 100, PRECISION = 1e-04, data)
{
  start_time <- Sys.time()
  if (!is.list(formula))
    formula = list(formula)
  r = length(formula)
  if (r > 1)
    stop("You should use mse_msaeOB() for multivariate")
  R_function = function(vardir, n, r) {
    if (r == 1) {
      R = diag(vardir)
    }
    else {
      R = matrix(rep(0, times = n * r * n * r), nrow = n *
                   r, ncol = n * r)
      k = 1
      for (i in 1:r) {
        for (j in 1:r) {
          if (i <= j) {
            mat0 = matrix(rep(0, times = r * r), nrow = r,
                          ncol = r)
            mat0[i, j] = 1
            matVr = diag(vardir[, k], length(vardir[,
                                                    k]))
            R_hasil = kronecker(mat0, matVr)
            R = R + R_hasil
            k = k + 1
          }
        }
      }
      R = forceSymmetric(R)
    }
    return(as.matrix(R))
  }
  eblup_inside = function(r, n, samevar, y, X, R, MAXITER = 100,
                          PRECISION = 1e-04) {
    y_names = sapply(formula, "[[", 2)
    Ir = diag(r)
    In = diag(n)
    dV = list()
    dV1 = list()
    for (i in 1:r) {
      dV[[i]] = matrix(0, nrow = r, ncol = r)
      dV[[i]][i, i] = 1
      dV1[[i]] = kronecker(dV[[i]], In)
    }
    convergence = TRUE
    if (samevar) {
      Vu = median(diag(R))
      k = 0
      diff = rep(PRECISION + 1, r)
      while (any(diff > PRECISION) & (k < MAXITER)) {
        k = k + 1
        Vu1 = Vu
        Gr = kronecker(Vu1, Ir)
        Gn = kronecker(Gr, In)
        V = as.matrix(Gn + R)
        Vinv = solve(V)
        XtVinv = t(Vinv %*% X)
        Q = solve(XtVinv %*% X)
        P = Vinv - t(XtVinv) %*% Q %*% XtVinv
        Py = P %*% y
        s = (-0.5) %*% sum(diag(P)) + 0.5 %*% (t(Py) %*%
                                                 Py)
        iF = 0.5 %*% sum(diag(P %*% P))
        Vu = Vu1 + solve(iF) %*% s
        diff = abs((Vu - Vu1)/Vu1)
      }
      Vu = as.vector((rep(max(Vu, 0), r)))
      names(Vu) = y_names
      if (k >= MAXITER && diff >= PRECISION) {
        convergence = FALSE
      }
      Gn = kronecker(diag(Vu), In)
      V = as.matrix(Gn + R)
      Vinv = solve(V)
      XtVinv = t(Vinv %*% X)
      Q = solve(XtVinv %*% X)
      P = Vinv - t(XtVinv) %*% Q %*% XtVinv
      Py = P %*% y
      beta = Q %*% XtVinv %*% y
      res = y - X %*% beta
      eblup = data.frame(matrix(X %*% beta + Gn %*% Vinv %*%
                                  res, n, r))
      names(eblup) = y_names
      se.b = sqrt(diag(Q))
      t.value = beta/se.b
      p.value = 2 * pnorm(abs(as.numeric(t.value)), lower.tail = FALSE)
      coef = as.matrix(cbind(beta, se.b, t.value, p.value))
      colnames(coef) = c("beta", "std. error",
                         "t value", "p-value")
      rownames(coef) = colnames(X)
      g1 = diag(Gn %*% Vinv %*% R)
      g2 = diag(R %*% Vinv %*% X %*% Q %*% t(X) %*% t(R %*%
                                                        Vinv))
      dg = Vinv - Gn %*% Vinv %*% Vinv
      gg3 = (dg %*% V %*% t(dg))/iF
      g3 = diag(gg3)
      mse = g1 + g2 + 2 * g3
      mse.df = data.frame(matrix(data = mse, nrow = n,
                                 ncol = r))
      names(mse.df) = y_names
    }
    else {
      Vu = apply(matrix(diag(R), nrow = n, ncol = r), 2,
                 median)
      k = 0
      diff = rep(PRECISION + 1, r)
      while (any(diff > rep(PRECISION, r)) & (k < MAXITER)) {
        k = k + 1
        Vu1 = Vu
        if (r == 1) {
          Gr = Vu1
        }
        else {
          Gr = diag(as.vector(Vu1))
        }
        Gn = kronecker(Gr, In)
        V = as.matrix(Gn + R)
        Vinv = solve(V)
        XtVinv = t(Vinv %*% X)
        Q = solve(XtVinv %*% X)
        P = Vinv - t(XtVinv) %*% Q %*% XtVinv
        Py = P %*% y
        s = sapply(dV1, function(x) (-0.5) * sum(diag(P %*%
                                                        x)) + 0.5 * (t(Py) %*% x %*% Py))
        iF = matrix(unlist(lapply(dV1, function(x) lapply(dV1,
                                                          function(y) 0.5 * sum(diag(P %*% x %*% P %*%
                                                                                       y))))), r)
        Vu = Vu1 + solve(iF) %*% s
        diff = abs((Vu - Vu1)/Vu1)
      }
      Vu = as.vector(sapply(Vu, max, 0))
      if (k >= MAXITER && diff >= PRECISION) {
        convergence = FALSE
      }
      if (r == 1) {
        Gr = Vu1
      }
      else {
        Gr = diag(as.vector(Vu1))
      }
      Gn = kronecker(Gr, In)
      V = as.matrix(Gn + R)
      Vinv = solve(V)
      XtVinv = t(Vinv %*% X)
      Q = solve(XtVinv %*% X)
      P = Vinv - t(XtVinv) %*% Q %*% XtVinv
      Py = P %*% y
      beta = Q %*% XtVinv %*% y
      res = y - X %*% beta
      eblup = data.frame(matrix(X %*% beta + Gn %*% Vinv %*%
                                  res, n, r))
      names(eblup) = y_names
      se.b = sqrt(diag(Q))
      t.value = beta/se.b
      p.value = 2 * pnorm(abs(as.numeric(t.value)), lower.tail = FALSE)
      coef = as.matrix(cbind(beta, se.b, t.value, p.value))
      colnames(coef) = c("beta", "std. error",
                         "t value", "p-value")
      rownames(coef) = colnames(X)
      FI = solve(iF)
      g1 = diag(Gn %*% Vinv %*% R)
      g2 = diag(R %*% Vinv %*% X %*% Q %*% t(X) %*% t(R %*%
                                                        Vinv))
      dg = lapply(dV1, function(x) x %*% Vinv - Gn %*%
                    Vinv %*% x %*% Vinv)
      gg3 = list()
      for (i in 1:r) {
        for (j in 1:r) {
          gg3[[(i - 1) * r + j]] = FI[i, j] * (dg[[i]] %*%
                                                 V %*% t(dg[[j]]))
        }
      }
      g3 = diag(Reduce("+", gg3))
      mse = g1 + g2 + 2 * g3
      mse.df = data.frame(matrix(data = mse, nrow = n,
                                 ncol = r))
      names(mse.df) = y_names
    }
    result = list(eblup = NA, fit = list(estcoef = NA, refvar = NA),
                  mse = NA, mse_component = list(g1 = NA, g2 = NA,
                                                 g3 = NA))
    result$eblup = eblup
    result$fit$estcoef = coef
    result$fit$refvar = t(Vu)
    result$mse = mse.df
    result$mse_component$g1 = g1
    result$mse_component$g2 = g2
    result$mse_component$g3 = g3
    return(result)
  }
  namevar = deparse(substitute(vardir))
  nameweight = deparse(substitute(weight))
  if (!missing(data)) {
    formuladata = lapply(formula, function(x) model.frame(x,
                                                          na.action = na.omit, data))
    y = unlist(lapply(formula, function(x) model.frame(x,
                                                       na.action = na.omit, data)[[1]]))
    X = Reduce(adiag, lapply(formula, function(x) model.matrix(x,
                                                               data)))
    W = as.matrix(data[, nameweight])
    n = length(y)/r
    if (any(is.na(data[, namevar])))
      stop("Object vardir contains NA values.")
    if (any(is.na(data[, nameweight])))
      stop("Object weight contains NA values.")
    R = R_function(data[, namevar], n, r)
    vardir = data[, namevar]
    samevar = samevar
  }
  else {
    formuladata = lapply(formula, function(x) model.frame(x,
                                                          na.action = na.omit))
    y = unlist(lapply(formula, function(x) model.frame(x,
                                                       na.action = na.omit)[[1]]))
    X = Reduce(adiag, lapply(formula, function(x) model.matrix(x)))
    W = as.matrix(weight)
    n = length(y)/r
    if (any(is.na(vardir)))
      stop("Object vardir contains NA values")
    if (any(is.na(weight)))
      stop("Object weight contains NA values.")
    R = R_function(vardir, n, r)
    samevar = samevar
  }
  y_names = sapply(formula, "[[", 2)
  eblup_first = eblup_inside(r = r, n = n, samevar = samevar,
                             y = y, X = X, R = R)
  beta = eblup_first$fit$estcoef[, 1]
  A = eblup_first$fit$refvar
  A_mat = kronecker(A, diag(n))
  Vinv = solve(kronecker(A, diag(n)) + R)
  XtVinv = t(Vinv %*% X)
  Q = solve(XtVinv %*% X)
  mse_prasad = eblup_first$mse
  g1d = eblup_first$mse_component$g1
  g2d = eblup_first$mse_component$g2
  sumg1.pb = rep(0, n * r)
  sumg2.pb = rep(0, n * r)
  sumg3.pb = rep(0, n * r)
  boot <- 1
  while (boot <= B) {
    u.boot = rnorm(n, 0, sqrt(A))
    theta.boot = X %*% beta + u.boot
    e.boot = rnorm(n, 0, sqrt(as.vector(vardir)))
    direct.boot = theta.boot + e.boot
    direct.boot.mat = matrix(direct.boot, nrow = n, ncol = r)
    resultEBLUP = eblup_inside(r = r, n = n, samevar = samevar,
                               y = direct.boot, X = X, R = R)
    sigma2.simula = resultEBLUP$fit$refvar
    beta.simula = resultEBLUP$fit$estcoef[, 1]
    mse.simula = resultEBLUP$mse
    Gn.simula = kronecker(as.vector(sigma2.simula), diag(n))
    Vinv.simula = as.matrix(solve(Gn.simula + R))
    Xbeta.simula = X %*% beta.simula
    XtVi.simula = t(Vinv.simula %*% X)
    Q.simula = solve(XtVi.simula %*% X)
    thetaEBLUP.boot1 = Xbeta.simula + Gn.simula %*% Vinv.simula %*%
      (direct.boot - Xbeta.simula)
    thetaEBLUP.boot1.mat = as.matrix(thetaEBLUP.boot1)
    thetaALFA.boot1 = colSums(W * direct.boot.mat) - colSums(W * thetaEBLUP.boot1.mat)
    thetaLAMBDA.boot1 = (mse.simula * W) / colSums(mse.simula * W^2)
    thetaOPTIMUM.boot1 = thetaEBLUP.boot1.mat + (thetaLAMBDA.boot1 * thetaALFA.boot1)
    Bstim.eblup = solve(XtVinv %*% X) %*% XtVinv %*% direct.boot
    Xbeta.eblup = X %*% Bstim.eblup
    thetaEBLUP.boot2 = Xbeta.eblup + A_mat %*% Vinv %*% (direct.boot -
                                                           Xbeta.eblup)
    thetaEBLUP.boot2.mat = as.matrix(thetaEBLUP.boot2)
    thetaALFA.boot2 = colSums(W * direct.boot.mat) - colSums(W * thetaEBLUP.boot2.mat)
    thetaLAMBDA.boot2 = (mse.simula * W) / colSums(mse.simula * W^2)
    thetaOPTIMUM.boot2 = thetaEBLUP.boot2.mat + (thetaLAMBDA.boot2 * thetaALFA.boot2)
    g1boot = diag(Gn.simula %*% Vinv.simula %*% R)
    g2boot = diag(R %*% Vinv.simula %*% X %*% Q.simula %*%
                    t(X) %*% t(R %*% Vinv.simula))
    g3boot = (thetaOPTIMUM.boot1 - thetaOPTIMUM.boot2)^2
    sumg1.pb = sumg1.pb + g1boot
    sumg2.pb = sumg2.pb + g2boot
    sumg3.pb = sumg3.pb + g3boot
    boot = boot + 1
  }
  g1.pb = sumg1.pb/B
  g2.pb = sumg2.pb/B
  g3.pb = sumg3.pb/B
  msebootoptimum = 2 * (g1d + g2d) - g1.pb - g2.pb + g3.pb
  end_time <- Sys.time()
  running_time = end_time - start_time
  result1 = list(mse.eblup = NA, pbmse.eblupOB = NA, running.time = NA)
  result1$mse.eblup = mse_prasad
  result1$pbmse.eblupOB = msebootoptimum
  result1$running.time = running_time
  return(result1)
}
yas-q/msaeOB documentation built on June 23, 2022, 7:10 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
yas-q/msaeOB
Optimum Benchmarking for Multivariate Small Area Estimation

R/mse_saeOB.R
In yas-q/msaeOB: Optimum Benchmarking for Multivariate Small Area Estimation

R Package Documentation

Browse R Packages

We want your feedback!

yas-q/msaeOB Optimum Benchmarking for Multivariate Small Area Estimation

R/mse_saeOB.R In yas-q/msaeOB: Optimum Benchmarking for Multivariate Small Area Estimation

R Package Documentation

Browse R Packages

We want your feedback!

yas-q/msaeOB
Optimum Benchmarking for Multivariate Small Area Estimation

R/mse_saeOB.R
In yas-q/msaeOB: Optimum Benchmarking for Multivariate Small Area Estimation