# Internal documentation -------------------------------------------------------
# Point estimation function
# This function implements the transformation of data, estimation of the nested
# error linear regression model and the monte-carlo approximation to predict
# the desired indicators. If the weighted version of the approach is used, then
# additional estimation steps are taken in order to calculate weighted
# regression coefficients before the monte-carlo approximation. See
# corresponding functions below.
point_estim <- function(framework,
fixed,
transformation,
interval,
L,
keep_data = FALSE) {
# Transformation of data -----------------------------------------------------
# Estimating the optimal parameter by optimization
# Optimal parameter function returns the minimum of the optimization
# functions from generic_opt; the minimum is the optimal lambda.
# The function can be found in the script optimal_parameter.R
optimal_lambda <- optimal_parameter(
generic_opt = generic_opt,
fixed = fixed,
smp_data = framework$smp_data,
smp_domains = framework$smp_domains,
transformation = transformation,
interval = interval
)
# Data_transformation function returns transformed data and shift parameter.
# The function can be found in the script transformation_functions.R
transformation_par <- data_transformation(
fixed = fixed,
smp_data = framework$smp_data,
transformation = transformation,
lambda = optimal_lambda
)
shift_par <- transformation_par$shift
# Model estimation, model parameter and parameter of generating model --------
# Estimation of the nested error linear regression model
# See Molina and Rao (2010) p. 374
# lme function is included in the nlme package which is imported.
mixed_model <- nlme::lme(
fixed = fixed,
data = transformation_par$transformed_data,
random =
as.formula(paste0(
"~ 1 | as.factor(",
framework$smp_domains, ")"
)),
method = "REML",
keep.data = keep_data
)
# Function model_par extracts the needed parameters theta from the nested
# error linear regression model. It returns the beta coefficients (betas),
# sigmae2est, sigmau2est and the random effect (rand_eff).
est_par <- model_par(
mixed_model = mixed_model,
framework = framework,
fixed = fixed,
transformation_par = transformation_par
)
# Function gen_model calculates the parameters in the generating model.
# See Molina and Rao (2010) p. 375 (20)
# The function returns sigmav2est and the constant part mu.
gen_par <- gen_model(
model_par = est_par,
fixed = fixed,
framework = framework
)
# Monte-Carlo approximation --------------------------------------------------
if (inherits(framework$threshold, "function")) {
framework$threshold <-
framework$threshold(
y =
as.numeric(framework$smp_data[[paste0(fixed[2])]])
)
}
# The monte-carlo function returns a data frame of desired indicators.
indicator_prediction <- monte_carlo(
transformation = transformation,
L = L,
framework = framework,
lambda = optimal_lambda,
shift = shift_par,
model_par = est_par,
gen_model = gen_par
)
mixed_model$coefficients_weighted <- if (!is.null(framework$weights)) {
as.numeric(est_par$betas)
} else {
NULL
}
names(mixed_model$coefficients_weighted) <- if (!is.null(framework$weights)) {
rownames(est_par$betas)
} else {
NULL
}
return(list(
ind = indicator_prediction,
optimal_lambda = optimal_lambda,
shift_par = shift_par,
model_par = est_par,
gen_model = gen_par,
model = mixed_model
))
} # End point estimation function
# All following functions are only internal ------------------------------------
# Functions to extract and calculate model parameter----------------------------
# Function model_par extracts the needed parameters theta from the nested
# error linear regression model. It returns the beta coefficients (betas),
# sigmae2est, sigmau2est and the random effect (rand_eff).
model_par <- function(framework,
mixed_model,
fixed,
transformation_par) {
if (is.null(framework$weights)) {
# fixed parametersn
betas <- nlme::fixed.effects(mixed_model)
# Estimated error variance
sigmae2est <- mixed_model$sigma^2
# VarCorr(fit2) is the estimated random error variance
sigmau2est <- as.numeric(nlme::VarCorr(mixed_model)[1, 1])
# Random effect: vector with zeros for all domains, filled with
rand_eff <- rep(0, length(unique(framework$pop_domains_vec)))
# random effect for in-sample domains (dist_obs_dom)
rand_eff[framework$dist_obs_dom] <- (random.effects(mixed_model)[[1]])
return(list(
betas = betas,
sigmae2est = sigmae2est,
sigmau2est = sigmau2est,
rand_eff = rand_eff
))
} else {
# fixed parameters
betas <- nlme::fixed.effects(mixed_model)
# Estimated error variance
sigmae2est <- mixed_model$sigma^2
# VarCorr(fit2) is the estimated random error variance
sigmau2est <- as.numeric(nlme::VarCorr(mixed_model)[1, 1])
# Calculations needed for pseudo EB
weight_sum <- rep(0, framework$N_dom_smp)
mean_dep <- rep(0, framework$N_dom_smp)
mean_indep <- matrix(0, nrow = framework$N_dom_smp, ncol = length(betas))
delta2 <- rep(0, framework$N_dom_smp)
gamma_weight <- rep(0, framework$N_dom_smp)
num <- matrix(0, nrow = length(betas), ncol = 1)
den <- matrix(0, nrow = length(betas), ncol = length(betas))
for (d in 1:framework$N_dom_smp) {
domain <- names(table(framework$smp_domains_vec)[d])
# Domain means of of the dependent variable
dep_smp <- transformation_par$transformed_data[[
as.character(mixed_model$terms[[2]])]][
framework$smp_domains_vec == domain
]
weight_smp <- transformation_par$transformed_data[[
as.character(framework$weights)]][framework$smp_domains_vec == domain]
weight_sum[d] <- sum(weight_smp)
indep_smp <- if(length(weight_smp) == 1) {
matrix(model.matrix(fixed, framework$smp_data)[framework$smp_domains_vec == domain,]
, ncol = length(betas), nrow = 1)
} else {
model.matrix(fixed, framework$smp_data)[framework$smp_domains_vec == domain,]
}
# weighted mean of the dependent variable
mean_dep[d] <- sum(weight_smp * dep_smp) / weight_sum[d]
# weighted means of the auxiliary information
for (k in 1:length(betas)) {
mean_indep[d, k] <- sum(weight_smp * indep_smp[, k]) / weight_sum[d]
}
delta2[d] <- sum(weight_smp^2) / (weight_sum[d]^2)
gamma_weight[d] <- sigmau2est / (sigmau2est + sigmae2est * delta2[d])
weight_smp_diag <- diag(weight_smp)
dep_var_ast <- dep_smp - gamma_weight[d] * mean_dep[d]
indep_weight <- t(indep_smp) %*% weight_smp_diag
indep_var_ast <- indep_smp - matrix(rep(
gamma_weight[d] *
mean_indep[d, ],
framework$n_smp[d]
),
nrow = framework$n_smp[d],
byrow = TRUE
)
num <- num + (indep_weight %*% dep_var_ast)
den <- den + (indep_weight %*% indep_var_ast)
}
betas <- solve(den) %*% num
# Random effect: vector with zeros for all domains, filled with
rand_eff <- rep(0, length(unique(framework$pop_domains_vec)))
# random effect for in-sample domains (dist_obs_dom)
rand_eff[framework$dist_obs_dom] <- gamma_weight * (mean_dep -
mean_indep %*% betas)
return(list(
betas = betas,
sigmae2est = sigmae2est,
sigmau2est = sigmau2est,
rand_eff = rand_eff,
gammaw = gamma_weight,
delta2 = delta2
))
}
} # End model_par
# Function gen_model calculates the parameters in the generating model.
# See Molina and Rao (2010) p. 375 (20)
gen_model <- function(fixed,
framework,
model_par) {
if (is.null(framework$weights)) {
# Parameter for calculating variance of new random effect
gamma <- model_par$sigmau2est / (model_par$sigmau2est +
model_par$sigmae2est / framework$n_smp)
# Variance of new random effect
sigmav2est <- model_par$sigmau2est * (1 - gamma)
# Random effect in constant part of y for in-sample households
rand_eff_pop <- rep(model_par$rand_eff, framework$n_pop)
# Model matrix for population covariate information
framework$pop_data[[paste0(fixed[2])]] <- seq_len(nrow(framework$pop_data))
X_pop <- model.matrix(fixed, framework$pop_data)
# Constant part of predicted y
mu_fixed <- X_pop %*% model_par$betas
mu <- mu_fixed + rand_eff_pop
return(list(sigmav2est = sigmav2est, mu = mu, mu_fixed = mu_fixed))
} else {
# Parameter for calculating variance of new random effect
gamma <- model_par$gammaw
# Variance of new random effect
sigmav2est <- model_par$sigmau2est * (1 - gamma)
# Random effect in constant part of y for in-sample households
rand_eff_pop <- rep(model_par$rand_eff, framework$n_pop) ####### change
# Model matrix for population covariate information
framework$pop_data[[paste0(fixed[2])]] <- seq_len(nrow(framework$pop_data))
X_pop <- model.matrix(fixed, framework$pop_data)
# Constant part of predicted y
mu_fixed <- X_pop %*% model_par$betas
mu <- mu_fixed + rand_eff_pop
return(list(sigmav2est = sigmav2est, mu = mu, mu_fixed = mu_fixed))
}
} # End gen_model
# Monte-Carlo approximation ----------------------------------------------------
# The function approximates the expected value (Molina and Rao (2010)
# p.372 (6)). For description of monte-carlo simulation see Molina and
# Rao (2010) p. 373 (13) and p. 374-375
monte_carlo <- function(transformation,
L,
framework,
lambda,
shift,
model_par,
gen_model) {
# Preparing matrices for indicators for the Monte-Carlo simulation
if(!is.null(framework$aggregate_to_vec)){
N_dom_pop_tmp <- framework$N_dom_pop_agg
pop_domains_vec_tmp <- framework$aggregate_to_vec
} else {
N_dom_pop_tmp <- framework$N_dom_pop
pop_domains_vec_tmp <- framework$pop_domains_vec
}
ests_mcmc <- array(dim = c(
N_dom_pop_tmp,
L,
length(framework$indicator_names)
))
for (l in seq_len(L)) {
# Errors in generating model: individual error term and random effect
# See below for function errors_gen.
errors <- errors_gen(
framework = framework,
model_par = model_par,
gen_model = gen_model
)
# Prediction of population vector y
# See below for function prediction_y.
population_vector <- prediction_y(
transformation = transformation,
lambda = lambda,
shift = shift,
gen_model = gen_model,
errors_gen = errors,
framework = framework
)
if(!is.null(framework$pop_weights)){
pop_weights_vec <- framework$pop_data[[framework$pop_weights]]
}else{
pop_weights_vec <- rep(1, nrow(framework$pop_data))
}
# Calculation of indicators for each Monte Carlo population
ests_mcmc[, l, ] <-
matrix(
nrow = N_dom_pop_tmp,
data = unlist(lapply(framework$indicator_list,
function(f, threshold) {
matrix(
nrow = N_dom_pop_tmp,
data = unlist(mapply(
y = split(population_vector, pop_domains_vec_tmp),
pop_weights = split(pop_weights_vec, pop_domains_vec_tmp),
f,
threshold = framework$threshold
)), byrow = TRUE
)
},
threshold = framework$threshold
))
)
} # End for loop
# Point estimations of indicators by taking the mean
point_estimates <- data.frame(
Domain = unique(pop_domains_vec_tmp),
apply(ests_mcmc, c(3), rowMeans)
)
colnames(point_estimates) <- c("Domain", framework$indicator_names)
return(point_estimates)
} # End Monte-Carlo
# The function errors_gen returns error terms of the generating model.
# See Molina and Rao (2010) p. 375 (20)
errors_gen <- function(framework, model_par, gen_model) {
# individual error term in generating model epsilon
epsilon <- rnorm(framework$N_pop, 0, sqrt(model_par$sigmae2est))
# empty vector for new random effect in generating model
vu <- vector(length = framework$N_pop)
# new random effect for out-of-sample domains
vu[!framework$obs_dom] <- rep(
rnorm(
framework$N_dom_unobs,
0,
sqrt(model_par$sigmau2est)
),
framework$n_pop[!framework$dist_obs_dom]
)
# new random effect for in-sample-domains
vu[framework$obs_dom] <- rep(
rnorm(
rep(1, framework$N_dom_smp),
0,
sqrt(gen_model$sigmav2est)
),
framework$n_pop[framework$dist_obs_dom]
)
return(list(epsilon = epsilon, vu = vu))
} # End errors_gen
# The function prediction_y returns a predicted income vector which can be used
# to calculate indicators. Note that a whole income vector is predicted without
# distinction between in- and out-of-sample domains.
prediction_y <- function(transformation,
lambda,
shift,
gen_model,
errors_gen,
framework) {
# predicted population income vector
y_pred <- gen_model$mu + errors_gen$epsilon + errors_gen$vu
# back-transformation of predicted population income vector
y_pred <- back_transformation(
y = y_pred,
transformation = transformation,
lambda = lambda,
shift = shift
)
y_pred[!is.finite(y_pred)] <- 0
return(y_pred)
} # End prediction_y
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.