#' Computes the estimator for JPS data
#'
#' @param data The data to use for estimation.
#' @param set_size Set size for each ranking group.
#' @param replace Logical (default `TRUE`). Sample with replacement?
#' @param model_based An inference mode:
#' - `FALSE`: design based inference
#' - `TRUE`: model based inference using super population model
#' @param N The population size.
#' @param alpha The significance level.
#'
#' @importFrom stats aggregate qt rnorm sd var
#'
#' @return A `data.frame` with the point estimates provided by JPS estimators along with standard error and
#' confidence intervals.
#' @export
#'
jps_estimate <- function(data, set_size, replace = TRUE, model_based, N, alpha) {
n_rankers <- ncol(data) - 1
if (!replace && is.null(N)) {
stop("Population size N must be provided for sampling without replacement")
}
n <- nrow(data)
coefn <- calculate_coefficients(set_size, n)
if (n_rankers == 1) {
coef_del1 <- NULL
} else {
coef_del1 <- calculate_coefficients(set_size, n - 1)
}
if (!replace) {
coef1d2 <- coefn[1]
coef2d2 <- (1 / (set_size * (set_size - 1)) + coefn[3] + coefn[2]
- (coefn[2] + 1 / set_size^2) * set_size / (set_size - 1))
coef3d2 <- coefn[2] - 1 / (N - 1) * (1 / set_size - (coefn[1] + 1 / set_size^2))
coefd <- c(coef1d2, coef2d2, coef3d2)
coef1d2_del <- coef_del1[1]
coef2d2_del <- (1 / (set_size * (set_size - 1)) + coef_del1[3] + coef_del1[2]
- (coef_del1[2] + 1 / set_size^2) * set_size / (set_size - 1))
coef3d2_del <- coef_del1[2] - 1 / (N - 1) * (1 / set_size - (coef_del1[1] + 1 / set_size^2))
coef_del <- c(coef1d2_del, coef2d2_del, coef3d2_del)
fc <- 1 - n / N # finite population correction factor
} else {
coefd <- coefn
coef_del <- coef_del1
fc <- 1 # finite population correction factor
}
if (model_based == 1) {
coefd <- coefn
coef_del <- coef_del1
}
ranks <- data[, -1]
y <- data[, 1]
if (n_rankers == 1) {
jps_estimates <- jps_estimate_single(
ranks, y, set_size, N, coefd, coef_del, replace, model_based, n_rankers
)
estimators <- c("JPS", "SRS")
means <- round(c(jps_estimates[1], mean(y)), digits = 3)
# TODO: check std error calculation of jps variance
std_errors <- round(c(sqrt(jps_estimates[2]), sd(y) / sqrt(n)), digits = 3)
lower_bound <- round(means - qt(1 - alpha / 2, n - 1) * std_errors, digits = 3)
upper_bound <- round(means + qt(1 - alpha / 2, n - 1) * std_errors, digits = 3)
confidence_interval <- paste(lower_bound, upper_bound, sep = ",")
jps_summary <- data.frame(estimators, means, std_errors, confidence_interval, stringsAsFactors = FALSE)
ci_col <- paste0((1 - alpha) * 100, "% Confidence intervals")
colnames(jps_summary) <- c("Estimator", "Estimate", "Standard Error", ci_col)
return(jps_summary)
}
# more than one ranker
jps_estimates <- apply(
ranks,
2,
jps_estimate_single,
y = y,
set_size = set_size,
N = N,
coef = coefd,
coef_del = coef_del,
replace = replace,
model_based = model_based,
n_rankers
)
# estimates using all data
jps_mean_n <- jps_estimates[1, ]
jps_variance_n <- jps_estimates[2, ]
# estimates with i-th observation deleted where the i-th row corresponds to the estimates when the i-th
# observation is deleted
jps_var_ith_deleted <- jps_estimates[c(3:(n + 2)), ]
jps_mean_ith_deleted <- jps_estimates[-c(1:(n + 2)), ]
# variance weighted
jps_var_weighted_mean <- sum((1 / jps_variance_n) * jps_mean_n) / sum(1 / jps_variance_n)
variance_weights <- t(apply(jps_var_ith_deleted, 1, function(u) {
(1 / u) / sum(1 / u)
}))
jackknife_var_weighted_mean <- diag(variance_weights %*% t(jps_mean_ith_deleted))
jackknife_var_weighted_var <- fc * (n - 1) * var(jackknife_var_weighted_mean) * ((n - 1) / n)^2
mean_best_ranker <- jps_mean_n[1]
variance_best_ranker <- jps_variance_n[1]
# equally weighted
jps_mean <- mean(jps_mean_n)
jackknife_mean <- apply(jps_mean_ith_deleted, 1, mean)
jackknife_variance <- fc * (n - 1) * var(jackknife_mean) * ((n - 1) / n)^2
# agreement weighted
jackknife_estimated <- jackknife_agreement_estimate(data[, -1], y, set_size, N, fc)
agreement_mean <- jackknife_estimated$agreement_mean
jackknife_agreement_var <- jackknife_estimated$jackknife_var
estimated_means <- c(jps_mean, jps_var_weighted_mean, agreement_mean, mean_best_ranker, mean(y))
estimated_variances <- c(
jackknife_variance,
jackknife_var_weighted_var,
jackknife_agreement_var,
variance_best_ranker,
var(y) / n
)
min_variance_index <- which(estimated_variances == min(estimated_variances))
estimated_variances <- c(estimated_variances, estimated_variances[min_variance_index])
std_errors <- sqrt(estimated_variances)
estimated_means <- c(estimated_means, estimated_means[min_variance_index])
lower_bounds <- round(estimated_means - qt(1 - alpha / 2, n - 1) * sqrt(estimated_variances), digits = 3)
upper_bounds <- round(estimated_means + qt(1 - alpha / 2, n - 1) * sqrt(estimated_variances), digits = 3)
confidence_intervals <- paste(lower_bounds, upper_bounds, sep = ",")
estimators <- c("UnWeighted", "Sd.Weighted", "Aggregate Weight", "JPS Estimate", "SRS estimate", "Minimum")
ci_col <- paste0((1 - alpha) * 100, "% Confidence intervals")
estimator_summary <- data.frame(estimators,
round(estimated_means, digits = 3),
round(std_errors, digits = 3),
confidence_intervals,
stringsAsFactors = FALSE
)
colnames(estimator_summary) <- c("Estimator", "Estimate", "Standard Error", ci_col)
return(estimator_summary)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.