#' Compute ratio estimator for strata and separate ratio estimator for a
#' stratified sample of polygons.
#'
#' Ratio estimator is computed for each strata and then a separate version of
#' the ratio estimator is computed where the estimates of the strata are
#' weighted according to the area of each stratum (unlike traditional estimator
#' that weights with number of polygons sampled).
#' \code{strata_ratio_est} returns the estimates for each strata in the sample
#' (assuming SRS within strata).
#' @param data \code{data.frame} each line is an observation and includes the
#' stratum and columns used in ratio estimation.
#' @param allocation \code{data.frame} with a column defining the strata and a
#' column with sample size allocations for each stratum (one line per stratum).
#' @param stratum unquoted column with strata in the data and allocation
#' \code{data.frame}'s (the columns must have the same name in the
#' two \code{data.frame}'s)
#' @param y: unquoted variable in data corresponding to numerator in ratio
#' (eg. binary vector 1 correctly classified 0 ow)
#' #' @param M: unquoted variable in data corresponding to denominator in ratio
#' (eg. polygon areas)
#' @param N: number of polygons in the strata.
#' @return A \code{list} with estimates per strata and the separate ratio
#' estimate for overall population.
#' @importFrom magrittr %>%
#' @importFrom rlang !! !!! :=
#' @export
separate_ratio_est <- function(data, estratos_area, stratum, y, M, N){
stratum_var_string <- deparse(substitute(stratum))
stratum <- dplyr::enquo(stratum)
M <- dplyr::enquo(M)
y <- dplyr::enquo(y)
# allocation <- data %>% count(!!stratum)
if (missing(N)){
p_hs <- data %>%
left_join(estratos_area, by = stratum_var_string) %>%
dplyr::mutate(M = !!M, y = !!y) %>%
group_by(!!stratum) %>%
do(ratio_est(M = .$M, y = .$y))
} else {
N <- dplyr::enquo(N)
p_hs <- data %>%
dplyr::mutate(M = !!M, y = !!y, N = !!N) %>%
group_by(!!stratum) %>%
do(ratio_est(M = .$M, y = .$y, N = .$N[1]))
}
p_h_strata <- p_hs %>%
left_join(estratos_area, by = stratum_var_string)
p_h_pop <- strat_est(p_h = p_h_strata$p_hat, n_h = p_h_strata$n,
M_h = p_h_strata$M_h, N_h = p_h_strata$N_h, s2_h = p_h_strata$s2_p_hat)
return(list(p_h_strata = p_h_strata, p_h_pop = p_h_pop))
}
ratio_est <- function(M, y, N = Inf){
# computes estimate and standard error of y/M,
# used for estimates per strata (with SRS within strata) or population
# estimates with SRS
## Arguments
# M: denominator in ratio, (eg. vector of polygon areas)
# y: numerator in ratio (eg. binary vector 1 correctly classified 0 ow)
# N: number of polygons (in the strata)
n <- length(M)
M0_hat <- sum(M)
M_bar <- mean(M)
p_hat <- sum(M * y) / M0_hat
s2_p_hat <- ifelse(n > 1,
1 / (M_bar ^ 2) * 1 / (n - 1) * sum((M * (y - p_hat)) ^ 2),
0)
var_p_hat <- ifelse(n == N, 0, s2_p_hat / n * (1 - n / N))
return(dplyr::data_frame(p_hat = p_hat, se_p_hat = sqrt(var_p_hat),
s2_p_hat = s2_p_hat, n = n))
}
strat_est <- function(p_h, n_h, N_h, M_h, s2_h){
# computes overall p_hat and se(p_hat), when p_strata is a ratio estimator
## Arguments
# p_h: vector of ratio estimators per strata (as returned by ratio_est)
# n_h: vector of number of polygons sampled per strata
# s2_h: estimate of s^2 per strata (as returned by ratio_est)
# N_h: vector of number of polygons per strata (population)
# M_h: vector of strata areas
M0 <- sum(M_h) # total area
p_hat <- sum(M_h / M0 * p_h)
se_p_hat <- sqrt(sum((1 - n_h / N_h) * (M_h / M0) ^ 2 * s2_h / n_h))
return(data.frame(p_hat = p_hat, se_p_hat = se_p_hat))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.