jk_wtd_mean_se | R Documentation |
Jacknife estimates of the weighted mean with a stratified sample. Weights are adjusted to take the change in n into account when obtaining the jacknife estimate dropping an observation within a stratum.
jk_wtd_mean_se(
x,
by = rep(1, length(x)),
w = rep(1, length(x)),
FUN = wtd_mean
)
x |
numerical vector |
by |
stratification variable or list of stratification variable. Default: single level for unstratified samples |
w |
vector of weights, e.g. Horvitz-Thomson weights or renormed version. Default: 1 |
FUN |
(default: wtd_mean) function used to obtain estimate |
wtd_mean
for weighted means, link{lin_comb}
for linear
combinations, jk_wtd_mean_se
for a jacknife estimate of the SE of a weighted mean
and jk_lin_comb_se
for a jacknife estimate of the SE of a linear combination.
Other Post-stratification survey functions:
jk_wtd_means()
,
lin_comb()
,
wtd_mean()
## Not run:
#
# This is a made-up survey in 3 states along the eastern edge of the Rockies.
#
ds <- read.table(header = TRUE, text = "
Gender State Income
Male Utah 10
Male Utah 11
Male Utah 12
Male Utah 13
Male Utah 14
Male Utah 15
Female Utah 16
Female Utah 17
Female Utah 18
Female Utah 19
Male Idaho 20
Male Idaho 21
Male Idaho 22
Female Idaho 23
Female Idaho 24
Female Idaho 25
Female Idaho 26
Female Idaho 27
Female Idaho 28
Female Idaho 29
Male Arizona 30
Male Arizona 31
Male Arizona 32
Male Arizona 33
Male Arizona 34
Male Arizona 35
Male Arizona 36
Female Arizona 37
Female Arizona 38
Female Arizona 39
")
#
# and the population within each stratum (imaginary data):
#
dpop <- read.table(header = TRUE, text = "
Gender State N
Male Utah 1500000
Female Utah 1500000
Male Idaho 700000
Female Idaho 900000
Male Arizona 4000000
Female Arizona 3000000
")
#
# First we create the 'overall' $n$ and $N$ variables.
#
ds$n <- with(ds, capply(State, list(State, Gender), length))
#
# Note that first argument of 'capply', 'State', could be any variable in the dataset
# since it is used only for the length of the chunks within each 'State' by 'Gender' stratum.
#
ds <- merge(ds, dpop)
head(ds)
#
# ### Overall Horvitz-Thompson weights
#
ds$w_o_ht <- with(ds, N/n)
head(ds)
sum(ds$w_o_ht)
with(ds, wtd_mean(Income, w_o_ht))
with(ds, jk_wtd_mean_se(Income, list(Gender,State), w_o_ht))
with(ds, jk_wtd_mean_se(Income))
sd(ds$Income)/sqrt(length(ds$Income))
#
# ### Weighted mean within Utah
#
with(subset(ds, State == "Utah"), wtd_mean(Income, w_o_ht))
with(subset(ds, State == "Utah"), jk_wtd_mean_se(Income, Gender, w_o_ht))
# alternatively:
with(ds, wtd_mean(Income, w_o_ht * (State == "Utah")))
with(ds, jk_wtd_mean_se(Income, list(Gender, State), w_o_ht * (State == "Utah")))
#
# ### Weighted mean within Utah and Idaho
#
with(subset(ds, State %in% c("Utah","Idaho")), wtd_mean(Income, w_o_ht))
with(subset(ds, State %in% c("Utah","Idaho")), jk_wtd_mean_se(Income, list(Gender, State), w_o_ht))
# alternatively:
with(ds, wtd_mean(Income, w_o_ht * (State %in% c("Utah","Idaho"))))
with(ds, jk_wtd_mean_se(Income, list(Gender, State), w_o_ht * (State %in% c("Utah","Idaho"))))
#
# ### Weighted female income
#
with(subset(ds, Gender == "Female"), wtd_mean(Income, w_o_ht))
with(subset(ds, Gender == "Female"), jk_wtd_mean_se(Income, list(Gender, State), w_o_ht))
# alternatively:
with(ds, wtd_mean(Income, w_o_ht * (Gender == "Female")))
with(ds, jk_wtd_mean_se(Income, list(Gender, State), w_o_ht * (Gender == "Female")))
#
# We would get the same results with weights normed differently.
#
# ### 'Sum to $n$' weights
#
ds$w_o_sn <- with(ds, (N/n)/mean(N/n))
sum(ds$w_o_sn)
with(ds, wtd_mean(Income, w_o_sn))
with(ds, jk_wtd_mean_se(Income, list(Gender,State), w_o_sn))
#
# ### 'Sum to 1' weights:
#
ds$w_o_s1 <- with(ds, (N/n)/sum(N/n))
sum(ds$w_o_s1)
with(ds, wtd_mean(Income, w_o_s1))
with(ds, jk_wtd_mean_se(Income, list(Gender,State), w_o_s1))
#### Gender gap in mean salary in Utah
# With linear combinations, we can estimate population parameters that can't be
# estimated as weighted means. For example, the Gender gap in Utah:
coeffs <- with(ds,
std((State == "Utah") * (Gender == "Male") * w_o_ht)
- std((State == "Utah") * (Gender == "Female") * w_o_ht) )
names(coeffs) <- with(ds, interaction(State,Gender))
cbind(coeffs)
with(ds, lin_comb(Income, coeffs))
with(ds, jk_lin_comb_se(Income, list(State, Gender), coeffs))
#'
#' #### Comparing two states
#'
#' To compare Utah with Idaho using the population proportions of Gender:
#'
coeffs <- with(ds,
std((State == "Utah") * w_o_ht)
- std((State == "Idaho") * w_o_ht) )
coeffs
names(coeffs) <- with(ds, interaction(State,Gender))
cbind(coeffs)
with(ds, lin_comb(Income, coeffs))
with(ds, jk_lin_comb_se(Income, list(State, Gender), coeffs))
#'
#' #### Unweighted average of two states
#'
#' The unweighted average of Utah and Idaho using the Gender proportion in each state:
#'
coeffs <- with(ds,
.5 * std((State == "Utah") * w_o_ht)
+ .5 * std((State == "Idaho") * w_o_ht) )
coeffs
with(ds, lin_comb(Income, coeffs))
with(ds, jk_lin_comb_se(Income, list(State, Gender), coeffs))
#'
#' #### The Gender gap within each state
#'
states <- c("Idaho", "Utah", "Arizona")
names(states) <- states
coefs_gap <- lapply( states,
function(state)
with(ds, std((State == state) * (Gender == "Male") * w_o_ht) -
std((State == state) * (Gender == "Female") * w_o_ht) ))
mat <- do.call(cbind,coefs_gap)
rownames(mat) <- with(ds, interaction(State,Gender))
MASS::fractions(mat)
with(ds, lapply(coefs_gap, function(w) lin_comb(Income,w)))
with(ds, lapply(coefs_gap, function(w) jk_lin_comb_se(Income,list(Gender, State), w)))
#'
#' ## Hierarchical weights
#'
#' The following illustrates how to create within-state weights and then how to modify them to form overall
#' weights.
#'
#' We can start with the convenient fact that Horvitz-Thompson weights serve as both within-state and
#' overall weights. To get within-state 'sum to $n$' weights, we need to norm the Horvitz-Thompson weights
#' within each state. We need to divide the Horvitz-Thompson weights by their within-state means:
#'
ds$ht_ws_mean <- with(ds, capply(w_o_ht, State, mean))
ds$w_ws_sn <- with(ds, w_o_ht / ht_ws_mean) # within-state 'sum to n' mean
with(ds, tapply(w_ws_sn, State, sum)) # check that they sum to within-state n
tab(ds, ~ State)
#'
#' As noted earlier, these weights need to be multiplied by $\frac{N_s/N}{n_s/n}$ to transform them into
#' overall 'sum to $n$' weights. The easiest way to generate $N_s$ is to use the Horvitz-Thomson weights:
#'
ds$N_s <- with(ds, capply(w_o_ht, State, sum))
ds$n_s <- with(ds, capply(w_o_ht, State, length))
N_total <- sum(ds$w_o_ht)
n_total <- nrow(ds)
ds$w_o_sn2 <- with(ds, w_ws_sn * (N_s / N_total) / (n_s/n_total))
ds
#' Comparing these weighs with previous ones:
with(ds, max(abs(w_o_sn2 - w_o_sn)))
#'
#' Estimating means per stratum
#'
ds$mean_sg <- capply(ds, ds[c('State','Gender')], with, wtd_mean(Income, w_o_ht))
ds$mean_sg_se <- capply(ds, ds[c('State','Gender')], with, jk_wtd_mean_se(Income, w = w_o_ht))
ds$mean_s <- capply(ds, ds$State, with, wtd_mean(Income, w_o_ht))
ds$mean_s_se <- capply(ds, ds$State, with, jk_wtd_mean_se(Income, Gender, w_o_ht))
ds$mean_g <- capply(ds, ds$Gender, with, wtd_mean(Income, w_o_ht))
ds$mean_g_se <- capply(ds, ds$Gender, with, jk_wtd_mean_se(Income, State, w_o_ht))
# Define a round method for data frames so round will work on a data frame with non-numeric variables
round.data.frame <- function(x, digits = 0) as.data.frame(lapply(x, function(x) if(is.numeric(x)) round(x, digits = digits) else x))
round(ds, 3)
dssum <- up(ds, ~ State/Gender) # keep all State x Gender invariant variables
round(dssum, 3)
## End(Not run)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.