jk_wtd_mean_se: Post-stratification jacknife estimate of SE of weighted mean

Description Usage Arguments See Also Examples

Description

Jacknife estimates of the weighted mean with a stratified sample. Weights are adjusted to take the change in n into account when obtaining the jacknife estimate dropping an observation within a stratum.

Usage

1
2
jk_wtd_mean_se(x, by = rep(1, length(x)), w = rep(1, length(x)),
  FUN = wtd_mean, na.rm = FALSE)

Arguments

x

numerical vector

by

stratification variable or list of stratification variable. Default: single level for unstratified samples

w

vector of weights, e.g. Horvitz-Thomson weights or renormed version. Default: 1

FUN

(default: wtd_mean) function used to obtain estimate

See Also

wtd_mean for weighted means, link{lin_comb} for linear combinations, jk_wtd_mean_se for a jacknife estimate of the SE of a weighted mean and jk_lin_comb_se for a jacknife estimate of the SE of a linear combination.

Other Post-stratification survey functions: jk_wtd_means, lin_comb, wtd_mean

Examples

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
## Not run: 
#
# This is a made-up survey in 3 states along the eastern edge of the Rockies.
#
ds <- read.table(header = TRUE, text = "
                 Gender   State    Income
                 Male     Utah        10
                 Male     Utah        11
                 Male     Utah        12
                 Male     Utah        13
                 Male     Utah        14
                 Male     Utah        15
                 Female   Utah        16
                 Female   Utah        17
                 Female   Utah        18
                 Female   Utah        19
                 Male     Idaho       20
                 Male     Idaho       21
                 Male     Idaho       22
                 Female   Idaho       23
                 Female   Idaho       24
                 Female   Idaho       25
                 Female   Idaho       26
                 Female   Idaho       27
                 Female   Idaho       28
                 Female   Idaho       29
                 Male     Arizona     30
                 Male     Arizona     31
                 Male     Arizona     32
                 Male     Arizona     33
                 Male     Arizona     34
                 Male     Arizona     35
                 Male     Arizona     36
                 Female   Arizona     37
                 Female   Arizona     38
                 Female   Arizona     39
                 ")
#
# and the population within each stratum (imaginary data):
# 
dpop <- read.table(header = TRUE, text = "
                   Gender   State          N
                   Male     Utah     1500000
                   Female   Utah     1500000
                   Male     Idaho     700000
                   Female   Idaho     900000
                   Male     Arizona  4000000
                   Female   Arizona  3000000
                   ")
# 
# First we create the 'overall' $n$ and $N$ variables.
# 
ds$n <- with(ds, capply(State, list(State, Gender), length))  
# 
# Note that first argument of 'capply', 'State', could be any variable in the dataset
# since it is used only for the length of the chunks within each 'State' by 'Gender' stratum.
# 
ds <- merge(ds, dpop)
head(ds)
#
# ### Overall Horvitz-Thompson weights
# 
ds$w_o_ht <- with(ds, N/n)
head(ds)
sum(ds$w_o_ht)

with(ds, wtd_mean(Income, w_o_ht))
with(ds, jk_wtd_mean_se(Income, list(Gender,State), w_o_ht))
with(ds, jk_wtd_mean_se(Income))
sd(ds$Income)/sqrt(length(ds$Income))
#
# ### Weighted mean within Utah
#
with(subset(ds, State == "Utah"), wtd_mean(Income, w_o_ht))
with(subset(ds, State == "Utah"), jk_wtd_mean_se(Income, Gender, w_o_ht))
# alternatively:
with(ds, wtd_mean(Income, w_o_ht * (State == "Utah")))
with(ds, jk_wtd_mean_se(Income, list(Gender, State), w_o_ht * (State == "Utah")))
#
# ### Weighted mean within Utah and Idaho
#
with(subset(ds, State %in% c("Utah","Idaho")), wtd_mean(Income, w_o_ht))
with(subset(ds, State %in% c("Utah","Idaho")), jk_wtd_mean_se(Income, list(Gender, State), w_o_ht))
# alternatively:
with(ds, wtd_mean(Income, w_o_ht * (State %in% c("Utah","Idaho"))))
with(ds, jk_wtd_mean_se(Income, list(Gender, State), w_o_ht * (State %in% c("Utah","Idaho"))))
#
# ### Weighted female income
#
with(subset(ds, Gender == "Female"), wtd_mean(Income, w_o_ht))
with(subset(ds, Gender == "Female"), jk_wtd_mean_se(Income, list(Gender, State), w_o_ht))
# alternatively:
with(ds, wtd_mean(Income, w_o_ht * (Gender == "Female")))
with(ds, jk_wtd_mean_se(Income, list(Gender, State), w_o_ht * (Gender == "Female")))
#
# We would get the same results with weights normed differently.
# 
# ### 'Sum to $n$' weights
# 
ds$w_o_sn <- with(ds, (N/n)/mean(N/n))
sum(ds$w_o_sn)

with(ds, wtd_mean(Income, w_o_sn))
with(ds, jk_wtd_mean_se(Income, list(Gender,State), w_o_sn))
# 
# ### 'Sum to 1' weights:
# 
ds$w_o_s1 <- with(ds, (N/n)/sum(N/n))
sum(ds$w_o_s1)

with(ds, wtd_mean(Income, w_o_s1))
with(ds, jk_wtd_mean_se(Income, list(Gender,State), w_o_s1))

#### Gender gap in mean salary in Utah

# With linear combinations, we can estimate population parameters that can't be
# estimated as weighted means. For example, the Gender gap in Utah:

coeffs <- with(ds, 
               std((State == "Utah") * (Gender == "Male") * w_o_ht)
               - std((State == "Utah") * (Gender == "Female") * w_o_ht) )
names(coeffs) <- with(ds, interaction(State,Gender))
cbind(coeffs)
with(ds, lin_comb(Income, coeffs))
with(ds, jk_lin_comb_se(Income, list(State, Gender), coeffs))
#'
#' #### Comparing two states
#' 
#' To compare Utah with Idaho using the population proportions of Gender:
#' 
coeffs <- with(ds, 
               std((State == "Utah") * w_o_ht)
               - std((State == "Idaho") *  w_o_ht) )
coeffs
names(coeffs) <- with(ds, interaction(State,Gender))
cbind(coeffs)
with(ds, lin_comb(Income, coeffs))
with(ds, jk_lin_comb_se(Income, list(State, Gender), coeffs))
#'
#' #### Unweighted average of two states
#' 
#' The unweighted average of Utah and Idaho using the Gender proportion in each state:
#' 
coeffs <- with(ds, 
               .5 * std((State == "Utah") * w_o_ht)
               + .5 * std((State == "Idaho") *  w_o_ht) )
coeffs
with(ds, lin_comb(Income, coeffs))
with(ds, jk_lin_comb_se(Income, list(State, Gender), coeffs))        
#'
#' #### The Gender gap within each state
#' 
states <- c("Idaho", "Utah", "Arizona")
names(states) <- states
coefs_gap <- lapply( states,
                     function(state)
                       with(ds, std((State == state) * (Gender == "Male") * w_o_ht) -
                              std((State == state) * (Gender == "Female") * w_o_ht) ))
mat <- do.call(cbind,coefs_gap)
rownames(mat) <- with(ds, interaction(State,Gender))
MASS::fractions(mat)
with(ds, lapply(coefs_gap, function(w) lin_comb(Income,w)))
with(ds, lapply(coefs_gap, function(w) jk_lin_comb_se(Income,list(Gender, State), w)))
#' 
#' ## Hierarchical weights
#' 
#' The following illustrates how to create within-state weights and then how to modify them to form overall 
#' weights.
#' 
#' We can start with the convenient fact that Horvitz-Thompson weights serve as both within-state and
#' overall weights.  To get within-state 'sum to $n$' weights, we need to norm the Horvitz-Thompson weights
#' within each state. We need to divide the Horvitz-Thompson weights by their within-state means:
#' 
ds$ht_ws_mean <- with(ds, capply(w_o_ht, State, mean))
ds$w_ws_sn <- with(ds, w_o_ht / ht_ws_mean)   # within-state 'sum to n' mean

with(ds, tapply(w_ws_sn, State, sum)) # check that they sum to within-state n
tab(ds, ~ State)
#'
#' As noted earlier, these weights need to be multiplied by $\frac{N_s/N}{n_s/n}$ to transform them into
#' overall 'sum to $n$' weights. The easiest way to generate $N_s$ is to use the Horvitz-Thomson weights:
#' 
ds$N_s <- with(ds, capply(w_o_ht, State, sum))
ds$n_s <- with(ds, capply(w_o_ht, State, length))
N_total <- sum(ds$w_o_ht)
n_total <- nrow(ds)

ds$w_o_sn2 <- with(ds, w_ws_sn * (N_s / N_total) / (n_s/n_total))
ds
#' Comparing these weighs with previous ones:
with(ds, max(abs(w_o_sn2 - w_o_sn)))
#'
#' Estimating means per stratum
#'
ds$mean_sg <- capply(ds, ds[c('State','Gender')], with, wtd_mean(Income, w_o_ht))
ds$mean_sg_se <- capply(ds, ds[c('State','Gender')], with, jk_wtd_mean_se(Income, w = w_o_ht))
ds$mean_s <- capply(ds, ds$State, with, wtd_mean(Income, w_o_ht))
ds$mean_s_se <- capply(ds, ds$State, with, jk_wtd_mean_se(Income, Gender, w_o_ht))
ds$mean_g <- capply(ds, ds$Gender, with, wtd_mean(Income, w_o_ht))
ds$mean_g_se <- capply(ds, ds$Gender, with, jk_wtd_mean_se(Income, State, w_o_ht))
# Define a round method for data frames so round will work on a data frame with non-numeric variables
round.data.frame <- function(x, digits = 0) as.data.frame(lapply(x, function(x) if(is.numeric(x)) round(x, digits = digits) else x))  
round(ds, 3)
dssum <- up(ds, ~ State/Gender) # keep all State x Gender invariant variables
round(dssum, 3)

## End(Not run)

gmonette/WWCa documentation built on May 17, 2019, 7:25 a.m.