declare_estimator: Declare estimator

Description Usage Arguments Details Value Custom Estimators Examples

Description

Declares an estimator which generates estimates and associated statistics.

Usage

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
declare_estimator(..., handler = estimator_handler,
  label = "estimator")

declare_estimators(..., handler = estimator_handler,
  label = "estimator")

tidy_estimator(estimator_function)

model_handler(data, ..., model = estimatr::difference_in_means,
  term = FALSE)

estimator_handler(data, ..., model = estimatr::difference_in_means,
  term = FALSE, estimand = NULL, label)

Arguments

...

arguments to be captured, and later passed to the handler

handler

a tidy-in, tidy-out function

label

a string describing the step

estimator_function

A function that takes a data.frame as an argument and returns a data.frame with the estimates, summary statistics (i.e., standard error, p-value, and confidence interval) and a label.

data

a data.frame

model

A model function, e.g. lm or glm. By default, the model is the difference_in_means function from the estimatr package.

term

Symbols or literal character vector of term that represent quantities of interest, i.e. Z. If FALSE, return the first non-intercept term; if TRUE return all term. To escape non-standard-evaluation use !!.

estimand

a declare_estimand step object, or a character label, or a list of either

Details

tidy_estimator takes an untidy estimation function, and returns a tidy handler which accepts standard labeling options.

The intent here is to factor out the estimator/estimand labeling so that it can be reused by other model handlers.

Value

A function that accepts a data.frame as an argument and returns a data.frame containing the value of the estimator and associated statistics.

Custom Estimators

estimator_functions implementations should be tidy (accept and return a data.frame)

model implementations should at a minimum provide S3 methods for summary and confint.

Examples

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# Declare estimand
my_estimand <- declare_estimand(ATE = mean(Y_Z_1 - Y_Z_0))

# Declare estimator using the default handler using `difference_in_means`
# estimator from `estimatr` package. Returns the first non-intercept term
# as estimate

my_estimator_dim <- declare_estimator(Y ~ Z, estimand = "ATE", label = "DIM")

# Use lm function from base R
my_estimator_lm <- declare_estimator(Y ~ Z, estimand = "ATE",
  model = lm, label = "LM")
# Use lm_robust (linear regression with robust standard errors) from
# `estimatr` package

my_estimator_lm_rob <- declare_estimator(
  Y ~ Z,
  estimand = "ATE",
  model = lm_robust,
  label = "LM_Robust"
)

# Set `term` if estimate of interest is not the first non-intercept variable
my_estimator_lm_rob_x <- declare_estimator(
  Y ~ X + Z,
  estimand = my_estimand,
  term = "Z",
  model = lm_robust
)

# Use glm from base R
my_estimator_glm <- declare_estimator(
  Y ~ X + Z,
  family = "gaussian",
  estimand = my_estimand,
  term = "Z",
  model = glm
)

# A probit
estimator_probit <- declare_estimator(
  Y ~ Z,
  model = glm,
  family = binomial(link = "probit"),
  term = "Z"
)

# Declare estimator using a custom handler

# Define your own estimator and use the `tidy_estimator` function for labeling
# Must have `data` argument that is a data.frame
my_estimator_function <- function(data){
  data.frame(estimate = with(data, mean(Y)))
}

my_estimator_custom <- declare_estimator(
  handler = tidy_estimator(my_estimator_function),
  estimand = my_estimand
)

# Customize labeling

my_estimator_function <- function(data){
  data.frame(
    estimator_label = "foo",
    estimand_label = "bar",
    estimate = with(data, mean(Y)),
    n = nrow(data),
    stringsAsFactors = FALSE
  )
}

my_estimator_custom2 <- declare_estimator(handler = my_estimator_function)


# Examples

# First, set up the rest of a design
set.seed(42)

design_def <-
  declare_population(N = 100, X = rnorm(N), W = rexp(N, 1), noise = rnorm(N)) +
  declare_potential_outcomes(Y ~ .25 * Z + noise) +
  declare_estimand(ATE = mean(Y_Z_1 - Y_Z_0)) +
  declare_assignment(m = 50) +
  declare_reveal() +
  my_estimator_dim

draw_estimates(design_def)

# Can also use declared estimator on a data.frame
dat <- draw_data(design_def)
my_estimator_dim(dat)

# ----------
# 2. Using existing estimators
# ----------

design <- replace_step(design_def, my_estimator_dim, my_estimator_lm_rob)
draw_estimates(design)

design <- replace_step(design_def, my_estimator_dim, my_estimator_lm)
draw_estimates(design)

design <- replace_step(design_def, my_estimator_dim, my_estimator_glm)
draw_estimates(design)

# ----------
# 3. Using custom estimators
# ----------

design <- replace_step(design_def, my_estimator_dim, my_estimator_custom)

draw_estimates(design)

# The names in your custom estimator return should match with
# your diagnosands when diagnosing a design
my_median <- function(data) data.frame(med = median(data$Y))

my_estimator_median <- declare_estimator(
  handler = tidy_estimator(my_median),
  estimand = my_estimand
)

design <- replace_step(design_def, my_estimator_dim, my_estimator_median)

draw_estimates(design)

my_diagnosand <- declare_diagnosands(med_to_estimand = mean(med - estimand),
  keep_defaults = FALSE)

## Not run: 
diagnose_design(design, diagnosands = my_diagnosand, sims = 5,
  bootstrap_sims = FALSE)

## End(Not run)

# ----------
# 4. Multiple estimators per estimand
# ----------

design_two <- insert_step(design_def,  my_estimator_lm,
  after = my_estimator_dim)

draw_estimates(design_two)

## Not run: 
diagnose_design(design_two, sims = 5, bootstrap_sims = FALSE)

## End(Not run)

DeclareDesign/ddoldversion documentation built on Oct. 30, 2019, 5:17 p.m.