declare_estimator: Declare Estimator

Description Usage Arguments Value Custom Estimators Examples

Description

Declares an estimator which generates estimates and associated statistics.

The intent here is to factor out the estimator/estimand labeling so that it can be reused by other model handlers.

Usage

1
2
3
4
5
6
7
8
9
declare_estimator(..., handler = estimator_handler, label = "my_estimator")

tidy_estimator(estimator_function)

model_handler(data, ..., model = estimatr::difference_in_means,
  coefficients = FALSE)

estimator_handler(data, ..., model = estimatr::difference_in_means,
  coefficients = FALSE, estimand = NULL, label)

Arguments

...

arguments to be captured, and later passed to the handler

handler

a tidy-in, tidy-out function

label

a string describing the step

estimator_function

A function that takes a data.frame as an argument and returns a data.frame with the estimates, summary statistics (i.e., standard error, p-value, and confidence interval) and a label.

data

a data.frame

model

A model function, e.g. lm or glm. By default, the model is the difference_in_means function from the estimatr package.

coefficients

Symbols or literal character vector of coefficients that represent quantities of interest, e.g. Z. If FALSE, return the first non-intercept coefficient; if TRUE, return all coefficients. To escape non-standard evaluation, use !!.

estimand

a declare_estimand step object, or a character label, or a list of either

Value

a function that accepts a data.frame as an argument and returns a data.frame containing the value of the estimator and associated statistics.

Custom Estimators

estimator_function implementations should be tidy (accept and return a data.frame).

model implementations should at a minimum provide S3 methods for summary and confint.

Examples

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
########################################################
# Default handler

# Declare the estimand under the name "ATE". The estimators below refer to
# it either by that character label or by passing the `my_estimand` object.
my_estimand <- declare_estimand(ATE=mean(Y_Z_1-Y_Z_0))

# Automatically uses first non-intercept coefficient as estimate
# Default method is the `difference_in_means` estimator from `estimatr`

my_estimator_dim <- declare_estimator(Y ~ Z, estimand = "ATE", label = "DIM")

# lm from base R
my_estimator_lm <- declare_estimator(Y ~ Z, estimand = "ATE", model = lm, label = "LM")
# Use linear regression with robust standard errors from `estimatr` package
my_estimator_lm_rob <- declare_estimator(
  Y ~ Z,
  estimand = "ATE",
  model = estimatr::lm_robust,
  label = "LM_Robust"
)

# Set `coefficients` if the estimate of interest is not the first
# non-intercept variable (here Z comes after X in the formula)
my_estimator_lm_rob_x <- declare_estimator(
  Y ~ X + Z,
  estimand = my_estimand,
  coefficients = "Z",
  model = estimatr::lm_robust
)

# Use glm from base R
# `family` is not an argument of the estimator handler; it is captured via
# `...` (presumably forwarded on to `glm` -- confirm against the handler).
my_estimator_glm <- declare_estimator(
  Y ~ X + Z,
  family = "gaussian",
  estimand = my_estimand,
  coefficients = "Z",
  model = glm
)

# A probit: glm with a binomial family and probit link.
# No `estimand` is supplied here, so the handler's default (NULL) applies.
estimator_probit <- declare_estimator(
  Y ~ Z,
  model = glm,
  family = binomial(link = "probit"),
  coefficients = "Z"
)

########################################################
# Custom handlers

# Write your own estimator and let `tidy_estimator` take care of labeling.
# The function must accept a `data` argument that is a data.frame and
# return a data.frame; here it returns one column, `est`, holding the
# mean of the outcome Y.
my_estimator_function <- function(data) {
  outcome_mean <- with(data, mean(Y))
  data.frame(est = outcome_mean)
}

# Wrap the custom function with `tidy_estimator` (which handles the
# labeling) and use the result as the handler. The estimand is passed as
# the declared estimand object rather than as a character label.
my_estimator_custom <- declare_estimator(
  handler = tidy_estimator(my_estimator_function),
  estimand = my_estimand
)

# Custom estimator function that supplies its own labels.
# Returns a one-row data.frame with the estimator label ("foo"), the
# estimand label ("bar"), the mean of Y as the estimate, and the number
# of rows in `data`. Labels are kept as character, not factor.
my_estimator_function <- function(data) {
  outcome_mean <- with(data, mean(Y))
  sample_size <- nrow(data)

  data.frame(
    estimator_label = "foo",
    estimand_label = "bar",
    est = outcome_mean,
    n = sample_size,
    stringsAsFactors = FALSE
  )
}

# This function already returns its own `estimator_label` and
# `estimand_label` columns, so it is used directly as the handler,
# without wrapping it in `tidy_estimator`.
my_estimator_custom2 <- declare_estimator(handler = my_estimator_function)

########################################################
# Examples

# First, set up the rest of a design
# Fix the random seed so the simulated draws below are reproducible.
set.seed(42)

# The design chains its steps in order: population, potential outcomes,
# estimand, assignment, reveal, and finally the difference-in-means
# estimator declared earlier (`my_estimator_dim`).
design_def <- declare_design(
  declare_population(N = 100, X = rnorm(N), W=rexp(N,1), noise=rnorm(N)),
  declare_potential_outcomes(Y ~ .25 * Z + noise),
  declare_estimand(ATE = mean(Y_Z_1 - Y_Z_0)),
  declare_assignment(m = 50),
  declare_reveal(),
  my_estimator_dim
)

run_design(design_def)

# Can also use declared estimator on a data.frame
dat <- draw_data(design_def)
my_estimator_dim(dat)

# ----------
# 2. Using existing estimators
# ----------

# Each variant below swaps the `my_estimator_dim` step of `design_def`
# for a different declared estimator and re-runs the design.

# Robust linear regression (estimatr::lm_robust)
design <- replace_step(design_def, my_estimator_dim, my_estimator_lm_rob)

run_design(design)


# Base R lm
design <- replace_step(design_def, my_estimator_dim, my_estimator_lm)

run_design(design)


# Base R glm
design <- replace_step(design_def, my_estimator_dim, my_estimator_glm)

run_design(design)

# ----------
# 3. Using custom estimators
# ----------


# Custom estimator function wrapped with `tidy_estimator`
design <- replace_step(design_def, my_estimator_dim, my_estimator_custom)

run_design(design)

# The column names returned by a custom estimator should match the names
# used in your diagnosands when diagnosing a design.
# This estimator returns a single column, `med`: the median of Y.
my_median <- function(data) {
  outcome_median <- median(data$Y)
  data.frame(med = outcome_median)
}

# Wrap `my_median` with `tidy_estimator` and tie it to the declared estimand.
my_estimator_median <- declare_estimator(
  handler = tidy_estimator(my_median),
  estimand = my_estimand
)

design <- replace_step(design_def, my_estimator_dim, my_estimator_median)

run_design(design)

# The diagnosand refers to `med`, the column name returned by `my_median`.
# `sims = 5` with `bootstrap = FALSE` is presumably chosen to keep the
# example quick -- confirm before using these settings for real diagnosis.
my_diagnosand <- declare_diagnosands(med_to_estimand = mean(med - estimand))
diagnose_design(design, diagnosands = my_diagnosand, sims = 5, bootstrap = FALSE)

# ----------
# 4. Multiple estimators per estimand
# ----------

# Insert the lm estimator after the difference-in-means step, so the
# design contains both estimators (both were declared with estimand "ATE").
design_two <- insert_step(design_def,  my_estimator_lm,  after=my_estimator_dim)

run_design(design_two)
diagnose_design(design_two, sims = 5, bootstrap = FALSE)

graemeblair/DeclareDesign documentation built on May 8, 2018, 1:24 p.m.