diagnose_design: Diagnose the design
In DeclareDesign: Declare and Diagnose Research Designs

diagnose_design

R Documentation

Diagnose the design

Description

Generates diagnosands from a design or simulations of a design.

Usage

diagnose_design(
  ...,
  diagnosands = NULL,
  sims = 500,
  bootstrap_sims = 100,
  make_groups = NULL,
  add_grouping_variables = NULL
)

diagnose_designs(
  ...,
  diagnosands = NULL,
  sims = 500,
  bootstrap_sims = 100,
  make_groups = NULL,
  add_grouping_variables = NULL
)

vars(...)

Arguments

`...`	A design or set of designs typically created using the + operator, or a `data.frame` of simulations, typically created by `simulate_design`.
`diagnosands`	A set of diagnosands created by `declare_diagnosands`. By default, these include bias, root mean-squared error, power, frequentist coverage, the mean and standard deviation of the estimate(s), the "type S" error rate (Gelman and Carlin 2014), and the mean of the inquiry(s).
`sims`	The number of simulations, defaulting to 500. sims may also be a vector indicating the number of simulations for each step in a design, as described for `simulate_design`
`bootstrap_sims`	Number of bootstrap replicates for the diagnosands to obtain the standard errors of the diagnosands, defaulting to `100`. Set to FALSE to turn off bootstrapping.
`make_groups`	Add group variables within which diagnosand values will be calculated. New variables can be created or variables already in the simulations data frame selected. Type name-value pairs within the function `vars`, i.e. `vars(significant = p.value <= 0.05)`.
`add_grouping_variables`	Deprecated. Please use make_groups instead. Variables used to generate groups of simulations for diagnosis. Added to default list: c("design", "estimand_label", "estimator", "outcome", "term")

Details

If the diagnosand function contains a group_by attribute, it will be used to split-apply-combine diagnosands rather than the intersecting column names.

If sims is named, or longer than one element, a fan-out strategy is created and used instead.

If the packages future and future.apply are installed, you can set plan to run multiple simulations in parallel.

Value

a list with a data frame of simulations, a data frame of diagnosands, a vector of diagnosand names, and if calculated, a data frame of bootstrap replicates.

Examples


# Two-arm randomized experiment
n <- 500

design <-
  declare_model(
    N = 1000,
    gender = rbinom(N, 1, 0.5),
    X = rep(c(0, 1), each = N / 2),
    U = rnorm(N, sd = 0.25),
    potential_outcomes(Y ~ 0.2 * Z + X + U)
  ) +
  declare_inquiry(ATE = mean(Y_Z_1 - Y_Z_0)) +
  declare_sampling(S = complete_rs(N = N, n = n)) +
  declare_assignment(Z = complete_ra(N = N, m = n/2)) +
  declare_measurement(Y = reveal_outcomes(Y ~ Z)) +
  declare_estimator(Y ~ Z, inquiry = "ATE")

## Not run: 
# Diagnose design using default diagnosands
diagnosis <- diagnose_design(design)
diagnosis

# Use tidy to produce data.frame with bootstrapped standard 
# errors and confidence intervals for each diagnosand
diagnosis_df <- tidy(diagnosis)
diagnosis_df

# Use sims argument to change the number of simulations used 
# to calculate diagnosands, and bootstrap_sims to change how
# many bootstraps are uses to calculate standard errors.
diagnosis <- diagnose_design(design,
                             sims = 500,
                             bootstrap_sims = 150)
tidy(diagnosis)

# Select specific diagnosands
reshape_diagnosis(diagnosis, select = "Power")

# Use your own diagnosands
my_diagnosands <-
  declare_diagnosands(median_bias = median(estimate - estimand),
                      absolute_error = mean(abs(estimate - estimand)))

diagnosis <- diagnose_design(design, diagnosands = my_diagnosands)
diagnosis

get_diagnosands(diagnosis)

get_simulations(diagnosis)

# Diagnose using an existing data frame of simulations
simulations <- simulate_design(design, sims = 500)
diagnosis   <- diagnose_design(simulations_df = simulations)
diagnosis


## End(Not run)

# If you do not specify diagnosands, the function default_diagnosands() is used, 
#   which is reproduced below.

alpha <- 0.05

default_diagnosands <- 
  declare_diagnosands(
    mean_estimand = mean(estimand),
    mean_estimate = mean(estimate),
    bias = mean(estimate - estimand),
    sd_estimate = sqrt(pop.var(estimate)),
    rmse = sqrt(mean((estimate - estimand) ^ 2)),
    power = mean(p.value <= alpha),
    coverage = mean(estimand <= conf.high & estimand >= conf.low)
  )
  
diagnose_design(
  design, 
  diagnosands = default_diagnosands
)

# A longer list of useful diagnosands might include:

extended_diagnosands <- 
  declare_diagnosands(
    mean_estimand = mean(estimand),
    mean_estimate = mean(estimate),
    bias = mean(estimate - estimand),
    sd_estimate = sd(estimate),
    rmse = sqrt(mean((estimate - estimand) ^ 2)),
    power = mean(p.value <= alpha),
    coverage = mean(estimand <= conf.high & estimand >= conf.low),
    mean_se = mean(std.error),
    type_s_rate = mean((sign(estimate) != sign(estimand))[p.value <= alpha]),
    exaggeration_ratio = mean((estimate/estimand)[p.value <= alpha]),
    var_estimate = pop.var(estimate),
    mean_var_hat = mean(std.error^2),
    prop_pos_sig = mean(estimate > 0 & p.value <= alpha),
    mean_ci_length = mean(conf.high - conf.low)
  )
  
## Not run: 
diagnose_design(
  design, 
  diagnosands = extended_diagnosands
)

# Adding a group for within group diagnosis:
diagnosis <- diagnose_design(design, 
                             make_groups = vars(significant = p.value <= 0.05),
)
diagnosis

n <- 500
design <-
  declare_model(
    N = 1000,
    gender = rbinom(N, 1, 0.5),
    X = rep(c(0, 1), each = N / 2),
    U = rnorm(N, sd = 0.25),
    potential_outcomes(Y ~ rnorm(1) * Z + X + U)
  ) +
  declare_inquiry(ATE = mean(Y_Z_1 - Y_Z_0)) +
  declare_sampling(S = complete_rs(N = N, n = n)) +
  declare_assignment(Z = complete_ra(N = N, m = n/2)) +
  declare_measurement(Y = reveal_outcomes(Y ~ Z)) +
  declare_estimator(Y ~ Z, inquiry = "ATE")

diagnosis <- diagnose_design(design, 
                             make_groups = 
                               vars(effect_size = 
                                      cut(estimand, quantile(estimand, (0:4)/4), 
                                          include.lowest = TRUE)),
)
diagnosis

# redesign can be used in conjunction with diagnose_designs
# to optimize the design for specific diagnosands
design_vary_N <- redesign(design, n = c(100, 500, 900))
diagnose_designs(design_vary_N)

# Calculate and plot the power of a design over a range of 
# effect sizes
design <-
  declare_model(
    N = 200,
    U = rnorm(N),
    potential_outcomes(Y ~ runif(1, 0.0, 0.5) * Z + U)
  ) +
  declare_inquiry(ATE = mean(Y_Z_1 - Y_Z_0)) +
  declare_assignment(Z = complete_ra(N)) +
  declare_measurement(Y = reveal_outcomes(Y ~ Z)) +
  declare_estimator(Y ~ Z, inquiry = "ATE") 

library(tidyverse)

simulations_df <- 
  diagnose_design(design) |> 
  get_simulations() |> 
  mutate(significant = if_else(p.value <= 0.05, 1, 0))

ggplot(simulations_df) + 
  stat_smooth(
    aes(estimand, significant), 
    method = 'loess', 
    color = "#3564ED", 
    fill = "#72B4F3", 
    formula = 'y ~ x'
  ) +
  geom_hline(
  yintercept = 0.8, color = "#C6227F", linetype = "dashed") +
  annotate("text", x = 0, y = 0.85, 
    label = "Conventional power threshold = 0.8", 
    hjust = 0, color = "#C6227F") + 
  scale_y_continuous(breaks = seq(0, 1, 0.2)) +
  coord_cartesian(ylim = c(0, 1)) +
  theme(legend.position = "none") +
  labs(x = "Model parameter: true effect size",
       y = "Diagnosand: statistical power") +
  theme_minimal()

## End(Not run)

DeclareDesign documentation built on Aug. 8, 2023, 5:13 p.m.