#' Summarizing the Specifications Setup
#'
#' \code{summary} method for class "specr.setup". Provides a short summary of the
#' created specifications (the "multiverse") that lists all analytic choices, prints
#' the function used to extract the parameters from the model. Finally, if
#' \code{print.specs = TRUE}, it also shows the head of the actual specification grid.
#'
#' @param object An object of class "specr.setup", usually, a result of a call to `setup`.
#' @param digits The number of digits to use when printing the specification table.
#' @param rows The number of rows of the specification tibble that should be printed.
#' @param print.specs Logical value; if `TRUE`, a head of the specification tibble
#' is returned and printed.
#' @param ... further arguments passed to or from other methods (currently ignored).
#'
#' @return A printed summary of an object of class \code{specr.setup}.
#'
#' @export
#'
#' @seealso The function [setup()], which creates the "specr.setup" object.
#'
#' @examples
#' # Setup specifications
#' specs <- setup(data = example_data,
#' x = c("x1", "x2"),
#' y = c("y1", "y2"),
#' model = c("lm", "glm"),
#' controls = c("c1", "c2", "c3"),
#' subsets = list(group3 = unique(example_data$group3)))
#'
#' # Summarize specifications
#' summary(specs)
summary.specr.setup <- function(object,
digits = 2,
rows = 6,
print.specs = TRUE,
...) {
cat("Setup for the Specification Curve Analysis\n")
cat("-------------------------------------------\n")
cat("Class: ", class(object), "-- version:", as.character(utils::packageVersion("specr")), "\n")
cat("Number of specifications: ", as.numeric(object$n_specs), "\n\n")
cat("Specifications:\n\n")
cat(" Independent variable: ", paste0(unique(object$x), collapse = ", "), "\n")
cat(" Dependent variable: ", paste0(unique(object$y), collapse = ", "), "\n")
cat(" Models: ", paste0(unique(object$model), collapse = ", "), "\n")
cat(" Covariates: ", sub("\\b1\\b", "1 (none)", paste0(unique(object$specs$controls), collapse = ", ")), "\n")
cat(" Subsets analyses: ", paste0(unique(object$specs$subsets), collapse = ", "), "\n\n")
cat("Function used to extract parameters:\n\n")
cat(" ")
print(object$fun1)
if(isTRUE(print.specs)) {
cat("\n\nHead of specifications table (first", rows, "rows):\n\n")
object$specs %>%
select(-.data$model_function) %>%
mutate_if(is.numeric, round, digits) %>%
utils::head(n = rows)
}
}
#' Print method for S3 class "specr.setup"
#' @keywords internal
#' @export
print.specr.setup <- function(x, ...) {
cat("Number of specifications: ", as.numeric(x$n_specs), "\n\n")
}
#' Plot visualization of the specification setup
#'
#' @description This function plots a visual summary of the specification setup.
#' It requires an object of class \code{specr.setup}, usually
#' the result of calling \code{setup()}.
#'
#' @param x A `specr.setup` object, usually resulting from calling \code{setup()}.
#' @param layout The type of layout to create for the garden of forking path. Defaults to "dendrogram". See `?ggraph` for options.
#' @param circular Should the layout be transformed into a radial representation. Only possible for some layouts. Defaults to FALSE.
#' @param ... further arguments passed to or from other methods (currently ignored).
#'
#' @return A \link[ggplot2]{ggplot} object that can be customized further.
#'
#' @export
#'
#' @examples
#' \dontrun{
#' specs <- setup(data = example_data,
#' x = c("x1", "x2", "x3"),
#' y = c("y1", "y2"),
#' model = c("lm", "glm"),
#' controls = "c1",
#' subsets = list(group2 = unique(example_data$group2)))
#'
#' plot(specs)
#' plot(specs, circular = TRUE)
#' }
plot.specr.setup <- function(x,
layout = "dendrogram",
circular = FALSE,
...) {
decision <- number <- NULL
df <- dplyr::tibble(decision = factor(c("model", "x", "y", "controls", "subsets"),
levels = c("model", "x", "y", "controls", "subsets")),
number = c(length(unique(x$specs$model)),
length(unique(x$specs$x)),
length(unique(x$specs$y)),
length(unique(x$specs$controls)),
length(unique(x$specs$subsets))))
p1 <- df %>%
ggplot(aes(x = decision, y = number)) +
geom_col(fill = "steelblue") +
scale_y_continuous(n.breaks = max(df$number)) +
theme_classic() +
labs(x = "", y = "number of options")
df <- x$specs %>%
select(.data$model, .data$x, .data$y, .data$controls, .data$subsets) %>%
arrange(.data$model, .data$x, .data$y, .data$controls, .data$subsets) %>%
mutate(start = "raw data") %>%
select(start, dplyr::everything()) %>%
mutate(x = paste0(.data$x, " & ", .data$model),
y = paste0(.data$y, " & ", .data$x),
controls = paste0(.data$controls, " & ", .data$y),
subsets = paste0(.data$subsets, " & ", .data$controls))
# Create edges
edges_level1 <- df %>%
select(.data$start, .data$model) %>%
rename(from = .data$start, to = .data$model) %>%
unique %>%
mutate(decisions = "model")
edges_level2 <- df %>%
select(.data$model, .data$x) %>%
rename(from = .data$model, to = .data$x) %>%
unique %>%
mutate(decisions = "independent variable")
edges_level3 <- df %>%
select(.data$x, .data$y) %>%
rename(from = .data$x, to = .data$y) %>%
unique %>%
mutate(decisions = "dependent variable")
edges_level4 = df %>%
select(.data$y, .data$controls) %>%
rename(from = .data$y, to = .data$controls) %>%
mutate(decisions = "control variables")
edges_level5 <- df %>%
select(.data$controls, .data$subsets) %>%
rename(from = .data$controls, to = .data$subsets) %>%
mutate(decisions = "subsets")
# Combine edges
edge_list <- rbind(edges_level1,
edges_level2,
edges_level3,
edges_level4,
edges_level5)
# Plot edges
p2 <- edge_list %>%
graph_from_data_frame %>%
ggraph::ggraph(layout = layout,
circular = circular) +
ggraph::geom_edge_diagonal() +
ggraph::geom_node_point(fill = "white", shape = 21) +
theme_void()
# Combine plots
plot_grid(p1, p2,
labels = c("A", "B"),
align = "h",
ncol = 2)
}
#' Return tibble from specr.setup object
#' @keywords internal
#' @export
as_tibble.specr.setup <- function(x, ...) {
as_tibble(x$specs, ...)
}
#' Return tibble from specr.setup object
#' @keywords internal
#' @export
as.data.frame.specr.setup <- function(x, ...) {
as.data.frame(x$specs, ...)
}
#' Summarizing the Specification Curve Analysis
#'
#' `summary` method for class "specr". It provides a printed output including
#' technical details (e.g., cores used, duration of the fitting process, number
#' of specifications), a descriptive analysis of the overall specification curve,
#' a descriptive summary of the resulting sample sizes, and a head of the results.
#'
#' @param object An object of class "specr", usually resulting of a call to `specr`.
#' @param type Different aspects can be summarized and printed. See details for alternative summaries
#' @param group In combination with `what = "curve"`, provide a vector of one or more variables (e.g., subsets, controls,...) that denote the available analytic choices to group summary of the estimate.
#' @param var In combination with `what = "curve"`, unquoted name of parameter to be summarized. Defaults to estimate.
#' @param stats Named vector or named list of summary functions (individually defined summary functions can included). If it is not named, placeholders (e.g., "fn1") will be used as column names.
#' @param digits The number of digits to use when printing the specification table.
#' @param rows The number of rows of the specification tibble that should be printed.
#' @param ... further arguments passed to or from other methods (currently ignored).
#'
#' @return A printed summary of an object of class \code{specr.object}.
#'
#' @export
#'
#' @examples
#' # Setup up specifications (returns object of class "specr.setup")
#' specs <- setup(data = example_data,
#' y = c("y1", "y2"),
#' x = c("x1", "x2"),
#' model = "lm",
#' controls = c("c1", "c2"),
#' subsets = list(group1 = unique(example_data$group1)))
#'
#' # Run analysis (returns object of class "specr.object")
#' results <- specr(specs)
#'
#' # Default summary of the "specr.object"
#' summary(results)
#'
#' # Summarize the specification curve descriptively
#' summary(results, type = "curve")
#'
#' # Grouping for certain analytical decisions
#' summary(results,
#' type = "curve",
#' group = c("x", "y"))
#'
#' # Using customized functions
#' summary(results,
#' type = "curve",
#' group = c("x", "group1"),
#' stats = list(median = median,
#' min = min,
#' max = max))
#' @seealso The function used to create the "specr.setup" object: `setup`.
summary.specr.object <- function(object,
type = "default",
group = NULL,
var = .data$estimate,
stats = list(median = median, mad = mad, min = min, max = max,
q25 = function(x) quantile(x, prob = .25),
q75 = function(x) quantile(x, prob = .75)),
digits = 2,
rows = 6,
...){
var <- enquo(var)
if(type == "default") {
# Short technical summary
cat("Results of the specification curve analysis\n")
cat("-------------------\n")
cat("Technical details:\n\n")
cat(" Class: ", class(object), "-- version:", as.character(utils::packageVersion("specr")), "\n")
cat(" Cores used: ", object$workers, "\n")
cat(" Duration of fitting process: ", object$time, "\n")
cat(" Number of specifications: ", as.numeric(object$n_specs), "\n\n")
# Short descriptive analysis across all specifications
cat("Descriptive summary of the specification curve:\n\n")
object$data %>%
summarize_at(vars(!! var), stats) %>%
as.data.frame %>%
round(digits) %>%
print(row.names = FALSE)
cat("\n")
# Head of the result table
cat("Descriptive summary of sample sizes: \n\n")
des2 <- object$data %>%
summarize(median = median(.data$fit_nobs),
min = min(.data$fit_nobs),
max = max(.data$fit_nobs)) %>%
as.data.frame %>%
round(digits)
print(des2, row.names = FALSE)
cat("\n")
# Head of the result table
cat("Head of the specification results (first", rows, "rows): \n\n")
object$data %>%
select(-.data$model_function, -.data$term) %>%
mutate_if(is.numeric, round, digits) %>%
utils::head(n = rows) %>%
print
}
if(type == "curve") {
if (length(group) == 0) {
dplyr::bind_cols(
object$data %>%
summarize_at(vars(!! var), stats),
object$data %>%
dplyr::summarize(obs = median(.data$fit_nobs))
)
} else {
dplyr::left_join(
object$data %>%
dplyr::group_by_at(group) %>%
summarize_at(vars(!! var), stats),
object$data %>%
dplyr::group_by_at(group) %>%
dplyr::summarize(obs = median(.data$fit_nobs)),
by = group
)
}
}
}
#' Print method for S3 class "specr.object"
#' @keywords internal
#' @export
print.specr.object <- function(x, ...) {
cat("Models fitted based on", nrow(x$data), "specifications\n")
cat("Number of cores used:", x$workers, "\n\n")
# Short descriptive analysis across all specifications
cat("Descriptive summary of the specification curve:\n\n")
x$data %>%
summarize_at(vars(.data$estimate),
list(median = median, mad = mad, min = min, max = max,
q25 = function(x) quantile(x, prob = .25),
q75 = function(x) quantile(x, prob = .75))) %>%
as.data.frame %>%
round(2) %>%
print(row.names = FALSE)
}
#' Plot specification curve and analytic choices
#'
#' @description This function plots visualizations of the specification curve
#' analysis. The function requires an object of class \code{specr.object}, usually
#' the results of calling \code{specr()} to create a standard visualization of the
#' specification curve analysis. Several types of visualizations are possible.
#'
#' @param x A `specr.object` object, usually resulting from calling \code{specr()}.
#' @param type What type of figure should be plotted? If \code{type = "default"},
#' the standard specification curve analysis plot (the specification curve as the
#' upper panel and an overview of the relevant choices as the lower panel) is
#' created. If \code{type = "curve"}, only the specification curve (upper panel
#' of the default plot) is plotted. If \code{type = "choices"}, only the choice
#' panel (lower part of the default plot) is plotted. If \code{type = "boxplot"},
#' an alternative visualization of differences between choices is plotted that
#' summarizes results per choice using box-and-whisker plot(s). If
#' \code{type = "samplesizes"}, a barplot of sample sizes per specification is
#' plotted. See examples for more information.
#' @param var Which parameter should be plotted in the curve? Defaults to
#' \code{estimate}, but other parameters (e.g., p.value, fit_r.squared,...)
#' can be plotted too.
#' @param group Should the arrangement of the curve be grouped by a particular choice?
#' Defaults to NULL, but can be any of the present choices (e.g., x, y, controls...)
#' @param choices A vector specifying which analytic choices should be plotted.
#' By default, all choices (x, y, model, controls, subsets) are plotted.
#' @param labels Labels for the two parts of the plot
#' @param rel_heights vector indicating the relative heights of the plot.
#' @param desc Logical value indicating whether the curve should the arranged in
#' a descending order. Defaults to FALSE.
#' @param null Indicate what value represents the 'null' hypothesis (defaults to
#' zero).
#' @param ci Logical value indicating whether confidence intervals should be
#' plotted.
#' @param ribbon Logical value indicating whether a ribbon instead should be
#' plotted
#' @param formula In combination with \code{type = "variance"}, you can provide
#' a specific formula to extract specific variance components. The syntax of the
#' formula is based on \code{lme4::lmer()} and thus looks something like, e.g.:
#' \code{"estimate ~ 1 + (1|x) + (1|y)"} (to estimate the amount of variance
#' explained by different independent `x` and dependent variables `y`). All other
#' choices are then subsumed under residual variance. By no formula is provided,
#' all choices (x, y, model, controls, and subsets) that have more than one alternative
#' are included. See examples for further details.
#' @param print In combination with \code{type = "variance"}, logical value indicating
#' whether the intra-class correlations (i.e., percentages of variance explained by
#' analstical choices) should be printed or not. Defaults to TRUE.
#' @param ... further arguments passed to or from other methods (currently ignored).
#'
#' @return A \link[ggplot2]{ggplot} object that can be customized further.
#'
#' @export
#'
#' @examples
#' \dontrun{
#' # Specification Curve analysis ----
#' # Setup specifications
#' specs <- setup(data = example_data,
#' y = c("y1", "y2"),
#' x = c("x1", "x2"),
#' model = "lm",
#' controls = c("c1", "c2"),
#' subsets = list(group1 = unique(example_data$group1),
#' group2 = unique(example_data$group2)))
#'
#' # Run analysis
#' results <- specr(specs)
#'
#' # Resulting data frame with estimates
#' as_tibble(results) # This will be used for plotting
#'
#'
#' # Visualizations ---
#' # Plot results in various ways
#' plot(results) # default
#' plot(results, choices = c("x", "y")) # specific choices
#' plot(results, ci = FALSE, ribbon = TRUE) # exclude CI and add ribbon instead
#' plot(results, type = "curve")
#' plot(results, type = "choices")
#' plot(results, type = "samplesizes")
#' plot(results, type = "boxplot")
#'
#'
#' # Grouped plot
#' plot(results, group = controls)
#'
#' # Alternative and specific visualizations ----
#' # Other variables in the resulting data set can be plotted too
#' plot(results,
#' type = "curve",
#' var = fit_r.squared, # extract "r-square" instead of "estimate"
#' ci = FALSE)
#'
#' # Such a plot can also be extended (e.g., by again adding the estimates with
#' # confidence intervals)
#' library(ggplot2)
#' plot(results, type = "curve", var = fit_r.squared) +
#' geom_point(aes(y = estimate), shape = 5) +
#' labs(x = "specifications", y = "r-squared | estimate")
#'
#' # We can also investigate how much variance is explained by each analytical choice
#' plot(results, type = "variance")
#'
#' # By providing a specific formula in `lme4::lmer()`-style, we can extract specific choices
#' # and also include interactions between chocies
#' plot(results,
#' type = "variance",
#' formula = "estimate ~ 1 + (1|x) + (1|y) + (1|group1) + (1|x:y)")
#'
#' ## Combining several plots ----
#' # `specr` also exports the function `plot_grid()` from the package `cowplot`, which
#' # can be used to combine plots meaningfully
#' a <- plot(results, "curve")
#' b <- plot(results, "choices", choices = c("x", "y", "controls"))
#' c <- plot(results, "samplesizes")
#' plot_grid(a, b, c,
#' align = "v",
#' axis = "rbl",
#' rel_heights = c(2, 3, 1),
#' ncol = 1)
#'}
plot.specr.object <- function(x,
type = "default",
var = .data$estimate,
group = NULL,
choices = c("x", "y", "model", "controls", "subsets"),
labels = c("A", "B"),
rel_heights = c(2, 3),
desc = FALSE,
null = 0,
ci = TRUE,
ribbon = FALSE,
formula = NULL,
print = TRUE,
...){
var <- enquo(var)
group <- enquo(group)
# Create specification curve plot
plot_a <- x$data %>%
format_results(var = var, group = group, null = null, desc = desc) %>%
ggplot(aes(x = .data$specifications,
y = !! var,
ymin = .data$conf.low,
ymax = .data$conf.high,
color = .data$color)) +
geom_point(aes(color = .data$color),
size = 1) +
theme_minimal() +
scale_color_identity() +
theme(strip.text = element_blank(),
axis.line = element_line("black", size = .5),
legend.position = "none",
panel.spacing = unit(.75, "lines"),
axis.text = element_text(colour = "black")) +
labs(x = "")
# add CIs if necessary
if (isTRUE(ci)) {
plot_a <- plot_a +
geom_pointrange(alpha = 0.5,
size = .6,
fatten = 1)
}
# add ribbon if necessary
if (isTRUE(ribbon)) {
plot_a <- plot_a +
geom_ribbon(aes(ymin = .data$conf.low,
ymax = .data$conf.high,
color = "lightgrey"),
alpha = 0.25)
}
# Create choice panel
value <- key <- NULL
plot_b <- x$data %>%
format_results(var = var, group = group, null = null, desc = desc) %>%
tidyr::gather(key, value, choices) %>%
dplyr::mutate(key = factor(key, levels = choices)) %>%
ggplot(aes(x = .data$specifications,
y = value,
color = .data$color)) +
geom_point(aes(x = .data$specifications,
y = value),
shape = 124,
size = 3.35) +
scale_color_identity() +
theme_minimal() +
facet_grid(key~1, scales = "free_y", space = "free_y") +
theme(
axis.line = element_line("black", size = .5),
legend.position = "none",
panel.spacing = unit(.75, "lines"),
axis.text = element_text(colour = "black"),
strip.text.x = element_blank()) +
labs(x = "", y = "")
if(type == "default"){
p <- plot_grid(plot_a,
plot_b,
labels = labels,
align = "v",
axis = "rbl",
rel_heights = rel_heights,
ncol = 1.)
return(p)
}
if(type == "curve") {
return(plot_a)
}
if(type == "choices") {
return(plot_b)
}
if(type == "boxplot") {
plot_c <- x$data %>%
tidyr::gather(key, value, all_of(choices)) %>%
ggplot(aes(x = value,
y = !! var,
fill = key)) +
geom_boxplot(outlier.color = "red") +
coord_flip() +
scale_fill_brewer(palette = "Blues") +
facet_grid(key~1, scales = "free_y", space = "free") +
theme_minimal() +
theme(legend.position = "none",
axis.line = element_line("black", size = .5),
axis.text = element_text(colour = "black"),
strip.text.x = element_blank()) +
labs(x = "")
return(plot_c)
}
if(type == "variance") {
grp <- NULL
if(is.null(formula)) {
if(length(x$x) > 1) {
var_x = "+ (1|x)"
} else {
var_x = ""
}
if(length(x$y) > 1) {
var_y = "+ (1|y)"
} else {
var_y = ""
}
if(length(x$model) > 1) {
var_model = "+ (1|model)"
} else {
var_model = ""
}
if(length(x$controls) > 0) {
var_controls = "+ (1|controls)"
} else {
var_controls = ""
}
if(x$subsets[1] == "none") {
var_subsets = ""
} else {
var_subsets = "+ (1|subsets)"
}
formula <- paste("estimate ~ 1", var_x, var_y, var_model, var_controls, var_subsets)
}
model <- lme4::lmer(formula = formula, data = x$data)
var <- lme4::VarCorr(model) %>%
as.data.frame %>%
select(grp, vcov)
# sum up all variance components
sum_var <- sum(var$vcov)
# estimate icc
var <- var %>%
mutate(icc = vcov/sum_var,
percent = .data$icc*100)
if(isTRUE(print)) {
var %>%
mutate_if(is.numeric, round, 2) %>%
print
}
plot_d <- ggplot(var, aes(x = .data$grp,
y = .data$percent)) +
geom_bar(stat = "identity", fill = "#377eb8") +
theme_minimal() +
theme(axis.text = element_text(colour = "black"),
axis.line.y = element_line(colour = "black"),
axis.line.x = element_line(colour = "black")) +
labs(x = "", y = "proportion of variance", fill = "analytical choices")
return(plot_d)
}
if(type == "samplesizes") {
plot_e <- x$data %>%
format_results(var = var, group = group, desc = desc) %>%
ggplot(aes(x = .data$specifications,
y = .data$fit_nobs)) +
geom_bar(stat = "identity",
fill = "grey",
size = .2) +
theme_minimal() +
theme(
axis.line = element_line("black", size = .5),
legend.position = "none",
panel.spacing = unit(.75, "lines"),
axis.text = element_text(colour = "black")) +
labs(x = "", y = "")
return(plot_e)
}
}
#' Return tibble from specr.object
#' @keywords internal
#' @export
as_tibble.specr.object <- function(x,...) {
as_tibble(x$data, ...)
}
#' Return data.frame from specr.object
#' @keywords internal
#' @export
as.data.frame.specr.object <- function(x, ...) {
as.data.frame(x$data, ...)
}
#' Print method for S3 class "specr.book"
#' @keywords internal
#' @export
print.specr.boot <- function(x, ...) {
# Short technical summary
cat("Results of bootstrapping 'under-the-null' procedure\n")
cat("-------------------\n")
cat("Technical details:\n\n")
cat(" Class: ", class(x), "-- version:", as.character(utils::packageVersion("specr")), "\n")
cat(" Cores used: ", x$workers, "\n")
cat(" Duration of fitting process: ", x$time, "\n")
cat(" Number of bootstrapped samples: ", x$n_samples, "\n\n")
cat("Descriptive summary of the specification curves 'under-the-null' (head):\n\n")
med1 <- x$boot_models %>%
unnest(cols = c(fit)) %>%
group_by(id) %>%
summarize(median = median(estimate),
min = min(estimate),
max = max(estimate))
med1 %>%
head(n = 6) %>%
mutate_if(is.numeric, round, 2) %>%
as.data.frame %>%
print(row.names = F)
cat("\n\nOverall median across all resamples (should be close to NULL):\n\n")
med1 %>%
ungroup %>%
summarize(median = median(median),
min = min(median),
max = max(median)) %>%
mutate_if(is.numeric, round, 2) %>%
as.data.frame %>%
print(row.names = F)
}
#' Summarizing the bootstrapped results
#'
#' Generic summary function for an object of class `specr.boot` (resulting from
#' `boot_null()`. Provides an approach to inference for specification curve analysis
#' consists of obtaining the median effect estimated across all specifications,
#' and then testing whether this median estimated effect is more extreme than
#' would be expected if all specifications had a true effect of zero.
#'
#' @param x A `specr.boot` object, resulting from calling \code{boot_null()}.
#' @param group Variables indicating which variables to summarize the results by
#' @param ... further arguments passed to or from other methods (currently ignored).
#'
#'
#' @return A tibble.
#'
#' @export
summary.specr.boot <- function(x, group = NULL, ...) {
options(dplyr.summarise.inform = FALSE)
# Summarizing observed specification curve
obs_summary <- x$observed_model %>%
mutate(pos = ifelse(estimate > 0, 1, 0),
neg = ifelse(estimate < 0, 1, 0),
sig = ifelse(p.value < .05, 1, 0),
pos_sig = ifelse(pos == 1 & sig == 1, 1, 0),
neg_sig = ifelse(neg == 1 & sig == 1, 1, 0))
if(is.null(group)) {
obs_summary <- obs_summary %>%
summarize(obs_median = median(estimate),
obs_n = n(),
obs_n_positive_sig = sum(pos_sig),
obs_n_negative_sig = sum(neg_sig)) %>%
select(obs_median, everything()) %>%
ungroup %>%
mutate(id2 = "bind")
} else {
obs_summary <- obs_summary %>%
dplyr::group_by_at(group) %>%
summarize(obs_median = median(estimate),obs_n = n(),
obs_n_positive_sig = sum(pos_sig),
obs_n_negative_sig = sum(neg_sig)) %>%
select(!!group, obs_median, everything()) %>%
ungroup %>%
mutate(id2 = "bind")
}
# Summarizing the specification curves 'under-the
boot_summary = x$boot_models %>%
unnest(cols = c(fit)) %>%
dplyr::group_by_at(vars(id)) %>%
mutate(pos = ifelse(estimate > 0, 1, 0),
neg = ifelse(estimate < 0, 1, 0),
sig = ifelse(p.value < .05, 1, 0),
pos_sig = ifelse(pos == 1 & sig == 1, 1, 0),
neg_sig = ifelse(neg == 1 & sig == 1, 1, 0)) %>%
ungroup
if(is.null(group)){
boot_summary <- boot_summary %>%
group_by_at(vars(id)) %>%
summarize(median = median(estimate),
n = n(),
n_positive_sig = sum(pos_sig),
n_negative_sig = sum(neg_sig)) %>%
ungroup() %>%
mutate(id2 = "bind")
temp = boot_summary %>%
left_join(., obs_summary, by = "id2") %>%
mutate(extreme_median = ifelse(obs_median > 0 & median >= obs_median, 1,
ifelse(obs_median < 0 & median <= obs_median, 1, 0)),
extreme_positive_sig = ifelse(n_positive_sig >= obs_n_positive_sig, 1, 0),
extreme_negative_sig = ifelse(n_negative_sig >= obs_n_negative_sig, 1, 0))
} else {
boot_summary <- boot_summary %>%
dplyr::group_by_at(vars(id, group)) %>%
summarize(median = median(estimate),
n = n(),
n_positive_sig = sum(pos_sig),
n_negative_sig = sum(neg_sig)) %>%
ungroup() %>%
mutate(id2 = "bind")
temp = suppressMessages(left_join(boot_summary, obs_summary)) %>%
mutate(extreme_median = ifelse(obs_median > 0 & median >= obs_median, 1,
ifelse(obs_median < 0 & median <= obs_median, 1, 0)),
extreme_positive_sig = ifelse(n_positive_sig >= obs_n_positive_sig, 1, 0),
extreme_negative_sig = ifelse(n_negative_sig >= obs_n_negative_sig, 1, 0))
}
if(is.null(group)) {
summary <- temp %>%
summarize(n = n(),
extreme_median = sum(extreme_median),
extreme_positive_sig = sum(extreme_positive_sig),
extreme_negative_sig = sum(extreme_negative_sig)) %>%
ungroup %>%
tidyr::gather(variable, n_extreme, contains("extreme")) %>%
mutate(p_value = n_extreme / n,
p_value = ifelse(p_value == 1.000, "1.000",
ifelse(p_value < .001, "< .001", gsub("0.(.*)", ".\\1", sprintf("%.3f", p_value))))) %>%
select(variable, p_value) %>%
tidyr::spread(variable, p_value) %>%
cbind(., obs_summary) %>%
mutate(Mdn = sprintf("%.2f", obs_median),
obs_n_positive_sig = sprintf("%s / %s", obs_n_positive_sig, obs_n),
obs_n_negative_sig = sprintf("%s / %s", obs_n_negative_sig, obs_n)) %>%
select(median = Mdn, extreme_median,
obs_n_positive_sig,
extreme_positive_sig,
obs_n_negative_sig,
extreme_negative_sig)
} else {
summary <- temp %>%
group_by_at(group) %>%
summarize(n = n(),
extreme_median = sum(extreme_median),
extreme_positive_sig = sum(extreme_positive_sig),
extreme_negative_sig = sum(extreme_negative_sig)) %>%
ungroup %>%
tidyr::gather(variable, n_extreme, contains("extreme")) %>%
mutate(p_value = n_extreme / n,
p_value = ifelse(p_value == 1.000, "1.000",
ifelse(p_value < .001, "< .001", gsub("0.(.*)", ".\\1", sprintf("%.3f", p_value))))) %>%
select(!!group, variable, p_value) %>%
tidyr::spread(variable, p_value)
summary <- suppressMessages(left_join(summary, obs_summary)) %>%
mutate(Mdn = sprintf("%.2f", obs_median),
obs_n_positive_sig = sprintf("%s / %s", obs_n_positive_sig, obs_n),
obs_n_negative_sig = sprintf("%s / %s", obs_n_negative_sig, obs_n)) %>%
select(!!group, median = Mdn,
extreme_median,
obs_n_positive_sig,
extreme_positive_sig,
obs_n_negative_sig,
extreme_negative_sig)
}
summary <- summary %>%
rename("median" = median,
"median p" = extreme_median,
"share positive" = obs_n_positive_sig,
"share positive p" = extreme_positive_sig,
"share negative" = obs_n_negative_sig,
"share negative p" = extreme_negative_sig) %>%
as.data.frame()
#cat("Inference on the specification curve:\n\n")
return(summary)
}
#' Plot specification curve and under-the-null distributions
#'
#' @description This function plots the original specification curve on top of
#' 95% quantiles of the bootstrapped 'under-the-null' distributions.
#'
#' @param x A `specr.boot` object, resulting from calling \code{boot_null()}.
#' @param ... further arguments passed to or from other methods (currently ignored).
#'
#' @return A \link[ggplot2]{ggplot} object that can be customized further.
#'
#' @export
plot.specr.boot <- function(x, ...) {
x$boot_models %>%
unnest(cols = c(fit)) %>%
group_by(id) %>%
arrange(id, estimate) %>%
select(id, x, y, estimate) %>%
dplyr::mutate(rank = 1:n()) %>%
group_by(rank) %>%
summarize(low = quantile(estimate, c(.025)),
mid = quantile(estimate, c(.5)),
high = quantile(estimate, c(.975))) %>%
left_join(x$observed_model %>%
as_tibble %>%
arrange(estimate) %>%
mutate(rank = 1:n()) %>%
select(rank, observed = estimate),
by = "rank") %>%
tidyr::gather(key, value, -rank) %>%
ggplot(aes(x = rank, y = value, color = key, linetype = key)) +
scale_color_manual(values = c("grey40", "grey40", "grey40", "steelblue")) +
scale_linetype_manual(values = c(3, 3, 4, 1)) +
geom_line(size = .75) +
theme_minimal() +
theme(
axis.line = element_line("black", size = .5),
panel.spacing = unit(.75, "lines"),
axis.text = element_text(colour = "black")
) +
labs(x = "Specifications (ranked)", y = "estimate", color = "", linetype = "")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.