Nothing
#' @title Fit a z-curve
#'
#' @description \code{zcurve} is used to fit z-curve models. The function
#' takes input of z-statistics or two-sided p-values and returns object of
#' class \code{"zcurve"} that can be further interrogated by summary and plot
#' function. It default to EM model, but different version of z-curves can
#' be specified using the \code{method} and \code{control} arguments. See
#' 'Examples' and 'Details' for more information.
#'
#' @param z a vector of z-scores.
#' @param p a vector of two-sided p-values, internally transformed to
#' z-scores.
#' @param data an object created with [zcurve_data()] function.
#' @param z.lb a vector with start of censoring intervals of censored z-scores.
#' @param z.ub a vector with end of censoring intervals of censored z-scores.
#' @param p.lb a vector with start of censoring intervals of censored two-sided p-values.
#' @param p.ub a vector with end of censoring intervals of censored two-sided p-values.
#' @param method the method to be used for fitting. Possible options are
#' Expectation Maximization \code{"EM"} and density \code{"density"},
#' defaults to \code{"EM"}.
#' @param bootstrap the number of bootstraps for estimating CI. To skip
#' bootstrap specify \code{FALSE}.
#' @param parallel whether the bootstrap should be performed in parallel.
#' Defaults to \code{FALSE}. The implementation is not completely stable
#' and might cause a connection error.
#' @param control additional options for the fitting algorithm more details in
#' \link[=control_EM]{control EM} or \link[=control_density]{control density}.
#'
#' @details The function returns the EM method by default and changing
#' \code{method = "density"} gives the KD2 version of z-curve as outlined in
#' \insertCite{zcurve2;textual}{zcurve}. For the original z-curve
#' \insertCite{zcurve1}{zcurve}, referred to as KD1, specify
#' \code{'control = "density", control = list(model = "KD1")'}.
#'
#' @references
#' \insertAllCited{}
#'
#' @return The fitted z-curve object
#' @export zcurve
#'
#' @examples
#' # load data from OSC 2015 reproducibility project
#' OSC.z
#'
#' # fit an EM z-curve (with disabled bootstrap due to examples times limits)
#' m.EM <- zcurve(OSC.z, method = "EM", bootstrap = FALSE)
#' # a version with 1000 boostraped samples would looked like:
#' \donttest{m.EM <- zcurve(OSC.z, method = "EM", bootstrap = 1000)}
#'
#' # or KD2 z-curve (use larger bootstrap for real inference)
#' m.D <- zcurve(OSC.z, method = "density", bootstrap = FALSE)
#'
#' # inspect the results
#' summary(m.EM)
#' summary(m.D)
#' # see '?summary.zcurve' for more output options
#'
#' # plot the results
#' plot(m.EM)
#' plot(m.D)
#' # see '?plot.zcurve' for more plotting options
#'
#' # to specify more options, set the control arguments
#' # ei. increase the maximum number of iterations and change alpha level
#' ctr1 <- list(
#' "max_iter" = 9999,
#' "alpha" = .10
#' )
#' \dontrun{m1.EM <- zcurve(OSC.z, method = "EM", bootstrap = FALSE, control = ctr1)}
#' # see '?control_EM' and '?control_density' for more information about different
#' # z-curves specifications
#' @seealso [summary.zcurve()], [plot.zcurve()], [control_EM], [control_density]
zcurve <- function(z, z.lb, z.ub, p, p.lb, p.ub, data, method = "EM", bootstrap = 1000, parallel = FALSE, control = NULL){
if(!method %in% c("EM", "density"))
stop("Wrong method, select a supported option")
# set bootstrap
if(!is.numeric(bootstrap)){
bootstrap <- FALSE
}else if(bootstrap <= 0){
bootstrap <- FALSE
}
if(missing(data)){
# check input
input_type <- NULL
if((!missing(z.lb) & missing(z.ub)) | (!missing(z.ub) & missing(z.lb)))
stop("Both lower and upper bound for z-scores needs to be supplied.")
if((!missing(p.lb) & missing(p.ub)) | (!missing(p.ub) & missing(p.lb)))
stop("Both lower and upper bound for p-values needs to be supplied.")
if(missing(z) & missing(p) & missing(z.lb) & missing(p.lb))
stop("No data input")
if(!missing(z)){
if(!is.numeric(z))
stop("Wrong z-scores input: Data are not nummeric.")
if(!is.vector(z))
stop("Wrong z-scores input: Data are not a vector")
if(all(z <= 1 & z >= 0))
stop("It looks like you are entering p-values rather than z-scores. To use p-values, explicitly name your argument 'zcurve(p = [vector of p-values])'")
input_type <- c(input_type, "z")
}
if(!missing(p)){
if(!is.numeric(p))
stop("Wrong p-values input: Data are not nummeric.")
if(!is.vector(p))
stop("Wrong p-values input: Data are not a vector")
input_type <- c(input_type, "p")
}
}else if(inherits(data, "zcurve_data")){
input_type <- "zcurve-data"
}else{
stop("The 'data' input must be created by the `zcurve_data()` function. See `?zcurve_data()` for more information.")
}
# create results object
object <- NULL
object$call <- match.call()
object$method <- method
object$input_type <- input_type
# create control
if(method == "EM"){
control <- .zcurve_EM.control(control)
}else if(method == "density"){
control <- .zcurve_density.control(control)
}
### prepare data
if(missing(data)){
# get point estimates on the same scale
if(!missing(z)){
z <- abs(z)
}else{
z <- numeric()
}
if(!missing(p)){
z <- c(z, .p_to_z(p))
}
# get censoring on the same scale
if(!missing(z.lb)){
lb <- abs(z.lb)
ub <- abs(z.ub)
}else{
lb <- NULL
ub <- NULL
}
if(!missing(p.lb)){
lb <- c(lb, .p_to_z(p.ub))
ub <- c(ub, .p_to_z(p.lb))
}
# restrict censoring to the fitting range & treat extremely censored values as extremely significant values
if(!is.null(lb)){
if(any(lb < control$a))
stop("All censored observations must be higher than the fitting range.")
z <- c(z, lb[lb >= control$b])
ub <- ub[lb < control$b]
lb <- lb[lb < control$b]
}
if(length(lb) > 0){
# restrict the upper censoring to the fitting range
ub <- ifelse(ub > control$b, control$b, ub)
# update control
if(method == "EM"){
control$type <- 3
}else if(method == "density"){
stop("Censoring is not available for the density algorithm.")
}
}
object$data <- z
object$data_censoring <- data.frame(lb = lb, ub = ub)
}else{
if(nrow(data$precise) != 0){
object$data <- .p_to_z(data$precise$p)
}else{
object$data <- numeric()
}
if(nrow(data$censored) != 0){
lb <- .p_to_z(data$censored$p.ub)
ub <- .p_to_z(data$censored$p.lb)
# remove non-significant censored p-values
if(any(lb < control$a)){
warning(paste0(sum(lb < control$a), " censored p-values removed due to the upper bound being larger that the fitting range."), immediate. = TRUE, call. = FALSE)
ub <- ub[lb >= control$a]
lb <- lb[lb >= control$a]
}
# move too significant censored p-values among precise p-values
if(length(lb) > 0 && any(lb >= control$b)){
object$data <- c(object$data, lb[lb >= control$b])
ub <- ub[lb < control$b]
lb <- lb[lb < control$b]
}
if(length(lb) > 0){
# restrict the upper censoring to the fitting range
ub <- ifelse(ub > control$b, control$b, ub)
object$data_censoring <- data.frame(lb = lb, ub = ub)
# update control
if(method == "EM"){
control$type <- 3
}else if(method == "density"){
stop("Censoring is not available for the density algorithm.")
}
}else{
object$data_censoring <- data.frame(lb = NULL, ub = NULL)
}
}else{
object$data_censoring <- data.frame(lb = NULL, ub = NULL)
}
}
object$control <- control
# only run the algorithm with some significant results
if(sum(object$data > control$a & object$data < control$b) + nrow(object$data_censoring) < 10)
stop("There must be at least 10 z-scores in the fitting range but a much larger number is recommended.")
# use appropriate algorithm
if(method == "EM"){
fit <- .zcurve_EM(z = object$data, lb = object$data_censoring$lb, ub = object$data_censoring$ub, control = control)
}else if(method == "density"){
fit <- .zcurve_density(z = object$data, control = control)
}
object$fit <- fit
# check convergence
if(method == "EM"){
object$converged <- ifelse(fit$iter < control$max_iter, TRUE, FALSE)
}else if(method == "density"){
object$converged <- fit$converged
if(fit$message == "singular convergence (7)")
object$converged <- TRUE
if(fit$message == "both X-convergence and relative convergence (5)")
object$converged <- TRUE
}
if(object$converged == FALSE)
warning("Model did not converge.")
# do bootstrap
if(bootstrap != FALSE){
# use apropriate algorithm
if(method == "EM"){
if(parallel){
fit_boot <- .zcurve_EM_boot.par(z = object$data, lb = object$data_censoring$lb, ub = object$data_censoring$ub, control = control, fit = fit, bootstrap = bootstrap)
}else{
fit_boot <- .zcurve_EM_boot(z = object$data, lb = object$data_censoring$lb, ub = object$data_censoring$ub, control = control, fit = fit, bootstrap = bootstrap)
}
}else if(method == "density"){
if(parallel){
fit_boot <- .zcurve_density_boot.par(z = object$data, control = control, bootstrap = bootstrap)
}else{
fit_boot <- .zcurve_density_boot(z = object$data, control = control, bootstrap = bootstrap)
}
}
object$boot <- fit_boot
}
# estimates
object$coefficients <- .get_estimates(mu = fit$mu, weights = fit$weights, prop_high = fit$prop_high, sig_level = control$sig_level, a = control$a)
# boot estimates
if(bootstrap != FALSE){
object$coefficients_boot <- data.frame(t(sapply(1:bootstrap, function(i){
.get_estimates(mu = fit_boot$mu[i,], weights = fit_boot$weights[i,], prop_high = fit_boot$prop_high[i], sig_level = control$sig_level, a = control$a)
})))
}
class(object) <- "zcurve"
return(object)
}
### methods
#' Prints a fitted z-curve object
#' @param x Fitted z-curve object
#' @param ... Additional arguments
#' @export print.zcurve
#' @rawNamespace S3method(print, zcurve)
#' @seealso [zcurve()]
print.zcurve <- function(x, ...){
cat("Call:\n")
print(x$call)
cat("\nEstimates:\n")
print(x$coefficients[1:2])
}
#' Summarize fitted z-curve object
#'
#' @param object A fitted z-curve object.
#' @param type Whether the results \code{"results"} or the
#' mixture mode parameters \code{"parameters"} should be
#' returned. Defaults to \code{"results"}.
#' @param all Whether additional results, such as file drawer
#' ration, expected and missing number of studies, and Soric FDR
#' be returned. Defaults to \code{FALSE}
#' @param ERR.adj Confidence intervals adjustment for ERR. Defaults
#' to \code{.03} as proposed by Bartos & Schimmack (in preparation).
#' @param EDR.adj Confidence intervals adjustment for EDR. Defaults
#' to \code{.05} as proposed by Bartos & Schimmack (in preparation).
#' @param round.coef To how many decimals should the coefficient
#' be rounded. Defaults to \code{3}.
#' @param ... Additional arguments
#'
#' @return Summary of a z-curve object
#'
#' @method summary zcurve
#' @export summary.zcurve
#' @rawNamespace S3method(summary, zcurve)
#' @seealso [zcurve()]
summary.zcurve <- function(object, type = "results", all = FALSE, ERR.adj = .03, EDR.adj = .05, round.coef = 3, ...){
if(substr(object$method, 1, 2) == "EM"){
if(!is.null(object$boot)){
fit_index <- c(object$fit$Q, unname(stats::quantile(object$boot$Q, c(.025, .975))))
}else{
fit_index <- c(object$fit$Q)
}
method_text <- object$method
iter_text <- paste(c(object$fit$iter_start," + ",object$fit$iter), collapse = "")
fit_stat <- "Q"
}else if(object$method == "density"){
if(!is.null(object$boot)){
fit_index <- c(object$fit$objective, unname(stats::quantile(object$boot$objective, c(.025, .975))))
}else{
fit_index <- c(object$fit$objective)
}
if(object$control$version == 1){
method_text <- paste(c(object$method, " (version 1)"), collapse = "")
fit_stat <- "MAE (*1e3)"
fit_index <- fit_index*1e3
}else{
method_text <- object$method
fit_stat <- "RMSE"
}
iter_text <- object$fit$iter
}
temp_N_sig <- sum(object$data > stats::qnorm(object$control$sig_level/2, lower.tail = FALSE)) +
nrow(object$data_censoring[object$data_censoring$lb > object$control$a,])
temp_N_obs <- length(object$data) + nrow(object$data_censoring)
temp_N_used <- sum(object$data > object$control$a & object$data < object$control$b) +
nrow(object$data_censoring[object$data_censoring$lb > object$control$a & object$data_censoring$lb < object$control$b ,])
model <- list(
"method" = method_text,
"model" = ifelse(is.null(object$control$model), "custom", object$control$model),
"fit_index" = fit_index,
"fit_stat" = fit_stat,
"iter" = iter_text,
"input_type"= object$input_type,
"N_all" = temp_N_obs,
"N_sig" = temp_N_sig,
"N_used" = temp_N_used
)
if(type == "results" | substr(type,1,3) == "res"){
if(!is.null(object$boot)){
l.CI <- c(stats::quantile(object$coefficients_boot$ERR, .025),
stats::quantile(object$coefficients_boot$EDR, .025))
u.CI <- c(stats::quantile(object$coefficients_boot$ERR, .975),
stats::quantile(object$coefficients_boot$EDR, .975))
}else{
l.CI <- NULL
u.CI <- NULL
}
TAB <- cbind(Estimate = stats::coef(object)[1:2],
l.CI = l.CI,
u.CI = u.CI)
# adjust CIs
if(!is.null(object$boot)){
TAB["ERR", "l.CI"] <- ifelse(TAB["ERR", "l.CI"] - ERR.adj < object$control$sig_level/2, object$control$sig_level/2, TAB["ERR", "l.CI"] - ERR.adj)
TAB["ERR", "u.CI"] <- ifelse(TAB["ERR", "u.CI"] + ERR.adj > 1, 1, TAB["ERR", "u.CI"] + ERR.adj)
TAB["EDR", "l.CI"] <- ifelse(TAB["EDR", "l.CI"] - EDR.adj < object$control$sig_level, object$control$sig_level, TAB["EDR", "l.CI"] - EDR.adj)
TAB["EDR", "u.CI"] <- ifelse(TAB["EDR", "u.CI"] + EDR.adj > 1, 1, TAB["EDR", "u.CI"] + EDR.adj)
}
# additional stats
if(all){
temp_sig_level <- object$control$sig_level
if(!is.null(object$boot)){
TAB <- rbind(
TAB,
"Soric FDR" = c(
"Estimate" = .get_Soric_FDR(TAB["EDR","Estimate"], temp_sig_level),
"l.CI" = .get_Soric_FDR(TAB["EDR","u.CI"], temp_sig_level),
"u.CI" = .get_Soric_FDR(TAB["EDR","l.CI"], temp_sig_level)),
"File Drawer R" = c(
"Estimate" = .get_file_drawer_R(TAB["EDR","Estimate"]),
"l.CI" = .get_file_drawer_R(TAB["EDR","u.CI"]),
"u.CI" = .get_file_drawer_R(TAB["EDR","l.CI"])),
"Expected N" = c(
"Estimate" = .get_expected_N(TAB["EDR","Estimate"], temp_N_sig),
"l.CI" = .get_expected_N(TAB["EDR","u.CI"], temp_N_sig),
"u.CI" = .get_expected_N(TAB["EDR","l.CI"], temp_N_sig)),
"Missing N" = c(
"Estimate" = .get_missing_N(TAB["EDR","Estimate"], temp_N_sig, temp_N_obs),
"l.CI" = .get_missing_N(TAB["EDR","u.CI"], temp_N_sig, temp_N_obs),
"u.CI" = .get_missing_N(TAB["EDR","l.CI"], temp_N_sig, temp_N_obs))
)
}else{
TAB <- rbind(
TAB,
"Soric FDR" = c("Estimate" = .get_Soric_FDR(TAB["EDR","Estimate"], temp_sig_level)),
"File Drawer R" = c("Estimate" = .get_file_drawer_R(TAB["EDR","Estimate"])),
"Expected N" = c("Estimate" = .get_expected_N(TAB["EDR","Estimate"], temp_N_sig)),
"Missing N" = c("Estimate" = .get_missing_N(TAB["EDR","Estimate"], temp_N_sig, temp_N_obs)))
}
}
if(object$method == "density"){
if(object$control$version == 1){
TAB <- as.data.frame(TAB[1,,drop=FALSE])
}
}
}else if(type == "parameters" | substr(type,1,3) == "par"){
if(!is.null(object$boot)){
if(object$method == "density"){
if(object$control$version == 1){
M_l.CI <- apply(object$boot$mu,2,stats::quantile, prob = .025)
M_u.CI <- apply(object$boot$mu,2,stats::quantile, prob = .975)
}else{
M_l.CI <- NULL
M_u.CI <- NULL
}
}else{
M_l.CI <- NULL
M_u.CI <- NULL
}
W_l.CI <- apply(object$boot$weights,2,stats::quantile, prob = .025)
W_u.CI <- apply(object$boot$weights,2,stats::quantile, prob = .975)
}else{
M_l.CI <- NULL
M_u.CI <- NULL
W_l.CI <- NULL
W_u.CI <- NULL
}
TAB <- cbind('Mean ' = object$fit$mu,
'l.CI' = M_l.CI,
'u.CI ' = M_u.CI,
'Weight' = object$fit$weights,
'l.CI' = W_l.CI,
'u.CI' = W_u.CI)
rownames(TAB) <- as.character(1:length(object$fit$mu))
}
res <- list(call = object$call,
coefficients = TAB,
model = model,
converged = object$converged,
round.coef = round.coef)
class(res) <- "summary.zcurve"
return(res)
}
#' Prints summary object for z-curve method
#' @param x Summary of a z-curve object
#' @param ... Additional arguments
#' @method print.summary zcurve
#' @export print.summary.zcurve
#' @rawNamespace S3method(print, summary.zcurve)
#' @seealso [zcurve()]
print.summary.zcurve <- function(x, ...){
cat("Call:\n")
print(x$call)
cat("\n")
cat(paste(c("model: ",x$model$model, " via ", x$model$method, "\n"), collapse = ""))
cat("\n")
#stats::printCoefmat(x$coefficients, digits = 2,
# cs.ind = c(1:ncol(x$coefficients)), tst.ind = integer(), zap.ind = integer())
temp_to_int <- !rownames(x$coefficients) %in% c("Expected N", "Missing N")
temp_coef <- x$coefficients
if(length(temp_to_int) != 0){
temp_coef[temp_to_int,] <- apply(as.data.frame(x$coefficients[temp_to_int,]), 2, function(p).rXd(p, X = x$round.coef))
temp_coef[!temp_to_int,] <- round(x$coefficients[!temp_to_int,])
}
print(temp_coef,quote = FALSE, right = T)
if(length(x$model$fit_index) > 1){
fit_index_CI <- paste(c(", 95% CI[", .r2d(x$model$fit_index[2]), ", ", .r2d(x$model$fit_index[3]),"]"), collapse = "")
}else{
fit_index_CI <- NULL
}
cat("\n")
if(x$converged){
cat(paste(c("Model converged in ", x$model$iter, " iterations", "\n"), collapse = ""))
}else{
cat(paste(c("\033[0;31m", "Model did not converge in ", x$model$iter, " iterations", "\033[0m", "\n"), collapse = ""))
}
obs_proportion <- stats::prop.test(x$model$N_sig, x$model$N_all)
cat(paste0("Fitted using ", x$model$N_used, " ", paste(x$model$input_type, collapse = " and "), "-values. ", x$model$N_all, " supplied, ", x$model$N_sig, " significant (ODR = ", .r2d(obs_proportion$estimate), ", 95% CI [", .r2d(obs_proportion$conf.int[1]), ", ", .r2d(obs_proportion$conf.int[2]), "]).\n"))
cat(paste(c(x$model$fit_stat," = " , .r2d(x$model$fit_index[1]), fit_index_CI, "\n"), collapse = ""))
}
#' Plot fitted z-curve object
#'
#' @param x Fitted z-curve object
#' @param annotation Add annotation to the plot. Defaults
#' to \code{FALSE}.
#' @param CI Plot confidence intervals for the estimated z-curve. Defaults
#' to \code{FALSE}.
#' @param extrapolate Scale the chart to the extrapolated area. Defaults
#' to \code{FALSE}.
#' @param plot_type Type of plot to by produced. Defaults to \code{"base"}
#' for th base plotting function. An alternative is \code{"ggplot"} for a
#' ggplot2.
#' @param y.anno A vector of length 8 specifying the y-positions
#' of the individual annotation lines relative to the figure's height.
#' Defaults to \code{c(.95, .88, .78, .71, .61, .53, .43, .35)}
#' @param x.anno A number specifying the x-position of the block
#' of annotations relative to the figure's width.
#' @param cex.anno A number specifying the size of the annotation text.
#' @param ... Additional arguments including \code{main}, \code{xlab},
#' \code{ylab}, \code{xlim}, \code{ylim}, \code{cex.axis}, \code{cex.lab}
#'
#' @method plot zcurve
#' @export plot.zcurve
#' @rawNamespace S3method(plot, zcurve)
#'
#' @examples \dontrun{
#' # simulate some z-statistics and fit a z-curve
#' z <- abs(rnorm(300,3))
#' m.EM <- zcurve(z, method = "EM", bootstrap = 100)
#'
#' # plot the z-curve
#' plot(m.EM)
#'
#' # add annotation text and model fit CI
#' plot(m.EM, annotation = TRUE, CI = TRUE)
#'
#' # change the location of the annotation to the left
#' plot(m.EM, annotation = TRUE, CI = TRUE, x_text = 0)
#' }
#' @seealso [zcurve()]
plot.zcurve <- function(x, annotation = FALSE, CI = FALSE, extrapolate = FALSE, plot_type = "base",
y.anno = c(.95, .88, .78, .71, .61, .53, .43, .35), x.anno = .6, cex.anno = 1, ...){
if(is.null(x$boot))
CI <- FALSE
if(substr(tolower(plot_type), 1, 1) == "b"){
plot_type <- "base"
}else if(substr(tolower(plot_type), 1, 1) == "g"){
plot_type <- "ggplot"
}else{
stop("Unrecognized `plot_type` argument. The possibly options are `base` and `ggplot`.")
}
additional <- list(...)
if(is.null(additional$main)){
main <- paste(c("z-curve (", ifelse(is.null(x$control$model), "custom", x$control$model), " via ", x$method, ")"), collapse = "")
}else{
main <- additional$main
}
if(is.null(additional$xlab)){
xlab <- "z-scores"
}else{
xlab <- additional$xlab
}
if(is.null(additional$ylab)){
ylab <- "Density"
}else{
ylab <- additional$ylab
}
if(is.null(additional$xlim)){
xlim <- NULL
}else{
xlim <- additional$xlim
}
if(is.null(additional$ylim)){
ylim <- NULL
}else{
ylim <- additional$ylim
}
if(is.null(additional$cex.axis)){
cex.axis <- 1
}else{
cex.axis <- additional$cex.axis
}
if(is.null(additional$cex.lab)){
cex.lab <- 1
}else{
cex.lab <- additional$cex.lab
}
# set breaks for the histogram
br1 <- seq(x$control$a, x$control$b, .20)
br2 <- seq(0, x$control$a, .20)
# change the last break accordingly to the cut points
br1[length(br1)] <- x$control$b
br2[length(br2)] <- x$control$a
# use histograms to get counts in each bin
h1 <- graphics::hist(x$data[x$data > x$control$a & x$data < x$control$b], breaks = br1, plot = F)
# add censored observations to the histogram
if(nrow(x$data_censoring) != 0){
# spread the censored observation across the z-values
cen_counts <- do.call(rbind, lapply(1:nrow(x$data_censoring), function(i){
temp_counts <- (x$data_censoring$lb[i] < h1$breaks[-length(h1$breaks)] & x$data_censoring$ub[i] > h1$breaks[-length(h1$breaks)])
temp_counts[temp_counts] <- 1/sum(temp_counts)
return(temp_counts)
}))
cen_counts <- apply(cen_counts, 2, sum)
# add the counts and standardize the density
h1$counts <- h1$counts + cen_counts
h1$density <- (h1$counts / sum(h1$counts)) * (length(h1$counts)/(x$control$b - x$control$a))
}
# add histogram for non-sig results
if(length(x$data[x$data < x$control$a])){
h2 <- graphics::hist(x$data[x$data < x$control$a], breaks = br2, plot = F)
# scale the density of non-significant z-scores appropriately to the first one
h2$density <- h2$density * (x$control$a/(x$control$b - x$control$a))
h2$density <- h2$density/(
(length(x$data[x$data > x$control$a & x$data < x$control$b])/(x$control$b - x$control$a))
/
(length(x$data[x$data < x$control$a])/(x$control$a))
)
}else{
h2 <- NULL
}
# compute fitted z-curve density
x_seq <- seq(0, x$control$b, .01)
y_den <- sapply(1:length(x$fit$mu), function(i){
x$fit$weights[i]*exp(.zdist_lpdf(x_seq, x$fit$mu[i], 1, x$control$a, x$control$b))
})
y_den <- apply(y_den, 1, sum)
# and the piecewise confidence intervals
if(CI & !is.null(x$boot)){
y_den_boot <- sapply(1:nrow(x$boot$mu),function(b){
y_den <- sapply(1:length(x$boot$mu[b,]), function(i){
x$boot$weights[b,i]*exp(.zdist_lpdf(x_seq, x$boot$mu[b,i], 1, x$control$a, x$control$b))
})
y_den <- apply(y_den, 1, sum)
})
y_den_l.CI <- apply(y_den_boot, 1, stats::quantile, prob = .025)
y_den_u.CI <- apply(y_den_boot, 1, stats::quantile, prob = .975)
}
### setting of the axis and values for text allignment
x_max <- x$control$b
x.anno <- x_max*x.anno
if(extrapolate){
y_max <- max(c(y_den, h1$density, h2$density))
}else{
y_max <- max(c(h1$density, h2$density))
}
# adjusting the height of the chart so the text is higher than the highest ploted thing in the x-range of the text
if(annotation & CI){
y_max <- ifelse(max(c(y_den_u.CI[x.anno < x_seq], h1$density[x.anno < h1$breaks[-length(h1$density)]])) > y_max*(y.anno[length(y.anno)] - .025),
max(c(y_den_u.CI[x.anno < x_seq], h1$density[x.anno < h1$breaks[-length(h1$density)]]))/(y.anno[length(y.anno)] - .025), y_max)
}else if(annotation & !CI){
y_max <- ifelse(max(c(y_den[x.anno < x_seq], h1$density[x.anno < h1$breaks[-length(h1$density)]])) > y_max*(y.anno[length(y.anno)] - .025),
max(c(y_den[x.anno < x_seq], h1$density[x.anno < h1$breaks[-length(h1$density)]]))/(y.anno[length(y.anno)] - .025), y_max)
}
# overwrite xmin, xmax, ymin, ymax if xlim and ylim are specified
if(!is.null(ylim)){
y_min <- ylim[1]
y_max <- ylim[2]
}else{
y_min <- 0
}
if(!is.null(xlim)){
x_min <- xlim[1]
x_max <- xlim[2]
}else{
x_min <- 0
}
if(plot_type == "base"){
# plot z-scores used for fitting
graphics::plot(h1,
freq = FALSE, density = 0, angle = 0, border = "blue",
xlim = c(x_min, x_max),
ylim = c(y_min, y_max),
ylab = ylab,
xlab = xlab,
main = main,
cex.lab = cex.lab,
cex.axis = cex.axis,
lwd = 1, las = 1)
# and un-used z-scores
if(!is.null(h2)){
graphics::par(new=TRUE)
graphics::plot(h2,
freq = FALSE, density = 0, angle = 0, border ="grey30",
xlim = c(x_min, x_max),
ylim = c(y_min, y_max),
axes = FALSE, ann = FALSE, lwd = 1, las = 1)
}
# add the density estimate if the model was estimated by density
if(x$method == "density"){
graphics::lines(x$fit$density$x, x$fit$density$y, lty = 1, col = "grey60", lwd = 4)
}
# significance line
if(x.anno*x_max < x$control$a){
graphics::lines(rep(x$control$a,2), c(0, (min(y.anno) - .025)*y_max), col = "blue", lty = 2, lwd = 1)
graphics::lines(rep(stats::qnorm(x$control$sig_level/2, lower.tail = FALSE),2), c(0, (min(y.anno) - .025)*y_max), col = "red", lty = 1, lwd = 2)
}else{
graphics::abline(v = x$control$a, col = "blue", lty = 2, lwd = 1)
graphics::abline(v = stats::qnorm(x$control$sig_level/2, lower.tail = FALSE), col = "red", lty = 1, lwd = 2)
}
# predicted densities
graphics::lines(x_seq, y_den, lty = 1, col = "blue", lwd = 5)
if(CI & !is.null(x$boot)){
graphics::lines(x_seq, y_den_l.CI, lty = 3, col = "blue", lwd = 3)
graphics::lines(x_seq, y_den_u.CI, lty = 3, col = "blue", lwd = 3)
}
# add annotation
if(annotation){
x_summary <- summary(x)
graphics::text(x.anno, y_max*y.anno[1] , paste0("Range: ",.r2d(min(x$data))," to ",.r2d(max(x$data))),
adj = c(0, 0), cex = cex.anno)
graphics::text(x.anno, y_max*y.anno[2] , paste0(x_summary$model$N_all, " tests, ", x_summary$model$N_sig, " significant"),
adj = c(0, 0), cex = cex.anno)
obs_proportion <- stats::prop.test(x_summary$model$N_sig, x_summary$model$N_all)
graphics::text(x.anno, y_max*y.anno[3] , paste0("Observed discovery rate:"),
adj = c(0, 0), cex = cex.anno)
graphics::text(x.anno, y_max*y.anno[4] , paste0(.r2d(obs_proportion$estimate), " 95% CI [", .r2d(obs_proportion$conf.int[1]), " ,",
.r2d(obs_proportion$conf.int[2]), "]"),
adj = c(0, 0), cex = cex.anno)
if(!is.null(x$boot)){
graphics::text(x.anno, y_max*y.anno[5] , paste0("Expected discovery rate:"),
adj = c(0, 0), cex = cex.anno)
graphics::text(x.anno, y_max*y.anno[6] , paste0(.r2d(x_summary$coefficients["EDR","Estimate"]), " 95% CI [", .r2d(x_summary$coefficients["EDR","l.CI"]), " ,",
.r2d(x_summary$coefficients["EDR","u.CI"]), "]"),
adj = c(0, 0), cex = cex.anno)
graphics::text(x.anno, y_max*y.anno[7] , paste0("Expected replicability rate:"),
adj = c(0, 0), cex = cex.anno)
graphics::text(x.anno, y_max*y.anno[8] , paste0(.r2d(x_summary$coefficients["ERR","Estimate"]), " 95% CI [", .r2d(x_summary$coefficients["ERR","l.CI"]), " ,",
.r2d(x_summary$coefficients["ERR","u.CI"]), "]"),
adj = c(0, 0), cex = cex.anno)
}else{
graphics::text(x.anno, y_max*y.anno[5] , paste0("Expected discovery rate:"),
adj = c(0, 0), cex = cex.anno)
graphics::text(x.anno, y_max*y.anno[6] , paste0(.r2d(x_summary$coefficients["EDR","Estimate"])),
adj = c(0, 0), cex = cex.anno)
graphics::text(x.anno, y_max*y.anno[7] , paste0("Expected replicability rate:"),
adj = c(0, 0), cex = cex.anno)
graphics::text(x.anno, y_max*y.anno[8] , paste0(.r2d(x_summary$coefficients["ERR","Estimate"])),
adj = c(0, 0), cex = cex.anno)
}
}
return(invisible())
}else if(plot_type == "ggplot"){
out <- ggplot2::ggplot() +
ggplot2::scale_x_continuous(
name = xlab,
breaks = pretty(c(x_min, x_max)),
limits = c(x_min, x_max)) +
ggplot2::scale_y_continuous(
name = ylab,
breaks = pretty(c(y_min, y_max)),
limits = c(y_min, y_max)) +
ggplot2::ggtitle(main) +
ggplot2::theme_classic()
# add significant z-scores
out <- out + ggplot2::geom_rect(
data = data.frame(
xmin = h1$breaks[-length(h1$breaks)],
xmax = h1$breaks[-1],
ymin = 0,
ymax = h1$density),
mapping = ggplot2::aes(
xmin = .data[["xmin"]],
xmax = .data[["xmax"]],
ymin = .data[["ymin"]],
ymax = .data[["ymax"]]),
fill = "white", col = "blue")
# add non-significant z-scores (if any)
if(!is.null(h2)){
out <- out + ggplot2::geom_rect(
data = data.frame(
xmin = h2$breaks[-length(h2$breaks)],
xmax = h2$breaks[-1],
ymin = 0,
ymax = h2$density),
mapping = ggplot2::aes(
xmin = .data[["xmin"]],
xmax = .data[["xmax"]],
ymin = .data[["ymin"]],
ymax = .data[["ymax"]]),
fill = "white", col = "grey")
}
# add the density estimate if the model was estimated by density
if(x$method == "density"){
out <- out + ggplot2::geom_line(
data = data.frame(
x = x$fit$density$x,
y = x$fit$density$y
),
mapping = ggplot2::aes(
x = .data[["x"]],
y = .data[["y"]]
),
linewidth = 1, col = "grey60", linetype = 4)
}
# significance line
if(x.anno*x_max < x$control$a){
#do not overdraw the annotation in case it is in the way
out <- out + ggplot2::geom_line(
data = data.frame(
x = rep(x$control$a,2),
y = c(0, (min(y.anno) - .025)*y_max)
),
mapping = ggplot2::aes(
x = .data[["x"]],
y = .data[["y"]]
),
linewidth = 1, col = "blue", linetype = 1)
out <- out + ggplot2::geom_line(
data = data.frame(
x = rep(stats::qnorm(x$control$sig_level/2, lower.tail = FALSE),2),
y = c(0, (min(y.anno) - .025)*y_max)
),
mapping = ggplot2::aes(
x = .data[["x"]],
y = .data[["y"]]
),
linewidth = 1.5, col = "red", linetype = 2)
}else{
out <- out + ggplot2::geom_vline(
xintercept = x$control$a,
linewidth = 1, col = "blue", linetype = 1)
out <- out + ggplot2::geom_vline(
xintercept = stats::qnorm(x$control$sig_level/2, lower.tail = FALSE),
linewidth = 1.5, col = "red", linetype = 2)
}
# predicted densities
out <- out + ggplot2::geom_line(
data = data.frame(
x = x_seq,
y = y_den
),
mapping = ggplot2::aes(
x = .data[["x"]],
y = .data[["y"]]
),
linewidth = 2, col = "blue", linetype = 1)
if(CI & !is.null(x$boot)){
out <- out + ggplot2::geom_line(
data = data.frame(
x = x_seq,
y = y_den_l.CI
),
mapping = ggplot2::aes(
x = .data[["x"]],
y = .data[["y"]]
),
linewidth = 1.75, col = "blue", linetype = 3)
out <- out + ggplot2::geom_line(
data = data.frame(
x = x_seq,
y = y_den_u.CI
),
mapping = ggplot2::aes(
x = .data[["x"]],
y = .data[["y"]]
),
linewidth = 1.75, col = "blue", linetype = 3)
}
# add annotation
if(annotation){
x_summary <- summary(x)
obs_proportion <- stats::prop.test(x_summary$model$N_sig, x_summary$model$N_all)
ggplot2_base_size <- 5
out <- out + ggplot2::geom_text(
data = data.frame(
x = x.anno,
y = y_max*y.anno[1:4],
label = c(
paste0("Range: ",.r2d(min(x$data))," to ",.r2d(max(x$data))),
paste0(x_summary$model$N_all, " tests, ", x_summary$model$N_sig, " significant"),
paste0("Observed discovery rate:"),
paste0(.r2d(obs_proportion$estimate), " 95% CI [", .r2d(obs_proportion$conf.int[1]), " ,",
.r2d(obs_proportion$conf.int[2]), "]")
)
),
mapping = ggplot2::aes(
x = .data[["x"]],
y = .data[["y"]],
label = .data[["label"]]),
hjust = 0, vjust = 0, size = ggplot2_base_size*cex.anno)
if(!is.null(x$boot)){
out <- out + ggplot2::geom_text(
data = data.frame(
x = x.anno,
y = y_max*y.anno[5:8],
label = c(
paste0("Expected discovery rate:"),
paste0(.r2d(x_summary$coefficients["EDR","Estimate"]), " 95% CI [", .r2d(x_summary$coefficients["EDR","l.CI"]), " ,",
.r2d(x_summary$coefficients["EDR","u.CI"]), "]"),
paste0("Expected replicability rate:"),
paste0(.r2d(x_summary$coefficients["ERR","Estimate"]), " 95% CI [", .r2d(x_summary$coefficients["ERR","l.CI"]), " ,",
.r2d(x_summary$coefficients["ERR","u.CI"]), "]")
)
),
mapping = ggplot2::aes(
x = .data[["x"]],
y = .data[["y"]],
label = .data[["label"]]),
hjust = 0, vjust = 0, size = ggplot2_base_size*cex.anno)
}else{
out <- out + ggplot2::geom_text(
data = data.frame(
x = x.anno,
y = y_max*y.anno[5:8],
label = c(
paste0("Expected discovery rate:"),
paste0(.r2d(x_summary$coefficients["EDR","Estimate"])),
paste0("Expected replicability rate:"),
paste0(.r2d(x_summary$coefficients["ERR","Estimate"]))
)
),
mapping = ggplot2::aes(
x = .data[["x"]],
y = .data[["y"]],
label = .data[["label"]]),
hjust = 0, vjust = 0, size = ggplot2_base_size*cex.anno)
}
}
return(out)
}
}
#' @title Z-scores from subset of original studies featured in OSC 2015
#' reproducibility project
#'
#' @description The dataset contains z-scores from subset of original
#' studies featured in psychology reproducibility project
#' \insertCite{osc}{zcurve}. Only z-scores from studies with unambiguous
#' original outcomes are supplied (eliminating 7 studies with marginally
#' significant results). The real replication rate for those studies is
#' 35/90 (the whole project reports 36/97).
#'
#' @format A vector with 90 observations
#'
#' @references
#' \insertAllCited{}
"OSC.z"
# cleaning
.onUnload <- function (libpath) {
library.dynam.unload("zcurve", libpath)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.