#' @title Carter et al. (2019) Data-Generating Mechanism
#'
#' @author František Bartoš \email{f.bartos96@@gmail.com} (adapted from Hong and Reed 2021)
#'
#' @description
#' This data-generating mechanism simulates primary studies estimating treatment
#' effects using Cohen's d. The observed effect size is modeled as a fixed mean
#' plus random heterogeneity across studies, with sample sizes varying to
#' generate differences in standard errors. The simulation introduces
#' publication bias via a selection algorithm where the probability of
#' publication depends nonlinearly on the sign and p-value of the effect,
#' with regimes for no, medium, and strong publication bias. It also
#' incorporates questionable research practices (QRPs) such as optional
#' outlier removal, selection between dependent variables, use of moderators,
#' and optional stopping.
#'
#' The description and code is based on
#' \insertCite{hong2021using;textual}{PublicationBiasBenchmark}.
#' The data-generating mechanism was introduced in
#' \insertCite{carter2019correcting;textual}{PublicationBiasBenchmark}.
#'
#' @param dgm_name DGM name (automatically passed)
#' @param settings List containing \describe{
#' \item{mean_effect}{Mean effect}
#' \item{effect_heterogeneity}{Mean effect heterogeneity}
#' \item{bias}{Degree of publication bias with one of following levels:
#' \code{"none"}, \code{"medium"}, \code{"high"}.}
#' \item{QRP}{Degree of questionable research practices with one of following
#' levels: \code{"none"}, \code{"medium"}, \code{"high"}.}
#' \item{n_studies}{Number of effect size estimates}
#' }
#'
#' @details
#' This simulation environment is based on the framework described by
#' Carter, Schönbrodt, Gervais, and Hilgard (2019). In this setup,
#' primary studies estimate the effect of a treatment using Cohen's d as the
#' effect size metric. The observed difference between treatment and control
#' groups is modeled as the sum of a fixed effect (alpha1) and a random component,
#' which introduces effect heterogeneity across studies. The degree of
#' heterogeneity is controlled by the parameter sigma2_h. Variability in the
#' standard errors of d is generated by simulating primary studies with
#' different sample sizes.
#'
#' The simulation incorporates two main types of distortions in the research
#' environment. First, a publication selection algorithm is used, where the
#' probability of a study being "published" depends nonlinearly on both the
#' sign of the estimated effect and its p-value. Three publication selection
#' regimes are modeled: "No Publication Bias," "Medium Publication Bias,"
#' and "Strong Publication Bias," each defined by different parameters in
#' the selection algorithm. Second, the simulation includes four types of
#' questionable research practices (QRPs): (a) optional removal of outliers,
#' (b) optional selection between two dependent variables, (c) optional use of
#' moderators, and (d) optional stopping.
#'
#' @return Data frame with \describe{
#' \item{yi}{effect size}
#' \item{sei}{standard error}
#' \item{ni}{sample size}
#' \item{es_type}{effect size type}
#' }
#'
#' @references
#' \insertAllCited{}
#'
#' @seealso [dgm()], [validate_dgm_setting()]
#' @export
dgm.Carter2019 <- function(dgm_name, settings) {
  # Extract settings
  mean_effect          <- settings[["mean_effect"]]
  effect_heterogeneity <- settings[["effect_heterogeneity"]]
  bias                 <- as.character(settings[["bias"]])
  QRP                  <- as.character(settings[["QRP"]])
  n_studies            <- settings[["n_studies"]]

  # Simulate the primary studies.
  # NOTE: arguments are passed by name because the simulator's signature is
  # (k, delta, tau, qrpEnv, censorFunc, ...). The previous positional call
  # passed `bias` into `qrpEnv` and `QRP` into `censorFunc`, silently swapping
  # the QRP environment and the publication-bias regime (both accept the same
  # labels, so no error was raised).
  df <- .HongAndReed2021_Carter2019_simMA(
    k          = n_studies,
    delta      = mean_effect,
    tau        = effect_heterogeneity,
    qrpEnv     = QRP,
    censorFunc = bias
  )

  # Standardized output: Cohen's d, its standard error, the total sample
  # size, and the effect-size-type marker expected by the benchmark.
  data <- data.frame(
    yi      = df[, "d"],
    sei     = df[, "se"],
    ni      = df[, "N"],
    es_type = "SMD"
  )
  return(data)
}
#' @export
validate_dgm_setting.Carter2019 <- function(dgm_name, settings) {
  # Validate the settings list for the Carter2019 DGM. Stops with an
  # informative error on the first violated constraint; returns TRUE
  # invisibly when everything checks out.

  # Check that all required settings are specified
  required_params <- c("mean_effect", "effect_heterogeneity", "bias", "QRP", "n_studies")
  missing_params  <- setdiff(required_params, names(settings))
  if (length(missing_params) > 0)
    stop("Missing required settings: ", paste(missing_params, collapse = ", "))

  # Extract settings
  mean_effect          <- settings[["mean_effect"]]
  effect_heterogeneity <- settings[["effect_heterogeneity"]]
  bias                 <- as.character(settings[["bias"]])
  QRP                  <- as.character(settings[["QRP"]])
  n_studies            <- settings[["n_studies"]]

  # Validate settings
  if (length(mean_effect) != 1 || !is.numeric(mean_effect) || is.na(mean_effect))
    stop("'mean_effect' must be numeric")
  if (length(effect_heterogeneity) != 1 || !is.numeric(effect_heterogeneity) || is.na(effect_heterogeneity) || effect_heterogeneity < 0)
    stop("'effect_heterogeneity' must be non-negative")
  if (length(n_studies) != 1 || !is.numeric(n_studies) || is.na(n_studies) || !is.wholenumber(n_studies) || n_studies < 1)
    stop("'n_studies' must be an integer larger than 0")
  # 'bias' selects the simulator's censoring regime, which only supports
  # 'none', 'medium', and 'high' ('low' would fail in the downstream
  # match.arg() call, so it is rejected here).
  if (length(bias) != 1 || !is.character(bias) || !bias %in% c("none", "medium", "high"))
    stop("'bias' must be a string with one of the following values: 'none', 'medium', 'high'")
  # 'QRP' selects the simulator's QRP environment, which also supports 'low'.
  if (length(QRP) != 1 || !is.character(QRP) || !QRP %in% c("none", "low", "medium", "high"))
    stop("'QRP' must be a string with one of the following values: 'none', 'low', 'medium', 'high'")

  return(invisible(TRUE))
}
#' @export
dgm_conditions.Carter2019 <- function(dgm_name) {
  # Enumerate all simulation conditions; the ordering replicates
  # Hong and Reed (2021): a first batch with small-to-moderate numbers of
  # studies, followed by a second batch with large numbers of studies.
  build_grid <- function(k_set) {
    expand.grid(
      k      = k_set,                       # number of studies in each MA
      delta  = c(0, .2, .5, .8),            # true mean of effect sizes
      qrpEnv = c("none", "medium", "high"), # QRP environment
      censor = c("none", "medium", "high"), # publication bias
      tau    = c(0, .2, .4),                # heterogeneity (normal distribution)
      stringsAsFactors = FALSE
    )
  }
  settings <- rbind(
    build_grid(c(10, 30, 60, 100)),
    build_grid(c(200, 400, 800))
  )
  # Rename to the parameter names used by dgm()/validate_dgm_setting()
  colnames(settings) <- c("n_studies", "mean_effect", "QRP", "bias", "effect_heterogeneity")
  # Attach a sequential condition identifier
  settings$condition_id <- seq_len(nrow(settings))
  settings
}
### additional simulation functions ----
# Imported and slightly modified from Hong & Reed 2021
# (https://osf.io/pr4mb/)
# helper functions for .HongAndReed2021_Carter2019_censor function
# clamp: restrict a scalar x to the unit interval [0, 1]
.HongAndReed2021_Carter2019_clamp <- function(x) {
  if (x > 1) {
    x <- 1
  }
  if (x < 0) {
    x <- 0
  }
  x
}
# Exponential ease-out between two publication probabilities
# (fast change near the start of the p-value range).
# @param p observed p-value
# @param p_range range of observed p-values where the easing takes place
# @param from_prob probability of publication at the start of the range
# @param to_prob probability of publication at the end of the range
.HongAndReed2021_Carter2019_easeOutExpo <- function(p, p_range, from_prob, to_prob) {
  offset <- p - p_range[1]
  span   <- p_range[2] - p_range[1]
  (to_prob - from_prob) * (1 - 2^(-10 * offset / span)) + from_prob
}
# Exponential ease-in between two publication probabilities
# (slow change near the start of the range, fast near the end).
# Same parameters as .HongAndReed2021_Carter2019_easeOutExpo.
.HongAndReed2021_Carter2019_easeInExpo <- function(p, p_range, from_prob, to_prob) {
  offset <- p - p_range[1]
  span   <- p_range[2] - p_range[1]
  (to_prob - from_prob) * 2^(10 * (offset / span - 1)) + from_prob
}
# Probability that a study is published, given its TWO-tailed p-value and the
# direction of its effect.
# @param pObs two-tailed p-value
# @param direction +1: expected direction, <= 0: wrong direction
# @param posSign_NS_baseRate probability that a p > .10 in the right direction enters the literature
# @param negSign_NS_baseRate probability that a p > .01 in the wrong direction enters the literature (left anchor at p = .01)
# @param counterSig_rate probability that a p < .001 in the wrong direction enters the literature
.HongAndReed2021_Carter2019_censor <- function(pObs, direction, posSign_NS_baseRate = 0.3, negSign_NS_baseRate = 0.05, counterSig_rate = 0.50){
  if (direction > 0) {
    # -------------------------------------------------------------------
    # Correct direction of effect
    if (pObs < .05) {
      # significant: always published
      pubProb <- 1
    } else if (pObs < .1) {
      # trending: ease from certain publication down to the base rate
      pubProb <- .HongAndReed2021_Carter2019_easeOutExpo(p = pObs, p_range = c(.05, .1), from_prob = 1, to_prob = posSign_NS_baseRate)
    } else {
      # non-significant (p > .1)
      pubProb <- posSign_NS_baseRate
    }
  } else {
    # -------------------------------------------------------------------
    # Wrong direction of effect
    if (pObs < .001) {
      # highly significant in the wrong direction
      pubProb <- counterSig_rate
    } else if (pObs < .01) {
      # standard significance: ease from counterSig_rate down to the base rate
      pubProb <- .HongAndReed2021_Carter2019_easeOutExpo(p = pObs, p_range = c(.001, .01), from_prob = counterSig_rate, to_prob = negSign_NS_baseRate)
    } else {
      # non-significant in the wrong direction
      pubProb <- negSign_NS_baseRate
    }
  }
  pubProb
}
# Equivalent variant of the censor function that takes a ONE-tailed p-value,
# so the direction of the effect is implicit (p < .5 = expected direction,
# p > .5 = wrong direction).
.HongAndReed2021_Carter2019_censor_1t_0 <- function(pObs, posSign_NS_baseRate = 0.3, negSign_NS_baseRate = 0.05, counterSig_rate = 0.50){
  # right direction, significant at the two-sided 5% level
  if (pObs < .05/2) {
    return(1)
  }
  # right direction, trending: ease from 1 down to the base rate
  if (pObs < .1/2) {
    return(.HongAndReed2021_Carter2019_easeOutExpo(p = pObs, p_range = c(.05/2, .1/2), from_prob = 1, to_prob = posSign_NS_baseRate))
  }
  # right direction, non-significant (two-sided p > .1)
  if (pObs < .5) {
    return(posSign_NS_baseRate)
  }
  # wrong direction, non-significant at the two-sided 1% level
  if (pObs < 1 - (.01/2)) {
    return(negSign_NS_baseRate)
  }
  # wrong direction, two-sided p between .01 and .001: ease up to counterSig_rate
  if (pObs < 1 - (.001/2)) {
    return(.HongAndReed2021_Carter2019_easeInExpo(p = pObs, p_range = c(1 - (.01/2), 1 - (.001/2)), from_prob = negSign_NS_baseRate, to_prob = counterSig_rate))
  }
  # wrong direction, highly significant
  counterSig_rate
}
# vectorized version (element-wise over pObs)
.HongAndReed2021_Carter2019_censor_1t <- Vectorize(.HongAndReed2021_Carter2019_censor_1t_0)
# helper: convert a 1-tailed p-value into the corresponding 2-tailed p-value
.HongAndReed2021_Carter2019_p1_to_2 <- function(p.1tailed) {
  distance_from_half <- abs(0.5 - p.1tailed)
  1 - distance_from_half * 2
}
# helper: direction implied by a 1-tailed p-value
# (+1 = expected direction, -1 = opposite direction)
.HongAndReed2021_Carter2019_getDir <- function(p.1tailed) {
  ifelse(p.1tailed < 0.5, yes = 1, no = -1)
}
# Sanity check: do both .HongAndReed2021_Carter2019_censor functions return the same value?
# curve(.HongAndReed2021_Carter2019_censor_1t(pObs=x, posSign_NS_baseRate = 0.20, negSign_NS_baseRate = 0.05, counterSig_rate = 0.50), from=0, to=1, n=10000)
#
# for (p.1t in seq(0, 1, length.out=1000)) {
# points(x=p.1t, y=.HongAndReed2021_Carter2019_censor(pObs=.HongAndReed2021_Carter2019_p1_to_2(p.1t), direction=.HongAndReed2021_Carter2019_getDir(p.1t)), col="red", pch=21, cex=.3)
# }
# Predefined censoring profile: medium publication bias
.HongAndReed2021_Carter2019_censorMedium0 <- function(pObs, direction) {
  .HongAndReed2021_Carter2019_censor(
    pObs, direction,
    posSign_NS_baseRate = 0.20,
    negSign_NS_baseRate = 0.05,
    counterSig_rate     = 0.50
  )
}
# vectorized over pObs/direction for use with study vectors
.HongAndReed2021_Carter2019_censorMedium <- Vectorize(.HongAndReed2021_Carter2019_censorMedium0)
# Predefined censoring profile: strong publication bias
.HongAndReed2021_Carter2019_censorHigh0 <- function(pObs, direction) {
  .HongAndReed2021_Carter2019_censor(
    pObs, direction,
    posSign_NS_baseRate = 0.05,
    negSign_NS_baseRate = 0.00,
    counterSig_rate     = 0.20
  )
}
# vectorized over pObs/direction for use with study vectors
.HongAndReed2021_Carter2019_censorHigh <- Vectorize(.HongAndReed2021_Carter2019_censorHigh0)
# MAIN FUNCTION to use: .HongAndReed2021_Carter2019_simMA
# (see from line 440)
# source("sim-studies.R")
# load .HongAndReed2021_Carter2019_censoring function
#source(".HongAndReed2021_Carter2019_censorFunc.R")
# Get simulated per-group sample sizes that follow the distribution of
# empirical sample sizes (see folder "Empirical sample size distributions").
# Draws come from a truncated inverse-gamma distribution.
# @param k number of sample sizes to draw
# @param min.n minimum per-group sample size
# @param max.n maximum per-group sample size; 1905 corresponds to the largest
#   observed per-group sample size in Marszalek et al.
# @param shape,scale inverse-gamma parameters (fitted to the empirical data)
.HongAndReed2021_Carter2019_getN <- function(k=1, min.n = 5, max.n = 1905, shape=1.15326986, scale=0.04622745) {
  # Return the rounded draws directly; the previous version assigned to an
  # unused local, which made the return value invisible.
  round(.HongAndReed2021_Carter2019_rtruncInvGamma(n=k, a=min.n, b=max.n, shape=shape, scale=scale))
}
# Quantile function of the inverse-gamma distribution truncated to [a, b].
# Hand-rolled (based on truncdist::qtrunc) because of importing and namespace
# issues with the rtrunc and invgamma packages.
# @param p vector of probabilities
# @param spec unused; kept only for signature compatibility with
#   truncdist::qtrunc (the distribution here is hard-coded to inverse gamma)
# @param a,b truncation bounds (must satisfy a < b)
# @param ... passed to the inverse-gamma CDF/quantile (shape, rate or scale)
.HongAndReed2021_Carter2019_qtruncInvGamma <- function (p, spec, a = -Inf, b = Inf, ...) {
  if (a >= b)
    stop("argument a is greater than or equal to b")
  # CDF of the inverse gamma: P(X <= q) = P(1/X >= 1/q)
  G <- function(q, shape, rate = 1, scale = 1/rate, lower.tail = TRUE, log.p = FALSE) {
    if (missing(rate) && !missing(scale))
      rate <- 1/scale
    stats::pgamma(1/q, shape, rate, lower.tail = !lower.tail, log.p = log.p)
  }
  # Quantile function of the inverse gamma (inverse of G)
  Gin <- function(p, shape, rate = 1, scale = 1/rate, lower.tail = TRUE, log.p = FALSE) {
    if (missing(rate) && !missing(scale))
      rate <- 1/scale
    if (log.p) {
      stats::qgamma(p, shape, rate, lower.tail = !lower.tail, log.p = TRUE)^(-1)
    }
    else {
      stats::qgamma(1 - p, shape, rate, lower.tail = lower.tail, log.p = FALSE)^(-1)
    }
  }
  G.a <- G(a, ...)
  G.b <- G(b, ...)
  if (G.a == G.b) {
    stop("Truncation interval is not inside the domain of the quantile function")
  }
  # Map p from [0, 1] onto [G(a), G(b)], invert, and clamp to the bounds.
  # Reuses the cached G.a/G.b instead of re-evaluating the CDF (the original
  # computed them and then recomputed G(a, ...)/G(b, ...) here).
  result <- pmin(pmax(a, Gin(G.a + p * (G.b - G.a), ...)), b)
  return(result)
}
# Draw n values from the inverse-gamma distribution truncated to [a, b]
# via inverse-transform sampling (based on truncdist::rtrunc).
# @param n number of draws
# @param a,b truncation bounds (must satisfy a < b)
# @param ... passed on to the truncated quantile function (shape, rate/scale)
.HongAndReed2021_Carter2019_rtruncInvGamma <- function(n, a, b, ...) {
  if (a >= b)
    stop("argument a is greater than or equal to b")
  # uniform draws mapped through the truncated quantile function
  uniforms <- stats::runif(n, min = 0, max = 1)
  .HongAndReed2021_Carter2019_qtruncInvGamma(uniforms, a = a, b = b, ...)
}
#==============
# .HongAndReed2021_Carter2019_outlier #
#==============
# Flag whether a value x is an outlier, defined as lying 2 or more standard
# deviations away from the mean of x's vector.
# @param x value to evaluate
# @param mean mean of the vector x belongs to
# @param sd standard deviation of the vector x belongs to
# @return 0 if |z| < 2, 1 otherwise. The previous version produced the value
#   through a trailing assignment (`out=if(...)`), returning it invisibly;
#   it is now returned visibly.
.HongAndReed2021_Carter2019_outlier <- function(x, mean, sd) {
  z <- (x - mean) / sd
  if (abs(z) < 2) 0 else 1
}
#==============
# .HongAndReed2021_Carter2019_simData_noQRP #
#==============
# Simulate one unbiased (no-QRP) two-group experiment and return its summary.
# delta is the true mean effect; tau is the SD of the random study-level
# deviation from delta (heterogeneity). The per-group sample size is either
# fixed.n (if supplied) or drawn from the empirical sample-size distribution
# via .HongAndReed2021_Carter2019_getN().
# Returns c(d, p, t, N, d_v, d_se, power, n1, n2, delta_i); the value is
# produced by the trailing assignment and is therefore returned invisibly.
# NOTE: the order of the RNG calls below defines reproducibility under
# set.seed() and must not be changed.
.HongAndReed2021_Carter2019_simData_noQRP <- function(delta, tau, fixed.n=NULL){
# get the per-group sample size -
# either a fixed n, or sampled from the truncated inverse-gamma distribution
if (!is.null(fixed.n)) {
n <- fixed.n
} else {
n <- .HongAndReed2021_Carter2019_getN(k=1)
}
#calculate the treatment effect as a function of the
#true effect, delta, and heterogeneity tau
delta_i = delta + stats::rnorm(1, 0, tau)
#generate two independent vectors of raw data:
#experimental group centered at delta_i, control group at zero,
#error is randomly distributed and equal between groups (unit SD)
Ye = stats::rnorm(n, delta_i, 1)
Yc = stats::rnorm(n, 0, 1)
#get the summary stats
m1 = mean(Ye)
v1 = stats::var(Ye)
m2 = mean(Yc)
v2 = stats::var(Yc)
n1 = n
n2 = n
df = 2*n-2
#get the pooled variance (formula from Hedge's g)
S = sqrt( ((n1 - 1)*v1 + (n2 - 1)*v2) / df )
#compare the two distributions (stats::t.test defaults to the Welch test,
#while d uses the pooled SD above)
test = stats::t.test(Ye, Yc)
#calculate d, the variance of d, the p-value, the t-stat, and n.
d = (m1 - m2)/S
d_v = (n1 + n2)/(n1 * n2) + (d^2 / (2*(n1+n2)))
d_se = sqrt(d_v)
p = test$p.value
t = as.numeric(test$statistic)
#get power for the observed d at these group sizes (requires the pwr package)
pow = pwr::pwr.t2n.test(d, n1 = n1, n2 = n2)
pwr = pow$power
#output (see header for element order)
out = c(d, p, t, n1+n2, d_v, d_se, pwr, n1, n2, delta_i)
}
#============
# .HongAndReed2021_Carter2019_expDataB #
#============
# Produce the raw data that the QRP functions can later "bias".
# delta is the true effect, tau is the heterogeneity SD, and cbdv is the
# correlation between the two dependent variables (outcomes).
# Output is an array of dim c(maxN, 2, 5): slices 1-4 are the four cells of
# the 2 (treatment) x 2 (moderator) design, slice 5 stores the study-level
# true effect. This is called within .HongAndReed2021_Carter2019_simData_QRP.
.HongAndReed2021_Carter2019_expDataB <- function(delta, tau, cbdv, maxN = 3000){
# sample the treatment effect as a function of the
# true effect, delta, and heterogeneity (defined as tau)
delta_i = delta + stats::rnorm(1, 0, tau)
# store the true effect for the study (constant matrix, read back as [1,1])
D = matrix(delta_i, 2, maxN)
#generate four matrices of maxN rows and 2 columns
#each matrix represents results from maxN subjects experiencing
#one of the 4 unique combinations of the experimental manipulation
#and the moderator (it's a 2*2 design).
#each column represents the results on a DV because each
#participant has responded on two DVs
#results on the DVs are correlated at r = cbdv
#the performance of the groups (i.e., the matrices) are
#not correlated
#responses are normally distributed with a mean of zero
#and a SD of 1
#the treatment effect is added to each observation in the
#experimental cells (g1, g3)
#there is no effect for the moderator
g1 = MASS::mvrnorm(maxN, rep(0,2), matrix(c(1,cbdv,cbdv,1),2,2)) + delta_i
g2 = MASS::mvrnorm(maxN, rep(0,2), matrix(c(1,cbdv,cbdv,1),2,2))
g3 = MASS::mvrnorm(maxN, rep(0,2), matrix(c(1,cbdv,cbdv,1),2,2)) + delta_i
g4 = MASS::mvrnorm(maxN, rep(0,2), matrix(c(1,cbdv,cbdv,1),2,2))
#build the output array
G = array(c(g1,g2,g3,g4,D),dim=c(maxN, 2, 5))
return(G)
}
#w <- .HongAndReed2021_Carter2019_expDataB(0.5, .1, .2)
#==========
# .HongAndReed2021_Carter2019_testIt #
#==========
# For use with p-hack-able data from .HongAndReed2021_Carter2019_expDataB.
# Runs a two-group t-test on one dataset after optionally subsetting by a
# moderator level and/or excluding flagged outliers, and returns only the
# info needed to decide whether the data will be hacked again (power and
# variance get calculated later, if the result is kept). Called within
# .HongAndReed2021_Carter2019_analyB.
# Columns of DV: [,1] outcome, [,2] group code (1/2), [,3] moderator level
# (1/2), [,4] outlier flag (1 = outlier).
# @param DV dataset matrix as described above
# @param lvl moderator subset: 0 = none (main effect), 1/2 = that level only
# @param out 1 = exclude outliers, 0 = keep them
# @return c(d, p, t, N, n1, n2)
.HongAndReed2021_Carter2019_testIt <- function(DV, lvl, out) {
  # compose the row filter implied by lvl/out
  keep <- rep(TRUE, nrow(DV))
  if (lvl != 0) {
    keep <- keep & DV[, 3] == lvl
  }
  if (out == 1) {
    keep <- keep & DV[, 4] < 1
  }
  Y <- DV[keep, 1]
  X <- DV[keep, 2]
  # equal-variance two-sample t-test of outcome by group
  test <- stats::t.test(Y ~ X, var.equal = TRUE)
  grp1 <- Y[X == 1]
  grp2 <- Y[X == 2]
  n1 <- length(grp1)
  n2 <- length(grp2)
  v1 <- stats::var(grp1)
  v2 <- stats::var(grp2)
  N  <- n1 + n2
  t  <- as.numeric(test$statistic)
  p  <- test$p.value
  m1 <- as.numeric(test$estimate[1])
  m2 <- as.numeric(test$estimate[2])
  df <- N - 2
  # pooled standard deviation
  S <- sqrt(((n1 - 1) * v1 + (n2 - 1) * v2) / df)
  # Cohen's d from the group means and pooled SD
  d <- (m1 - m2) / S
  c(d, p, t, N, n1, n2)
}
#===============
# .HongAndReed2021_Carter2019_analyB #
#===============
# Produce one study result under QRPs ("p-hacking"): optional moderator
# subsetting, optional outlier removal, and optional choice between two DVs.
# Takes the four cell groups (g1:g4) from .HongAndReed2021_Carter2019_expDataB.
# @param g1,g3 experimental-group cells (moderator levels 1 and 2)
# @param g2,g4 control-group cells (moderator levels 1 and 2)
# @param D study-level true effect (delta_i)
# @param multDV 1 = allowed to switch to the second DV
# @param out 1 = allowed to drop outliers
# @param mod 1 = allowed to subset on the moderator
# @return c(d, p, t, N, d_v, d_se, power, n1, n2, D)
.HongAndReed2021_Carter2019_analyB <- function(g1, g2, g3, g4, D, multDV, out, mod){
#Create combo groups (experimental vs control, collapsing the moderator)
G1=rbind(g1,g3); G2=rbind(g2,g4)
#create X codes (group indicators: 1 = experimental, 2 = control)
X1.1=replicate(length(G1[,1]),1); X1.2=replicate(length(G1[,1]),1)
X2.1=replicate(length(G2[,1]),2); X2.2=replicate(length(G2[,1]),2)
#Create M codes (moderator-level indicators per cell)
m1.1=replicate(length(g1[,1]),1); m1.2=replicate(length(g1[,2]),1)
m2.1=replicate(length(g2[,1]),1); m2.2=replicate(length(g2[,2]),1)
m3.1=replicate(length(g3[,1]),2); m3.2=replicate(length(g3[,2]),2)
m4.1=replicate(length(g4[,1]),2); m4.2=replicate(length(g4[,2]),2)
M1.1=c(m1.1,m3.1); M1.2=c(m1.2,m3.2)
M2.1=c(m2.1,m4.1);M2.2=c(m2.2,m4.2)
#Create outlier codes (1 = beyond 2 SD of the column mean)
o1.1=mapply(.HongAndReed2021_Carter2019_outlier,G1[,1],mean(G1[,1]),stats::sd(G1[,1]))
o1.2=mapply(.HongAndReed2021_Carter2019_outlier,G1[,2],mean(G1[,2]),stats::sd(G1[,2]))
o2.1=mapply(.HongAndReed2021_Carter2019_outlier,G2[,1],mean(G2[,1]),stats::sd(G2[,1]))
o2.2=mapply(.HongAndReed2021_Carter2019_outlier,G2[,2],mean(G2[,2]),stats::sd(G2[,2]))
#combine codes with outcome values (columns: outcome, group, moderator, outlier)
c1=cbind(G1[,1],X1.1,M1.1,o1.1); c2=cbind(G1[,2],X1.2,M1.2,o1.2)
c3=cbind(G2[,1],X2.1,M2.1,o2.1); c4=cbind(G2[,2],X2.2,M2.2,o2.2)
#make "datasets", one per DV
DV1=rbind(c1,c3)
DV2=rbind(c2,c4)
#Save p values for the group-by-moderator interaction effects (used to
#"justify" reporting a moderator subset); ".o" variants exclude outliers
A1=stats::aov(DV1[,1]~DV1[,2]*DV1[,3])
A2=stats::aov(DV2[,1]~DV2[,2]*DV2[,3])
A1.o=stats::aov(DV1[,1]~DV1[,2]*DV1[,3],subset=DV1[,4]<1)
A2.o=stats::aov(DV2[,1]~DV2[,2]*DV2[,3],subset=DV2[,4]<1)
intA1P=summary(A1)[[1]][["Pr(>F)"]][3]
intA2P=summary(A2)[[1]][["Pr(>F)"]][3]
intA1P.o=summary(A1.o)[[1]][["Pr(>F)"]][3]
intA2P.o=summary(A2.o)[[1]][["Pr(>F)"]][3]
#test in each possible way; tXYZ: X = DV (1/2), Y = moderator level (0/1/2),
#Z = outliers excluded (0/1)
# first DV
t100=.HongAndReed2021_Carter2019_testIt(DV1,0,0)
t101=.HongAndReed2021_Carter2019_testIt(DV1,0,1)
t110=.HongAndReed2021_Carter2019_testIt(DV1,1,0)
t111=.HongAndReed2021_Carter2019_testIt(DV1,1,1)
t120=.HongAndReed2021_Carter2019_testIt(DV1,2,0)
t121=.HongAndReed2021_Carter2019_testIt(DV1,2,1)
# second DV
t200=.HongAndReed2021_Carter2019_testIt(DV2,0,0)
t201=.HongAndReed2021_Carter2019_testIt(DV2,0,1)
t210=.HongAndReed2021_Carter2019_testIt(DV2,1,0)
t211=.HongAndReed2021_Carter2019_testIt(DV2,1,1)
t220=.HongAndReed2021_Carter2019_testIt(DV2,2,0)
t221=.HongAndReed2021_Carter2019_testIt(DV2,2,1)
#pull the first significant-and-positive result given the allowed options.
#NOTE: the order of this chain encodes the hacking preference (simplest
#analysis first) and must not be reordered.
# start looking without moderator
best = if(t100[2]<.05 & t100[1]>0){t100} #DV1 and no outlier removal
else if(out==1 & t101[2]<.05 & t101[1]>0){t101} #DV1 with outlier removal
else if(multDV==1 & t200[2]<.05 & t200[1]>0){t200} #DV2 and no outlier removal
else if(multDV==1 & out==1 & t201[2]<.05 & t201[1]>0){t201} #DV2 with outlier removal
# start chopping on the moderator (lvl 1); requires a significant interaction
else if(mod == 1 & t110[2]<.05 & t110[1]>0 & intA1P < .05){t110}
else if(out==1 & mod == 1 & t111[2]<.05 & t111[1]>0 & intA1P.o< .05){t111}
else if(multDV==1 & mod == 1 & t210[2]<.05 & t210[1]>0 & intA1P < .05){t210}
else if(multDV==1 & out==1 & mod == 1 & t211[2]<.05 & t211[1]>0 & intA1P.o< .05){t211}
# lvl 2
else if(mod == 1 & t120[2]<.05 & t120[1]>0 & intA2P < .05){t120}
else if(out==1 & mod == 1 & t121[2]<.05 & t121[1]>0 & intA2P.o< .05){t121}
else if(multDV==1 & mod == 1 & t220[2]<.05 & t220[1]>0 & intA2P < .05){t220}
else if(multDV==1 & out==1 & mod == 1 & t221[2]<.05 & t221[1]>0 & intA2P.o< .05){t221}
#fall back to the plain analysis when nothing "worked"
else{t100}
#get additional info for the best results
d = best[1]
p = best[2]
t = best[3]
N = best[4]
n1 = best[5]
n2 = best[6]
df = N - 2
# NOTE(review): this d variance formula differs from the one used in
# .HongAndReed2021_Carter2019_simData_noQRP -- confirm against the original
# Carter et al. (2019) code whether the difference is intended
d_v= (n1 + n2)/(n1 * n2) + (d^2 / (2 *df)) * (n1 + n2) / df
d_se = sqrt(d_v)
pow = pwr::pwr.t2n.test(d, n1=n1, n2=n2)
pwr = pow$power
#return the best result
out=c(d, p, t, N, d_v, d_se, pwr, n1, n2, D)
return(out)
}
#==================
# .HongAndReed2021_Carter2019_simData_QRP #
#==================
# Produce the results, a, of one p-hacked experiment under a given QRP
# strategy ('none', 'mod', or 'agg'; "aggressive" is called "strong" in the
# paper). Returns the same result vector as
# .HongAndReed2021_Carter2019_simData_noQRP / _analyB.
# NOTE: the order of the RNG-consuming calls defines reproducibility under
# set.seed() and must not be changed.
.HongAndReed2021_Carter2019_simData_QRP <- function(delta, tau, QRP.strategy, maxN = 3000, fixed.n=NULL){
#correlation between multiple DVs is set to 0.20 as default
cbdv = 0.2
# if QRP strategy is NONE
if (QRP.strategy=='none'){
a = .HongAndReed2021_Carter2019_simData_noQRP(delta, tau, fixed.n=fixed.n)
}
#if QRP strategy is MODERATE
else if (QRP.strategy=='mod'){
#get data for a study using QRPs
# NOTE(review): maxN is not forwarded here (the 'agg' branch forwards it),
# so the default maxN = 3000 is used -- confirm whether intended
G <- .HongAndReed2021_Carter2019_expDataB(delta, tau, cbdv)
#determine the starting per-group sample size
# get the per-group sample size -
# either a fixed n, or sampled from the inverse-gamma distribution
if (!is.null(fixed.n)) {
s <- fixed.n
} else {
s <- .HongAndReed2021_Carter2019_getN(k=1)
}
# Divide sample size by 2: the idea is that the main factor of interest defined the two group sizes. A moderator factor is then added to create a 2*2, but because the moderator is not the main focus, the empirical sample sizes should only be used for the two groups formed by the main factor--not the four groups formed by the 2*2 split.
s <- round(s/2)
#run the first analysis with some QRPs applied
a = .HongAndReed2021_Carter2019_analyB(g1 = G[,,1][1:s,], #group one, 1:the current sample size
g2 = G[,,2][1:s,],
g3 = G[,,3][1:s,],
g4 = G[,,4][1:s,],
D = G[,,5][1,1], # the study-lvl true effect
multDV=1, out=0, mod=0) # MODERATE: multiple DVs only
#define optional stopping parameters for MODERATE strategy
colLim = 3
add = 3
#see if you can benefit from optional stopping
for (i in 1:colLim){
#continue adding more data and p-hacking until either collection
#limit is reached (colLim) or the p-value and the sign of d are
#significant and positive
if(a[1] > 0 & a[2] < .05){break}
#if p-value and sign of d aren't sig/pos, define the new sample
#sizes
s=s+add
a = .HongAndReed2021_Carter2019_analyB(g1 = G[,,1][1:s,], #group one, 1:the current sample size
g2 = G[,,2][1:s,],
g3 = G[,,3][1:s,],
g4 = G[,,4][1:s,],
D = G[,,5][1,1], # the study-lvl true effect
multDV=1,out=0,mod=0) # MODERATE
}
}
#if QRP strategy is AGGRESSIVE
# ("aggressive" now is called "strong" in the paper)
else if (QRP.strategy=='agg'){
#get data for a study using QRPs
G = .HongAndReed2021_Carter2019_expDataB(delta,tau,cbdv,maxN)
#determine the starting per-group sample size
# get the per-group sample size -
# either a fixed n, or sampled from the inverse-gamma distribution
if (!is.null(fixed.n)) {
s <- fixed.n
} else {
s <- .HongAndReed2021_Carter2019_getN(k=1)
}
# Divide sample size by 2: the idea is that the main factor of interest defined the two group sizes. A moderator factor is then added to create a 2*2, but because the moderator is not the main focus, the empirical sample sizes should only be used for the two groups formed by the main factor--not the four groups formed by the 2*2 split.
s <- round(s/2)
#run the first analysis with some QRPs applied
a = .HongAndReed2021_Carter2019_analyB(g1 = G[,,1][1:s,], #group one, 1:the current sample size
g2 = G[,,2][1:s,],
g3 = G[,,3][1:s,],
g4 = G[,,4][1:s,],
D = G[,,5][1,1], # the study-lvl true effect
multDV=1,out=1,mod=1) # AGGRESSIVE: all QRPs allowed
#define optional stopping parameters for AGGRESSIVE strategy
colLim = 5
add = 3
#see if you can benefit from optional stopping
for (i in 1:colLim){
#continue adding more data and p-hacking until either collection
#limit is reached (colLim) or the p-value and the sign of d are
#significant and positive
if(a[1] > 0 & a[2] < .05){break}
#if p-value and sign of d aren't sig/pos, define the new sample
#sizes
s=s+add
a = .HongAndReed2021_Carter2019_analyB(g1 = G[,,1][1:s,], #group one, 1:the current sample size
g2 = G[,,2][1:s,],
g3 = G[,,3][1:s,],
g4 = G[,,4][1:s,],
D = G[,,5][1,1], # the study-lvl true effect
multDV=1,out=1,mod=1) # AGGRESSIVE
}
}else{stop("Invalid QRP strategy. Must be one of: 'none', 'mod', 'agg'")}
#return the result
return(a)
}
## ======================================================================
## Main simulation entry point (uses the censoring functions above)
## ======================================================================
# Produce a dataset for one meta-analysis of k "published" studies.
# Each candidate study is simulated under a QRP strategy sampled according to
# qrpEnv, then passed through the publication-bias (censoring) filter chosen
# via censorFunc; rejected studies are discarded and new ones simulated until
# k studies have been published.
# @param k number of studies in the meta-analysis
# @param delta true effect (or the average of the true effects if heterogeneity exists)
# @param tau the SD around the true effect
# @param qrpEnv the QRP environment that produced the literature: 'none',
#   'low', 'medium', 'high'
# @param censorFunc the censoring regime - "none", "medium" (medium
#   publication bias), "high" (high publication bias), or a numeric vector of
#   3 values (posSign_NS_baseRate, negSign_NS_baseRate, counterSig_rate)
#   passed to .HongAndReed2021_Carter2019_censor
# @param verbose if TRUE, message the running count of published studies
# @param fixed.n optional fixed per-group sample size forwarded to the
#   study-level simulators
# NOTE: if a custom censoring vector assigns probability 0 to every
#   generated study, the repeat-loop below would never terminate (not an
#   issue for the predefined regimes).
.HongAndReed2021_Carter2019_simMA <- function(k, delta, tau, qrpEnv= c("none", "low", "medium", "high"), censorFunc = c("none", "medium", "high"), verbose=FALSE, fixed.n=NULL) {
# validate parameters (match.arg only when a single label was supplied;
# a length-3 numeric censorFunc is handled in the censoring step below)
if (length(censorFunc) == 1) {
censorFunc <- match.arg(censorFunc, c("none", "medium", "high"))
}
qrpEnv <- match.arg(qrpEnv, c("none", "low", "medium", "high"))
# Define the QRP environments:
# get the proportions of studies produced under each QRP strategy
if (qrpEnv == 'none'){
noneP = 1; modP = 0; aggP = 0
} else if (qrpEnv == 'low'){
noneP = 0.50; modP = 0.40; aggP = 0.10
} else if (qrpEnv == 'medium'){
noneP = 0.30; modP = 0.50; aggP = 0.20
} else if (qrpEnv == 'high'){
noneP = 0.10; modP = 0.40; aggP = 0.50
} else {
stop("qrpEnv must be none, low, medium, or high")
}
datMA <- data.frame()
# repeatedly add a new study from that environment until the desired number of k is achieved
repeat {
# sample this study's hacking style from the environment's mixture
thisStudiesHackingStyle <- sample(x = c("none", "mod", "agg"), size=1, replace=TRUE, prob = c(noneP, modP, aggP))
if (thisStudiesHackingStyle == "none") {
res <- .HongAndReed2021_Carter2019_simData_noQRP(delta=delta, tau=tau, fixed.n=fixed.n)
res[11] = 0 #QRP style
} else if (thisStudiesHackingStyle == "mod") {
res <- .HongAndReed2021_Carter2019_simData_QRP(delta=delta, tau=tau, QRP.strategy="mod", fixed.n=fixed.n)
res[11] = 1 #QRP style
} else if (thisStudiesHackingStyle == "agg") {
res <- .HongAndReed2021_Carter2019_simData_QRP(delta=delta, tau=tau, QRP.strategy="agg", fixed.n=fixed.n)
res[11] = 2 #QRP style
}
# inflict publication bias via the selected censoring function
# (res[1] = d, res[2] = two-tailed p-value)
if (is.character(censorFunc) && censorFunc == "none") {
publish <- 1
} else if (is.character(censorFunc) && censorFunc == "medium") {
# predefined censor function for "medium publication bias"
publish <- stats::rbinom(n=1, size=1, prob=.HongAndReed2021_Carter2019_censorMedium(pObs = res[2], direction = sign(res[1])))
} else if (is.character(censorFunc) && censorFunc == "high") {
# predefined censor function for "strong publication bias"
publish <- stats::rbinom(n=1, size=1, prob=.HongAndReed2021_Carter2019_censorHigh(pObs = res[2], direction = sign(res[1])))
} else if (is.vector(censorFunc) && length(censorFunc)==3) {
# custom censoring parameters supplied directly
publish <- stats::rbinom(n=1, size=1, prob=.HongAndReed2021_Carter2019_censor(res[2], direction = sign(res[1]), posSign_NS_baseRate = censorFunc[1], negSign_NS_baseRate = censorFunc[2], counterSig_rate = censorFunc[3]))
} else {
stop("Wrong specification of .HongAndReed2021_Carter2019_censor function!")
}
if (publish == 1) {
datMA <- rbind(datMA, res)
if (verbose==TRUE) message(nrow(datMA))
}
if (nrow(datMA) >= k) {break}
} # of repeat
#name columns
colnames(datMA) = c( 'd', # effect size, d
'p', # p value for the two group comparison
't', # t value for the two group comparison
'N', # total N
'v', # variance for the effect size
'se', # standard error for the effect size
'pow', # power given the true effect for the two group comparison
'n1', # experimental group sample size
'n2', # control group sample size
'delta_i', # the study-level true effect
'qrp') # 0 = 'none', 1 = 'mod', 2 = 'agg'
# Add Hedges' small-sample correction factor J and the corrected effect g
df = datMA$n1 + datMA$n2 - 2
J = 1- 3/(4*df - 1)
datMA$g = datMA$d*J
datMA$g_v = datMA$v*J^2
datMA$g_se = sqrt(datMA$g_v)
return(datMA)
}