#'Simulated Distribution-Free Bayesian Power and \emph{t} Power
#'
#'
#'The function is a design tool for comparing Bayesian distribution-free
#'power versus frequentist \emph{t} power for a range of sample sizes. Allows
#'for the stipulation of one of nine probability models for data generation.
#'
#' @param n_min Smallest desired value of sample size for power calculations (minimun 20; default is also 20)
#' @param delta Offset amount between the two variates
#' @param model Theoretical probability model for the data. One of \code{"normal"}, \code{"weibull"}, \code{"cauchy"}, \code{"lognormal"}, \code{"chisquare"}, \code{"logistic"}, \code{"exponential"}, \code{"gumbel"}, or \code{"pareto"}.
#' @param design Indicates the data structure. One of \code{"independent"} or \code{"paired"}.
#' @param effect_crit Stipulated value for a significant differences for a \emph{t}-test (1 - \emph{p}), and the critical probability for the Bayesian alternative hypothesis for a Bayesian distribution-free analysis
#' @param shape1 The shape parameter for the condition 1 variate for the distribution indicated by the \code{model} input (default is 1)
#' @param shape2 The shape parameter for the condition 2 variate for the distribution indicated by the \code{model} input (default is 1)
#' @param samples Desired number of Monte Carlo data sets drawn to estimate the power (default is 1000)
#' @param a0 The first shape parameter for the prior beta distribution (default is 1). Must be positive and finite.
#' @param b0 The second shape parameter for the prior beta distribution (default is 1). Must be positive and finite.
#' @param block_max The maximum size for a block effect (default is 0)
#' @param hide_progress (Optional) If \code{TRUE}, hide percent progress while Monte Carlo sampling is running. (default is \code{FALSE}).
#'
#' @return A list containing the following components:
#' @return \item{nsims}{The number of Monte Carlo data sets; equal to the value of the \code{samples} argument}
#' @return \item{model}{Probability model for the data}
#' @return \item{design}{The design for the data; one of \code{"independent"} or \code{"paired"}}
#' @return \item{effect_crit}{The criterion probability for considering a posterior probability for the hypothesis that \code{delta > 0} to be a detection; it is also \code{1 - p_crit} for a frequentist \emph{t}-test}
#' @return \item{deltav}{The offset between the variates; equal to the \code{delta} argument}
#' @return \item{a0}{The first shape parameter for the beta prior distribution}
#' @return \item{b0}{The second shape parameter for the beta prior distribution}
#' @return \item{block_max}{The maximum size of a block effect; equal to \code{block_max} argument}
#' @return \item{outputdf}{A dataframe of possible sample sizes and the corresponding Bayesian and frequentist power values}
#'
#' @details
#'
#' Researchers need to make experimental-design decisions such as the choice
#' about the sample size per condition and the decision of whether to use a
#' within-block design or an independent-groups design. These planning issues
#' arise regardless if one uses either a frequentist or a Bayesian approach to
#' statistical inference. In the DFBA package, there are a number of functions
#' to help users with these decisions. The \code{dfba_bayes_vs_t_power()} function
#' produces (a) the Bayesian power estimate from a distribution-free analysis
#' and (b) the corresponding frequentist power from a parametric \emph{t}-test
#' for a set of 11 sample sizes ranging from \code{n_min} to \code{n_min + 50}
#' in steps of 5. These estimates are based on a number of different Monte-
#' Carlo-sampled data sets generated by the \code{dfba_sim_data()} function.
#'
#' For each data set, statistical tests are performed. If \code{design = "paired"},
#' the frequentist \emph{t}-test is a one-tailed test on the within-block
#' difference scores to assess the null hypothesis that the population mean for
#' \code{E} is greater than the population mean for \code{C}; if
#' \code{design = "independent"}, the frequentist \emph{t}-test is the one-tailed
#' test to assess if there is a significant difference between the two
#' independent conditions (\emph{i.e.} if the mean for condition 2 is
#' significantly greater than the condition 1 mean). If \code{design = "paired"},
#' the Bayesian analysis assesses if the posterior probability for \code{phi_w > .5}
#' from the Bayesian Wilcoxon test is greater than \code{effect_crit}; if
#' \code{design = "independent"}, the Bayesian analysis assesses if the posterior
#' probability for \code{omega_E > .5} on a Bayesian Mann-Whitney test
#' is greater than \code{effect_crit}. The frequentist power is estimated by
#' the proportion of the data sets where a parametric \emph{t}-test detects a
#' significant effect because the upper-tail \emph{t} value has a \emph{p}-value
#' less than \code{1-effect_crit}. The Bayesian power is the proportion of the
#' data sets where a posterior probability for the alternative hypothesis is
#' greater than \code{effect_crit}. The default value for the
#' \code{effect_crit} argument is \code{effect_crit = .95}. The frequentist
#' \emph{p}-value and the Bayesian posterior probability for the
#' alternative hypothesis are calculated using the \code{dfba_sim_data()}
#' function.
#'
#' The arguments for the \code{dfba_sim_data()} function are passed from the
#' \code{dfba_bayes_vs_t_power()} function. Besides the sample size \code{n}, there
#' are eight other arguments that are required by the \code{dfba_sim_data()}
#' function, which are passed from the \code{dfba_bayes_vs_t_power()} function:
#'
#' \itemize{
#' \item \code{a0}
#' \item \code{b0}
#' \item \code{model}
#' \item \code{design}
#' \item \code{delta}
#' \item \code{shape1}
#' \item \code{shape2}
#' \item \code{block_max}.
#' }
#'
#' The \code{a0} and \code{b0} values are the respective first and second beta
#' shape parameters for the prior distribution needed for the Bayesian
#' distribution-free tests, which are ultimately done by calling either the
#' \code{dfba_wilcoxon()} function or by the \code{dfba_mann_whitney()} function.
#'
#' The \code{model} argument is one of the following strings:
#' \itemize{
#' \item \code{"normal"}
#' \item \code{"weibull"}
#' \item \code{"cauchy"}
#' \item \code{"lognormal"}
#' \item \code{"chisquare"}
#' \item \code{"logistic"}
#' \item \code{"exponential"}
#' \item \code{"gumbel"}
#' \item \code{"pareto"}
#' }
#' The \code{design} argument is either \code{"independent"} or \code{"paired"},
#' and stipulates whether the two sets of scores are either independent or from
#' a common block such as for the case of two scores for the same person (\emph{i.e.},
#' one in each condition).
#'
#' The \code{shape1} and \code{shape2} arguments are values for the shape parameter
#' for the respective first and second condition, and their meaning
#' depends on the probability model. For \code{model="normal"}, these
#' parameters are the standard deviations of the two distributions. For
#' \code{model = "weibull"}, the parameters are the Weibull shape parameters.
#' For \code{model = "cauchy"}, the parameters are the scale factors for the
#' Cauchy distributions. For \code{model = "lognormal"}, the shape
#' parameters are the standard deviations for log(X). For \code{model = "chisquare"},
#' the parameters are the degrees of freedom (\emph{df}) for the two
#' distributions. For \code{model = "logistic"}, the parameters are the scale
#' factors for the distributions. For \code{model = "exponential"}, the parameters
#' are the rate parameters for the distributions.
#'
#' For the Gumbel distribution, the \code{E} variate is equal to
#' \code{delta - shape2*log(log(1/U))} where \code{U} is a random value sampled
#' from the uniform distribution on the interval \code{[.00001, .99999]}, and
#' the \code{C} variate is equal to \code{-shape1*log(log(1/U))} where \code{U}
#' is another score sampled from the uniform distribution. The \code{shape1} and
#' \code{shape2} arguments for \code{model = "gumbel"} are the scale parameters
#' for the distributions. The Pareto model is a distribution designed to account
#' for income distributions as studied by economists (Pareto, 1897). For the
#' Pareto distribution, the cumulative function is equal to \code{1-(x_m/x)^alpha}
#' where \code{x} is greater than \code{x_m} (Arnold, 1983). In the \code{E}
#' condition, \code{x_m = 1 + delta} and in the \code{C} condition \code{x_m = 1}.
#' The alpha parameter is 1.16 times the shape parameters \code{shape1} and
#' \code{shape2}. Since the default value for each shape parameter is 1, the
#' resulting alpha value of 1.16 is the default value. When alpha = 1.16, the
#' Pareto distribution approximates an income distribution that represents the
#' 80-20 law where 20\% of the population receives 80\% of the income
#' (Hardy, 2010).
#'
#' The \code{block_max} argument provides for incorporating block effects in the
#' random sampling. The block effect for each score is a separate effect for the
#' block. The block effect B for a score is a random number drawn from a uniform
#' distribution on the interval \code{[0, block_max]}. When \code{design = "paired"},
#' the same random block effect is added to the score in the first condition,
#' which is the random \code{C} value, and it is also added to the corresponding
#' paired value for the \code{E} variate. Thus, the pairing research design
#' eliminates the effect of block variation for the assessment of condition
#' differences. When \code{design = "independent"}, there are different block-effect
#' contributions to the \code{E} and \code{C} variates, which reduces the
#' discrimination of condition differences because it increases the variability
#' of the difference in the two variates. The user can study the effect of the
#' relative discriminability of detecting an effect of delta by adjusting the
#' value of the \code{block_max} argument. The default for \code{block_max} is 0,
#' but it can be altered to any non-negative real number.
#'
#'
#' @seealso
#' \code{\link[stats:Distributions]{Distributions}} for details on the
#' parameters of the normal, Weibull, Cauchy, lognormal, chi-squared, logistic,
#' and exponential distributions.
#'
#' \code{\link{dfba_wilcoxon}}
#'
#' \code{\link{dfba_mann_whitney}}
#'
#' \code{\link{dfba_sim_data}} for further details about the data for two
#' conditions that differ in terms of their theoretical mean by an amount delta.
#'
#' @examples
#'
#' # Note: these examples have long runtimes due to Monte Carlo sampling;
#' # please feel free to run them in the console.
#'
#' # Examples for two data sets sampled from standard normal distributions with
#' # no blocking effect
#'
#' \dontrun{
#' dfba_bayes_vs_t_power(n_min = 40,
#' delta = .45,
#' model = "normal",
#' design = "paired",
#' hide_progress = FALSE)
#'
#' dfba_bayes_vs_t_power(n_min = 40,
#' delta = .45,
#' model = "normal",
#' design = "independent",
#' hide_progress = FALSE)
#'
#' # Examples with Weibull-distributed variates with no blocking effect
#'
#' dfba_bayes_vs_t_power(n_min = 50,
#' delta = .45,
#' model = "weibull",
#' design ="paired",
#' hide_progress = FALSE)
#'
#' dfba_bayes_vs_t_power(n_min = 50,
#' delta = .45,
#' model = "weibull",
#' design = "independent",
#' hide_progress = FALSE)
#'
#' # Examples with Weibull-distributed variates with a blocking effect
#'
#' dfba_bayes_vs_t_power(n_min = 50,
#' delta = .45,
#' model = "weibull",
#' design = "independent",
#' shape1 = .8,
#' shape2 = .8,
#' block_max = 2.3,
#' hide_progress = FALSE)
#'
#' dfba_bayes_vs_t_power(n_min = 50,
#' delta = .45,
#' model = "weibull",
#' design = "paired",
#' shape1 = .8,
#' shape2 = .8,
#' block_max = 2.3,
#' hide_progress = FALSE)
#' }
#'
#' @references
#'
#' Arnold, B. C. (1983). Pareto Distribution. Fairland, MD:
#' International Cooperative Publishing House.
#'
#' Chechile, R. A. (2017). A Bayesian analysis for the Wilcoxon signed-rank
#' statistic. Communications in Statistics - Theory and Methods,
#' https://doi.org/10.1080/03610926.2017.1388402
#'
#' Chechile, R. A. (2020). A Bayesian analysis for the Mann-Whitney statistic.
#' Communications in Statistics - Theory and Methods,
#' https://doi.org/10.1080/03610926.2018.1549247
#'
#' Fishman, G. S. (1996) Monte Carlo: Concepts, Algorithms and Applications.
#' New York: Springer.
#'
#' Hardy, M. (2010). Pareto's Law. Mathematical Intelligencer,
#' 32, 38-43.
#'
#' Johnson, N. L., Kotz S., and Balakrishnan, N. (1995). Continuous Univariate
#' Distributions, Vol. 1, New York: Wiley.
#'
#' Pareto, V. (1897). Cours d'Economie Politique. Vol. 2,
#' Lausanne: F. Rouge.
#'
#'
#' @export
#'
dfba_bayes_vs_t_power<-function(n_min=20,
delta,
model,
design,
effect_crit=.95,
shape1 = 1,
shape2 = 1,
samples=1000,
a0 = 1,
b0 = 1,
block_max = 0,
hide_progress = FALSE){
deltav <- delta
if (delta<0){
stop("The function requires a nonnegative value for delta.")
}
if (n_min < 20 | n_min%%1 != 0){
stop("n_min must be an integer that is 20 or larger")
}
if (a0 <= 0|
a0 == Inf|
b0 <= 0|
b0 == Inf|
is.na(a0)|
is.na(b0)){
stop("Both a0 and b0 must be positive and finite.")
}
mlist<-c("normal",
"weibull",
"cauchy",
"lognormal",
"chisquare",
"logistic",
"exponential",
"gumbel",
"pareto")
if (!model %in% mlist){
modelstop <- paste0("The set of distributions for model are:"," ","\n",
"\t\t", paste0(mlist, collapse = "\n\t\t"), "\n",
"The stipulated model is not on the list")
stop(modelstop)
}
designlist<-c("paired",
"independent")
if (!design %in% designlist){
designstop <- paste0("The set of distributions for design are:"," ","\n",
"\t\t", paste0(designlist, collapse = "\n\t\t"), "\n",
"The stipulated design is not on the list")
stop(designstop)
}
if ((effect_crit<0)|(effect_crit>1)){
stop("The effect_crit value must be a number nonzero number less than 1.")
}
shape_values<-c(shape1,
shape2)
nsims <- round(samples)
mup <- n_min + 50
m <- seq(n_min, mup, 5)
detect_bayes<-rep(NA, 11)
detect_t<-rep(NA, 11)
outputsim<-rep(NA, 2)
tpvalue<-rep(NA, nsims)
bayesprH1<-rep(NA, nsims)
for (i in 1:11){
n <- m[i]
for (j in 1:nsims){
outputsim<-dfba_sim_data(n = n,
model = model,
design = design,
delta = deltav,
shape1 = shape1,
shape2 = shape2,
a0 = a0,
b0 = b0,
block_max = block_max)
bayesprH1[j] <- outputsim$prH1
tpvalue[j] <- outputsim$pvalue
if (hide_progress == FALSE) {
message('\r', round((j/nsims+(i-1))/11, 2)*100, '% complete', appendLF = FALSE)
}
}
detect_bayes[i] <- (sum(bayesprH1>effect_crit))/nsims
detect_t[i] <- (sum(tpvalue<1-effect_crit))/nsims
}
if (hide_progress == FALSE) {
message("\n")
}
dfba_t_power_list<-list(nsims = nsims,
model = model,
design = design,
effect_crit = effect_crit,
deltav = deltav,
a0 = a0,
b0 = b0,
block_max = block_max,
outputdf=data.frame(sample_size = m,
Bayes_power = detect_bayes,
t_power = detect_t)
)
new("dfba_t_power_out", dfba_t_power_list)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.