Nothing
utils::globalVariables(c('densy','densx','dots'))
#' @importFrom mosaicCore named named_among unnamed
NA
#' Plots of Discrete and Continuous Distributions
#'
#' Provides a simple way to generate plots of pdfs, probability mass functions,
#' cdfs, probability histograms, and normal-quantile plots for distributions
#' known to R.
#'
#' @param dist
#' A string identifying the distribution. This should work
#' with any distribution that has associated functions beginning
#' with 'd', 'p', and 'q' (e.g,
#' [dnorm()],
#' [pnorm()], and
#' [qnorm()]). `dist` should match the name of the
#' distribution with the initial 'd', 'p', or 'q' removed.
#' @param xlim a numeric vector of length 2 or `NULL`, in which case
#' the central 99.8 of the distribution is used.
#' @param ylim a numeric vector of length 2 or `NULL`, in which case
#' a heuristic is used to avoid chasing asymptotes in distributions like
#' the F distributions with 1 numerator degree of freedom.
#' @param add a logical indicating whether the plot should be added to the previous lattice plot.
#' If missing, it will be set to match `under`.
#' @param under a logical indicating whether adding should be done in a layer under or over the existing
#' layers when `add = TRUE`.
#' @param packets,rows,columns specification of which panels will be added to when
#' `add` is `TRUE`. See [latticeExtra::layer()].
#' @param params a list containing parameters for the distribution. If `NULL` (the default),
#' this list is created from elements of `\dots` that are either unnamed or have names among
#' the formals of the appropriate distribution function. See the examples.
#' @param kind one of "density", "cdf", "qq", or "histogram" (or prefix
#' of any of these)
#' @param xlab,ylab as per other lattice functions
#' @param breaks a vector of break points for bins of histograms,
#' as in [histogram()]
#' @param type passed along to various lattice graphing functions
#' @param resolution number of points to sample when generating the plots
#' @param ... other arguments passed along to lattice graphing routines
#' @seealso [ggformula::gf_dist()]
#'
#' @details `plotDist()` determines whether the distribution
#' is continuous or discrete by seeing if all the sampled quantiles are
#' unique. A discrete random variable with many possible values could
#' fool this algorithm and be considered continuous.
#'
#' The plots are done referencing a data frame with variables
#' `x` and `y` giving points on the graph of the
#' pdf, pmf, or cdf for the distribution. This can be useful in conjunction
#' with the `groups` argument. See the examples.
#'
#' @examples
#' plotDist('norm')
#' plotDist('norm', type='h')
#' plotDist('norm', kind='cdf')
#' plotDist('exp', kind='histogram')
#' plotDist('binom', params=list( 25, .25)) # explicit params
#' plotDist('binom', 25, .25) # params inferred
#' plotDist('norm', mean=100, sd=10, kind='cdf') # params inferred
#' plotDist('binom', 25, .25, xlim=c(-1,26) ) # params inferred
#' plotDist('binom', params=list( 25, .25), kind='cdf')
#' plotDist('beta', params=list( 3, 10), kind='density')
#' plotDist('beta', params=list( 3, 10), kind='cdf')
#' plotDist( "binom", params=list(35,.25),
#' groups= y < dbinom(qbinom(0.05, 35, .25), 35,.25) )
#' plotDist( "binom", params=list(35,.25),
#' groups= y < dbinom(qbinom(0.05, 35, .25), 35,.25),
#' kind='hist')
#' plotDist("norm", mean=10, sd=2, col="blue", type="h")
#' plotDist("norm", mean=12, sd=2, col="red", type="h", under=TRUE)
#' plotDist("binom", size=100, prob=.30) +
#' plotDist("norm", mean=30, sd=sqrt(100 * .3 * .7))
#' plotDist("chisq", df=4, groups = x > 6, type="h")
#' plotDist("f", df1=1, df2 = 99)
#' if (require(mosaicData)) {
#' histogram( ~age|sex, data=HELPrct)
#' m <- mean( ~age|sex, data=HELPrct)
#' s <- sd(~age|sex, data=HELPrct)
#' plotDist( "norm", mean=m[1], sd=s[1], col="red", add=TRUE, packets=1)
#' plotDist( "norm", mean=m[2], sd=s[2], col="blue", under=TRUE, packets=2)
#' }
#'
#' @keywords graphics
#' @keywords stats
#' @export
plotDist <- function(
dist, ...,
xlim = NULL,
ylim = NULL,
add,
under = FALSE,
packets=NULL,
rows=NULL,
columns=NULL,
kind = c('density','cdf','qq','histogram'),
xlab = "", ylab = "", breaks = NULL, type,
resolution = 5000L, params = NULL ) {
kind = match.arg(kind)
if (missing(add)) add <- under
ddist = paste('d', dist, sep='')
qdist = paste('q', dist, sep='')
pdist = paste('p', dist, sep='')
original_call <- match.call()
dots <- original_call
dots[[1]] <- NULL
unnamed_dots <- original_call
named_dots <- original_call
unnamed_dots[[1]] <- NULL
named_dots[[1]] <- NULL
groupless_dots <- original_call
groupless_dots[[1]] <- NULL
for (i in length(unnamed_dots):1) {
if (names(unnamed_dots)[i] != "") {
unnamed_dots[i] <- NULL
} else {
named_dots[i] <- NULL
}
}
if (is.null(params)) {
params <- original_call
params[[1]] <- NULL
for (item in names(formals()) ) {
if (item %in% names(params)) params[[item]] <- NULL
}
dparams <- c(unnamed(params) , named_among( params, names(formals(ddist))) )
pparams <- c(unnamed(params) , named_among( params, names(formals(pdist))) )
qparams <- c(unnamed(params) , named_among( params, names(formals(qdist))) )
} else {
dparams <- params
pparams <- params
qparams <- params
}
# attempting to make evaluation of these arguments more intuitive
env <- parent.frame()
dparams <- lapply(dparams, function(x) eval(x, env))
pparams <- lapply(pparams, function(x) eval(x, env))
qparams <- lapply(qparams, function(x) eval(x, env))
values = do.call(qdist, c(p=list(ppoints(resolution)), qparams))
fewerValues <- unique(values)
discrete = length(fewerValues) < length(values)
if (! discrete) {
values = seq(
do.call(qdist, c(p = list(0.001), qparams)),
do.call(qdist, c(p = list(0.999), qparams)),
length.out = resolution
)
fewerValues <- values
}
if ( is.null(breaks) && discrete ){
step = min(diff(fewerValues))
breaks = seq( min(fewerValues) -.5 * step , max(fewerValues) + .5*step, step)
}
if (kind=='cdf') {
if (discrete) {
step = min(diff(fewerValues))
cdfx <- seq( min(fewerValues) -1.5 * step , max(fewerValues) + 1.5*step, length.out=resolution)
cdfx <- sort(unique( c(fewerValues, cdfx) ) )
cdfy <- approxfun( fewerValues, do.call(pdist, c(list(q=fewerValues),pparams)), method='constant',
f=0, yleft=0, yright=1 ) (cdfx)
} else {
cdfx <- values
cdfy <- do.call( pdist, c(list(q=values), pparams) )
}
}
if (missing(type)) {
if (discrete) {
type = switch(kind,
density = c('p','h'),
cdf = 'p',
histogram = 'density',
qq = 'l')
} else {
type = switch(kind,
density = 'l',
cdf = 'l',
histogram = 'density',
qq = 'l')
}
}
if (add) {
rlang::check_installed('latticeExtra')
call_without_add <- original_call
call_without_add["add"] <- FALSE
return(
trellis.last.object() +
latticeExtra::as.layer(
eval.parent(call_without_add),
under=under,
packets=packets,
rows=rows,
columns=columns)
)
} else { # not adding
if (is.null(xlim)) {
xlim <- do.call(qdist, c(list(p=c(0.001, .999)), qparams))
}
ydata <-
switch(kind,
density = do.call(ddist, c(list(x=fewerValues), dparams)),
cdf = cdfy,
qq = do.call( ddist, c(list(x=values), dparams)),
histogram = do.call(ddist, c(list(x=values), dparams))
)
if (is.null(ylim)) {
ymax <-
min(
1.6 * quantile(ydata, 0.90, na.rm=TRUE),
1.1 * max(ydata, na.rm=TRUE),
na.rm = TRUE)
ylim = c(0, ymax)
}
switch(kind,
density =
lattice::xyplot( y ~ x,
data=data.frame(y = ydata, x = fewerValues),
xlim = xlim, ylim = ylim,
type=type, xlab=xlab, ylab=ylab, ...),
cdf =
lattice::xyplot( y ~ x,
data=data.frame(y = ydata, x = cdfx),
xlim = xlim, ylim = ylim,
type=type, xlab = xlab, ylab = ylab, ...),
qq =
lattice::qqmath( ~ x,
data = data.frame(x = values, y = ydata),
xlim = xlim, ylim = ylim,
type = type, xlab = xlab, ylab = ylab, ...),
histogram =
histogram( ~ x,
data = data.frame(x = values, y = ydata),
xlim = xlim, ylim = ylim,
type = type, xlab = xlab, breaks = breaks, ...)
)
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.