Nothing
#' Histogram with Added Options
#'
#' Similar to base R function \code{\link[graphics]{hist}}, but with two added
#' features: (1) Can overlay one or more fitted probability density/mass
#' functions (PDFs/PMFs) for any univariate distribution supported in R (see
#' \code{\link[stats]{Distributions}}); and (2) Can generate more of a barplot
#' type histogram, where each possible value gets its own bin centered over its
#' value (useful for discrete variables with not too many possible values).
#'
#' When \code{x} takes on whole numbers, you typically want to set
#' \code{dis_shift = -0.5} if \code{right = TRUE}
#' (\code{\link[graphics]{hist}}'s default) and \code{dis_shift = 0.5} if
#' \code{right = FALSE}. The function will do this internally by default.
#'
#' To illustrate, suppose a particular bin represents \code{(7, 10]}. Its
#' midpoint will be at \code{x = 8.5} on the graph. But if input values are
#' whole numbers, this bin really only includes values of 8, 9, and 10, which
#' have a mean of 9. So you really want \code{f(9)} to appear at \code{x = 8.5}.
#' This requires shifting the curve to the left 0.5 units, i.e. setting
#' \code{dis_shift = -0.5}.
#'
#' When \code{x} takes on whole numbers with not too many unique values, you may
#' want the histogram to show one bin for each integer. You can do this by
#' setting \code{integer_breaks = TRUE}. By default, the function sets
#' \code{integer_breaks = TRUE} if \code{x} contains whole numbers with 10 or
#' fewer unique values.
#'
#' @param x Numeric vector of values.
#'
#' @param dis Character vector indicating which distributions should be used to
#' add fitted PDF/PMF to the histogram. If not \code{"none"}, choices for each
#' element are:
#'
#' \code{"beta"}
#'
#' \code{"binom"} (must specify \code{size})
#'
#' \code{"cauchy"}
#'
#' \code{"chisq"}
#'
#' \code{"exp"}
#'
#' \code{"f"}
#'
#' \code{"gamma"}
#'
#' \code{"geom"}
#'
#' \code{"hyper"} (must specify total number of balls in urn, \code{N}, and
#' number of balls drawn each time, \code{k})
#'
#' \code{"lnorm"}
#'
#' \code{"nbinom"} (must specify \code{size})
#'
#' \code{"norm"}
#'
#' \code{"pois"},
#'
#' \code{"t"}
#'
#' \code{"unif"}
#'
#' \code{"weibull"}
#'
#' @param dis_shift Numeric value for shifting the fitted PDF/PMF along the
#' x-axis of the histogram.
#'
#' @param integer_breaks If \code{TRUE}, integers covering the range of \code{x}
#' are used for breaks, so there is one bin for each integer. Useful for
#' discrete distributions that don't take on too many unique values.
#'
#' @param colors Character vector of colors for each PDF/PMF.
#'
#' @param lty Integer vector specifying line types for each curve.
#'
#' @param legend_form Integer value controlling what type of legend to include.
#' Choices are 0 for no legend, 1 for legend naming each distribution, and 2 for
#' legend naming each distribution and the corresponding AIC.
#'
#' @param aic_decimals Integer value for number of decimals for AIC.
#'
#' @param points_list Optional list of inputs to pass to
#' \code{\link[graphics]{points}} function, which is used to add the fitted
#' PDF/PMF.
#'
#' @param axis_list Optional list of inputs to pass to
#' \code{\link[graphics]{axis}}.
#'
#' @param legend_list Optional list of inputs to pass to
#' \code{\link[graphics]{legend}}.
#'
#' @param ... May include arguments to pass to \code{\link[graphics]{hist}}
#' and/or parameter values needed for certain distributions (\code{size} if
#' \code{dis = "binom"} or \code{dis = "nbinom"}, \code{N} and \code{k} if
#' \code{dis = "hyper"}).
#'
#'
#' @return Histogram with fitted PDFs/PMFs if requested.
#'
#'
#' @examples
#' # Sample 10,000 Poisson(2) values and commpare default hist vs. histo
#' set.seed(123)
#' x <- rpois(n = 10000, lambda = 2)
#' par(mfrow = c(1, 2))
#' hist(x, main = "hist function")
#' histo(x, main = "histo function")
#'
#' # Sample 10,000 lognormal(0, 0.35) values. Create histogram with curves
#' # showing fitted lognormal, normal, and Gamma PDFs.
#' set.seed(123)
#' x <- rlnorm(n = 10000, meanlog = 0, sdlog = 0.35)
#' par(mfrow = c(1, 1))
#' histo(x, c("lnorm", "norm", "gamma"), main = "X ~ Lognormal(0, 0.35)")
#'
#' # Generate 10,000 Binomial(8, 0.25) values. Create histogram, specifying
#' # size = 5, with blue line/points showing fitted PMF.
#' set.seed(123)
#' x <- rbinom(n = 10000, size = 5, prob = 0.25)
#' par(mfrow = c(1, 1))
#' histo(x, dis = "binom", size = 5, colors = "blue",
#' points_list = list(type = "b"))
#'
#' @export
histo <- function(x,
dis = "none", dis_shift = NULL,
integer_breaks = NULL,
colors = rep("black", length(dis)),
lty = 1: length(dis),
legend_form = ifelse(length(dis) == 1, 0, 1),
aic_decimals = 1,
points_list = NULL,
axis_list = NULL,
legend_list = NULL,
...) {
# Get x name for labels
xname <- deparse(substitute(x))
# Drop missing values
x <- x[! is.na(x)]
# Create list with ... arguments
extra.args <- list(...)
# Extract any parameters (i.e. arguments NOT for hist) included in ...
if (! is.null(extra.args)) {
loc <- which(names(extra.args) == "size")
if (length(loc) == 1) {
size <- extra.args[[loc]]
extra.args <- extra.args[-loc]
}
loc <- which(names(extra.args) == "N")
if (length(loc) == 1) {
N <- extra.args[[loc]]
extra.args <- extra.args[-loc]
}
loc <- which(names(extra.args) == "k")
if (length(loc) == 1) {
k <- extra.args[[loc]]
extra.args <- extra.args[-loc]
}
}
# If integer_breaks is NULL, set to TRUE if x takes on whole numbers with 20
# or fewer distinct values, else set to FALSE
if (is.null(integer_breaks)) {
integer_breaks <- all(x %% 1 == 0) & length(unique(x)) <= 10
}
# If right is not specified or integer_breaks is TRUE, set to TRUE
if (! "right" %in% names(extra.args) | integer_breaks) {
extra.args$right <- TRUE
}
# If integer_breaks is TRUE, make breaks a vector of integers
# covering the range of x
if (integer_breaks) {
extra.args$breaks <- seq(floor(min(x)) - 1, ceiling(max(x)), 1)
}
# If freq is not specified, set to FALSE
if (! "freq" %in% names(extra.args)) {
extra.args$freq <- FALSE
}
# If xlab/main not specified, set
if (! "xlab" %in% names(extra.args)) {
extra.args$xlab <- xname
}
if (! "main" %in% names(extra.args)) {
extra.args$main <- paste("Histogram of ", xname, sep = "")
}
# Create histogram
if (integer_breaks) {
hist.fig <- do.call(hist, c(list(x = quote(x), xaxt = "n"), extra.args))
hist.fig <- do.call(axis, c(list(side = 1, at = hist.fig$mids,
labels = hist.fig$breaks[-1]),
axis_list))
} else {
hist.fig <- do.call(hist, c(list(x = quote(x)), extra.args))
}
# Add fitted pdf/pmf if requested
if (dis[1] != "none") {
# If dis_shift is NULL and all values in x are integers, figure out how to
# shift curve to make it match up with the histogram bars
if (is.null(dis_shift)) {
if (all(x %% 1 == 0)) {
dis_shift <- ifelse(extra.args$right, -0.5, 0.5)
} else {
dis_shift <- 0
}
}
# List of inputs for 'points' function call
points_list <- list_override(list1 = list(type = "l"),
list2 = points_list)
# Initialize vector for legend text and create spf value for AIC
legend.text <- c()
spf.val <- paste("%.", aic_decimals, "f", sep = "")
for (ii in 1: length(dis)) {
if (dis[ii] == "beta") {
fit <- fitdistr(x, "beta", start = list(shape1 = 0.5, shape2 = 0.5))
theta.hat <- fit$estimate
x.vals <- seq(min(x), max(x), diff(range(x)) / 1000)
y.vals <- dbeta(x = x.vals, shape1 = theta.hat[1], shape2 = theta.hat[2])
do.call(points, c(list(x = x.vals + dis_shift, y = y.vals,
col = colors[ii], lty = lty[ii]),
points_list))
if (legend_form == 1) {
legend.text[ii] <- "Beta"
} else if (legend_form == 2) {
aic.ii <- 2 * 2 - 2 * sum(dbeta(x = x, log = TRUE,
shape1 = theta.hat[1],
shape2 = theta.hat[2]))
legend.text[ii] <-
paste("Beta (AIC = ", sprintf(spf.val, aic.ii), ")", sep = "")
}
} else if (dis[ii] == "binom") {
# Need user-input value for size
p.hat <- mean(x) / size
x.vals <- seq(round(min(x)), round(max(x)), 1)
y.vals <- dbinom(x = x.vals, size = size, prob = p.hat)
do.call(points, c(list(x = x.vals + dis_shift, y = y.vals,
col = colors[ii], lty = lty[ii]),
points_list))
if (legend_form == 1) {
legend.text[ii] <- "Binomial"
} else if (legend_form == 2) {
aic.ii <- 2 * 2 - 2 * sum(dbinom(x = x, log = TRUE,
size = size, prob = p.hat))
legend.text[ii] <-
paste("Binomial (AIC = ", sprintf(spf.val, aic.ii), ")", sep = "")
}
} else if (dis[ii] == "cauchy") {
fit <- fitdistr(x, "cauchy")
theta.hat <- fit$estimate
x.vals <- seq(min(x), max(x), diff(range(x)) / 1000)
y.vals <-
dcauchy(x = x.vals, location = theta.hat[1], scale = theta.hat[2])
do.call(points, c(list(x = x.vals + dis_shift, y = y.vals,
col = colors[ii], lty = lty[ii]),
points_list))
if (legend_form == 1) {
legend.text[ii] <- "Cauchy"
} else if (legend_form == 2) {
aic.ii <- 2 * 2 - 2 * sum(dcauchy(x = x, log = TRUE,
location = theta.hat[1],
scale = theta.hat[2]))
legend.text[ii] <-
paste("Cauchy (AIC = ", sprintf(spf.val, aic.ii), ")", sep = "")
}
} else if (dis[ii] == "chisq") {
fit <- fitdistr(x, "chi-squared", start = list(df = 1))
theta.hat <- fit$estimate
x.vals <- seq(min(x), max(x), diff(range(x)) / 1000)
y.vals <- dchisq(x = x.vals, df = theta.hat[1])
do.call(points, c(list(x = x.vals + dis_shift, y = y.vals,
col = colors[ii], lty = lty[ii]),
points_list))
if (legend_form == 1) {
legend.text[ii] <- "Chi-squared"
} else if (legend_form == 2) {
aic.ii <- 2 * 2 - 2 * sum(dchisq(x = x, log = TRUE, df = theta.hat))
legend.text[ii] <-
paste("Chi-squared (AIC = ", sprintf(spf.val, aic.ii), ")", sep = "")
}
} else if (dis[ii] == "exp") {
fit <- fitdistr(x, "exponential")
theta.hat <- fit$estimate
x.vals <- seq(min(x), max(x), diff(range(x)) / 1000)
y.vals <- dexp(x = x.vals, rate = theta.hat)
do.call(points, c(list(x = x.vals + dis_shift, y = y.vals,
col = colors[ii], lty = lty[ii]),
points_list))
if (legend_form == 1) {
legend.text[ii] <- "Exponential"
} else if (legend_form == 2) {
aic.ii <- 2 * 2 - 2 * sum(dexp(x = x, log = TRUE, rate = theta.hat))
legend.text[ii] <-
paste("Exponential (AIC = ", sprintf(spf.val, aic.ii), ")", sep = "")
}
} else if (dis[ii] == "f") {
fit <- fitdistr(x, "f", start = list(df1 = 1, df2 = 1))
theta.hat <- fit$estimate
x.vals <- seq(min(x), max(x), diff(range(x)) / 1000)
y.vals <- df(x = x.vals, df1 = theta.hat[1], df2 = theta.hat[2])
do.call(points, c(list(x = x.vals + dis_shift, y = y.vals,
col = colors[ii], lty = lty[ii]),
points_list))
if (legend_form == 1) {
legend.text[ii] <- "F"
} else if (legend_form == 2) {
aic.ii <- 2 * 2 - 2 * sum(df(x = x, log = TRUE,
df1 = theta.hat[1],
df2 = theta.hat[2]))
legend.text[ii] <-
paste("F (AIC = ", sprintf(spf.val, aic.ii), ")", sep = "")
}
} else if (dis[ii] == "gamma") {
fit <- fitdistr(x, "gamma")
theta.hat <- fit$estimate
x.vals <- seq(min(x), max(x), diff(range(x)) / 1000)
y.vals <- dgamma(x = x.vals, shape = theta.hat[1], rate = theta.hat[2])
do.call(points, c(list(x = x.vals + dis_shift, y = y.vals,
col = colors[ii], lty = lty[ii]),
points_list))
if (legend_form == 1) {
legend.text[ii] <- "Gamma"
} else if (legend_form == 2) {
aic.ii <- 2 * 2 - 2 * sum(dgamma(x = x, log = TRUE,
shape = theta.hat[1],
rate = theta.hat[2]))
legend.text[ii] <-
paste("Gamma (AIC = ", sprintf(spf.val, aic.ii), ")", sep = "")
}
} else if (dis[ii] == "geom") {
fit <- fitdistr(x, "geometric")
theta.hat <- fit$estimate
x.vals <- seq(round(min(x)), round(max(x)), 1)
y.vals <- dgeom(x = x.vals, prob = theta.hat)
do.call(points, c(list(x = x.vals + dis_shift, y = y.vals,
col = colors[ii], lty = lty[ii]),
points_list))
if (legend_form == 1) {
legend.text[ii] <- "Geometric"
} else if (legend_form == 2) {
aic.ii <- 2 * 2 - 2 * sum(dgeom(x = x, log = TRUE, prob = theta.hat))
legend.text[ii] <-
paste("Geometric (AIC = ", sprintf(spf.val, aic.ii), ")", sep = "")
}
} else if (dis[ii] == "hyper") {
# Need user-input values for N, k
ll.hyper <- function(m) {
n <- N - m
ll <- sum(log(choose(m, x) * choose(n, k - x) / choose(n + m, k)))
return(-ll)
}
m.hat <- round(nlminb(objective = ll.hyper, start = k)$par)
x.vals <- seq(round(min(x)), round(max(x)), 1)
y.vals <- dhyper(x = x.vals, m = m.hat, n = N - m.hat, k = k)
do.call(points, c(list(x = x.vals + dis_shift, y = y.vals,
col = colors[ii], lty = lty[ii]),
points_list))
if (legend_form == 1) {
legend.text[ii] <- "Hypergeo."
} else if (legend_form == 2) {
aic.ii <- 2 * 2 - 2 * sum(dhyper(x = x, log = TRUE,
m = m.hat, n = N - m.hat, k = k))
legend.text[ii] <-
paste("Hypergeo. (AIC = ", sprintf(spf.val, aic.ii), ")", sep = "")
}
} else if (dis[ii] == "lnorm") {
fit <- fitdistr(x, "lognormal")
theta.hat <- fit$estimate
x.vals <- seq(min(x), max(x), diff(range(x)) / 1000)
y.vals <- dlnorm(x = x.vals, meanlog = theta.hat[1], sdlog = theta.hat[2])
do.call(points, c(list(x = x.vals + dis_shift, y = y.vals,
col = colors[ii], lty = lty[ii]),
points_list))
if (legend_form == 1) {
legend.text[ii] <- "Lognormal"
} else if (legend_form == 2) {
aic.ii <- 2 * 2 - 2 * sum(dlnorm(x = x, log = TRUE,
meanlog = theta.hat[1],
sdlog = theta.hat[2]))
legend.text[ii] <-
paste("Lognormal (AIC = ", sprintf(spf.val, aic.ii), ")", sep = "")
}
} else if (dis[ii] == "nbinom") {
ll.nbinom <- function(p) {
ll <- sum(log(gamma(x + size) / (gamma(size) * factorial(x)) *
p^size * (1 - p)^x))
return(-ll)
}
p.hat <- nlminb(objective = ll.nbinom, start = 0.5)$par
x.vals <- seq(round(min(x)), round(max(x)), 1)
y.vals <- dnbinom(x = x.vals, size = size, prob = p.hat)
do.call(points, c(list(x = x.vals + dis_shift, y = y.vals,
col = colors[ii], lty = lty[ii]),
points_list))
if (legend_form == 1) {
legend.text[ii] <- "Neg. binom."
} else if (legend_form == 2) {
aic.ii <- 2 * 2 - 2 * sum(dnbinom(x = x, log = TRUE, prob = p.hat))
legend.text[ii] <-
paste("Neg. binom. (AIC = ", sprintf(spf.val, aic.ii), ")", sep = "")
}
} else if (dis[ii] == "norm") {
fit <- fitdistr(x, "normal")
theta.hat <- fit$estimate
x.vals <- seq(min(x), max(x), diff(range(x)) / 1000)
y.vals <- dnorm(x = x.vals, mean = theta.hat[1], sd = theta.hat[2])
do.call(points, c(list(x = x.vals + dis_shift, y = y.vals,
col = colors[ii], lty = lty[ii]),
points_list))
if (legend_form == 1) {
legend.text[ii] <- "Normal"
} else if (legend_form == 2) {
aic.ii <- 2 * 2 - 2 * sum(dnorm(x = x, log = TRUE,
mean = theta.hat[1],
sd = theta.hat[2]))
legend.text[ii] <-
paste("Normal (AIC = ", sprintf(spf.val, aic.ii), ")", sep = "")
}
} else if (dis[ii] == "pois") {
fit <- fitdistr(x, "poisson")
theta.hat <- fit$estimate
x.vals <- seq(round(min(x)), round(max(x)), 1)
y.vals <- dpois(x = x.vals, lambda = theta.hat)
do.call(points, c(list(x = x.vals + dis_shift, y = y.vals,
col = colors[ii], lty = lty[ii]),
points_list))
if (legend_form == 1) {
legend.text[ii] <- "Poisson"
} else if (legend_form == 2) {
aic.ii <- 2 * 2 - 2 * sum(dpois(x = x, log = TRUE, lambda = theta.hat))
legend.text[ii] <-
paste("Poisson (AIC = ", sprintf(spf.val, aic.ii), ")", sep = "")
}
} else if (dis[ii] == "t") {
ll.t <- function(theta) {
f.df <- theta[1]
f.ncp <- theta[2]
ll <- sum(dt(x = x, df = f.df, ncp = f.ncp, log = TRUE))
return(-ll)
}
theta.hat <- nlminb(objective = ll.t, start = c(5, 0),
lower = c(1e-4, -Inf))$par
x.vals <- seq(min(x), max(x), diff(range(x)) / 1000)
y.vals <- dt(x = x.vals, df = theta.hat[1], ncp = theta.hat[2])
do.call(points, c(list(x = x.vals + dis_shift, y = y.vals,
col = colors[ii], lty = lty[ii]),
points_list))
if (legend_form == 1) {
legend.text[ii] <- "t"
} else if (legend_form == 2) {
aic.ii <- 2 * 2 - 2 * sum(dt(x = x, log = TRUE,
df = theta.hat[1], ncp = theta.hat[2]))
legend.text[ii] <-
paste("t (AIC = ", sprintf(spf.val, aic.ii), ")", sep = "")
}
} else if (dis[ii] == "unif") {
min.hat <- min(x)
max.hat <- max(x)
x.vals <- seq(min(x), max(x), diff(range(x)) / 1000)
y.vals <- dunif(x = x.vals, min = min.hat, max = max.hat)
do.call(points, c(list(x = x.vals + dis_shift, y = y.vals,
col = colors[ii], lty = lty[ii]),
points_list))
if (legend_form == 1) {
legend.text[ii] <- "Uniform"
} else if (legend_form == 2) {
aic.ii <- 2 * 2 - 2 * sum(dunif(x = x, log = TRUE,
min = min.hat, max = max.hat))
legend.text[ii] <-
paste("Uniform (AIC = ", sprintf(spf.val, aic.ii), ")", sep = "")
}
} else if (dis[ii] == "weibull") {
fit <- fitdistr(x, "weibull")
theta.hat <- fit$estimate
x.vals <- seq(min(x), max(x), diff(range(x)) / 1000)
y.vals <- dweibull(x = x.vals, shape = theta.hat[1], scale = theta.hat[2])
do.call(points, c(list(x = x.vals + dis_shift, y = y.vals,
col = colors[ii], lty = lty[ii]),
points_list))
if (legend_form == 1) {
legend.text[ii] <- "Weibull"
} else if (legend_form == 2) {
aic.ii <- 2 * 2 - 2 * sum(dweibull(x = x, log = TRUE,
shape = theta.hat[1],
scale = theta.hat[2]))
legend.text[ii] <-
paste("Uniform (AIC = ", sprintf(spf.val, aic.ii), ")", sep = "")
}
}
}
# Add legend if requested
if (legend_form %in% c(1, 2)) {
legend_list <- list_override(list1 = list(x = "topright"),
list2 = legend_list)
do.call(legend, c(list(lty = lty, col = colors, legend = legend.text),
legend_list))
}
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.