#' Compare a sample proportion to a population proportion
#'
#' @details See \url{https://radiant-rstats.github.io/docs/basics/single_prop.html} for an example in Radiant
#'
#' @param dataset Dataset
#' @param var The variable selected for the proportion comparison
#' @param lev The factor level selected for the proportion comparison
#' @param comp_value Population value to compare to the sample proportion
#' @param alternative The alternative hypothesis ("two.sided", "greater", or "less")
#' @param conf_lev Span of the confidence interval
#' @param test bionomial exact test ("binom") or Z-test ("z")
#' @param data_filter Expression entered in, e.g., Data > View to filter the dataset in Radiant. The expression should be a string (e.g., "price > 10000")
#' @param envir Environment to extract data from
#'
#' @return A list of variables used in single_prop as an object of class single_prop
#'
#' @examples
#' single_prop(titanic, "survived") %>% str()
#' single_prop(titanic, "survived", lev = "Yes", comp_value = 0.5, alternative = "less") %>% str()
#'
#' @seealso \code{\link{summary.single_prop}} to summarize the results
#' @seealso \code{\link{plot.single_prop}} to plot the results
#'
#' @export
single_prop <- function(dataset, var, lev = "", comp_value = 0.5,
alternative = "two.sided", conf_lev = .95,
test = "binom", data_filter = "",
envir = parent.frame()) {
df_name <- if (is_string(dataset)) dataset else deparse(substitute(dataset))
dataset <- get_data(dataset, var, filt = data_filter, na.rm = FALSE, envir = envir) %>%
mutate_all(as.factor)
## removing any missing values
n_miss <- n_missing(dataset)
dataset <- na.omit(dataset)
levs <- levels(dataset[[var]])
if (lev != "") {
if (lev %in% levs && levs[1] != lev) {
dataset[[var]] %<>% as.character %>%
as.factor() %>%
relevel(lev)
levs <- levels(dataset[[var]])
}
} else {
lev <- levs[1]
}
n <- nrow(dataset)
ns <- sum(dataset == lev)
p <- ns / n
dat_summary <- data.frame(
diff = p - comp_value,
p = p,
ns = ns,
n = n,
n_missing = n_miss,
stringsAsFactors = FALSE
) %>% mutate(
sd = sqrt(p * (1 - p)),
se = sqrt(p * (1 - p) / n),
me = se * qnorm(conf_lev / 2 + .5, lower.tail = TRUE)
)
if (test == "z") {
## use z-test
res <- sshhr(prop.test(
ns, n,
p = comp_value, alternative = alternative,
conf.level = conf_lev, correct = FALSE
))
res <- tidy(res)
## convert chi-square stat to a z-score
res$statistic <- sqrt(res$statistic) * ifelse(res$estimate < comp_value, -1, 1)
} else {
## use binom.test for exact
res <- binom.test(
ns, n,
p = comp_value, alternative = alternative,
conf.level = conf_lev
)
res <- tidy(res)
}
# removing unneeded arguments
rm(envir)
as.list(environment()) %>% add_class("single_prop")
}
#' Summary method for the single_prop function
#'
#' @details See \url{https://radiant-rstats.github.io/docs/basics/single_prop.html} for an example in Radiant
#'
#' @param object Return value from \code{\link{single_prop}}
#' @param dec Number of decimals to show
#' @param ... further arguments passed to or from other methods
#'
#' @examples
#' result <- single_prop(titanic, "survived", lev = "Yes", comp_value = 0.5, alternative = "less")
#' summary(result)
#'
#' @seealso \code{\link{single_prop}} to generate the results
#' @seealso \code{\link{plot.single_prop}} to plot the results
#'
#' @export
summary.single_prop <- function(object, dec = 3, ...) {
if (object$test == "z") {
cat("Single proportion test (z-test)\n")
} else {
cat("Single proportion test (binomial exact)\n")
}
cat("Data :", object$df_name, "\n")
if (!is.empty(object$data_filter)) {
cat("Filter :", gsub("\\n", "", object$data_filter), "\n")
}
cat("Variable :", object$var, "\n")
cat("Level :", object$lev, "in", object$var, "\n")
cat("Confidence:", object$conf_lev, "\n")
hyp_symbol <- c(
"two.sided" = "not equal to",
"less" = "<",
"greater" = ">"
)[object$alternative]
cat("Null hyp. : the proportion of", object$lev, "in", object$var, "=", object$comp_value, "\n")
cat("Alt. hyp. : the proportion of", object$lev, "in", object$var, hyp_symbol, object$comp_value, "\n\n")
## determine lower and upper % for ci
ci_perc <- ci_label(object$alternative, object$conf_lev)
## print summary statistics
object$dat_summary[-1] %>%
as.data.frame(stringsAsFactors = FALSE) %>%
format_df(dec = dec, mark = ",") %>%
print(row.names = FALSE)
cat("\n")
res <- object$res
res <- bind_cols(data.frame(diff = object$dat_summary[["diff"]]), res[, -1]) %>%
select(base::setdiff(colnames(.), c("parameter", "method", "alternative")))
if (object$test == "z") {
names(res) <- c("diff", "z.value", "p.value", ci_perc[1], ci_perc[2])
res <- format_df(res, dec = dec, mark = ",") # restrict the number of decimals
} else {
names(res) <- c("diff", "ns", "p.value", ci_perc[1], ci_perc[2])
res <- format_df(mutate(res, ns = as.integer(res$ns)), dec = dec, mark = ",") # restrict the number of decimals
}
res$` ` <- sig_stars(res$p.value)
if (res$p.value < .001) res$p.value <- "< .001"
## print statistics
print(as.data.frame(res, stringsAsFactors = FALSE), row.names = FALSE)
cat("\nSignif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n")
}
#' Plot method for the single_prop function
#'
#' @details See \url{https://radiant-rstats.github.io/docs/basics/single_prop.html} for an example in Radiant
#'
#' @param x Return value from \code{\link{single_prop}}
#' @param plots Plots to generate. "bar" shows a bar chart of the data. The "simulate" chart shows the location of the sample proportion and the comparison value (comp_value). Simulation is used to demonstrate the sampling variability in the data under the null-hypothesis
#' @param shiny Did the function call originate inside a shiny app
#' @param custom Logical (TRUE, FALSE) to indicate if ggplot object (or list of ggplot objects) should be returned. This option can be used to customize plots (e.g., add a title, change x and y labels, etc.). See examples and \url{https://ggplot2.tidyverse.org/} for options.
#' @param ... further arguments passed to or from other methods
#'
#' @examples
#' result <- single_prop(titanic, "survived", lev = "Yes", comp_value = 0.5, alternative = "less")
#' plot(result, plots = c("bar", "simulate"))
#'
#' @seealso \code{\link{single_prop}} to generate the result
#' @seealso \code{\link{summary.single_prop}} to summarize the results
#'
#' @importFrom rlang .data
#'
#' @export
plot.single_prop <- function(x, plots = "bar",
shiny = FALSE, custom = FALSE, ...) {
if (any(!plots %in% c("bar", "simulate"))) {
stop("Available plot types for 'single_prop' are \"bar\" and \"simulate\"")
}
lev_name <- x$levs[1]
plot_list <- list()
if ("bar" %in% plots) {
plot_list[[which("bar" == plots)]] <-
ggplot(x$dataset, aes(x = .data[[x$var]], fill = .data[[x$var]])) +
geom_bar(aes(y = after_stat(count) / sum(after_stat(count))), alpha = 0.5) +
scale_y_continuous(labels = scales::percent) +
theme(legend.position = "none") +
labs(
title = paste0("Single proportion: ", lev_name, " in ", x$var),
y = ""
)
}
if ("simulate" %in% plots) {
simdat <- rbinom(1000, prob = x$comp_value, x$n) %>%
divide_by(x$n) %>%
data.frame(stringsAsFactors = FALSE) %>%
set_colnames(lev_name)
cip <- ci_perc(simdat[[lev_name]], x$alternative, x$conf_lev) %>% set_names(NULL)
bw <- simdat %>%
range() %>%
diff() %>%
divide_by(20)
# to avoid problems with levels that start with numbers or contain spaces
# http://stackoverflow.com/questions/13445435/ggplot2-aes-string-fails-to-handle-names-starting-with-numbers-or-containing-s
names(simdat) <- "col1"
plot_list[[which("simulate" == plots)]] <-
ggplot(simdat, aes(x = col1)) +
geom_histogram(fill = "blue", binwidth = bw, alpha = 0.5) +
geom_vline(
xintercept = x$comp_value, color = "red",
linetype = "solid", linewidth = 1
) +
geom_vline(
xintercept = x$res$estimate, color = "black",
linetype = "solid", linewidth = 1
) +
geom_vline(xintercept = cip, color = "red", linetype = "longdash", linewidth = .5) +
labs(
title = paste0("Simulated proportions if null hyp. is true (", lev_name, " in ", x$var, ")"),
x = paste0("Level ", lev_name, " in variable ", x$var)
)
}
if (length(plot_list) > 0) {
if (custom) {
if (length(plot_list) == 1) plot_list[[1]] else plot_list
} else {
patchwork::wrap_plots(plot_list, ncol = 1) %>%
(function(x) if (shiny) x else print(x))
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.