Nothing
utils::globalVariables(c(
"everything", "Variable", "Value", "Min", "Max", "Mean", "SD",
"Density", "density", "Sample", "Theoretical",
"q_sample", "q_theoretical", "slope", "intercept",
"x", "y", "x_var", "y_var", "qq", "q_sample_high", "q_sample_low", "q_theoretical_high", "q_theoretical_low"
))
#' Diagnostic Plots for Univariate and Multivariate Data
#'
#' Generates QQ plots, histograms with density overlays, boxplots, or scatterplot matrices
#' for numeric data (vector, matrix, or data frame).
#'
#' @param data A numeric vector, matrix, or data frame with observations in rows and variables in columns.
#' @param type Character; type of plot. One of: "qq", "histogram", "boxplot", "scatter". Default selects the first.
#' @param title Character; plot title.
#' @param interactive Logical; if TRUE, renders the plot interactively using plotly.
#'
#' @examples
#' \dontrun{
#' data <- iris[1:50, 1:3]
#' univariate_diagnostic_plot(data, type = "histogram")
#' univariate_diagnostic_plot(data, type = "qq")
#' univariate_diagnostic_plot(data, type = "boxplot")
#' univariate_diagnostic_plot(data, type = "scatter", interactive = TRUE)
#' }
#' @importFrom graphics hist curve boxplot pairs par
#' @importFrom stats complete.cases dnorm sd qqnorm qqline quantile
#' @importFrom ggplot2 ggplot aes geom_histogram geom_line geom_point geom_boxplot
#' facet_wrap facet_grid labs theme_minimal theme element_text element_blank
#' element_rect unit margin geom_abline after_stat vars ggtitle
#' @importFrom tidyr pivot_longer unnest
#' @importFrom dplyr group_by summarise rowwise mutate reframe ungroup left_join select
#' @importFrom tibble tibble
#' @importFrom purrr map_dfr
#' @importFrom plotly ggplotly
#' @export
univariate_diagnostic_plot <- function(data,
type = c("qq", "histogram", "boxplot", "scatter"),
title = NULL,
interactive = FALSE) {
type <- match.arg(type)
# Coerce input to data frame
if (is.vector(data)) {
df <- data.frame(value = data)
var_names <- deparse(substitute(data))
names(df) <- var_names
} else if (is.matrix(data) || is.data.frame(data)) {
df <- as.data.frame(data)
var_names <- if (!is.null(colnames(df)))
colnames(df)
else
paste0("V", seq_len(ncol(df)))
names(df) <- var_names
} else {
stop("Input must be a numeric vector, matrix, or data frame.")
}
# Drop non-numeric columns
is_num <- vapply(df, is.numeric, logical(1))
if (!all(is_num)) {
warning("Dropping non-numeric columns: ",
paste(names(df)[!is_num], collapse = ", "))
df <- df[, is_num, drop = FALSE]
var_names <- names(df)
}
p <- ncol(df)
if (p < 1)
stop("No numeric variables to plot.")
# Remove rows with any NA
complete_rows <- complete.cases(df)
if (sum(!complete_rows) > 0) {
warning(sprintf("Removed %d rows with missing values.", sum(!complete_rows)))
df <- df[complete_rows, , drop = FALSE]
}
df_long <- df %>%
pivot_longer(cols = everything(),
names_to = "Variable",
values_to = "Value")
num_vars <- ncol(df)
ncol_plot <- ceiling(sqrt(num_vars))
nrow_plot <- ceiling(num_vars / ncol_plot)
# Plot according to type
if (type == "histogram") {
# Step 2: Compute density curves per variable
density_data <- df_long %>%
group_by(Variable) %>%
summarise(
Mean = mean(Value, na.rm = TRUE),
SD = sd(Value, na.rm = TRUE),
Min = min(Value, na.rm = TRUE),
Max = max(Value, na.rm = TRUE),
.groups = "drop"
) %>%
rowwise() %>%
mutate(Density = list({
x_vals <- seq(Min, Max, length.out = 100)
y_vals <- dnorm(x_vals, mean = Mean, sd = SD)
tibble(Value = x_vals, Density = y_vals)
})) %>%
unnest(Density)
# Step 4: Plot histograms with normal density overlays
p <- ggplot(df_long, aes(x = Value)) +
geom_histogram(
aes(y = after_stat(density)),
bins = 30,
fill = "#E0E0E0",
color = "black"
) +
geom_line(
data = density_data,
aes(y = Density),
color = "#D62828",
linewidth = 1
) +
facet_wrap( ~ Variable,
scales = "free",
ncol = ncol_plot,
nrow = nrow_plot) +
labs(x = "Value", y = "Density") +
theme_minimal(base_family = "sans") +
theme(
strip.text = element_text(size = 12, face = "bold"),
axis.title = element_text(size = 11),
axis.text = element_text(size = 10),
panel.grid.minor = element_blank(),
plot.title = element_text(
size = 14,
face = "bold",
hjust = 0.5
)
)+
ggtitle(title)
} else if (type == "qq") {
# Step 1: Convert to long format
df_long <- df %>%
pivot_longer(cols = everything(),
names_to = "Variable",
values_to = "Value")
# Step 2: Compute theoretical quantiles and sample quantiles
qq_data <- df_long %>%
group_by(Variable) %>%
mutate(
qq = list(qqnorm(Value, plot.it = FALSE)),
Theoretical = qq[[1]]$x,
Sample = qq[[1]]$y
) %>%
ungroup() %>%
select(-qq) # optional: remove the list column
# Step 3: Compute slope and intercept for each Q-Q line (like qqline())
line_params <- qq_data %>%
group_by(Variable) %>%
summarise(
q_sample_low = quantile(Sample, 0.25, na.rm = TRUE),
q_sample_high = quantile(Sample, 0.75, na.rm = TRUE),
q_theoretical_low = quantile(Theoretical, 0.25, na.rm = TRUE),
q_theoretical_high = quantile(Theoretical, 0.75, na.rm = TRUE),
.groups = "drop"
) %>%
mutate(
slope = (q_sample_high - q_sample_low) / (q_theoretical_high - q_theoretical_low),
intercept = q_sample_low - slope * q_theoretical_low
) %>%
select(Variable, slope, intercept)
# Step 4: Merge slope/intercept into qq_data
qq_data <- left_join(qq_data, line_params, by = "Variable")
# Step 5: Plot with proper qqline per facet and dynamic layout
p <- ggplot(qq_data, aes(x = Theoretical, y = Sample)) +
geom_point(color = "#4C4C4C",
size = 1.5,
alpha = 0.7) +
geom_abline(
aes(slope = slope, intercept = intercept),
color = "#D62828",
linewidth = 0.8
) +
facet_wrap( ~ Variable,
scales = "free",
nrow = nrow_plot,
ncol = ncol_plot) +
labs(x = "Theoretical Quantiles", y = "Sample Quantiles") +
theme_minimal(base_family = "sans") +
theme(
strip.text = element_text(size = 12, face = "bold"),
axis.title = element_text(size = 11),
axis.text = element_text(size = 10),
panel.grid.minor = element_blank(),
plot.title = element_text(
size = 14,
face = "bold",
hjust = 0.5
)
)+
ggtitle(title)
} else if (type == "boxplot") {
# Step 2: Determine layout automatically
num_vars <- ncol(df)
ncol_plot <- ceiling(sqrt(num_vars))
nrow_plot <- ceiling(num_vars / ncol_plot)
# Step 3: Create faceted boxplots (unscaled)
p <- ggplot(df_long, aes(x = Variable, y = Value)) +
geom_boxplot(
fill = "#A6CEE3",
color = "#333333",
outlier.color = "#D62828",
outlier.size = 1.5,
width = 0.5
) +
facet_wrap( ~ Variable,
scales = "free",
ncol = ncol_plot,
nrow = nrow_plot) +
labs(x = NULL, y = "Value") +
theme_minimal(base_family = "sans") +
theme(
strip.text = element_text(size = 12, face = "bold"),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.y = element_text(size = 11),
axis.text.y = element_text(size = 10),
panel.grid.minor = element_blank(),
panel.grid.major.x = element_blank(),
plot.title = element_text(
size = 14,
face = "bold",
hjust = 0.5
)
)+
ggtitle(title)
} else if (type == "scatter") {
var_names <- names(df)
if (length(var_names) < 2)
stop("Scatterplot matrix requires at least two variables.")
# Prepare all variable combinations
combinations <- expand.grid(x_var = var_names,
y_var = var_names,
stringsAsFactors = FALSE)
# Build long-format data manually
plot_data <- purrr::map_dfr(seq_len(nrow(combinations)), function(i) {
xv <- combinations$x_var[i]
yv <- combinations$y_var[i]
tibble(
x_var = xv,
y_var = yv,
x = df[[xv]],
y = df[[yv]]
)
})
# Plot
p <- ggplot(plot_data, aes(x = x, y = y)) +
geom_point(alpha = 0.7,
size = 1.2,
color = "#3366CC") +
facet_grid(rows = vars(y_var),
cols = vars(x_var),
scales = "free") +
labs(x = NULL, y = NULL) +
theme_minimal(base_family = "sans") +
theme(
strip.background = element_rect(fill = "#f0f0f0", color = NA),
strip.text = element_text(size = 11, face = "bold"),
axis.text.x = element_text(
size = 7,
angle = 45,
hjust = 1
),
axis.text.y = element_text(size = 7),
panel.spacing = unit(0.8, "lines"),
plot.margin = margin(10, 10, 10, 10),
plot.title = element_text(
size = 14,
face = "bold",
hjust = 0.5
)
)+
ggtitle(title)
}
if (interactive) {
p <- ggplotly(p)
} else{
p
}
print(p) # <-- this is essential
invisible(p)
# invisible(NULL)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.