Nothing
#' Distribution of the Data Set Shown by Skewness and Kurtosis.
#'
#' @description This function computes summary statistics --- including sample
#' size, mean, standard error, skewness, and kurtosis --- for each numeric
#' measurement column in a data set. If grouping columns are provided via
#' `group_cols`, the function computes the metrics separately for each group
#' defined by the combination of these columns (using the first element as
#' the treatment variable and the second as the grouping variable, or
#' the same column for both if only one is given). When no grouping columns
#' are provided, the entire data set is treated as a single group ("Overall").
#' A log2 transformation (using a cutoff equal to one-tenth of the smallest
#' positive value in the data) is applied to generate alternative metrics.
#' Histograms showing the distribution of skewness and kurtosis for both raw
#' and log2-transformed data are then generated and saved to a PDF if a file
#' name is provided.
#'
#' @param data A matrix or data frame containing the raw data. If
#' `group_cols` is specified, the columns with names in `group_cols` are
#' treated as grouping variables and all other columns are assumed to be
#' numeric measurement variables.
#' @param group_cols A character vector specifying the names of the grouping
#' columns. When provided, the first element is treated as the treatment
#' variable and the second as the group variable. If not provided, the
#' entire data set is treated as one group.
#' @param output_file Optional string specifying the name of the file
#' to be created. When `NULL` (default), plots are drawn on
#' the current graphics device. Ensure that the file
#' extension matches the desired format (e.g., ".pdf" for PDF output
#' or ".png" for PNG output or .tiff for TIFF output).
#' @param print_res_raw Logical. If \code{TRUE}, the function returns and prints
#' the computed summary statistics for the raw data. Default is
#' \code{FALSE}.
#' @param print_res_log Logical. If \code{TRUE}, the function returns and prints
#' the computed summary statistics for the log2-transformed data. Default is
#' \code{FALSE}.
#'
#' @return The function generates histograms of skewness and kurtosis for both
#' raw and log2-transformed data. Additionally, if either
#' \code{printResRaw} and/or \code{printResLog} is \code{TRUE}, the function
#' returns the corresponding summary statistics as a data frame or a list of
#' data frames.
#' @author Xiaohua Douglas Zhang and Shubh Saraswat
#'
#' @details A cutoff is computed as one-tenth of the minimum positive value
#' among all numeric measurement columns to avoid taking logarithms of zero.
#' When grouping columns are provided, the function loops over unique
#' grouping columns and computes the metrics for each
#' measurement column within each subgroup. Without grouping columns, the
#' entire data set is analyzed as one overall group.
#'
#' @examples
#' # Example with grouping columns (e.g., "Group" and "Treatment")
#' data(ExampleData1)
#' cyt_skku(ExampleData1[, -c(2:3)], output_file = NULL,
#' group_cols = c("Group")
#' )
#'
#' # Example without grouping columns (analyzes the entire data set)
#' cyt_skku(ExampleData1[, -c(1:3)], output_file = NULL)
#'
#' @export
#'
#' @importFrom e1071 skewness kurtosis
#' @import ggplot2
#' @importFrom gridExtra grid.arrange
#'
cyt_skku <- function(
data,
group_cols = NULL,
output_file = NULL,
print_res_raw = FALSE,
print_res_log = FALSE
) {
names(data) <- make.names(names(data), unique = TRUE)
# Identify measurement columns.
# If grouping columns are provided, exclude them from measurement columns.
if (!is.null(group_cols)) {
measure_cols <- setdiff(names(data), group_cols)
} else {
measure_cols <- names(data)
}
# Calculate a cutoff for log2 transformation:
measure_mat <- data[, measure_cols, drop = FALSE]
condt <- !is.na(measure_mat) & (measure_mat > 0)
cutoff <- min(measure_mat[condt], na.rm = TRUE) / 10
# Helper function: compute metrics for a given numeric vector Y
# and grouping variable.
compute_metrics <- function(Y, groups) {
n <- tapply(Y, groups, function(x) sum(!is.na(x)))
center <- tapply(Y, groups, mean, na.rm = TRUE)
spread <- tapply(Y, groups, function(x) {
sd(x, na.rm = TRUE) / sqrt(sum(!is.na(x)))
})
skew <- tapply(Y, groups, e1071::skewness, na.rm = TRUE)
kurt <- tapply(Y, groups, e1071::kurtosis, na.rm = TRUE)
data.frame(
group = names(n),
n = as.numeric(n),
center = as.numeric(center),
spread = as.numeric(spread),
skewness = as.numeric(skew),
kurtosis = as.numeric(kurt),
stringsAsFactors = FALSE
)
}
# Initialize lists to store results.
raw_list <- list()
log_list <- list()
if (!is.null(group_cols)) {
# If grouping columns are provided.
# Determine treatment and group columns.
if (length(group_cols) == 1) {
treatment_col <- group_cols
group_col <- group_cols
} else {
treatment_col <- group_cols[1]
group_col <- group_cols[2]
}
# Get unique treatment levels.
treatment_levels <- unique(data[[treatment_col]])
# Loop over each treatment level.
for (trt in treatment_levels) {
subset_df <- data[data[[treatment_col]] == trt, ]
groups <- subset_df[[group_col]]
for (col in measure_cols) {
# Raw data metrics.
Y_raw <- subset_df[[col]]
df_raw <- compute_metrics(Y_raw, groups)
df_raw$measurement <- col
df_raw$treatment <- trt
raw_list[[paste(trt, col, sep = "_")]] <- df_raw
# Log2-transformed data metrics.
Y_log <- log2(Y_raw + cutoff)
df_log <- compute_metrics(Y_log, groups)
df_log$measurement <- col
df_log$treatment <- trt
log_list[[paste(trt, col, sep = "_")]] <- df_log
}
}
# Combine results.
raw_results <- do.call(rbind, raw_list)
log_results <- do.call(rbind, log_list)
} else {
# No grouping columns provided: treat the entire dataset as one group.
groups <- rep("overall", nrow(data))
for (col in measure_cols) {
# Raw data metrics.
Y_raw <- data[[col]]
df_raw <- compute_metrics(Y_raw, groups)
df_raw$measurement <- col
raw_list[[col]] <- df_raw
# Log2-transformed data metrics.
Y_log <- log2(Y_raw + cutoff)
df_log <- compute_metrics(Y_log, groups)
df_log$measurement <- col
log_list[[col]] <- df_log
}
raw_results <- do.call(rbind, raw_list)
log_results <- do.call(rbind, log_list)
}
df_skew <- rbind(
data.frame(value = raw_results$skewness, transformation = "Raw"),
data.frame(value = log_results$skewness, transformation = "Log2")
)
df_kurt <- rbind(
data.frame(value = raw_results$kurtosis, transformation = "Raw"),
data.frame(value = log_results$kurtosis, transformation = "Log2")
)
p_skew <- ggplot2::ggplot(df_skew, aes(x = value, fill = transformation)) +
ggplot2::geom_histogram(bins = 30, alpha = 0.7, position = "identity") +
ggplot2::labs(x = "Skewness", title = "Distribution of Skewness") +
ggplot2::theme_minimal() +
ggplot2::theme(
panel.background = element_rect(fill = "white", colour = "white"),
plot.background = element_rect(fill = "white", colour = "white"),
legend.background = element_rect(fill = "white", colour = "white"),
axis.title = element_text(color = "black", size = 12, face = "bold"),
legend.title = element_text(color = "black", size = 10, face = "bold"),
legend.text = element_text(color = "black")
)
p_kurt <- ggplot2::ggplot(df_kurt, aes(x = value, fill = transformation)) +
ggplot2::geom_histogram(bins = 30, alpha = 0.7, position = "identity") +
ggplot2::labs(x = "Kurtosis", title = "Distribution of Kurtosis") +
ggplot2::theme_minimal() +
ggplot2::theme(
panel.background = element_rect(fill = "white", colour = "white"),
plot.background = element_rect(fill = "white", colour = "white"),
legend.background = element_rect(fill = "white", colour = "white"),
axis.title = element_text(color = "black", size = 12, face = "bold"),
legend.title = element_text(color = "black", size = 10, face = "bold"),
legend.text = element_text(color = "black")
)
plot_list <- list(p_skew, p_kurt)
# If a pdf title is provided, generate histograms using ggplot2.
if (!is.null(output_file)) {
ext <- tolower(tools::file_ext(output_file))
if (ext == "pdf") {
pdf(file = output_file, width = 7, height = 5)
on.exit(dev.off(), add = TRUE)
for (p in plot_list) {
print(p)
}
} else {
# For PNG, SVG, etc., save each page as a separate file
for (i in seq_along(plot_list)) {
fname <- if (length(plot_list) > 1) {
sub(paste0("\\.", ext, "$"), paste0("_", i, ".", ext), output_file)
} else {
output_file
}
ggplot2::ggsave(fname, plot_list[[i]], width = 7, height = 5)
}
}
}
combined_plot <- gridExtra::grid.arrange(p_skew, p_kurt, ncol = 1)
# Return results based on flags.
if (print_res_raw && print_res_log) {
list(raw = raw_results, log2 = log_results, plot = combined_plot)
} else if (print_res_raw) {
list(data = raw_results, plot = combined_plot)
} else if (print_res_log) {
list(data = log_results, plot = combined_plot)
} else {
invisible(combined_plot) # always return the plot
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.