#' Plot the sample frequency
#' @param add_fill_color Logical or a numeric value, To fill or not, or specifiy the cells to be filled.
#' @param sample1, sample2 numeric vectors to be drawn, sample2 can be NULL.
#' @examples
#' sample1 <- c(11, 9, 4, 5, 6, 7, 12, 10)
#' sample2 <- c(7, 13, 14, 16, 9, 11, 15, 11)
#' Plot_Sample_Frequency(sample1, sample2, show_sample_deviation = T, deviation_from_population_mean = F)
#' @export
Plot_Sample_Frequency <- function(
sample1,
sample2 = NULL,
x_range = NULL,
bin_width = NULL,
show_sample_deviation = FALSE,
show_sample_grid = TRUE,
show_sample_sigma = FALSE,
show_sample_variation_range = FALSE,
standarized_x_axis = FALSE,
standarized_y_axis = FALSE,
show_population_curve = FALSE,
population_mean = mean(c(sample1, sample2)),
population_sd = sd(c(sample1, sample2)),
deviation_from_population_mean = TRUE,
add_fill_color = TRUE,
xlab = "",
ylab = "Frequency",
main = "",
...) {
##### 0. Define colors ######
sample1_col <- c("#f8cea2", "#b78135", "#cf232a")
sample2_col <- c("#a0d5cf", "#009f9b")
######## 1. Calculate the required value ######
sample_tidy <- function(sample) {
Mean <- mean(sample)
Table <- as.data.frame(table(sample, dnn = "data"))
names(Table)[names(Table) == "Freq"] <- "Frequency"
Table[, "data"] <- as.numeric(as.character(Table[, "data"]))
Table[, "data_deviation_rank"] <- rank(abs(scale(Table[, "data"])))
Table[, "data_deviation_sign"] <- sign(scale(Table[, "data"]))
Table_Unique <- Table[Table[, "data"] != Mean, ]
res <- list(Table = Table, Table_Unique = Table_Unique, Mean = Mean)
return(res)
}
sample_total <- c(sample1, sample2)
sample_total_tidy <- sample_tidy(sample_total)
sample_total_values_x <- sample_total_tidy[["Table"]][["data"]]
sample_total_values_x <- sort(sample_total_values_x)
sample_total_frequency_x <- sample_total_tidy[["Table"]][["Frequency"]]
sample_total_unique_x <- sample_total_tidy[["Table_Unique"]][["data"]]
sample_total_unique_x <- sort(sample_total_unique_x)
sample_total_unique_frequency <- sample_total_tidy[["Table_Unique"]][["Frequency"]]
sample_total_unique_length <- nrow(sample_total_tidy[["Table_Unique"]])
sample_total_deviation_rank <- sample_total_tidy[["Table_Unique"]]$data_deviation_rank
sample_total_deviation_rank_range <-
sample_total_tidy[["Table_Unique"]][sample_total_deviation_rank %in% range(sample_total_deviation_rank), ]
sample_total_deviation_rank_range <-
sample_total_deviation_rank_range[!duplicated(sample_total_deviation_rank_range$ data_deviation_sign), ]
sample_total_deviation_rank_range <-
sample_total_deviation_rank_range[order(sample_total_deviation_rank_range$ data_deviation_rank), ]
sample_1_tidy <- sample_tidy(sample1)
sample_1_values_x <- sample_total_values_x[sample_total_values_x %in% sample1]
sample_1_values_frequency <-
sample_total_tidy[["Table"]][sample_total_tidy[["Table"]][["data"]] %in% sample_1_values_x, "Frequency"]
sample_1_valus_length <- length(sample_1_values_x)
sample_2_tidy <- if (!is.null(sample2)) sample_tidy(sample2) else sample_1_tidy
sample_2_values_x <- sample_total_values_x[sample_total_values_x %in% sample2]
sample_2_values_frequency <-
sample_total_tidy[["Table"]][sample_total_tidy[["Table"]][["data"]] %in% sample_2_values_x, "Frequency"]
sample_2_valus_length <- length(sample_2_values_x)
if (is.null(bin_width)) {
bin_width <- min(sample_total_values_x[-1] - sample_total_values_x[-length(sample_total_values_x)])
} else {
bin_width <- bin_width
}
if (is.null(x_range)) {
xlim <- c(min(sample_total_values_x) - bin_width, max(sample_total_values_x) + bin_width)
} else {
xlim <- c(min(x_range) - bin_width, max(x_range) + bin_width)
}
ylim <- c(0, max(sample_total_frequency_x) + 1)
breaks <- seq(min(xlim) + bin_width / 2, max(xlim) + bin_width / 2, by = bin_width)
xlabel <- round(seq(min(xlim) + bin_width, max(xlim) - bin_width, by = bin_width), 2)
xzlabel <- round((xlabel - population_mean) / population_sd, 2)
ylabel <- seq(0, max(sample_total_frequency_x), by = 1)
ydensitylabel <- format(ylabel / length(sample_total), digits = 2)
##### 2. Do the plotting ######
par(mar = c(5, 4, 4, 5) + 0.1)
hist(sample_total,
ylim = ylim, xlim = xlim, breaks = breaks, font.lab = 2,
col = ifelse(is.logical(add_fill_color) && add_fill_color, sample1_col[1], "white"),
border = sample1_col[2], main = main, xlab = xlab, ylab = ylab,
yaxs = "i", xaxs = "i", xaxt = "n", yaxt = "n", ...
)
mtext(expression(italic("X")), side = 1, adj = 1.03, line = -0.6)
axis(1, at = xlim, labels = c("", ""), lwd.ticks = 0)
axis(1, at = xlabel, labels = xlabel, lwd = 0, lwd.ticks = 1, padj = -0.5)
axis(2, at = ylim, labels = c("", ""), lwd.ticks = 0)
axis(2, at = ylabel, labels = ylabel, lwd = 0, lwd.ticks = 1, las = 1)
##### 3. Add more axes ######
if (standarized_y_axis) { # AXZ
axis(4, at = ylim, labels = c("", ""), lwd.ticks = 0)
axis(4, at = ylabel, labels = ydensitylabel, lwd = 0, lwd.ticks = 1, las = 1)
mtext("Density", side = 4, line = 4, padj = -1)
}
if (standarized_x_axis) { # AYD
axis(1, at = xlim, labels = c("", ""), lwd.ticks = 0, line = 2.5)
axis(1, at = xlabel, labels = xzlabel, lwd = 0, lwd.ticks = 1, line = 2.5, padj = -0.5)
mtext(expression(italic("z")), side = 1, line = 2.5, adj = 0.99)
}
##### 4. Add more information concerning the sample data ######
plot_grids <- function(tidy, frequency, values, color) {
invisible(lapply(1:nrow(tidy[["Table"]]), function(i) {
invisible(lapply(1:frequency[i], function(j) {
segments(values[i] - bin_width / 2, 1:j, values[i] + bin_width / 2, 1:j, col = color)
}))
}))
}
if (!is.null(sample2)) {
hist(x = sample1, breaks = breaks, col = sample2_col[1], border = sample2_col[2], add = TRUE)
legend("topleft", lwd = 10, col = c(sample1_col[1], sample2_col[1]), bty = "n", legend = c("Condition A", "Condition B"))
}
if (!is.logical(add_fill_color)) {
values_to_be_colored <- sample_total[sample_total >= add_fill_color]
hist(x = values_to_be_colored, breaks = breaks, col = sample1_col[1], border = sample1_col[2], add = TRUE)
}
if (show_sample_grid) {
if (is.null(sample2)) {
plot_grids(sample_total_tidy, sample_total_frequency_x, sample_total_values_x, sample1_col[2])
} else {
plot_grids(sample_total_tidy, sample_total_frequency_x, sample_total_values_x, sample2_col[2])
plot_grids(sample_2_tidy, sample_2_values_frequency, sample_2_values_x, sample1_col[2])
}
}
##### Add legends if two sampels are included
sample_sigma <- function(sample_total, y_adj = 0.5, color = sample1_col[2]) {
segments(
x0 = mean(sample_total), y0 = ylim[2] - 1,
x1 = mean(sample_total), y1 = max(sample_total_frequency_x) + 1, col = color
)
arrows(mean(sample_total) - sd(sample_total), ylim[2] - y_adj, mean(sample_total) + sd(sample_total),
ylim[2] - y_adj,
code = 3, length = 0.1, col = color
)
mtext(side = 3, at = mean(sample_total), text = bquote(italic(M) == .(round(mean(sample_total), 2))), col = color)
text(
x = mean(sample_total) + sd(sample_total) / 2, y = ylim[2] - y_adj - 0.05,
label = bquote(s == .(round(sd(sample_total), 2))), pos = 3, col = color
)
text(
x = mean(sample_total) - sd(sample_total) / 2, y = ylim[2] - y_adj - 0.05,
label = bquote(s == .(round(sd(sample_total), 2))), pos = 3, col = color
)
}
if (show_sample_sigma) {
if (is.null(sample2)) {
sample_sigma(sample1, y_adj = 0.5, color = sample1_col[2])
} else {
sample_sigma(sample1, y_adj = 0.6, color = sample2_col[2])
sample_sigma(sample2, y_adj = 0.3, color = sample1_col[2])
}
}
if (show_sample_variation_range) {
segments(sample_total_deviation_rank_range[, "data"], sample_total_deviation_rank_range[, "Frequency"],
sample_total_deviation_rank_range[, "data"], max(sample_total_frequency_x) + 0.5,
col = sample1_col[3], lwd = 2, lty = "dashed"
)
abline(v = mean(sample_total), col = sample1_col[3], lwd = 2)
arrows(mean(sample_total), max(sample_total_frequency_x) + 0.5,
sample_total_deviation_rank_range[, "data"], ,
col = sample1_col[3], lwd = 2, length = 0.1, code = 3
)
mtext(side = 3, at = mean(sample_total), text = bquote(italic(M) == .(round(mean(sample_total), 2))), col = sample1_col[3])
text(
x = (sample_total_deviation_rank_range[, "data"] + mean(sample_total)) / 2,
y = max(sample_total_frequency_x) + 0.5,
label = round(abs(sample_total_deviation_rank_range[, "data"] - mean(sample_total)), 2), pos = 3, col = sample1_col[3]
)
}
add_vertical_horizontal_lines <- function(sample, center = mean(sample), values, frequency, length, color, start, end) {
## vertical line above mu
segments(x0 = center, y0 = length(frequency[frequency == center]), y1 = max(frequency) + 1, col = "gray")
## vertical lines above each unique observation
segments(x0 = values, y0 = frequency, y1 = seq(max(frequency) + start, max(frequency) + end, length.out = length), col = "gray")
## horizontal lines connecting the vertical lines and the population mean
segments(
x0 = values, y0 = seq(max(frequency) + start, max(frequency) + end, length.out = length),
x1 = rep(center, sample_total_unique_length), col = color
)
##
mtext(side = 3, at = mean(sample), text = bquote(italic(M) == .(round(mean(sample), 2))))
}
add_lines_together <- function(center1, center2) {
break_point <- (sample_1_valus_length / sample_total_unique_length) * 0.8
if (is.null(sample2)) {
add_vertical_horizontal_lines(
sample = sample1, center = center1, values = sample_1_values_x,
frequency = sample_1_values_frequency, length = sample_1_valus_length, color = "red", start = 0.1, end = break_point
)
} else {
add_vertical_horizontal_lines(sample1, center1, sample_1_values_x, sample_1_values_frequency,
sample_1_valus_length,
color = "blue", 0.1, break_point
)
add_vertical_horizontal_lines(sample2, center2, sample_2_values_x, sample_2_values_frequency,
sample_2_valus_length,
color = "red", break_point, 0.9
)
}
}
if (show_sample_deviation) {
if (deviation_from_population_mean) {
add_lines_together(population_mean, population_mean)
mtext(side = 3, at = population_mean, text = bquote(italic(mu[0]) == .(population_mean)), line = 1)
} else {
add_lines_together(mean(sample1), mean(sample2))
mtext(side = 3, at = population_mean, text = bquote(italic(mu[0]) == .(population_mean)), line = 1)
}
}
##### 5. Add population curve ######
if (show_population_curve) {
zx <- seq(population_mean - 3 * population_sd, population_mean + 3 * population_sd, len = 100)
zy <- dnorm(zx, mean = population_mean, sd = population_sd) * length(sample1)
lines(zx, zy, col = "#cf232a", lwd = 2)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.