#' Visualize a causal network as directed graph from node and edge data.
#'
#'
#' @param graph_data A tidygraph object with nodes and edges with "name", "from", "to", and optionally other variables.
#' @param geom A character name of geometric object to represent nodes, either "text" or "point".
#' @param layout A character name of layout algorithm to use in ggraph, defaults to the force layout. See ?ggraph::ggraph for more.
#' @param str_wrap_width A numeric width for the string wrapping algorithm.
#' @param arrow_curvature A numeric curvature of the edge arrows.
#'
#' @importFrom magrittr "%>%"
#' @export
plot_causal_network <- function(graph_data,
geom = "text",
layout = "auto",
str_wrap_width = 10,
text_size = 5,
edge_width = 3,
arrow_curvature = 0.05,
arrow_length = 2,
clip = "off") {
if (geom == "text") {
gg_object <- graph_data %N>%
mutate(name = stringr::str_wrap(name, width = str_wrap_width)) %>%
ggraph::ggraph(layout = layout) +
ggraph::geom_node_text(
mapping = ggplot2::aes(label = name),
lineheight = 0.8,
size = text_size
) +
ggraph::geom_edge_arc(
mapping = ggplot2::aes(
start_cap = ggraph::label_rect(node1.name),
end_cap = ggraph::label_rect(node2.name)
),
width = edge_width,
curvature = arrow_curvature,
arrow = grid::arrow(length = grid::unit(arrow_length, 'mm'), type = "closed", angle = 10)
) +
ggraph::theme_graph() +
ggplot2::coord_cartesian(clip = clip)
} else if (geom == "point") {
gg_object <- graph_data %N>%
mutate(name = stringr::str_wrap(name, width = str_wrap_width)) %>%
ggraph::ggraph(layout = layout) +
ggraph::geom_edge_arc(
curvature = arrow_curvature,
arrow = grid::arrow(length = grid::unit(arrow_length, 'mm'), type = "closed", angle = 10)
) +
ggraph::geom_node_point() +
ggraph::theme_graph() +
ggplot2::coord_cartesian(clip = clip)
} else {
stop(stringr::str_c("geom", geom, "has not been implemented", sep = " "))
}
return(gg_object)
}
#' Visualize missing value cells in dataframes.
#'
#' For plotting a table of a dataset where missingness is mapped to cell/tile color.
#' Remember the distinction between actual and structural missingness due to the data-format and impossible values.
#' You can easily explore the patterns in missigness visually with this function by sorting the dataset differently before plotting.
#'
#' @param data A dataframe.
#' @param color_values A character vector of two color hex values. The second one is for the missing values.
#' @param text_size An integer size for the variable names on the y-axis.
#' @param variables_max_n The maximum number of variables that can be plotted. Prevents unwanted slow plotting.
#'
#' @return A ggplot2 object of the visualized table.
#' @export
plot_missing <- function(data,
tile_colors = c("#f2f4fb", "#c30000"),
text_size = 9,
variables_max_n = 1000) {
if (ncol(data) > variables_max_n) {
stop("Plotting more variables can be very slow. Adjust the *variables_max_n* argument, if you want to do it.")
}
plot_result <- data %>%
dplyr::mutate(row = dplyr::row_number()) %>%
tidyr::gather("variable", "value", -row) %>%
dplyr::mutate(missing = dplyr::if_else(is.na(value), TRUE, FALSE)) %>%
ggplot2::ggplot() +
ggplot2::geom_tile(ggplot2::aes(
x = row,
y = forcats::fct_relevel(variable, colnames(data)),
fill = missing
)) +
ggplot2::scale_fill_manual(values = tile_colors, guide = "none") +
ggplot2::theme_void() +
ggplot2::theme(
axis.text.y = ggplot2::element_text(hjust = 1, size = text_size),
plot.margin = ggplot2::margin(0, 0.5, 0, 0.5, "cm")
) +
ggplot2::scale_x_discrete(position = "top") +
ggplot2::coord_cartesian(clip = "off")
return(plot_result)
}
#' Create png and svg tables.
#'
#' For creating very simple, easily post-modifiable (svg) tables from small dataframes.
#'
#' @param input A dataframe to plot.
#' @param significant_digits How many digits to spare when rounding long numbers.
#' @param str_wrap_width How many characters to approximately print on one line.
#' @return A ggplot2 object.
#' @export
plot_table <- function(input, significant_digits = 5, str_wrap_width = 10) {
# dont print massive tables
stopifnot(nrow(input) < 50, ncol(input) < 20)
# --- number formatter ----
number_patterns <- rep("0", times = significant_digits) %>%
accumulate(str_c) %>%
str_c("\\.", ., "$") %>%
prepend(str_sub(., start = 3)) %>%
rev()
number_replacement <- number_patterns %>%
str_replace_all(c("0" = " ", "\\." = " ")) %>%
str_remove_all("[^[:space:]]")
replacement_map <- number_replacement %>%
set_names(number_patterns)
format_numbers <- . %>%
signif(digits = significant_digits) %>%
format(scientific = FALSE, trim = FALSE) %>%
str_replace_all(" ", " ") %>%
str_replace_all(replacement_map)
# --- text formatter -----
format_text <- . %>%
stringr::str_replace_all("[^[:alnum:]]+", " ") %>%
stringr::str_wrap(str_wrap_width)
# --- re-format input ---
input_plotformat <- input %>%
mutate_if(is.character, format_text) %>%
mutate_if(is.numeric, format_numbers) %>%
mutate_all(funs(if_else(is.na(.), "", .))) %>%
mutate(
row = row_number(),
even_row = if_else(row %% 2 == 0, TRUE, FALSE)
) %>%
gather("variable", "value", -row, -even_row)
# --- plot ---
plot <- input_plotformat %>%
ggplot() +
geom_tile(
aes(x = reorder(variable, row), y = row, fill = even_row),
width = 0.97
) +
geom_text(
aes(x = reorder(variable, row), y = row, label = value),
color = "#565656",
lineheight = 0.8,
size = 3
) +
scale_x_discrete(position = "top") +
scale_fill_manual(values = c("#ffffff", "#f8f8f8"), guide = "none") +
scale_y_continuous() +
theme_void() +
theme(axis.text.x = element_text(
margin = margin(t = 10, b = -15),
color = "#3c3c3c",
face = "bold"),
panel.background = element_rect(fill = "#ffffff", color = "#ffffff")
)
return(plot)
}
#' Visualize strings, like unique names, with a wordcloud.
#'
#' For visualizing wordclouds of large numbers of strings.
#' Note: only about 500 strings can be visualized at once without problems.
#'
#' @param data The input dataframe.
#' @param label An unquoted name of the variable to be visualized.
#' @param title A plot title string.
#'
#' @return A ggplot2 object of the wordcloud.
#' @export
plot_strings <- function(data, label) {
# supports unquoted input
label <- enquo(label)
p <- data %>%
mutate_all(stringr::str_wrap, width = 15) %>%
# ggrepel adjusts the label positions (x, y)
ggplot(aes(x = 1, y = 1, label = !!label)) +
ggrepel::geom_text_repel(
segment.size = 0,
max.iter = 5000,
force = 1,
# text size in relation to number of labels
size = (1 / log(nrow(data))) * 15,
lineheight = 0.8
) +
theme_void() +
theme(plot.title = element_text(size = 40))
return(p)
}
#' Visualize the distributions of many variables as one plot matrix.
#'
#' @param data The input dataframe.
#' @param geom The geometric object to use; points represent observations (one per row) and bars represent counts (one per unique value).
#' @param plot_columns_n The number of plot columns.
#'
#' @return A ggplot2 object of the resulting plots.
#' @export
plot_variables <- function(input, geom = "point", plot_columns_n = 2) {
if (geom == "density") {
variable_r_classes <- purrr::map_chr(input, class)
if (all(variable_r_classes %in% c("numeric", "integer"))) {
plot <- input %>%
gather("variable", "value") %>%
ggplot() +
geom_density_ridges(
aes(x = value, y = variable),
scale = 4
) +
theme_ridges() +
scale_y_discrete(expand = c(0.01, 0)) +
theme(
axis.text.y = element_text(size = 8),
axis.text.x = element_text(size = 8)
)
return(plot)
} else {
stop("densities are not applicable for non-numeric variables")
}
}
plot_variable <- function(var, var_name, geom) {
if (geom == "point") {
if (is.numeric(var) | is.Date(var)) {
p <- var %>%
tibble::tibble(x = .) %>%
dplyr::mutate(mock = "") %>%
ggplot2::ggplot() +
ggplot2::geom_jitter(
ggplot2::aes(x = x, y = mock),
width = 0,
height = 0.15,
alpha = 0.1,
stroke = 0
) +
ggplot2::theme_minimal() +
ggplot2::theme(
axis.text.y = ggplot2::element_blank(),
axis.title.y = ggplot2::element_blank(),
panel.grid.major.y = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
axis.text.x = ggplot2::element_text(
hjust = 1,
angle = 25,
size = 7
)
) +
scale_x_continuous(
name = var_name,
labels = function(x) format(x, scientific = FALSE)
)
} else {
# for non-numeric data
p <- var %>%
tibble::tibble(x = .) %>%
mutate(x = if_else(is.na(x), "Missing", x)) %>%
dplyr::mutate(mock = "") %>%
ggplot2::ggplot() +
ggplot2::geom_jitter(
ggplot2::aes(x = x, y = mock),
width = 0.2,
height = 0.2,
alpha = 0.15
) +
ggplot2::theme_minimal() +
ggplot2::theme(
axis.text.y = ggplot2::element_blank(),
axis.title.y = ggplot2::element_blank(),
panel.grid.major.y = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
axis.text.x = ggplot2::element_text(
hjust = 1,
angle = 25,
size = 7
)
) +
scale_x_discrete(
name = var_name,
drop = FALSE,
labels = function(x) format(x, scientific = FALSE)
)
}
}
if (geom == "bar") {
if (is.numeric(var) | is.Date(var)) {
p <- var %>%
tibble::tibble(x = .) %>%
ggplot() +
geom_histogram(aes(x = x), bins = 20) +
theme_minimal() +
ggplot2::theme(
plot.margin = ggplot2::margin(20, 20, 20, 20),
panel.grid.minor = ggplot2::element_blank(),
panel.grid.major.x = ggplot2::element_blank(),
plot.title = element_text(
hjust = 0,
vjust = 1,
size = 8
),
axis.title.y = element_blank(),
axis.title.x = element_text(size = 10)
) +
scale_x_continuous(
name = var_name,
labels = function(x) format(x, scientific = FALSE)
) +
coord_cartesian(clip = "off") +
# labs(tag = "Count")
ggtitle("Count")
} else {
# for non-numeric data
p <- var %>%
tibble::tibble(x = .) %>%
mutate(x = if_else(is.na(x), "Missing", x)) %>%
ggplot() +
geom_bar(aes(x = x, y = ..count..)) +
coord_flip(clip = "off") +
theme_minimal() +
ggplot2::theme(
plot.margin = ggplot2::margin(20, 20, 20, 20),
panel.grid.minor = ggplot2::element_blank(),
panel.grid.major.x = ggplot2::element_blank(),
plot.title = element_text(
hjust = 0,
vjust = 1,
size = 8
),
axis.title.y = element_blank(),
axis.title.x = element_text(size = 8),
axis.text.y = element_text(hjust = 1)
) +
scale_x_discrete(
drop = FALSE
) +
scale_y_continuous(
limits = c(0, length(var)),
name = "Count"
) +
# labs(tag = var_name)
ggtitle(var_name)
}
}
return(p)
}
var_names <- colnames(input)
plots <- purrr::map2(input, var_names, plot_variable, geom = geom) %>%
# adding plots is a patchwork-package feature
purrr::reduce(`%+%`) +
patchwork::plot_layout(ncol = plot_columns_n)
return(plots)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.