Nothing
#
# _ _ _ _ _
# (_) | | | | | | | |
# _ __ ___ _ _ __ | |_ | |__ | | __ _ _ __ | | __
# | '_ \ / _ \ | || '_ \ | __|| '_ \ | | / _` || '_ \ | |/ /
# | |_) || (_) || || | | || |_ | |_) || || (_| || | | || <
# | .__/ \___/ |_||_| |_| \__||_.__/ |_| \__,_||_| |_||_|\_\
# | |
# |_|
#
# This file is part of the 'rich-iannone/pointblank' package.
#
# (c) Richard Iannone <riannone@me.com>
#
# For full copyright and license information, please look at
# https://rich-iannone.github.io/pointblank/LICENSE.html
#
# nocov start
#' Thoroughly scan a table to better understand it
#'
#' @description
#' Generate an HTML report that scours the input table data. Before calling up
#' an *agent* to validate the data, it's a good idea to understand the data with
#' some level of precision. Make this the initial step of a well-balanced
#' *data quality reporting* workflow. The reporting output contains several
#' sections to make everything more digestible, and these are:
#'
#' \describe{
#' \item{Overview}{Table dimensions, duplicate row counts, column types, and
#' reproducibility information}
#' \item{Variables}{A summary for each table variable and further statistics and
#' summaries depending on the variable type}
#' \item{Interactions}{A matrix plot that shows interactions between variables}
#' \item{Correlations}{A set of correlation matrix plots for numerical
#' variables}
#' \item{Missing Values}{A summary figure that shows the degree of missingness
#' across variables}
#' \item{Sample}{A table that provides the head and tail rows of the dataset}
#' }
#'
#' The resulting object can be printed to make it viewable in the RStudio
#' Viewer. It's also a `"shiny.tag.list"` object and so can be integrated in R
#' Markdown HTML output or in Shiny applications. If you need the output HTML,
#' it's to export that to a file with the [export_report()] function.
#'
#' @param tbl The input table. This can be a data frame, tibble, a `tbl_dbi`
#' object, or a `tbl_spark` object.
#' @param sections The sections to include in the finalized `Table Scan` report.
#' A string with key characters representing section names is required here.
#' The default string is `"OVICMS"` wherein each letter stands for the
#' following sections in their default order: `"O"`: `"overview"`; `"V"`:
#' `"variables"`; `"I"`: `"interactions"`; `"C"`: `"correlations"`; `"M"`:
#' `"missing"`; and `"S"`: `"sample"`. This string can be comprised of less
#' characters and the order can be changed to suit the desired layout of the
#' report. For `tbl_dbi` and `tbl_spark` objects supplied to `tbl`, the
#' `"interactions"` and `"correlations"` sections are currently excluded.
#' @param navbar Should there be a navigation bar anchored to the top of the
#' report page? By default this is `TRUE`.
#' @param width An optional fixed width (in pixels) for the HTML report. By
#' default, no fixed width is applied.
#' @param lang The language to use for label text in the report. By default,
#' `NULL` will create English (`"en"`) text. Other options include French
#' (`"fr"`), German (`"de"`), Italian (`"it"`), Spanish (`"es"`), Portuguese
#' (`"pt"`), Turkish (`"tr"`), Chinese (`"zh"`), Russian (`"ru"`), Polish
#' (`"pl"`), Danish (`"da"`), Swedish (`"sv"`), and Dutch (`"nl"`).
#' @param locale An optional locale ID to use for formatting values in the
#' report according the locale's rules. Examples include `"en_US"` for English
#' (United States) and `"fr_FR"` for French (France); more simply, this can be
#' a language identifier without a country designation, like `"es"` for
#' Spanish (Spain, same as `"es_ES"`).
#'
#' @return A `ptblank_tbl_scan` object.
#'
#' @section Examples:
#'
#' Get an HTML document that describes all of the data in the `dplyr::storms`
#' dataset.
#'
#' ```r
#' tbl_scan <- scan_data(tbl = dplyr::storms)
#' ```
#'
#' \if{html}{
#' \out{
#' `r pb_get_image_tag(file = "man_scan_data_1.png")`
#' }
#' }
#'
#' @family Planning and Prep
#' @section Function ID:
#' 1-1
#'
#' @export
scan_data <- function(
tbl,
sections = "OVICMS",
navbar = TRUE,
width = NULL,
lang = NULL,
locale = NULL
) {
# If the document is undergoing knitting then adjust some options
if (isTRUE(getOption("knitr.in.progress"))) {
navbar <- FALSE
if (is.null(width)) {
width <- "720px"
}
}
if (!is.null(width) && is.numeric(width)) {
width <- gt::px(width[1])
}
if (interactive()) {
cli::cli_div(
theme = list(
span.time_taken = list(color = "magenta", "font-weight" = "normal")
)
)
}
# Stop function if the length of the `sections` vector is not 1
if (length(sections) != 1) {
stop(
"The length of the `section` vector must be 1.",
call. = FALSE
)
}
# Stop function if the `sections` vector is not of the `character` type
if (!is.character(sections)) {
stop(
"The `section` vector must be of the `character` class.",
call. = FALSE
)
}
# Stop function if the length of `sections` string is not at least 1
if (nchar(sections) < 1) {
stop(
"At least one `section` is required.",
call. = FALSE
)
}
# Stop function if there are unrecognized sections in `sections`
if (!all(unique(unlist(strsplit(toupper(sections), ""))) %in%
c("O", "V", "I", "C", "M", "S"))) {
stop(
"All key characters provided in `sections` must be valid:\n",
"* Allowed values are \"O\", \"V\", \"I\", \"C\", \"M\", and \"S\".",
call. = FALSE
)
}
sections_abbrev <- unique(unlist(strsplit(toupper(sections), "")))
# Transform the `sections` string to a vector of section names
sections <-
vapply(
unique(unlist(strsplit(toupper(sections), ""))),
FUN.VALUE = character(1),
USE.NAMES = FALSE,
FUN = function(x) {
switch(
x,
O = "overview",
V = "variables",
I = "interactions",
C = "correlations",
M = "missing",
S = "sample"
)
}
)
# Limit components if a `tbl_dbi` object is supplied as the `tbl`
if (inherits(tbl, "tbl_dbi") || inherits(tbl, "tbl_spark")) {
sections <- setdiff(sections, c("interactions", "correlations"))
}
if (any(c("interactions", "correlations") %in% sections)) {
if (!requireNamespace("ggplot2", quietly = TRUE)) {
stop(
"The `interactions` and `correlations` sections require ",
"the ggplot2 package:\n",
"* It can be installed with `install.packages(\"ggplot2\")`.",
call. = FALSE
)
}
if (!requireNamespace("ggforce", quietly = TRUE)) {
stop(
"The `interactions` and `correlations` sections require ",
"the ggforce package:\n",
"* It can be installed with `install.packages(\"ggforce\")`.",
call. = FALSE
)
}
}
# Normalize the reporting language identifier and stop if necessary
lang <- normalize_reporting_language(lang)
# Set the `locale` to the `lang` value if `locale` isn't set
if (is.null(locale)) locale <- lang
# Attempt to get the table name through `match.call()` and `deparse()`
tbl_name <- deparse(match.call()$tbl)
# In the case where the table is piped in a `"."` is the
# result; since it's unknown, we treat it as NA
if (length(tbl_name) == 1 && tbl_name == ".") {
tbl_name <- NA_character_
} else if (length(tbl_name) > 1) {
tbl_name <- NA_character_
}
# Get the starting time for the table scan
scan_start_time <- Sys.time()
if (interactive()) {
cli::cli_h1(
paste0(
"Data Scan started. Processing ",
length(unique(sections_abbrev)), " ",
"section",
ifelse(length(unique(sections_abbrev)) > 1, "s", ""),
"."
)
)
}
table_scan <-
build_table_scan_page(
data = tbl,
tbl_name = tbl_name,
sections = sections,
navbar = navbar,
lang = lang,
locale = locale,
width = width
)
# Get the ending time for the table scan
scan_end_time <- Sys.time()
# Get the time duration for the table scan
time_diff_s <-
get_time_duration(
start_time = scan_start_time,
end_time = scan_end_time
)
if (interactive()) {
cli::cli_h1(
paste0(
"Data Scan finished. ", print_time(time_diff_s)
)
)
}
table_scan
}
#
# Generate section components such as tables and plots
#
probe_overview_stats <- function(
data,
lang,
locale
) {
n_cols <- get_table_total_columns(data = data)
n_rows <- get_table_total_rows(data = data)
n_rows_distinct <- get_table_total_distinct_rows(data = data)
duplicate_rows <- n_rows - n_rows_distinct
na_cells <- get_table_total_missing_values(data = data)
tbl_info <- get_tbl_information(tbl = data)
tbl_src <- tbl_info$tbl_src
r_col_types <- tbl_info$r_col_types
data_overview_tbl <-
dplyr::tibble(
label = c(
get_lsv("table_scan/tbl_lab_columns")[[lang]],
get_lsv("table_scan/tbl_lab_rows")[[lang]],
get_lsv("table_scan/tbl_lab_NAs")[[lang]],
get_lsv("table_scan/tbl_lab_duplicate_rows")[[lang]]
),
value = c(
n_cols, n_rows, na_cells, duplicate_rows
),
pct = NA_real_
) %>%
dplyr::mutate(pct = dplyr::case_when(
dplyr::row_number() == 3 ~ na_cells / (n_cols * n_rows),
dplyr::row_number() == 4 ~ duplicate_rows / n_rows,
TRUE ~ NA_real_
))
r_col_types_tbl <-
dplyr::tibble(r_col_types = r_col_types) %>%
dplyr::group_by(r_col_types) %>%
dplyr::summarize(count = dplyr::n()) %>%
dplyr::arrange(dplyr::desc(count)) %>%
utils::head(6E8)
data_overview_gt <-
gt::gt(data_overview_tbl) %>%
gt::fmt_markdown(columns = "label") %>%
gt::fmt_number(columns = "value", decimals = 0, locale = locale) %>%
gt::fmt_percent(columns = "pct", decimals = 2, locale = locale) %>%
gt::cols_merge(columns = c("value", "pct"), pattern = "{1} ({2})") %>%
gt::cols_align(align = "right", columns = "value") %>%
gt::text_transform(
locations = gt::cells_body(columns = "value", rows = 1:2),
fn = function(x) {
gsub(" (NA)", "", x, fixed = TRUE)
}
) %>%
gt::text_transform(
locations = gt::cells_body(columns = "value", rows = 3:4),
fn = function(x) {
gsub("^0 \\(.*", "0", x)
}
) %>%
gt::tab_options(
column_labels.hidden = TRUE,
table.border.top.style = "none",
table.width = "100%"
)
r_col_types_gt <-
gt::gt(r_col_types_tbl) %>%
gt::fmt_number(columns = "count", decimals = 0, locale = locale) %>%
gt::cols_align(align = "right", columns = "count") %>%
gt::tab_options(
column_labels.hidden = TRUE,
table.border.top.style = "none",
table.width = "100%"
)
reproducibility_gt <-
dplyr::tibble(
label = c(
get_lsv("table_scan/tbl_lab_scan_build_time")[[lang]],
get_lsv("table_scan/tbl_lab_pointblank_version")[[lang]],
get_lsv("table_scan/tbl_lab_r_version")[[lang]],
get_lsv("table_scan/tbl_lab_system_os")[[lang]]
),
value = c(
paste0("`", strftime(Sys.time()), "`"),
paste0("`", as.character(utils::packageVersion("pointblank")), "`"),
paste0(
R.version$version.string,
"<br><span style=\"font-size: smaller;\"><em>",
R.version$nickname, "</em></span>") %>%
gsub("-", "–", .),
paste0("`", R.version$platform, "`")
)
) %>%
gt::gt() %>%
gt::fmt_markdown(columns = gt::everything()) %>%
gt::tab_options(
column_labels.hidden = TRUE,
table.border.top.style = "none",
table.width = "100%"
)
list(
data_overview_gt = data_overview_gt,
r_col_types_gt = r_col_types_gt,
reproducibility_gt = reproducibility_gt
)
}
probe_columns <- function(
data,
lang,
locale
) {
n_rows <- get_table_total_rows(data = data)
col_names <- get_table_column_names(data = data)
col_labels <- get_tbl_df_column_labels(data = data)
tbl_info <- get_tbl_information(tbl = data)
col_types <- gsub("integer64", "integer", tbl_info$r_col_types, fixed = TRUE)
column_descriptions <-
lapply(
seq_along(col_names),
FUN = function(x) {
col_name <- col_names[x]
col_type <- col_types[x]
col_label <- col_labels[x]
switch(
col_type,
character = probe_columns_character(
data = data,
column = col_name,
col_label = col_label,
n_rows = n_rows,
lang = lang,
locale = locale
),
Date = probe_columns_date(
data = data,
column = col_name,
col_label = col_label,
n_rows = n_rows,
lang = lang,
locale = locale
),
factor = probe_columns_factor(
data = data,
column = col_name,
col_label = col_label,
n_rows = n_rows,
lang = lang,
locale = locale
),
integer = probe_columns_integer(
data = data,
column = col_name,
col_label = col_label,
n_rows = n_rows,
lang = lang,
locale = locale
),
logical = probe_columns_logical(
data = data,
column = col_name,
col_label = col_label,
n_rows = n_rows,
lang = lang,
locale = locale
),
numeric = probe_columns_numeric(
data = data,
column = col_name,
col_label = col_label,
n_rows = n_rows,
lang = lang,
locale = locale
),
POSIXct = probe_columns_posix(
data = data,
column = col_name,
col_label = col_label,
n_rows = n_rows,
lang = lang,
locale = locale
),
probe_columns_other(
data = data,
column = col_name,
col_label = col_label,
n_rows = n_rows
)
)
}
)
column_descriptions
}
get_column_description_gt <- function(
data_column,
n_rows,
lang,
locale
) {
distinct_count <- get_table_column_distinct_rows(data_column = data_column)
na_cells <- get_table_column_na_values(data_column = data_column)
inf_cells <- get_table_column_inf_values(data_column = data_column)
column_description_tbl <-
dplyr::tibble(
label = c(
get_lsv("table_scan/tbl_lab_distinct")[[lang]],
get_lsv("table_scan/tbl_lab_NAs")[[lang]],
"`Inf`/`-Inf`"
),
value = c(distinct_count, na_cells, inf_cells)
)
column_description_gt <-
gt::gt(column_description_tbl) %>%
gt::fmt_markdown(columns = "label") %>%
gt::fmt_number(columns = "value", decimals = 0, locale = locale) %>%
gt::tab_options(
column_labels.hidden = TRUE,
table.border.top.style = "none",
table.width = "100%"
)
column_description_gt
}
get_numeric_stats_gt <- function(
data_column,
lang,
locale
) {
summary_stats <- get_table_column_summary(data_column = data_column)
column_stats_tbl <-
dplyr::tibble(
label = c(
get_lsv("table_scan/tbl_lab_mean")[[lang]],
get_lsv("table_scan/tbl_lab_minimum")[[lang]],
get_lsv("table_scan/tbl_lab_maximum")[[lang]]
),
value = c(
summary_stats$mean,
summary_stats$min,
summary_stats$max
)
)
column_stats_gt <-
gt::gt(column_stats_tbl) %>%
gt::fmt_markdown(columns = "label") %>%
gt::fmt_number(
columns = "value",
decimals = 2,
drop_trailing_zeros = TRUE,
locale = locale
) %>%
gt::tab_options(
column_labels.hidden = TRUE,
table.border.top.style = "none",
table.width = "100%"
)
column_stats_gt
}
get_quantile_stats_gt <- function(
data_column,
lang,
locale
) {
if (inherits(data_column, "tbl_dbi")) {
data_column <- dplyr::filter(data_column, !is.na(1))
n_rows <- get_table_total_rows(data = data_column)
if (n_rows <= 1000) {
data_column <- dplyr::collect(data_column)
quantile_stats <- get_df_column_qtile_stats(data_column = data_column)
} else {
quantile_stats <- get_dbi_column_qtile_stats(data_column = data_column)
}
} else {
quantile_stats <- get_df_column_qtile_stats(data_column = data_column)
}
quantile_stats_tbl <-
dplyr::tibble(
label = c(
get_lsv("table_scan/tbl_lab_minimum")[[lang]],
get_lsv("table_scan/tbl_lab_5_percentile")[[lang]],
"Q1",
get_lsv("table_scan/tbl_lab_median")[[lang]],
"Q3",
get_lsv("table_scan/tbl_lab_95_percentile")[[lang]],
get_lsv("table_scan/tbl_lab_maximum")[[lang]],
get_lsv("table_scan/tbl_lab_range")[[lang]],
"IQR"
),
value = c(
quantile_stats$min,
quantile_stats$p05,
quantile_stats$q_1,
quantile_stats$med,
quantile_stats$q_3,
quantile_stats$p95,
quantile_stats$max,
quantile_stats$range,
quantile_stats$iqr
)
)
quantile_stats_gt <-
gt::gt(quantile_stats_tbl) %>%
gt::fmt_number(
columns = "value",
decimals = 2,
locale = locale
) %>%
gt::tab_options(
column_labels.hidden = TRUE,
table.border.top.style = "none",
table.width = "100%"
)
quantile_stats_gt
}
get_df_column_quantile_stats <- function(data_column) {
if (inherits(data_column, "tbl_spark")) {
quantile_stats <- get_spark_column_qtile_stats(data_column = data_column)
} else {
quantile_stats <- get_df_column_qtile_stats(data_column = data_column)
}
quantile_stats
}
get_descriptive_stats_gt <- function(
data_column,
lang,
locale
) {
if (inherits(data_column, "tbl_dbi") ||
inherits(data_column, "tbl_spark")) {
data_column <- dplyr::filter(data_column, !is.na(1))
mean <- get_dbi_column_mean(data_column = data_column)
variance <-
get_dbi_column_variance(
data_column = data_column,
mean_value = mean
)
sd <- variance^0.5
cv <- sd / mean
descriptive_stats <-
dplyr::tibble(mean = mean, variance = variance, sd = sd, cv = cv)
descriptive_stats <-
dplyr::summarize_all(descriptive_stats, ~ round(., 2))
descriptive_stats <- as.list(descriptive_stats)
} else {
# Create simple function to obtain the coefficient of variation
cv <- function(x) stats::sd(x, na.rm = TRUE) / mean(x, na.rm = TRUE)
descriptive_stats <-
dplyr::summarize_all(
data_column,
.funs = list(
mean = ~ mean(., na.rm = TRUE),
variance = ~ stats::var(., na.rm = TRUE),
sd = ~ stats::sd(., na.rm = TRUE),
cv = ~ cv(.)
)
)
descriptive_stats <-
dplyr::summarize_all(descriptive_stats, ~ round(., 2))
descriptive_stats <- as.list(descriptive_stats)
}
descriptive_stats_tbl <-
dplyr::tibble(
label = c(
get_lsv("table_scan/tbl_lab_mean")[[lang]],
get_lsv("table_scan/tbl_lab_variance")[[lang]],
get_lsv("table_scan/tbl_lab_standard_deviation")[[lang]],
get_lsv("table_scan/tbl_lab_cov")[[lang]]
),
value = c(
descriptive_stats$mean,
descriptive_stats$variance,
descriptive_stats$sd,
descriptive_stats$cv
)
)
gt::gt(descriptive_stats_tbl) %>%
gt::fmt_number(
columns = "value",
decimals = 2,
drop_trailing_zeros = FALSE,
locale = locale
) %>%
gt::tab_options(
column_labels.hidden = TRUE,
table.border.top.style = "none",
table.width = "100%"
)
}
get_common_values_gt <- function(
data_column,
lang,
locale
) {
n_rows <- get_table_total_rows(data = data_column)
common_values_tbl <- dplyr::group_by_at(data_column, 1)
common_values_tbl <- dplyr::count(common_values_tbl)
common_values_tbl <- dplyr::arrange(common_values_tbl, dplyr::desc(n))
common_values_tbl <- utils::head(common_values_tbl, 6E8)
common_values_tbl <- dplyr::ungroup(common_values_tbl)
n_rows_common_values_tbl <-
dplyr::pull(dplyr::count(common_values_tbl, name = "n", wt = n), n)
if (n_rows_common_values_tbl > 10) {
top_ten_rows <- utils::head(common_values_tbl, 10)
other_values_tbl <-
dplyr::anti_join(
common_values_tbl,
top_ten_rows,
by = colnames(common_values_tbl)
)
other_values_tbl <- dplyr::arrange(other_values_tbl, dplyr::desc(n))
other_values_tbl <- utils::head(other_values_tbl, 6E8)
other_values_distinct <-
dplyr::pull(dplyr::count(other_values_tbl, name = "n", wt = n), n)
other_values_n <- dplyr::select(other_values_tbl, n)
other_values_n <- dplyr::group_by(other_values_n)
other_values_n <-
dplyr::summarize(other_values_n, sum = sum(n, na.rm = TRUE))
other_values_n <- dplyr::ungroup(other_values_n)
other_values_n <- dplyr::pull(other_values_n, sum)
common_values_tbl <-
dplyr::bind_rows(
top_ten_rows %>%
utils::head(9) %>%
dplyr::collect() %>%
dplyr::mutate(
n = as.numeric(n),
frequency = n / n_rows
) %>%
dplyr::rename(value = 1) %>%
dplyr::mutate(value = as.character(value)),
dplyr::tibble(
value = paste0(
"**",
get_lsv("table_scan/other_values_text")[[lang]],
"** (", other_values_distinct, ")"
),
n = other_values_n,
frequency = other_values_n / n_rows
)
)
n_rows_summary_tbl <- nrow(common_values_tbl)
common_values_gt <-
common_values_tbl %>%
gt::gt() %>%
gt::cols_label(
value = get_lsv("table_scan/tbl_lab_value")[[lang]],
n = get_lsv("table_scan/tbl_lab_count")[[lang]],
frequency = get_lsv("table_scan/tbl_lab_frequency")[[lang]],
) %>%
gt_missing(columns = "value", missing_text = "**NA**") %>%
gt::text_transform(
locations = gt::cells_body(columns = "value"),
fn = function(x) {
ifelse(
x == "**NA**",
"<code>NA</code>",
x
)
}
) %>%
gt::fmt_percent(
columns = "frequency",
decimals = 1,
locale = locale
) %>%
gt::fmt_markdown(
columns = "value",
rows = n_rows_summary_tbl
) %>%
gt::tab_options(
table.border.top.style = "none",
table.width = "100%"
)
} else {
common_values_gt <-
dplyr::collect(common_values_tbl) %>%
dplyr::mutate(frequency = n / n_rows) %>%
dplyr::rename(value = 1) %>%
dplyr::mutate(value = as.character(value)) %>%
gt::gt() %>%
gt::cols_label(
value = get_lsv("table_scan/tbl_lab_value")[[lang]],
n = get_lsv("table_scan/tbl_lab_count")[[lang]],
frequency = get_lsv("table_scan/tbl_lab_frequency")[[lang]],
) %>%
gt_missing(columns = "value", missing_text = "**NA**") %>%
gt::text_transform(
locations = gt::cells_body(columns = "value"),
fn = function(x) ifelse(x == "**NA**", "<code>NA</code>", x)
) %>%
gt::fmt_percent(
columns = "frequency",
decimals = 1,
locale = locale
) %>%
gt::fmt_markdown(columns = "value") %>%
gt::tab_options(
table.border.top.style = "none",
table.width = "100%"
)
}
common_values_gt
}
get_head_tail_slices <- function(data_column) {
head_tail_slices <- probe_sample(data = data_column)
head_tail_slices$probe_sample
}
get_top_bottom_slice <- function(
data_column,
lang,
locale
) {
n_rows <- get_table_total_rows(data = data_column)
data_column_freq <-
data_column %>%
dplyr::group_by_at(1) %>%
dplyr::count() %>%
dplyr::ungroup()
name_1 <- rlang::sym(get_lsv("table_scan/tbl_lab_value")[[lang]])
name_2 <- rlang::sym(get_lsv("table_scan/tbl_lab_count")[[lang]])
data_column_freq <-
dplyr::select(data_column_freq, !!name_1 := 1, !!name_2 := 2)
data_column_top_n <- dplyr::arrange(data_column_freq, dplyr::desc(!!name_2))
data_column_top_n <- utils::head(data_column_top_n, 10)
data_column_top_n <- dplyr::collect(data_column_top_n)
data_column_top_n[, 3] <- data_column_top_n[, 2, drop = TRUE] / n_rows
colnames(data_column_top_n)[3] <-
get_lsv("table_scan/tbl_lab_frequency")[[lang]]
data_column_bottom_n <- dplyr::arrange(data_column_freq, !!name_2)
data_column_bottom_n <- utils::head(data_column_bottom_n, 10)
data_column_bottom_n <- dplyr::collect(data_column_bottom_n)
data_column_bottom_n[, 3] <-
data_column_bottom_n[, 2, drop = TRUE] / n_rows
colnames(data_column_bottom_n)[3] <-
get_lsv("table_scan/tbl_lab_frequency")[[lang]]
top_slice <-
get_table_slice_gt(
data_column = data_column_top_n,
locale = locale
)
bottom_slice <-
get_table_slice_gt(
data_column = data_column_bottom_n,
locale = locale
)
list(
top_slice = top_slice,
bottom_slice = bottom_slice
)
}
get_character_nchar_stats_gt <- function(
data_column,
lang,
locale
) {
character_nchar_stats <-
data_column %>%
dplyr::mutate_all(.funs = nchar) %>%
dplyr::rename(nchar = 1) %>%
dplyr::summarize_all(
.funs = list(
mean = ~ mean(., na.rm = TRUE),
min = ~ min(., na.rm = TRUE),
max = ~ max(., na.rm = TRUE)
)
) %>%
dplyr::collect() %>%
dplyr::mutate_all(.funs = as.numeric) %>%
as.list()
dplyr::tribble(
~label, ~value,
get_lsv("table_scan/tbl_lab_mean")[[lang]], character_nchar_stats$mean,
get_lsv("table_scan/tbl_lab_minimum")[[lang]], character_nchar_stats$min,
get_lsv("table_scan/tbl_lab_maximum")[[lang]], character_nchar_stats$max
) %>%
gt::gt() %>%
gt::fmt_number(
columns = "value",
decimals = 1,
locale = locale
) %>%
gt::tab_options(
column_labels.hidden = TRUE,
table.border.top.style = "none",
table.width = "100%"
)
}
get_character_nchar_plot <- function(
data_column,
lang,
locale
) {
suppressWarnings(
plot_histogram <-
get_table_column_histogram(
data_column = data_column,
lang = lang,
locale = locale
)
)
# Save PNG file to disk
ggplot2::ggsave(
filename = "temp_histogram_ggplot.png",
plot = plot_histogram,
device = "png",
dpi = 300,
width = 5,
height = 3
)
# Wait longer for file to be written on async file systems
Sys.sleep(0.25)
image_html <-
htmltools::tags$div(
style = "text-align: center",
htmltools::HTML(
gt::local_image(
filename = "temp_histogram_ggplot.png",
height = "500px"
) %>%
gsub("height:500px", "width: 100%", .)
)
)
file.remove("temp_histogram_ggplot.png")
image_html
}
probe_columns_numeric <- function(
data,
column,
col_label,
n_rows,
lang,
locale
) {
data_column <- dplyr::select(data, {{ column }})
column_description_gt <-
get_column_description_gt(
data_column = data_column,
n_rows = n_rows,
lang = lang,
locale = locale
)
column_numeric_stats_gt <-
get_numeric_stats_gt(
data_column = data_column,
lang = lang,
locale = locale
)
column_quantile_stats_gt <-
get_quantile_stats_gt(
data_column = data_column,
lang = lang,
locale = locale
)
column_descriptive_stats_gt <-
get_descriptive_stats_gt(
data_column = data_column,
lang = lang,
locale = locale
)
column_common_values_gt <-
get_common_values_gt(
data_column = data_column,
lang = lang,
locale = locale
)
top_bottom_slices_gt <-
get_top_bottom_slice(
data_column = data_column,
lang = lang,
locale = locale
)
list(
column_name = column,
column_label = col_label,
column_type = "numeric",
column_description_gt = column_description_gt,
column_stats_gt = column_numeric_stats_gt,
column_quantile_gt = column_quantile_stats_gt,
column_descriptive_gt = column_descriptive_stats_gt,
column_common_gt = column_common_values_gt,
column_top_slice_gt = top_bottom_slices_gt$top_slice,
column_bottom_slice_gt = top_bottom_slices_gt$bottom_slice
)
}
probe_columns_integer <- function(
data,
column,
col_label,
n_rows,
lang,
locale
) {
probe_columns_integer_list <-
probe_columns_numeric(
data = data,
column = column,
col_label = col_label,
n_rows = n_rows,
lang = lang,
locale = locale
)
probe_columns_integer_list$column_type <- "integer"
probe_columns_integer_list
}
probe_columns_character <- function(
data,
column,
col_label,
n_rows,
lang,
locale
) {
data_column <- data %>% dplyr::select({{ column }})
column_description_gt <-
get_column_description_gt(
data_column = data_column,
n_rows = n_rows,
lang = lang,
locale = locale
)
column_common_values_gt <-
get_common_values_gt(
data_column = data_column,
lang = lang,
locale = locale
)
column_nchar_stats_gt <-
get_character_nchar_stats_gt(
data_column = data_column,
lang = lang,
locale = locale
)
column_nchar_plot <-
get_character_nchar_plot(
data_column = data_column,
lang = lang,
locale = locale
)
list(
column_name = column,
column_label = col_label,
column_type = "character",
column_description_gt = column_description_gt,
column_common_gt = column_common_values_gt,
column_nchar_gt = column_nchar_stats_gt,
column_nchar_plot = column_nchar_plot
)
}
probe_columns_logical <- function(
data,
column,
col_label,
n_rows,
lang,
locale
) {
data_column <- data %>% dplyr::select({{ column }})
column_description_gt <-
get_column_description_gt(
data_column = data_column,
n_rows = n_rows,
lang = lang,
locale = locale
)
list(
column_name = column,
column_label = col_label,
column_type = "logical",
column_description_gt = column_description_gt
)
}
probe_columns_factor <- function(
data,
column,
col_label,
n_rows,
lang,
locale
) {
data_column <- data %>% dplyr::select({{ column }})
column_description_gt <-
get_column_description_gt(
data_column = data_column,
n_rows = n_rows,
lang = lang,
locale = locale
)
list(
column_name = column,
column_label = col_label,
column_type = "factor",
column_description_gt = column_description_gt
)
}
probe_columns_date <- function(
data,
column,
col_label,
n_rows,
lang,
locale
) {
data_column <- data %>% dplyr::select({{ column }})
column_description_gt <-
get_column_description_gt(
data_column = data_column,
n_rows = n_rows,
lang = lang,
locale = locale
)
list(
column_name = column,
column_label = col_label,
column_type = "date",
column_description_gt = column_description_gt
)
}
probe_columns_posix <- function(
data,
column,
col_label,
n_rows,
lang,
locale
) {
data_column <- data %>% dplyr::select({{ column }})
column_description_gt <-
get_column_description_gt(
data_column = data_column,
n_rows = n_rows,
lang = lang,
locale = locale
)
list(
column_name = column,
column_label = col_label,
column_type = "datetime",
column_description_gt = column_description_gt
)
}
probe_columns_other <- function(
data,
column,
col_label,
n_rows
) {
data_column <- data %>% dplyr::select({{ column }})
column_classes <- paste(class(data_column), collapse = ", ")
list(
column_name = column,
column_label = col_label,
column_type = column_classes
)
}
probe_interactions <- function(data) {
category_cutoff <- 5
tbl_info <- get_tbl_information(tbl = data)
col_names <- tbl_info$col_names
col_types <- tbl_info$r_col_types
columns_char <- col_names[col_types == "character"]
columns_numeric <- col_names[col_types %in% c("integer", "numeric")]
col_names <- c(columns_char, columns_numeric)
columns_char_distinct_count <-
vapply(
columns_char, FUN.VALUE = integer(1), USE.NAMES = FALSE,
FUN = function(x) {
data %>%
dplyr::select(dplyr::one_of(x)) %>%
dplyr::distinct() %>%
dplyr::count() %>%
dplyr::pull(n)
}
)
# Remove the character-based columns from the vector of
# `col_names` if there are too many categories
col_names <-
col_names %>%
base::setdiff(columns_char[columns_char_distinct_count > category_cutoff])
# Create a ggplot2 plot matrix with the data
plot_matrix <-
data %>%
dplyr::select(dplyr::one_of(col_names)) %>%
ggplot2::ggplot(ggplot2::aes(x = .panel_x, y = .panel_y)) +
ggplot2::geom_point(alpha = 0.50, shape = 16, size = 1) +
ggforce::geom_autodensity() +
ggplot2::geom_density2d() +
ggforce::facet_matrix(
rows = ggplot2::vars(gt::everything()), layer.diag = 2, layer.upper = 3,
grid.y.diag = FALSE) +
ggplot2::theme_minimal() +
ggplot2::theme(
axis.text.x = ggplot2::element_text(
angle = 90,
vjust = 0.5,
hjust = 1,
size = 8
),
axis.text.y = ggplot2::element_text(size = 8)
)
# Save PNG file to disk
ggplot2::ggsave(
filename = "temp_matrix_ggplot.png",
plot = plot_matrix,
device = "png",
dpi = 300,
width = length(col_names),
height = length(col_names)
)
# Wait longer for file to be written on async file systems
Sys.sleep(0.25)
image_html <-
htmltools::tags$div(
style = "text-align: center",
htmltools::HTML(
gt::local_image(
filename = "temp_matrix_ggplot.png",
height = "500px"
) %>%
gsub("height:500px", "width: 100%", .)
)
)
file.remove("temp_matrix_ggplot.png")
list(
probe_interactions = image_html
)
}
probe_correlations <- function(data) {
tbl_info <- get_tbl_information(tbl = data)
col_names <- tbl_info$col_names
col_types <- tbl_info$r_col_types
columns_numeric <- col_names[col_types %in% c("integer", "numeric")]
if (length(columns_numeric) < 3) {
return(
list(
probe_corr_pearson = NULL,
probe_corr_kendall = NULL,
probe_corr_spearman = NULL
)
)
}
data_corr <- dplyr::select(data, dplyr::one_of(columns_numeric))
corr_pearson <-
stats::cor(data_corr, method = "pearson", use = "pairwise.complete.obs")
corr_kendall <-
stats::cor(data_corr, method = "kendall", use = "pairwise.complete.obs")
corr_spearman <-
stats::cor(data_corr, method = "spearman", use = "pairwise.complete.obs")
labels_vec <- seq_along(columns_numeric)
names(labels_vec) <- columns_numeric
pearson_plot <- get_corr_plot(mat = corr_pearson, labels_vec = labels_vec)
kendall_plot <- get_corr_plot(mat = corr_kendall, labels_vec = labels_vec)
spearman_plot <- get_corr_plot(mat = corr_spearman, labels_vec = labels_vec)
list(
probe_corr_pearson = pearson_plot,
probe_corr_kendall = kendall_plot,
probe_corr_spearman = spearman_plot
)
}
get_corr_plot <- function(
mat,
labels_vec
) {
corr_df <-
as.data.frame(as.table(mat)) %>%
dplyr::mutate(Freq = ifelse(Var1 == Var2, NA_real_, Freq)) %>%
dplyr::mutate(Var1 = factor(Var1, levels = names(labels_vec))) %>%
dplyr::mutate(Var2 = factor(Var2, levels = rev(names(labels_vec))))
plot_missing <-
corr_df %>%
ggplot2::ggplot(ggplot2::aes(x = Var1, y = Var2, fill = Freq)) +
ggplot2::geom_tile(color = "white", linejoin = "bevel") +
ggplot2::scale_fill_gradientn(
colours = c("blue", "white", "red"),
na.value = "gray30",
limits = c(-1, 1)
) +
ggplot2::labs(x = "", y = "") +
ggplot2::theme_minimal() +
ggplot2::theme(
axis.text.x = ggplot2::element_text(
angle = 90, vjust = 0.5, hjust = 1, size = 10
),
axis.text.y = ggplot2::element_text(size = 10),
panel.grid = ggplot2::element_blank(),
legend.direction = "horizontal",
legend.title = ggplot2::element_blank(),
legend.position = c(0.5, 1.03),
plot.margin = ggplot2::unit(c(1, 0.5, 0, 0), "cm"),
legend.key.width = ggplot2::unit(2.0, "cm"),
legend.key.height = ggplot2::unit(3.0, "mm")
)
temp_filename <- paste0("temp_correlation_ggplot-", gt::random_id(), ".png")
# Save PNG file to disk
ggplot2::ggsave(
filename = temp_filename,
plot = plot_missing,
device = "png",
dpi = 300,
width = length(labels_vec),
height = length(labels_vec)
)
# Wait longer for file to be written on async file systems
Sys.sleep(0.25)
image_html <-
htmltools::tags$div(
style = "text-align: center",
htmltools::HTML(
gt::local_image(filename = temp_filename, height = "500px") %>%
gsub("height:500px", "width: 100%", .)
)
)
file.remove(temp_filename)
image_html
}
probe_missing <- function(data) {
n_rows <- get_table_total_rows(data = data)
col_names <- get_table_column_names(data = data)
if (n_rows <= 1000) {
data <- dplyr::collect(data)
}
if (inherits(data, "tbl_dbi")) {
frequency_tbl <- get_tbl_dbi_missing_tbl(data = data)
} else if (inherits(data, "tbl_spark")) {
frequency_tbl <- get_tbl_spark_missing_tbl(data = data)
} else {
frequency_tbl <- get_tbl_df_missing_tbl(data = data)
}
missing_by_column_tbl <- get_missing_by_column_tbl(data = data)
missing_value_plot <-
get_missing_value_plot(
data = data,
frequency_tbl = frequency_tbl,
missing_by_column_tbl = missing_by_column_tbl
)
# Save PNG file to disk
ggplot2::ggsave(
filename = "temp_missing_ggplot.png",
plot = missing_value_plot,
device = "png",
dpi = 300,
width = length(col_names) * 0.8,
height = 6
)
# Wait longer for file to be written on async file systems
Sys.sleep(0.25)
image_html <-
htmltools::tags$div(
style = "text-align: center",
htmltools::HTML(
gt::local_image(
filename = "temp_missing_ggplot.png",
height = "500px"
) %>%
gsub("height:500px", "width: 100%", .)
)
)
file.remove("temp_missing_ggplot.png")
list(probe_missing = image_html)
}
probe_sample <- function(data) {
probe_sample <-
data %>%
gt::gt_preview(top_n = 5, bottom_n = 5) %>%
gt_missing(columns = gt::everything(), missing_text = "**NA**") %>%
gt::text_transform(
locations = gt::cells_body(columns = gt::everything()),
fn = function(x) ifelse(x == "**NA**", "<code>NA</code>", x)
) %>%
gt::tab_options(table.width = "100%") %>%
gt::tab_style(
style = gt::cell_text(font = "monospace"),
locations = list(
gt::cells_column_labels(),
gt::cells_body()
)
) %>%
gt::text_transform(
locations = gt::cells_body(),
fn = function(x) {
ifelse(
x == "<code>NA</code>",
"<code style=\"color:#3A70FB\">NA</code>",
x
)
}
)
list(probe_sample = probe_sample)
}
bootstrap_lib <- function() {
htmltools::htmlDependency(
name = "bootstrap",
version = "3.3.2",
package = "pointblank",
src = file.path("lib", "bootstrap"),
script = "js/bootstrap.min.js",
stylesheet = "css/bootstrap.min.css",
meta = list(viewport = "width=device-width, initial-scale=1")
)
}
build_table_scan_page <- function(
data,
tbl_name,
sections,
navbar,
lang,
locale,
width = NULL
) {
if (navbar) {
navbar <- navbar(sections = sections, lang = lang)
} else {
navbar <- NULL
}
probe_list <-
sections %>%
lapply(
FUN = function(x) {
switch(
x,
overview = probe_overview_stats_assemble(
data = data,
tbl_name = tbl_name,
lang = lang,
locale = locale
),
variables = probe_columns_assemble(
data = data,
lang = lang,
locale = locale
),
interactions = probe_interactions_assemble(
data = data,
lang = lang
),
correlations = probe_correlations_assemble(
data = data,
lang = lang
),
missing = probe_missing_assemble(
data = data,
lang = lang
),
sample = probe_sample_assemble(
data = data,
lang = lang
)
)
}
)
bootstrap_lib <-
readr::read_file(
file = system.file(
"lib", "bootstrap", "css", "bootstrap.min.css",
package = "pointblank"
)
)
jquery_lib <-
readr::read_file(
file = system.file(
"lib", "jquery", "jquery-1.12.4.min.js",
package = "pointblank"
)
)
extra_js <-
readr::read_file(
file = system.file(
"javascript", "toggle_anchor.js",
package = "pointblank"
)
)
extra_css <-
readr::read_file(
file = system.file(
"css", "extra.css",
package = "pointblank"
)
)
table_scan <-
htmltools::tagList(
htmltools::HTML("<!doctype html>"),
htmltools::tags$html(
lang = lang,
htmltools::HTML(
"<head>\n",
" <style>code {color: #333 !important; ",
" background: none !important; padding: 0 !important; }</style>",
paste0(
" <meta name=\"viewport\" content=\"width=device-width, ",
"initial-scale=1, shrink-to-fit=no\">\n"
),
" <style>\n",
" ", bootstrap_lib, "\n",
" </style>\n",
" <style>\n",
" ", extra_css, "\n",
" </style>\n",
" </head>"
),
htmltools::tags$body(
htmltools::tags$a(class = "anchor-pos", id = "top"),
navbar,
htmltools::tags$div(
class = "content",
htmltools::tags$div(
class = "container",
style = if (!is.null(width)) {
paste0("width: ", width, ";")
} else {
NULL
},
probe_list
)
),
htmltools::tags$footer(
htmltools::tags$div(
class = "container-fluid",
htmltools::tags$div(
class = "row center-block footer-text",
htmltools::tags$p(
class = "text-muted text-center",
htmltools::HTML(
get_lsv("table_scan/footer_text_fragment")[[lang]]
)
)
)
)
),
htmltools::HTML(
"<script>\n",
jquery_lib, "\n",
"</script>"
),
htmltools::tags$script(
"<script>\n",
extra_js, "\n",
"</script>"
)
)
)
)
class(table_scan) <- c("ptblank_tbl_scan", class(table_scan))
table_scan
}
probe_overview_stats_assemble <- function(
data,
tbl_name,
lang,
locale
) {
cli::cli_div(
theme = list(
span.overview = list(color = "red"),
span.variables = list(color = "orange"),
span.interactions = list(color = "yellow"),
span.correlations = list(color = "green"),
span.missing_values = list(color = "blue"),
span.sample = list(color = "purple")
)
)
if (is.na(tbl_name)) {
header <-
get_lsv("table_scan/nav_overview_ts")[[lang]]
} else {
header <-
glue::glue(get_lsv("table_scan/section_title_overview_of_ts")[[lang]])
}
row_header <- row_header(id = "overview", header = htmltools::HTML(header))
# Get the starting time for the section
section_start_time <- Sys.time()
if (interactive()) {
cli::cli_alert_info(
"{.overview Starting assembly of 'Overview' section...}"
)
}
overview_stats <-
probe_overview_stats(data = data, lang = lang, locale = locale)
overview_stats_tags <-
htmltools::tagList(
row_header,
htmltools::tags$div(
class = "section-items",
htmltools::tags$div(
class = "row spacing",
htmltools::tags$ul(
class = "nav nav-pills",
role = "tablist",
nav_pill_li(
label = get_lsv("table_scan/nav_overview_ts")[[lang]],
id = "overview-dataset_overview",
active = TRUE
),
nav_pill_li(
label = get_lsv(text = c(
"table_scan",
"button_label_overview_reproducibility_ts"
))[[lang]],
id = "overview-reproducibility",
active = FALSE
)
),
htmltools::tags$div(
class = "tab-content",
style = "padding-top: 10px;",
tab_panel(
id = "overview-dataset_overview",
active = TRUE,
panel_component_list = list(
panel_component(
size = 6,
title = get_lsv(text = c(
"table_scan",
"subsection_title_overview_table_overview"
))[[lang]],
content = overview_stats$data_overview_gt
),
panel_component(
size = 6,
title = get_lsv(text = c(
"table_scan",
"subsection_title_overview_column_types"
))[[lang]],
content = overview_stats$r_col_types_gt
)
)
),
tab_panel(
id = "overview-reproducibility",
active = FALSE,
panel_component_list = list(
panel_component(
size = 12,
title = get_lsv(text = c(
"table_scan",
"subsection_title_overview_reproducibility_information"
))[[lang]],
content = overview_stats$reproducibility_gt
)
)
)
)
)
)
)
# Get the ending time for the section
section_end_time <- Sys.time()
# Get the time duration for the section
time_diff_s <-
get_time_duration(
start_time = section_start_time,
end_time = section_end_time
)
if (interactive()) {
cli::cli_alert_success(
paste0("{.overview ...Finished!} ", print_time(time_diff_s))
)
}
overview_stats_tags
}
probe_columns_assemble <- function(
data,
lang,
locale
) {
cli::cli_div(
theme = list(
span.overview = list(color = "red"),
span.variables = list(color = "orange"),
span.interactions = list(color = "yellow"),
span.correlations = list(color = "green"),
span.missing_values = list(color = "blue"),
span.sample = list(color = "purple")
)
)
header <- get_lsv("table_scan/nav_variables_ts")[[lang]]
row_header <- row_header(id = "variables", header = header)
# Get the starting time for the section
section_start_time <- Sys.time()
if (interactive()) {
cli::cli_alert_info(
"{.variables Starting assembly of 'Variables' section...}"
)
}
columns_data <- probe_columns(data = data, lang = lang, locale = locale)
columns_tag_lists <-
lapply(
columns_data,
function(x) {
id_val <- gt::random_id()
if (x$column_type %in% c("numeric", "integer")) {
htmltools::tagList(
htmltools::tags$div(
class = "row spacing",
htmltools::tags$a(
class = "anchor-pos anchor-pos-variable",
id = paste0("pp_var_", id_val),
htmltools::tags$div(
class = "variable",
htmltools::tags$div(
class = "col-sm-3",
htmltools::tags$p(
class = "h4",
style = htmltools::css(
`margin-bottom` = "0",
`margin-top` = "-2px"
),
title = x$column_name,
htmltools::tags$a(
href = paste0("#pp_var_", id_val),
x$column_name,
htmltools::tags$br(),
if (!is.na(x$column_label)) {
htmltools::tagList(
htmltools::tags$p(
x$column_label,
style = htmltools::css(
`font-size` = "small",
color = "gray",
`margin-bottom` = "0"
)
)
)
},
htmltools::tags$small(
style = htmltools::css(
`margin-top` = "0"
),
htmltools::tags$code(x$column_type)
)
)
)
),
htmltools::tags$div(
class = "col-sm-5",
x$column_description_gt
),
htmltools::tags$div(
class = "col-sm-4",
x$column_stats_gt
),
htmltools::tags$div(
class = "col-sm-12 text-left",
htmltools::tags$button(
class = "btn btn-default btn-sm",
style = "margin-top: 5px;",
`data-toggle` = "collapse",
`data-target` = paste0(
"#bottom-", id_val, ", #minifreqtable", id_val
),
`aria-expanded` = "true",
`aria-controls` = "collapseExample",
get_lsv("table_scan/btn_toggle_details")[[lang]]
)
),
htmltools::tags$div(
id = paste0("bottom-", id_val),
class = "collapse",
`aria-expanded` = "false",
style = "height: 5px;",
htmltools::tags$div(
class = "row spacing",
htmltools::tags$ul(
class = "nav nav-tabs",
role = "tablist",
htmltools::tags$li(
role = "presentation",
class = "active",
style = "padding-top: 5px;",
htmltools::tags$a(
href = paste0(
"#", id_val, "bottom-",
id_val, "statistics"
),
`aria-controls` = paste0(
id_val, "bottom-", id_val, "statistics"
),
role = "tab",
`data-toggle` = "tab",
get_lsv(text = c(
"table_scan",
"tab_label_variables_statistics_ts"
))[[lang]]
)
),
htmltools::tags$li(
role = "presentation",
class = "",
style = "padding-top: 5px;",
htmltools::tags$a(
href = paste0(
"#", id_val, "bottom-",
id_val, "common_values"
),
`aria-controls` = paste0(
id_val, "bottom-",
id_val, "common_values"
),
role = "tab",
`data-toggle` = "tab",
get_lsv(text = c(
"table_scan",
"tab_label_variables_common_values_ts"
))[[lang]]
)
),
htmltools::tags$li(
role = "presentation",
class = "",
style = "padding-top: 5px;",
htmltools::tags$a(
href = paste0(
"#", id_val, "bottom-",
id_val, "max_min_slices"
),
`aria-controls` = paste0(
id_val, "bottom-",
id_val, "max_min_slices"
),
role = "tab",
`data-toggle` = "tab",
get_lsv(text = c(
"table_scan",
"tab_label_variables_max_min_slices_ts"
))[[lang]]
)
)
),
htmltools::tags$div(
class = "tab-content",
htmltools::tags$div(
role = "tabpanel",
class = "tab-pane col-sm-12 active",
id = paste0(
id_val, "bottom-",
id_val, "statistics"
),
htmltools::tags$div(
class = "col-sm-6",
htmltools::tags$p(
class = "h4",
get_lsv(text = c(
"table_scan",
"subsection_title_variables_quantile_statistics"
))[[lang]]
),
x$column_quantile_gt
),
htmltools::tags$div(
class = "col-sm-6",
htmltools::tags$p(
class = "h4",
get_lsv(text = c(
"table_scan",
paste0(
"subsection_title_variables_",
"descriptive_statistics"
)
))[[lang]]
),
x$column_descriptive_gt
)
),
htmltools::tags$div(
role = "tabpanel",
class = "tab-pane col-sm-12",
id = paste0(
id_val, "bottom-",
id_val, "common_values"
),
htmltools::tags$div(
class = "col-sm-12",
htmltools::tags$p(
class = "h4",
get_lsv(text = c(
"table_scan",
"subsection_title_variables_common_values"
))[[lang]]
),
x$column_common_gt
)
),
htmltools::tags$div(
role = "tabpanel",
class = "tab-pane col-sm-12",
id = paste0(
id_val, "bottom-",
id_val, "max_min_slices"
),
htmltools::tags$div(
class = "col-sm-6",
htmltools::tags$p(
class = "h4",
get_lsv(text = c(
"table_scan",
"subsection_title_variables_maximum_values"
))[[lang]]
),
x$column_top_slice_gt
),
htmltools::tags$div(
class = "col-sm-6",
htmltools::tags$p(
class = "h4",
get_lsv(text = c(
"table_scan",
"subsection_title_variables_minimum_values"
))[[lang]]
),
x$column_bottom_slice_gt
)
)
)
)
)
)
)
)
)
} else if (x$column_type == "character") {
htmltools::tagList(
htmltools::tags$div(
class = "row spacing",
htmltools::tags$a(
class = "anchor-pos anchor-pos-variable",
id = paste0("pp_var_", id_val),
htmltools::tags$div(
class = "variable",
htmltools::tags$div(
class = "col-sm-3",
htmltools::tags$p(
class = "h4",
style = htmltools::css(
`margin-bottom` = "0",
`margin-top` = "-2px"
),
title = x$column_name,
htmltools::tags$a(
href = paste0("#pp_var_", id_val),
x$column_name,
htmltools::tags$br(),
if (!is.na(x$column_label)) {
htmltools::tagList(
htmltools::tags$p(
x$column_label,
style = htmltools::css(
`font-size` = "small",
color = "gray",
`margin-bottom` = "0"
)
)
)
},
htmltools::tags$small(
style = htmltools::css(
`margin-top` = "0"
),
htmltools::tags$code(x$column_type)
)
)
)
),
htmltools::tags$div(
class = "col-sm-5",
x$column_description_gt
),
htmltools::tags$div(
class = "col-sm-4",
" "
),
htmltools::tags$div(
class = "col-sm-12 text-left",
htmltools::tags$button(
class = "btn btn-default btn-sm",
style = "margin-top: 5px;",
`data-toggle` = "collapse",
`data-target` = paste0(
"#bottom-", id_val, ", #minifreqtable", id_val
),
`aria-expanded` = "true",
`aria-controls` = "collapseExample",
get_lsv("table_scan/btn_toggle_details")[[lang]]
)
),
htmltools::tags$div(
id = paste0("bottom-", id_val),
class = "collapse",
`aria-expanded` = "false",
style = "height: 5px;",
htmltools::tags$div(
class = "row spacing",
htmltools::tags$ul(
class = "nav nav-tabs",
role = "tablist",
htmltools::tags$li(
role = "presentation",
class = "active",
style = "padding-top: 5px;",
htmltools::tags$a(
href = paste0(
"#", id_val, "bottom-",
id_val, "common_values"
),
`aria-controls` = paste0(
id_val, "bottom-",
id_val, "common_values"
),
role = "tab",
`data-toggle` = "tab",
get_lsv(text = c(
"table_scan",
"tab_label_variables_common_values_ts"
))[[lang]]
)
),
htmltools::tags$li(
role = "presentation",
class = "",
style = "padding-top: 5px;",
htmltools::tags$a(
href = paste0(
"#", id_val, "bottom-",
id_val, "lengths"
),
`aria-controls` = paste0(
id_val, "bottom-",
id_val, "lengths"
),
role = "tab",
`data-toggle` = "tab",
get_lsv(text = c(
"table_scan",
"subsection_title_variables_string_lengths"
))[[lang]]
)
)
),
htmltools::tags$div(
class = "tab-content",
htmltools::tags$div(
role = "tabpanel",
class = "tab-pane col-sm-12 active",
id = paste0(
id_val, "bottom-",
id_val, "common_values"
),
htmltools::tags$div(
class = "col-sm-12",
htmltools::tags$p(
class = "h4",
get_lsv(text = c(
"table_scan",
"subsection_title_variables_common_values"
))[[lang]]
),
x$column_common_gt
)
),
htmltools::tags$div(
role = "tabpanel",
class = "tab-pane col-sm-12",
id = paste0(
id_val, "bottom-",
id_val, "lengths"
),
htmltools::tags$div(
class = "col-sm-4",
htmltools::tags$p(
class = "h4",
get_lsv(text = c(
"table_scan",
"subsection_title_variables_string_lengths"
))[[lang]]
),
x$column_nchar_gt
),
htmltools::tags$div(
class = "col-sm-8",
htmltools::tags$p(
class = "h4",
get_lsv(text = c(
"table_scan",
"subsection_title_variables_histogram"
))[[lang]]
),
x$column_nchar_plot
)
)
)
)
)
)
)
)
)
} else if (x$column_type %in%
c("logical", "factor", "date", "datetime")) {
htmltools::tagList(
htmltools::tags$div(
class = "row spacing",
htmltools::tags$a(
class = "anchor-pos anchor-pos-variable",
id = paste0("pp_var_", id_val),
htmltools::tags$div(
class = "variable",
htmltools::tags$div(
class = "col-sm-3",
htmltools::tags$p(
class = "h4",
style = htmltools::css(
`margin-bottom` = "0",
`margin-top` = "-2px"
),
title = x$column_name,
htmltools::tags$a(
href = paste0("#pp_var_", id_val),
x$column_name,
htmltools::tags$br(),
if (!is.na(x$column_label)) {
htmltools::tagList(
htmltools::tags$p(
x$column_label,
style = htmltools::css(
`font-size` = "small",
color = "gray",
`margin-bottom` = "0"
)
)
)
},
htmltools::tags$small(
style = htmltools::css(
`margin-top` = "0"
),
htmltools::tags$code(x$column_type)
)
)
)
),
htmltools::tags$div(
class = "col-sm-5",
x$column_description_gt
),
htmltools::tags$div(
class = "col-sm-4",
" "
),
)
)
)
)
} else {
htmltools::tagList(
htmltools::tags$div(
class = "row spacing",
htmltools::tags$a(
class = "anchor-pos anchor-pos-variable",
id = paste0("pp_var_", id_val),
htmltools::tags$div(
class = "variable",
htmltools::tags$div(
class = "col-sm-12",
htmltools::tags$p(
class = "h4",
title = x$column_name,
htmltools::tags$a(
href = paste0("#pp_var_", id_val),
x$column_name,
htmltools::tags$br(),
htmltools::tags$small(
htmltools::tags$code(x$column_type)
)
)
)
)
)
)
)
)
}
}
)
columns_tags <-
htmltools::tagList(
row_header,
htmltools::tags$div(
class = "section-items",
columns_tag_lists
)
)
# Get the ending time for the section
section_end_time <- Sys.time()
# Get the time duration for the section
time_diff_s <-
get_time_duration(
start_time = section_start_time,
end_time = section_end_time
)
if (interactive()) {
cli::cli_alert_success(
paste0("{.variables ...Finished!} ", print_time(time_diff_s))
)
}
columns_tags
}
probe_interactions_assemble <- function(
data,
lang
) {
cli::cli_div(
theme = list(
span.overview = list(color = "red"),
span.variables = list(color = "orange"),
span.interactions = list(color = "yellow"),
span.correlations = list(color = "green"),
span.missing_values = list(color = "blue"),
span.sample = list(color = "purple")
)
)
header <- get_lsv("table_scan/nav_interactions_ts")[[lang]]
row_header <- row_header(id = "interactions", header = header)
# Get the starting time for the section
section_start_time <- Sys.time()
if (interactive()) {
cli::cli_alert_info(
"{.interactions Starting assembly of 'Interactions' section...}"
)
}
interactions_data <- suppressWarnings(probe_interactions(data = data))
interactions_tags <-
htmltools::tagList(
row_header,
htmltools::tags$div(
class = "section-items",
htmltools::tags$div(
class = "row spacing",
htmltools::tags$div(
id = "sample-container",
class = "col-sm-12",
interactions_data$probe_interactions
)
)
)
)
# Get the ending time for the section
section_end_time <- Sys.time()
# Get the time duration for the section
time_diff_s <-
get_time_duration(
start_time = section_start_time,
end_time = section_end_time
)
if (interactive()) {
cli::cli_alert_success(
paste0("{.interactions ...Finished!} ", print_time(time_diff_s))
)
}
interactions_tags
}
probe_correlations_assemble <- function(
data,
lang
) {
cli::cli_div(
theme = list(
span.overview = list(color = "red"),
span.variables = list(color = "orange"),
span.interactions = list(color = "yellow"),
span.correlations = list(color = "green"),
span.missing_values = list(color = "blue"),
span.sample = list(color = "purple")
)
)
header <- get_lsv("table_scan/nav_correlations_ts")[[lang]]
row_header <- row_header(id = "correlations", header = header)
# Get the starting time for the section
section_start_time <- Sys.time()
if (interactive()) {
cli::cli_alert_info(
"{.correlations Starting assembly of 'Correlations' section...}"
)
}
correlations_data <- probe_correlations(data = data)
correlations_tags <-
htmltools::tagList(
row_header,
htmltools::tags$div(
class = "section-items",
htmltools::tags$div(
class = "row spacing",
htmltools::tags$ul(
class = "nav nav-pills",
role = "tablist",
nav_pill_li(
label = "Pearson",
id = "correlations-pearson",
active = TRUE
),
nav_pill_li(
label = "Kendall",
id = "correlations-kendall",
active = FALSE
),
nav_pill_li(
label = "Spearman",
id = "correlations-spearman",
active = FALSE
),
),
htmltools::tags$div(
class = "tab-content",
style = "padding-top: 10px;",
tab_panel(
id = "correlations-pearson",
active = TRUE,
panel_component_list = list(
panel_component(
size = 12,
title = NULL,
content = correlations_data$probe_corr_pearson
)
)
),
tab_panel(
id = "correlations-kendall",
active = FALSE,
panel_component_list = list(
panel_component(
size = 12,
title = NULL,
content = correlations_data$probe_corr_kendall
)
)
),
tab_panel(
id = "correlations-spearman",
active = FALSE,
panel_component_list = list(
panel_component(
size = 12,
title = NULL,
content = correlations_data$probe_corr_spearman
)
)
)
)
)
)
)
# Get the ending time for the section
section_end_time <- Sys.time()
# Get the time duration for the section
time_diff_s <-
get_time_duration(
start_time = section_start_time,
end_time = section_end_time
)
if (interactive()) {
cli::cli_alert_success(
paste0("{.correlations ...Finished!} ", print_time(time_diff_s))
)
}
correlations_tags
}
probe_missing_assemble <- function(
data,
lang
) {
cli::cli_div(
theme = list(
span.overview = list(color = "red"),
span.variables = list(color = "orange"),
span.interactions = list(color = "yellow"),
span.correlations = list(color = "green"),
span.missing_values = list(color = "blue"),
span.sample = list(color = "purple")
)
)
header <- get_lsv("table_scan/nav_missing_values_ts")[[lang]]
row_header <- row_header(id = "missing", header = header)
# Get the starting time for the section
section_start_time <- Sys.time()
if (interactive()) {
cli::cli_alert_info(
"{.missing_values Starting assembly of 'Missing Values' section...}"
)
}
missing_data <- probe_missing(data = data)
missing_tags <-
htmltools::tagList(
row_header,
htmltools::tags$div(
class = "section-items",
htmltools::tags$div(
class = "row spacing",
htmltools::tags$div(
id = "sample-container",
class = "col-sm-12",
missing_data$probe_missing
)
)
)
)
# Get the ending time for the section
section_end_time <- Sys.time()
# Get the time duration for the section
time_diff_s <-
get_time_duration(
start_time = section_start_time,
end_time = section_end_time
)
if (interactive()) {
cli::cli_alert_success(
paste0("{.missing_values ...Finished!} ", print_time(time_diff_s))
)
}
missing_tags
}
probe_sample_assemble <- function(
data,
lang
) {
cli::cli_div(
theme = list(
span.overview = list(color = "red"),
span.variables = list(color = "orange"),
span.interactions = list(color = "yellow"),
span.correlations = list(color = "green"),
span.missing_values = list(color = "blue"),
span.sample = list(color = "purple")
)
)
header <- get_lsv("table_scan/nav_sample_values_ts")[[lang]]
row_header <- row_header(id = "sample", header = header)
# Get the starting time for the section
section_start_time <- Sys.time()
if (interactive()) {
cli::cli_alert_info(
"{.sample Starting assembly of 'Sample' section...}"
)
}
sample_data <- probe_sample(data = data)
sample_tags <-
htmltools::tagList(
row_header,
htmltools::tags$div(
class = "section-items",
htmltools::tags$div(
class = "row spacing",
htmltools::tags$div(
id = "sample-container",
class = "col-sm-12",
sample_data$probe_sample
)
)
)
)
# Get the ending time for the section
section_end_time <- Sys.time()
# Get the time duration for the section
time_diff_s <-
get_time_duration(
start_time = section_start_time,
end_time = section_end_time
)
if (interactive()) {
cli::cli_alert_success(
paste0("{.sample ...Finished!} ", print_time(time_diff_s))
)
}
sample_tags
}
#
# Components of the page
#
navbar <- function(
sections,
lang
) {
# Compose the list of navigational links for the navbar
item_list <-
sections %>%
lapply(
FUN = function(x) {
label <-
switch(
x,
overview = get_lsv("table_scan/nav_overview_ts")[[lang]],
variables = get_lsv("table_scan/nav_variables_ts")[[lang]],
interactions = get_lsv("table_scan/nav_interactions_ts")[[lang]],
correlations = get_lsv("table_scan/nav_correlations_ts")[[lang]],
missing = get_lsv("table_scan/nav_missing_values_ts")[[lang]],
sample = get_lsv("table_scan/nav_sample_values_ts")[[lang]]
)
htmltools::tags$li(
htmltools::tags$a(
class = "anchor",
href = paste0("#", x),
label
)
)
}
)
htmltools::tags$nav(
class = "navbar navbar-default navbar-fixed-top",
htmltools::tags$div(
class = "container-fluid",
htmltools::tags$div(
class = "navbar-header",
htmltools::tags$button(
type = "button",
class = "navbar-toggle collapsed",
style = "margin-top: 5px;",
`data-toggle` = "collapse",
`data-target` = "#navbar",
`aria-expanded` = "false",
`aria-controls` = "navbar",
htmltools::tags$span(class = "sr-only", "Toggle navigation"),
htmltools::tags$span(class = "icon-bar"),
htmltools::tags$span(class = "icon-bar"),
htmltools::tags$span(class = "icon-bar")
),
htmltools::tags$a(
class = "navbar-brand anchor",
href = "#top",
style = "font-size: 14.5px;",
get_lsv("table_scan/nav_title_ts")[[lang]]
),
),
htmltools::tags$div(
id = "navbar",
class = "navbar-collapse collapse",
htmltools::tags$ul(
class = "nav navbar-nav navbar-right",
item_list
)
)
)
)
}
tab_panel <- function(
id,
panel_component_list,
active = FALSE
) {
htmltools::tags$div(
role = "tabpanel",
class = if (active) "tab-pane col-sm-12 active" else "tab-pane col-sm-12",
id = id,
panel_component_list
)
}
row_header <- function(
id,
header
) {
htmltools::tags$div(
class = "row header",
htmltools::tags$a(
class = "anchor-pos",
id = id
),
htmltools::tags$h1(
class = "page-header",
header
)
)
}
# nolint start
panel_component <- function(
size,
content,
title = NULL
) {
glue_list <- list(size = size, title = title)
htmltools::tags$div(
class = pb_glue_data(glue_list, "col-sm-{size}"),
htmltools::tags$p(
class = "h4",
if (!is.null(title)) pb_glue_data(glue_list, "{title}") else ""
),
content
)
}
nav_pill_li <- function(
label,
id,
active = FALSE
) {
glue_list <- list(label = label, id = id)
htmltools::tags$li(
role = "presentation",
class = if (active) "active" else "",
htmltools::tags$a(
href = pb_glue_data(glue_list, "#{id}"),
`aria-controls` = pb_glue_data(glue_list, "{id}"),
role = "tab",
`data-toggle` = "tab",
`aria-expanded` = if (active) "true" else "false",
pb_glue_data(glue_list, "{label}")
)
)
}
pb_glue_data <- function(.x, ...) {
glue::glue_data(.x, ..., .transformer = get, .envir = emptyenv())
}
use_cli_theme <- function() {
cli::cli_div(
theme = list(
span.overview = list(color = "red"),
span.variables = list(color = "orange"),
span.interactions = list(color = "yellow"),
span.correlations = list(color = "green"),
span.missing_values = list(color = "blue"),
span.sample = list(color = "purple")
)
)
}
# nolint end
# nocov end
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.