Nothing
#' `teal` module: Variable browser
#'
#' Module provides provides a detailed summary and visualization of variable distributions
#' for `data.frame` objects, with interactive features to customize analysis.
#'
#' Numeric columns with fewer than 30 distinct values can be treated as either discrete
#' or continuous with a checkbox allowing users to switch how they are treated(if < 6 unique values
#' then the default is discrete, otherwise it is continuous).
#'
#' @inheritParams teal::module
#' @inheritParams shared_params
#' @param parent_dataname (`character(1)`) string specifying a parent dataset.
#' If it exists in `datanames` then an extra checkbox will be shown to
#' allow users to not show variables in other datasets which exist in this `dataname`.
#' This is typically used to remove `ADSL` columns in `CDISC` data.
#' In non `CDISC` data this can be ignored. Defaults to `"ADSL"`.
#' @param datasets_selected (`character`) `r lifecycle::badge("deprecated")` vector of datasets to show, please
#' use the `datanames` argument.
#'
#' @inherit shared_params return
#'
#' @examplesShinylive
#' library(teal.modules.general)
#' interactive <- function() TRUE
#' {{ next_example }}
# nolint start: line_length_linter.
#' @examples
# nolint end: line_length_linter.
#' # general data example
#' data <- teal_data()
#' data <- within(data, {
#' iris <- iris
#' mtcars <- mtcars
#' women <- women
#' faithful <- faithful
#' CO2 <- CO2
#' })
#'
#' app <- init(
#' data = data,
#' modules = modules(
#' tm_variable_browser(
#' label = "Variable browser"
#' )
#' )
#' )
#' if (interactive()) {
#' shinyApp(app$ui, app$server)
#' }
#'
#' @examplesShinylive
#' library(teal.modules.general)
#' interactive <- function() TRUE
#' {{ next_example }}
# nolint start: line_length_linter.
#' @examples
# nolint end: line_length_linter.
#' # CDISC example data
#' library(sparkline)
#' data <- teal_data()
#' data <- within(data, {
#' ADSL <- teal.data::rADSL
#' ADTTE <- teal.data::rADTTE
#' })
#' join_keys(data) <- default_cdisc_join_keys[names(data)]
#'
#' app <- init(
#' data = data,
#' modules = modules(
#' tm_variable_browser(
#' label = "Variable browser"
#' )
#' )
#' )
#' if (interactive()) {
#' shinyApp(app$ui, app$server)
#' }
#'
#' @export
#'
tm_variable_browser <- function(label = "Variable Browser",
datasets_selected = deprecated(),
datanames = if (missing(datasets_selected)) "all" else datasets_selected,
parent_dataname = "ADSL",
pre_output = NULL,
post_output = NULL,
ggplot2_args = teal.widgets::ggplot2_args(),
transformators = list()) {
message("Initializing tm_variable_browser")
# Start of assertions
checkmate::assert_string(label)
if (!missing(datasets_selected)) {
lifecycle::deprecate_soft(
when = "0.4.0",
what = "tm_variable_browser(datasets_selected)",
with = "tm_variable_browser(datanames)",
details = c(
"If both `datasets_selected` and `datanames` are set `datasets_selected` will be silently ignored.",
i = 'Use `tm_variable_browser(datanames = "all")` to keep the previous behavior and avoid this warning.'
)
)
}
checkmate::assert_character(datanames, min.len = 0, min.chars = 1, null.ok = TRUE)
checkmate::assert_character(parent_dataname, min.len = 0, max.len = 1)
checkmate::assert_multi_class(pre_output, c("shiny.tag", "shiny.tag.list", "html"), null.ok = TRUE)
checkmate::assert_multi_class(post_output, c("shiny.tag", "shiny.tag.list", "html"), null.ok = TRUE)
checkmate::assert_class(ggplot2_args, "ggplot2_args")
# End of assertions
datanames_module <- if (identical(datanames, "all") || is.null(datanames)) {
datanames
} else {
union(datanames, parent_dataname)
}
ans <- module(
label,
server = srv_variable_browser,
ui = ui_variable_browser,
datanames = datanames_module,
server_args = list(
datanames = if (is.null(datanames)) "all" else datanames,
parent_dataname = parent_dataname,
ggplot2_args = ggplot2_args
),
ui_args = list(
pre_output = pre_output,
post_output = post_output
),
transformators = transformators
)
# `shiny` inputs are stored properly but the majority of the module is state of `datatable` which is not stored.
attr(ans, "teal_bookmarkable") <- NULL
ans
}
# UI function for the variable browser module
ui_variable_browser <- function(id,
pre_output = NULL,
post_output = NULL) {
ns <- NS(id)
tagList(
include_css_files("custom"),
shinyjs::useShinyjs(),
teal.widgets::standard_layout(
output = fluidRow(
htmlwidgets::getDependency("sparkline"), # needed for sparklines to work
column(
6,
# variable browser
teal.widgets::white_small_well(
uiOutput(ns("ui_variable_browser")),
shinyjs::hidden({
checkboxInput(ns("show_parent_vars"), "Show parent dataset variables", value = FALSE)
})
)
),
column(
6,
teal.widgets::white_small_well(
### Reporter
teal.reporter::simple_reporter_ui(ns("simple_reporter")),
###
tags$div(
class = "block",
uiOutput(ns("ui_histogram_display"))
),
tags$div(
class = "block",
uiOutput(ns("ui_numeric_display"))
),
teal.widgets::plot_with_settings_ui(ns("variable_plot")),
tags$br(),
# input user-defined text size
teal.widgets::panel_item(
title = "Plot settings",
collapsed = TRUE,
selectInput(
inputId = ns("ggplot_theme"), label = "ggplot2 theme",
choices = ggplot_themes,
selected = "grey"
),
fluidRow(
column(6, sliderInput(
inputId = ns("font_size"), label = "font size",
min = 5L, max = 30L, value = 15L, step = 1L, ticks = FALSE
)),
column(6, sliderInput(
inputId = ns("label_rotation"), label = "rotate x labels",
min = 0L, max = 90L, value = 45L, step = 1, ticks = FALSE
))
)
),
tags$br(),
teal.widgets::get_dt_rows(ns("variable_summary_table"), ns("variable_summary_table_rows")),
DT::dataTableOutput(ns("variable_summary_table"))
)
)
),
pre_output = pre_output,
post_output = post_output
)
)
}
# Server function for the variable browser module
srv_variable_browser <- function(id,
data,
reporter,
filter_panel_api,
datanames, parent_dataname, ggplot2_args) {
with_reporter <- !missing(reporter) && inherits(reporter, "Reporter")
with_filter <- !missing(filter_panel_api) && inherits(filter_panel_api, "FilterPanelAPI")
checkmate::assert_class(data, "reactive")
checkmate::assert_class(isolate(data()), "teal_data")
moduleServer(id, function(input, output, session) {
teal.logger::log_shiny_input_changes(input, namespace = "teal.modules.general")
# if there are < this number of unique records then a numeric
# variable can be treated as a factor and all factors with < this groups
# have their values plotted
.unique_records_for_factor <- 30
# if there are < this number of unique records then a numeric
# variable is by default treated as a factor
.unique_records_default_as_factor <- 6 # nolint: object_length.
varname_numeric_as_factor <- reactiveValues()
datanames <- Filter(function(name) {
is.data.frame(isolate(data())[[name]])
}, if (identical(datanames, "all")) names(isolate(data())) else datanames)
output$ui_variable_browser <- renderUI({
ns <- session$ns
do.call(
tabsetPanel,
c(
id = ns("tabset_panel"),
do.call(
tagList,
lapply(datanames, function(dataname) {
tabPanel(
dataname,
tags$div(
class = "mt-4",
textOutput(ns(paste0("dataset_summary_", dataname)))
),
tags$div(
class = "mt-4",
teal.widgets::get_dt_rows(
ns(paste0("variable_browser_", dataname)),
ns(paste0("variable_browser_", dataname, "_rows"))
),
DT::dataTableOutput(ns(paste0("variable_browser_", dataname)), width = "100%")
)
)
})
)
)
)
})
# conditionally display checkbox
shinyjs::toggle(
id = "show_parent_vars",
condition = length(parent_dataname) > 0 && parent_dataname %in% datanames
)
columns_names <- new.env()
# plot_var$data holds the name of the currently selected dataset
# plot_var$variable[[<dataset_name>]] holds the name of the currently selected
# variable for dataset <dataset_name>
plot_var <- reactiveValues(data = NULL, variable = list())
establish_updating_selection(datanames, input, plot_var, columns_names)
# validations
validation_checks <- validate_input(input, plot_var, data)
# data_for_analysis is a list with two elements: a column from a dataset and the column label
plotted_data <- reactive({
validation_checks()
get_plotted_data(input, plot_var, data)
})
treat_numeric_as_factor <- reactive({
if (is_num_var_short(.unique_records_for_factor, input, plotted_data)) {
input$numeric_as_factor
} else {
FALSE
}
})
render_tabset_panel_content(
input = input,
output = output,
data = data,
datanames = datanames,
parent_dataname = parent_dataname,
columns_names = columns_names,
plot_var = plot_var
)
# add used-defined text size to ggplot arguments passed from caller frame
all_ggplot2_args <- reactive({
user_text <- teal.widgets::ggplot2_args(
theme = list(
"text" = ggplot2::element_text(size = input[["font_size"]]),
"axis.text.x" = ggplot2::element_text(angle = input[["label_rotation"]], hjust = 1)
)
)
user_theme <- utils::getFromNamespace(sprintf("theme_%s", input[["ggplot_theme"]]), ns = "ggplot2")
user_theme <- user_theme()
# temporary fix to circumvent assertion issue with resolve_ggplot2_args
# drop problematic elements
user_theme <- user_theme[grep("strip.text.y.left", names(user_theme), fixed = TRUE, invert = TRUE)]
teal.widgets::resolve_ggplot2_args(
user_plot = user_text,
user_default = teal.widgets::ggplot2_args(theme = user_theme),
module_plot = ggplot2_args
)
})
output$ui_numeric_display <- renderUI({
validation_checks()
dataname <- input$tabset_panel
varname <- plot_var$variable[[dataname]]
df <- data()[[dataname]]
numeric_ui <- tagList(
fluidRow(
tags$div(
class = "col-md-4",
tags$br(),
shinyWidgets::switchInput(
inputId = session$ns("display_density"),
label = "Show density",
value = `if`(is.null(isolate(input$display_density)), TRUE, isolate(input$display_density)),
width = "50%",
labelWidth = "100px",
handleWidth = "50px"
)
),
tags$div(
class = "col-md-4",
tags$br(),
shinyWidgets::switchInput(
inputId = session$ns("remove_outliers"),
label = "Remove outliers",
value = `if`(is.null(isolate(input$remove_outliers)), FALSE, isolate(input$remove_outliers)),
width = "50%",
labelWidth = "100px",
handleWidth = "50px"
)
),
tags$div(
class = "col-md-4",
uiOutput(session$ns("outlier_definition_slider_ui"))
)
),
tags$div(
class = "ml-4",
uiOutput(session$ns("ui_density_help")),
uiOutput(session$ns("ui_outlier_help"))
)
)
observeEvent(input$numeric_as_factor, ignoreInit = TRUE, {
varname_numeric_as_factor[[plot_var$variable[[dataname]]]] <- input$numeric_as_factor
})
if (is.numeric(df[[varname]])) {
unique_entries <- length(unique(df[[varname]]))
if (unique_entries < .unique_records_for_factor && unique_entries > 0) {
list(
checkboxInput(
session$ns("numeric_as_factor"),
"Treat variable as factor",
value = `if`(
is.null(varname_numeric_as_factor[[varname]]),
unique_entries < .unique_records_default_as_factor,
varname_numeric_as_factor[[varname]]
)
),
conditionalPanel("!input.numeric_as_factor", ns = session$ns, numeric_ui)
)
} else if (unique_entries > 0) {
numeric_ui
}
} else {
NULL
}
})
output$ui_histogram_display <- renderUI({
validation_checks()
dataname <- input$tabset_panel
varname <- plot_var$variable[[dataname]]
df <- data()[[dataname]]
numeric_ui <- tagList(fluidRow(
tags$div(
class = "col-md-4",
shinyWidgets::switchInput(
inputId = session$ns("remove_NA_hist"),
label = "Remove NA values",
value = FALSE,
width = "50%",
labelWidth = "100px",
handleWidth = "50px"
)
)
))
var <- df[[varname]]
if (anyNA(var) && (is.factor(var) || is.character(var) || is.logical(var))) {
groups <- unique(as.character(var))
len_groups <- length(groups)
if (len_groups >= .unique_records_for_factor) {
NULL
} else {
numeric_ui
}
} else {
NULL
}
})
output$outlier_definition_slider_ui <- renderUI({
req(input$remove_outliers)
sliderInput(
inputId = session$ns("outlier_definition_slider"),
tags$div(
class = "teal-tooltip",
tagList(
"Outlier definition:",
icon("circle-info"),
tags$span(
class = "tooltiptext",
paste(
"Use the slider to choose the cut-off value to define outliers; the larger the value the",
"further below Q1/above Q3 points have to be in order to be classed as outliers"
)
)
)
),
min = 1,
max = 5,
value = 3,
step = 0.5
)
})
output$ui_density_help <- renderUI({
req(is.logical(input$display_density))
if (input$display_density) {
tags$small(helpText(paste(
"Kernel density estimation with gaussian kernel",
"and bandwidth function bw.nrd0 (R default)"
)))
} else {
NULL
}
})
output$ui_outlier_help <- renderUI({
req(is.logical(input$remove_outliers), input$outlier_definition_slider)
if (input$remove_outliers) {
tags$small(
helpText(
withMathJax(paste0(
"Outlier data points (\\( X \\lt Q1 - ", input$outlier_definition_slider, "\\times IQR \\) or
\\(Q3 + ", input$outlier_definition_slider, "\\times IQR \\lt X\\))
have not been displayed on the graph and will not be used for any kernel density estimations, ",
"although their values remain in the statisics table below."
))
)
)
} else {
NULL
}
})
variable_plot_r <- reactive({
display_density <- `if`(is.null(input$display_density), FALSE, input$display_density)
remove_outliers <- `if`(is.null(input$remove_outliers), FALSE, input$remove_outliers)
if (remove_outliers) {
req(input$outlier_definition_slider)
outlier_definition <- as.numeric(input$outlier_definition_slider)
} else {
outlier_definition <- 0
}
plot_var_summary(
var = plotted_data()$data,
var_lab = plotted_data()$var_description,
wrap_character = 15,
numeric_as_factor = treat_numeric_as_factor(),
remove_NA_hist = input$remove_NA_hist,
display_density = display_density,
outlier_definition = outlier_definition,
records_for_factor = .unique_records_for_factor,
ggplot2_args = all_ggplot2_args()
)
})
pws <- teal.widgets::plot_with_settings_srv(
id = "variable_plot",
plot_r = variable_plot_r,
height = c(500, 200, 2000)
)
output$variable_summary_table <- DT::renderDataTable({
var_summary_table(
plotted_data()$data,
treat_numeric_as_factor(),
input$variable_summary_table_rows,
if (!is.null(input$remove_outliers) && input$remove_outliers) {
req(input$outlier_definition_slider)
as.numeric(input$outlier_definition_slider)
} else {
0
}
)
})
### REPORTER
if (with_reporter) {
card_fun <- function(comment) {
card <- teal::TealReportCard$new()
card$set_name("Variable Browser Plot")
card$append_text("Variable Browser Plot", "header2")
if (with_filter) card$append_fs(filter_panel_api$get_filter_state())
card$append_text("Plot", "header3")
card$append_plot(variable_plot_r(), dim = pws$dim())
if (!comment == "") {
card$append_text("Comment", "header3")
card$append_text(comment)
}
card
}
teal.reporter::simple_reporter_srv("simple_reporter", reporter = reporter, card_fun = card_fun)
}
###
})
}
#' Summarize NAs.
#'
#' Summarizes occurrence of missing values in vector.
#' @param x vector of any type and length
#' @return Character string describing `NA` occurrence.
#' @keywords internal
var_missings_info <- function(x) {
sprintf("%s [%s%%]", sum(is.na(x)), round(mean(is.na(x) * 100), 2))
}
#' Summarizes variable
#'
#' Creates html summary with statistics relevant to data type. For numeric values it returns central
#' tendency measures, for factor returns level counts, for Date date range, for other just
#' number of levels.
#'
#' @param x vector of any type
#' @param numeric_as_factor `logical` should the numeric variable be treated as a factor
#' @param dt_rows `numeric` current/latest `DT` page length
#' @param outlier_definition If 0 no outliers are removed, otherwise
#' outliers (those more than `outlier_definition*IQR below/above Q1/Q3` be removed)
#' @return text with simple statistics.
#' @keywords internal
var_summary_table <- function(x, numeric_as_factor, dt_rows, outlier_definition) {
if (is.null(dt_rows)) {
dt_rows <- 10
}
if (is.numeric(x) && !numeric_as_factor) {
req(!any(is.infinite(x)))
x <- remove_outliers_from(x, outlier_definition)
qvals <- round(stats::quantile(x, na.rm = TRUE, probs = c(0.25, 0.5, 0.75), type = 2), 2)
# classical central tendency measures
summary <-
data.frame(
Statistic = c("min", "Q1", "median", "mean", "Q3", "max", "sd", "n"),
Value = c(
round(min(x, na.rm = TRUE), 2),
qvals[1],
qvals[2],
round(mean(x, na.rm = TRUE), 2),
qvals[3],
round(max(x, na.rm = TRUE), 2),
round(stats::sd(x, na.rm = TRUE), 2),
length(x[!is.na(x)])
)
)
DT::datatable(summary, rownames = FALSE, options = list(dom = "<t>", pageLength = dt_rows))
} else if (is.factor(x) || is.character(x) || (is.numeric(x) && numeric_as_factor) || is.logical(x)) {
# make sure factor is ordered numeric
if (is.numeric(x)) {
x <- factor(x, levels = sort(unique(x)))
}
level_counts <- table(x)
max_levels_signif <- nchar(level_counts)
if (!all(is.na(x))) {
levels <- names(level_counts)
counts <- sprintf(
"%s [%.2f%%]",
format(level_counts, width = max_levels_signif), prop.table(level_counts) * 100
)
} else {
levels <- character(0)
counts <- numeric(0)
}
summary <- data.frame(
Level = levels,
Count = counts,
stringsAsFactors = FALSE
)
# sort the dataset in decreasing order of counts (needed as character variables default to alphabetical)
summary <- summary[order(summary$Count, decreasing = TRUE), ]
dom_opts <- if (nrow(summary) <= 10) {
"<t>"
} else {
"<lf<t>ip>"
}
DT::datatable(summary, rownames = FALSE, options = list(dom = dom_opts, pageLength = dt_rows))
} else if (inherits(x, "Date") || inherits(x, "POSIXct") || inherits(x, "POSIXlt")) {
summary <-
data.frame(
Statistic = c("min", "median", "max"),
Value = c(
min(x, na.rm = TRUE),
stats::median(x, na.rm = TRUE),
max(x, na.rm = TRUE)
)
)
DT::datatable(summary, rownames = FALSE, options = list(dom = "<t>", pageLength = dt_rows))
} else {
NULL
}
}
#' Plot variable
#'
#' Creates summary plot with statistics relevant to data type.
#'
#' @inheritParams shared_params
#' @param var vector of any type to be plotted. For numeric variables it produces histogram with
#' density line, for factors it creates frequency plot
#' @param var_lab text describing selected variable to be displayed on the plot
#' @param wrap_character (`numeric`) number of characters at which to wrap text values of `var`
#' @param numeric_as_factor (`logical`) should the numeric variable be treated as a factor
#' @param display_density (`logical`) should density estimation be displayed for numeric values
#' @param remove_NA_hist (`logical`) should `NA` values be removed for histogram of factor like variables
#' @param outlier_definition if 0 no outliers are removed, otherwise
#' outliers (those more than outlier_definition*IQR below/above Q1/Q3 be removed)
#' @param records_for_factor (`numeric`) if the number of factor levels is >= than this value then
#' a graph of the factors isn't shown, only a list of values
#'
#' @return plot
#' @keywords internal
plot_var_summary <- function(var,
var_lab,
wrap_character = NULL,
numeric_as_factor,
display_density = is.numeric(var),
remove_NA_hist = FALSE, # nolint: object_name.
outlier_definition,
records_for_factor,
ggplot2_args) {
checkmate::assert_character(var_lab)
checkmate::assert_numeric(wrap_character, null.ok = TRUE)
checkmate::assert_flag(numeric_as_factor)
checkmate::assert_flag(display_density)
checkmate::assert_logical(remove_NA_hist, null.ok = TRUE)
checkmate::assert_number(outlier_definition, lower = 0, finite = TRUE)
checkmate::assert_integerish(records_for_factor, lower = 0, len = 1, any.missing = FALSE)
checkmate::assert_class(ggplot2_args, "ggplot2_args")
grid::grid.newpage()
plot_main <- if (is.factor(var) || is.character(var) || is.logical(var)) {
groups <- unique(as.character(var))
len_groups <- length(groups)
if (len_groups >= records_for_factor) {
grid::textGrob(
sprintf(
"%s unique values\n%s:\n %s\n ...\n %s",
len_groups,
var_lab,
paste(utils::head(groups), collapse = ",\n "),
paste(utils::tail(groups), collapse = ",\n ")
),
x = grid::unit(1, "line"),
y = grid::unit(1, "npc") - grid::unit(1, "line"),
just = c("left", "top")
)
} else {
if (!is.null(wrap_character)) {
var <- stringr::str_wrap(var, width = wrap_character)
}
var <- if (isTRUE(remove_NA_hist)) as.vector(stats::na.omit(var)) else var
ggplot2::ggplot(data.frame(var), ggplot2::aes(x = forcats::fct_infreq(as.factor(var)))) +
ggplot2::geom_bar(
stat = "count", ggplot2::aes(fill = ifelse(is.na(var), "withcolor", "")), show.legend = FALSE
) +
ggplot2::scale_fill_manual(values = c("gray50", "tan"))
}
} else if (is.numeric(var)) {
validate(need(any(!is.na(var)), "No data left to visualize."))
# Filter out NA
var <- var[which(!is.na(var))]
validate(need(!any(is.infinite(var)), "Cannot display graph when data includes infinite values"))
if (numeric_as_factor) {
var <- factor(var)
ggplot2::ggplot(NULL, ggplot2::aes(x = var)) +
ggplot2::geom_histogram(stat = "count")
} else {
# remove outliers
if (outlier_definition != 0) {
number_records <- length(var)
var <- remove_outliers_from(var, outlier_definition)
number_outliers <- number_records - length(var)
outlier_text <- paste0(
number_outliers, " outliers (",
round(number_outliers / number_records * 100, 2),
"% of non-missing records) not shown"
)
validate(need(
length(var) > 1,
"At least two data points must remain after removing outliers for this graph to be displayed"
))
}
## histogram
binwidth <- get_bin_width(var)
p <- ggplot2::ggplot(data = data.frame(var = var), ggplot2::aes(x = var, y = ggplot2::after_stat(count))) +
ggplot2::geom_histogram(binwidth = binwidth) +
ggplot2::scale_y_continuous(
sec.axis = ggplot2::sec_axis(
trans = ~ . / nrow(data.frame(var = var)),
labels = scales::percent,
name = "proportion (in %)"
)
)
if (display_density) {
p <- p + ggplot2::geom_density(ggplot2::aes(y = ggplot2::after_stat(count * binwidth)))
}
if (outlier_definition != 0) {
p <- p + ggplot2::annotate(
geom = "text",
label = outlier_text,
x = Inf, y = Inf,
hjust = 1.02, vjust = 1.2,
color = "black",
# explicitly modify geom text size according
size = ggplot2_args[["theme"]][["text"]][["size"]] / 3.5
)
}
p
}
} else if (inherits(var, "Date") || inherits(var, "POSIXct") || inherits(var, "POSIXlt")) {
var_num <- as.numeric(var)
binwidth <- get_bin_width(var_num, 1)
p <- ggplot2::ggplot(data = data.frame(var = var), ggplot2::aes(x = var, y = ggplot2::after_stat(count))) +
ggplot2::geom_histogram(binwidth = binwidth)
} else {
grid::textGrob(
paste(strwrap(
utils::capture.output(utils::str(var)),
width = .9 * grid::convertWidth(grid::unit(1, "npc"), "char", TRUE)
), collapse = "\n"),
x = grid::unit(1, "line"), y = grid::unit(1, "npc") - grid::unit(1, "line"), just = c("left", "top")
)
}
dev_ggplot2_args <- teal.widgets::ggplot2_args(
labs = list(x = var_lab)
)
###
all_ggplot2_args <- teal.widgets::resolve_ggplot2_args(
ggplot2_args,
module_plot = dev_ggplot2_args
)
if (is.ggplot(plot_main)) {
if (is.numeric(var) && !numeric_as_factor) {
# numeric not as factor
plot_main <- plot_main +
theme_light() +
list(
labs = do.call("labs", all_ggplot2_args$labs),
theme = do.call("theme", all_ggplot2_args$theme)
)
} else {
# factor low number of levels OR numeric as factor OR Date
plot_main <- plot_main +
theme_light() +
list(
labs = do.call("labs", all_ggplot2_args$labs),
theme = do.call("theme", all_ggplot2_args$theme)
)
}
plot_main <- ggplot2::ggplotGrob(plot_main)
}
grid::grid.draw(plot_main)
plot_main
}
is_num_var_short <- function(.unique_records_for_factor, input, data_for_analysis) {
length(unique(data_for_analysis()$data)) < .unique_records_for_factor && !is.null(input$numeric_as_factor)
}
#' Validates the variable browser inputs
#'
#' @param input (`session$input`) the `shiny` session input
#' @param plot_var (`list`) list of a data frame and an array of variable names
#' @param data (`teal_data`) the datasets passed to the module
#'
#' @returns `logical` TRUE if validations pass; a `shiny` validation error otherwise
#' @keywords internal
validate_input <- function(input, plot_var, data) {
reactive({
dataset_name <- req(input$tabset_panel)
varname <- plot_var$variable[[dataset_name]]
validate(need(dataset_name, "No data selected"))
validate(need(varname, "No variable selected"))
df <- data()[[dataset_name]]
teal::validate_has_data(df, 1)
teal::validate_has_variable(varname = varname, data = df, "Variable not available")
TRUE
})
}
get_plotted_data <- function(input, plot_var, data) {
dataset_name <- input$tabset_panel
varname <- plot_var$variable[[dataset_name]]
df <- data()[[dataset_name]]
var_description <- teal.data::col_labels(df)[[varname]]
list(data = df[[varname]], var_description = var_description)
}
#' Renders the left-hand side `tabset` panel of the module
#'
#' @param datanames (`character`) the name of the dataset
#' @param parent_dataname (`character`) the name of a parent `dataname` to filter out variables from
#' @param data (`teal_data`) the object containing all datasets
#' @param input (`session$input`) the `shiny` session input
#' @param output (`session$output`) the `shiny` session output
#' @param columns_names (`environment`) the environment containing bindings for each dataset
#' @param plot_var (`list`) the list containing the currently selected dataset (tab) and its column names
#' @keywords internal
render_tabset_panel_content <- function(datanames, parent_dataname, output, data, input, columns_names, plot_var) {
lapply(datanames, render_single_tab,
input = input,
output = output,
data = data,
parent_dataname = parent_dataname,
columns_names = columns_names,
plot_var = plot_var
)
}
#' Renders a single tab in the left-hand side tabset panel
#'
#' Renders a single tab in the left-hand side tabset panel. The rendered tab contains
#' information about one dataset out of many presented in the module.
#'
#' @param dataset_name (`character`) the name of the dataset contained in the rendered tab
#' @param parent_dataname (`character`) the name of a parent `dataname` to filter out variables from
#' @inheritParams render_tabset_panel_content
#' @keywords internal
render_single_tab <- function(dataset_name, parent_dataname, output, data, input, columns_names, plot_var) {
render_tab_header(dataset_name, output, data)
render_tab_table(
dataset_name = dataset_name,
parent_dataname = parent_dataname,
output = output,
data = data,
input = input,
columns_names = columns_names,
plot_var = plot_var
)
}
#' Renders the text headlining a single tab in the left-hand side tabset panel
#'
#' @param dataset_name (`character`) the name of the dataset of the tab
#' @inheritParams render_tabset_panel_content
#' @keywords internal
render_tab_header <- function(dataset_name, output, data) {
dataset_ui_id <- paste0("dataset_summary_", dataset_name)
output[[dataset_ui_id]] <- renderText({
df <- data()[[dataset_name]]
join_keys <- teal.data::join_keys(data())
if (!is.null(join_keys)) {
key <- teal.data::join_keys(data())[dataset_name, dataset_name]
} else {
key <- NULL
}
sprintf(
"Dataset with %s unique key rows and %s variables",
nrow(unique(`if`(length(key) > 0, df[, key, drop = FALSE], df))),
ncol(df)
)
})
}
#' Renders the table for a single dataset in the left-hand side tabset panel
#'
#' The table contains column names, column labels,
#' small summary about NA values and `sparkline` (if appropriate).
#'
#' @param dataset_name (`character`) the name of the dataset
#' @param parent_dataname (`character`) the name of a parent `dataname` to filter out variables from
#' @inheritParams render_tabset_panel_content
#' @keywords internal
render_tab_table <- function(dataset_name, parent_dataname, output, data, input, columns_names, plot_var) {
table_ui_id <- paste0("variable_browser_", dataset_name)
output[[table_ui_id]] <- DT::renderDataTable({
df <- data()[[dataset_name]]
get_vars_df <- function(input, dataset_name, parent_name, data) {
data_cols <- colnames(df)
if (isTRUE(input$show_parent_vars)) {
data_cols
} else if (dataset_name != parent_name && parent_name %in% names(data)) {
setdiff(data_cols, colnames(data()[[parent_name]]))
} else {
data_cols
}
}
if (length(parent_dataname) > 0) {
df_vars <- get_vars_df(input, dataset_name, parent_dataname, data)
df <- df[df_vars]
}
if (is.null(df) || ncol(df) == 0) {
columns_names[[dataset_name]] <- character(0)
df_output <- data.frame(
Type = character(0),
Variable = character(0),
Label = character(0),
Missings = character(0),
Sparklines = character(0),
stringsAsFactors = FALSE
)
} else {
# extract data variable labels
labels <- teal.data::col_labels(df)
columns_names[[dataset_name]] <- names(labels)
# calculate number of missing values
missings <- vapply(
df,
var_missings_info,
FUN.VALUE = character(1),
USE.NAMES = FALSE
)
# get icons proper for the data types
icons <- vapply(df, function(x) class(x)[1L], character(1L))
join_keys <- teal.data::join_keys(data())
if (!is.null(join_keys)) {
icons[intersect(join_keys[dataset_name, dataset_name], colnames(df))] <- "primary_key"
}
icons <- variable_type_icons(icons)
# generate sparklines
sparklines_html <- vapply(
df,
create_sparklines,
FUN.VALUE = character(1),
USE.NAMES = FALSE
)
df_output <- data.frame(
Type = icons,
Variable = names(labels),
Label = labels,
Missings = missings,
Sparklines = sparklines_html,
stringsAsFactors = FALSE
)
}
# Select row 1 as default / fallback
selected_ix <- 1
# Define starting page index (base-0 index of the first item on page
# note: in many cases it's not the item itself
selected_page_ix <- 0
# Retrieve current selected variable if any
isolated_variable <- isolate(plot_var$variable[[dataset_name]])
if (!is.null(isolated_variable)) {
index <- which(columns_names[[dataset_name]] == isolated_variable)[1]
if (!is.null(index) && !is.na(index) && length(index) > 0) selected_ix <- index
}
# Retrieve the index of the first item of the current page
# it works with varying number of entries on the page (10, 25, ...)
table_id_sel <- paste0("variable_browser_", dataset_name, "_state")
dt_state <- isolate(input[[table_id_sel]])
if (selected_ix != 1 && !is.null(dt_state)) {
selected_page_ix <- floor(selected_ix / dt_state$length) * dt_state$length
}
DT::datatable(
df_output,
escape = FALSE,
rownames = FALSE,
selection = list(mode = "single", target = "row", selected = selected_ix),
options = list(
fnDrawCallback = htmlwidgets::JS("function() { HTMLWidgets.staticRender(); }"),
pageLength = input[[paste0(table_ui_id, "_rows")]],
displayStart = selected_page_ix
)
)
})
}
#' Creates observers updating the currently selected column
#'
#' The created observers update the column currently selected in the left-hand side
#' tabset panel.
#'
#' @note
#' Creates an observer for each dataset (each tab in the tabset panel).
#'
#' @inheritParams render_tabset_panel_content
#' @keywords internal
establish_updating_selection <- function(datanames, input, plot_var, columns_names) {
lapply(datanames, function(dataset_name) {
table_ui_id <- paste0("variable_browser_", dataset_name)
table_id_sel <- paste0(table_ui_id, "_rows_selected")
observeEvent(input[[table_id_sel]], {
plot_var$data <- dataset_name
plot_var$variable[[dataset_name]] <- columns_names[[dataset_name]][input[[table_id_sel]]]
})
})
}
get_bin_width <- function(x_vec, scaling_factor = 2) {
x_vec <- x_vec[!is.na(x_vec)]
qntls <- stats::quantile(x_vec, probs = c(0.1, 0.25, 0.75, 0.9), type = 2)
iqr <- qntls[3] - qntls[2]
binwidth <- max(scaling_factor * iqr / length(x_vec) ^ (1 / 3), sqrt(qntls[4] - qntls[1])) # styler: off
binwidth <- ifelse(binwidth == 0, 1, binwidth)
# to ensure at least two bins when variable span is very small
x_span <- diff(range(x_vec))
if (isTRUE(x_span / binwidth >= 2)) binwidth else x_span / 2
}
#' Removes the outlier observation from an array
#'
#' @param var (`numeric`) a numeric vector
#' @param outlier_definition (`numeric`) if `0` then no outliers are removed, otherwise
#' outliers (those more than `outlier_definition*IQR below/above Q1/Q3`) are removed
#' @returns (`numeric`) vector without the outlier values
#' @keywords internal
remove_outliers_from <- function(var, outlier_definition) {
if (outlier_definition == 0) {
return(var)
}
q1_q3 <- stats::quantile(var, probs = c(0.25, 0.75), type = 2, na.rm = TRUE)
iqr <- q1_q3[2] - q1_q3[1]
var[var >= q1_q3[1] - outlier_definition * iqr & var <= q1_q3[2] + outlier_definition * iqr]
}
# sparklines ----
#' S3 generic for `sparkline` widget HTML
#'
#' Generates the `sparkline` HTML code corresponding to the input array.
#' For numeric variables creates a box plot, for character and factors - bar plot.
#' Produces an empty string for variables of other types.
#'
#' @param arr vector of any type and length
#' @param width `numeric` the width of the `sparkline` widget (pixels)
#' @param bar_spacing `numeric` the spacing between the bars (in pixels)
#' @param bar_width `numeric` the width of the bars (in pixels)
#' @param ... `list` additional options passed to bar plots of `jquery.sparkline`;
#' see [`jquery.sparkline docs`](https://omnipotent.net/jquery.sparkline/#common)
#'
#' @return Character string containing HTML code of the `sparkline` HTML widget.
#' @keywords internal
create_sparklines <- function(arr, width = 150, ...) {
if (all(is.null(arr))) {
return("")
}
UseMethod("create_sparklines")
}
#' @rdname create_sparklines
#' @keywords internal
#' @export
create_sparklines.logical <- function(arr, ...) {
create_sparklines(as.factor(arr))
}
#' @rdname create_sparklines
#' @keywords internal
#' @export
create_sparklines.numeric <- function(arr, width = 150, ...) {
if (any(is.infinite(arr))) {
return(as.character(tags$code("infinite values", class = "text-blue")))
}
if (length(arr) > 100000) {
return(as.character(tags$code("Too many rows (>100000)", class = "text-blue")))
}
arr <- arr[!is.na(arr)]
sparkline::spk_chr(unname(arr), type = "box", width = width, ...)
}
#' @rdname create_sparklines
#' @keywords internal
#' @export
create_sparklines.character <- function(arr, ...) {
return(create_sparklines(as.factor(arr)))
}
#' @rdname create_sparklines
#' @keywords internal
#' @export
create_sparklines.factor <- function(arr, width = 150, bar_spacing = 5, bar_width = 20, ...) {
decreasing_order <- TRUE
counts <- table(arr)
if (length(counts) >= 100) {
return(as.character(tags$code("> 99 levels", class = "text-blue")))
} else if (length(counts) == 0) {
return(as.character(tags$code("no levels", class = "text-blue")))
} else if (length(counts) == 1) {
return(as.character(tags$code("one level", class = "text-blue")))
}
# Summarize the occurences of different levels
# and get the maximum and minimum number of occurences
# This is needed for the sparkline to correctly display the bar plots
# Otherwise they are cropped
counts <- sort(counts, decreasing = decreasing_order, method = "radix")
max_value <- if (decreasing_order) counts[1] else counts[length[counts]]
max_value <- unname(max_value)
sparkline::spk_chr(
unname(counts),
type = "bar",
chartRangeMin = 0,
chartRangeMax = max_value,
width = width,
barWidth = bar_width,
barSpacing = bar_spacing,
tooltipFormatter = custom_sparkline_formatter(names(counts), as.vector(counts))
)
}
#' @rdname create_sparklines
#' @keywords internal
#' @export
create_sparklines.Date <- function(arr, width = 150, bar_spacing = 5, bar_width = 20, ...) {
arr_num <- as.numeric(arr)
arr_num <- sort(arr_num, decreasing = FALSE, method = "radix")
binwidth <- get_bin_width(arr_num, 1)
bins <- floor(diff(range(arr_num)) / binwidth) + 1
if (all(is.na(bins))) {
return(as.character(tags$code("only NA", class = "text-blue")))
} else if (bins == 1) {
return(as.character(tags$code("one date", class = "text-blue")))
}
counts <- as.vector(unname(base::table(cut(arr_num, breaks = bins))))
max_value <- max(counts)
start_bins <- as.integer(seq(1, length(arr_num), length.out = bins))
labels_start <- as.character(as.Date(arr_num[start_bins], origin = as.Date("1970-01-01")))
labels <- paste("Start:", labels_start)
sparkline::spk_chr(
unname(counts),
type = "bar",
chartRangeMin = 0,
chartRangeMax = max_value,
width = width,
barWidth = bar_width,
barSpacing = bar_spacing,
tooltipFormatter = custom_sparkline_formatter(labels, counts)
)
}
#' @rdname create_sparklines
#' @keywords internal
#' @export
create_sparklines.POSIXct <- function(arr, width = 150, bar_spacing = 5, bar_width = 20, ...) {
arr_num <- as.numeric(arr)
arr_num <- sort(arr_num, decreasing = FALSE, method = "radix")
binwidth <- get_bin_width(arr_num, 1)
bins <- floor(diff(range(arr_num)) / binwidth) + 1
if (all(is.na(bins))) {
return(as.character(tags$code("only NA", class = "text-blue")))
} else if (bins == 1) {
return(as.character(tags$code("one date-time", class = "text-blue")))
}
counts <- as.vector(unname(base::table(cut(arr_num, breaks = bins))))
max_value <- max(counts)
start_bins <- as.integer(seq(1, length(arr_num), length.out = bins))
labels_start <- as.character(format(as.POSIXct(arr_num[start_bins], origin = as.Date("1970-01-01")), "%Y-%m-%d"))
labels <- paste("Start:", labels_start)
sparkline::spk_chr(
unname(counts),
type = "bar",
chartRangeMin = 0,
chartRangeMax = max_value,
width = width,
barWidth = bar_width,
barSpacing = bar_spacing,
tooltipFormatter = custom_sparkline_formatter(labels, counts)
)
}
#' @rdname create_sparklines
#' @keywords internal
#' @export
create_sparklines.POSIXlt <- function(arr, width = 150, bar_spacing = 5, bar_width = 20, ...) {
arr_num <- as.numeric(arr)
arr_num <- sort(arr_num, decreasing = FALSE, method = "radix")
binwidth <- get_bin_width(arr_num, 1)
bins <- floor(diff(range(arr_num)) / binwidth) + 1
if (all(is.na(bins))) {
return(as.character(tags$code("only NA", class = "text-blue")))
} else if (bins == 1) {
return(as.character(tags$code("one date-time", class = "text-blue")))
}
counts <- as.vector(unname(base::table(cut(arr_num, breaks = bins))))
max_value <- max(counts)
start_bins <- as.integer(seq(1, length(arr_num), length.out = bins))
labels_start <- as.character(format(as.POSIXct(arr_num[start_bins], origin = as.Date("1970-01-01")), "%Y-%m-%d"))
labels <- paste("Start:", labels_start)
sparkline::spk_chr(
unname(counts),
type = "bar",
chartRangeMin = 0,
chartRangeMax = max_value,
width = width,
barWidth = bar_width,
barSpacing = bar_spacing,
tooltipFormatter = custom_sparkline_formatter(labels, counts)
)
}
#' @rdname create_sparklines
#' @keywords internal
#' @export
create_sparklines.default <- function(arr, width = 150, ...) {
as.character(tags$code("unsupported variable type", class = "text-blue"))
}
custom_sparkline_formatter <- function(labels, counts) {
htmlwidgets::JS(
sprintf(
"function(sparkline, options, field) {
return 'ID: ' + %s[field[0].offset] + '<br>' + 'Count: ' + %s[field[0].offset];
}",
jsonlite::toJSON(labels),
jsonlite::toJSON(counts)
)
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.