R/create_report.R

Defines functions create_report

Documented in create_report

#' Create report
#'
#' This function creates a data profiling report.
#' @param data input data
#' @param output_format output format in \link[rmarkdown]{render}. Default is \code{html_document(toc = TRUE, toc_depth = 6, theme = "yeti")}.
#' @param output_file output file name in \link[rmarkdown]{render}. Default is "report.html".
#' @param output_dir output directory for report in \link[rmarkdown]{render}. Default is user's current directory.
#' @param y name of response variable if any. Response variables will be passed to appropriate plotting functions automatically.
#' @param plotly if \code{TRUE}, use interactive plotly charts in the report (requires the \pkg{plotly} package). Default is \code{FALSE}. Only applies to HTML output; PDF reports use static plots.
#' @param config report configuration generated by \link{configure_report}.
#' @param report_title report title. Default is "Data Profiling Report".
#' @param \dots other arguments to be passed to \link[rmarkdown]{render}.
#' @keywords create_report
#' @details \code{config} is a named list to be evaluated by \code{create_report}.
#' Each name should exactly match a function name.
#' By doing so, that function and corresponding content will be added to the report.
#' If you do not want to include certain functions/content, do not add it to \code{config}.
#' @details \link{configure_report} generates the default template. You may customize the content using that function.
#' @details All function arguments will be passed to \link[base]{do.call} as a list.
#' @note If both \code{y} and \code{plot_prcomp} are present, \code{y} will be removed from \code{plot_prcomp}.
#' @note If there are multiple options for the same function, all of them will be plotted.
#' For example, \code{create_report(..., y = "a", config = list("plot_bar" = list("with" = "b")))} will create 3 bar charts:
#' \itemize{
#' \item regular frequency bar chart
#' \item bar chart aggregated by response variable "a"
#' \item bar chart aggregated by `with` variable "b"
#' }
#' @importFrom utils browseURL
#' @import rmarkdown
#' @export
#' @seealso \link{configure_report}
#' @examples
#' \dontrun{
#' # Create report
#' create_report(iris)
#' create_report(airquality, y = "Ozone")
#' 
#' # Create report with plotly
#' # Note: It is a known issue that some facet panels may not show up in plotly.
#' # More details in the following issues:
#' # * https://github.com/plotly/plotly.R/issues/1243
#' # * https://github.com/plotly/plotly.R/issues/1962
#' create_report(airquality, y = "Ozone", plotly = TRUE)
#'
#' # Load library
#' library(ggplot2)
#' library(data.table)
#' library(rmarkdown)
#'
#' # Set some missing values
#' diamonds2 <- data.table(diamonds)
#' for (j in 5:ncol(diamonds2)) {
#'   set(diamonds2,
#'       i = sample.int(nrow(diamonds2), sample.int(nrow(diamonds2), 1)),
#'       j,
#'       value = NA_integer_)
#' }
#'
#' # Create customized report for diamonds2 dataset
#' create_report(
#'   data = diamonds2,
#'   output_format = html_document(toc = TRUE, toc_depth = 6, theme = "flatly"),
#'   output_file = "report.html",
#'   output_dir = getwd(),
#'   y = "price",
#'   config = configure_report(
#'     add_plot_prcomp = TRUE,
#'     plot_qq_args = list("by" = "cut", sampled_rows = 1000L),
#'     plot_bar_args = list("with" = "carat"),
#'     plot_correlation_args = list("cor_args" = list("use" = "pairwise.complete.obs")),
#'     plot_boxplot_args = list("by" = "cut"),
#'     global_ggtheme = quote(theme_light())
#'   )
#' )
#' 
#' ## Configure report without `configure_report`
#' config <- list(
#'   "introduce" = list(),
#'   "plot_intro" = list(),
#'   "plot_str" = list(
#'     "type" = "diagonal",
#'     "fontSize" = 35,
#'     "width" = 1000,
#'     "margin" = list("left" = 350, "right" = 250)
#'   ),
#'   "plot_missing" = list(),
#'   "plot_histogram" = list(),
#'   "plot_density" = list(),
#'   "plot_qq" = list(sampled_rows = 1000L),
#'   "plot_bar" = list(),
#'   "plot_correlation" = list("cor_args" = list("use" = "pairwise.complete.obs")),
#'   "plot_prcomp" = list(),
#'   "plot_boxplot" = list(),
#'   "plot_scatterplot" = list(sampled_rows = 1000L)
#' )
#' }

create_report <- function(data,
                          output_format = html_document(toc = TRUE, toc_depth = 6, theme = "yeti"),
                          output_file = "report.html",
                          output_dir = getwd(),
                          y = NULL,
                          plotly = FALSE,
                          config = configure_report(),
                          report_title = "Data Profiling Report",
                          ...) {
  ## Renders the packaged R Markdown template with `data`, `config`, `y`,
  ## `report_title` and `plotly` passed as document parameters, then opens the
  ## rendered file with browseURL().

  ## Coerce input to data.table if it is not one already
  if (!is.data.table(data)) data <- data.table(data)
  ## Check response variable: it must be an existing column name
  if (!is.null(y)) {
    if (!(y %in% names(data))) stop("`", y, "` not found in data!")
  }
  ## Get path of report markdown template shipped with the package
  report_dir <- system.file("rmd_template/report.rmd", package = "DataExplorer")
  ## Detect PDF output. Three forms are recognised: the literal format name,
  ## an object carrying the "pdf_document" class, or a format list whose
  ## pandoc target starts with "latex".
  is_pdf <- identical(output_format, "pdf_document") ||
    inherits(output_format, "pdf_document") ||
    (is.list(output_format) && !is.null(output_format$pandoc$to) && grepl("^latex", output_format$pandoc$to))
  ## When output format is PDF, ensure output_file has .pdf extension.
  ## Only a single file name can be normalised; anything else (e.g. NULL) is
  ## handed to render() untouched, because `grepl()` on a zero-length input
  ## would otherwise make the condition NA and abort with an unrelated error.
  has_single_name <- is.character(output_file) && length(output_file) == 1L
  if (is_pdf && has_single_name && !grepl("\\.pdf$", output_file, ignore.case = TRUE)) {
    ## Drop the existing extension (if any) before appending ".pdf"
    output_file <- paste0(sub("\\.[^.]+$", "", output_file), ".pdf")
  }
  ## Render report; intermediate files are written next to the output
  report_path <- suppressWarnings(render(
    input = report_dir,
    output_format = output_format,
    output_file = output_file,
    output_dir = output_dir,
    intermediates_dir = output_dir,
    params = list(data = data, report_config = config, response = y, set_title = report_title, plotly = plotly),
    ...
  ))
  ## Open report (use path returned by render in case extension was normalized)
  browseURL(path.expand(report_path))
}

Try the DataExplorer package in your browser

Any scripts or data that you put into this service are public.

DataExplorer documentation built on March 8, 2026, 9:06 a.m.