R/create_report.r

Defines functions create_report

Documented in create_report

#' Create report
#'
#' This function creates a data profiling report.
#' @param data input data
#' @param output_format output format in \link{render}. Default is \code{html_document(toc = TRUE, toc_depth = 6, theme = "yeti")}.
#' @param output_file output file name in \link{render}. Default is "report.html".
#' @param output_dir output directory for report in \link{render}. Default is user's current directory.
#' @param y name of response variable if any. Response variables will be passed to appropriate plotting functions automatically.
#' @param config report configuration generated by \link{configure_report}.
#' @param report_title report title. Default is "Data Profiling Report".
#' @param \dots other arguments to be passed to \link{render}.
#' @keywords create_report
#' @details \code{config} is a named list to be evaluated by \code{create_report}.
#' Each name should exactly match a function name.
#' By doing so, that function and corresponding content will be added to the report.
#' If you do not want to include certain functions/content, do not add it to \code{config}.
#' @details \link{configure_report} generates the default template. You may customize the content using that function.
#' @details All function arguments will be passed to \link{do.call} as a list.
#' @note If both \code{y} and \code{plot_prcomp} are present, \code{y} will be removed from \code{plot_prcomp}.
#' @note If there are multiple options for the same function, all of them will be plotted.
#' For example, \code{create_report(..., y = "a", config = list("plot_bar" = list("with" = "b")))} will create 3 bar charts:
#' \itemize{
#' \item regular frequency bar chart
#' \item bar chart aggregated by response variable "a"
#' \item bar chart aggregated by \code{with} variable "b"
#' }
#' @importFrom utils browseURL
#' @import rmarkdown
#' @export
#' @seealso \link{configure_report}
#' @examples
#' \dontrun{
#' # Create report
#' create_report(iris)
#' create_report(airquality, y = "Ozone")
#'
#' # Load library
#' library(ggplot2)
#' library(data.table)
#' library(rmarkdown)
#'
#' # Set some missing values
#' diamonds2 <- data.table(diamonds)
#' for (j in 5:ncol(diamonds2)) {
#'   set(diamonds2,
#'       i = sample.int(nrow(diamonds2), sample.int(nrow(diamonds2), 1)),
#'       j,
#'       value = NA_integer_)
#' }
#'
#' # Create customized report for diamonds2 dataset
#' create_report(
#'   data = diamonds2,
#'   output_format = html_document(toc = TRUE, toc_depth = 6, theme = "flatly"),
#'   output_file = "report.html",
#'   output_dir = getwd(),
#'   y = "price",
#'   config = configure_report(
#'     add_plot_prcomp = TRUE,
#'     plot_qq_args = list("by" = "cut", sampled_rows = 1000L),
#'     plot_bar_args = list("with" = "carat"),
#'     plot_correlation_args = list("cor_args" = list("use" = "pairwise.complete.obs")),
#'     plot_boxplot_args = list("by" = "cut"),
#'     global_ggtheme = quote(theme_light())
#'   )
#' )
#' 
#' ## Configure report without `configure_report`
#' config <- list(
#'   "introduce" = list(),
#'   "plot_intro" = list(),
#'   "plot_str" = list(
#'     "type" = "diagonal",
#'     "fontSize" = 35,
#'     "width" = 1000,
#'     "margin" = list("left" = 350, "right" = 250)
#'   ),
#'   "plot_missing" = list(),
#'   "plot_histogram" = list(),
#'   "plot_density" = list(),
#'   "plot_qq" = list(sampled_rows = 1000L),
#'   "plot_bar" = list(),
#'   "plot_correlation" = list("cor_args" = list("use" = "pairwise.complete.obs")),
#'   "plot_prcomp" = list(),
#'   "plot_boxplot" = list(),
#'   "plot_scatterplot" = list(sampled_rows = 1000L)
#' )
#' }

create_report <- function(data,
                          output_format = html_document(toc = TRUE, toc_depth = 6, theme = "yeti"),
                          output_file = "report.html",
                          output_dir = getwd(),
                          y = NULL,
                          config = configure_report(),
                          report_title = "Data Profiling Report",
                          ...) {
  ## Renders the packaged R Markdown template with `data`, `config`, `y` and
  ## `report_title` passed as document parameters, writes the result to
  ## `output_dir`/`output_file`, then opens that path with browseURL().
  ## Called for its side effects; extra arguments in `...` go to render().

  ## Validate the shape of `y` before anything else: a zero-length or
  ## multi-element value would otherwise break the scalar `if` condition
  ## below with an error that never mentions `y`.
  if (!is.null(y) && length(y) != 1L) {
    stop("`y` must be NULL or a single column name!")
  }
  ## Check if input is data.table
  if (!is.data.table(data)) data <- data.table(data)
  ## Check response variable (against the column names after conversion)
  if (!is.null(y) && !(y %in% names(data))) {
    stop("`", y, "` not found in data!")
  }
  ## Get path of report markdown template. `mustWork = TRUE` raises an error
  ## here if the template is missing, rather than passing "" on to render().
  template_path <- system.file(
    "rmd_template/report.rmd",
    package = "DataExplorer",
    mustWork = TRUE
  )
  ## Values exposed to the template as document parameters
  report_params <- list(
    data = data,
    report_config = config,
    response = y,
    set_title = report_title
  )
  ## Render report; warnings raised while rendering are deliberately silenced.
  ## Intermediate files are written next to the final output.
  suppressWarnings(render(
    input = template_path,
    output_format = output_format,
    output_file = output_file,
    output_dir = output_dir,
    intermediates_dir = output_dir,
    params = report_params,
    ...
  ))
  ## Open report
  report_path <- path.expand(file.path(output_dir, output_file))
  browseURL(report_path)
}
boxuancui/eda documentation built on Feb. 2, 2024, 1:54 a.m.