# Global knitr chunk defaults for the whole report: code, messages and
# warnings are hidden, output is not collapsed and is prefixed with "#>",
# and figures are centred at 12 x 9.
knitr::opts_chunk$set(
  echo = FALSE,
  message = FALSE,
  warning = FALSE,
  collapse = FALSE,
  comment = "#>",
  fig.align = "center",
  fig.width = 12,
  fig.height = 9
)
# Load required packages
library(dplyr)
library(ggplot2)
library(kableExtra)
library(dlookr)
library(htmltools)
library(reactable)

# Look up the report inputs by name, starting from the .dlookrEnv environment.
# get() raises an error if a name cannot be found.
author <- get("author", envir = .dlookrEnv)
sample_percent <- get("sample_percent", envir = .dlookrEnv)
targetVariable <- get("targetVariable", envir = .dlookrEnv)
reportData <- get("reportData", envir = .dlookrEnv)

```{css, echo=FALSE}
.pagedjs_page.pagedjs_first_page .pagedjs_margin-top-right>.pagedjs_margin-content::after {
  content: url("$logo$");
}

.title {
  color: $title_color$;
}

.subtitle {
  color: $subtitle_color$;
  font-size: 2.5em !important;
}
```

# Overview
## Data Structures

```r
# Total number of observations before any sampling. It is reported in the
# overview table below instead of the sampled row count.
N <- NROW(reportData)

# Draw a random sample of rows when sample_percent is below 100.
if (sample_percent < 100) {
  N_sample <- ceiling(N * sample_percent / 100)
  # seq_len() stays empty for N == 0, whereas seq(0) would yield c(1, 0).
  idx <- sample(seq_len(N), size = N_sample)

  # drop = FALSE keeps a one-column data.frame from collapsing to a vector.
  reportData <- reportData[idx, , drop = FALSE]
} else {
  N_sample <- N
}

# An empty target specification is normalised to NULL.
if (length(targetVariable) == 0) targetVariable <- NULL

# Summarise the (possibly sampled) data.
ov <- overview(reportData)

# Split the overview for side-by-side display: the first nine rows go to the
# left table, the remainder to the right table with row names restarted at 1.
left_rows <- seq_len(min(9, nrow(ov)))
tab_left <- ov[left_rows, ]
tab_right <- ov[-left_rows, ]
rownames(tab_right) <- seq_len(nrow(tab_right))

# Post-process the left table:
#  * "observations" shows the full row count N rather than the sample size;
#  * "memory size" is converted to MB when it is at least 1 MB and to KB
#    otherwise. The unit label is chosen from the raw value *before* the value
#    is converted, so label and number always agree.
tab_left <- tab_left %>%
  mutate(value = ifelse(metrics %in% "observations", N, value)) %>%
  mutate(metrics = ifelse(metrics %in% "memory size",
                          ifelse(value >= 1024^2, "memory size (MB)",
                                 "memory size (KB)"),
                          metrics)) %>%
  mutate(value = ifelse(metrics %in% "memory size (MB)",
                        round(value / 1024^2),
                        ifelse(metrics %in% "memory size (KB)",
                               round(value / 1024), value)))

# Render both tables next to each other as HTML. The gsub() call rewrites the
# "font-size: initial" declaration found in the generated HTML to 12px.
knitr::kables(format = "html",
  list(
    knitr::kable(tab_left, digits = 2, format = "html", valign = "t",
                 format.args = list(big.mark = ","),
                 table.attr = "style=\"margin-right:40px !important;\"") %>%
      kable_styling(full_width = FALSE, font_size = 15),
    knitr::kable(tab_right, digits = 2, format = "html", valign = "t",
                 format.args = list(big.mark = ",")) %>%
      kable_styling(full_width = FALSE, font_size = 15)
    ),
  caption = "Data structures and types") %>%
  gsub("font-size: initial !important;",
       "font-size: 12px !important;", .) %>%
  cat()

## Job Information

# Assemble the job-information table: three rows describing the dataset
# followed by three rows describing the report job.
division <- c("dataset", "dataset", "dataset", "job", "job", "job")
metrics <- c("dataset", "dataset type", "target", "samples",
             "created", "created by")

# Label for the target row. A plain if/else is used instead of a scalar
# ifelse(); a missing or empty target is shown as "not defined".
target_label <- if (length(targetVariable) == 0) {
  "not defined"
} else {
  unname(targetVariable)[1]
}

# Fall back to "dlookr" when no usable author is available. A NULL author
# would otherwise vanish inside c() and leave `value` one element short,
# making data.frame() fail.
has_author <- length(author) > 0 && !is.na(author[1]) && nzchar(author[1])
created_by <- if (has_author) author[1] else "dlookr"

# "$dataset$" and "$date$" look like template placeholders that are presumably
# substituted before this code runs (NOTE(review): confirm against the caller).
value <- c("$dataset$",
           class(reportData)[1],
           target_label,
           paste0(format(N_sample, big.mark = ","), " / ",
                  format(N, big.mark = ","), " (", sample_percent, "%)"),
           "$date$",
           created_by)

info_job <- data.frame(division = division, metrics = metrics, value = value)

cap <- "Job informations"

print_tab(info_job, caption = cap)

# Imputation

## Missing Values

# Call html_paged_impute_missing() on reportData, passing the target variable.
# Presumably this emits the missing-value imputation section (helper not shown).
html_paged_impute_missing(reportData, target = targetVariable)

## Outliers

# Call html_paged_impute_outlier() on reportData.
# Presumably this emits the outlier imputation section (helper not shown).
html_paged_impute_outlier(reportData)

# Resolving Skewness

# Call html_paged_resolve_skewness() on reportData.
# Presumably this emits the skewness-resolution section (helper not shown).
html_paged_resolve_skewness(reportData)

# Binning

# Call html_paged_binning() on reportData.
# Presumably this emits the binning section (helper not shown).
html_paged_binning(reportData)

$targeted_eda$

## Optimal Binning

# Call html_paged_optimal_binning() on reportData, passing the target variable.
# Presumably this emits the optimal-binning section (helper not shown).
html_paged_optimal_binning(reportData, target = targetVariable)

$targeted_eda$



choonghyunryu/dlookr documentation built on June 11, 2024, 9:12 a.m.