knitr::opts_chunk$set(echo = FALSE, message = FALSE, warning = FALSE, collapse = FALSE, comment = "#>", fig.align = "center") knitr::opts_chunk$set(fig.width = 12, fig.height = 9)
library(dlookr) library(dplyr)
```{css, echo=FALSE} :root { --custom-grey60: rgb(102, 102, 102); --custom-grey20: rgb(204, 204, 204); --custom-grey10: rgb(230, 230, 230); --custom-blue: rgb(0, 114, 188); --custom-lightblue: rgb(204, 227, 242); --custom-orange: rgb(255, 127, 42); --custom-lightorange: rgb(255, 204, 170); }
color: $title_color$; }
.navbar {
background-color: #f0f0f0;
border-bottom : 2px solid $customColor$ !important;
}
.nav>li>a { position: relative; display: block; padding: 10px 15px; }
a { color: #337ab7 !important; background-color: transparent !important; text-decoration: none; }
a.disable-links { pointer-events: none; color: var(--custom-grey60) !important; }
/ Warnings / .tag { display: inline-block; padding: 2px 12px; border-radius: 5px; font-weight: 600; font-size: 12px; }
.variables { font-size: 16px; color: hsl(358, 42%, 56%); }
.box-type { margin-right: 4px; padding: 0 4px; border: 1px solid hsl(0, 0%, 75%); border-radius: 2px; }
.variable-info-details { margin-top: 2px; font-size: 14px; font-weight: 400; color: hsl(0, 0%, 40%); overflow: hidden; text-overflow: ellipsis; }
.variable-info-text { margin-left: 12px; font-weight: 600; overflow: hidden; text-overflow: ellipsis; }
.variable-info { display: flex; align-items: center; }
.variables-tbl { margin-top: 16px; }
.value-info-text { margin-top: 2px; font-size: 14px; font-weight: 400; color: hsl(0, 0%, 40%); overflow: hidden; text-overflow: ellipsis; text-align: right; }
/ Defined title with H1, H2, H3 / .title-h1 { font-size: 2.5em; color: $customColor$; }
.title-h2 { font-size: 2.0em; font-weight: 400; color: $customColor$; }
.title-h3 { font-size: 1.75em; font-weight: 400; color: rgb(102, 102, 102); }
```r reportData <- as.data.frame(get("reportData", .dlookrEnv)) targetVariable <- get("targetVariable", .dlookrEnv) sample_percent <- get("sample_percent", .dlookrEnv) author <- get("author", .dlookrEnv) base_family <- get("base_family", .dlookrEnv) # Number of observations N <- NROW(reportData) # solve the overview ov <- overview(reportData) # sampling with sample_percent if (sample_percent < 100) { N_sample <- ceiling(N * sample_percent / 100) idx <- sample(seq(N), size = N_sample) reportData <- reportData[idx, ] } else { N_sample <- N } if (length(targetVariable) == 0) targetVariable <- NULL
idx.numeric <- find_class(reportData, type = "numerical") nm.numeric <- find_class(reportData, type = "numerical", index = FALSE) if (!is.null(targetVariable)) { # remove target variable from variable index idx.numeric <- idx.numeric[nm.numeric != targetVariable] factor_flag <- class(pull(reportData, targetVariable))[1] %in% c("factor", "ordered") numeric_flag <- class(pull(reportData, targetVariable))[1] %in% c("integer", "numeric") target <- if (!factor_flag & numeric_flag) factor(pull(reportData, targetVariable)) else pull(reportData, targetVariable) } else { # fix error while performing eda_web_report() #83 factor_flag <- FALSE } # if all elements of a numerical variable are NA, # remove from correlation coefficient calculation idx.numeric <- idx.numeric[apply(as.data.frame(reportData[, idx.numeric]), 2, function(x) !all(is.na(x)))] # if all elements of the numerical variable are the same value, # remove from the correlation coefficient calculation idx.numeric <- idx.numeric[apply(as.data.frame(reportData[, idx.numeric]), 2, function(x) diff(range(x, na.rm = TRUE)) > 0)]
division <- c("dataset" ,"dataset" ,"dataset", "job", "job", "job") metrics <- c("dataset" ,"dataset type", "target", "samples", "created", "created by") value <- c("$dataset$", "$datatype$", ifelse(is.null(targetVariable), "not defied", targetVariable), paste0(format(N_sample, big.mark = ","), " / ", format(N, big.mark = ","), " (", sample_percent, "%)"), "$date$", ifelse(author == "", "dlookr", author)) overview <- data.frame(Divisions = division, Metrics = metrics, Values = value)
h1("Overview", id = "ID-h1-overview", class = "title-h1")
tab_left <- ov[1:9, ] tab_right <- ov[10:nrow(ov), ] rownames(tab_right) <- seq(nrow(tab_right)) tab_left <- tab_left %>% mutate(value = ifelse (metrics %in% "observations", N, value)) %>% mutate(value = ifelse (metrics %in% "memory size", ifelse(value / 1024^2 > 0, round(value / 1024^2), round(value / 1024)), value)) %>% mutate(metrics = ifelse (metrics %in% "memory size", ifelse(value / 1024^2 > 0, "memory size (MB)", "memory size (KB)"), metrics)) cap <- "Data Structures" knitr::kable(tab_left, digits = 2, caption = cap, format = "html", format.args = list(big.mark = ","), col.names = c("Divisions", "Metrics", "Values"), table.attr = "style=\"color: hsl(0, 0%, 40%);margin-right:30px !important;\"") %>% kable_styling(full_width = FALSE, font_size = 14, position = "float_left") cap <- "Data Types" knitr::kable(tab_right, digits = 2, caption = cap, format = "html", col.names = c("Divisions", "Metrics", "Values"), format.args = list(big.mark = ","), table.attr = "style=\"color: hsl(0, 0%, 40%);margin-right:30px !important;\"") %>% kable_styling(full_width = FALSE, font_size = 14, position = "float_left") cap <- "Job Information" knitr::kable(overview, caption = cap, format = "html", table.attr = "style=\"color: hsl(0, 0%, 40%);\"") %>% kable_styling(full_width = FALSE, font_size = 14, position = "left") breaks <- 9 - nrow(overview) + 2 break_line_asis(breaks)
h1("Univariate Analysis", id = "ID-h1-univariate", class = "title-h1")
h2("Descriptive Statistics", id = "ID-h2-descriptive", class = "title-h2")
in_numerical <- find_class(reportData, type = "numerical") %>% length() %>% as.logical() if (in_numerical) { html_descriptive(reportData, base_family = base_family) } else { html_cat("No numerical variable.") } break_line_asis(1)
h2("Normality Test", id = "ID-h2-normality", class = "title-h2")
if (length(idx.numeric) > 0) { suppressWarnings({ reportData %>% html_normality(theme = "$theme$", base_family = base_family) }) } else { html_cat("No numerical variable.") } break_line_asis(1)
h1("Bivariate Analysis", id = "ID-h1-bivariate", class = "title-h1")
h2("Compare Numerical Variables", id = "ID-h2-compare-numerical", class = "title-h2")
html_compare_numerical(reportData, base_family = base_family) break_line_asis(1)
h2("Compare Categorical Variables", id = "ID-h2-compare-categorical", class = "title-h2")
html_compare_category(reportData, base_family = base_family) break_line_asis(1)
h1("Multivariate Analysis", id = "ID-h1-multivariate", class = "title-h1")
h2("Correlation Analysis", id = "ID-h2-correlation", class = "title-h2")
h3("Correlation Matrix", id = "ID-h3-correlation-matrix", class = "title-h3")
html_correlation(reportData) break_line_asis(1)
h3("Correlation Plot", id = "ID-h3-correlation-plot", class = "title-h3")
if (length(idx.numeric) < 2) { html_cat("The number of numerical variables is less than 2.") } else { cap <- "Correlation coefficient matrix plot" html_cat(cap) reportData %>% correlate() %>% plot(base_family = base_family) } if (!is.null(targetVariable)) { break_line_asis(1) }
if (!is.null(targetVariable)) h1("Target based Analysis", id = "ID-h1-target-based", class = "title-h1")
if (!is.null(targetVariable)) h2("Target Variable and Numeric Variables", id = "ID-h2-group-numerical", class = "title-h2")
if (!is.null(targetVariable)) { content <- html_target_numerical(reportData, target = targetVariable, base_family = base_family) } else { content <- HTML("") } if (!is.null(content)) content if (!is.null(targetVariable)) { break_line_asis(1) }
if (!is.null(targetVariable)) h2("Target Variable and Categorical Variables", id = "ID-h2-group-categorical", class = "title-h2")
if (!is.null(targetVariable)) { content <- html_target_categorical(reportData, target = targetVariable, base_family = base_family) } else { content <- HTML("") } if (!is.null(content)) content if (!is.null(targetVariable)) { break_line_asis(1) }
if (!is.null(targetVariable) & factor_flag) h2("Grouped Correlation", id = "ID-h2-group-correlation", class = "title-h2")
if (!is.null(targetVariable) & factor_flag) { cap <- "Correlation by level of target variable" html_cat(cap) content <- html_target_correlation(reportData, target = targetVariable, base_family = base_family) } else { content <- HTML("") } if (!is.null(content)) content break_line_asis(1)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.