# Global knitr chunk defaults for the report: hide the code, centre the
# figures, emit chunk output as-is, render figures at 150 dpi and silence
# warnings.
knitr::opts_chunk$set(
  echo = FALSE,
  warning = FALSE,
  results = "asis",
  fig.align = "center",
  dpi = 150
)
# Display missing values as empty cells in kable tables.
options(knitr.kable.NA = "")

# Packages to load
library(dplyr)
library(knitr)
library(kableExtra)
library(datascan)
library(purrr)

# Dataset and characteristics
# Use the dataset passed through the report parameters; fall back to
# dplyr::starwars when none is supplied.
data <- if (is.null(params$dataset)) dplyr::starwars else params$dataset

# Overall dimensions
nr <- nrow(data)
nc <- ncol(data)

# Column counts by type: numeric, and categorical (factor or character)
ncnum <- ncol(select_if(data, is.numeric))
nccat <- ncol(select_if(data, ~is.factor(.x) | is.character(.x)))

# Columns used as responses (targets), split by type.
target <- params$target

# Select the target columns once; both type-specific lookups start from it.
target_cols <- select(data, !!!target)

# Names and count of the categorical (factor or character) targets
target_cat <- colnames(
  select_if(target_cols, ~is.factor(.x) | is.character(.x))
)
ntarget_cat <- length(target_cat)

# Names and count of the numeric targets
target_num <- colnames(select_if(target_cols, is.numeric))
ntarget_num <- length(target_num)

# Number of covariation plots.
# The conditions are scalar, so they use the short-circuiting `&&` instead
# of the element-wise `&`.

# Categorical ~ categorical: each categorical target against every
# non-target categorical column. Without categorical targets, or when every
# categorical column is a target, count every pair of categorical columns.
if (ntarget_cat != 0 && (nccat - ntarget_cat) != 0) {
  nplot_cc <- ntarget_cat * (nccat - ntarget_cat)
} else {
  nplot_cc <- choose(nccat, 2)
}

# Numerical ~ numerical: same rule applied to the numeric columns.
if (ntarget_num != 0 && (ncnum - ntarget_num) != 0) {
  nplot_nn <- ntarget_num * (ncnum - ntarget_num)
} else {
  nplot_nn <- choose(ncnum, 2)
}

# Numerical ~ categorical: the numeric targets (or every numeric column
# when there is no numeric target) times the categorical columns.
if (ntarget_num != 0) {
  nplot_nc <- ntarget_num * nccat
} else {
  nplot_nc <- ncnum * nccat
}

# If targets are set, which covariation plots should be drawn?
# The flags are plain scalar booleans, so they are assigned directly and
# combined with the scalar `&&` operator.

# Plots involving numeric columns are skipped only when there are
# categorical targets but no numeric target.
draw_numnum <- !(ntarget_num == 0 && ntarget_cat != 0)
draw_numcat <- draw_numnum

# Categorical ~ categorical plots are skipped only when there are numeric
# targets but no categorical target.
draw_catcat <- !(ntarget_num != 0 && ntarget_cat == 0)

# Regrouping option taken from the report parameters; it is forwarded as
# `.regroup` to vis_ngcovar() and vis_ggcovar() further down.
regroup <- params$regroup

Dataset

# Summaries reused by the two tables that follow: `scd` feeds the general
# status table and `scc` the columns status table.
scd <- scan_data(data)
scc <- scan_columns(data)

General status

# General status table: first column in bold, rows from the third one up to
# two before the last are indented.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
scd %>%
  kable(align = c("l", "r")) %>%
  kable_styling(full_width = FALSE, bootstrap_options = "condensed") %>%
  add_indent(3:(nrow(scd) - 2)) %>%
  column_spec(1, bold = TRUE)

Columns status

# Columns status table. The "p_" columns are dropped, each count is turned
# into a colour bar scaled by the number of rows, and the raw "n_" count
# columns are then dropped before rendering.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
scc %>%
  select(-contains("p_")) %>%
  mutate(
    Unique = .apply_colorbar(Unique, nr),
    NAs = .apply_colorbar(n_na, nr),
    Zeros = .apply_colorbar(n_0, nr),
    "Inf" = .apply_colorbar(n_Inf, nr)
  ) %>%
  select(-contains("n_")) %>%
  kable("html", escape = FALSE, align = c("l", "l", rep("r", 4))) %>%
  kable_styling(full_width = FALSE, bootstrap_options = "condensed") %>%
  column_spec(1, bold = TRUE) %>%
  column_spec(3:6, width = "70px")

Variation


Numerical columns


# Print the section heading only when the dataset has numeric columns.
# Chunk output is emitted as-is, so cat() writes raw markdown; the second
# cat() call adds the empty line that follows the heading.
if (ncnum > 0) {
  cat("### Numerical columns", fill = TRUE)
  cat("", fill = TRUE)
}

Summary table


# Heading of the numeric summary table, printed only when the dataset has
# numeric columns.
if (ncnum > 0) {
  cat("#### Summary table", fill = TRUE)
  cat("", fill = TRUE)
}
# The summary table itself, under the same condition.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
if (ncnum > 0) {
  scan_numerics(data) %>%
    kable(
      "html",
      escape = FALSE,
      align = c("l", rep("r", 9)),
      digits = 2
    ) %>%
    kable_styling(full_width = FALSE, bootstrap_options = "condensed")
}

Graphics


# Heading of the numeric graphics, printed only when the dataset has
# numeric columns.
if (ncnum > 0) {
  cat("#### Graphics", fill = TRUE)
  cat("", fill = TRUE)
}
# Single numeric column: build the plots and draw each one in turn.
# NOTE(review): the case of several numeric columns is presumably handled
# by the chunk that the placeholder right after this block stands for --
# confirm against the original template.
if (ncnum == 1) {
  plotnum <- vis_numerics(data)
  walk(plotnum, plot)
}
wzxhzdk:9

Categorical columns


# Section heading, printed only when the dataset has categorical columns.
if (nccat > 0) {
  cat("### Categorical columns", fill = TRUE)
  cat("", fill = TRUE)
}
# Single categorical column: build the plots and draw each one in turn.
# NOTE(review): the case of several categorical columns is presumably
# handled by the chunk that the placeholder right after this block stands
# for -- confirm against the original template.
if (nccat == 1) {
  plotcat <- vis_groups(data)
  walk(plotcat, plot)
}
wzxhzdk:12

Covariation

Global covariations


Correlations


# Heading of the correlation section; needs at least two numeric columns.
if (ncnum >= 2) {
  cat("#### Correlations", fill = TRUE)
  cat("", fill = TRUE)
}
# The result of vis_corr() is the value of this top-level expression, so it
# is what the chunk displays.
if (ncnum >= 2) {
  vis_corr(data)
}

Bias corrected Cramer's V


# Heading of the Cramer's V section; needs at least two categorical
# columns.
if (nccat >= 2) {
  cat("#### Bias corrected Cramer's V", fill = TRUE)
  cat("", fill = TRUE)
}
# The result of vis_cramerv() is the value of this top-level expression, so
# it is what the chunk displays.
if (nccat >= 2) {
  vis_cramerv(data)
}

Linear R coefficient between numerical and categorical columns


# Heading of the numeric/categorical association section; needs at least
# one column of each kind. The condition is scalar, hence `&&` rather than
# the element-wise `&`.
if (nccat > 0 && ncnum > 0) {
  cat("#### Linear R coefficient between numerical and categorical columns", fill = TRUE)
  cat("", fill = TRUE)
}
# The result of vis_r() is the value of this top-level expression, so it is
# what the chunk displays.
if (nccat > 0 && ncnum > 0) {
  vis_r(data)
}

Specific covariations


Numerical ~ Numerical


# Heading of the numeric ~ numeric covariation plots, printed only when
# there is at least one such plot.
if (nplot_nn > 0) {
  cat("#### Numerical ~ Numerical", fill = TRUE)
  cat("", fill = TRUE)
}
# Single-plot case. The condition used to test `nplot_nn == 0`, which ran
# the plotting code exactly when there was nothing to plot (and no heading)
# and skipped it when there was one plot. It now uses the same `== 1` test
# as the Numerical ~ Categorical and Categorical ~ Categorical sections.
# The condition is scalar, hence `&&` rather than the element-wise `&`.
# NOTE(review): the multi-plot case is presumably handled by the chunk that
# the placeholder right after this block stands for -- confirm against the
# original template.
if (nplot_nn == 1 && draw_numnum) {
  plotnumnum <- vis_nncovar(data, !!!syms(target_num))
  walk(plotnumnum, plot)
}
wzxhzdk:21

Numerical ~ Categorical


# Heading of the numeric ~ categorical covariation plots, printed only when
# there is at least one such plot.
if (nplot_nc > 0) {
  cat("#### Numerical ~ Categorical", fill = TRUE)
  cat("", fill = TRUE)
}
# Single-plot case: build the plots for the numeric targets and draw each
# one in turn. The condition is scalar, hence `&&` rather than the
# element-wise `&`.
# NOTE(review): the multi-plot case is presumably handled by the chunk that
# the placeholder right after this block stands for -- confirm against the
# original template.
if (nplot_nc == 1 && draw_numcat) {
  plotnumcat <- vis_ngcovar(data, !!!syms(target_num), .regroup = regroup)
  walk(plotnumcat, plot)
}
wzxhzdk:24

Categorical ~ Categorical


# Heading of the categorical ~ categorical covariation plots, printed only
# when there is at least one such plot.
if (nplot_cc > 0) {
  cat("#### Categorical ~ Categorical", fill = TRUE)
  cat("", fill = TRUE)
}
# Single-plot case: build the plots for the categorical targets and draw
# each one in turn. The condition is scalar, hence `&&` rather than the
# element-wise `&`.
# NOTE(review): the multi-plot case is presumably handled by the chunk that
# the placeholder right after this block stands for -- confirm against the
# original template.
if (nplot_cc == 1 && draw_catcat) {
  plotcatcat <- vis_ggcovar(data, !!!syms(target_cat), .regroup = regroup)
  walk(plotcatcat, plot)
}
wzxhzdk:27

Appendix



```r

# Summary table of the categorical columns, shown only when the dataset has
# at least one such column. The `n` counts are turned into colour bars
# scaled by the number of rows, and the raw `p` and `n` columns are dropped.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
if (nccat > 0) {
  cat("Summary table", fill = TRUE)
  scan_groups(data) %>%
    select(-p) %>%
    mutate(N = .apply_colorbar(n, nr)) %>%
    select(-n) %>%
    kable("html", escape = FALSE, align = c("l", "l", rep("r", 2))) %>%
    kable_styling(full_width = FALSE, bootstrap_options = "condensed") %>%
    column_spec(1, bold = TRUE) %>%
    column_spec(3, width = "50px")
}

```




BenjaminLouis/datascan documentation built on May 26, 2019, 1:35 p.m.