knitr::opts_chunk$set(echo = TRUE)
library(plotly)
library(dplyr)
library(gridExtra)
library(ggplot2)
library(stringr)
library(stats)
library(kableExtra)
library(data.table)
# Infix complement of %in%: TRUE for each element of `x` that is absent
# from `table`.
`%notin%` <- function(x, table) {
  !(x %in% table)
}
# Colour-code a vector of missing-value proportions for an HTML/LaTeX table.
#
# Each element of `x` is wrapped with kableExtra::cell_spec() using white bold
# text and a background chosen by band:
#   x < 0.25 -> "green", x < 0.50 -> "orange", x < 0.75 -> "lightcoral",
#   anything else (the case_when() fallback branch) -> "orangered".
#
# x: numeric vector (one table column).
# Returns a character vector of formatted cells, same length as `x`.
colores_miss <- function(x) {
  # Single place for the shared cell format; only the background varies.
  paint <- function(bg) {
    cell_spec(x, color = "white", background = bg, bold = TRUE)
  }
  case_when(
    x < 0.25 ~ paint("green"),
    x < 0.50 ~ paint("orange"),
    x < 0.75 ~ paint("lightcoral"),
    TRUE ~ paint("orangered")
  )
}
# Colour-code a vector of PSI values for an HTML/LaTeX table.
#
# Each element of `x` is wrapped with kableExtra::cell_spec() using white bold
# text and a background chosen by band:
#   x < 0.1 -> "green", x < 0.2 -> "orange",
#   anything else (the case_when() fallback branch) -> "orangered".
#
# x: numeric vector (one table column).
# Returns a character vector of formatted cells, same length as `x`.
colores_psi <- function(x) {
  # Single place for the shared cell format; only the background varies.
  paint <- function(bg) {
    cell_spec(x, color = "white", background = bg, bold = TRUE)
  }
  case_when(
    x < 0.1 ~ paint("green"),
    x < 0.2 ~ paint("orange"),
    TRUE ~ paint("orangered")
  )
}
# Colour-code a numeric vector by sign for an HTML/LaTeX table.
#
# Each element of `x` is wrapped with kableExtra::cell_spec() using white bold
# text; the background is "green" when x >= 0 and "orangered" for anything
# else (the case_when() fallback branch).
#
# x: numeric vector (one table column).
# Returns a character vector of formatted cells, same length as `x`.
colores_positive <- function(x) {
  # Single place for the shared cell format; only the background varies.
  paint <- function(bg) {
    cell_spec(x, color = "white", background = bg, bold = TRUE)
  }
  case_when(
    x >= 0 ~ paint("green"),
    TRUE ~ paint("orangered")
  )
}


Variables Continuas

Proporción de Missing

# Restrict `base` to the analysed columns. all_of() makes it explicit that
# `cont_vars` / `group_var` are external character vectors of column names,
# not columns of `base`.
# NOTE(review): `base`, `cont_vars` and `group_var` are defined outside this
# chunk; `group_var` is used as a single column name, as in the original
# get(group_var) call.
base <- base %>%
  select(all_of(c(cont_vars, group_var)))

# Share of missing values of every continuous variable within each level of
# the grouping variable, rounded to 3 decimals. Grouping by name keeps the
# grouping column's own name, so no post-hoc rename is needed.
miss <- base %>%
  group_by(across(all_of(group_var))) %>%
  summarise(
    across(all_of(cont_vars), ~ round(sum(is.na(.x)) / length(.x), 3)),
    .groups = "drop"
  )

# Render the table: grouping column first, every other column colour-coded
# by colores_miss().
miss %>%
  data.frame() %>%
  select(all_of(group_var), everything()) %>%
  mutate(across(-all_of(group_var), colores_miss)) %>%
  kable(escape = FALSE, align = "c") %>%
  kable_styling(c("striped", "condensed"), full_width = FALSE)



Estadísticos Descriptivos

Media

# Mean of every continuous variable within each level of the grouping
# variable, ignoring NA and rounded to 3 decimals.
# NOTE(review): `base`, `cont_vars` and `group_var` come from outside this
# chunk; `group_var` is used as a single column name.
df_stats <- base %>%
  group_by(across(all_of(group_var))) %>%
  summarise(
    across(all_of(cont_vars), ~ round(mean(.x, na.rm = TRUE), 3)),
    .groups = "drop"
  )

# Render as a striped, condensed table.
df_stats %>%
  data.frame() %>%
  kable(escape = FALSE, align = "c") %>%
  kable_styling(c("striped", "condensed"), full_width = FALSE)

Desviación Estándar

# Standard deviation of every continuous variable within each level of the
# grouping variable, ignoring NA and rounded to 3 decimals.
# NOTE(review): `base`, `cont_vars` and `group_var` come from outside this
# chunk; `group_var` is used as a single column name.
df_stats <- base %>%
  group_by(across(all_of(group_var))) %>%
  summarise(
    across(all_of(cont_vars), ~ round(sd(.x, na.rm = TRUE), 3)),
    .groups = "drop"
  )

# Render as a striped, condensed table.
df_stats %>%
  data.frame() %>%
  kable(escape = FALSE, align = "c") %>%
  kable_styling(c("striped", "condensed"), full_width = FALSE)

Mínimo

# Minimum of every continuous variable within each level of the grouping
# variable, ignoring NA and rounded to 3 decimals.
# NOTE(review): `base`, `cont_vars` and `group_var` come from outside this
# chunk; `group_var` is used as a single column name.
df_stats <- base %>%
  group_by(across(all_of(group_var))) %>%
  summarise(
    across(all_of(cont_vars), ~ round(min(.x, na.rm = TRUE), 3)),
    .groups = "drop"
  )

# Render the table: grouping column first, every other column colour-coded
# by colores_positive() (green when >= 0, orangered otherwise).
df_stats %>%
  data.frame() %>%
  select(all_of(group_var), everything()) %>%
  mutate(across(-all_of(group_var), colores_positive)) %>%
  kable(escape = FALSE, align = "c") %>%
  kable_styling(c("striped", "condensed"), full_width = FALSE)

Máximo

# Maximum of every continuous variable within each level of the grouping
# variable, ignoring NA and rounded to 3 decimals.
# NOTE(review): `base`, `cont_vars` and `group_var` come from outside this
# chunk; `group_var` is used as a single column name.
df_stats <- base %>%
  group_by(across(all_of(group_var))) %>%
  summarise(
    across(all_of(cont_vars), ~ round(max(.x, na.rm = TRUE), 3)),
    .groups = "drop"
  )

# Render the table: grouping column first, every other column colour-coded
# by colores_positive() (green when >= 0, orangered otherwise).
df_stats %>%
  data.frame() %>%
  select(all_of(group_var), everything()) %>%
  mutate(across(-all_of(group_var), colores_positive)) %>%
  kable(escape = FALSE, align = "c") %>%
  kable_styling(c("striped", "condensed"), full_width = FALSE)

Q1

# First quartile (25th percentile) of `x`, ignoring NA values.
#
# x: numeric vector.
# Returns the length-one named numeric produced by stats::quantile()
# (name "25%"); NA when `x` has no non-missing values.
per_25 <- function(x) {
  quantile(x, probs = 0.25, na.rm = TRUE)
}
# Third quartile (75th percentile) of `x`, ignoring NA values.
#
# x: numeric vector.
# Returns the length-one named numeric produced by stats::quantile()
# (name "75%"); NA when `x` has no non-missing values.
per_75 <- function(x) {
  quantile(x, probs = 0.75, na.rm = TRUE)
}
# First quartile of every continuous variable within each level of the
# grouping variable (via per_25(), which drops NA), rounded to 3 decimals.
# NOTE(review): `base`, `cont_vars` and `group_var` come from outside this
# chunk; `group_var` is used as a single column name.
df_stats <- base %>%
  group_by(across(all_of(group_var))) %>%
  summarise(
    across(all_of(cont_vars), ~ round(per_25(.x), 3)),
    .groups = "drop"
  )

# Render as a striped, condensed table.
df_stats %>%
  data.frame() %>%
  kable(escape = FALSE, align = "c") %>%
  kable_styling(c("striped", "condensed"), full_width = FALSE)

Q2

# Median of every continuous variable within each level of the grouping
# variable, ignoring NA and rounded to 3 decimals.
# NOTE(review): `base`, `cont_vars` and `group_var` come from outside this
# chunk; `group_var` is used as a single column name.
df_stats <- base %>%
  group_by(across(all_of(group_var))) %>%
  summarise(
    across(all_of(cont_vars), ~ round(median(.x, na.rm = TRUE), 3)),
    .groups = "drop"
  )

# Render as a striped, condensed table.
df_stats %>%
  data.frame() %>%
  kable(escape = FALSE, align = "c") %>%
  kable_styling(c("striped", "condensed"), full_width = FALSE)

Q3

# Third quartile of every continuous variable within each level of the
# grouping variable (via per_75(), which drops NA), rounded to 3 decimals.
# NOTE(review): `base`, `cont_vars` and `group_var` come from outside this
# chunk; `group_var` is used as a single column name.
df_stats <- base %>%
  group_by(across(all_of(group_var))) %>%
  summarise(
    across(all_of(cont_vars), ~ round(per_75(.x), 3)),
    .groups = "drop"
  )

# Render as a striped, condensed table.
df_stats %>%
  data.frame() %>%
  kable(escape = FALSE, align = "c") %>%
  kable_styling(c("striped", "condensed"), full_width = FALSE)

Índice de estabilidad poblacional (PSI)

# PSI of the continuous variables, computed by
# DSutilsR::multiple_variable_psi().
# NOTE(review): the result is used here as a matrix-like object whose row
# names are the labels shown in the `group` column; `cat_train` is defined
# outside this chunk — confirm both against the DSutilsR documentation.
df_stats <- DSutilsR::multiple_variable_psi(cont_vars, base, group_var, cat_train)

# Keep the row names as an explicit `group` column so they survive the
# dplyr steps below.
name <- rownames(df_stats)
df_stats <- df_stats %>%
  as.data.frame() %>%
  mutate(group = name)

# Render the table: `group` first, every other column colour-coded by
# colores_psi().
df_stats %>%
  mutate(across(-group, colores_psi)) %>%
  select(group, everything()) %>%
  kable(escape = FALSE, align = "c") %>%
  kable_styling(c("striped", "condensed"), full_width = FALSE)


skpalominos/DSutilsR documentation built on Nov. 14, 2021, 11:15 p.m.