# Default chunk option for the whole document: echo = TRUE.
knitr::opts_chunk$set(echo = TRUE)
library(plotly)
library(dplyr)
library(gridExtra)
library(ggplot2)
library(stringr)
library(stats)
library(kableExtra)
library(data.table)
library(ggcorrplot)
library(moments)
# Infix complement of `%in%`: TRUE for each element of `x` that is absent
# from `table`.
`%notin%` <- function(x, table) {
  !(x %in% table)
}
# Format the values of `x` as coloured kableExtra cells.
#
# The background colour is chosen from the value:
#   x < 25 -> "green", x < 50 -> "orange", x < 75 -> "lightcoral",
#   anything else (including NA, whose comparisons are NA) -> "orangered".
# Text is always white and bold.
#
# x: vector of values to format.
# Returns the result of kableExtra::cell_spec() for every element of `x`.
colores <- function(x) {
  # Only the background differs between bands, so pick it first and call
  # cell_spec() a single time.
  fondo <- case_when(
    x < 25 ~ "green",
    x < 50 ~ "orange",
    x < 75 ~ "lightcoral",
    TRUE ~ "orangered"
  )
  cell_spec(x, color = "white", background = fondo, bold = TRUE)
}


Variables Continuas

Porcentaje de Missing

# Keep only the columns the report works with: continuous variables,
# categorical variables and the grouping variable.
base <- base %>% select(all_of(c(cont_vars, cat_vars, group_var)))

# Percentage (0-100) of missing values of every continuous variable within
# each level of the grouping variable. The value is scaled to 0-100 because
# `colores()` cuts at 25/50/75; a 0-1 fraction would always land in the
# lowest (green) band and the colour coding would carry no information.
miss <- base %>%
  group_by_at(group_var) %>%
  summarize_at(cont_vars, function(x) round(100 * mean(is.na(x)), 2)) %>%
  ungroup()

# Render the table: grouping column first, percentages as coloured cells.
# as.data.frame() (unlike data.frame()) leaves column names untouched, so the
# names in `cont_vars` still match when they are not syntactic R names.
miss %>%
  as.data.frame() %>%
  select(all_of(group_var), everything()) %>%
  mutate_at(cont_vars, colores) %>%
  kable(escape = FALSE, align = "c") %>%
  kable_styling(c("striped", "condensed"), full_width = FALSE)



Estadísticos Descriptivos

# First quartile (25th percentile) of `x`, ignoring missing values.
# Returns the length-one numeric produced by quantile(), named "25%".
per_25 <- function(x) {
  quantile(x, probs = 0.25, na.rm = TRUE)
}
# Third quartile (75th percentile) of `x`, ignoring missing values.
# Returns the length-one numeric produced by quantile(), named "75%".
per_75 <- function(x) {
  quantile(x, probs = 0.75, na.rm = TRUE)
}

# Descriptive statistics of the continuous variables: one row per statistic,
# one column per variable, rounded to 3 decimals. Missing values are ignored
# by every statistic.
cont_data <- base %>% select(all_of(cont_vars))

# Row label -> function computing that statistic for one column.
estadisticos <- list(
  mean     = function(x) mean(x, na.rm = TRUE),
  sd       = function(x) sd(x, na.rm = TRUE),
  min      = function(x) min(x, na.rm = TRUE),
  Q1       = per_25,
  Q2       = function(x) median(x, na.rm = TRUE),
  Q3       = per_75,
  max      = function(x) max(x, na.rm = TRUE),
  skewness = function(x) skewness(x, na.rm = TRUE)
)

# Iterate over columns rather than apply() over the data frame, so the data
# is not coerced to a matrix and each column is selected only once.
M <- vapply(
  cont_data,
  function(columna) {
    vapply(estadisticos, function(f) as.numeric(f(columna)), numeric(1))
  },
  numeric(length(estadisticos))
)
M <- round(M, 3)
colnames(M) <- cont_vars
rownames(M) <- names(estadisticos)

M %>%
  data.frame() %>%
  kable(escape = FALSE, align = "c") %>%
  kable_styling(c("striped", "condensed"), full_width = FALSE)

Correlaciones

Gráfica de Correlaciones

# Pearson correlation matrix of the continuous variables, rounded to 3
# decimals. Pairwise-complete observations are used so that a variable with
# missing values does not turn its whole row and column into NA, in line with
# the other statistics of this report, which all ignore missing values.
corr <- base %>%
  select(all_of(cont_vars)) %>%
  cor(method = "pearson", use = "pairwise.complete.obs") %>%
  round(3)

# Correlation heat map.
ggcorrplot(corr) +
  ggtitle("Grafica Correlación de Pearson") +
  theme_minimal()

Matriz de Correlaciones

# Show the correlation matrix `corr` as a striped, condensed HTML table.
kable_styling(
  kable(data.frame(corr), escape = FALSE, align = "c"),
  bootstrap_options = c("striped", "condensed"),
  full_width = FALSE
)

Gráficas Box Plot

# Build the default plot for one column of a data frame.
#
# df:  data frame holding the column.
# var: name of the column, as a single string.
#
# Returns a ggplot object: a box plot when the column's class is exactly
# "numeric", otherwise a bar chart of the counts of each distinct value
# (the column is converted to a factor for the chart).
variable_graph <- function(df, var) {
  # Extract by name instead of eval(parse()), so column names that are not
  # syntactic R (spaces, leading digits, ...) work as well.
  columna <- df[[var]]
  if (is.null(columna)) {
    stop("Column '", var, "' not found in data", call. = FALSE)
  }

  # identical() always yields one TRUE/FALSE, even for multi-class columns,
  # where `class(x) == "numeric"` would give a condition of length > 1.
  if (identical(class(columna), "numeric")) {
    ggplot(df, aes(y = .data[[var]])) +
      geom_boxplot(fill = "slateblue", alpha = 0.2) +
      xlab(var) +
      ggtitle(paste0("Boxplot ", var)) +
      ylab(var)
  } else {
    ggplot(
      df,
      aes(x = as.factor(.data[[var]]), fill = as.factor(.data[[var]]))
    ) +
      geom_bar() +
      scale_fill_brewer(palette = "Set2") +
      theme(legend.position = "none") +
      ggtitle("Grafico de barras ") +
      xlab(var) +
      ylab("N")
  }
}

# Draw several plots on one page, arranged in a grid.
#
# ...:      plot objects to draw.
# plotlist: optional list of further plot objects, appended after `...`.
# file:     accepted for backward compatibility; not used.
# cols:     number of columns of the grid when `layout` is not given.
# layout:   optional matrix giving the cell of each plot: plot i is drawn in
#           the cell(s) whose value is i. When NULL, a grid with `cols`
#           columns is built and filled column by column.
#
# With no plots nothing is drawn. A single plot is printed directly. Two or
# more plots are drawn on a new grid page.
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
  plots <- c(list(...), plotlist)
  num_plots <- length(plots)

  # Nothing to draw; also avoids indexing plots[[1]] on an empty list.
  if (num_plots == 0) {
    return(invisible(NULL))
  }

  if (is.null(layout)) {
    n_rows <- ceiling(num_plots / cols)
    layout <- matrix(seq_len(cols * n_rows), ncol = cols, nrow = n_rows)
  }

  if (num_plots == 1) {
    print(plots[[1]])
  } else {
    # grid functions are namespace-qualified, so the function does not need
    # to attach the package with library() as a side effect.
    grid::grid.newpage()
    grid::pushViewport(
      grid::viewport(layout = grid::grid.layout(nrow(layout), ncol(layout)))
    )
    for (i in seq_len(num_plots)) {
      # Row/column position(s) of the layout cell(s) assigned to plot i.
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
      print(plots[[i]], vp = grid::viewport(layout.pos.row = matchidx$row,
                                            layout.pos.col = matchidx$col))
    }
  }
}
# One plot per continuous variable, named after the variable and drawn in a
# two-column grid. lapply() is used because none of the library() calls
# visible in this file attaches purrr, which provides map().
L <- lapply(cont_vars, function(v) variable_graph(base, v))
names(L) <- cont_vars
multiplot(plotlist = L, cols = 2)

Variables Categóricas

Porcentaje de Missing

# Percentage (0-100) of missing values of every categorical variable. The
# value is scaled to 0-100 because `colores()` cuts at 25/50/75; a 0-1
# fraction would always land in the lowest (green) band.
miss <- base %>%
  summarize_at(cat_vars, function(x) round(100 * mean(is.na(x)), 2))

# Render the percentages as coloured cells. as.data.frame() (unlike
# data.frame()) leaves column names untouched, so the names in `cat_vars`
# still match when they are not syntactic R names.
miss %>%
  as.data.frame() %>%
  select(all_of(cat_vars)) %>%
  mutate_at(cat_vars, colores) %>%
  kable(escape = FALSE, align = "c") %>%
  kable_styling(c("striped", "condensed"), full_width = FALSE)


Estadísticos Descriptivos

Moda

# Apply `get_mode` (defined outside this section) with the extra argument "NO"
# to every categorical variable and show the result as a table.
kable_styling(
  kable(
    summarize_at(base, cat_vars, "get_mode", "NO"),
    escape = FALSE,
    align = "c"
  ),
  bootstrap_options = c("striped", "condensed"),
  full_width = FALSE
)


Proporción de casos iguales a la moda:

# Apply `get_mode` (defined outside this section) with the extra argument "SI"
# to every categorical variable and show the result as a table.
kable_styling(
  kable(
    summarize_at(base, cat_vars, "get_mode", "SI"),
    escape = FALSE,
    align = "c"
  ),
  bootstrap_options = c("striped", "condensed"),
  full_width = FALSE
)

Gráficas de Barra

# One plot per categorical variable, named after the variable and drawn in a
# two-column grid. lapply() is used because none of the library() calls
# visible in this file attaches purrr, which provides map().
L <- lapply(cat_vars, function(v) variable_graph(base, v))
names(L) <- cat_vars
multiplot(plotlist = L, cols = 2)


skpalominos/DSutilsR documentation built on Nov. 14, 2021, 11:15 p.m.