# Show the source of every chunk in the knitted output.
knitr::opts_chunk$set(echo = TRUE)

# Packages attached for the rest of the report.
library(plotly)
library(dplyr)
library(gridExtra)
library(ggplot2)
library(stringr)
library(stats)
library(kableExtra)
library(data.table)
library(ggcorrplot)
library(moments)

# Logical complement of %in%: TRUE for left-hand values that are absent from
# the right-hand set.
`%notin%` <- Negate(`%in%`)
#' Colour-code values as bold white text on a severity background.
#'
#' Values are compared against the cut points 25, 50 and 75:
#' below 25 -> "green", below 50 -> "orange", below 75 -> "lightcoral",
#' anything else (including NA, whose comparisons are NA) -> "orangered".
#'
#' @param x Vector of values to format.
#' @return The result of `cell_spec()` applied to `x`, one formatted cell per
#'   element.
colores <- function(x) {
  # Decide the background once, then format the whole vector in a single
  # cell_spec() call instead of building four full candidate vectors.
  background <- case_when(
    x < 25 ~ "green",
    x < 50 ~ "orange",
    x < 75 ~ "lightcoral",
    TRUE ~ "orangered"
  )
  # TRUE rather than T: T is an ordinary variable and can be reassigned.
  cell_spec(x, color = "white", background = background, bold = TRUE)
}
# Keep only the analysis columns. all_of() states explicitly that the names
# come from character vectors held outside the data frame.
base <- base %>%
  select(all_of(c(cont_vars, cat_vars, group_var)))

# Percentage (0-100) of missing values for each continuous variable within
# each level of the grouping variable. colores() colours against the cut
# points 25/50/75, so the share has to be a percentage: a 0-1 proportion
# would always fall in the lowest ("green") band.
miss <- base %>%
  group_by(across(all_of(group_var))) %>%
  summarize(
    across(all_of(cont_vars), ~ round(100 * mean(is.na(.x)), 2)),
    .groups = "drop"
  )

# Render the table with the grouping column first and every missingness
# column colour-coded. The tibble goes straight to kable(): a data.frame()
# round trip would rewrite non-syntactic column names.
miss %>%
  select(all_of(group_var), everything()) %>%
  mutate(across(all_of(cont_vars), colores)) %>%
  kable(escape = FALSE, align = "c") %>%
  kable_styling(c("striped", "condensed"), full_width = FALSE)
# First quartile of x, ignoring missing values.
per_25 <- function(x) {
  quantile(na.omit(x), probs = 0.25)
}

# Third quartile of x, ignoring missing values.
per_75 <- function(x) {
  quantile(na.omit(x), probs = 0.75)
}

# M: one row per statistic, one column per continuous variable, rounded to
# three decimals. Built inside local() so the helpers do not leak into the
# global environment.
M <- local({
  stat_funs <- list(
    mean = function(x) mean(x, na.rm = TRUE),
    sd = function(x) sd(x, na.rm = TRUE),
    min = function(x) min(x, na.rm = TRUE),
    Q1 = per_25,
    Q2 = function(x) median(x, na.rm = TRUE),
    Q3 = per_75,
    max = function(x) max(x, na.rm = TRUE),
    skewness = function(x) skewness(x, na.rm = TRUE)
  )

  # Select the columns once and iterate over them as columns. apply() would
  # first coerce the whole data frame to a matrix.
  cont_data <- base %>% select(all_of(cont_vars))
  stats_by_var <- vapply(
    cont_data,
    function(column) {
      # as.numeric() drops the "25%"/"75%" names that quantile() attaches.
      vapply(stat_funs, function(f) as.numeric(f(column)), numeric(1))
    },
    numeric(length(stat_funs))
  )
  dimnames(stats_by_var) <- list(names(stat_funs), cont_vars)

  round(stats_by_var, 3)
})

# kable() accepts the matrix directly and shows its row names; skipping
# data.frame() keeps the variable names exactly as given in cont_vars.
M %>%
  kable(escape = FALSE, align = "c") %>%
  kable_styling(c("striped", "condensed"), full_width = FALSE)
Gráfica de Correlaciones
# Pearson correlation matrix of the continuous variables, rounded to three
# decimals. use = "pairwise.complete.obs" computes each coefficient from the
# rows where both variables are present; with cor()'s default, a single NA in
# a column turns that variable's whole row and column into NA.
corr <- base %>%
  select(all_of(cont_vars)) %>%
  cor(method = "pearson", use = "pairwise.complete.obs") %>%
  round(3)

# Correlation heat map.
ggcorrplot(corr) +
  ggtitle("Grafica Correlación de Pearson") +
  theme_minimal()
Matriz de Correlaciones
# Correlation matrix as a centred, striped and condensed table.
kable_styling(
  kable(data.frame(corr), escape = FALSE, align = "c"),
  c("striped", "condensed"),
  full_width = FALSE
)
#' Plot one variable of a data frame.
#'
#' Columns of class "numeric" are drawn as a boxplot; every other column
#' (integer, character, factor, ...) is converted with `as.factor()` and drawn
#' as a bar chart of counts.
#'
#' @param df Data frame holding the variable.
#' @param var Name of the column to plot, as a string.
#' @return A ggplot object.
variable_graph <- function(df, var) {
  # df[[var]] and .data[[var]] look the column up by name, so names that are
  # not valid R syntax work too. inherits() also copes with columns carrying
  # more than one class, where `class(x) == "numeric"` yields a vector.
  if (inherits(df[[var]], "numeric")) {
    ggplot(df, aes(y = .data[[var]])) +
      geom_boxplot(fill = "slateblue", alpha = 0.2) +
      xlab(var) +
      ggtitle(paste0("Boxplot ", var)) +
      ylab(var)
  } else {
    ggplot(
      df,
      aes(x = as.factor(.data[[var]]), fill = as.factor(.data[[var]]))
    ) +
      geom_bar() +
      scale_fill_brewer(palette = "Set2") +
      theme(legend.position = "none") +
      ggtitle("Grafico de barras ") +
      xlab(var) +
      ylab("N")
  }
}

#' Draw several plots on one page using a grid layout.
#'
#' @param ... Plot objects.
#' @param plotlist Optional list of further plot objects, appended after `...`.
#' @param file Not used by the function body; kept so existing calls still
#'   match the signature.
#' @param cols Number of columns used when `layout` is NULL.
#' @param layout Optional matrix whose cells hold plot indices; plot `i` is
#'   drawn in the cells equal to `i`. When NULL, a matrix with `cols` columns
#'   is filled column by column.
#' @return Called for its drawing side effect. Returns `invisible(NULL)` when
#'   no plots are supplied.
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
  plots <- c(list(...), plotlist)
  num_plots <- length(plots)

  # Nothing to draw: stop before the layout and loop code, which would
  # otherwise index a plot that does not exist.
  if (num_plots == 0) {
    return(invisible(NULL))
  }

  if (is.null(layout)) {
    n_rows <- ceiling(num_plots / cols)
    layout <- matrix(seq_len(cols * n_rows), ncol = cols, nrow = n_rows)
  }

  if (num_plots == 1) {
    print(plots[[1]])
  } else {
    # grid functions are namespace-qualified so that calling multiplot() does
    # not attach a package as a side effect.
    grid::grid.newpage()
    grid::pushViewport(
      grid::viewport(layout = grid::grid.layout(nrow(layout), ncol(layout)))
    )
    for (i in seq_len(num_plots)) {
      # Row/column position(s) of plot i inside the layout matrix.
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
      print(
        plots[[i]],
        vp = grid::viewport(
          layout.pos.row = matchidx$row,
          layout.pos.col = matchidx$col
        )
      )
    }
  }
}
# One plot per continuous variable, named after the variable. Base lapply()
# is used because none of the packages attached by this report exports map().
L <- lapply(cont_vars, function(v) variable_graph(base, v))
names(L) <- cont_vars

# Arrange the plots in a two-column grid.
multiplot(plotlist = L, cols = 2)
# Percentage (0-100) of missing values for each categorical variable.
# colores() colours against the cut points 25/50/75, so the share has to be a
# percentage: a 0-1 proportion would always fall in the lowest ("green") band.
miss <- base %>%
  summarize(across(all_of(cat_vars), ~ round(100 * mean(is.na(.x)), 2)))

# Colour-code every column and render the table. The tibble goes straight to
# kable(): a data.frame() round trip would rewrite non-syntactic column names.
miss %>%
  mutate(across(everything(), colores)) %>%
  kable(escape = FALSE, align = "c") %>%
  kable_styling(c("striped", "condensed"), full_width = FALSE)
Moda
# Apply get_mode() to every categorical variable, forwarding "NO" as its
# extra argument, and render the result as a table.
# NOTE(review): get_mode() is defined outside this view; judging by the
# heading above, "NO" presumably requests the mode itself -- confirm.
kable_styling(
  kable(
    summarize_at(base, cat_vars, "get_mode", "NO"),
    escape = FALSE,
    align = "c"
  ),
  c("striped", "condensed"),
  full_width = FALSE
)
Proporción de casos iguales a la moda:
# Apply get_mode() to every categorical variable, forwarding "SI" as its
# extra argument, and render the result as a table.
# NOTE(review): get_mode() is defined outside this view; judging by the
# heading above, "SI" presumably requests the share of cases equal to the
# mode -- confirm.
kable_styling(
  kable(
    summarize_at(base, cat_vars, "get_mode", "SI"),
    escape = FALSE,
    align = "c"
  ),
  c("striped", "condensed"),
  full_width = FALSE
)
# One plot per categorical variable, named after the variable. Base lapply()
# is used because none of the packages attached by this report exports map().
L <- lapply(cat_vars, function(v) variable_graph(base, v))
names(L) <- cat_vars

# Arrange the plots in a two-column grid.
multiplot(plotlist = L, cols = 2)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.