# Packages used throughout the report.
library(rmarkdown)
library(SmartEDA)
library(knitr)
library(scales)
library(gridExtra)
library(ggplot2)

# Register knitr's optipng hook under the chunk hook name `optipng`.
knit_hooks$set(optipng = hook_optipng)

# Data set to analyse, taken from the report parameters.
data <- params$data
Analyzing data sets to summarize the main characteristics of their variables, often with visual graphs and without using a statistical model.
Understanding the dimensions of the dataset, the variable names, the overall missing-value summary, and the data type of each variable
# NOTE(review): the first two calls look like the code listing shown to the
# reader; the objects used for rendering are built right after them. Confirm
# against the chunk options of the original template.

# Overview of the data
ExpData(data = data, type = 1)
# Structure of the data
ExpData(data = data, type = 2)

# Same two summaries, stored for the paged tables rendered further down.
ovw_tabl <- ExpData(data = data, type = 1)
ovw_tab2 <- ExpData(data = data, type = 2)
Overview of the data
# Render the stored data overview (ExpData with type = 1) as a paged table.
paged_table(ovw_tabl)
Structure of the data
# Render the stored data structure (ExpData with type = 2) as a paged table.
paged_table(ovw_tab2)
Target variable
Summary of categorical dependent variable
r Target
r label
# Frequency summary of the target variable.

# Fall back to the target column name when no descriptive label is supplied.
if (is.null(label)) {
  label <- Target
}

# `[[` always extracts the target column as a plain vector, whereas
# `data[, Target]` depends on the class of `data` (a tibble keeps a one-column
# table and a data.table evaluates `Target` as an expression).
tab_tar <- data.frame(table(data[[Target]]))
tab_tar$Descriptions <- label
names(tab_tar) <- c(Target, "Frequency", "Descriptions")
rownames(tab_tar) <- NULL

# Print the frequency table.
tab_tar
Summary of all numerical variables
# Numeric summary statistics by target group (by = "GA", gp = Target), with
# decile quantiles, shape measures and outlier statistics, rounded to 2 digits.
snv_2 <- ExpNumStat(
  data,
  by = "GA",
  gp = Target,
  Qnt = seq(0, 1, 0.1),
  MesofShape = 2,
  Outlier = TRUE,
  round = 2
)

# Drop row names so the rendered table shows only the statistic columns.
rownames(snv_2) <- NULL
Summary statistics when dependent variable is categorical r Target
. Summary statistics will be split by category level
# NOTE(review): this call repeats the one that builds snv_2 and looks like the
# code listing shown to the reader; confirm against the chunk options.
ExpNumStat(
  data,
  by = "GA",
  gp = Target,
  Qnt = seq(0, 1, 0.1),
  MesofShape = 2,
  Outlier = TRUE,
  round = 2
)

# Render the stored numeric summary as a paged table.
paged_table(snv_2)
Box plots for all numerical variables vs categorical dependent variable - Bivariate comparison only with categories
Quantile-quantile plot (Univariate)
Quantile-quantile plot for all Numerical variables
# Quantile-quantile plots for the numeric variables, laid out on 2 x 2 pages.
# `sn` is the sample setting for numeric plots and is defined outside this block.
ExpOutQQ(
  data,
  nlim = 4,
  fname = NULL,
  Page = c(2, 2),
  sample = sn
)
Density plot for all Numerical variables
# Univariate plots (type = 1, no target) for the numeric variables,
# arranged on 2 x 2 pages.
ExpNumViz(
  data,
  target = NULL,
  type = 1,
  Page = c(2, 2),
  theme = theme,
  sample = sn
)

# Scatter plots (scatter = TRUE) of the numeric variables with the target
# variable supplied, arranged on 2 x 1 pages.
ExpNumViz(
  data,
  target = Target,
  Page = c(2, 1),
  theme = theme,
  sample = sn,
  scatter = TRUE
)
Boxplot for all the numeric attributes by each category of r Target
# Numeric variables plotted against the target (type = 2), 2 x 2 per page.
ExpNumViz(
  data,
  target = Target,
  type = 2,
  theme = theme,
  Page = c(2, 2),
  sample = sn
)
Summary of categorical variable
# Summaries of the categorical variables against the target.

# When no reference class is supplied, use the first distinct target value.
# `[[` extracts the column as a vector for any data-frame class; with
# `data[, Target]` a tibble stays a one-column table and paste0() would then
# deparse the whole column instead of returning the first value.
if (is.null(Rc)) {
  Rc <- paste0(unique(data[[Target]])[1])
}

# Cross tabulation with the target variable.
et1 <- ExpCTable(
  data,
  Target = Target,
  margin = 1,
  clim = 10,
  nlim = 5,
  round = 2,
  bin = NULL,
  per = FALSE
)
rownames(et1) <- NULL

# result = "Stat" output, with Rc passed as Pclass.
et4 <- ExpCatStat(
  data,
  Target = Target,
  result = "Stat",
  clim = 10,
  nlim = 5,
  bins = 10,
  Pclass = Rc,
  plot = FALSE,
  top = 20,
  Round = 2
)
rownames(et4) <- NULL

# result = "IV" output, with the same settings.
et5 <- ExpCatStat(
  data,
  Target = Target,
  result = "IV",
  clim = 10,
  nlim = 5,
  bins = 10,
  Pclass = Rc,
  plot = FALSE,
  top = 20,
  Round = 2
)
rownames(et5) <- NULL
Cross tabulation with target variable
r Target
# NOTE(review): this call repeats the one that builds et1 and looks like the
# code listing shown to the reader; confirm against the chunk options.
# `per = FALSE` is spelled out because the shorthand `F` can be reassigned.
ExpCTable(
  data,
  Target = Target,
  margin = 1,
  clim = 10,
  nlim = 5,
  round = 2,
  bin = NULL,
  per = FALSE
)

# Render the stored cross tabulation only when et1 has dimensions.
if (!is.null(dim(et1))) {
  paged_table(et1)
}
Information Value
# NOTE(review): looks like the code listing shown to the reader for the
# result = "IV" output; confirm against the chunk options.
ExpCatStat(
  data,
  Target = Target,
  Label = label,
  result = "IV",
  clim = 10,
  nlim = 5,
  Pclass = Rc
)

# Render the stored result = "IV" output (et5) as a paged table.
paged_table(et5)
Statistical test
# NOTE(review): looks like the code listing shown to the reader for the
# result = "Stat" output; confirm against the chunk options.
ExpCatStat(
  data,
  Target = Target,
  Label = label,
  result = "Stat",
  clim = 10,
  nlim = 5,
  Pclass = Rc
)

# Render the stored result = "Stat" output (et4) as a paged table.
paged_table(et4)
Variable importance based on Information value
# Same call as the result = "Stat" summary but with plot = TRUE and top = 30;
# the returned object is kept in `varimp`.
varimp <- ExpCatStat(
  data,
  Target = Target,
  result = "Stat",
  clim = 10,
  nlim = 5,
  Pclass = Rc,
  bins = 10,
  plot = TRUE,
  top = 30,
  Round = 2
)
Graphical representation of all categorical variables
Bar plots for all categorical variables
# Plots of the categorical variables without a target, 2 x 1 per page.
# `sc` is the sample setting for categorical plots and is defined outside this
# block.
ExpCatViz(
  data,
  target = NULL,
  fname = NULL,
  clim = 10,
  margin = 2,
  theme = theme,
  Page = c(2, 1),
  sample = sc
)

# Same plots with the target variable supplied.
ExpCatViz(
  data,
  target = Target,
  fname = NULL,
  clim = 10,
  margin = 2,
  theme = theme,
  Page = c(2, 1),
  sample = sc
)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.