#' Generating pie charts based on frequency of elements in all qualitative features
#'
#' This function develops pie charts of frequency of all Qualitative Variables
#'
#' @param lsVS is a list generated by verticalSplit function in this same package
#' @return a list of pie charts
#' @author Saurabh
#' @details
#' The input to this function is output of verticalSplit function in this package
#' It outputs a list containing pie chart of frequency of all qualitative variables.
#' Output must be stored in a variable.
#' And you could access each single pie chart as outputList[1] or outputList[2]
#' @import rCharts,reshape2
#'
#library(plyr)
############################## External function
piePlots <- function(lsVS) {
  # Build one pie chart per frequency table of the qualitative variables
  # found in `lsVS` (the list of data frames produced by verticalSplit()).
  #
  # Returns a flat list of hPlot() chart objects, one per table returned by
  # runFacsummary(); an empty list when there are no tables.

  # Compute the frequency tables once instead of on every loop iteration.
  freqTabs <- runFacsummary(lsVS)

  # seq_along() copes with zero tables, where 1:0 would iterate over c(1, 0).
  lapply(seq_along(freqTabs), function(i) {
    # `[i]` keeps the one-element list so data.frame() expands the table
    # into its two columns, which are then renamed for plotting.
    tab <- data.frame(freqTabs[i])
    names(tab) <- c("Var", "Freq")
    hPlot(x = "Var", y = "Freq", data = tab, type = "pie")
  })
}
#######################################BAR CHRT
##################################################### External function
#' Generating bar plots based on frequency of elements in all qualitative features
#'
#' This function develops bar plots of frequency of all Qualitative Variables
#'
#' @param lsVS is a list generated by verticalSplit function in this same package
#' @return a list of bar plots
#' @author Saurabh
#' @details
#' The input to this function is output of verticalSplit function in this package
#' It outputs a list containing bar plots of frequency of all qualitative variables.
#' Output must be stored in a variable.
#' And you could access each single bar plots as outputList[1] or outputList[2]
#' @import rCharts,reshape2
#'
barPlots <- function(lsVS) {
  # Build one bar chart per frequency table of the qualitative variables
  # found in `lsVS` (the list of data frames produced by verticalSplit()).
  #
  # Returns a list with one entry per table returned by runFacsummary();
  # each entry is itself a one-element list holding the mPlot() chart.
  # An empty list is returned when there are no tables.

  # Compute the frequency tables once instead of on every loop iteration.
  freqTabs <- runFacsummary(lsVS)

  # seq_along() copes with zero tables, where 1:0 would iterate over c(1, 0).
  lapply(seq_along(freqTabs), function(i) {
    # `[i]` keeps the one-element list so data.frame() expands the table
    # into its two columns, which are then renamed for plotting.
    tab <- data.frame(freqTabs[i])
    names(tab) <- c("Var", "Freq")
    chart <- mPlot(
      x = "Var", y = list("Freq"), data = tab,
      type = "Bar", labels = list("Count")
    )
    chart$set(hideHover = "auto")
    # Keep each chart wrapped in its own list: that is the shape callers
    # of this function have been receiving.
    list(chart)
  })
}
############################ Internal function
#'
#' Calculating coefficient of variance
#'
#'
#' This function calculates Coefficient of variance Quantitative Variables
#'
#' @param var any numeric vector
#' @return Numeric Coefficient of variance
#' @author Saurabh
#' @details
#' Its an internal function
CoefV <- function(var) {
  # Ratio of the standard deviation to the mean, both computed with
  # missing values removed.
  spread <- sd(var, na.rm = TRUE)
  centre <- mean(var, na.rm = TRUE)
  spread / centre
}
################################################# Internal function
#'
#' Univariate exploration of all
#' Quantitative Variables in a Dataset
#'
#' This function calculates various statistics of Quantitative variables in your data
#' The function calculates Coef. of variance,min,max,standard deviation,variance,
#' various quantiles, mean, median, skewness, kurtosis; it also counts the NA's and the number of unique values per variable.
#' @param df a data frame (your dataset)
#' @return row-bound statistics (a matrix), can be efficiently converted to a data frame
#' @author Saurabh
#' @details
#' Its an internal function
#' @import moments
#############################################################
# CarIntT<-function(df){
# names<-names(df)
# dfin<-1:dim(df)[1]
# dfot<-1:dim(df)[1]
# typ<-sapply(df, class)
# typ<-as.character(typ)
# ##integer df
# for(i in 1:length(names)){
# if(typ[i]=="integer"||typ[i]=="numeric"){
# dfin<-cbind(dfin,df[i])
# }else{
# dfot<-cbind(dfot,df[i])
# }
# }
# }
####################################### Internal function
mysummary <- function(df) {
  # Univariate statistics for every integer/numeric column of `df`.
  #
  # Returns a numeric matrix with one column per quantitative variable and
  # 18 rows, one per statistic (see `statNames`). When `df` has no
  # quantitative column the matrix has the same 18 rows and zero columns,
  # so the result can still be cbind()-ed with other summaries.

  # A column is quantitative only when its class is exactly "integer" or
  # "numeric"; checking per column avoids the misalignment sapply(df, class)
  # produces when it simplifies multi-class columns into a matrix.
  isQuant <- vapply(df, function(col) {
    cls <- class(col)
    length(cls) == 1L && cls %in% c("integer", "numeric")
  }, logical(1))
  # data.frame() keeps the existing behaviour of making column names
  # syntactically valid.
  dfin <- data.frame(df[isQuant])

  statNames <- c(
    "CoefVmy", "minmy", "maxmy", "sdmy", "varmy",
    "q0my", "q25my", "q50my", "q75my", "q90my", "q95my", "q100my",
    "meanmy", "medianmy", "skewnessmy", "kurtosismy", "naChk", "numUnq"
  )
  if (ncol(dfin) == 0L) {
    # Previously this path failed in round() because sapply() returned list().
    return(matrix(
      numeric(0),
      nrow = length(statNames), ncol = 0L,
      dimnames = list(statNames, NULL)
    ))
  }

  # Apply `fun` to every quantitative column and round the result.
  # vapply() guarantees exactly one number per column, whatever the data.
  perColumn <- function(fun, ..., digits = 3) {
    round(
      vapply(dfin, function(col) as.numeric(fun(col, ...)), numeric(1)),
      digits
    )
  }
  quantileAt <- function(p) perColumn(quantile, probs = p, na.rm = TRUE)

  rbind(
    CoefVmy = perColumn(CoefV, digits = 4),
    minmy = perColumn(min, na.rm = TRUE),
    maxmy = perColumn(max, na.rm = TRUE),
    sdmy = perColumn(sd, na.rm = TRUE),
    varmy = perColumn(var, na.rm = TRUE),
    q0my = quantileAt(0),
    q25my = quantileAt(0.25),
    q50my = quantileAt(0.5),
    q75my = quantileAt(0.75),
    q90my = quantileAt(0.9),
    q95my = quantileAt(0.95),
    q100my = quantileAt(1),
    meanmy = perColumn(mean, na.rm = TRUE),
    medianmy = perColumn(median, na.rm = TRUE),
    skewnessmy = perColumn(skewness, na.rm = TRUE),
    kurtosismy = perColumn(kurtosis, na.rm = TRUE),
    # Number of missing values per column.
    naChk = perColumn(function(col) sum(is.na(col))),
    # Number of distinct values per column (NA counts as a value, as
    # unique() keeps it). Counting per column avoids sapply() collapsing
    # equal-length unique() results into a matrix.
    numUnq = perColumn(function(col) length(unique(col)))
  )
}
###############################################Internal function
#'
#' Univariate exploration of all
#' Qualitative Variables in a Dataset
#'
#' This function calculates the frequency of various qualitative values
#' in the data
#' @param df a data frame (your dataset)
#' @return List of frequency of all internal values of all qualitative values
#' @author Saurabh
#' @details
#' Its an internal function
#'
facsummary <- function(df) {
  # Frequency tables for every qualitative column of `df`.
  #
  # A column is qualitative when its class is anything other than exactly
  # "integer" or "numeric". Returns a list with one table() per qualitative
  # column, named after the column; an empty list when there are none.

  isQuant <- vapply(df, function(col) {
    cls <- class(col)
    length(cls) == 1L && cls %in% c("integer", "numeric")
  }, logical(1))
  # data.frame() keeps the existing behaviour of making column names
  # syntactically valid.
  dfot <- data.frame(df[!isQuant])

  # lapply() always returns a list; sapply() collapsed the tables into a
  # matrix (or vector) whenever every column had the same number of levels.
  lapply(dfot, table)
}
###################################### External function I/P from vertical split
#'
#' Executes mysummary function on complete list output by verticalSplit function
#'
#' This function calculates various statistics of Quantitative variables in your data
#' The function calculates Coef. of variance,min,max,standard deviation,variance,
#' various quantiles, mean, median, skewness, kurtosis; it also counts the NA's and the number of unique values per variable.
#' @param lsVS a list of dataframes produced by verticalSplit function
#' @return row-bound statistics (a matrix), can be efficiently converted to a data frame
#' @author Saurabh
#' @details
#' Its an internal function
#' @import moments,plyr
runSummary <- function(lsVS) {
  # Run mysummary() on every data frame in `lsVS` and bind the resulting
  # statistic matrices side by side.
  #
  # Works for any number of chunks; the earlier version assumed exactly
  # five whenever more than one was supplied. Returns the single summary
  # unchanged when `lsVS` holds one data frame, and NULL when it is empty.
  reports <- lapply(seq_along(lsVS), function(i) {
    # ldply() over the one-element sub-list turns the chunk into a plain
    # data frame, as before.
    mysummary(ldply(lsVS[i], data.frame))
  })

  if (length(reports) == 1L) {
    return(reports[[1L]])
  }
  do.call(cbind, reports)
}
## lsVS = List from vertical split
####################################### External Function i/p from Vertical Split
#' Executes facsummary function on complete list output by verticalSplit function
#'
#' This function calculates the frequency of various qualitative values
#' in the data
#' @param lsVS a list of dataframes produced by verticalSplit function
#' @return List of frequency of all internal values of all qualitative values
#' @author Saurabh
#' @details
#' Its an internal function
#' @import plyr
runFacsummary <- function(lsVS) {
  # Run facsummary() on every data frame in `lsVS` and concatenate the
  # per-chunk results into one list of frequency tables.
  #
  # Works for any number of chunks; the earlier version assumed exactly
  # five whenever more than one was supplied. Returns the single result
  # unchanged when `lsVS` holds one data frame, and an empty list when
  # `lsVS` itself is empty.
  reports <- lapply(seq_along(lsVS), function(i) {
    # ldply() over the one-element sub-list turns the chunk into a plain
    # data frame, as before.
    facsummary(ldply(lsVS[i], data.frame))
  })

  if (length(reports) == 0L) {
    return(list())
  }
  if (length(reports) == 1L) {
    return(reports[[1L]])
  }
  # c() on all chunks at once replaces the nested append() calls.
  do.call(c, reports)
}
################################# ###########Internal function outputs lsVS
#' Splitting of large datasets to more manageable data frames
#'
#'
#' This function splits a large dataset column-wise into more manageable chunks of data frames
#' and returns the resulting data frames in form of a list of data frames
#' @param df a data frame (your dataset)
#' @return List of data frames created by dividing the original data
#' @author Saurabh
#' @details
#' Its an internal function
verticalSplit <- function(df) {
  # Data frames with at most 30 columns are returned whole, wrapped in a
  # one-element list.
  nCols <- length(names(df))
  if (!(nCols > 30)) {
    return(list(df))
  }

  # Wider data is cut into five runs of consecutive columns. Sorting the
  # "rank modulo 5" labels makes each label cover one contiguous run.
  colIdx <- 1:nCols
  nChunks <- 5
  labels <- sort(rank(colIdx) %% nChunks)
  runs <- split(colIdx, labels)

  # One data frame per run, returned as an unnamed list of five.
  pieces <- vector("list", 5)
  for (k in 1:5) {
    pieces[[k]] <- df[, as.vector(unlist(runs[k]))]
  }
  pieces
}
############################################################ i/p is o/p of quantVarSumm
#' Generating ScatterPlot of coefficient of variance of all quantitative variables
#'
#'
#' This function generates ScatterPlot of coefficient of variance.
#'
#' @param quantSummary is output of quantVarSumm function from explore package
#' @return a Scatter Plot
#' @author Saurabh Jaju
#' @details
#' The input to this function is the output of quantVarSumm, whose first row
#' holds the coefficient of variance of every quantitative variable.
#' It outputs a single scatter plot of that row against the variable index,
#' coloured by variable name.
#' @export
#' @import rCharts,reshape2
#'
covarianceSPlot <- function(quantSummary) {
  # Scatter plot of the first row of `quantSummary` (the coefficient of
  # variance row in the matrix built by mysummary) against the column
  # position, with one colour per variable.
  #
  # Returns the rPlot() chart object. Stops with an error when
  # `quantSummary` has no columns, i.e. there is nothing to plot.
  summ <- data.frame(quantSummary)
  variance <- as.numeric(summ[1, ])

  # Fail early with a clear message; 1:length(variance) used to produce
  # c(1, 0) here and trigger an unrelated data.frame() error.
  if (length(variance) == 0L) {
    stop("quantSummary has no quantitative variables to plot", call. = FALSE)
  }

  df <- data.frame(
    index = seq_along(variance),
    variance = variance,
    feature = names(summ)
  )
  #df<-df[variance<1 & variance>-1,]
  rPlot(variance ~ index, data = df, color = "feature", type = "point")
}
################################################ROUGH
# df1 <- ldply(df[1], data.frame)
# df2 <- ldply(df[2], data.frame)
# df3 <- ldply(df[3], data.frame)
# df4 <- ldply(df[4], data.frame)
# df5 <- ldply(df[5], data.frame)
################################################################# External catering functions
##############User allowed to call these
#' Generates a detailed univariate statistical summary of quantitative data
#'
#'
#' This function calculates various statistics of the quantitative features of input dataset
#'
#' @param df your data frame
#' @return rbound vectors of input data statistics
#' @author Saurabh Jaju
#' @details
#' The function first divides the dataset into manageable smaller data frames with \code{verticalSplit}
#' Then calculates summary statistics like
#' Coef. of variance,minimum,maximum,standard deviation,variance,
#' various quantiles, mean, median, skewness, kurtosis with \code{runSummary}
#' it also checks if data contains NA's and number of unique values.
#'
#' @export
#' @import moments,plyr
#'
quantVarSumm <- function(df) {
  # Split the data column-wise, then summarise the quantitative columns
  # of every chunk.
  runSummary(verticalSplit(df))
}
#' Generates a detailed univariate frequency summary of qualitative features in input dataset
#'
#' The function first divides the dataset into manageable smaller data frames with \code{verticalSplit}
#' This function provides frequency count of each unique entry in each qualitative feature with \code{runFacsummary}
#'
#' @param df your data frame
#' @return list of variable name, unique entry, count(frequency)
#' @author Saurabh Jaju
#' @details
#' The function first divides the dataset into manageable smaller data frames
#' Then calculates frequency of all unique values in all qualitative features
#' And returns it in the form of a list
#'
#' @export
#' @import plyr
#'
qualiVarSumm <- function(df) {
  # Split the data column-wise, then tabulate the qualitative columns
  # of every chunk.
  runFacsummary(verticalSplit(df))
}
#' Generating BarPlots
#'
#' The function first divides the dataset into manageable smaller data frames with \code{verticalSplit}
#' This function generates BarPlots of frequency of all unique values qualitative variables with \code{barPlots}
#'
#' @param df your dataframe
#' @return a list of barplots
#' @author Saurabh Jaju
#' @details
#' It outputs a list containing BarPlots of frequency of all unique values qualitative variables.
#' Output must be stored in a variable.
#' And you could access each single bar Plot as
#' outputList<-freqBarPlots(df)
#' outputList[1] or outputList[2]....
#' @export
#' @import rCharts,reshape2
#'
freqBarPlots <- function(df) {
  # Split the data column-wise, then hand the chunks to barPlots().
  barPlots(verticalSplit(df))
}
#' Generating PieCharts
#' The function first divides the dataset into manageable smaller data frames with \code{verticalSplit}
#' This function generates PieCharts of frequency of all unique values qualitative variables with \code{piePlots}
#'
#' @param df your dataframe
#' @return a list of PieCharts
#' @author Saurabh Jaju
#' @details
#' It outputs a list containing PieCharts of frequency of all qualitative variables.
#' Output must be stored in a variable.
#' And you could access each single pie chart as
#' outputList<-freqPiePlots(df)
#' outputList[1] or outputList[2]....
#' @export
#' @import rCharts,reshape2
#'
freqPiePlots <- function(df) {
  # Split the data column-wise, then hand the chunks to piePlots().
  piePlots(verticalSplit(df))
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.