#' Cluster the rows of a data.frame with PAM on Gower dissimilarities
#'
#' @description clustFunc computes Gower dissimilarities between the rows of
#'   \code{dfSel} with \code{cluster::daisy}, partitions them into \code{n}
#'   clusters with \code{cluster::pam}, and appends the cluster assignment to
#'   \code{dfOriginal} as two new columns: "clustN" (numeric) and "clustF"
#'   (factor). The augmented data.frame is saved as a .rda file below the
#'   directory "output" and the plot of the clustering (titled as silhouette
#'   plot) is saved as a .png file below the directory "plot", both inside the
#'   current working directory. "output" and "plot" are created if not present
#'   already. If \code{subDir} is specified, a subdirectory with that name is
#'   created within both output/ and plot/ if not already present, and the
#'   outputs are saved there. The augmented data.frame is also assigned in the
#'   ".GlobalEnv" environment under the name built from the name of
#'   \code{dfSel}, \code{addName} and \code{n}.
#' @param dfSel a data.frame object or a character string indicating the name of the data.frame object. This is a subset of the columns of \code{dfOriginal} (same rows, same order) on which the clustering is computed.
#' @param dfOriginal a data.frame object or a character string indicating the name of the data.frame object.
#' @param addName a string (default: "Clust") to be added to the name of the output data.frame object and the output .rda file
#' @param subDir a character string indicating the name of the subdirectory within "output" and "plot" directories to save the output data.frame object (as a .rda file) and plot (as a .png file) respectively. If a subdirectory with the given name does not exist within output and/or plot, then it is created. If not specified, the outputs are saved in output/ and plot/.
#' @param n a number indicating the number of clusters to be formed on clustering.
#' @param ... graphical parameters passed on to \code{par} before plotting. Parameters set this way are restored when the function exits.
#' @param width,height,res positive numbers giving the width and height (in pixels) and the resolution (in ppi) of the output .png file (defaults: 2000, 2000, 150).
#' @return Invisibly, the data.frame \code{dfOriginal} with the two additional columns "clustN" and "clustF". The function is mainly called for its side effects: the .rda file, the .png file and the object assigned in ".GlobalEnv".
#' @examples
#' \dontrun{
#' tab1 <- xlsx::read.xlsx("./inst/extdata/sample-data.xlsx", sheetName = "data")
#' tab1Vars <- c("i..id", "age", "area", "paddArea", "paddyFld", "date")
#' tab1Var <- selectExclude(tab1, tab1Vars)
#' clustFunc(tab1Var, tab1, n = 3)}
#' @export
clustFunc <- function(dfSel, dfOriginal, addName = "Clust", subDir, n, ...,
                      width = 2000, height = 2000, res = 150) {
  # Names given as strings are looked up starting in the caller's frame, so
  # that objects local to a calling function are found as well.
  callerEnv <- parent.frame()
  saveEnv <- new.env()

  if (is.character(dfSel)) {
    nameDfSel <- dfSel
    dfSel <- get(dfSel, envir = callerEnv)
  } else {
    # deparse() may split a long expression into several strings; collapse
    # them so that a single object/file name is built further down.
    nameDfSel <- paste(deparse(substitute(dfSel)), collapse = "")
  }
  if (is.character(dfOriginal)) {
    dfOriginal <- get(dfOriginal, envir = callerEnv)
  }

  if (!is.numeric(n) || length(n) != 1L || is.na(n) || n < 1) {
    stop("'n' must be a single positive number", call. = FALSE)
  }
  # cbind() below would silently recycle the cluster columns otherwise.
  if (NROW(dfSel) != NROW(dfOriginal)) {
    stop("'dfSel' and 'dfOriginal' must have the same number of rows",
         call. = FALSE)
  }

  dfSel <- droplevels(dfSel)
  # dissimilarity matrix (Gower) for the selected variables
  gowerDiss <- cluster::daisy(dfSel, metric = "gower")
  # partitioning around medoids into n groups
  clustTab <- cluster::pam(gowerDiss, n, diss = TRUE)
  # cluster column as numeric and as factor
  clustN <- unname(clustTab$clustering)
  clustF <- as.factor(as.character(clustN))
  tabClust <- cbind(dfOriginal, clustN = clustN, clustF = clustF)

  nameTabClust <- paste0(nameDfSel, addName, n)
  assign(nameTabClust, tabClust, envir = saveEnv)
  assign(nameTabClust, tabClust, envir = .GlobalEnv)

  # test or add plot directory
  if (!dir.exists("plot")) {
    print("directory plot has been created in the present working directory")
    dir.create("plot", recursive = TRUE)
  }
  # test or add output directory
  if (!dir.exists("output")) {
    print("directory output has been created in the present working directory")
    dir.create("output", recursive = TRUE)
  }
  # test or add subDir as directory
  dirOutput <- file.path(getwd(), "output")
  dirPlot <- file.path(getwd(), "plot")
  if (!missing(subDir) && !is.null(subDir)) {
    dirOutput <- file.path(dirOutput, subDir)
    dirPlot <- file.path(dirPlot, subDir)
    for (d in c(dirOutput, dirPlot)) {
      if (!dir.exists(d)) {
        # recursive: subDir may itself be a nested path
        dir.create(d, recursive = TRUE)
      }
    }
  }

  # dev.copy() replays the display list of the current device. File devices
  # (e.g. the default device of a non-interactive session) do not record one
  # unless asked to, which would leave the copied .png empty.
  if (grDevices::dev.cur() == 1L) {
    grDevices::dev.new()
  }
  grDevices::dev.control(displaylist = "enable")
  plotDev <- grDevices::dev.cur()

  if (length(list(...)) > 0L) {
    oldPar <- graphics::par(...)
    # par() returns a list of the previous values only when it was used to
    # set parameters; a pure query has nothing to restore.
    if (is.list(oldPar)) {
      on.exit(graphics::par(oldPar), add = TRUE)
    }
  }

  plot(clustTab,
       main = paste("Silhouette plot for table", nameDfSel, "with", n,
                    "clusters", sep = " "))
  grDevices::dev.copy(grDevices::png,
                      filename = file.path(
                        dirPlot, paste0(nameDfSel, "_", n, "_clusters.png")
                      ),
                      width = width, height = height, res = res)
  grDevices::dev.off()
  # closing the png device does not necessarily make the plotting device
  # current again; reselect it so later drawing and the par() restore hit it
  grDevices::dev.set(plotDev)

  save(list = nameTabClust, envir = saveEnv,
       file = file.path(dirOutput, paste0(nameTabClust, ".rda")))
  ### TODO: add two boolean parameters to switch on these two exports
  # write.xlsx(data.frame(clustTab$silinfo[1]),file=paste(dirOutput,"/sil_",nameTabClust,".xlsx",sep=""))
  # write.xlsx(tabClust,file=paste(dirOutput,"/",nameTabClust,".xlsx",sep=""),row.names=F)

  invisible(tabClust)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.