# R/inpdfr_ANA_cluster.R
#
# Defines functions doKmeansClust doCluster
#
# Documented in doCluster doKmeansClust

#' Performs a cluster analysis on the basis of the word-occurrence data.frame.
#'
#' Performs a hierarchical cluster analysis on the basis of the word-occurrence
#'   data.frame using the \code{\link[stats]{hclust}} function. Every column of
#'   \code{wordF} except the first one is treated as one item to cluster;
#'   distances between items are computed with \code{\link[stats]{dist}} and
#'   its default settings.
#'
#' @param wordF The data.frame containing word occurrences. The first column
#'   (assumed to hold the words) is skipped.
#' @param myMethod The agglomeration method, passed as \code{method} to the
#'   \code{\link[stats]{hclust}} function.
#' @param gp A logical to specify if groups should be made.
#' @param nbGp An integer to specify the number of groups. Ignored if \code{gp=FALSE}.
#' @param getPlot If \code{TRUE}, save the cluster plot in the RESULTS directory
#'   (created in the current working directory if needed).
#' @param mwidth The width of the plot in pixels.
#' @param mheight The height of the plot in pixels.
#' @param formatType The format for the output file ("eps", "pdf", "png", "svg", "tiff", "jpeg", "bmp").
#' @param ... Additional arguments from the \code{\link[stats]{hclust}} function.
#' @return An object of class \code{\link[stats]{hclust}}, or \code{NULL}
#'   (invisibly) when \code{wordF} has three columns or fewer.
#' @examples
#' data("wordOccuDF")
#' doCluster(wordF = wordOccuDF, myMethod = "ward.D2", getPlot = FALSE)
#' @export
doCluster <- function(wordF, myMethod = "ward.D2", gp = FALSE, nbGp = 5, getPlot = TRUE, mwidth = 800,
  mheight = 800, formatType = "png", ...){
  ## normalise the flags once so that NA / NULL never reach an `if` condition
  savePlot <- isTRUE(as.logical(getPlot))
  makeGroups <- isTRUE(as.logical(gp))

  ## create RESULTS folder
  if(savePlot){
    outDir <- file.path(getwd(), "RESULTS")
    dir.create(outDir, showWarnings = FALSE)
  }

  ## the analysis is only run when wordF has more than three columns
  if(!(ncol(wordF) > 3)){
    return(invisible(NULL))
  }

  ## make cluster analysis: one row per column of wordF (first column dropped)
  itemMat <- t(as.matrix(wordF[, -1, drop = FALSE]))
  fitClust <- stats::hclust(stats::dist(itemMat), method = myMethod, ...)

  if(savePlot){
    R.devices::devEval(type = formatType, name = "HCLUST",
      aspectRatio = mheight / mwidth,
      ## for positive dimensions the historical max()/min() rule on
      ## (mheight / 480, mwidth / 480) always selects mwidth / 480
      scale = mwidth / 480,
      path = outDir, {
        ## report drawing problems instead of silently writing an empty plot
        tryCatch({
          if(makeGroups){
            graphics::plot(fitClust, hang = -1)
            stats::rect.hclust(fitClust, k = nbGp, border = "red")
          }else{
            graphics::plot(fitClust)
          }
        }, error = function(e){
          warning("doCluster: could not draw the cluster plot: ",
            conditionMessage(e), call. = FALSE)
        })
      }
    )
  }
  fitClust
}

#' Performs a k-means cluster analysis on the basis of the word-occurrence data.frame.
#'
#' Performs a k-means cluster analysis on the basis of the word-occurrence data.frame
#'   using \code{\link[stats]{kmeans}} function. Every column of \code{wordF}
#'   except the first one is treated as one item to cluster. The Euclidean
#'   distances between items are computed with \code{\link[stats]{dist}} and
#'   the resulting distance object is what is handed to
#'   \code{\link[stats]{kmeans}}.
#'
#' @param wordF The data.frame containing word occurrences. The first column
#'   (assumed to hold the words) is skipped.
#' @param nbClust The number of clusters.
#' @param nbIter The number of iterations allowed.
#' @param algo The algorithm used (see \code{\link[stats]{kmeans}}).
#' @param getPlot If \code{TRUE}, save the k-means cluster plot in the RESULTS directory
#'   (created in the current working directory if needed).
#' @param mwidth The width of the plot in pixels.
#' @param mheight The height of the plot in pixels.
#' @param formatType The format for the output file ("eps", "pdf", "png", "svg", "tiff", "jpeg", "bmp").
#' @param ... Additional arguments from the \code{\link[stats]{kmeans}} function.
#' @return An object of class kmeans (see \code{\link[stats]{kmeans}}), or
#'   \code{NULL} (invisibly) when \code{wordF} has three columns or fewer.
#' @examples
#' data("wordOccuDF")
#' doKmeansClust(wordF = wordOccuDF, nbClust = 2, getPlot = FALSE)
#' @export
doKmeansClust <- function(wordF, nbClust = 4, nbIter = 10, algo = "Hartigan-Wong", getPlot = TRUE,
  mwidth = 800, mheight = 800, formatType = "png", ...){
  ## normalise the flag once so that NA / NULL never reach an `if` condition
  savePlot <- isTRUE(as.logical(getPlot))

  ## create RESULTS folder
  if(savePlot){
    outDir <- file.path(getwd(), "RESULTS")
    dir.create(outDir, showWarnings = FALSE)
  }

  ## the analysis is only run when wordF has more than three columns
  if(!(ncol(wordF) > 3)){
    return(invisible(NULL))
  }

  ## make kmeans-cluster analysis: one row per column of wordF (first column dropped)
  itemMat <- t(as.matrix(wordF[, -1, drop = FALSE]))
  ## canonical spelling; the former "euclidian" only worked because
  ## stats::dist() special-cases that variant
  dd <- stats::dist(itemMat, method = "euclidean")
  kfit <- stats::kmeans(x = dd, centers = nbClust,
    iter.max = nbIter, algorithm = algo, ...)

  if(savePlot){
    R.devices::devEval(type = formatType, name = "KMEANCLUST",
      aspectRatio = mheight / mwidth,
      ## for positive dimensions the historical max()/min() rule on
      ## (mheight / 480, mwidth / 480) always selects mwidth / 480
      scale = mwidth / 480,
      path = outDir, {
        ## report drawing problems instead of silently writing an empty plot
        tryCatch(
          cluster::clusplot(as.matrix(dd), kfit$cluster, color = TRUE,
            shade = TRUE, labels = 2, lines = 0),
          error = function(e){
            warning("doKmeansClust: could not draw the k-means plot: ",
              conditionMessage(e), call. = FALSE)
          }
        )
      }
    )
  }
  kfit
}

# Try the inpdfr package in your browser
#
# Any scripts or data that you put into this service are public.
#
# inpdfr documentation built on Aug. 24, 2023, 9:09 a.m.