R/partition_agreement.R

Defines functions partitionAgreement

Documented in partitionAgreement

#' Partition Agreement calculation between two clustering runs
#'
#' Calculate the agreement (0,1) between two partitioning generated by two
#' clustering runs using the adjust Rand Index. We can use three clustering
#' algorithms (spectral, kmeans and hierarchical) along with the following
#' parameters for each:
#'
#' Spectral
#'  kernels: rbfdot, polydot, vanilladot, tanhdot, laplacedot, besseldot,
#'  anovadot, splinedot
#'
#'  K-means
#'  kernels: "Hartigan-Wong", Lloyd, Forgy, MacQueen
#'
#' Hierarchical
#'  Agglomeration methods: average, ward.D, ward.D2, single, complete, mcquitty,
#'  median, centroid
#'
#'  Distance measures: euclidean, manhattan, canberra, minkowski, maximum
#'
#' @param data A dataframe, where columns are features and rows are data points
#' @param measure.1 Concerns the first algorithm to be used and represents a
#' kernel for Spectral/kmeans or a distance measure for hierarchical clustering
#' @param hier.agglo.algorithm.1 Concerns the first algorithm to be used and
#' represents the agglomerative method for hierarchical clustering (not used in
#' spectral/kmeans clustering)
#' @param algorithm.2 First algorithm to be used (spectral/kmeans/hierarchical)
#' @param measure.2 Concerns the second algorithm to be used and represents a
#' kernel for Spectral/kmeans or a distance measure for hierarchical clustering
#' @param hier.agglo.algorithm.2 Concerns the second algorithm to be used and
#' represents the agglomerative method for hierarchical clustering (not used in
#' spectral/kmeans clustering)
#' @param number.of.clusters The upper limit of clusters to form starting from 2
#' @param algorithm.1 Second algorithm to be used (spectral/kmeans/hierarchical)
#'
#' @return An object of class "partitionAgreement" containing agreements
#' (Rand Indexes) from 1 cluster (ARI=0) up to the number of clusters requested
#'
#' @examples
#' partitionAgreement(toy_genes, algorithm.1 = "hierarchical",
#' measure.1 = "canberra",hier.agglo.algorithm.1 = "average",
#' algorithm.2 = "hierarchical",measure.2 = "manhattan",
#' hier.agglo.algorithm.2 = "average",number.of.clusters = 3)
#'
#' partitionAgreement(toy_genes, algorithm.1 = "spectral", measure.1 = "rbfdot",
#' algorithm.2 = "kmeans",measure.2 = "Lloyd", number.of.clusters = 5)
#' @export
#'
#' @importFrom stats cutree
#' @importFrom pdfCluster adj.rand.index

partitionAgreement <- function(data, algorithm.1 = "hierarchical",
                               measure.1 = "canberra",
                               hier.agglo.algorithm.1 = "average",
                               algorithm.2 = "hierarchical",
                               measure.2 = "manhattan",
                               hier.agglo.algorithm.2 = "average",
                               number.of.clusters = 5) {

  # Convert to matrix
  dataset <- data.matrix(data, rownames.force = NA)

  # cr1
  cr1 <- data.frame(matrix(ncol = 0, nrow = dim(data)[1]))
  for(i in 2:number.of.clusters) {
    #Spectral clustering
    if(algorithm.1 == "spectral") {
      cl <- kernlab::specc(dataset, centers=i, kernel = measure.1)
      temp <- data.frame(cl@.Data)
      cr1 <- cbind(cr1, temp)
    }

    #Hierarchical
    else if(algorithm.1 == "hierarchical") {
      dist_mat <- stats::dist(dataset, method = measure.1)
      cl <- stats::hclust(dist_mat, method = hier.agglo.algorithm.1)
      temp <- data.frame(cutree(cl, k = i))
      cr1 <- cbind(cr1, temp)
    }

    # #k-means
    else if(algorithm.1 == "kmeans") {
      cl <- stats::kmeans(dataset, i, algorithm = measure.1)
      temp <- data.frame(cl$cluster)
      cr1 <- cbind(cr1, temp)
    }

  }

  # cr2
  cr2 <- data.frame(matrix(ncol = 0, nrow = dim(data)[1]))
  for(i in 2:number.of.clusters) {
    #Spectral clustering
    if(algorithm.2 == "spectral") {
      cl <- kernlab::specc(dataset, centers=i, kernel = measure.2)
      temp <- data.frame(cl@.Data)
      cr2 <- cbind(cr2, temp)
    }

    #Hierarchical
    else if(algorithm.2 == "hierarchical") {
      dist_mat <- stats::dist(dataset, method = measure.2)
      cl <- stats::hclust(dist_mat, method = hier.agglo.algorithm.2)
      temp <- data.frame(cutree(cl, k = i))
      cr2 <- cbind(cr2, temp)
    }

    #k-means
    else if(algorithm.2 == "kmeans") {
      cl <- stats::kmeans(dataset, i, algorithm = measure.2)
      temp <- data.frame(cl$cluster)
      cr2 <- cbind(cr2, temp)
    }
  }

  Ks1 <- rep(1, dim(dataset)[1])

  colnames(cr1) <- paste0("Ks", 2:number.of.clusters)
  clusters.1 <- cbind(dataset, Ks1, cr1)

  colnames(cr2) <- paste0("Ks", 2:number.of.clusters)
  clusters.2 <- cbind(dataset, Ks1, cr2)

  # Convert to dataframe
  clusters.1 <- as.data.frame(clusters.1)
  clusters.2 <- as.data.frame(clusters.2)

  # Calculate Adjusted Rand Index vector
  ari.vector <- c(0)

  for (k in seq(2, number.of.clusters)) {
    k.name <- paste0("Ks", k)
    ari.vector <- append(ari.vector,
                         adj.rand.index(clusters.1[[k.name]],
                                        clusters.2[[k.name]]))
  }

  names(ari.vector) <- c(paste0("K_", seq(1, number.of.clusters)))

  partitionAgreement <- function(ari.scores=ari.vector){

    pa <- list(ari.scores = ari.scores)

    ## Set the name for the class
    class(pa) <- "partitionAgreement"

    return(pa)
  }

  partition.agreement <- partitionAgreement()

  return(partition.agreement)
}
BioSok/omada documentation built on Aug. 21, 2023, 2:38 p.m.