R/cluster_accnet.R

Defines functions cluster_accnet

Documented in cluster_accnet

#' Internal function to cluster accnet data
#'
#' Assigns every sample of an \emph{accnet} object to a cluster. The
#' clustering works on a pairwise distance between samples: \code{data$dist}
#' is used when it is present, otherwise a binary distance is computed from
#' \code{data$matrix} with \code{parallelDist}.
#'
#' Supported values of \code{method}:
#' \itemize{
#'   \item \code{"mclust"}: model-based clustering with \code{Mclust}, run on
#'     the distance object or, when \code{d_reduction} is true, on a UMAP
#'     embedding of it. The number of components is searched in 2..9; while
#'     the selected model uses the largest number tried, the range is
#'     widened and the model refitted (at most 10 fits).
#'   \item \code{"upgma"}: \code{hclust} with average linkage, cut into
#'     \code{n_cluster} groups.
#'   \item \code{"ward.D2"}: \code{hclust} with Ward linkage, cut into
#'     \code{n_cluster} groups.
#'   \item \code{"hdbscan"}: HDBSCAN on a UMAP embedding of the distance
#'     (always embedded, regardless of \code{d_reduction}), with
#'     \code{minPts} set to 5\% of the number of samples (at least 2).
#' }
#'
#' @param data \emph{accnet} object. Must provide either \code{data$dist}
#'   or \code{data$matrix}; the latter needs a \code{Source} column, which
#'   is used as the sample identifier.
#' @param method Clustering method: one of \code{"mclust"}, \code{"upgma"},
#'   \code{"ward.D2"} or \code{"hdbscan"}.
#' @param n_cluster number of cluster (if applicable). Required for
#'   \code{"upgma"} and \code{"ward.D2"}; not used by the other methods.
#' @param d_reduction boolean. Dimensional reduction using umap. Only
#'   consulted when \code{method = "mclust"}.
#' @param ... additional parameters (currently not used by the function).
#'
#' @return \emph{data.frame} with two columns: \emph{Source} and
#'   \emph{Cluster}
#'
#' @import dplyr
#' @import tidyr
#' @import tibble
#' @import uwot
#' @import mclust
#' @importFrom data.table fread
#' @import dbscan
#' @import parallelDist
#'
#'
cluster_accnet <- function(data, method, n_cluster, d_reduction, ...)
{
  # Validate cheap things first so a typo in `method` does not cost a full
  # distance computation before it is reported.
  known_methods <- c("mclust", "upgma", "ward.D2", "hdbscan")
  if (!is.character(method) || length(method) != 1L ||
      !(method %in% known_methods)) {
    stop("Unrecognized method")
  }

  # Maps each hierarchical method name to the hclust() linkage it uses.
  linkage <- c(upgma = "average", ward.D2 = "ward.D2")
  if (method %in% names(linkage) &&
      (missing(n_cluster) || is.null(n_cluster))) {
    stop("'n_cluster' must be supplied for method '", method, "'",
         call. = FALSE)
  }

  if (is.null(data$dist)) {
    presence <- as.matrix(tibble::column_to_rownames(data$matrix, "Source"))
    Dist <- parallelDist::parallelDist(presence, method = "binary")
  } else {
    Dist <- data$dist
  }

  # Builds the two-column result shared by every method. Falls back to
  # positional identifiers when the assignment vector carries no names.
  cluster_table <- function(assignments, ids = names(assignments)) {
    if (is.null(ids)) {
      ids <- as.character(seq_along(assignments))
    }
    data.frame(
      Source = ids,
      Cluster = as.vector(assignments),
      stringsAsFactors = FALSE
    )
  }

  # UMAP embedding of the distance, with rows labelled like the distance.
  embed_samples <- function() {
    embedding <- uwot::umap(Dist)
    rownames(embedding) <- rownames(as.matrix(Dist))
    embedding
  }

  # Fits Mclust, widening the searched range of component counts while the
  # selected model sits on the upper bound of the range (at most 10 fits).
  fit_mclust <- function(features) {
    g_min <- 2L
    g_max <- 9L
    fit <- NULL
    for (attempt in seq_len(10L)) {
      fit <- Mclust(features, g_min:g_max)
      if (is.null(fit)) {
        stop("Mclust could not fit a model with G = ", g_min, ":", g_max,
             call. = FALSE)
      }
      if (max(fit$classification) != g_max) {
        break
      }
      g_min <- g_max - 1L
      g_max <- g_max + 10L
    }
    fit
  }

  if (method == "mclust") {
    features <- if (d_reduction) embed_samples() else Dist
    fit <- fit_mclust(features)
    return(cluster_table(fit$classification))
  }

  if (method == "hdbscan") {
    embedding <- embed_samples()
    # 5% of the samples, as a whole number and never below 2 so that small
    # data sets still yield a usable neighbourhood size.
    # NOTE(review): truncation is assumed to match how hdbscan() previously
    # treated the fractional value -- confirm against the dbscan version used.
    min_pts <- max(2L, as.integer(floor(0.05 * nrow(embedding))))
    fit <- dbscan::hdbscan(embedding, minPts = min_pts)
    return(cluster_table(fit$cluster, ids = rownames(embedding)))
  }

  # Remaining methods are the hierarchical ones ("upgma", "ward.D2").
  tree <- stats::hclust(Dist, method = linkage[[method]])
  cluster_table(stats::cutree(tree, n_cluster))
}
irycisBioinfo/PATO documentation built on Oct. 19, 2023, 3:07 p.m.