R/cluster_funcs.R

Defines functions louvain_clust louvain_k pam_k

Documented in louvain_clust louvain_k pam_k

#' Optimizes PAM clustering over a range of cluter numbers 'k' using silhouette score.
#' 
#' @param dist.mat Distance matrix or distance structure like that generated by 'dist'.
#' @param kmin Minimum number of clusters. Default of 2.
#' @param kmax Maximum number of clusters. Default of 5.
#' @return A list; 
#' 'opt.clust' with the optimal clustering; 
#' 'clustering.objs', a list of all clusterings generated; 
#' 'sil.scores' a list of silhouette scores for each clustering object.
#' @export
pam_k <- function(dist.mat, kmin = 2, kmax = 5) {
  require(cluster)
  
  # generate clustering for each value of k
  cat('Generating clusterings...\n')
  clustering.objects <- lapply(kmin:kmax, function(k) {pam(dist.mat, k, diss = TRUE)})
  # generate silhouette score for each cluster
  cat('Generating silhouette scores...\n')
  sil.scores <- sapply(clustering.objects, function(clust.obj) { mean(silhouette(clust.obj, dist.mat)[,3]) })
  # name the lists
  names(clustering.objects) <- paste('k', kmin:kmax, sep = '')
  names(sil.scores) <- paste('k', kmin:kmax, sep = '')
  # identify the optimal clustering
  opt.clust <- clustering.objects[[which.max(sil.scores)]]$clustering
  # return objects
  ret.list <- list('opt.clust' = opt.clust,
                   'clustering.objs' = clustering.objects,
                   'sil.scores' = sil.scores)
  return(ret.list)
}

#' Optimizes Louvain clustering over a range of number of neighbors 'k' by silhouette score.
#' 
#' @param dist.mat Distance matrix or distance structure like that generated by 'dist'.
#' @param kmin Minimum number of neighbors Default of 5.
#' @param kmax Maximum number of neibhbors Default of 50.
#' @param kstep Step size between k values. Default of 5.
#' @return A list; 
#' 'opt.clust' with the optimal clustering; 
#' 'clustering.objs', a list of all clusterings generated; 
#' 'sil.scores' a list of silhouette scores for each clustering object.
#' @export
louvain_k <- function(dist.mat, kmin = 5, kmax = 50, kstep = 5) {
  require(cluster)
  
  k.vals <- seq(kmin, kmax, kstep)
  # generate clustering for each value of k
  cat('Generating clusterings...\n')
  clustering.objects <- lapply(k.vals, function(k) {louvain_clust(dist.mat, k)})
  # generate silhouette score for each cluster
  cat('Generating silhouette scores...\n')
  sil.scores <- sapply(clustering.objects, function(clust.obj) { mean(silhouette(clust.obj, dist.mat)[,3]) })
  # name the lists
  names(clustering.objects) <- paste('k', k.vals, sep = '')
  names(sil.scores) <- paste('k', k.vals, sep = '')
  # identify the optimal clustering
  opt.clust <- clustering.objects[[which.max(sil.scores)]]
  # return objects
  ret.list <- list('opt.clust' = opt.clust,
                   'clustering.objs' = clustering.objects,
                   'sil.scores' = sil.scores)
  return(ret.list)
}

#' Runs Louvain clusering on the given object with the specified KNN graph.
#' 
#' @param data.obj Seurat object w/ dist.mat object in misc of active assay.
#' @param num.neighbors Number of neighbors to use in the KNN graph. Default of 5.
#' @return Vector of cluster assignments.
#' @export
louvain_clust <- function(dist.mat, num.neighbors = 5) {
  require(igraph, quietly = TRUE)
  # get KNN matrix
  knn.mat <- knn(dist.mat, num.neighbors)
  # generate graph
  adj.mat <- matrix(0L, nrow = nrow(knn.mat), ncol = nrow(knn.mat))
  colnames(adj.mat) <- rownames(knn.mat)
  rownames(adj.mat) <- rownames(knn.mat)
  for (i in 1:nrow(knn.mat)) {
    adj.mat[i, knn.mat[i,]] <- 1
  }
  graph.obj <- igraph::graph_from_adjacency_matrix(adj.mat)
  graph.obj <- as.undirected(graph.obj)
  # create clustering
  l.clust <- unlist(as.list(igraph::membership(cluster_louvain(graph.obj))))
  return(l.clust)
}
califano-lab/PISCES documentation built on Jan. 11, 2023, 5:34 a.m.