R/supervoxels.R

Defines functions supervoxel_cluster_surface supervoxel_cluster_time supervoxels knn_shrink supervoxel_cluster_fit init_cluster tesselate

Documented in knn_shrink supervoxel_cluster_time supervoxels tesselate

#' Tesselate a Mask Volume into K Clusters using K-means
#'
#' This function tesselates a given mask volume into K clusters using k-means clustering
#' applied to spatial coordinates. It returns a clustered mask volume object.
#'
#' @param mask A \code{NeuroVol} object representing the mask volume.
#' @param K An integer value specifying the number of clusters (default: 100).
#' @return An instance of \code{ClusteredNeuroVol} representing the clustered mask volume.
#' @examples
#' # Assuming you have a NeuroVol object 'mask' and you want to create 150 clusters
#' clustered_volume <- tesselate(mask, K = 150)
#' @export
tesselate <- function(mask, K=100) {
  mask.idx <- which(mask>0)
  grid <- index_to_coord(mask, mask.idx)

  gcen <- grid[as.integer(seq(1, nrow(grid), length.out=K)),]
  kres <- kmeans(grid, centers=gcen, iter.max=500)
  clusvol <- ClusteredNeuroVol(mask, kres$cluster)
}



# supervoxelR <- function(mask, bvec, K=500, lambda=.5, iterations=25, connectivity=27, use_medoid=FALSE) {
#   assert_that(lambda >= 0 && lambda <= 1)
#   assert_that(connectivity > 1 & connectivity <= 27)
#
#   mask.idx <- which(mask > 0)
#   grid <- indexToCoord(mask, mask.idx)
#   vgrid <- indexToGrid(mask, mask.idx)
#
#   #if (shrink > 0) {
#   #  message("running ", shrink, "shrinkage iterations with k = ", 5)
#   #  bvec <- shrink_vector(mask, vgrid, bvec, k=5, iter=shrink)
#   #}
#
#   kres <- kmeans(grid, K, iter.max=500)
#   kvol <- NeuroVol(kres$cluster, space(mask), indices=mask.idx)
#
#   clusid <- sort(unique(kres$cluster))
#   neib <- get.knn(grid, k=connectivity)
#
#   iter <- 1
#   switches <- 1
#   iter.max <- iterations
#
#   feature_mat <- series(bvec, mask.idx)
#
#   centroids <- compute_centroids(feature_mat, grid, kres$cluster, medoid=use_medoid)
#   sp_centroids <- do.call(rbind, lapply(centroids, "[[", "centroid"))
#   num_centroids <- do.call(rbind, lapply(centroids, "[[", "center"))
#
#   denom <- max(get.knn(sp_centroids, k=1)$nn.dist[,1])
#
#
#   curclus <- kvol[mask.idx]
#
#
#   ## port iteration to rcpp
#   while (iter < iter.max && switches > 0) {
#     message("turboclust, iteration: ", iter)
#
#     newclus <- sapply(1:nrow(vgrid), function(i) {
#
#       ind <- neib$nn.index[i,]
#       D <- neib$nn.dist[i,]
#       keep <- which(D < dthresh)
#       ind <- ind[keep]
#
#       oclus <- unique(kvol[mask.idx[ind]])
#       diffclus <- which(oclus != curclus[i])
#
#       if (length(diffclus) > 0) {
#
#         candclus <- c(curclus[i], oclus[diffclus])
#
#         cval <- sapply(candclus, function(cc) {
#           (1 - cor(feature_mat[,i], centroids[[cc]]$center))/2
#         })
#
#         dvals <- sapply(candclus, function(cc) {
#           sqrt(sum((grid[i,] - centroids[[cc]]$centroid)^2))
#         })/denom
#
#         cost <- lambda*cval + (1-lambda)*dvals
#         #cost <- cvals
#         newc <- candclus[which.min(cost)]
#       } else {
#         curclus[i]
#       }
#     })
#
#
#
#     centroids <- compute_centroids(bvec, mask.idx, grid, newclus, medoid=use_medoid)
#     switches <- sum(newclus != curclus)
#
#     curclus <- newclus
#     message("tuboclust: nswitches ", switches)
#
#     iter <- iter + 1
#   }
#
#   kvol <- NeuroVol(newclus, space(mask), indices=mask.idx)
#   voxsplit <- split(data.frame(vgrid), newclus)
#
#   list(clusvol=kvol,
#        clusters=newclus,
#        centers = do.call(rbind, lapply(centroids, "[[", "center")),
#        coordCenters=do.call(rbind, lapply(centroids, "[[", "centroid")),
#        voxelSets=voxsplit)
# }




#' @keywords internal
init_cluster <- function(bvec, mask, grid, K, use_gradient=TRUE) {
  mask.idx <- which(mask>0)

  if (use_gradient) {
    refvol <- bvec[[1]]
    grad <- spatial_gradient(refvol, mask)

    valid_coords <- index_to_grid(mask, mask.idx)
    init <- find_initial_points(valid_coords, grad, K)
    #centroid_idx = init$selected
    #centroids <- norm_coords[init$selected,]
    kres <- kmeans(grid, centers=grid[init$selected,], iter.max=500)
    #clusvol <- NeuroVol(kres$cluster, space(mask), indices=mask.idx)
    kres$cluster

  } else {
    gcen <- grid[as.integer(seq(1, nrow(grid), length.out=K)),]
    kres <- kmeans(grid, centers=gcen, iter.max=500)
    #clusvol <- NeuroVol(kres$cluster, space(mask), indices=mask.idx)
    kres$cluster
  }
}


#' @keywords internal
#' @importFrom assertthat assert_that
#' @import neuroim2
#' @noRd
supervoxel_cluster_fit <- function(feature_mat, grid, K=min(500, nrow(grid)),sigma1=1, sigma2=5,
                              alpha=.5, iterations=25, connectivity=26, use_medoid=FALSE,
                              initclus, use_gradient=TRUE) {

  message("supervoxel_fit: sigma1 = ", sigma1, " sigma2 = ", sigma2)

  assert_that(connectivity > 1 & connectivity <= 27)
  assert_that(alpha >= 0 && alpha <= 1)

  feature_mat <- scale(feature_mat, center=TRUE, scale=TRUE)

  if (is.null(initclus)) {
    ## kmeans using coordinates only
    gcen <- grid[as.integer(seq(1, nrow(grid), length.out=K)),]
    kres <- kmeans(grid, centers=gcen, iter.max=500)

    clusid <- sort(unique(kres$cluster))
    curclus <- kres$cluster

    seeds <- FNN::get.knnx(grid, kres$centers)$nn.index[,1]
    sp_centroids <- grid[seeds,]
    num_centroids <- feature_mat[, seeds]

  } else {
    assert_that(length(initclus) == nrow(grid))
    clusid <- sort(unique(initclus))
    assert_that(length(clusid) == K)
    curclus <- initclus
    centroids <- compute_centroids(feature_mat, grid, curclus, medoid=use_medoid)
    sp_centroids <- do.call(rbind, centroids$centroid)
    num_centroids <- do.call(rbind, centroids$center)

    #browser()
  }

  ## find k neighbors within 'connectivity' radius
  neib <- FNN::get.knn(grid, k=connectivity)

  ## has to be changed for surface... pass in neighbor_fun?
  dthresh <- median(neib$nn.dist[,connectivity,drop=FALSE])
  message("dthresh: ", dthresh)

  iter <- 1
  switches <- 1
  iter.max <- iterations

  while (iter < iter.max && switches > 0) {

    candlist <- find_candidates(neib$nn.index-1, neib$nn.dist, curclus, dthresh)
    newclus <- best_candidate(candlist, curclus, t(grid),
                              t(num_centroids), t(sp_centroids),
                              feature_mat, sigma1, sigma2, alpha)

    switches <- attr(newclus, "nswitches")

    if (switches > 0) {
      centroids <- compute_centroids(feature_mat, grid, newclus, medoid=use_medoid)
      sp_centroids <- do.call(rbind, centroids$centroid)
      num_centroids <- do.call(rbind, centroids$center)
      curclus <- newclus
    }

    message("supervoxels_fit: iter ", iter, " -- num  switches =  ", switches)
    iter <- iter + 1
  }

  list(clusters=newclus,
       centers = do.call(rbind, lapply(centroids, "[[", "center")),
       coord_centers=do.call(rbind, lapply(centroids, "[[", "centroid")))
}

#' knn_shrink
#'
#' Replace each voxel by the mean of its k nearest neighbors in its local spatial neighborhood
#'
#' @param bvec bvec a \code{\linkS4class{NeuroVec}} instance
#' @param mask a mask defining the voxels to include in the clustering result.
#' @param k the number of nearest neighbors to average over.
#' @param connectivity the number of spatial neighbors to include in the search space around each voxel.
#' @export
#'
#' @examples
#' mask <- NeuroVol(array(1, c(20,20,20)), NeuroSpace(c(20,20,20)))
#' bvec <- replicate(10, NeuroVol(array(runif(20*20*20), c(20,20,20)),
#' NeuroSpace(c(20,20,20))), simplify=FALSE)
#' bvec <- do.call(concat, bvec)
#'
#' sbvec <- knn_shrink(bvec, mask,k=3)
#'
knn_shrink <- function(bvec, mask, k=5, connectivity=27) {
  assert_that(inherits(bvec, "NeuroVec"))
  assert_that(inherits(mask, "NeuroVol"))
  assert_that(k >= 1)
  assert_that(connectivity >= k)

  mask <- as.logical(mask)
  mask.idx <- which(mask)
  grid <- index_to_coord(mask, mask.idx)

  feature_mat <- series(bvec, mask.idx)

  ## find k neighbors within 'connectivity' radius
  neib <- FNN::get.knn(grid, k=connectivity)

  sfeature_mat <- t(do.call(rbind, map(1:nrow(neib$nn.index), function(i) {
    rowMeans(feature_mat[, c(i, neib$nn.index[i,1:(k-1)])])
  })))

  SparseNeuroVec(sfeature_mat, space(bvec), mask=mask)
}

#' supervoxels
#'
#' Cluster a \code{NeuroVec} instance into a set of spatially constrained clusters.
#'
#'
#' @param bvec a \code{\linkS4class{NeuroVec}} instance supplying the data to cluster.
#' @param mask a \code{\linkS4class{NeuroVol}} mask defining the voxels to include in the clustering result.
#'    If the mask contains \code{numeric} data, nonzero values will define the included voxels. If the mask
#'    is a \code{\linkS4class{LogicalNeuroVol}} then \code{TRUE} will define the set of included voxels.
#' @param K the number of clusters to find.
#' @param sigma1 the bandwidth of the heat kernel for computing similarity of the data vectors.
#' @param sigma2 the bandwidth of the heat kernel for computing similarity of the coordinate vectors.
#'    If this value is small, then relatively larger weights are given to nearby voxels. If is is large, then
#'    spatial weights will be less salient. A relatively large sigma1/sigma2 ratio weights data features more than
#'    spatial features, whereas as large sigma2/sigma1 ration does the opposite.
#' @param iterations the maximum number of cluster iterations
#' @param connectivity the number of nearest neighbors defining the neighborhood
#' @param use_medoid whether to use the medoids to define cluster centroids
#' @param use_gradient use the image gradient to initialize clusters
#' @param alpha the relative weighting between spatial coherence and feature similarity metrics.
#' alpha = 1, means all feature-weighting, alpha=0 is spatial weighting. Default is .5.
#'
#' @export
#' @importFrom neuroim2 NeuroVec NeuroVol
#' @import furrr
#' @import assertthat
#' @return a \code{list} of class \code{supervoxels_cluster_result} with the following elements:
#'
#' \describe{
#'   \item{clustervol}{an instance of type \linkS4class{ClusteredNeuroVol}}
#'   \item{clusters}{ a vector of cluster indices equal to the number of voxels in the mask}
#'   \item{centers}{ a matrix of cluster centers with each column representing the feature vector for a cluster }
#'   \item{coord_centers}{ a matrix of spatial coordinates with each row corresponding to a cluster }
#' }
#'
#' @examples
#'
#' mask <- NeuroVol(array(1, c(20,20,20)), NeuroSpace(c(20,20,20)))
#' bvec <- replicate(10, NeuroVol(array(runif(20*20*20), c(20,20,20)),
#' NeuroSpace(c(20,20,20))), simplify=FALSE)
#' bvec <- do.call(concat, bvec)
#'
#' cres1 <- supervoxels(bvec, mask, K=100, sigma1=1, sigma2=3)
#' @details
#'
#' Here's a brief overview of the algorithm:
#' 1. It first scales input data (bvec)
#'
#' 2. It initializes the clusters using the gradient of the image and a furthest neighbor approach.
#'
#' 3. It then runs the "supervoxel_fit" function, which iteratively finds spatially constrained clusters using a combination of feature similarity and spatial similarity. The feature similarity is calculated based on the heat kernel with bandwidth sigma1, while the spatial similarity is calculated based on the heat kernel with bandwidth sigma2. The relative weighting between these two similarities is controlled by the alpha parameter.
#'
#' 4. Finally, it returns a list containing the clustered NeuroVol object (clusvol), cluster assignments for each voxel (cluster), the feature vector for each cluster (centers), and the spatial coordinates of each cluster (coord_centers).
#' This algorithm is particularly suitable for brain data analysis, as it takes both the spatial and feature similarities into account, allowing it to identify clusters that are spatially coherent and share similar features.
#'
supervoxels <- function(bvec, mask, K=500, sigma1=1,
                                sigma2=2.5, iterations=50,
                                connectivity=27, use_medoid=FALSE,
                                use_gradient=TRUE,
                                alpha=.5) {



  mask.idx <- which(mask > 0)

  ## coordinate grid in mm units
  grid <- index_to_coord(mask, mask.idx)
  clusinit <- init_cluster(bvec, mask, grid, K, use_gradient)

  feature_mat <- neuroim2::series(bvec, mask.idx)

  ret <- supervoxel_cluster_fit(feature_mat, grid, sigma1=sigma1, sigma2=sigma2, K=K,
                                iterations=iterations, connectivity=connectivity,
                                use_medoid=use_medoid, alpha=alpha,initclus=clusinit)

  kvol <- ClusteredNeuroVol(as.logical(mask), clusters=ret$clusters)

  structure(
         list(clusvol=kvol,
              cluster=ret$clusters,
              centers=ret$center,
              coord_centers=ret$coord_centers),
         class=c("cluster_result", "list"))

}



#' cluster objects with a temporal constraint
#'
#' @export
#' @inheritParams supervoxels
#' @examples
#' feature_mat <- matrix(rnorm(100*10), 100, 10)
#' library(future)
#' plan(multicore)
#' cres <- supervoxel_cluster_time(t(feature_mat), K=20)
supervoxel_cluster_time <- function(feature_mat, K=min(nrow(feature_mat), 100), sigma1=1, sigma2=TR*3,
                               iterations=50, TR=2, filter=list(lp=0, hp=0),
                               use_medoid=FALSE, nreps=5) {
  if (filter$lp > 0 || filter$hp > 0) {
    message("supervoxel_cluster: filtering time series")
    feature_mat <- filter_mat(feature_mat, filter$lp, filter$hp)
  }

  nels <- nrow(feature_mat)
  grid <- as.matrix(seq(0, by=TR, length.out=nels))

  fits <- furrr::future_map(1:nreps, function(i) {
    initsamps <- sort(sample(1:nrow(feature_mat), K))
    initcenters <- feature_mat[initsamps,]

    curclus <- FNN::get.knnx(grid[initsamps,,drop=FALSE], grid, k=1)$nn.index[,1]

    ret <- supervoxel_cluster_fit(t(feature_mat), as.matrix(grid),
                           sigma1=sigma1, sigma2=sigma2,
                           K=K,
                           initclus=curclus,
                           iterations=iterations,
                           connectivity=3,
                           use_medoid=use_medoid)

    class(ret) <- c("cluster_result_time", "cluster_result", "list")
    ret
  })

}


#' @export
#' @import neurosurf
supervoxel_cluster_surface <- function(bsurf, K=500, sigma1=1, sigma2=5, iterations=50,
                                  connectivity=6, use_medoid=FALSE) {
  mask.idx <- indices(bsurf)
  grid <- coords(bsurf)[mask.idx,]

  feature_mat <- series(bsurf, mask.idx)

  ret <- supervoxel_cluster_fit(feature_mat, grid, sigma1=sigma1, sigma2=sigma2, K=K,
                           iterations=iterations, connectivity=connectivity,
                           use_medoid=use_medoid)


  kvol <- NeuroSurface(geometry=geometry(bsurf), indices=mask.idx, data=ret$clusters)
  index_sets <- split(mask.idx, ret$clusters)

  ret <- list(clusvol=kvol,
              clusters=ret$clusters,
              centers = ret$centers,
              coord_centers=ret$coord_centers,
              index_sets=index_sets)

  class(ret) <- c("cluster_result_surface", "cluster_result", "list")
}
bbuchsbaum/neurocluster documentation built on April 1, 2024, 8:43 p.m.