R/kmeanspp.R

Defines functions .Roulettemethod .get_centroids kmeanspp

Documented in kmeanspp

#' A new version of kmeans that generates stable cluster result
#' @param X a data matrix with each row as a sample and each column as a feature
#' @param k the cluster number
#'
#' @return res, the cluster result generated by this function
#' @export
#' @importFrom stats kmeans
#' @examples
#' library(InterSIM)
#' sim.data <- InterSIM(n.sample=500, cluster.sample.prop = c(0.20,0.30,0.27,0.23),
#' delta.methyl=5, delta.expr=5, delta.protein=5,p.DMP=0.2, p.DEG=NULL,
#' p.DEP=NULL,sigma.methyl=NULL, sigma.expr=NULL, sigma.protein=NULL,cor.methyl.expr=NULL,
#' cor.expr.protein=NULL,do.plot=FALSE, sample.cluster=TRUE, feature.cluster=TRUE)
#' sim.methyl <- sim.data$dat.methyl
#' sim.expr <- sim.data$dat.expr
#' sim.protein <- sim.data$dat.protein
#' temp_data <- list(sim.methyl, sim.expr, sim.protein)
#' init_list <- initialize_WL(temp_data,k=4)
#' update_E_list <- update_E(temp_data,init_list)
#' lambda <- 0.01
#' cluster_res <- kmeanspp(update_E_list[[4]],4)
kmeanspp <- function(X,k){
  centroids <- .get_centroids(X,k)
  res <- kmeans(X,centroids)
  return(res)
}


# get k data centroids
.get_centroids <- function(X,k){
  p_num <- nrow(X)
  feature_len <- ncol(X)
  data_center <- apply(X,2,mean)
  distance_list1 <- vector('numeric',nrow(X))
  for (i in 1:nrow(X))
  {
    distance_list1[i] <- (X[i,]-data_center)%*%(X[i,]-data_center)
  }
  min_index <- which.min(distance_list1)
  centroids <- matrix(X[min_index,],1,feature_len)
  for (i in 2:k){
    distance_matrix <- matrix(0,p_num,(i-1))
    for (j in 1:nrow(distance_matrix)){
      for (p in 1:ncol(distance_matrix)){
        distance_matrix[j,p] <- (X[j,]-centroids[p,])%*%(X[j,]-centroids[p,])
      }
    }
    index <- .Roulettemethod(distance_matrix)
    centroids <- rbind(centroids,X[index,])
  }
  return(centroids)
}


# select the next data point index according to the distance matrix
.Roulettemethod <- function(distance_matrix){
  min_distance <- apply(distance_matrix, 1, min)
  index <- which.max(min_distance)
  return(index)
}

Try the M3JF package in your browser

Any scripts or data that you put into this service are public.

M3JF documentation built on Aug. 14, 2023, 9:08 a.m.