cstab: Selection of Number of Clusters via Normalized Clustering Instability

Documented in cStability cStability_mEst cStability_orig

#' !Deprecated! Selection of number of clusters via clustering instability
#'
#' !Deprecated! Selection of number of clusters via \emph{model-based} or \emph{model-free},
#'   \emph{normalized} or \emph{unnormalized} clustering instability.
#'
#' @param data a n x p data matrix of type numeric.
#' @param kseq a vector with considered numbers clusters k > 1
#' @param nB an integer specifying the number of bootstrap comparisons.
#' @param norm logical specifying whether the instability path should be
#'    normalized. If TRUE, the instability path is normalized, accounting for a
#'    trivial decrease in instability due to a increasing k (see Haslbeck & Wulff,
#'    2016).
#' @param predict boolean specifying whether the model-based or the model-free
#'    variant should be used (see Haslbeck & Wulff, 2016).
#' @param method character string specifying the clustering algorithm. 'kmeans' for
#'    the k-means algorithm, 'hierarchical' for hierarchical clustering.
#' @param linkage character specifying the linkage criterion, in case
#'    \code{type='hierarchical'}. The available options are "single", "complete",
#'    "average", "mcquitty", "ward.D", "ward.D2", "centroid" or "median". See
#'    \link[stats]{hclust}.
#' @param kmIter integer specifying the the number of restarts of the k-means algorithm
#'    in order to avoid local minima.
#' @param pbar logical
#'
#' @return a list that contains the optimal k selected by the unnormalized and
#'    normalized instability method. It also includes a vector containing the averaged
#'    instability path (over bootstrap samples) and a matrix containing the instability
#'    path of each bootstrap sample for both the normalized and the unnormalized method.
#'
#' @references
#'    Ben-Hur, A., Elisseeff, A., & Guyon, I. (2001). A stability based method for
#'    discovering structure in clustered data. \emph{Pacific symposium on biocomputing,
#'    7}, 6-17.
#'
#'    Tibshirani, R., & Walther, G. (2005). Cluster validation by prediction strength.
#'    \emph{Journal of Computational and Graphical Statistics, 14}(3), 511-528.
#'
#' @author
#'    Dirk U. Wulff <dirk.wulff@gmail.com>
#'    Jonas M. B. Haslbeck <jonas.haslbeck@gmail.com>
#'
#' @examples
#' \dontrun{
#'   # Generate Data from Gaussian Mixture
#'   s <- .1
#'   n <- 50
#'   data <- rbind(cbind(rnorm(n, 0, s), rnorm(n, 0, s)),
#'                 cbind(rnorm(n, 1, s), rnorm(n, 1, s)),
#'                 cbind(rnorm(n, 0, s), rnorm(n, 1, s)),
#'                 cbind(rnorm(n, 1, s), rnorm(n, 0, s)))
#'   plot(data)
#'
#'   # Selection of Number of Clusters using Instability-based Measures
#'   stab_obj <- cStability(data, kseq=2:10)
#'   print(stab_obj)
#'   }
#'
#' @export

cStability_orig <- function(data, # n x p data matrix
                            kseq = 2:20, # sequence of ks tested
                            nB   = 10, # number of bootstrap comparisons
                            norm = TRUE, # norm over pw equal assign,FALSE=as in Wang etal
                            predict = TRUE, # use prediction approach, if FALSE, use brute pair in equal cluster approach
                            method    = 'kmeans', # or 'hierarchical'
                            linkage = 'complete', # or average, or ...
                            kmIter  = 5,
                            pbar = TRUE) # number of reruns of k-means algorithm
{


  # ---------- Input Checks ----------

  # On Data
  if(sum(is.na(data))>0) stop('No missing values permitted!')

  # On k-sequence
  if(1 %in% kseq) stop('Please select a k sequence starting with 2: {2,3,...K}!')

  # On B
  if(round(nB)!=nB) stop('The number of bootstrap comparison has to be a positive integer value.')

  # On type
  if(method %in% c('kmeans', 'hierarchical'))


  # ---------- Create Containers ----------

  n_obj <- nrow(data)
  m_instab <- matrix(NA, nB, length(kseq)) # storage: no normalization
  m_instab_norm <- matrix(NA, nB, length(kseq)) # storage: normalization


  # ---------- Draw bootstrap samples ----------

  ind <- list()
  share <- list()
  for(b in 1:nB) {
    tmp_1 <- sample(1:n_obj, n_obj, replace=T)
    tmp_2 <- sample(1:n_obj, n_obj, replace=T)
    tmp_1 <- tmp_1[order(tmp_1)]
    tmp_2 <- tmp_2[order(tmp_2)]
    ind[[b]] <- list(tmp_1,tmp_2)
    intersct <- intersect(tmp_1,tmp_2)
    share[[b]] <- list(tmp_1 %in% intersct & !duplicated(tmp_1), tmp_2 %in% intersct & !duplicated(tmp_2)) # leave duplicates in ?
    }


  # ---------- Calculate distance matrix ----------

  distm = fast_dist(data) # maybe here?

  # ----- Loop over B comparisons -----

  if(pbar)  pb <- utils::txtProgressBar(min=0, max=nB, style = 2)

  for(b in 1:nB) {
    for(k in kseq) {

        # kmeans
        if(method == 'kmeans') {
          kms_1 <- list()
          kms_2 <- list()
          for(km_i in 1:kmIter) {
            kms_1[[km_i]] = stats::kmeans(data[ind[[b]][[1]],], centers = k)
            kms_2[[km_i]] = stats::kmeans(data[ind[[b]][[2]],], centers = k) # this has to be 2
            }
          km_1 = kms_1[[which.min(sapply(kms_1,function(x) mean(x$tot.withinss)))]]
          km_2 = kms_2[[which.min(sapply(kms_1,function(x) mean(x$tot.withinss)))]]
          if(predict == TRUE) {
            cl_1 = kmeans_predict(km_1, data = data)
            cl_2 = kmeans_predict(km_2, data = data)
            } else {
            cl_1 = km_1$cluster[share[[b]][[1]]]
            cl_2 = km_2$cluster[share[[b]][[2]]]
            }
          }

        if(method == 'hierarchical') {
          #t = proc.time()[3]
          hc_1 = fastcluster::hclust(stats::as.dist(distm[ind[[b]][[1]],ind[[b]][[1]]]), method = linkage)
          hc_2 = fastcluster::hclust(stats::as.dist(distm[ind[[b]][[2]],ind[[b]][[2]]]), method = linkage)
          cl_1 = stats::cutree(hc_1, k)[share[[b]][[1]]]
          cl_2 = stats::cutree(hc_2, k)[share[[b]][[2]]]
          #print(proc.time()[3] - t)
          }

      # check for equality of clusterings
      eq_1 = equal(cl_1)
      eq_2 = equal(cl_2)
      InStab <- mean(eq_1 != eq_2)

      # Normalize = FALSE
      m_instab[b, which(kseq==k)] <- InStab

      # Normalize = TRUE
      norm_val <- instabLookup(table(cl_1), table(cl_2))
      m_instab_norm[b, which(kseq==k)] <- InStab / norm_val

    } # end for k

    if(pbar) utils::setTxtProgressBar(pb, b)

  } # end for B

  # taking the mean
  m_instab_M      = colMeans(m_instab)
  m_instab_norm_M = colMeans(m_instab_norm)

  kopt_instab  = which(m_instab_M == min(m_instab_M))+(min(kseq)-1)
  kopt_instabN = which(m_instab_norm_M == min(m_instab_norm_M))+(min(kseq)-1)

  # replicate function call
  f_call <- list('kseq'=kseq,
                 'nB'=nB,
                 'norm'=norm,
                 'prediction'=predict,
                 'method'=method,
                 'linkage'=linkage,
                 'kmIter'=kmIter)

  outlist <- list("k_instab"=kopt_instab,
                  "k_instab_norm"=kopt_instabN,
                  "instab_path"=m_instab_M,
                  "instab_path_norm"=m_instab_norm_M,
                  "instab_path_matrix"=m_instab,
                  "instab_path_nrom_matrix"=m_instab_norm,
                  'call'=f_call)

  class(outlist) <- c('list', 'cstab', 'cStability')

  return(outlist)

} # EoF

#' Selection of number of clusters via clustering instability
#'
#' Selection of number of clusters via \emph{model-based} or \emph{model-free},
#'   \emph{normalized} or \emph{unnormalized} clustering instability.
#'
#' @param data a n x p data matrix of type numeric.
#' @param kseq a vector with considered numbers clusters k > 1
#' @param nB an integer specifying the number of bootstrap comparisons.
#' @param norm logical specifying whether the instability path should be
#'    normalized. If TRUE, the instability path is normalized, accounting for a
#'    trivial decrease in instability due to a increasing k (see Haslbeck & Wulff,
#'    2016).
#' @param predict boolean specifying whether the model-based or the model-free
#'    variant should be used (see Haslbeck & Wulff, 2016).
#' @param method character string specifying the clustering algorithm. 'kmeans' for
#'    the k-means algorithm, 'hierarchical' for hierarchical clustering.
#' @param linkage character specifying the linkage criterion, in case
#'    \code{type='hierarchical'}. The available options are "single", "complete",
#'    "average", "mcquitty", "ward.D", "ward.D2", "centroid" or "median". See
#'    \link[stats]{hclust}.
#' @param kmIter integer specifying the the number of restarts of the k-means algorithm
#'    in order to avoid local minima.
#' @param pbar logical
#'
#' @return a list that contains the optimal k selected by the unnormalized and
#'    normalized instability method. It also includes a vector containing the averaged
#'    instability path (over bootstrap samples), k-wise confidence intervals around these paths
#'    and a matrix containing the instability path of each bootstrap sample for both the normalized
#'    and the unnormalized method.
#'
#' @references
#'    Ben-Hur, A., Elisseeff, A., & Guyon, I. (2001). A stability based method for
#'    discovering structure in clustered data. \emph{Pacific symposium on biocomputing,
#'    7}, 6-17.
#'
#'    Tibshirani, R., & Walther, G. (2005). Cluster validation by prediction strength.
#'    \emph{Journal of Computational and Graphical Statistics, 14}(3), 511-528.
#'
#' @author
#'    Dirk U. Wulff <dirk.wulff@gmail.com>
#'    Jonas M. B. Haslbeck <jonas.haslbeck@gmail.com>
#'
#' @examples
#' \dontrun{
#'   # Generate Data from Gaussian Mixture
#'   s <- .1
#'   n <- 50
#'   data <- rbind(cbind(rnorm(n, 0, s), rnorm(n, 0, s)),
#'                 cbind(rnorm(n, 1, s), rnorm(n, 1, s)),
#'                 cbind(rnorm(n, 0, s), rnorm(n, 1, s)),
#'                 cbind(rnorm(n, 1, s), rnorm(n, 0, s)))
#'   plot(data)
#'
#'   # Selection of Number of Clusters using Instability-based Measures
#'   stab_obj <- cStability(data, kseq=2:10)
#'   print(stab_obj)
#'   }
#'
#' @export

cStability <- function(data, # n x p data matrix
                       kseq = 2:20, # sequence of ks tested
                       nB   = 10, # number of bootstrap comparisons
                       norm = TRUE, # norm over pw equal assign,FALSE=as in Wang etal
                       predict = TRUE, # use prediction approach, if FALSE, use brute pair in equal cluster approach
                       method  = 'kmeans', # or 'hierarchical'
                       linkage = 'complete', # or average, or ...
                       kmIter  = 5,
                       pbar = TRUE) # number of reruns of k-means algorithm
{


  # ---------- Input Checks ----------

  # On Data
  if(sum(is.na(data))>0) stop('No missing values permitted!')

  # On k-sequence
  if(1 %in% kseq) stop('Please select a k sequence starting with 2: {2,3,...K}!')

  # On B
  if(round(nB)!=nB) stop('The number of bootstrap comparison has to be a positive integer value.')

  # On type
  if(method %in% c('kmeans', 'hierarchical'))

  # On N
  if(nrow(data) <= max(kseq) * .63) stop('The maximum k cannot be larger than ca. 63% of the number of objects, the expected number of unique objects per bootstrap sample.')
  if(predict == TRUE & nrow(data) <= max(kseq) * .39) stop('For the model-free approach, the maximum k cannot be larger than ca. 39% of the number of objects, the expected number of unique objects in the intersection of two bootstrap samples.')

    # ---------- Create Containers ----------

  n_obj <- nrow(data)
  m_instab <- matrix(NA, nB, length(kseq)) # storage: no normalization
  m_instab_norm <- matrix(NA, nB, length(kseq)) # storage: normalization


  # ---------- Draw bootstrap samples ----------

  ind <- list()
  share <- list()
  for(b in 1:nB) {
    tmp_1 <- sample(1:n_obj, n_obj, replace=T)
    tmp_2 <- sample(1:n_obj, n_obj, replace=T)
    tmp_1 <- tmp_1[order(tmp_1)]
    tmp_2 <- tmp_2[order(tmp_2)]
    ind[[b]] <- list(tmp_1,tmp_2)
    intersct <- intersect(tmp_1,tmp_2)
    share[[b]] <- list(tmp_1 %in% intersct & !duplicated(tmp_1), tmp_2 %in% intersct & !duplicated(tmp_2)) # leave duplicates in ?
  }


  # ---------- Calculate distance matrix ----------

  distm = fast_dist(data) # maybe here?

  # ----- Loop over B comparisons -----

  if(pbar)  pb <- utils::txtProgressBar(min=0, max=nB, style = 2)

  for(b in 1:nB) {
    for(k in kseq) {

      # kmeans
      if(method == 'kmeans') {
        kms_1 <- list()
        kms_2 <- list()
        for(km_i in 1:kmIter) {
          kms_1[[km_i]] = stats::kmeans(data[ind[[b]][[1]],], centers = k)
          kms_2[[km_i]] = stats::kmeans(data[ind[[b]][[2]],], centers = k) # this has to be 2
        }
        km_1 = kms_1[[which.min(sapply(kms_1,function(x) mean(x$tot.withinss)))]]
        km_2 = kms_2[[which.min(sapply(kms_1,function(x) mean(x$tot.withinss)))]]
        if(predict == TRUE) {
          cl_1 = kmeans_predict(km_1, data = data)
          cl_2 = kmeans_predict(km_2, data = data)
        } else {
          cl_1 = km_1$cluster[share[[b]][[1]]]
          cl_2 = km_2$cluster[share[[b]][[2]]]
        }
      }

      if(method == 'hierarchical') {
        #t = proc.time()[3]
        hc_1 = fastcluster::hclust(stats::as.dist(distm[ind[[b]][[1]],ind[[b]][[1]]]), method = linkage)
        hc_2 = fastcluster::hclust(stats::as.dist(distm[ind[[b]][[2]],ind[[b]][[2]]]), method = linkage)
        cl_1 = stats::cutree(hc_1, k)[share[[b]][[1]]]
        cl_2 = stats::cutree(hc_2, k)[share[[b]][[2]]]
        #print(proc.time()[3] - t)
      }

      # check for equality of clusterings
      eq_1 = equal(cl_1)
      eq_2 = equal(cl_2)
      InStab <- mean(eq_1 != eq_2)

      # Normalize = FALSE
      m_instab[b, which(kseq==k)] <- InStab

      # Normalize = TRUE
      M_1 = table(cl_1)
      M_2 = table(cl_2)

      # old correction to ensure that
      # if(predict == FALSE) {
      #   M_1 = M_1 * (nrow(data) / sum(M_1))
      #   M_2 = M_2 * (nrow(data) / sum(M_2))
      #   }
      cs <- instab_simple_var(M_1, M_2)
      m_instab_norm[b, which(kseq==k)] <- .5 * (InStab - cs[1]) / cs[2]

    } # end for k

    if(pbar) utils::setTxtProgressBar(pb, b)

  } # end for B

  # taking the mean
  m_instab_M      = colMeans(m_instab)
  m_instab_norm_M = colMeans(m_instab_norm)

  # extracting 95% CIs
  m_instab_CI      = apply(m_instab,2, quantile,c(.025,.975))
  m_instab_norm_CI = apply(m_instab_norm,2, quantile,c(.025,.975))

  # identifying k_opt
  kopt_instab  = which(m_instab_M == min(m_instab_M))+(min(kseq)-1)
  kopt_instabN = which(m_instab_norm_M == min(m_instab_norm_M))+(min(kseq)-1)

  # replicate function call
  f_call <- list('kseq'=kseq,
                 'nB'=nB,
                 'norm'=norm,
                 'prediction'=predict,
                 'method'=method,
                 'linkage'=linkage,
                 'kmIter'=kmIter)

  outlist <- list("k_instab"=kopt_instab,
                  "k_instab_norm"=kopt_instabN,
                  "instab_path"=m_instab_M,
                  "instab_path_norm"=m_instab_norm_M,
                  "instab_path_CI" = m_instab_CI,
                  "instab_path_norm_CI" = m_instab_norm_CI,
                  "instab_path_matrix"=m_instab,
                  "instab_path_nrom_matrix"=m_instab_norm,
                  'call'=f_call)

  class(outlist) <- c('list', 'cstab', 'cStability')

  return(outlist)

} # EoF

#' !Deprecated! Selection of number of clusters via clustering instability
#'
#' !Deprecated! Selection of number of clusters via \emph{model-based} or \emph{model-free},
#'   \emph{normalized} or \emph{unnormalized} clustering instability.
#'
#' @param data a n x p data matrix of type numeric.
#' @param kseq a vector with considered numbers clusters k > 1
#' @param nB an integer specifying the number of bootstrap comparisons.
#' @param norm logical specifying whether the instability path should be
#'    normalized. If TRUE, the instability path is normalized, accounting for a
#'    trivial decrease in instability due to a increasing k (see Haslbeck & Wulff,
#'    2016).
#' @param predict boolean specifying whether the model-based or the model-free
#'    variant should be used (see Haslbeck & Wulff, 2016).
#' @param method character string specifying the clustering algorithm. 'kmeans' for
#'    the k-means algorithm, 'hierarchical' for hierarchical clustering.
#' @param linkage character specifying the linkage criterion, in case
#'    \code{type='hierarchical'}. The available options are "single", "complete",
#'    "average", "mcquitty", "ward.D", "ward.D2", "centroid" or "median". See
#'    \link[stats]{hclust}.
#' @param kmIter integer specifying the the number of restarts of the k-means algorithm
#'    in order to avoid local minima.
#' @param pbar logical
#'
#' @return a list that contains the optimal k selected by the unnormalized and
#'    normalized instability method. It also includes a vector containing the averaged
#'    instability path (over bootstrap samples) and a matrix containing the instability
#'    path of each bootstrap sample for both the normalized and the unnormalized method.
#'
#' @references
#'    Ben-Hur, A., Elisseeff, A., & Guyon, I. (2001). A stability based method for
#'    discovering structure in clustered data. \emph{Pacific symposium on biocomputing,
#'    7}, 6-17.
#'
#'    Tibshirani, R., & Walther, G. (2005). Cluster validation by prediction strength.
#'    \emph{Journal of Computational and Graphical Statistics, 14}(3), 511-528.
#'
#' @author
#'    Dirk U. Wulff <dirk.wulff@gmail.com>
#'    Jonas M. B. Haslbeck <jonas.haslbeck@gmail.com>
#'
#' @examples
#' \dontrun{
#'   # Generate Data from Gaussian Mixture
#'   s <- .1
#'   n <- 50
#'   data <- rbind(cbind(rnorm(n, 0, s), rnorm(n, 0, s)),
#'                 cbind(rnorm(n, 1, s), rnorm(n, 1, s)),
#'                 cbind(rnorm(n, 0, s), rnorm(n, 1, s)),
#'                 cbind(rnorm(n, 1, s), rnorm(n, 0, s)))
#'   plot(data)
#'
#'   # Selection of Number of Clusters using Instability-based Measures
#'   stab_obj <- cStability(data, kseq=2:10)
#'   print(stab_obj)
#'   }
#'
#' @export

cStability_mEst <- function(data, # n x p data matrix
                               kseq = 2:20, # sequence of ks tested
                               nB   = 10, # number of bootstrap comparisons
                               norm = TRUE, # norm over pw equal assign,FALSE=as in Wang etal
                               predict = TRUE, # use prediction approach, if FALSE, use brute pair in equal cluster approach
                               method    = 'kmeans', # or 'hierarchical'
                               linkage = 'complete', # or average, or ...
                               kmIter  = 5,
                               pbar = TRUE) # number of reruns of k-means algorithm
{


  # ---------- Input Checks ----------

  # On Data
  if(sum(is.na(data))>0) stop('No missing values permitted!')

  # On k-sequence
  if(1 %in% kseq) stop('Please select a k sequence starting with 2: {2,3,...K}!')

  # On B
  if(round(nB)!=nB) stop('The number of bootstrap comparison has to be a positive integer value.')

  # On type
  if(method %in% c('kmeans', 'hierarchical'))


    # ---------- Create Containers ----------

  n_obj <- nrow(data)
  m_instab <- matrix(NA, nB, length(kseq)) # storage: no normalization
  m_exp_instab <- numeric(length(kseq)) # storage: no normalization

  # ---------- Draw bootstrap samples ----------

  ind <- list()
  share <- list()
  for(b in 1:nB) {
    tmp_1 <- sample(1:n_obj, n_obj, replace=T)
    tmp_2 <- sample(1:n_obj, n_obj, replace=T)
    tmp_1 <- tmp_1[order(tmp_1)]
    tmp_2 <- tmp_2[order(tmp_2)]
    ind[[b]] <- list(tmp_1,tmp_2)
    intersct <- intersect(tmp_1,tmp_2)
    share[[b]] <- list(tmp_1 %in% intersct & !duplicated(tmp_1), tmp_2 %in% intersct & !duplicated(tmp_2)) # leave duplicates in ?
  }


  # ---------- Calculate distance matrix ----------

  distm = fast_dist(data) # maybe here?

  # ----- Loop over B comparisons -----

  if(pbar)  pb <- utils::txtProgressBar(min=0, max=max(kseq), style = 2)

  for(k in kseq) {

    # container for ms
    m_ms <- matrix(NA, nB * 2, k) # storage: normalization
    new_ns <- rep(0, nB)

    for(b in 1:nB) {

      # kmeans
      if(method == 'kmeans') {
        kms_1 <- list()
        kms_2 <- list()
        for(km_i in 1:kmIter) {
          kms_1[[km_i]] = stats::kmeans(data[ind[[b]][[1]],], centers = k)
          kms_2[[km_i]] = stats::kmeans(data[ind[[b]][[2]],], centers = k) # this has to be 2
        }
        km_1 = kms_1[[which.min(sapply(kms_1,function(x) mean(x$tot.withinss)))]]
        km_2 = kms_2[[which.min(sapply(kms_1,function(x) mean(x$tot.withinss)))]]
        if(predict == TRUE) {
          cl_1 = kmeans_predict(km_1, data = data)
          cl_2 = kmeans_predict(km_2, data = data)
        } else {
          cl_1 = km_1$cluster[share[[b]][[1]]]
          cl_2 = km_2$cluster[share[[b]][[2]]]
        }
      }

      if(method == 'hierarchical') {
        #t = proc.time()[3]
        hc_1 = fastcluster::hclust(stats::as.dist(distm[ind[[b]][[1]],ind[[b]][[1]]]), method = linkage)
        hc_2 = fastcluster::hclust(stats::as.dist(distm[ind[[b]][[2]],ind[[b]][[2]]]), method = linkage)
        cl_1 = stats::cutree(hc_1, k)[share[[b]][[1]]]
        cl_2 = stats::cutree(hc_2, k)[share[[b]][[2]]]
        #print(proc.time()[3] - t)
      }

      # orig n
      new_n = length(cl_1)

      # check for equality of clusterings
      eq_1 = equal(cl_1)
      eq_2 = equal(cl_2)

      # calculate instability
      InStab <- mean(eq_1 != eq_2)

      # Normalize = FALSE
      m_instab[b, which(kseq==k)] <- InStab

      # Normalize = TRUE
      m_1 = table(cl_1)
      m_2 = table(cl_2)
      m_1 = m_1[as.character(1:k)]
      m_2 = m_2[as.character(1:k)]
      m_1[is.na(m_1)] = 0
      m_2[is.na(m_2)] = 0
      m_ms[2*b - 1,] = m_1
      m_ms[2*b,] = m_2

      # store new ns
      new_ns[b] = new_n

    } # end for B

    if(pbar) utils::setTxtProgressBar(pb, k)

    # Gather Ms
    m_ms_M = colMeans(m_ms)
    if(predict == FALSE) m_ms_M = m_ms_M * (nrow(data) / mean(new_ns))
    #print(paste0(sum(m_ms_M),"  -  ",instab_simple(m_ms_M, m_ms_M),"  -  ",paste0(m_ms_M,collapse=' ')))

    # compute instab
    m_exp_instab[which(kseq==k)] = instab_simple(m_ms_M, m_ms_M)

  } # end for B

  # taking the mean
  m_instab_M      = colMeans(m_instab)
  m_instab_norm_M = .5*(m_instab_M - m_exp_instab) / (sqrt(m_exp_instab/2)**2)

  kopt_instab  = which(m_instab_M == min(m_instab_M))+(min(kseq)-1)
  kopt_instabN = which(m_instab_norm_M == min(m_instab_norm_M))+(min(kseq)-1)

  # replicate function call
  f_call <- list('kseq'=kseq,
                 'nB'=nB,
                 'norm'=norm,
                 'prediction'=predict,
                 'method'=method,
                 'linkage'=linkage,
                 'kmIter'=kmIter)

  outlist <- list("k_instab"=kopt_instab,
                  "k_instab_norm"=kopt_instabN,
                  "instab_path"=m_instab_M,
                  "instab_path_norm"=m_instab_norm_M,
                  "instab_path_matrix"=m_instab,
                  "instab_path_norm_matrix"=NA,
                  "c_1"=m_exp_instab,
                  "c_2"=sqrt(m_exp_instab/2)**2,
                  'call'=f_call)

  class(outlist) <- c('list', 'cstab', 'cStability')

  return(outlist)

} # EoF
jmbh/cstab documentation built on Oct. 2, 2023, 1:56 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
jmbh/cstab
Selection of Number of Clusters via Normalized Clustering Instability

R/cStability.R
In jmbh/cstab: Selection of Number of Clusters via Normalized Clustering Instability

Defines functions cStability_mEst cStability cStability_orig

Documented in cStability cStability_mEst cStability_orig

R Package Documentation

Browse R Packages

We want your feedback!

jmbh/cstab Selection of Number of Clusters via Normalized Clustering Instability

R/cStability.R In jmbh/cstab: Selection of Number of Clusters via Normalized Clustering Instability

Defines functions cStability_mEst cStability cStability_orig

Documented in cStability cStability_mEst cStability_orig

R Package Documentation

Browse R Packages

We want your feedback!

jmbh/cstab
Selection of Number of Clusters via Normalized Clustering Instability

R/cStability.R
In jmbh/cstab: Selection of Number of Clusters via Normalized Clustering Instability