#' Wrapper for constrained K-means on data subsampled to the smallest cohort size.
#'
#' This fuction is a wrapper for the constrained Kmeans algorithm using
#' lcvqe() from the conclust package. This function will subset each
#' cohort down to that with the smallest number of observations.This
#' function is not meant to be run individually, but as a 'clustFunc'
#' argument for running K2tax().
#' @param labels Vector of cohort values
#' @param features List of features (genes) to include
#' @return A character string of concatenated 1's and 2's pertaining to the
#' cohort assignments.
#' @references
#' \insertRef{reed_2020}{K2Taxonomer}
#' \insertRef{cKm}{K2Taxonomer}
#' @inheritParams K2tax
#' @export
#' @import conclust
## Create wrapper to subsample
cKmeansDownsampleSmallest <- function(labels, features, K2res) {
if("maxIter" %in% names(K2meta(K2res)$clustList)) {
MI <- K2meta(K2res)$clustList$maxIter
} else {
MI <- 25
}
labs <- as.character(K2colData(K2res)[, K2meta(K2res)$cohorts])
obsKeep <- labs %in% labels
labsSub <- labs[obsKeep]
eMatSub <- K2eMat(K2res)[features, obsKeep]
## Subsample the data
minSize <- min(table(labsSub))
sVec <- unlist(lapply(unique(labsSub), function(x, minSize) {
sample(which(labsSub == x), minSize)
}, minSize))
eMatSub <- eMatSub[, sVec]
labsSub <- labsSub[sVec]
## Get constraints
mustLink <- outer(labsSub, labsSub, "==")
mustLink[upper.tri(mustLink, diag=TRUE)] <- FALSE
mustLink <- which(mustLink, arr.ind=TRUE)
## Cluster data
dClust=factor(lcvqe(t(eMatSub), k=2, mustLink=mustLink,
cantLink=matrix(c(1, 1), nrow=1), maxIter=MI),
levels=c(1, 2))
## Get label-level clusters
dMat <- as.matrix(table(dClust, labsSub))[, labels]
modVec <- apply(dMat, 2, which.max)
mods <- paste(modVec, collapse="")
return(mods)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.