Nothing
SOTAclustering=sotaClustering =function(Data,ClusterNo,PlotIt=FALSE,UnrestGrowth,...){
# Cls=sotaClustering(Data,ClusterNo=2)
# Self-organizing Tree Algorithm (SOTA)
#
# INPUT
# Data[1:n,1:d] Data set with n observations and d features
# ClusterNo Number of clusters to search for
#
# OUTPUT
#
#
# Author: Luis Winckelmann
if(missing(UnrestGrowth)){
UnrestGrowth=TRUE
}
if(isFALSE(UnrestGrowth))#maxCycles is number of iterations
res=sota(Data,maxCycles = ClusterNo,unrest.growth=UnrestGrowth,...)
else #maxCycles is number of of clusters+1
res=sota(Data,maxCycles = ClusterNo-1, unrest.growth=UnrestGrowth,...)
Cls=res$clust
if(!is.null(rownames(Data)))
names(Cls)=rownames(Data)
if(PlotIt){
ClusterPlotMDS(Data,Cls)
}
Cls=ClusterRename(Cls,Data)
return(list(Cls=Cls,sotaObject=res))
} #end of SOTAclustering
# Helper functions needed for sota Clustering, copied from clValid cause it's ORPHANED
sota <- function (data, maxCycles, maxEpochs = 1000, distance = "euclidean",
wcell = 0.01, pcell = 0.005, scell = 0.001, delta = 1e-04,
neighb.level = 0, maxDiversity = 0.9, unrest.growth = TRUE,
...)
{
tree <- sota.init(data)
pr <- 4:ncol(tree)
n <- 3
genes <- 1:nrow(data)
clust <- rep(1, length(genes))
Node.Split <- 1
Res.V <- getResource(data, tree, clust, distance, pr)
if (distance == "correlation")
Res.V <- 1 - Res.V
diversity <- Res.V
for (k in 1:maxCycles) {
trainNode <- Node.Split
trainSamp <- genes[clust == trainNode]
curr.err <- 1e+10
ep <- 1
while (ep <= maxEpochs) {
last.err <- 0
left.ctr <- right.ctr <- 0
left.d <- right.d <- 0
for (i in trainSamp) {
cells <- tree[c(n - 1, n), ]
dist <- rep(0, nrow(cells))
for (j in 1:2) dist[j] <- dist.fn(data[i, ],
cells[j, pr], distance = distance)
or <- which.min(dist)
if (or == 1)
left.ctr <- left.ctr + 1
else right.ctr <- right.ctr + 1
closest <- cells[or, 1]
sis <- ifelse(closest%%2 == 0, closest + 1,
closest - 1)
sis.is.cell <- ifelse(tree[sis, "cell"] == 1,
1, 0)
if (sis.is.cell == 1) {
parent <- tree[closest, "anc"]
tree[closest, pr] <- tree[closest, pr] + wcell *
(data[i, ] - tree[closest, pr])
tree[sis, pr] <- tree[sis, pr] + scell * (data[i,
] - tree[sis, pr])
tree[parent, pr] <- tree[parent, pr] + pcell *
(data[i, ] - tree[parent, pr])
}
else {
tree[closest, pr] <- tree[closest, pr] + wcell *
(data[i, ] - tree[closest, pr])
}
}
cells <- tree[c(n - 1, n), ]
for (i in trainSamp) {
for (j in 1:2) dist[j] <- dist.fn(data[i, ],
cells[j, pr], distance = distance)
last.err <- last.err + min(dist)
}
last.err <- last.err/length(trainSamp)
if (ifelse(last.err == 0, 0, abs((curr.err - last.err)/last.err)) <
delta && left.ctr != 0 && right.ctr != 0)
break
ep <- ep + 1
curr.err <- last.err
}
clust <- assignGenes(data, trainSamp, clust, tree, n,
distance, pr, neighb.level)
Res.V <- getResource(data, tree, clust, distance, pr)
if (distance == "correlation")
Res.V <- 1 - Res.V
tempRes <- Res.V
tempRes[tempRes == 0] <- diversity[tempRes == 0]
diversity <- tempRes
if (k == maxCycles || (max(Res.V) < maxDiversity & unrest.growth ==
FALSE))
break
newCells <- splitNode(Res.V, tree, n)
tree <- newCells$tree
n <- newCells$n
Node.Split <- newCells$toSplit
}
tree <- trainLeaves(data, tree, clust, pr, wcell, distance,
n, delta)
Res.V <- getResource(data, tree, clust, distance, pr)
Res.V <- Res.V[Res.V != 0]
if (distance == "correlation")
Res.V <- 1 - Res.V
diversity[(length(diversity) - length(Res.V) + 1):length(diversity)] <- Res.V
treel <- tree[tree[, "cell"] == 1, ]
old.cl <- treel[, 1]
treel[, 1] <- 1:nrow(treel)
old.clust <- clust
clust <- cl.ID(old.clust, old.cl, 1:nrow(treel))
totals <- table(clust)
out <- list(data = data, c.tree = cbind(tree[1:n, ], Diversity = diversity),
tree = treel, clust = clust, totals = totals, dist = distance,
diversity = Res.V)
class(out) <- "sota"
return(out)
}
sota.init <- function (data)
{
nodes <- matrix(0, nrow(data) * 2, 3 + ncol(data))
if (is.null(colnames(data)))
colnames(data) <- paste("V", 1:ncol(data))
colnames(nodes) <- c("ID", "anc", "cell", colnames(data))
nodes[, "ID"] = 1:(nrow(data) * 2)
nodes[1, ] <- c(1, 0, 0, apply(data, 2, function(x) mean(x,
na.rm = TRUE)))
nodes[2, ] <- c(2, 1, 1, nodes[1, ][-c(1, 2, 3)])
nodes[3, ] <- c(3, 1, 1, nodes[1, ][-c(1, 2, 3)])
return(nodes)
}
getResource <- function (data, tree, clust, distance, pr)
{
dist <- rep(0, length(clust))
resource <- rep(0, max(clust))
for (i in unique(clust)) {
temp <- data[clust == i, ]
if (is.vector(temp))
temp <- matrix(temp, nrow = 1, ncol = ncol(data))
if (distance == "correlation")
resource[i] <- mean(apply(temp, 1, dist.fn, profile = tree[i,
pr], distance = distance))
else resource[i] <- mean(apply(temp, 1, dist.fn, profile = tree[i,
pr], distance = distance))
}
resource
}
dist.fn <- function (input, profile, distance)
{
if (distance == "correlation")
return(1 - cor(input, profile, use = "pairwise.complete.obs"))
else return(sqrt(sum((input - profile)^2)))
}
assignGenes <- function (data, Sample, clust, tree, n, distance, pr, neighb.level)
{
if (neighb.level == 0)
cells <- tree[c(n - 1, n), ]
else cells <- getCells(tree, neighb.level, n)
for (i in Sample) {
dist <- rep(0, nrow(cells))
for (j in 1:nrow(cells)) dist[j] <- dist.fn(data[i,
], cells[j, pr], distance)
or <- which.min(dist)
closest <- cells[or, 1]
clust[i] <- closest
}
clust
}
splitNode <- function (Res.V, tree, n)
{
maxheter <- which.max(Res.V)
cl.to.split <- tree[maxheter, 1]
tree[n <- n + 1, -1] <- tree[cl.to.split, -1]
tree[n, "anc"] <- cl.to.split
tree[n <- n + 1, -1] <- tree[cl.to.split, -1]
tree[n, "anc"] <- cl.to.split
tree[cl.to.split, "cell"] <- 0
return(list(tree = tree, n = n, toSplit = cl.to.split))
}
trainLeaves <- function (data, tree, clust, pr, wcell, distance, n, delta)
{
nc <- ncol(data)
for (i in 1:n) {
if (!is.element(i, clust))
next
temp <- matrix(data[clust == i, ], ncol = nc)
converged <- FALSE
init.err <- getCellResource(temp, tree[i, pr], distance)
while (!converged) {
for (j in 1:nrow(temp)) tree[i, pr] <- tree[i, pr] +
wcell * (temp[j, ] - tree[i, pr])
last.err <- getCellResource(temp, tree[i, pr], distance)
converged <- ifelse(abs((last.err - init.err)/last.err) <
delta, TRUE, FALSE)
init.err <- last.err
}
}
return(tree)
}
cl.ID <- function (clust, old.cl, new.cl)
{
for (i in 1:length(clust)) clust[i] <- new.cl[which(old.cl ==
clust[i])]
clust
}
getCellResource <- function (temp, profile, distance)
{
if (distance == "correlation")
resource <- mean(apply(temp, 1, dist.fn, profile, distance = distance))
else (distance == "euclidean")
resource <- mean(apply(temp, 1, dist.fn, profile, distance = distance))
resource
}
getCells <- function(tree, neighb.level, n){
or.n <- n
cells <- c(n-1,n)
for(i in 1:(neighb.level+1)){
n <- tree[n, "anc"]
if(n==1)
break
}
for(j in 2:(or.n-2)){
z <- j
if(tree[j,"cell"]!=1)
next
while(z > 0){
z <- tree[z, "anc"]
if(z==n){
cells <- c(cells, j)
break}
}
}
return(tree[cells,])
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.