Nothing
# clustering
# experiment: dataset and metadata list
# method: "hc" (hierarchical clustering) or "kmeans"
# distance: "euclidean", "manhattan", spearman" or "pearson" methods
# clustMethod: "ward", "single", "complete", "average", "mcquitty", "median" or "centroid".
# hc.type: "samples" or "variables"
# num.clusters: number of clusters in kmeans
clustering = function(dataset, method = "hc", distance = "euclidean",
type = "samples", num.clusters = 5, clustMethod = "complete"){
if (method == "hc") {
result = hierarchical_clustering(dataset, distance, hc.type = type,
clustMethod = clustMethod)
}
else if (method == "kmeans") {
result = kmeans_clustering(dataset, num.clusters, type)
}
result
}
# HIERARCHICAL CLUSTERING
hierarchical_clustering <- function(dataset, distance='euclidean', clustMethod='complete',
hc.type = "samples")
{
if (hc.type == "samples"){
datamat = t(dataset$data)
}
else datamat = dataset$data
if (distance == 'pearson' || distance == 'spearman'){
dist.matrix = dist(1-cor(t(datamat), method = distance))
} else {
dist.matrix = dist(datamat, method = distance);
}
hc.tree = hclust(dist.matrix, method = clustMethod);
hc.tree
}
# K-MEANS
kmeans_clustering <- function(dataset, num.clusters, type = "samples")
{
if (type == "samples"){
datamat = t(dataset$data)
}
else datamat = dataset$data
result.kmeans = kmeans(datamat, centers = num.clusters, nstart = 100, iter.max = 50)
result.kmeans
}
kmeans_result_df = function(kmeans.result){
kmeans.result.df = NULL
for (i in 1:max(kmeans.result$cluster)){
kmeans.result.df = rbind(kmeans.result.df, data.frame(cluster = i, samples = paste(names(kmeans.result$cluster[kmeans.result$cluster==i]),collapse=' ',sep=" ")))
}
kmeans.result.df = data.frame(kmeans.result.df)
kmeans.result.df$samples = as.character(kmeans.result.df$samples)
kmeans.result.df
}
######################### CLUSTERING PLOTS #########################
#kmeans plot
kmeans_plot = function(dataset, kmeans.result){
num.clusters = max(kmeans.result$cluster)
opar=par(mfrow=c(num.clusters, 1), mar = c(2,2,2,2))
on.exit(par(opar))
for (i in 1:num.clusters){
matplot(as.matrix(dataset$data[,kmeans.result$cluster == i]), type="l", col="grey",
main = paste("Cluster ", i, ", n = ", kmeans.result$size[i], sep =""), axes = FALSE)
lines(apply(as.matrix(dataset$data[,kmeans.result$cluster == i,drop=FALSE]), 1, median), type="l", col="blue",lwd=1)
axis(2)
axis(1, 1:nrow(dataset$data), rownames(dataset$data))
}
}
#dendrogram
"dendrogram_plot" = function(dataset, hc.result, column.metadata = 1, labels = NULL, ...){
if (!requireNamespace("ggdendro", quietly = TRUE)) {
stop("Package ggdendro needed for this function to work. Please install it: install.packages('ggdendro')",
call. = FALSE)
}
if (!is.null(labels)){
labels.hc = labels
} else {
if (is.null(column.metadata)){
labels.hc = colnames(dataset$data)
} else {
labels.hc = dataset$metadata[,column.metadata]
}
}
hc.result$labels = labels.hc
ggdendro::ggdendrogram(hc.result, ...)
}
"dendrogram_plot_col" = function(dataset, hc.result, classes.col, colors=NULL, title = "", lab.cex = 1.0, leg.pos = "topright", label_samples=NULL, ...)
{
classes = dataset$metadata[,classes.col]
cluster = as.dendrogram(hc.result)
if(is.null(label_samples)) label_samples=colnames(dataset$data)
else label_samples=dataset$metadata[,label_samples]
cluster = dendrapply(cluster, color_leaf, dataset, classes, label_samples, lab.cex)
plot(cluster, main = title, horiz = FALSE, ...)
leg.txt = levels(classes)
leg.txt = c("Legend:", leg.txt)
if (is.null(colors)) leg.col = 1:length(levels(classes))
else leg.col = colors
leg.col = c("black", leg.col)
if (leg.pos != "none")
legend(leg.pos, leg.txt, text.col = leg.col, bty = "n")
}
"color_leaf" = function (n, dataset, classes, labels, lab.cex = 1.0) {
if (is.leaf(n)) {
a <- attributes(n)
i <- match(a$label, get_sample_names(dataset))
attr(n, "nodePar") <- c(a$nodePar, list(lab.col = as.integer(classes[i]),
lab.cex = lab.cex, pch = NA))
attr(n, "label") <- labels[i]
}
n
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.