inst/doc/example_figures.R

## ---- eval=FALSE--------------------------------------------------------------
#  install.packages("dendsort")
#  install.packages("seriation")
#  install.packages("gplots")
#  install.packages("RColorBrewer")

## ---- message=FALSE-----------------------------------------------------------
library("dendsort")
library("seriation")
library("gplots")
library("RColorBrewer")

## ---- fig.width=7, fig.height=5-----------------------------------------------
#simulate the data
set.seed(1234)
x <- rnorm(10, mean=rep(1:5, each=2), sd=0.4)
y <- rnorm(10, mean=rep(c(1,2), each=5), sd=0.4)
dataFrame <- data.frame(x=x, y=y, row.names=c(1:10))
#calculate distance matrix. default is Euclidean distance
distxy <- dist(dataFrame)
#perform hierarchical clustering. The default is complete linkage.
hc <- hclust(distxy)
#plot Figure 2
par(mfrow = c(1, 3), mai=c(1,0.2,1,0.2))
#Scatter plot
plot(x, y, col="gray", pch=19, cex=3.5, xlim=c(0,5), ylim=c(0,5))
text(x, y, labels=as.character(1:10), cex=1)
#Default dendrogram
plot(hc, main="before sorting", sub ="", xlab="" )
#Reordered dendrogram
plot(dendsort(hc), main="after sorting", sub="", xlab="")

## ---- fig.width=7, fig.height=5-----------------------------------------------
#simulate the data
set.seed(1234)
x=matrix(rnorm(50*2), ncol=2)
x[1:25, 1] = x[1:25, 1]+3
x[1:25, 2] = x[1:25, 2]-4
x = scale(x)
#different linkage types
hc.complete = hclust(dist(x), method="complete")
hc.average = hclust(dist(x), method="average")
hc.single = hclust(dist(x), method="single")
#generate Figure 4
par(mfrow=c(1,3), mar = c(2, 2, 2, 2))
plot(hc.complete, main="Complete Linkage", xlab="", sub="", cex=0.7)
plot(hc.average, main="Average Linkage", xlab="", sub="", cex=0.7)
plot(hc.single, main="Single Linkage", xlab="", sub="", cex=0.7)


## ---- fig.width=7, fig.height=5-----------------------------------------------
par(mfrow=c(1,3), mar = c(5, 4, 4, 2))
plot(dendsort(hc.complete), main="Complete Linkage", xlab="", sub="", cex=0.7)
plot(dendsort(hc.average), main="Average Linkage", xlab="", sub="", cex=0.7)
plot(dendsort(hc.single), main="Single Linkage", xlab="", sub="", cex=0.7)

## ---- fig.width=7, fig.height=7-----------------------------------------------
#load the iris data
data("iris")
x <- as.matrix(iris[-5]) #drop the 5th colum
d <- dist(x) #calculate Euclidian distance
#Comparing different seriation methods
methods <- c("HC", "GW", "OLO")
results <- sapply(methods, FUN=function(m) seriate(d, m), simplify = FALSE)
#get hclust objects
hc_HC = results[["HC"]][[1]]
hc_GW = results[["GW"]][[1]]
hc_OLO = results[["OLO"]][[1]]
#to color by spieces
sideColors = rep( "#000000", nrow(iris))
sideColors[which(iris$Species == "setosa" )] = "#66C2A5";
sideColors[which(iris$Species == "versicolor" )] = "#FC8D62";
sideColors[which(iris$Species == "virginica")] = "#8DA0CB";

par(mar=c(2, 5, 5, 2))
# HC
heatmap.2(as.matrix(d), col=gray.colors(100), dendrogram ="both",
          Rowv=rev(as.dendrogram(hc_HC)), Colv=(as.dendrogram(hc_HC)),
          scale="none", labRow="", labCol="", ColSideColors = sideColors,
          symm = TRUE, key = TRUE, keysize =1, trace="none", density.info="none", xlab="HC")
#legend("topright", pch = 15, col = c("#66C2A5", "#FC8D62", "#8DA0CB"), legend = c("setosa", "versicolor", "virginica"))

#GW
heatmap.2(as.matrix(d), col=gray.colors(100), dendrogram ="both",
          Rowv=rev(as.dendrogram(hc_GW)), Colv=(as.dendrogram(hc_GW)),
          scale="none", labRow="", labCol="", ColSideColors = sideColors,
          symm =TRUE, key = TRUE, keysize =1, trace="none", density.info="none", xlab="GW")
#legend("topright", pch = 15, col = c("#66C2A5", "#FC8D62", "#8DA0CB"), legend = c("setosa", "versicolor", "virginica"))

#OLO
heatmap.2(as.matrix(d), col=gray.colors(100), dendrogram ="both",
          Rowv=rev(as.dendrogram(hc_OLO)), Colv=(as.dendrogram(hc_OLO)),
          scale="none", labRow="", labCol="", ColSideColors = sideColors,
          symm =TRUE, key = TRUE, keysize =1, trace="none", density.info="none", xlab="OLO")
#legend("topright", pch = 15, col = c("#66C2A5", "#FC8D62", "#8DA0CB"), legend = c("setosa", "versicolor", "virginica"))

#MOLO
hc_MOLO = dendsort(hc_HC)
heatmap.2(as.matrix(d), col=gray.colors(100), dendrogram ="both",
          Rowv=rev(as.dendrogram(hc_MOLO)), Colv=(as.dendrogram(hc_MOLO)),
          scale="none", labRow="", labCol="", ColSideColors = sideColors,
          symm =TRUE, key = TRUE, keysize =1, trace="none", density.info="none", xlab="MOLO")
#legend("topright", pch = 15, col = c("#66C2A5", "#FC8D62", "#8DA0CB"), legend = c("setosa", "versicolor", "virginica"))


## ---- fig.width=7, fig.height=5-----------------------------------------------
par(mfrow=c(1,3), mar = c(5, 4, 4, 2))
data(sample_tcga)
#transpose
dataTable <- t(sample_tcga)
#calculate the correlation based distance
row_dist <- as.dist(1-cor(t(dataTable), method = "pearson"))
col_dist <- as.dist(1-cor(dataTable, method = "pearson"))
#hierarchical clustering
col_hc <- hclust(col_dist, method = "complete")
row_hc <- hclust(row_dist, method = "complete")


## ---- fig.width=7, fig.height=5-----------------------------------------------
par(mfrow=c(1,3), mar = c(5, 4, 4, 2))
#MOLO Figure 7
heatmap.2(dataTable, Rowv=dendsort(as.dendrogram(row_hc), isRevers=TRUE), Colv=dendsort(as.dendrogram(col_hc)),
             labRow="", labCol="", margins = c(2,1), xlab = "MOLO", 
             col=brewer.pal(11, "RdBu"))
#MOLO_AVG Figure 9
heatmap.2(dataTable, Rowv=dendsort(as.dendrogram(row_hc), isRevers=TRUE, type="average"), Colv=dendsort(as.dendrogram(col_hc), type="average"),
             labRow="", labCol="", margins = c(2,1), xlab = "MOLO_AVG", 
             col=brewer.pal(11, "RdBu"))


## ---- fig.widt=10-------------------------------------------------------------
#seriation based method (GW and OLO)
methods <- c("GW", "OLO")
row_results <- sapply(methods, FUN=function(m) seriate(row_dist, m), simplify = FALSE)
row_gw <- row_results[["GW"]][[1]]
row_olo <- row_results[["OLO"]][[1]]
col_results <- sapply(methods, FUN=function(m) seriate(col_dist, m), simplify = FALSE)
col_gw <- col_results[["GW"]][[1]]
col_olo <- col_results[["OLO"]][[1]]

#dendrogram comparison # Figure 8
par(mar=c(2, 3, 2,1), mfrow = c(5, 1))
plot(as.dendrogram(row_hc), leaflab= "none", main ="HC")
plot(as.dendrogram(row_gw), leaflab= "none", main ="GW")
plot(as.dendrogram(row_olo), leaflab= "none", main ="OLO")
plot(dendsort(as.dendrogram(row_hc)), leaflab= "none", main ="MOLO")
plot(dendsort(as.dendrogram(row_hc), type="average"), leaflab= "none", main ="MOLO_AVG")

#dendrogram comparison # Figure 10
par(mar=c(2, 3, 2,1), mfrow = c(2, 3))
plot(as.dendrogram(row_hc), leaflab= "none", main ="HC")
plot(as.dendrogram(row_gw), leaflab= "none", main ="GW")
plot(as.dendrogram(row_olo), leaflab= "none", main ="OLO")
plot(dendsort(as.dendrogram(row_hc)), leaflab= "none", main ="MOLO")
plot(dendsort(as.dendrogram(row_hc), type="average"), leaflab= "none", main ="MOLO_AVG")


## -----------------------------------------------------------------------------
#Table 1
#calculate the length
l_hc = cal_total_length(as.dendrogram(row_hc))
l_gw = cal_total_length(as.dendrogram(row_gw))
l_olo = cal_total_length(as.dendrogram(row_olo))
l_molo = cal_total_length(dendsort(as.dendrogram(row_hc)))
l_molo_avg = cal_total_length(dendsort(as.dendrogram(row_hc), type ="average"))
#ratio
l_gw/l_hc
l_olo/l_hc
l_molo/l_hc
l_molo_avg/l_hc

Try the dendsort package in your browser

Any scripts or data that you put into this service are public.

dendsort documentation built on April 20, 2021, 5:05 p.m.