#list.of.packages <- c("clusterSim","ade4")
#new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
#if(length(new.packages)) install.packages(new.packages)
#
##required packages
#require(clusterSim)
#require(ade4)
#require(vegan)
dist.JSD <- function(inMatrix, pseudocount=0.000001, ...) {
KLD <- function(x,y) sum(x *log(x/y))
JSD<- function(x,y) sqrt(0.5 * KLD(x, (x+y)/2) + 0.5 * KLD(y, (x+y)/2))
matrixColSize <- length(colnames(inMatrix))
matrixRowSize <- length(rownames(inMatrix))
colnames <- colnames(inMatrix)
resultsMatrix <- matrix(0, matrixColSize, matrixColSize)
inMatrix = apply(inMatrix,1:2,function(x) ifelse (x==0,pseudocount,x))
for(i in 1:matrixColSize) {
for(j in 1:matrixColSize) {
resultsMatrix[i,j]=JSD(as.vector(inMatrix[,i]),
as.vector(inMatrix[,j]))
}
}
colnames -> colnames(resultsMatrix) -> rownames(resultsMatrix)
as.dist(resultsMatrix)->resultsMatrix
attr(resultsMatrix, "method") <- "dist"
return(resultsMatrix)
}
pam.clustering=function(x,k) { # x is a distance matrix and k the number of clusters
cluster = as.vector(cluster::pam(as.dist(x), k, diss=TRUE)$clustering)
return(cluster)
}
noise.removal <- function(dataframe, percent=0.01, top=NULL){
dataframe->Matrix
bigones <- rowSums(Matrix)*100/(sum(rowSums(Matrix))) > percent
Matrix_1 <- Matrix[bigones,]
print(percent)
return(Matrix_1)
}
enterotyping <- function(otu,remove.noise=TRUE,rm.pct=0.01,kx=NULL){
if(remove.noise==TRUE){
otu <- noise.removal(dataframe = otu,percent = rm.pct)
}
data.dist=dist.JSD(otu)
nclusters=NULL
require(clusterSim)
require(cluster)
for (k in 1:20) {
if (k==1) {
nclusters[k]=NA
} else {
data.cluster_temp=pam.clustering(data.dist, k)
nclusters[k]=clusterSim::index.G1(t(otu),data.cluster_temp, d = data.dist,
centrotypes = "medoids")
}
}
best.k <- which.max(nclusters)
print(best.k)
plot(nclusters, type="h", xlab="k clusters", ylab="CH index")
if(is.null(kx)){
best.k <- best.k
} else best.k <- kx
data.cluster=pam.clustering(data.dist, k=best.k)
return(list(otu=otu,data.dist=data.dist,data.cluster=data.cluster))
}
plot.ET <- function(enterotyping,colset,...){
#ordNtest <- function(data,group,dist=NULL){
# if(is.null(dist)){
# bc.dist <- vegan::vegdist(t(data))
# } else if(!is.null(dist)){
# bc.dist <- dist
# }
# pcoa <- cmdscale(bc.dist,eig = T)
# set.seed(123); permanova <- adonisx(bc.dist~group)
# return(list(pcoa=pcoa,permanova=permanova))
#}
#plot.ordi <- function(ordN,group,colset,ordiellipse=FALSE,ordispider=FALSE,plot=TRUE,pch=19,legend=NULL,test=FALSE,ordilabel=NULL){
# ord <- ordN$pcoa
# pct <- round(prop.table(ord$eig[ord$eig>0])*100,2)
# par(mar=c(5.1,4.1,4.1,10.1))
# if(length(pct)>0){
# plot(vegan::scores(ord),type="n",las=1,
# xlab=paste0("PCoA1 (",round(pct[1],2),"%)"),ylab=paste0("PCoA2 (",round(pct[2],2),"%)"))
# } else if(length(pct)==0) plot(vegan::scores(ord),type="n",las=1,xlab="Dim1",ylab="Dim2")
# if(ordiellipse==TRUE) vegan::ordiellipse(ord,group,col = colset,lwd = 0.5)
# if(ordispider==TRUE) vegan::ordispider(ord,group,col = colset,lwd = 0.5)
# if(plot==TRUE & length(pch)>1){
# points(vegan::scores(ord),pch=pch[as.numeric(group)],cex=1,col=paste0(colset,"aa")[as.numeric(group)])
# } else if(plot==TRUE & length(pch)==1) points(vegan::scores(ord),pch=pch,cex=1,col=paste0(colset,"aa")[as.numeric(group)])
# if(!is.null(legend)) legend("topright",legend =legend ,pch=pch,col=paste0(colset,"aa"),bty="n",cex=0.8,xpd=T,inset = c(-0.3,0))
# if(test==TRUE) {
# p <- format(ordN$permanova$aov.tab$`Pr(>F)`[1],digits = 2)
# r2 <- format(ordN$permanova$aov.tab$R2[1],digits = 2)
# legend("bottomright",legend =bquote(R^2 == .(r2)),bty="n",cex=0.8,xpd=T,inset = c(-0.2,0.06))
# legend("bottomright",legend =bquote(italic(p == .(p))),bty="n",cex=0.8,xpd=T,inset = c(-0.2,0))
# }
# if(!is.null(ordilabel)){
# vegan::ordilabel(scores(ordN$pcoa),cex = 0.7,fill = "#cacaca55",border = 1,col = "#1b262c",
# select = which(rownames(scores(ordN$pcoa)) %in% ordilabel))
# }
#}
pcoa <- ordNtest(data = enterotyping$otu,
group = as.factor(enterotyping$data.cluster),
dist = enterotyping$data.dist)
plot.ordi(ordN = pcoa,group = as.factor(enterotyping$data.cluster),
colset = colset,legend = paste("cluster",levels(as.factor(enterotyping$data.cluster))),...)
}
box.ET <- function(enterotyping,clusterN,colset=NULL,out=FALSE,...){
jitter.points <- function(data,groups,colset){
for(i in 1:length(groups)){
set.seed(123)
points(jitter(rep(i,sum(as.numeric(groups)==i)),8/(2^(i-1))),data[as.numeric(groups)==i],pch=21,bg=colset[i],cex=1.1)
}
}
ETn <- enterotyping$otu[,enterotyping$data.cluster==clusterN]
MAX <- which.max(prop.table(rowSums(ETn)))
MAX.OTU <- (sort(prop.table(rowSums(ETn)),decreasing = T)[1])*100
print(MAX.OTU)
ETotu <- as.data.frame(prop.table(t(enterotyping$otu),1))
boxplot(ETotu[,MAX]~as.factor(enterotyping$data.cluster),
las=1,col=paste0(colset,"99"),
xlab="Enterotypes",
ylab="Relative abundance",...)
jitter.points(ETotu[,MAX],as.factor(enterotyping$data.cluster),colset)
if(out==TRUE){
return(ETn)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.