# Global knitr chunk option: echo the source code of every chunk in the output.
knitr::opts_chunk$set(echo = TRUE)
In this document, we group wells from the AM, PM1, and PM2A plates by the similarity of their chemical products' signatures.
# Make sure ChemmineR (a Bioconductor package) is installed, then attach it.
# The legacy biocLite.R installer script (previously sourced over plain HTTP)
# has been retired by Bioconductor; BiocManager::install() is the supported
# replacement.
if (!requireNamespace("ChemmineR", quietly = TRUE)) {
  if (!requireNamespace("BiocManager", quietly = TRUE)) {
    install.packages("BiocManager")
  }
  BiocManager::install("ChemmineR") # Installs the package.
}
library("ChemmineR") # Loads the package
# Read the tab-separated well annotation table and name its six columns.
# NOTE(review): read.table() treats the apostrophe as a quote character by
# default; if any compound name contains one, consider quote = "" -- confirm
# against the data file.
source_db <- read.table("../data/biolog_wells.txt", sep = "\t")
colnames(source_db) <- c(
  "plateID", "wellID", "sourceName",
  "pubchem", "manualAnno", "chemoCluster"
)
head(source_db)

# initiate output list
ANNO <- list()
ENR <- list()
plateID <- "AM"

# Rows of the annotation table that belong to the current plate.
wells <- source_db[which(source_db$plateID == plateID), ]
head(wells)

# Submit a job converting the wells' PubChem IDs to SDF, then wait for it.
# Pause between status checks instead of spinning: every status() call
# queries the job again, so an empty loop body burns CPU and floods the
# service with requests.
job1 <- launchCMTool("pubchemID2SDF", wells$pubchem)
while (status(job1) == "RUNNING") {
  Sys.sleep(1)
}
result1 <- result(job1)

# Submit a second job computing OpenBabel descriptors from the SDF result.
job4 <- launchCMTool("OpenBabel Descriptors", result1)
while (status(job4) == "RUNNING") {
  Sys.sleep(1)
}
result4 <- result(job4)
# Prepend the source names; this pairs rows by position with `wells`.
result4 <- cbind(wells$sourceName, result4)
head(result4)

# convert
apset <- sdf2ap(result1)
# Label each compound with its well ID (second column of the table).
cid(apset) <- as.character(wells[, 2])
ANNO[[plateID]] <- list("result" = result4, "apset" = apset)
# save(result4, apset, file = "../data/biolog_sugar_molecular_features.Rdata")
We compute the Tanimoto similarity between each pair of sugar molecules, based on their binary fingerprints (one bit per pattern), as explained in https://www.surechembl.org/knowledgebase/84207-tanimoto-coefficient-and-fingerprint-generation.
# load("../data/biolog_sugar_molecular_features.Rdata")
# Convert the descriptor set stored for the current plate into fingerprints.
fpset <- desc2fp(ANNO[[plateID]][["apset"]])
Here are two examples of the fingerprints extracted from PubChem (only the first 20 bits are shown):
# Print the first 20 fingerprint bits of wells 5 and 7, followed by the
# Tanimoto similarity between the two.
name_a <- as.character(wells$sourceName[5])
name_b <- as.character(wells$sourceName[7])
cat(name_a, head(fpset[[5]]@fp, 20), "\n")
cat(name_b, head(fpset[[7]]@fp, 20), "\n")
cat(
  "Tanimoto's Similarity between", name_a,
  "and", name_b,
  ":", fpSim(fpset[5], fpset[7], sorted = FALSE),
  "\n"
)
# Pairwise similarity matrix: for every compound ID, its fpSim() similarity
# to all fingerprints in the set.
simMA <- sapply(
  cid(fpset),
  function(id) fpSim(fpset[id], fpset, sorted = FALSE)
)
ANNO[[plateID]]$simMA <- simMA
# save(simMA, file = "../data/pubchem_kegg/cluster_info.Rdata")

# Single-linkage hierarchical clustering on the dissimilarity (1 - similarity).
hc <- hclust(as.dist(1 - simMA), method = "single")

if (!require(heatmaply)) {
  install.packages("heatmaply")
  library(heatmaply) # Loads the package
}

# Interactive heatmap of the similarity matrix.
heatmaply(
  simMA,
  k_col = NA,
  k_row = NA,
  label_names = c("product1", "product2", "similarity"),
  labRow = rownames(simMA),
  labCol = colnames(simMA),
  fontsize_row = 4,
  fontsize_col = 4
) %>%
  layout(margin = list(l = 130, b = 40))
# 10 groups were found

# get cluster/well info
tmp <- heatmapr(simMA, k_col = NA, k_row = NA)
dend <- as.dendrogram(tmp$rows)

if (!require(dendextend)) {
  install.packages("dendextend")
  library(dendextend) # Loads the package
}

# Turn each leaf's branch colour into a numeric cluster ID; both col.leaf and
# wells.dend follow the dendrogram's leaf order.
col.leaf <- as.numeric(as.factor(get_leaves_branches_col(dend)))
wells.dend <- labels(dend)
Here is the assignment of sugars to clusters (first rows only):
# Preview the first rows of the leaf label / cluster ID pairing.
head(cbind(wells.dend = wells.dend, col.leaf = col.leaf))
Here is the distribution of cluster size:
# Number of wells per cluster, smallest cluster first.
sort(table(col.leaf), decreasing = FALSE)
Here is the distribution of molecular weight in each cluster:
# col.leaf follows the dendrogram leaf order (wells.dend), whereas the rows of
# result4 follow the row order of `wells`. Align the molecular weights to the
# leaf order first, so that each weight is grouped under its own well's
# cluster rather than under whichever leaf happens to share its position.
mw_by_leaf <- result4$MW[match(wells.dend, as.character(wells$wellID))]
boxplot(mw_by_leaf ~ col.leaf, xlab = "cluster ID", ylab = "Molecular Weight")
Here we rely on the KEGG compound database to understand the underlying biology of the clusters we obtained in the previous step:
# Make sure KEGGREST is installed, then attach it. KEGGREST is distributed
# through Bioconductor, so it is installed with BiocManager rather than
# install.packages().
if (!requireNamespace("KEGGREST", quietly = TRUE)) {
  if (!requireNamespace("BiocManager", quietly = TRUE)) {
    install.packages("BiocManager")
  }
  BiocManager::install("KEGGREST")
}
library(KEGGREST) # Loads the package

# ---- Manually curated well -> KEGG compound mapping for this plate ----------
# Default rule for a well: search KEGG compounds by the well's sourceName and
# keep the first entry returned. The three tables below hold the curated
# exceptions, keyed by the well's row index in `wells`.

# Number of wells (rows of `wells`) covered by the curation.
n_curated <- 95L

# Wells annotated directly from a fixed KEGG compound ID (no name search).
kegg_direct_id <- c(
  "22" = "C00103", "23" = "C00092", "25" = "C04508", "26" = "C00137",
  "27" = "C00984", "33" = "C08243", "34" = "C05402", "35" = "C11911",
  "36" = "C04698", "37" = "C03619", "38" = "C00963", "39" = "C00963",
  "41" = "C00492", "47" = "C01083", "53" = "C05984", "58" = "C00256",
  "59" = "C00186", "60" = "C06010", "65" = "C00022", "67" = "C01180",
  "69" = "C00490", "70" = "C00552", "72" = "C19779", "74" = "C00064",
  "75" = "C00135", "76" = "C00188", "80" = "C00049", "81" = "C00064",
  "82" = "C00073", "83" = "C00148", "95" = "C00105"
)

# Wells searched under a different name than their sourceName.
kegg_query_override <- c(
  "3" = "N-Acetyl-D-mannosamine",
  "8" = "Cellobiose",
  "9" = "Cyclodextrin",
  "13" = "Erythritol"
)

# Wells for which the entry to keep is not the first one returned.
kegg_hit_pick <- c(
  "1" = 2L, "2" = 2L, "14" = 2L, "15" = 2L, "16" = 2L, "21" = 3L,
  "24" = 2L, "32" = 2L, "63" = 10L, "68" = 2L, "84" = 2L, "90" = 4L,
  "91" = 4L, "93" = 5L
)

DB.KEGG <- list()
for (i in seq_len(n_curated)) {
  key <- as.character(i)
  well_id <- as.character(wells$wellID[i])
  source_name <- as.character(wells$sourceName[i])
  print(source_name)

  if (key %in% names(kegg_direct_id)) {
    tmp2 <- keggGet(kegg_direct_id[[key]])
    pick <- 1L
  } else {
    query <- if (key %in% names(kegg_query_override)) {
      kegg_query_override[[key]]
    } else {
      source_name
    }
    tmp <- names(keggFind("compound", query = query))
    if (length(tmp) == 0L) {
      stop(
        "KEGG search returned no compound for well ", well_id,
        " (query: ", query, ")",
        call. = FALSE
      )
    }
    tmp2 <- keggGet(tmp)
    # Show the candidate names so the curated pick can be checked in the
    # rendered report.
    print(lapply(tmp2, function(x) x$NAME))
    pick <- if (key %in% names(kegg_hit_pick)) kegg_hit_pick[[key]] else 1L
  }

  # The curated index is only valid while KEGG keeps returning the same
  # candidates; fail with context rather than "subscript out of bounds".
  if (length(tmp2) < pick) {
    stop(
      "Curated KEGG entry #", pick, " is not available for well ", well_id,
      " (", source_name, "): only ", length(tmp2), " entries were returned",
      call. = FALSE
    )
  }
  DB.KEGG[[well_id]] <- tmp2[[pick]]
}

ANNO[[plateID]]$DB.KEGG <- DB.KEGG
Here are the top 20 most represented KEGG pathways across all sugars:
# Count how many annotated wells list each KEGG pathway and show the most
# frequent ones. head() stops at the number of pathways available, whereas
# indexing with [1:20] pads the output with NA entries when there are fewer
# than 20.
pathway_counts <- table(unlist(lapply(DB.KEGG, function(x) x$PATHWAY)))
head(sort(pathway_counts, decreasing = TRUE), 20)
After getting KEGG pathways corresponding to the different wells, we want to check if the clusters we got from PubChem similarity are biologically relevant.
# Pathways listed in each well's KEGG entry, and the set of distinct pathways.
pathways <- lapply(DB.KEGG, function(x) x$PATHWAY)
pathways.name <- unique(unlist(pathways))
# 146 pathways
length(pathways.name)

# enrichment
# For every cluster, test each pathway with Fisher's exact test: wells of the
# cluster ("target") against all other wells ("bgd"). ENR[[i]] holds the hits
# of cluster i as a character matrix with columns 'Pathway' and 'p-value',
# sorted by increasing absolute p-value; it stays NULL for a cluster without
# hits.
n_clusters <- max(col.leaf)
ENR <- vector("list", length = n_clusters)
for (i in seq_len(n_clusters)) {
  # These subsets do not depend on the pathway, so build them once per cluster.
  target_pathways <- pathways[wells.dend[which(col.leaf == i)]]
  bgd_pathways <- pathways[wells.dend[which(col.leaf != i)]]
  membership <- factor(
    c(
      rep("target", length(target_pathways)),
      rep("bgd", length(bgd_pathways))
    ),
    levels = c("target", "bgd")
  )

  HITS <- NULL
  for (j in seq_along(pathways.name)) {
    target <- sapply(target_pathways, function(x) pathways.name[j] %in% x)
    bgd <- sapply(bgd_pathways, function(x) pathways.name[j] %in% x)
    tmp <- fisher.test(
      x = factor(c(target, bgd), levels = c("TRUE", "FALSE")),
      y = membership
    )
    # if(tmp$estimate > 1 & tmp$p.value < 0.05)
    #   ENR[[i]] = c(ENR[[i]], pathways.name[j])
    if (tmp$p.value < 0.03) {
      # The sign encodes the direction: negative when the odds ratio is
      # below 1.
      HITS <- rbind(
        HITS,
        c(pathways.name[j], round(sign(tmp$estimate - 1) * tmp$p.value, 3))
      )
    }
  }

  if (is.null(HITS)) {
    # `ENR[[i]] <- NULL` would delete element i and shift the results of all
    # later clusters; keep an explicit NULL placeholder instead.
    ENR[i] <- list(NULL)
  } else {
    colnames(HITS) <- c("Pathway", "p-value")
    # we keep negative enrichment but won't display it in the app
    # drop = FALSE keeps a single hit as a 1-row matrix.
    hit_order <- order(abs(as.numeric(HITS[, 2])), decreasing = FALSE)
    ENR[[i]] <- HITS[hit_order, , drop = FALSE]
  }
}

# This enrichment mapping might be useful when we detect significant clusters
# in a specific group of strains
ANNO[[plateID]]$ENR <- ENR
ANNO[[plateID]]$col.leaf <- col.leaf
ANNO[[plateID]]$pathways.name <- pathways.name
ANNO[[plateID]]$wells.dend <- wells.dend
# save(ENR, col.leaf, DB.KEGG, pathways.name, wells.dend,
#      file = "biolog_kegg_wells.Rdata")
Here is the content of each cluster in terms of enriched KEGG pathways:
# Use a very wide output width so the enrichment tables are not wrapped.
options(width = 1000)

# Display the enrichment result of every cluster.
print(ENR)
plateID <- "PM1"

# Rows of the annotation table that belong to the current plate.
wells <- source_db[which(source_db$plateID == plateID), ]
head(wells)

# Submit a job converting the wells' PubChem IDs to SDF, then wait for it.
# Pause between status checks instead of spinning: every status() call
# queries the job again, so an empty loop body burns CPU and floods the
# service with requests.
job1 <- launchCMTool("pubchemID2SDF", wells$pubchem)
while (status(job1) == "RUNNING") {
  Sys.sleep(1)
}
result1 <- result(job1)

# Submit a second job computing OpenBabel descriptors from the SDF result.
job4 <- launchCMTool("OpenBabel Descriptors", result1)
while (status(job4) == "RUNNING") {
  Sys.sleep(1)
}
result4 <- result(job4)
# Prepend the source names; this pairs rows by position with `wells`.
result4 <- cbind(wells$sourceName, result4)
head(result4)

# convert
apset <- sdf2ap(result1)
# Label each compound with its well ID (second column of the table).
cid(apset) <- as.character(wells[, 2])
ANNO[[plateID]] <- list("result" = result4, "apset" = apset)
# save(result4, apset, file = "../data/biolog_sugar_molecular_features.Rdata")
We compute the Tanimoto similarity between each pair of sugar molecules, based on their binary fingerprints (one bit per pattern), as explained in https://www.surechembl.org/knowledgebase/84207-tanimoto-coefficient-and-fingerprint-generation.
# load("../data/biolog_sugar_molecular_features.Rdata")
# Convert the descriptor set stored for the current plate into fingerprints.
fpset <- desc2fp(ANNO[[plateID]][["apset"]])
Here are two examples of the fingerprints extracted from PubChem (only the first 20 bits are shown):
# Print the first 20 fingerprint bits of wells 5 and 7, followed by the
# Tanimoto similarity between the two.
name_a <- as.character(wells$sourceName[5])
name_b <- as.character(wells$sourceName[7])
cat(name_a, head(fpset[[5]]@fp, 20), "\n")
cat(name_b, head(fpset[[7]]@fp, 20), "\n")
cat(
  "Tanimoto's Similarity between", name_a,
  "and", name_b,
  ":", fpSim(fpset[5], fpset[7], sorted = FALSE),
  "\n"
)
# Pairwise similarity matrix: for every compound ID, its fpSim() similarity
# to all fingerprints in the set.
simMA <- sapply(
  cid(fpset),
  function(id) fpSim(fpset[id], fpset, sorted = FALSE)
)
ANNO[[plateID]]$simMA <- simMA
# save(simMA, file = "../data/pubchem_kegg/cluster_info.Rdata")

# Single-linkage hierarchical clustering on the dissimilarity (1 - similarity).
hc <- hclust(as.dist(1 - simMA), method = "single")

if (!require(heatmaply)) {
  install.packages("heatmaply")
  library(heatmaply) # Loads the package
}

# Interactive heatmap of the similarity matrix.
heatmaply(
  simMA,
  k_col = NA,
  k_row = NA,
  label_names = c("product1", "product2", "similarity"),
  labRow = rownames(simMA),
  labCol = colnames(simMA),
  fontsize_row = 4,
  fontsize_col = 4
) %>%
  layout(margin = list(l = 130, b = 40))
# 8 groups were found

# get cluster/well info
tmp <- heatmapr(simMA, k_col = NA, k_row = NA)
dend <- as.dendrogram(tmp$rows)

if (!require(dendextend)) {
  install.packages("dendextend")
  library(dendextend) # Loads the package
}

# Turn each leaf's branch colour into a numeric cluster ID; both col.leaf and
# wells.dend follow the dendrogram's leaf order.
col.leaf <- as.numeric(as.factor(get_leaves_branches_col(dend)))
wells.dend <- labels(dend)
Here is the assignment of sugars to clusters (first rows only):
# Preview the first rows of the leaf label / cluster ID pairing.
head(cbind(wells.dend = wells.dend, col.leaf = col.leaf))
Here is the distribution of cluster size:
# Number of wells per cluster, smallest cluster first.
sort(table(col.leaf), decreasing = FALSE)
Here is the distribution of molecular weight in each cluster:
# col.leaf follows the dendrogram leaf order (wells.dend), whereas the rows of
# result4 follow the row order of `wells`. Align the molecular weights to the
# leaf order first, so that each weight is grouped under its own well's
# cluster rather than under whichever leaf happens to share its position.
mw_by_leaf <- result4$MW[match(wells.dend, as.character(wells$wellID))]
boxplot(mw_by_leaf ~ col.leaf, xlab = "cluster ID", ylab = "Molecular Weight")
Here we rely on the KEGG compound database to understand the underlying biology of the clusters we obtained in the previous step:
if(!require(KEGGREST)){ install.packages('KEGGREST') library(KEGGREST) # Loads the package } DB.KEGG = list() i=1 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=2 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[2]] i=3 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=4 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[2]] i=5 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[2]] i=6 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=7 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=8 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=9 tmp2 = keggGet('C01083') as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=10 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) 
lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[2]] i=11 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=12 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=13 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=14 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[2]] i=15 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[2]] i=16 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[2]] i=17 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=18 tmp2 = keggGet('C00093') as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=19 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=20 tmp = 
names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=21 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=22 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=23 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=24 tmp2 = keggGet('C00092') as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=25 tmp2 = keggGet('C00880') as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=26 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=27 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[3]] i=28 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=29 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) 
DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=30 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[2]] i=31 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=32 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[3]] i=33 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=34 tmp = names(keggFind('compound',query='Melibiose')) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[2]] i=35 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=36 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=37 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=38 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=39 tmp = 
names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=40 tmp2 = keggGet('D05566') as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=41 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=42 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=43 tmp2 = keggGet('C03619') as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=44 tmp = names(keggFind('compound',query='Lactose')) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[3]] i=45 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=46 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=47 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[5]] i=48 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] 
i=49 tmp2 = keggGet('C00552') as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=50 tmp2 = keggGet('C00103') as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=51 tmp2 = keggGet('C00085') as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=52 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=53 tmp2 = keggGet('C02630') as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=54 tmp2 = keggGet('C05984') as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=55 tmp2 = keggGet('C11611') as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=56 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=57 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=58 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[4]] i=59 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[9]] i=60 tmp2 = 
keggGet('C00049') as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=61 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=62 tmp2 = keggGet('C00137') as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=63 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=64 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=65 tmp2 = keggGet('C00042') as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=66 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[10]] i=67 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=68 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=69 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=70 tmp = 
names(keggFind('compound',query='Cellobiose')) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=71 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[4]] i=72 tmp2 = keggGet('C00624') as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=73 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=74 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=75 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=76 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=77 tmp2 = keggGet('C05729') as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=78 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=79 tmp2 = keggGet('C00645') as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=80 tmp2 = keggGet('C14115') as.character(wells$sourceName[i]) 
lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=81 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[3]] i=82 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=83 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=84 tmp2 = keggGet('C01833') as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=85 tmp2 = keggGet('C05852') as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=86 tmp2 = keggGet('C05593') as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=87 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=88 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=89 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=90 tmp2 = keggGet('D01791') as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=91 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) 
as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=92 tmp2 = keggGet('C01040') as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=93 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] i=94 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[2]] i=95 tmp = names(keggFind('compound',query=as.character(wells$sourceName[i]))) tmp2 = keggGet(tmp) as.character(wells$sourceName[i]) lapply(tmp2,function(x)x$NAME) DB.KEGG[[as.character(wells$wellID[i])]] = tmp2[[1]] ANNO[[plateID]]$DB.KEGG = DB.KEGG
Here are the top 20 most represented KEGG pathways across all sugars:
# Top 20 most represented KEGG pathways across the wells annotated in DB.KEGG.
# head() replaces `[1:20]` so that a plate with fewer than 20 distinct pathways
# is not padded with NA entries, and TRUE replaces the reassignable `T`.
head(
  sort(
    table(unlist(lapply(DB.KEGG, function(x) x$PATHWAY))),
    decreasing = TRUE
  ),
  20
)
After getting KEGG pathways corresponding to the different wells, we want to check if the clusters we got from PubChem similarity are biologically relevant.
# KEGG pathway annotations per well (list named by well ID); an element is NULL
# when the stored KEGG entry has no PATHWAY field.
pathways <- lapply(DB.KEGG, function(x) x$PATHWAY)
pathways.name <- unique(unlist(pathways))
length(pathways.name)

# Enrichment: for every cluster, Fisher's exact test of "well carries pathway j"
# against "well belongs to cluster i" (cluster wells vs. all other wells).
ENR <- vector("list", length = max(col.leaf))
for (i in seq_len(max(col.leaf))) {
  # col.leaf is aligned with wells.dend (dendrogram leaf order).
  target.wells <- wells.dend[which(col.leaf == i)]
  bgd.wells <- wells.dend[which(col.leaf != i)]

  HITS <- NULL
  # seq_along() keeps the loop from running on a phantom index when no pathway
  # was found at all.
  for (j in seq_along(pathways.name)) {
    # vapply() guarantees a logical vector even when a group is empty.
    target <- vapply(
      pathways[target.wells],
      function(x) pathways.name[j] %in% x,
      logical(1)
    )
    bgd <- vapply(
      pathways[bgd.wells],
      function(x) pathways.name[j] %in% x,
      logical(1)
    )
    # Explicit levels keep the 2x2 layout even when one outcome is absent.
    tmp <- fisher.test(
      x = factor(c(target, bgd), levels = c("TRUE", "FALSE")),
      y = factor(
        c(rep("target", length(target)), rep("bgd", length(bgd))),
        levels = c("target", "bgd")
      )
    )
    # The stored value is the p-value signed by the direction of the effect
    # (negative when the odds ratio is below 1).
    # NOTE(review): round(., 3) turns p < 0.0005 into 0, so the direction of
    # the strongest hits is not recoverable from the stored value.
    if (tmp$p.value < 0.03) {
      HITS <- rbind(
        HITS,
        c(pathways.name[j], round(sign(tmp$estimate - 1) * tmp$p.value, 3))
      )
    }
  }

  # We keep negative enrichment, but it is not displayed in the app.
  if (!is.null(HITS)) {
    colnames(HITS) <- c("Pathway", "p-value")
    # drop = FALSE keeps a single hit as a 1-row matrix instead of a vector.
    HITS <- HITS[
      order(abs(as.numeric(HITS[, 2])), decreasing = FALSE), ,
      drop = FALSE
    ]
  }
  # `ENR[[i]] <- NULL` would delete slot i and shorten the list; assigning
  # list(HITS) stores NULL in place so ENR always has one slot per cluster.
  ENR[i] <- list(HITS)
}

# This enrichment mapping might be useful when we detect significant clusters
# in a specific group of strains.
ANNO[[plateID]]$ENR <- ENR
ANNO[[plateID]]$col.leaf <- col.leaf
ANNO[[plateID]]$pathways.name <- pathways.name
ANNO[[plateID]]$wells.dend <- wells.dend
Here is the content of each cluster in terms of enriched KEGG pathways:
# Raise the print width so the enrichment tables are not wrapped when printed.
options(width = 1000)
# Display the per-cluster enrichment list computed in the previous chunk.
ENR
# Plate processed by the chunks that follow.
plateID <- "PM2A"

# Rows of source_db that belong to the selected plate.
wells <- source_db[which(source_db$plateID == plateID), ]
head(wells)

# Retrieve the SDF structures for the wells' PubChem IDs via ChemMine Tools.
# The status is polled with a pause between requests: an empty `while` body
# would spin at full speed and flood the remote service with status queries.
job1 <- launchCMTool("pubchemID2SDF", wells$pubchem)
while (status(job1) == "RUNNING") {
  Sys.sleep(1)
}
result1 <- result(job1)

# Compute OpenBabel descriptors for the retrieved structures.
job4 <- launchCMTool("OpenBabel Descriptors", result1)
while (status(job4) == "RUNNING") {
  Sys.sleep(1)
}
result4 <- result(job4)
# Prepend the source names; rows are paired with `wells` by position.
result4 <- cbind(wells$sourceName, result4)
head(result4)

# Convert the structures to atom-pair descriptors, labelled by well ID
# (column 2 of source_db).
apset <- sdf2ap(result1)
cid(apset) <- as.character(wells[, 2])
ANNO[[plateID]] <- list("result" = result4, "apset" = apset)
We compute the Tanimoto similarity between every pair of sugar molecules, based on their binary fingerprints (one bit per pattern), as explained in https://www.surechembl.org/knowledgebase/84207-tanimoto-coefficient-and-fingerprint-generation.
# Turn the atom-pair set stored for the current plate into a fingerprint set.
fpset <- desc2fp(ANNO[[plateID]][["apset"]])
Here are two examples of the fingerprints extracted from PubChem (only the first 20 bits are shown):
# Worked example on two wells (rows 5 and 7 of `wells`): show the head of each
# fingerprint, then the similarity between the two. Wrapped in local() so the
# helper names do not leak into the workspace.
invisible(local({
  first <- 5
  second <- 7
  first.name <- as.character(wells$sourceName[first])
  second.name <- as.character(wells$sourceName[second])

  # First 20 values of each fingerprint.
  cat(first.name, head(fpset[[first]]@fp, 20), "\n")
  cat(second.name, head(fpset[[second]]@fp, 20), "\n")

  cat(
    "Tanimoto's Similarity between", first.name,
    "and", second.name,
    ":", fpSim(fpset[first], fpset[second], sorted = FALSE),
    "\n"
  )
}))
# Pairwise similarity matrix: the column for compound x holds fpSim() of x
# against the whole fingerprint set. Stored with the plate annotation.
simMA <- sapply(
  cid(fpset),
  function(x) fpSim(fpset[x], fpset, sorted = FALSE)
)
ANNO[[plateID]]$simMA <- simMA
#save(simMA,file='../data/pubchem_kegg/cluster_info.Rdata')

# Single-linkage hierarchical clustering on the distance 1 - similarity.
hc <- hclust(as.dist(1 - simMA), method = "single")

if (!require(heatmaply)) {
  install.packages("heatmaply")
  library(heatmaply) # Loads the package
}

# Interactive heatmap of the similarity matrix with enlarged left and bottom
# margins. k_row / k_col = NA leaves the number of coloured groups to
# heatmaply (presumably chosen automatically - confirm in the heatmaply docs).
layout(
  heatmaply(
    simMA,
    k_col = NA,
    k_row = NA,
    label_names = c("product1", "product2", "similarity"),
    labRow = rownames(simMA),
    labCol = colnames(simMA),
    fontsize_row = 4,
    fontsize_col = 4
  ),
  margin = list(l = 130, b = 40)
)

# Get cluster/well info: rebuild the row dendrogram with the same settings.
tmp <- heatmapr(simMA, k_col = NA, k_row = NA)
dend <- as.dendrogram(tmp$rows)

if (!require(dendextend)) {
  install.packages("dendextend")
  library(dendextend) # Loads the package
}

# One integer cluster ID per leaf, derived from the leaf's branch colour, and
# the leaf labels (well IDs) in the same order.
col.leaf <- as.numeric(as.factor(get_leaves_branches_col(dend)))
wells.dend <- labels(dend)
Here is the assignment of sugars to clusters (first rows only):
# Preview of the well-to-cluster table: dendrogram leaf label next to its
# cluster ID (first six rows).
head(cbind(wells.dend, col.leaf), n = 6L)
Here is the distribution of cluster size:
# Number of wells per cluster, smallest cluster first.
sort(table(col.leaf), decreasing = FALSE)
Here is the distribution of molecular weight in each cluster:
# Molecular weight per cluster.
# col.leaf follows the dendrogram leaf order (wells.dend), whereas the rows of
# result4 follow the order of `wells`. Using col.leaf directly would pair each
# weight with the cluster of a different well, so the cluster IDs are first
# mapped back to well order through the well IDs.
col.leaf.by.well <- col.leaf[match(as.character(wells$wellID), wells.dend)]
boxplot(
  result4$MW ~ col.leaf.by.well,
  xlab = "cluster ID",
  ylab = "Molecular Weight"
)
Here we rely on the KEGG compound database to understand the underlying biology of the clusters we obtained in the previous step:
# KEGGREST is distributed through Bioconductor, so install.packages() cannot
# provide it; install through BiocManager when it is missing.
if (!requireNamespace("KEGGREST", quietly = TRUE)) {
  if (!requireNamespace("BiocManager", quietly = TRUE)) {
    install.packages("BiocManager")
  }
  BiocManager::install("KEGGREST")
}
library(KEGGREST)

# Manual curation of the KEGG entry attached to each of the first 95 rows of
# `wells`. By default a row is searched by its source name in the KEGG compound
# database and the first result is kept. The two tables below hold the
# hand-picked exceptions, keyed by row number.

# Rows annotated directly with a KEGG identifier instead of a name search.
# Row 30 previously asked for the second entry of this single-identifier
# lookup, which does not exist; like every direct lookup it now takes the only
# entry returned.
kegg.id.by.row <- c(
  "2" = "C00973", "4" = "C00973", "13" = "C00270", "14" = "C01487",
  "21" = "C00503", "23" = "C00243", "25" = "C00031", "27" = "C08243",
  "28" = "D04845", "29" = "C03619", "30" = "C03619", "31" = "C00031",
  "32" = "C00257", "33" = "C02603", "34" = "C03619", "36" = "C00492",
  "38" = "C02076", "44" = "C00140", "46" = "C00431", "56" = "C00989",
  "60" = "C00256", "62" = "C05402", "66" = "C00121", "73" = "C19779",
  "88" = "C00487", "89" = "C18706", "90" = "C04227", "94" = "C02845"
)

# Rows whose wanted entry is not the first result of the name search: position
# of the entry to keep.
# NOTE(review): these positions depend on the order in which KEGG returns
# search results, which may change when the database is updated - re-check the
# printed names after each run.
hit.by.row <- c(
  "1" = 3L, "11" = 3L, "12" = 2L, "20" = 3L, "26" = 3L, "47" = 8L,
  "49" = 2L, "52" = 3L, "72" = 6L, "79" = 2L, "83" = 2L, "93" = 5L
)

n.wells <- 95L
stopifnot(nrow(wells) >= n.wells)

DB.KEGG <- list()
for (row in seq_len(n.wells)) {
  row.key <- as.character(row)
  well.id <- as.character(wells$wellID[row])
  source.name <- as.character(wells$sourceName[row])

  if (row.key %in% names(kegg.id.by.row)) {
    kegg.ids <- kegg.id.by.row[[row.key]]
    hit <- 1L
  } else {
    kegg.ids <- names(keggFind("compound", query = source.name))
    hit <- if (row.key %in% names(hit.by.row)) hit.by.row[[row.key]] else 1L
  }
  if (length(kegg.ids) == 0) {
    stop(
      "No KEGG compound found for well ", well.id, " ('", source.name, "')",
      call. = FALSE
    )
  }

  # NOTE(review): keggGet() is documented to return at most 10 entries per
  # call; every curated position above is <= 10.
  entries <- keggGet(kegg.ids)

  # Echo the candidates so the manual choice can be checked in the report.
  print(source.name)
  print(lapply(entries, function(x) x$NAME))

  if (hit > length(entries)) {
    stop(
      "Well ", well.id, " ('", source.name, "'): entry ", hit,
      " requested but KEGG returned only ", length(entries),
      call. = FALSE
    )
  }
  DB.KEGG[[well.id]] <- entries[[hit]]
}

ANNO[[plateID]]$DB.KEGG <- DB.KEGG
Here are the top 20 most represented KEGG pathways across all sugars:
# Count, over all wells in DB.KEGG, how many compounds are annotated with each
# KEGG pathway, then show the most frequent ones (at most 20).
# head() is used instead of `[1:20]` so that the result is not padded with NA
# entries when fewer than 20 distinct pathways are present.
head(
  sort(
    table(unlist(lapply(DB.KEGG, function(x) x$PATHWAY))),
    decreasing = TRUE
  ),
  n = 20L
)
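After retrieving the KEGG pathways annotated for each well, we check whether the clusters obtained from PubChem structural similarity are biologically relevant. For each cluster and each pathway, a Fisher's exact test compares how often the pathway occurs among the wells of the cluster versus all remaining wells; pathways with p < 0.03 are reported, and the p-value is given a negative sign when the pathway is depleted (odds ratio below 1) rather than enriched.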
# Per-cluster KEGG pathway enrichment.
#
# For every dendrogram cluster (integer code in col.leaf) and every KEGG pathway
# found in DB.KEGG, a Fisher's exact test compares how often the pathway is
# annotated on the wells of the cluster ("target") versus all other wells
# ("bgd"). Pathways with p < 0.03 are kept; the stored p-value is multiplied by
# -1 when the odds ratio is below 1 (depletion instead of enrichment).
pathways <- lapply(DB.KEGG, function(x) x$PATHWAY)
pathways.name <- unique(unlist(pathways))
# 153 pathways
length(pathways.name)

# enrichment
n.clusters <- max(col.leaf)
ENR <- vector("list", length = n.clusters)
for (i in seq_len(n.clusters)) {
  # Pathway annotations of the wells inside / outside cluster i.
  target.wells <- pathways[wells.dend[which(col.leaf == i)]]
  bgd.wells <- pathways[wells.dend[which(col.leaf != i)]]

  # One slot per pathway; slots of non-significant pathways stay NULL and are
  # dropped by rbind() below (avoids growing a matrix inside the loop).
  hits <- vector("list", length(pathways.name))
  for (j in seq_along(pathways.name)) {
    target <- vapply(
      target.wells, function(x) pathways.name[j] %in% x, logical(1)
    )
    bgd <- vapply(
      bgd.wells, function(x) pathways.name[j] %in% x, logical(1)
    )
    # Explicit 2x2 contingency table (rows: pathway present / absent,
    # columns: target / background). Passing counts instead of two factors
    # keeps the test well-defined even when one category is empty, because
    # fisher.test() re-factors its inputs and would drop unused levels.
    counts <- matrix(
      c(sum(target), sum(!target), sum(bgd), sum(!bgd)),
      nrow = 2,
      dimnames = list(c("TRUE", "FALSE"), c("target", "bgd"))
    )
    tmp <- fisher.test(counts)
    # NOTE(review): p-values below 0.0005 are rounded to 0 and therefore lose
    # their sign; confirm whether the app needs the direction in that case.
    if (tmp$p.value < 0.03) {
      hits[[j]] <- c(
        pathways.name[j],
        round(sign(tmp$estimate - 1) * tmp$p.value, 3)
      )
    }
  }

  HITS <- do.call(rbind, hits) # NULL when no pathway is significant
  if (!is.null(HITS)) {
    colnames(HITS) <- c("Pathway", "p-value")
    # we keep negative enrichment but won't display it in the app
    # drop = FALSE keeps a one-hit result as a 1 x 2 matrix instead of
    # collapsing it to a named vector.
    HITS <- HITS[
      order(abs(as.numeric(HITS[, 2])), decreasing = FALSE), ,
      drop = FALSE
    ]
  }
  # `ENR[[i]] <- NULL` would delete the slot and shorten the list; assigning
  # through `[` with list() keeps one entry per cluster, NULL meaning "no hit".
  ENR[i] <- list(HITS)
}

# This enrichment mapping might be useful when we detect significant clusters
# in a specific group of strains
ANNO[[plateID]]$ENR <- ENR
ANNO[[plateID]]$col.leaf <- col.leaf
ANNO[[plateID]]$pathways.name <- pathways.name
ANNO[[plateID]]$wells.dend <- wells.dend
Here is the content of each cluster in terms of enriched KEGG pathways:
# Set a very wide output width (1000 characters), presumably so that each row
# of the enrichment tables printed below stays on a single line.
options(width = 1000)
# Auto-print ENR, the list holding the enrichment result of each cluster.
ENR
# Persist the ANNO object (looked up by name) to an .Rdata file.
save(list = "ANNO", file = "../data/biolog_kegg_wells.Rdata")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.