# Annotate one m/z value against the KEGG compound database for one adduct.
#
# The observed m/z is converted to a neutral mass using the adduct definition,
# KEGG is searched by exact mass within a ppm window around that neutral mass,
# and every compound hit is expanded with its KEGG record fields (exact mass,
# name, formula, CAS, PubChem, ChEBI, LIPID MAPS, BRITE, pathways), an HMDB
# cross-reference and the linked disease / drug / environ entries.
#
# Arguments:
#   mz.val       Observed m/z (a single numeric value).
#   max.mz.diff  Mass tolerance in ppm, applied to the neutral mass.
#   adductname   Adduct name; looked up in column 1 of adduct_table.
#   syssleep     Base delay (seconds) used to throttle the web requests.
#   adduct_table Table whose columns 1-4 hold the adduct name, the number of
#                molecules, the charge and the adduct mass.
#
# Value:
#   NULL when the search yields no line containing a KEGG compound id;
#   the character vector c(mz.val, rep("NC", 27)) when the search cannot be
#   performed (request failure or non-finite neutral mass); otherwise a
#   character matrix with one 28-column row per KEGG compound hit.
#
# Side effects: writes "mzval.txt" and, when the search cannot be performed,
# appends to "kegg_bad_mzs.txt" in the working directory.
#
# NOTE(review): getURL() is not defined in this block; presumably it is
# RCurl::getURL and must be attached by the caller - confirm.
# NOTE(review): the HMDB cross-link and compound-record requests are not
# wrapped in try(), so a failure there still propagates as an error.
feat.batch.annotation.child <-
function(mz.val,max.mz.diff, adductname, syssleep,adduct_table)
{
  # Fetch up to `maxnum` KEGG entries of `target_db` linked to a compound and
  # format them as an HTML link string plus a plain ";"-separated item list.
  # Falls back to "-" for both when the request fails or has no second column.
  fetch_linked_items <- function(target_db, kegg_id, maxnum = 10) {
    link_url <- paste0("http://rest.kegg.jp/link/", target_db, "/cpd:",
                       as.character(kegg_id))
    linked <- try(read.delim(link_url, header = FALSE), silent = TRUE)
    if (inherits(linked, "try-error") || ncol(as.data.frame(linked)) < 2) {
      return(list(link = "-", items = "-"))
    }
    items <- linked[, 2]
    if (length(items) > maxnum) {
      items <- items[seq_len(maxnum)]
    }
    anchors <- paste("<a href=http://www.genome.jp/dbget-bin/www_bget?",
                     items, ">", items, "</a>", sep = "")
    list(link = paste(anchors, ";<br>", collapse = ""),
         items = paste(items, ";", collapse = ""))
  }

  # Adduct properties, indexed by adduct name.
  adduct_names <- as.character(adduct_table[, 1])
  adductlist <- adduct_table[, 4]
  mult_charge <- adduct_table[, 3]
  num_mol <- adduct_table[, 2]
  names(adductlist) <- adduct_names
  names(mult_charge) <- adduct_names
  names(num_mol) <- adduct_names

  adduct_key <- as.character(adductname)
  adductmass <- adductlist[adduct_key]
  adductcharge <- mult_charge[adduct_key]
  adductnmol <- num_mol[adduct_key]

  # mz = ((nmol * M) + adductMass) / charge
  #   =>  M = ((mz * charge) - adductMass) / nmol
  mono_mass <- ((mz.val * adductcharge) - adductmass) / adductnmol
  mz <- mono_mass

  # ppm tolerance window around the neutral mass.
  delta_ppm <- max.mz.diff * (mz / 1000000)
  min_mz <- round(mz - delta_ppm, 5)
  max_mz <- round(mz + delta_ppm, 5)

  res <- NULL
  mzorig <- round(mz.val, 5)
  delta_ppm <- round(delta_ppm, 5)

  Sys.sleep(syssleep / 5)
  write.table(mz.val, file = "mzval.txt", sep = "\t", row.names = FALSE)

  html_link <- "-"
  search_link <- paste0("http://rest.kegg.jp/find/compound/",
                        min_mz, "-", max_mz, "/exact_mass")

  # An unknown adduct name or a missing m/z gives a non-finite neutral mass;
  # the search URL would be meaningless, so skip the request in that case.
  mass_ok <- length(mono_mass) == 1L && is.finite(mono_mass)
  d1 <- if (mass_ok) try(readLines(search_link), silent = TRUE) else NULL

  if (!mass_ok || inherits(d1, "try-error")) {
    res <- c(mz.val, rep("NC", 27))
    write.table(mz.val, file = "kegg_bad_mzs.txt", sep = "\t",
                row.names = FALSE, append = TRUE)
  } else {
    # Keywords located in the KEGG flat-file record; the last one ("map")
    # identifies pathway lines and is handled separately.
    pattern_list <- c("EXACT_MASS", "NAME", "FORMULA", "CAS:", "PubChem:",
                      "ChEBI:", "LIPIDMAPS:", "BRITE", "map")
    pattern_keggid <- "C[0-9]{3,5}"
    # Link prefix for each of the first eight keywords ("-" = no link).
    url_strs <- c("-", "-", "-", "-",
                  "<a href=http://pubchem.ncbi.nlm.nih.gov/summary/summary.cgi?sid=",
                  "<a href=http://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:",
                  "<a href=http://www.lipidmaps.org/data/get_lm_lipids_dbgif.php?LM_ID=",
                  "-")
    n_fields <- length(pattern_list) - 1
    id_list <- "-"

    for (i in seq_along(d1)) {
      # Throttle: a slightly longer pause on every fifth hit.
      if (i %% 5 > 0) {
        Sys.sleep(syssleep / 5)
      } else {
        Sys.sleep(syssleep / 3)
      }

      # First KEGG compound id on the line; lines without one are skipped.
      str_text <- as.character(d1[i])
      id_match <- regexpr(pattern_keggid, str_text)
      if (is.na(id_match) || id_match < 0) {
        next
      }
      keggID <- regmatches(str_text, id_match)
      kegglink <- paste0("<a href=http://www.genome.jp/dbget-bin/www_bget?cpd:",
                         keggID, ">", keggID, "</a>")

      # HMDB cross-reference: the last linked entry carrying an HMDB id wins.
      hmdbID <- "-"
      hmdblink <- "-"
      search_link1 <- paste0("http://rest.genome.jp/link/cpd:", keggID, "+-e")
      dlink <- getURL(search_link1)
      if (dlink != "") {
        dlink2 <- as.data.frame(read.delim(search_link1, header = FALSE))
        if (ncol(dlink2) > 1) {
          link_text <- as.character(dlink2[, 2])
          hmdb_hits <- regmatches(link_text,
                                  regexpr("HMDB[0-9]{2,}", link_text))
          if (length(hmdb_hits) > 0) {
            hmdbID <- hmdb_hits[length(hmdb_hits)]
            hmdblink <- paste0("<a href=http://www.hmdb.ca/metabolites/",
                               hmdbID, ">", hmdbID, "</a>")
          }
        }
      }

      # Full KEGG compound record, one flat-file line per row.
      search_link <- paste0("http://rest.kegg.jp/get/cpd:", as.character(keggID))
      d3 <- as.data.frame(read.delim(search_link, header = FALSE))
      if (length(d3) == 0) {
        next
      }
      record_lines <- as.character(d3[, 1])

      # Two fixed slots per keyword (value, link) so that the indices used to
      # build the result row stay aligned even if a keyword matches more than
      # one record line; the first matching line is used.
      pat.res <- rep("-", 2 * n_fields)
      for (j in seq_len(n_fields)) {
        l1 <- grep(pattern_list[j], record_lines)
        if (length(l1) > 0) {
          str1 <- gsub(" ", "_", record_lines[l1[1]])
          if (j == 8) {
            # BRITE: keep the words of the description, drop only the padding.
            p1 <- paste0("(DBLINKS)|[_]{2,}|;*", pattern_list[j])
            s2 <- gsub(p1, "", str1)
            s2 <- gsub("_", " ", s2)
          } else {
            p1 <- paste0("(DBLINKS)|[_]*|:*|;*", pattern_list[j])
            s2 <- gsub(p1, "", str1)
          }
          pat.res[2 * j - 1] <- s2
          pat.res[2 * j] <- paste0(url_strs[j], s2, ">", s2, "</a>")
        }
      }

      # Pathway lines: accumulate ids, links and names over all matches.
      l1 <- grep(pattern_list[length(pattern_list)], record_lines)
      if (length(l1) > 0) {
        keggpathid <- ""
        keggpathname <- ""
        keggpathlink <- ""
        for (ind1 in seq_along(l1)) {
          str1 <- gsub(" ", "_", record_lines[l1[ind1]])
          s2 <- gsub("(DBLINKS)|[_]{3,}|:*|;*|PATHWAY", "", str1)
          s3 <- strsplit(s2, "__")
          path_id <- s3[[1]][1]
          keggpathurl <- paste0("<a href=http://www.genome.jp/kegg-bin/show_pathway?",
                                path_id, "+", keggID, ">", path_id, "</a>")
          s4 <- gsub("_", " ", as.character(s3[[1]][2]))
          keggpathid <- paste0(keggpathid, path_id, ";")
          keggpathlink <- paste(keggpathlink, paste0(keggpathurl, ";"),
                                sep = "<br>")
          keggpathname <- paste(keggpathname, paste0(s4, ";"), sep = "<br>")
        }
        pat.res <- c(pat.res, keggpathid, keggpathlink, keggpathname)
      } else {
        pat.res <- c(pat.res, "-", "-", "-")
      }

      # Linked KEGG entries (same request order as before).
      disease <- fetch_linked_items("disease", keggID)
      environ <- fetch_linked_items("environ", keggID)
      drug <- fetch_linked_items("drug", keggID)

      # 28-column result row; pat.res slots: 1 exact mass, 3 name, 5 formula,
      # 7 CAS, 9-10 PubChem, 11-12 ChEBI, 13-14 LIPID MAPS, 15 BRITE,
      # 17-19 pathway id / link / name.
      res <- rbind(res, c(mzorig, delta_ppm, as.character(id_list),
                          pat.res[1], html_link,
                          pat.res[3], pat.res[5], pat.res[7],
                          keggID, kegglink,
                          pat.res[17], pat.res[18], pat.res[19],
                          hmdbID, hmdblink,
                          pat.res[9], pat.res[10], pat.res[11], pat.res[12],
                          pat.res[13], pat.res[14], pat.res[15],
                          disease$link, drug$link, environ$link,
                          disease$items, drug$items, environ$items))
    }
  }

  Sys.sleep(syssleep / 5)
  return(res)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.