R/feat.batch.annotation.child.R

Defines functions feat.batch.annotation.child

# feat.batch.annotation.child
#
# Looks up KEGG compounds whose exact mass matches one observed m/z value
# (after converting the m/z window to a neutral-mass window for the given
# adduct) and collects cross-references for every hit.
#
# Arguments:
#   mz.val        observed m/z value.
#   max.mz.diff   mass tolerance in ppm; the window is mz.val +/- ppm.
#   adductname    adduct label, e.g. "M+H"; looked up in the built-in table.
#   adductmass,
#   adductcharge  only used when `adductname` is NOT in the built-in table and
#                 both are single non-NA values; for known adducts the table
#                 values always win (this was the only behaviour before).
#   syssleep      base pause in seconds; fractions of it (1/5, 1/3) are slept
#                 between web requests.
#
# Value:
#   * NULL when the KEGG mass query returns no usable compound line;
#   * a character matrix with one row per KEGG compound and 22 columns:
#       1 m/z (rounded), 2 ppm window half-width (rounded), 3 "-",
#       4 exact mass, 5 "-", 6 name, 7 formula, 8 CAS id,
#       9 KEGG id, 10 KEGG link, 11 pathway ids, 12 pathway links,
#       13 pathway names, 14 HMDB id, 15 HMDB link,
#       16 PubChem id, 17 PubChem link, 18 ChEBI id, 19 ChEBI link,
#       20 LIPIDMAPS id, 21 LIPIDMAPS link, 22 BRITE text
#     (missing fields are "-");
#   * when the KEGG mass query itself fails: a character vector
#     c(mz.val, rep("NC", 27)).
#     NOTE(review): that vector has 28 elements while result rows have 22
#     columns; kept unchanged because callers may depend on it.
#
# Side effects: prints the adduct mass/charge and the m/z window, overwrites
# "mzval.txt" in the working directory, appends to "kegg_bad_mzs.txt" when the
# mass query fails, and performs HTTP requests against KEGG / GenomeNet.
feat.batch.annotation.child <-
function(mz.val,max.mz.diff, adductname, adductmass=NA, adductcharge=NA, syssleep)
{
  alladducts <- c("M+H", "M+Na", "M+K", "M+H-2H2O", "M+H-H2O", "M-H2O+NH4", "M+Li", "M+NH4",
                  "M+CH3OH+H", "M+ACN+H", "M+2Na-H", "M+ACN+Na", "M+2H", "M+H+Na", "M+2Na", "M+3H",
                  "M+2H+Na", "M+2Na+H", "M-H2O-H", "M-H", "M+F", "M+Na-2H", "M+Cl", "M+K-2H",
                  "M+FA-H", "M+CH3COO-H", "M+Br", "M-2H", "M-3H")

  # Mass shift of each adduct, in the same order as `alladducts`.
  adductlist <- c(1.007276, 22.989218, 38.963158, -35.012676, -17.0027, 0.0227, 7.01597, 18.033823,
                  33.033486, 42.033826, 44.97116, 64.015768, 2.014552, 23.996494, 45.978436, 3.021828,
                  25.00377, 46.985712, -19.01839, -1.007276, 18.998371, 20.974666, 34.969402, 36.948606,
                  44.998194, 59.013851, 78.918885, -2.014552, -3.021828)
  names(adductlist) <- alladducts

  # Charge multiplier of each adduct, in the same order as `alladducts`.
  mult_charge <- c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3)
  names(mult_charge) <- alladducts

  # Known adducts always use the table. The caller-supplied mass/charge are a
  # fallback for adduct names the table does not contain.
  adduct_key <- as.character(adductname)
  use_supplied <- length(adduct_key) == 1L &&
    !(adduct_key %in% alladducts) &&
    length(adductmass) == 1L && length(adductcharge) == 1L &&
    !is.na(adductmass) && !is.na(adductcharge)
  if (!use_supplied) {
    adductmass <- adductlist[adduct_key]
    adductcharge <- mult_charge[adduct_key]
  }

  print(adductmass)
  print(adductcharge)

  # ppm tolerance expressed in m/z units.
  delta_ppm <- (max.mz.diff) * (mz.val / 1000000)

  min_mz <- round((mz.val - delta_ppm), 5)
  max_mz <- round((mz.val + delta_ppm), 5)

  print(mz.val)
  print(min_mz)
  print(max_mz)

  # Convert the m/z window to a neutral-mass window.
  min_mz <- (min_mz * adductcharge) - adductmass
  max_mz <- (max_mz * adductcharge) - adductmass

  res <- NULL
  mzorig <- round(mz.val, 5)
  delta_ppm <- round(delta_ppm, 5)

  Sys.sleep(syssleep / 5)

  write.table(mz.val, file = "mzval.txt", sep = "\t", row.names = FALSE)

  search_link <- paste0("http://rest.kegg.jp/find/compound/", min_mz, "-", max_mz, "/exact_mass")
  d1 <- try(readLines(search_link), silent = TRUE)

  if (inherits(d1, "try-error")) {

    # Mass query failed: flag the m/z as "not computed" and log it.
    res <- c(mz.val, rep("NC", 27))
    write.table(mz.val, file = "kegg_bad_mzs.txt", sep = "\t", row.names = FALSE, append = TRUE)

  } else {

    # Fields pulled out of the KEGG flat-file record. The first eight are
    # single-valued fields; the last one ("map") marks pathway lines.
    pattern_list <- c("EXACT_MASS", "NAME", "FORMULA", "CAS:", "PubChem:", "ChEBI:", "LIPIDMAPS:", "BRITE", "map")
    pattern_keggid <- "C[0-9]{3,5}"

    # Link prefixes, parallel to the first eight entries of `pattern_list`.
    url_strs <- c("-", "-", "-", "-",
                  "<a href=http://pubchem.ncbi.nlm.nih.gov/summary/summary.cgi?sid=",
                  "<a href=http://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:",
                  "<a href=http://www.lipidmaps.org/data/get_lm_lipids_dbgif.php?LM_ID=",
                  "-")
    n_fields <- length(pattern_list) - 1L

    rows <- list()

    for (i in seq_along(d1)) {

      # Throttle requests: a longer pause on every fifth hit.
      if (i %% 5 > 0) {
        Sys.sleep(syssleep / 5)
      } else {
        Sys.sleep(syssleep / 3)
      }

      # First KEGG compound id on the line; lines without one are ignored.
      id_match <- regexpr(pattern_keggid, d1[i])
      if (is.na(id_match) || id_match < 1) {
        next
      }
      keggID <- regmatches(d1[i], id_match)
      kegglink <- paste0("<a href=http://www.genome.jp/dbget-bin/www_bget?cpd:", keggID, ">", keggID, "</a>")

      # ---- HMDB cross-reference via the link service ----------------------
      hmdbID <- "-"
      hmdblink <- "-"
      search_link1 <- paste0("http://rest.genome.jp/link/cpd:", keggID, "+-e")
      # A failed or empty response makes read.delim() error; that simply
      # leaves the HMDB columns as "-".
      link_tab <- try(read.delim(search_link1, header = FALSE), silent = TRUE)
      if (!inherits(link_tab, "try-error") && ncol(link_tab) >= 2) {
        for (l in seq_len(nrow(link_tab))) {
          link_text <- as.character(link_tab[l, 2])
          hmdb_match <- regexpr("HMDB[0-9]{2,}", link_text)
          if (!is.na(hmdb_match) && hmdb_match > 0) {
            # As before, a later row with an HMDB id overrides an earlier one.
            hmdbID <- regmatches(link_text, hmdb_match)
            hmdblink <- paste0("<a href=http://www.hmdb.ca/metabolites/", hmdbID, ">", hmdbID, "</a>")
          }
        }
      }

      # ---- KEGG compound record -------------------------------------------
      record_link <- paste0("http://rest.kegg.jp/get/cpd:", as.character(keggID))
      record <- try(read.delim(record_link, header = FALSE), silent = TRUE)
      if (inherits(record, "try-error")) {
        warning("Could not retrieve KEGG record for ", keggID, "; compound skipped.", call. = FALSE)
        next
      }
      if (length(record) == 0) {
        next
      }
      record_lines <- as.character(record[, 1])

      # One fixed slot per field so that a field matching several lines can
      # never shift the columns of the fields that follow it.
      field_value <- rep("-", n_fields)
      field_link <- rep("-", n_fields)

      for (j in seq_len(n_fields)) {
        hits <- grep(pattern_list[j], record_lines)
        if (length(hits) == 0) {
          next
        }
        # Spaces become "_" so that padding can be stripped by the patterns below.
        flat <- gsub(" ", "_", record_lines[hits])
        if (j == 8) {
          # BRITE: drop only the label and runs of padding, keep single spaces.
          p1 <- paste("(DBLINKS)|[_]{2,}|;*", pattern_list[j], sep = "")
          cleaned <- gsub(p1, "", flat)
          cleaned <- gsub("_", " ", cleaned)
        } else {
          p1 <- paste("(DBLINKS)|[_]*|:*|;*", pattern_list[j], sep = "")
          cleaned <- gsub(p1, "", flat)
        }
        field_value[j] <- paste(cleaned, collapse = ";")
        field_link[j] <- paste(paste0(url_strs[j], cleaned, ">", cleaned, "</a>"), collapse = ";")
      }

      # ---- pathway lines ("mapNNNNN  description") ------------------------
      keggpathid <- "-"
      keggpathlink <- "-"
      keggpathname <- "-"
      path_hits <- grep(pattern_list[length(pattern_list)], record_lines)
      if (length(path_hits) > 0) {
        keggpathid <- ""
        keggpathlink <- ""
        keggpathname <- ""
        for (h in path_hits) {
          flat <- gsub(" ", "_", record_lines[h])
          cleaned <- gsub("(DBLINKS)|[_]{3,}|:*|;*|PATHWAY", "", flat)
          # After cleaning, id and description are separated by "__".
          parts <- strsplit(cleaned, "__")[[1]]
          path_url <- paste0("<a href=http://www.genome.jp/kegg-bin/show_pathway?", parts[1], "+", keggID, ">", parts[1], "</a>")
          path_name <- gsub("_", " ", as.character(parts[2]))

          keggpathid <- paste0(keggpathid, parts[1], ";")
          keggpathlink <- paste(keggpathlink, paste0(path_url, ";"), sep = "<br>")
          keggpathname <- paste(keggpathname, paste0(path_name, ";"), sep = "<br>")
        }
      }

      rows[[length(rows) + 1L]] <- c(
        mzorig, delta_ppm, "-", field_value[1], "-",
        field_value[2], field_value[3], field_value[4],
        keggID, kegglink,
        keggpathid, keggpathlink, keggpathname,
        hmdbID, hmdblink,
        field_value[5], field_link[5],
        field_value[6], field_link[6],
        field_value[7], field_link[7],
        field_value[8]
      )
    }

    if (length(rows) > 0) {
      res <- do.call(rbind, rows)
    }
  }

  Sys.sleep(syssleep / 5)

  return(res)
}
kuppal2/xMSanalyzer documentation built on Feb. 12, 2021, 12:36 a.m.