#' Annotate one m/z value against KEGG COMPOUND for a single adduct form
#'
#' Converts the window `mz.val` +/- `max.mz.diff` ppm into a neutral-mass
#' window for the requested adduct, queries the KEGG REST
#' `find/compound/<min>-<max>/exact_mass` endpoint and, for every returned
#' line that contains a KEGG compound id, downloads the compound flat file
#' and the LinkDB cross references to build one annotation row.
#'
#' Side effects: prints the adduct mass/charge and the m/z window, overwrites
#' `mzval.txt` in the working directory with `mz.val`, appends `mz.val` to
#' `kegg_bad_mzs.txt` when the initial search fails, and sleeps between
#' requests.
#'
#' @param mz.val Observed m/z value (numeric).
#' @param max.mz.diff Mass tolerance in ppm.
#' @param adductname Adduct label; looked up in the built-in adduct table
#'   (e.g. `"M+H"`, `"M-H"`, `"M+2H"`).
#' @param adductmass,adductcharge Kept for interface compatibility only; both
#'   are overwritten by the table lookup on `adductname`.
#' @param syssleep Base delay passed (divided by 5 or 3) to `Sys.sleep()`
#'   between requests.
#'
#' @return
#' * `NULL` when no returned line yields a KEGG compound id.
#' * A character vector `c(mz.val, rep("NC", 27))` when the initial search
#'   request fails. NOTE(review): this has 28 elements whereas regular rows
#'   have 22 columns -- confirm what the caller expects.
#' * Otherwise a character matrix with one row per compound and 22 columns:
#'   rounded m/z, rounded tolerance (m/z units), "-", exact mass, "-", name,
#'   formula, CAS, KEGG id, KEGG link, pathway ids, pathway links, pathway
#'   names, HMDB id, HMDB link, PubChem id, PubChem link, ChEBI id, ChEBI
#'   link, LIPIDMAPS id, LIPIDMAPS link, BRITE text.
feat.batch.annotation.child <- function(mz.val, max.mz.diff,
                                        adductname, adductmass = NA,
                                        adductcharge = NA, syssleep) {
  # Built-in adduct table: mass shift and charge multiplier per adduct label.
  adduct_names <- c(
    "M+H", "M+Na", "M+K", "M+H-2H2O",
    "M+H-H2O", "M-H2O+NH4", "M+Li", "M+NH4", "M+CH3OH+H",
    "M+ACN+H", "M+2Na-H", "M+ACN+Na", "M+2H",
    "M+H+Na", "M+2Na", "M+3H", "M+2H+Na", "M+2Na+H",
    "M-H2O-H", "M-H", "M+F", "M+Na-2H", "M+Cl",
    "M+K-2H", "M+FA-H", "M+CH3COO-H", "M+Br",
    "M-2H", "M-3H"
  )
  adductlist <- c(
    1.007276, 22.989218, 38.963158,
    -35.012676, -17.0027, 0.0227, 7.01597, 18.033823,
    33.033486, 42.033826, 44.97116, 64.015768,
    2.014552, 23.996494, 45.978436, 3.021828,
    25.00377, 46.985712, -19.01839, -1.007276,
    18.998371, 20.974666, 34.969402, 36.948606,
    44.998194, 59.013851, 78.918885, -2.014552,
    -3.021828
  )
  names(adductlist) <- adduct_names
  mult_charge <- c(
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 2, 3
  )
  names(mult_charge) <- adduct_names

  # The supplied adductmass/adductcharge are always replaced by the lookup.
  adductmass <- adductlist[as.character(adductname)]
  adductcharge <- mult_charge[as.character(adductname)]
  print(adductmass)
  print(adductcharge)

  # ppm tolerance expressed in m/z units, then the observed m/z window.
  delta_ppm <- max.mz.diff * (mz.val / 1e+06)
  min_mz <- round(mz.val - delta_ppm, 5)
  max_mz <- round(mz.val + delta_ppm, 5)
  print(mz.val)
  print(min_mz)
  print(max_mz)

  # Convert the m/z window to a neutral-mass window.
  min_mz <- (min_mz * adductcharge) - adductmass
  max_mz <- (max_mz * adductcharge) - adductmass

  mzorig <- round(mz.val, 5)
  delta_ppm <- round(delta_ppm, 5)

  Sys.sleep(syssleep / 5)
  write.table(mz.val, file = "mzval.txt", sep = "\t", row.names = FALSE)

  search_link <- paste0(
    "http://rest.kegg.jp/find/compound/",
    min_mz, "-", max_mz, "/exact_mass"
  )
  d1 <- try(readLines(search_link), silent = TRUE)

  if (inherits(d1, "try-error")) {
    # Search request failed: flag the m/z as "NC" and log it.
    res <- c(mz.val, rep("NC", 27))
    write.table(mz.val,
      file = "kegg_bad_mzs.txt",
      sep = "\t", row.names = FALSE, append = TRUE
    )
  } else {
    rows <- list()
    for (i in seq_along(d1)) {
      # Throttle: a longer pause on every fifth line.
      if (i %% 5 > 0) {
        Sys.sleep(syssleep / 5)
      } else {
        Sys.sleep(syssleep / 3)
      }

      keggID <- .kegg_first_match("C[0-9]{3,5}", d1[i])
      if (is.na(keggID)) {
        next
      }
      kegglink <- paste0(
        "<a href=http://www.genome.jp/dbget-bin/www_bget?cpd:",
        keggID, ">", keggID, "</a>"
      )

      hmdb <- .kegg_hmdb_xref(keggID)

      d3 <- try(
        as.data.frame(read.delim(
          paste0("http://rest.kegg.jp/get/cpd:", as.character(keggID)),
          header = FALSE
        )),
        silent = TRUE
      )
      if (inherits(d3, "try-error")) {
        # Keep the rows collected so far instead of aborting the whole lookup.
        warning("KEGG compound record could not be retrieved for ", keggID,
          call. = FALSE
        )
        next
      }
      if (length(d3) > 0) {
        pat.res <- .kegg_parse_compound(d3[, 1], keggID)
        rows[[length(rows) + 1L]] <- c(
          mzorig, delta_ppm, "-", pat.res[1],
          "-", pat.res[3], pat.res[5],
          pat.res[7], keggID, kegglink,
          pat.res[17], pat.res[18], pat.res[19],
          hmdb$id, hmdb$link, pat.res[9],
          pat.res[10], pat.res[11], pat.res[12],
          pat.res[13], pat.res[14], pat.res[15]
        )
      }
    }
    # rbind() of zero rows is NULL, matching the "no hits" result.
    res <- do.call(rbind, rows)
  }

  Sys.sleep(syssleep / 5)
  res
}

# Return the first substring of `text` matching `pattern`, or NA_character_
# when `text` is missing or contains no match.
.kegg_first_match <- function(pattern, text) {
  text <- as.character(text)[1]
  if (is.na(text)) {
    return(NA_character_)
  }
  m <- regexpr(pattern, text, perl = FALSE)
  if (m[1] < 0) {
    return(NA_character_)
  }
  regmatches(text, m)
}

# Look up the HMDB cross reference of a KEGG compound through LinkDB.
# Returns list(id, link); both are "-" when nothing is found or the request
# fails. When several rows carry an HMDB id, the last one wins.
.kegg_hmdb_xref <- function(keggID) {
  out <- list(id = "-", link = "-")
  search_link1 <- paste0("http://rest.genome.jp/link/cpd:", keggID, "+-e")

  dlink <- try(getURL(search_link1), silent = TRUE)
  if (inherits(dlink, "try-error") || length(dlink) != 1L ||
    is.na(dlink) || dlink == "") {
    return(out)
  }

  # Parse the body already downloaded rather than requesting the URL again.
  dlink2 <- try(
    as.data.frame(read.delim(text = dlink, header = FALSE)),
    silent = TRUE
  )
  if (inherits(dlink2, "try-error") || ncol(dlink2) < 2) {
    return(out)
  }

  for (l in seq_len(nrow(dlink2))) {
    hit <- .kegg_first_match("HMDB[0-9]{2,}", dlink2[l, 2])
    if (!is.na(hit)) {
      out$id <- hit
      out$link <- paste0(
        "<a href=http://www.hmdb.ca/metabolites/",
        hit, ">", hit, "</a>"
      )
    }
  }
  out
}

# Extract annotation fields from the lines of a KEGG compound flat file.
# Always returns 19 strings: a (value, link) pair for each of EXACT_MASS,
# NAME, FORMULA, CAS, PubChem, ChEBI, LIPIDMAPS and BRITE (positions 1-16),
# followed by pathway ids, pathway links and pathway names (17-19).
# Missing fields are "-". Several matching lines for one field are joined
# with ";" so the positions never shift.
.kegg_parse_compound <- function(flat_lines, keggID) {
  flat_lines <- as.character(flat_lines)
  pattern_list <- c(
    "EXACT_MASS", "NAME", "FORMULA",
    "CAS:", "PubChem:", "ChEBI:", "LIPIDMAPS:",
    "BRITE", "map"
  )
  url_strs <- c(
    "-", "-", "-", "-",
    "<a href=http://pubchem.ncbi.nlm.nih.gov/summary/summary.cgi?sid=",
    "<a href=http://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:",
    "<a href=http://www.lipidmaps.org/data/get_lm_lipids_dbgif.php?LM_ID=",
    "-"
  )
  n_fields <- length(pattern_list) - 1L
  pat.res <- rep("-", 2L * n_fields + 3L)

  for (j in seq_len(n_fields)) {
    hits <- grep(pattern_list[j], flat_lines)
    if (length(hits) == 0) {
      next
    }
    underscored <- gsub(" ", "_", flat_lines[hits])
    if (j == 8) {
      # BRITE: keep single underscores so they can be turned back to spaces.
      p1 <- paste0("(DBLINKS)|[_]{2,}|;*", pattern_list[j])
      values <- gsub(p1, "", underscored)
      values <- gsub("_", " ", values)
    } else {
      p1 <- paste0("(DBLINKS)|[_]*|:*|;*", pattern_list[j])
      values <- gsub(p1, "", underscored)
    }
    links <- paste0(url_strs[j], values, ">", values, "</a>")
    pat.res[2L * j - 1L] <- paste(values, collapse = ";")
    pat.res[2L * j] <- paste(links, collapse = ";")
  }

  # Pathway lines: every line containing "map" contributes one entry.
  hits <- grep(pattern_list[length(pattern_list)], flat_lines)
  if (length(hits) > 0) {
    keggpathid <- ""
    keggpathname <- ""
    keggpathlink <- ""
    for (h in hits) {
      underscored <- gsub(" ", "_", flat_lines[h])
      cleaned <- gsub(
        "(DBLINKS)|[_]{3,}|:*|;*|PATHWAY", "",
        underscored
      )
      parts <- strsplit(cleaned, "__")[[1]]
      path_url <- paste0(
        "<a href=http://www.genome.jp/kegg-bin/show_pathway?",
        parts[1], "+", keggID,
        ">", parts[1], "</a>"
      )
      path_name <- gsub("_", " ", as.character(parts[2]))
      keggpathid <- paste0(keggpathid, parts[1], ";")
      keggpathlink <- paste(keggpathlink, paste0(path_url, ";"),
        sep = "<br>"
      )
      keggpathname <- paste(keggpathname, paste0(path_name, ";"),
        sep = "<br>"
      )
    }
    pat.res[2L * n_fields + 1:3] <- c(keggpathid, keggpathlink, keggpathname)
  }

  pat.res
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.