# R/feat.batch.annotation.child.R

#' Annotate one m/z value against KEGG compounds via the KEGG REST service
#'
#' The observed m/z is widened by a ppm tolerance, converted to a neutral-mass
#' window using a built-in adduct table, and sent to the KEGG
#' `find/compound/<min>-<max>/exact_mass` endpoint. For every returned line
#' that contains a compound id (`C` followed by 3-5 digits) the function
#' additionally requests the cross-link table and the compound record and
#' assembles one result row from them.
#'
#' @param mz.val Observed m/z value (numeric).
#' @param max.mz.diff Mass tolerance; it is multiplied by `mz.val / 1e6`, i.e.
#'   it is interpreted as ppm.
#' @param adductname Adduct label used to look up the mass shift and charge
#'   multiplier in the built-in table (e.g. "M+H", "M+Na", "M-H").
#' @param adductmass,adductcharge Accepted for interface compatibility only:
#'   both are overwritten by the table lookup on `adductname`.
#'   NOTE(review): an `adductname` missing from the table yields NA here and
#'   therefore an "NA-NA" mass window - confirm whether callers rely on that.
#' @param syssleep Base pause used to throttle requests; the function sleeps
#'   `syssleep / 5` (every fifth search hit: `syssleep / 3`).
#'
#' @return One of:
#'   * `NULL` when the search succeeded but produced no compound rows;
#'   * a character matrix with 22 columns, one row per compound, in the order:
#'     m/z, delta, "-", exact mass, "-", name, formula, CAS, KEGG id,
#'     KEGG link, pathway ids, pathway links, pathway names, HMDB id,
#'     HMDB link, PubChem value, PubChem link, ChEBI value, ChEBI link,
#'     LIPIDMAPS value, LIPIDMAPS link, BRITE value;
#'   * a character vector of length 28 (`mz.val` followed by 27 "NC") when the
#'     initial search request fails.
#'   NOTE(review): the failure vector (28) and the success rows (22) differ in
#'   width; kept as-is because callers may depend on it.
#'
#' Side effects: prints the adduct values and m/z window, overwrites
#' "mzval.txt" and, on a failed search, appends to "kegg_bad_mzs.txt" (both in
#' the working directory).
feat.batch.annotation.child <- function(mz.val, max.mz.diff, 
    adductname, adductmass = NA, adductcharge = NA, 
    syssleep) {
    
    # ---- adduct table (single list of names shared by both lookups) ----
    alladducts <- c("M+H", "M+Na", "M+K", "M+H-2H2O", 
        "M+H-H2O", "M-H2O+NH4", "M+Li", "M+NH4", "M+CH3OH+H", 
        "M+ACN+H", "M+2Na-H", "M+ACN+Na", "M+2H", 
        "M+H+Na", "M+2Na", "M+3H", "M+2H+Na", "M+2Na+H", 
        "M-H2O-H", "M-H", "M+F", "M+Na-2H", "M+Cl", 
        "M+K-2H", "M+FA-H", "M+CH3COO-H", "M+Br", 
        "M-2H", "M-3H")
    
    adductlist <- c(1.007276, 22.989218, 38.963158, 
        -35.012676, -17.0027, 0.0227, 7.01597, 18.033823, 
        33.033486, 42.033826, 44.97116, 64.015768, 
        2.014552, 23.996494, 45.978436, 3.021828, 
        25.00377, 46.985712, -19.01839, -1.007276, 
        18.998371, 20.974666, 34.969402, 36.948606, 
        44.998194, 59.013851, 78.918885, -2.014552, 
        -3.021828)
    names(adductlist) <- alladducts
    
    mult_charge <- c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
        1, 1, 2, 2, 2, 3, 3, 3, 1, 1, 1, 1, 1, 1, 
        1, 1, 1, 2, 3)
    names(mult_charge) <- alladducts
    
    # ---- local helpers ----
    
    # First match of `pattern` in a single string, or NA when there is none.
    first_match <- function(pattern, text) {
        text <- as.character(text)
        hit <- regexpr(pattern, text)
        if (is.na(hit[1]) || hit[1] < 1) {
            return(NA_character_)
        }
        regmatches(text, hit)
    }
    
    # Read a tab-delimited resource; NULL when the request fails or the
    # response is empty (read.delim raises an error in both cases).
    fetch_table <- function(url) {
        tryCatch(as.data.frame(read.delim(url, header = FALSE)), 
            error = function(e) NULL)
    }
    
    # The supplied adductmass / adductcharge are replaced by the table values.
    adductmass <- adductlist[as.character(adductname)]
    adductcharge <- mult_charge[as.character(adductname)]
    
    print(adductmass)
    print(adductcharge)
    
    # ---- ppm window around the observed m/z ----
    delta_ppm <- (max.mz.diff) * (mz.val/1e+06)
    
    min_mz <- round((mz.val - delta_ppm), 5)
    max_mz <- round((mz.val + delta_ppm), 5)
    
    print(mz.val)
    print(min_mz)
    print(max_mz)
    
    # convert to neutral mass
    min_mz <- (min_mz * adductcharge) - adductmass
    max_mz <- (max_mz * adductcharge) - adductmass
    
    res <- NULL
    mzorig <- round(mz.val, 5)
    delta_ppm <- round(delta_ppm, 5)
    
    Sys.sleep(syssleep/5)
    
    write.table(mz.val, file = "mzval.txt", sep = "\t", 
        row.names = FALSE)
    
    # Placeholders that occupy fixed columns of every result row.
    html_link <- "-"
    id_list <- "-"
    
    search_link <- paste("http://rest.kegg.jp/find/compound/", 
        min_mz, "-", max_mz, "/exact_mass", sep = "")
    
    d1 <- try(readLines(search_link), silent = TRUE)
    
    if (inherits(d1, "try-error")) {
        
        # Search request failed: flag the m/z as "not checked" and log it.
        res <- c(mz.val, rep("NC", 27))
        
        write.table(mz.val, file = "kegg_bad_mzs.txt", 
            sep = "\t", row.names = FALSE, append = TRUE)
        
    } else {
        
        # Record fields looked up in the compound entry; the last one ("map")
        # identifies pathway lines and is handled separately below.
        pattern_list <- c("EXACT_MASS", "NAME", "FORMULA", 
            "CAS:", "PubChem:", "ChEBI:", "LIPIDMAPS:", 
            "BRITE", "map")
        n_fields <- length(pattern_list) - 1
        
        pattern_keggid <- "C[0-9]{3,5}"
        
        # Link prefixes, index-aligned with the first eight patterns.
        url_strs <- c("-", "-", "-", "-", 
            "<a href=http://pubchem.ncbi.nlm.nih.gov/summary/summary.cgi?sid=", 
            "<a href=http://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:", 
            "<a href=http://www.lipidmaps.org/data/get_lm_lipids_dbgif.php?LM_ID=", 
            "-")
        
        # Strip the field label and separators from one record line. The
        # regular expressions are the ones the original code used.
        extract_field <- function(record_line, j) {
            underscored <- gsub(pattern = " ", replacement = "_", 
                x = as.character(record_line))
            if (j == 8) {
                p1 <- paste("(DBLINKS)|[_]{2,}|;*", pattern_list[j], 
                    sep = "")
                value <- gsub(pattern = p1, replacement = "", 
                    x = underscored)
                value <- gsub(pattern = "_", replacement = " ", 
                    x = value)
            } else {
                p1 <- paste("(DBLINKS)|[_]*|:*|;*", pattern_list[j], 
                    sep = "")
                value <- gsub(pattern = p1, replacement = "", 
                    x = underscored)
            }
            value
        }
        
        rows <- list()
        
        for (i in seq_along(d1)) {
            
            # Throttle: a slightly longer pause on every fifth hit.
            if (i%%5 > 0) {
                Sys.sleep(syssleep/5)
            } else {
                Sys.sleep(syssleep/3)
            }
            
            keggID <- first_match(pattern_keggid, d1[i])
            if (is.na(keggID)) {
                next
            }
            
            kegglink <- paste("<a href=http://www.genome.jp/dbget-bin/www_bget?cpd:", 
                keggID, ">", keggID, "</a>", sep = "")
            
            # ---- HMDB cross-link (best effort) ----
            hmdbID <- "-"
            hmdblink <- "-"
            
            search_link1 <- paste("http://rest.genome.jp/link/cpd:", 
                keggID, "+-e", sep = "")
            
            # One guarded request replaces the former getURL() + read.delim()
            # pair; an empty or failed response simply leaves HMDB as "-".
            dlink2 <- fetch_table(search_link1)
            
            if (!is.null(dlink2) && ncol(dlink2) >= 2) {
                for (l in seq_len(nrow(dlink2))) {
                    hmdb_hit <- first_match("HMDB[0-9]{2,}", dlink2[l, 2])
                    if (!is.na(hmdb_hit)) {
                        # As before, a later matching row overrides an
                        # earlier one.
                        hmdbID <- hmdb_hit
                        hmdblink <- paste("<a href=http://www.hmdb.ca/metabolites/", 
                            hmdbID, ">", hmdbID, "</a>", sep = "")
                    }
                }
            }
            
            # ---- compound record ----
            record_link <- paste("http://rest.kegg.jp/get/cpd:", 
                as.character(keggID), sep = "")
            d3 <- fetch_table(record_link)
            
            if (is.null(d3)) {
                # Do not abort the remaining hits because one record failed.
                warning("could not retrieve KEGG record for ", keggID, 
                    call. = FALSE)
                next
            }
            if (length(d3) == 0) {
                next
            }
            
            # Fixed slots per field, so that a pattern matching several
            # record lines cannot shift the columns of the result row; the
            # first matching line is used.
            field_value <- rep("-", n_fields)
            field_url <- rep("-", n_fields)
            
            for (j in seq_len(n_fields)) {
                l1 <- grep(pattern = pattern_list[j], x = d3[, 1])
                if (length(l1) > 0) {
                    s2 <- extract_field(d3[l1[1], 1], j)
                    field_value[j] <- s2
                    field_url[j] <- paste(url_strs[j], s2, ">", s2, 
                        "</a>", sep = "")
                }
            }
            
            # ---- pathway lines (every line matching "map") ----
            keggpathid <- "-"
            keggpathlink <- "-"
            keggpathname <- "-"
            
            l1 <- grep(pattern = pattern_list[length(pattern_list)], 
                x = d3[, 1])
            if (length(l1) > 0) {
                
                keggpathid <- ""
                keggpathname <- ""
                keggpathlink <- ""
                
                for (ind1 in seq_along(l1)) {
                    str1 <- gsub(pattern = " ", replacement = "_", 
                        x = as.character(d3[l1[ind1], 1]))
                    
                    p1 <- paste("(DBLINKS)|[_]{3,}|:*|;*|PATHWAY", 
                        sep = "")
                    s2 <- gsub(pattern = p1, replacement = "", 
                        x = str1)
                    
                    # "<id>__<name>": pathway id first, then its title.
                    s3 <- strsplit(s2, "__")
                    
                    keggpathurl <- paste("<a href=http://www.genome.jp/kegg-bin/show_pathway?", 
                        s3[[1]][1], "+", keggID, ">", s3[[1]][1], 
                        "</a>", sep = "")
                    s4 <- gsub(pattern = "_", replacement = " ", 
                        x = as.character(s3[[1]][2]))
                    
                    keggpathid <- paste(keggpathid, paste(s3[[1]][1], 
                        ";", sep = ""), sep = "")
                    keggpathlink <- paste(keggpathlink, paste(keggpathurl, 
                        ";", sep = ""), sep = "<br>")
                    keggpathname <- paste(keggpathname, paste(s4, 
                        ";", sep = ""), sep = "<br>")
                }
            }
            
            rows[[length(rows) + 1]] <- c(mzorig, delta_ppm, 
                as.character(id_list), field_value[1], 
                html_link, field_value[2], field_value[3], 
                field_value[4], keggID, kegglink, 
                keggpathid, keggpathlink, keggpathname, 
                hmdbID, hmdblink, field_value[5], 
                field_url[5], field_value[6], field_url[6], 
                field_value[7], field_url[7], field_value[8])
        }
        
        if (length(rows) > 0) {
            res <- do.call(rbind, rows)
        }
    }
    
    Sys.sleep(syssleep/5)
    
    return(res)
}
# yufree/xMSanalyzer documentation built on May 4, 2019, 6:35 p.m.