R/rc.cmpd.get.classyfire.R

Defines functions classyfire.sunburst.plot get.classyfire.local

Documented in classyfire.sunburst.plot get.classyfire.local

#' getClassyFire
#'
#' use classyfire web API to look up full ClassyFire hirarchy for each inchikey
#' @details The $inchikey slot is used to look up the 
#' 
#' @param ramclustObj ramclustR object to ClassyFy.  Must supply one of either ramclustObj or inchikey (see below)
#' @param inchikey vector of text inchikeys to ClassyFy.  Must supply one of either ramclustObj or inchikey.
#' @param get.all logical; if TRUE, when inchikey classyfire lookup fails, submits for classyfication.  Can be slow. max.wait (below) sets max time to spend on each compound before moving on. default = FALSE.
#' @param max.wait  numeric; maximum time (seconds) to wait per compound when 'get.all' = TRUE.   
#' @param posts.per.minute  integer; a limit set when 'get.all' is true.  ClassyFire server accepts no more than 5 posts per minute when calculating new ClassyFire results.  Slows down submission process to keep server from denying access.  
#' @return returns a ramclustR object.  new dataframe in $classyfire slot with rows equal to number of compounds.  
#' @importFrom jsonlite fromJSON
#' @importFrom httr http_error
#' @concept ramclustR
#' @concept RAMClustR
#' @concept metabolomics
#' @concept mass spectrometry
#' @concept clustering
#' @concept feature
#' @concept MSFinder
#' @concept xcms
#' @concept classyFire
#' @author Corey Broeckling
#' @references Djoumbou Feunang Y, Eisner R, Knox C, Chepelev L, Hastings J, Owen G, Fahy E, Steinbeck C, Subramanian S, Bolton E, Greiner R, and Wishart DS. ClassyFire: Automated Chemical Classification With A Comprehensive, Computable Taxonomy. Journal of Cheminformatics, 2016, 8:61. DOI: 10.1186/s13321-016-0174-y

#' @export 

rc.cmpd.get.classyfire <- function (ramclustObj = NULL, inchikey = NULL, get.all = TRUE, 
                                    max.wait = 10, posts.per.minute = 5) 
{
  
  if(is.null(ramclustObj) & is.null(inchikey)) {
    stop("must supply ramclustObj or vector of inchikeys as input.  i.e. ramclustObj = RC; inchikey = c('MCPFEAJYKIXPQF-DXZAWUHFSA-N','GLZPCOQZEFWAFX-JXMROGBWSA-N')", '\n')
  }
  
  if(is.null(ramclustObj)) {
    ramclustObj <- list()
    ramclustObj$cmpd <- paste0("C", 1:length(inchikey))
    ramclustObj[['inchikey']] <- inchikey

  }

  if (!requireNamespace("httr", quietly = TRUE)) {
    stop("The use of this function requires package 'httr'.")
  }
  
  params <- c(
    "ramclustObj" = ramclustObj, 
    "inchikey" = inchikey, 
    "get.all" = get.all, 
    "max.wait" = max.wait, 
    "posts.per.minute" = posts.per.minute
  )
  if (is.null(ramclustObj$inchikey)) {
    stop("no inchikey slot found, please 'annotate' first", 
         "\n")
  }
  if (get.all & is.null(ramclustObj$smiles)) {
    warning(" - getting smiles from inchikey", '\n')
    ramclustObj <- getSmilesInchi(ramclustObj = ramclustObj)
    # stop("obtaining new classyfication (get.all option) requires a smiles notation", 
    #      "\n", "and no smiles slot found in ramclustObj.  Please first run 'getSmilesInchi()'", 
    #      "\n")
  }
  if (any(names(ramclustObj) == "classyfire")) {
    redo <- TRUE} else {redo <- FALSE}
  if (!redo) {
    ramclustObj$classyfire <- data.frame(inchikey = rep(NA, 
                                                        length(ramclustObj$inchikey)), kingdom = rep(NA, 
                                                                                                     length(ramclustObj$inchikey)), superclass = rep(NA, 
                                                                                                                                                     length(ramclustObj$inchikey)), class = rep(NA, length(ramclustObj$inchikey)), 
                                         subclass = rep(NA, length(ramclustObj$inchikey)), 
                                         parent = rep(NA, length(ramclustObj$inchikey)), 
                                         description = rep(NA, length(ramclustObj$inchikey)))
  }
  
  
  url = "http://classyfire.wishartlab.com"
  
  ## check server
  out <- tryCatch(jsonlite::fromJSON(paste0(url, "/entities/", "RYYVLZVUVIJVGH-UHFFFAOYSA-N", 
                                            ".json")), error = function(y) {
                                              return(NA)
                                            })
  if(length(out) == 1) {
    if(is.na(out)) {
      stop("  classyfire server appears to be down", '\n')
    }
  }
                                              
  
  
  cat("performing inchikey based lookup:", '\n')
  for (i in 1:length(ramclustObj$inchikey)) {
    
    if (is.na(ramclustObj$inchikey[i])) {
      next
    }
    if (!is.na(ramclustObj$classyfire[i, "kingdom"])) {
      next
    }
    Sys.sleep(1/posts.per.minute)
    cat(i, " ")
    out <- tryCatch(jsonlite::fromJSON(paste0(url, "/entities/", ramclustObj$inchikey[i], 
                                              ".json")), error = function(y) {
                                                return(NA)
                                              })
    
    tryn <- 1 
    if(length(out) < 2 & tryn <= 3) {
      # cat("retry", i, '\n')
      tryn <- tryn + 1
      Sys.sleep(1)
      out <- tryCatch(jsonlite::fromJSON(paste0(url, "/entities/", ramclustObj$inchikey[i], 
                                                ".json")), error = function(y) {
                                                  return(NA)
                                                })
    }
    
    if (length(out) > 1) {
      a <- ramclustObj$inchikey[i]
      b <- out$kingdom$name
      if (is.null(b)) 
        b <- NA
      c <- out$superclass$name
      if (is.null(c)) 
        c <- NA
      d <- out$class$name
      if (is.null(d)) 
        d <- NA
      e <- out$subclass$name
      if (is.null(e)) 
        e <- NA
      f <- out$direct_parent$name
      if (is.null(f)) 
        f <- NA
      g <- out$description
      if (is.null(g)) 
        g <- NA
      ramclustObj$classyfire[i, ] <- c(a, b, c, d, e, 
                                       f, g)
      
      rm(out); rm(a); rm(b); rm(c); rm(d); rm(e); rm(f); rm(g); rm(tryn)
    }
  }
  if (get.all) {
    get.full <- which(
      !is.na(ramclustObj$inchikey) & is.na(ramclustObj$classyfire[,2])
    )
    cat("this will take some time - maximum of", posts.per.minute, "posts per minute", '\n')
    for (i in get.full) {
      
      cat(i, " ")
      if (!is.na(ramclustObj$smiles[i])) {
        params <- list(label = "ramclustR", query_input = ramclustObj$smiles[i], 
                       query_type = "STRUCTURE")
        submit <- httr::POST("http://classyfire.wishartlab.com/queries", 
                             body = params, encode = "json", httr::accept_json(), 
                             httr::add_headers(`Content-Type` = "application/json"))
        Sys.sleep(1)
        query_id <- jsonlite::fromJSON(httr::content(submit, 
                                                     "text"))
        
        if(any(names(query_id) == "error")) {
          if(query_id$error == "Limit exceeded") {
            Sys.sleep(60)
            params <- list(label = "ramclustR", query_input = ramclustObj$smiles[i], 
                           query_type = "STRUCTURE")
            submit <- httr::POST("http://classyfire.wishartlab.com/queries", 
                                 body = params, encode = "json", httr::accept_json(), 
                                 httr::add_headers(`Content-Type` = "application/json"))
            query_id <- jsonlite::fromJSON(httr::content(submit, 
                                                         "text"))
          }
        }
        
        
        if(any(names(query_id) == "status")) {
          if(query_id$status == "500") {
            cat(" failed", '\n')
            next
          }
        }
        
        out <- list()
        out$classification_status <- "not done"
        out$number_of_elements <- 0
        Sys.sleep(1)
        skiptonext <- FALSE
        time.a <- Sys.time()
        while (out$number_of_elements == 0) {
          Sys.sleep(1)
          
          if(httr::http_error(paste0("http://classyfire.wishartlab.com/queries/",  query = query_id$id, ".json"))) {
            cat(" not done", '\n')
            break
          }
          out <- tryCatch( 
            {
              out <- jsonlite::fromJSON(paste0("http://classyfire.wishartlab.com/queries/", query = query_id$id, ".json"))
            }, 
            error = function(y) {
              out <- list()
              out$classification_status <- "not done"
              out$number_of_elements <- 0
              out
            }
          )
          
          if (round(as.numeric(difftime(Sys.time(), 
                                        time.a, units = "secs")), 3) >= max.wait) {
            cat("timed out", "\n")
            skiptonext <- TRUE
            break
          }
        }
        if (out$number_of_elements == 0) {
          ramclustObj$classyfire[i, ] <- c(ramclustObj$inchikey[i], 
                                           rep(NA, 6))
          rm(out)
        } else {
          a <- out$entities$inchikey
          if(is.null(a)) {
            a <- ramclustObj$inchikey[i]
          } else {
            a <- gsub("InChIKey=", "", a)
          }
          b <- out$entities$kingdom$name
          if (is.null(b)) {
            b <- NA
            c <- NA
            d <- NA 
            e <- NA
            f <- NA
            g <- NA
          } else {
            c <- if(length(out$entities$superclass)>1) {out$entities$superclass$name} else {c <- NA}
            if (is.null(c)) {c <- NA}
            d <- if(length(out$entities$class)>1) {out$entities$class$name} else {d <- NA}
            if (is.null(d)) {d <- NA}
            e <- if(length(out$entities$subclass)>1) {out$entities$subclass$name} else {e <- NA}
            if (is.null(e)) {e <- NA}
            f <- if(length(out$entities$direct_parent)>1) {out$entities$direct_parent$name} else {f <- NA}
            if (is.null(f)) {f <- NA}
            g <- if(nchar(out$entities$description)>1) {out$entities$description} else {g <- NA}
            if (is.null(g)) {g <- NA}
          }
          cat(" done", '\n')
          ramclustObj$classyfire[i, ] <- c(a, b, c, 
                                           d, e, f, g)
          rm(out)
        }
      }
      Sys.sleep(ceiling(60/posts.per.minute))
    }
  }
  
  ramclustObj$history$classyfire <- paste(
    "Compounds were assigned to chemical ontogenies using the ClassyFire API (Djoumbou 2016)."
  )
  
  if(is.null(ramclustObj$params)) {ramclustObj$params <- list()}
  ramclustObj$params$rc.cmpd.get.classyfire <- params
  
  
  return(ramclustObj)
}



######################################################################
## get classyfire heirarchy for a given vector of classyfire chemonids
#' get.classyfire.local
#'
#' use classyfire web API to look up full ClassyFire hirarchy for each inchikey
#' @details The get chem ontology heirarchy from chemontid. references local .obo file for faster lookup 
#' 
#' @param ramclustObj ramclustR object to ClassyFy.  Must supply one of either ramclustObj or inchikey (see below)
#' @param chemontid vector of chem ontology IDs.  
#' @param chemont.obo.location location of the chemontology OBO file. 
#' @return returns a ramclustR object.  new dataframe in $classyfire slot with rows equal to number of compounds.  
#' @importFrom jsonlite fromJSON
#' @importFrom httr http_error
#' @concept ramclustR
#' @concept RAMClustR
#' @concept metabolomics
#' @concept mass spectrometry
#' @concept clustering
#' @concept feature
#' @concept MSFinder
#' @concept xcms
#' @concept classyFire
#' @author Corey Broeckling
#' @references Djoumbou Feunang Y, Eisner R, Knox C, Chepelev L, Hastings J, Owen G, Fahy E, Steinbeck C, Subramanian S, Bolton E, Greiner R, and Wishart DS. ClassyFire: Automated Chemical Classification With A Comprehensive, Computable Taxonomy. Journal of Cheminformatics, 2016, 8:61. DOI: 10.1186/s13321-016-0174-y
#' @export 
get.classyfire.local <- function(
    ramclustObj = NULL,
    chemontid = NULL,
    chemont.obo.location = NULL
) {
  
  if(!is.null(ramclustObj) & is.null(chemontid)) {
    chemontid <- ramclustObj$annotations.full$chemontid
  }
  
  if(is.null(chemontid)) {stop("no chemontids submitted", '\n')}
  if(is.null(chemont.obo.location)) {stop("no chemont.obo file specified", '\n')}
  
  classyfire <- ontologyIndex::get_ontology(
    file = chemont.obo.location,
    propagate_relationships = "is_a",
    extract_tags = "minimal",
    merge_equivalent_terms = TRUE
  )
  
  cmpd.classyfire <- data.frame(
    'superclass'     = rep(NA, 0),
    'class'          = rep(NA, 0),
    'subclass'       = rep(NA, 0),
    'direct.parent'  = rep(NA, 0)
  )
  
  for(i in 1:length(chemontid)) {
    
    if(
      nchar(ramclustObj$annotations.full[i,'chemontid'])==0 | ramclustObj$annotations.full[i,'chemontid'] == "NA"
    ) {
      out <- data.frame(
        'superclass'     = NA,
        'class'          = NA,
        'subclass'       = NA,
        'direct.parent'  = NA
      )
      cmpd.classyfire <- rbind(
        cmpd.classyfire,
        out
      )
    } else {
      classyfication <- as.vector(classyfire$name[unlist(classyfire$ancestors[chemontid[i]])][3:5])
      out <- data.frame(
        'superclass'     = classyfication[1],
        'class'          = classyfication[2],
        'subclass'       = classyfication[3],
        'direct.parent'  = classyfire$name[chemontid[i]]
      )
      cmpd.classyfire <- rbind(
        cmpd.classyfire,
        out
      )
      
    }
    if(nrow(cmpd.classyfire)>i) {stop("too many rows. On: ", i, '\n')}
  }
  
  cmpd.classyfire[which(is.na(cmpd.classyfire), arr.ind = TRUE)] <- 'unassigned'
  
  if(is.null(ramclustObj)) {
    return(cmpd.classyfire)} else {
      ramclustObj$annotations.full <- 
        data.frame(
          ramclustObj$annotations.full,
          cmpd.classyfire
        )
      ramclustObj$annotations.selected <- ramclustObj$annotations.full[which(ramclustObj$annotations.full$assigned),2:ncol(ramclustObj$annotations.full)]
      return(ramclustObj)
    }
  
  write.csv(ramclustObj$annotations.full, file = paste0("spectra/", "all.annotations.csv"), row.names = FALSE)
  write.csv(ramclustObj$annotations.selected, file = paste0("spectra/", "assigned.annotations.csv"), row.names = FALSE)
}



######################################################################
## export classyfire sunburst plot as html
#' classyfire.sunburst.plot
#'
#' interactive sunburst plot for exploring the classyfire heirarchy.
#' @details interactive sunburst plot for exploring the classyfire heirarchy.
#' 
#' @param ramclustObj ramclustR object with classyfire data
#' @param subset.cmpds optionally, vector of integers of which compounds to include.  
#' @param out.file.name output file name, optionally.  
#' @return nothing - exported files  
#' @concept ramclustR
#' @concept RAMClustR
#' @concept metabolomics
#' @concept mass spectrometry
#' @concept clustering
#' @concept feature
#' @concept MSFinder
#' @concept xcms
#' @concept classyFire
#' @author Corey Broeckling
#' @references Djoumbou Feunang Y, Eisner R, Knox C, Chepelev L, Hastings J, Owen G, Fahy E, Steinbeck C, Subramanian S, Bolton E, Greiner R, and Wishart DS. ClassyFire: Automated Chemical Classification With A Comprehensive, Computable Taxonomy. Journal of Cheminformatics, 2016, 8:61. DOI: 10.1186/s13321-016-0174-y
#' @export 
classyfire.sunburst.plot <-  function(
    ramclustObj = RC, 
    subset.cmpds = rep(TRUE, length(ramclustObj$ann)),
    out.file.name = NULL
){
  
  if(length(which(subset.cmpds)) == 0) {
    error("all subset.cmpds are set to FALSE", '\n')
  }
  
  require(dplyr)
  require(plotly)
  
  ## sunburst plot
  ## may be able to add more info with 'hover' function to add things like stats results, or compound description.
  ct <- ramclustObj$annotations.selected[,c("superclass", "class", "subclass", "direct.parent", "compound.name")]
  
  ct <- ct[subset.cmpds,]
  
  ct <- replace(ct, is.na(ct), "unassigned")
  for(i in 1:ncol(ct)) {
    ct[,i] <- gsub("-", "_", ct[,i])
  }
  d <- data.frame(
    "ids" = rep(NA, 0),
    "labels" = rep(NA, 0),
    "parents" = rep(NA, 0),
    "values"= rep(0, 0)
  )
  superclasses <- unique(ct[,"superclass"])
  for(i in 1:length(superclasses)) {
    superclass <- superclasses[i]
    ## add root node element
    ctsub <- ct[which(ct$superclass == superclass),]
    out <- data.frame(
      "ids" = superclass,
      "labels" = superclass,
      "parents" = "",
      "values" = nrow(ctsub)
    )
    d <- rbind(d, out)
    
    ## for all classes within superclass i
    classes <- unique(ctsub[,"class"])
    for(j in 1:length(classes)) {
      class <- classes[j]
      ctsub2 <- ctsub[which(ctsub$class == class),]
      if(nrow(ctsub2) == 0) next
      out <- data.frame(
        "ids" = paste0(c(superclass, class), collapse = "-"),
        "labels" = class,
        "parents" = superclass,
        "values" = nrow(ctsub2)
      )
      d <- rbind(d, out)
      
      ## for all subclasses within class j
      subclasses <- unique(ctsub2[,"subclass"])
      for(k in 1:length(subclasses)) {
        subclass <- subclasses[k]
        ctsub3 <- ctsub2[which(ctsub2$subclass == subclass),]
        if(nrow(ctsub3) == 0) next
        if(class == "unassigned" & subclass == "unassigned") next
        out <- data.frame(
          "ids" = paste0(c(class, subclass), collapse = "-"),
          "labels" = subclass,
          "parents" = paste0(c(superclass, class), collapse = "-"),
          "values" = nrow(ctsub3)
        )
        d <- rbind(d, out)
        
        # for all compounds within subclass k
        compounds <- unique(ctsub3[,"compound.name"])
        for(m in 1:length(compounds)) {
          compound <- compounds[m]
          ctsub5 <- ctsub3[which(ctsub3$compound.name == compound),]
          if(nrow(ctsub5) == 0) next
          if(subclass == "unassigned" & compound == "unassigned") next
          out <- data.frame(
            "ids" = paste0(c(subclass, compound), collapse = "-"),
            "labels" = compound,
            "parents" = paste0(c(class, subclass), collapse = "-"),
            "values" = nrow(ctsub5)
          )
          d <- rbind(d, out)
          
          # }
        }
      }
    }
  }
  fig2 <- plotly::plot_ly(d, 
                          ids = ~ids, 
                          labels = ~labels, 
                          parents = ~parents, 
                          values = ~values,
                          branchvalues = "total",
                          insidetextorientation='radial',
                          maxdepth=2,
                          type = 'sunburst')
  
  if(is.null(out.file.name)) {
    htmlwidgets::saveWidget(fig2, file=paste0( getwd(), "/spectra/annotation.classyfire.sunburst.html"), 
                            knitrOptions = list()) 
    write.csv(d, file = paste0( getwd(), "/spectra/annotation.classyfire.sunburst.input.csv"), row.names = FALSE)
  } else {
    htmlwidgets::saveWidget(fig2, file=paste0( getwd(), out.file.name, ".html"), 
                            knitrOptions = list()) 
    write.csv(d, file = paste0( getwd(), out.file.name, ".input.csv"), row.names = FALSE)
  }
}
cbroeckl/csu.pmf.tools documentation built on Jan. 26, 2024, 6:27 p.m.