# R/biolink_annotate.R

# Defines functions: biolink_annotate

# Documented in: biolink_annotate

#' Annotate text through the BioLink API
#'
#' Sends `content` together with the filtering options as query parameters to
#' the `api/<resource>` endpoint of the BioLink service, then extracts every
#' `data-sciGraph="..."` attribute from the returned markup and splits it into
#' one row per annotation with the columns `name`, `id` and `category`.
#'
#' @author Francisco Requena
#' @param resource API resource appended to `api/`. Defaults to
#'   `"nlp/annotate"` when `NULL`.
#' @param content text to annotate; must not be missing or empty.
#' @param include_category value forwarded as the `include_category` query
#'   parameter (omitted from the request when `NULL`).
#' @param exclude_category value forwarded as the `exclude_category` query
#'   parameter (omitted from the request when `NULL`).
#' @param min_length value forwarded as the `min_length` query parameter.
#' @param longest_only value forwarded as the `longest_only` query parameter.
#' @param include_abbreviation value forwarded as the `include_abbreviation`
#'   query parameter.
#' @param include_acronym value forwarded as the `include_acronym` query
#'   parameter.
#' @param include_numbers value forwarded as the `include_numbers` query
#'   parameter.
#' @importFrom httr http_type http_error content modify_url GET
#' @importFrom tibble enframe
#' @importFrom dplyr select mutate na_if 
#' @importFrom stats na.omit
#' @importFrom magrittr %>%
#' @importFrom stringr str_remove str_match_all
#' @importFrom tidyr separate separate_rows
#' @importFrom jsonlite fromJSON
#' @return An object of class `biolink_api`: a list with the elements
#'   `content` (a tibble with the columns `name`, `id` and `category`; rows
#'   with an empty or missing field are dropped), `path` (the request URL) and
#'   `response` (the raw httr response).
#' @examples 
#' \dontrun{
#' biolink_annotate("nlp/annotate", content = "Marfan syndrome",
#'                  include_category = "anatomical entity")
#' }
#' 
#' @rdname biolink_annotate
#' @export 
biolink_annotate <- function(resource = NULL, content, include_category = NULL,
                             exclude_category = NULL, min_length = 4,
                             longest_only = FALSE,
                             include_abbreviation = FALSE,
                             include_acronym = FALSE,
                             include_numbers = FALSE) {

  # Fail early, before any network access, when there is nothing to annotate.
  if (missing(content) || is.null(content) || length(content) == 0L ||
      all(is.na(content) | !nzchar(content))) {
    stop("Content argument can not be empty", call. = FALSE)
  }

  # Without a resource the URL would end in "api/" and hit no endpoint.
  if (is.null(resource)) {
    resource <- "nlp/annotate"
  }

  # Package-level helper defined elsewhere in the package.
  check_internet()

  args <- list(
    content = content,
    include_category = include_category,
    exclude_category = exclude_category,
    min_length = min_length,
    longest_only = longest_only,
    include_abbreviation = include_abbreviation,
    include_acronym = include_acronym,
    include_numbers = include_numbers
  )

  url <- modify_url(base_url, path = paste0("api/", resource))

  # httr::GET() interprets *named* extra arguments as URL components and only
  # merges *unnamed* ones into the request configuration, so the user agent
  # must be passed positionally to take effect.
  # `ua` is defined elsewhere in the package; accept either a ready-made httr
  # config object or a plain character string.
  agent <- if (inherits(ua, "request")) ua else httr::user_agent(ua)
  resp <- GET(url, agent, query = args)

  # Package-level helper defined elsewhere in the package.
  check_response(resp)

  # Namespace-qualified because the `content` argument shadows httr::content().
  body <- httr::content(resp, as = "text", encoding = "UTF-8")
  markup <- jsonlite::fromJSON(body, simplifyVector = TRUE)

  # Collect the value of every data-sciGraph attribute (capture group 1, i.e.
  # matrix column 2) instead of relying on a fixed position in the match
  # matrix, which is only correct when exactly one span was annotated.
  matches <- str_match_all(markup, 'data-sciGraph="(.*?)"')
  annotations <- as.character(
    unlist(lapply(matches, function(m) m[, 2]), use.names = FALSE)
  )

  # A single attribute may carry several "name,id,category" triples separated
  # by "|". na_if() is applied column by column because calling it on a whole
  # data frame is an error in dplyr >= 1.1.0.
  result_tbl <- enframe(annotations, name = NULL, value = "parsed") %>%
    separate_rows(parsed, sep = "\\|") %>%
    separate(parsed, c("name", "id", "category"), sep = ",") %>%
    mutate(
      name = na_if(name, ""),
      id = na_if(id, ""),
      category = na_if(category, "")
    ) %>%
    na.omit()

  structure(
    list(
      content = result_tbl,
      path = url,
      response = resp
    ),
    class = "biolink_api"
  )
}


# print.biolink_api <- function(x, ...) {
#   cat("<BioLink ", x$path, ">\n", sep = "")
#   str(x$content)
#   invisible(x)
# }
# frequena/rbiolink documentation built on May 16, 2020, 10:20 p.m.