R/myco_search.R

Defines functions parse_links myco_search

Documented in myco_search

#' Search Mycobank
#'
#' @export
#' @param layout (character) One of 'mbwservice' or 'mycobank'. There is no explanation of what
#' these different layouts are. Help anyone?
#' @param filter (character) A variety of inputs. See Details.
#' @param limit (integer) Number to return. Default: 100
#' @param ... Further args passed onto httr::GET
#'
#' @details
#'
#' For the filter parameter, use the following syntax:
#' \code{<FieldCode1><Operator>"<Value1>" AND|OR|NOT <FieldCode2><Operator>"<Value2>"}. Use the
#' special value: $NULL$ to filter on Null values and $EMPTY$ to filter on empty string values.
#' These two special values are not case sensitive.
#'
#' For mbwservice, the options are
#'
#' \itemize{
#'  \item Authors_ text, e.g., \code{Authors_ CONTAINS "Value"}
#'  \item Epithet_ text, e.g., \code{Epithet_ CONTAINS "Value"}
#'  \item MycoBankNr_ numeric, e.g., \code{MycoBankNr_="1"}
#'  \item Name text, e.g., \code{Name CONTAINS "Value"}
#'  \item Year of publication DateTime (YYYYMMDDHHmmss), e.g., \code{NameYear_<"20140826022700"}
#' }
#'
#' For mycobank, the options are
#'
#' \itemize{
#'  \item Authors_ text, e.g., \code{Authors_ CONTAINS "Value"}
#'  \item Classification_ text, e.g., \code{Classification_="Value"}
#'  \item Epithet_ text, e.g., \code{Epithet_ CONTAINS "Value"}
#'  \item MycoBankNr_ numeric, e.g., \code{MycoBankNr_="1"}
#'  \item Name text, e.g., \code{NAME CONTAINS "Value"}
#'  \item NameStatus_ text, e.g., \code{NameStatus_ = "Value"}
#'  \item Summary text, e.g., \code{E3787 CONTAINS "Value"}
#'  \item Synonymy text, e.g., \code{E4060 CONTAINS "Value"}
#'  \item Year of publication DateTime (YYYYMMDDHHmmss), e.g., \code{NameYear_<"20140826022700"}
#' }
#'
#' Operator options
#'
#' \itemize{
#'  \item =	Equality
#'  \item <	Lower than
#'  \item >	Greater than
#'  \item <> Different
#'  \item <= Lower than or equals
#'  \item >= Greater than or equals
#'  \item CONTAINS	Contains the specified string
#'  \item STARTSWITH	Starts with the specified string
#'  \item ENDSWITH	Ends with the specified string
#' }
#'
#' @references \url{http://www.mycobank.org/Services/Generic/Help.aspx?s=searchservice}
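#' @return A data.frame with one row per taxon record returned by the service. Column names
#' are lower-cased with any trailing underscore removed, and the links parsed from each
#' record's u3733 field are added as extra columns.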
#'
#' @examples \dontrun{
#' myco_search(filter='MycoBankNr_="344025"')
#' head(myco_search(filter='MycoBankNr_>"344025"'))
#' myco_search(filter='Name CONTAINS "Candida boidinii"')
#' myco_search(filter='Name CONTAINS "Candida"')
#' myco_search(filter='Name STARTSWITH "Candida"')
#' myco_search(filter='Name ENDSWITH "Candida"')
#'
#' # using layout = mycobank
#' myco_search(layout='mycobank', filter='MycoBankNr_="344025"')
#' head(myco_search(layout='mycobank', filter='MycoBankNr_>"344025"'))
#'
#' # Curl debugging
#' library('httr')
#' myco_search(filter='MycoBankNr_="344025"', config=verbose())
#' }

myco_search <- function(layout='mbwservice', filter="", limit=100, ...) {
  # Map the layout name onto the numeric layout id sent to the service. The id
  # is stored separately so `layout` still holds the name for the column
  # rename further down; an unrecognised name fails here, before any request
  # is made.
  layout_id <- switch(
    layout,
    mycobank = '14682616000000161',
    mbwservice = '14682616000003562',
    stop("'layout' must be one of 'mbwservice' or 'mycobank'", call. = FALSE)
  )
  url <- "http://www.mycobank.org/Services/Generic/SearchService.svc/rest/xml"
  args <- tsc(list(layout = layout_id, filter = filter, limit = limit))
  res <- GET(url, query = args, ...)
  stop_for_status(res)
  tt <- content(res, "text", encoding = "UTF-8")
  parsed <- xmlParse(tt)

  # One <Taxon> node per record; each becomes one row of the result.
  nodes <- xpathSApply(parsed, "//Taxon")
  dat <- lapply(nodes, function(x) {
    tmp <- xmlToList(x, simplify = FALSE)
    # Empty fields come back as NULL; turn them into NA so they are kept as
    # columns when the rows are bound together.
    tmp[vapply(tmp, is.null, logical(1))] <- NA
    # The u3733 field carries the record's links as embedded markup: drop the
    # raw field and append the parsed links as separate columns instead.
    # `[[` is used rather than `$` to avoid partial name matching.
    c(tmp[names(tmp) != 'u3733'], parse_links(tmp[['u3733']]))
  })

  # fill = TRUE because records do not necessarily share the same fields.
  df <- setDF(rbindlist(dat, fill = TRUE, use.names = TRUE))
  names(df) <- gsub("_$", "", tolower(names(df)))

  # For the mbwservice layout, expose field e3787 under the name 'summary'.
  if (layout == 'mbwservice' && 'e3787' %in% names(df)) {
    names(df)[names(df) == 'e3787'] <- 'summary'
  }
  df
}

# Parse the link markup of a single record into a one-row data.frame.
#
# `y` is the content of a record's u3733 field as passed in by myco_search().
# Returns NULL when there is nothing to parse (NULL, zero-length or all-NA
# input); otherwise a data.frame with one column per <name> element
# (lower-cased) holding the text of the corresponding <url> element.
parse_links <- function(y){
  # all() keeps the condition a single TRUE/FALSE even when `y` is not a
  # scalar; `||` requires length-one operands.
  if (is.null(y) || all(is.na(y))) {
    return(NULL)
  }
  res <- htmlParse(y)
  namez <- xpathSApply(res, "//name", xmlValue)
  urls <- as.list(xpathSApply(res, "//url", xmlValue))
  names(urls) <- tolower(namez)
  data.frame(urls, stringsAsFactors = FALSE)
}
ropensci/taxizesoap documentation built on May 18, 2022, 7:33 p.m.