R/annotation_Uniprot_query.R

Defines functions annotation_uniprot_query

Documented in annotation_uniprot_query

#' Query UniProt for annotation of the sequences obtained from HMMER.
#'
#' For every pair of elements of `acc` and `acc2`, builds a UniProt query of
#' the form `<acc>+and+<acc2>` (anything from the first `.` onwards is
#' stripped from each accession), downloads the tab-separated result and
#' row-binds all results into a single data frame.
#'
#' @param acc A character vector with accession numbers.
#' @param acc2 A character vector with secondary accession numbers, paired
#'   element-wise with `acc`.
#' @param timeout A single positive number: time to wait, in seconds, for
#'   each download. Defaults to 2000.
#'
#' @return A data frame with one row per record returned by UniProt. The
#'   columns `Entry`, `Entry name`, `Status`, `Protein names`, `Gene names`
#'   and `Organism` are read as character and `Length` as double; the
#'   columns `acc` and `acc2` hold the accessions used for the query.
#' @export
#'
#' @examples
#' \dontrun{
#' annotation_uniprot_query("P69905", "Q9UCM0")
#' }

annotation_uniprot_query <- function(acc, acc2, timeout = 2000){
  # Validate first, so bad input fails before any global option is changed
  # or any request is made.
  if(!is.character(acc))
    stop("'acc' should be character")
  if(!is.character(acc2))
    stop("'acc2' should be character")
  if(!is.numeric(timeout) || length(timeout) != 1L || is.na(timeout) ||
     timeout <= 0)
    stop("'timeout' should be a single positive number")

  # The download below goes through a base `url()` connection, whose timeout
  # is controlled by the `timeout` option (an RCurl option object would not
  # affect it). Restore the caller's setting on exit.
  old_opts <- options(timeout = timeout)
  on.exit(options(old_opts), add = TRUE)

  # Progress bar: one tick per query. `map2_dfr()` recycles a length-one
  # argument, so size the bar on the longer input; skip it for empty input.
  n_queries <- max(length(acc), length(acc2))
  pb <- NULL
  if(n_queries > 0L){
    pb <- progress::progress_bar$new(total = n_queries)
    pb$tick(0)
  }

  # Column specification for the tab-separated UniProt answer.
  uniprot_cols <- list(
    'Entry' = readr::col_character(),
    'Entry name' = readr::col_character(),
    'Status' = readr::col_character(),
    'Protein names' = readr::col_character(),
    'Gene names' = readr::col_character(),
    'Organism' = readr::col_character(),
    'Length' = readr::col_double()
  )

  purrr::map2_dfr(acc, acc2, function(x, y){
    # URL for entries matching both acc and acc2 (version suffix removed).
    # NOTE(review): this is the legacy UniProt query endpoint and `tab`
    # format -- confirm it is still served before relying on the column
    # names listed above.
    query_url <- paste0("http://www.uniprot.org/uniprot/?query=",
                        gsub("\\..*","",x),
                        "+and+",
                        gsub("\\..*","",y),
                        '&format=tab')
    d <- readr::read_tsv(url(query_url),
                         show_col_types = FALSE,
                         progress = FALSE,
                         col_types = uniprot_cols)
    # Record which accession pair produced these rows.
    d <- dplyr::mutate(d, acc = x, acc2 = y)
    pb$tick()
    d
  })
}
currocam/FascinRSCA documentation built on March 21, 2022, 6:29 a.m.