R/fetch_flybase.R

#' Retrieving URLs for FlyBase ID and Symbol Synonym Conversion Table
#'
#' \code{get_fbase_url()} calls RCurl to check FlyBase FTP release repository
#' to check available version. If not otherwise specified, it will generate
#' dowdload URLs for table for FlyBase ID and Synonym.
#'
#' @param version a character specifying the version desired
#' (e.g., "FB2019_01".)
#' @param dirstr a list returned by \code{get_fbase_url()}
#'
#' @return a named list with 4 items ("fbid", "syno", "dirstr", and "version")
get_fbase_url <- function(version = NULL, dirstr = NULL) {
  if (!is.null(dirstr)) {
    filelist <- dirstr[["dirstr"]]
  } else {
    # Find current FBid mapping and synonyms from FlyBase release
    filelist <- RCurl::getURL("ftp://ftp.flybase.net/releases/")
    filelist <- data.table::fread(text = filelist, fill = TRUE,
                                  data.table = FALSE)
  }

  # Default version is the newest version
  if (is.null(version)) {
    version <- filelist[nrow(filelist), ncol(filelist)]
  }
  message(paste0("Retrieving URL for ", version, ".\n"))

  # Check if the version number is legal
  if (!grepl("^FB[0-9]{4}_[0-9]{2}", version)) {
    stop(paste("The format for version number seems to be wrong.",
               "It is FB[year]_[month] (e.g., 'FB2019_01').\n"))
  }

  # Extract available version on the server
  ver_list <- filelist[ , 9]
  ver_list <- ver_list[!ver_list %in% c("README", "current")]

  if (!version %in% ver_list) {
    msg <- paste0("The version is not available on FlyBase. ",
                  "Please try the follows: ",
                  paste(ver_list, collapse = ", "), ".\n")
    stop(msg)
  }

  # Make URL for the specified version
  rldate <- gsub("^FB", "", version)
  ftpurl_prefix <- paste0("ftp://ftp.flybase.net/releases/",
                          version, "/precomputed_files/")
  fbid_postfix <- paste0("genes/fbgn_annotation_ID_fb_", rldate,".tsv.gz")
  syno_postfix <- paste0("synonyms/fb_synonym_fb_", rldate, ".tsv.gz")
  fbid_url <- paste0(ftpurl_prefix, fbid_postfix)
  syno_url <- paste0(ftpurl_prefix, syno_postfix)
  urls <- list(fbid_url, syno_url, filelist, version)
  names(urls) <- c("fbid", "syno", "dirstr", "version")
  return(urls)
}


#' Downloading FlyBase ID Conversion Table and Symbol Synonym Table
#'
#' \code{fetch_flybase()} downloads conversion tables for different version of
#' Flybase IDs and symbol synonyms from Flybase. If not otherwise specified,
#' current version of the tables will be downloaded, and returned as a list
#' with 2 items.
#'
#' @param version a character string specifying the version desired
#' (e.g., "FB2019_01".)
#' @param paths a character vector specifying local files to read. The length
#' should be 2: the first for FBid conversion table, and the second for synonym
#' conversion. This should only be used in test to make sure the convesion
#' coheres with the current official version.
#' @param urls (Optional) a character vector specifying specific URLs to
#' download and read. The length should be 2: the first URL for FBid conversion
#' table, and the second for synonym conversion.
#'
#' @return a list of 2 data frames
fetch_flybase <- function (urls = NULL, paths = NULL, version = NULL) {
  # Get urls for FB id and FB synonyms for conversion
  if (!is.null(urls)) {
    # If the user provides own URL vector, make it a list
    # with consistent structure with the one generated by
    # get_fbase_url()
    urls <- as.list(urls)
    names(urls) <- c("fbid", "syno")
  }


  # Loading local file in tests to prevent long downloading time
  if (!is.null(paths)) {
    urls <- as.list(paths)
    names(urls) <- c("fbid", "syno")
    fbtable_list <- lapply(urls, function(x) {
      data.table::fread(
        input = x, quote = "", sep = "\t", data.table = FALSE
      )
    })
    return(fbtable_list)
  }

  if (is.null(urls) & is.null(paths)) {urls <- get_fbase_url(version = version)}

  fbtable_list <- list()
  for (nameattr in c("fbid", "syno")) {

    temp_dlfile <- tempfile(fileext = ".gz")
    utils::download.file(url = urls[[nameattr]], destfile = temp_dlfile)

    # The user might give URLs to non-Gzipped file, so it's necessary
    # to check and decide whether gunzip is required.
    gzipped <- R.utils::isGzipped(temp_dlfile)
    read_path <- temp_dlfile
    if (gzipped) {read_path <- R.utils::gunzip(temp_dlfile)}

    fbtable_list[[nameattr]] <- data.table::fread(
      input = read_path, quote = "", sep = "\t", data.table = FALSE
    )
  }

  fbtable_list[["version"]] <- urls[["version"]]
  return(fbtable_list)
}
chenyenchung/genofeatutil documentation built on May 15, 2019, 10:38 p.m.