R/XenaDownload.R

Defines functions download.file2 XenaDownload

Documented in XenaDownload

##' Download Datasets from UCSC Xena Hubs
##'
##' Avaliable datasets list: <https://xenabrowser.net/datapages/>
##'
##' @author Shixiang Wang <w_shixiang@163.com>
##' @param xquery a tibble object generated by [XenaQuery] function.
##' @param destdir specify a location to store download data. Default is system temp directory.
##' @param download_probeMap if `TRUE`, also download ProbeMap data, which used for id mapping.
##' @param trans_slash logical, default is `FALSE`. If `TRUE`, transform slash '/' in dataset id
##' to '__'. This option is for backwards compatibility.
##' @param max_try time limit to try downloading the data.
##' @param force logical. if `TRUE`, force to download data no matter whether files exist.
##'  Default is `FALSE`.
##' @param ... other argument to `download.file` function
##' @return a `tibble`
##' @export
##' @importFrom utils download.file
##' @importFrom dplyr filter
##' @examples
##' \dontrun{
##' xe = XenaGenerate(subset = XenaHostNames == "tcgaHub")
##' hosts(xe)
##' xe_query = XenaQuery(xe)
##' xe_download = XenaDownload(xe_query)
##' }

XenaDownload <- function(xquery,
                         destdir = tempdir(),
                         download_probeMap = FALSE,
                         trans_slash = FALSE,
                         force = FALSE,
                         max_try = 3L,
                         ...) {
  stopifnot(is.data.frame(xquery), c("url") %in% names(xquery), is.logical(download_probeMap))

  if (download_probeMap) {
    xquery_probe <- UCSCXenaTools::XenaData %>%
      dplyr::filter(XenaDatasets %in% xquery$datasets) %>%
      XenaGenerate() %>%
      XenaQueryProbeMap()
    xquery <- dplyr::bind_rows(xquery, xquery_probe)
  }

  if (trans_slash) {
    xquery$fileNames <- gsub(
      pattern = "/",
      replacement = "__",
      x = xquery$datasets
    )
  } else {
    xquery$fileNames <- xquery$datasets
  }

  xquery$fileNames <- ifelse(grepl("\\.gz", xquery$url),
    paste0(xquery$fileNames, ".gz"),
    xquery$fileNames
  )
  # destdir = paste0(destdir,"/")
  xquery$destfiles <- file.path(destdir, xquery$fileNames)

  if (!dir.exists(destdir)) {
    dir.create(destdir, recursive = TRUE, showWarnings = FALSE)
  }

  message("All downloaded files will under directory ", destdir, ".")
  if (!trans_slash) {
    dir_names <- dirname(xquery$destfiles)
    message("The 'trans_slash' option is FALSE, keep same directory structure as Xena.")
    message("Creating directories for datasets...")
    for (i in dir_names) {
      dir.create(i, recursive = TRUE, showWarnings = FALSE)
    }
  }

  # Make sure the order is right
  xquery <- dplyr::select(xquery, c("hosts", "datasets", "url", "fileNames", "destfiles"), dplyr::everything())
  download_dataset <- function(x) { # nocov start
    tryCatch(
      {
        if (!file.exists(x[5]) | force) {
          message("Downloading ", x[4])
          download.file2(x[3], destfile = x[5], max_try = max_try, ...)
        } else {
          message(x[5], ", the file has been download!")
        }
      },
      error = function(e) {
        message(
          "Can not find file",
          x[4],
          ", this file maybe not compressed."
        )
        x[3] <- gsub(pattern = "\\.gz$", "", x[3])
        x[4] <- gsub(pattern = "\\.gz$", "", x[4])
        x[5] <- gsub(pattern = "\\.gz$", "", x[5])
        message("Try downloading file", x[4], "...")
        tryCatch(
          {
            download.file2(x[3], destfile = x[5], max_try = max_try, ...)
          },
          error = function(e) {
            message("Your network is bad (try again) or the data source is invalid (report to the developer).")
            invisible(NULL)
          }
        )
      }
    )
  }

  apply(xquery, 1, download_dataset) # nocov end

  if (trans_slash) {
    message(
      "Note file names inherit from names in datasets column\n  and '/' all changed to '__'."
    )
  }

  invisible(xquery)
}

download.file2 <- function(url, destfile,
                           max_try = 3L,
                           ...) {
  Sys.sleep(0.01)
  tryCatch(
    {
      if (abs(max_try - 4L) > 1) {
        message("==> Trying #", abs(max_try - 4L))
      }
      download.file(url, destfile, ...)
    },
    error = function(e) {
      if (max_try == 1) {
        message("Tried 3 times but failed, please check your internet connection!")
        invisible(NULL)
      } else {
        download.file2(url, destfile, max_try = max_try - 1L, ...)
      }
    }
  )

}
ropensci/UCSCXenaTools documentation built on Jan. 27, 2024, 6:30 a.m.