Nothing
##' Download Datasets from UCSC Xena Hubs
##'
##' Avaliable datasets list: <https://xenabrowser.net/datapages/>
##'
##' @author Shixiang Wang <w_shixiang@163.com>
##' @param xquery a tibble object generated by [XenaQuery] function.
##' @param destdir specify a location to store download data. Default is system temp directory.
##' @param download_probeMap if `TRUE`, also download ProbeMap data, which used for id mapping.
##' @param trans_slash logical, default is `FALSE`. If `TRUE`, transform slash '/' in dataset id
##' to '__'. This option is for backwards compatibility.
##' @param max_try time limit to try downloading the data.
##' @param force logical. if `TRUE`, force to download data no matter whether files exist.
##' Default is `FALSE`.
##' @param ... other argument to `download.file` function
##' @return a `tibble`
##' @export
##' @importFrom utils download.file
##' @importFrom dplyr filter
##' @examples
##' \dontrun{
##' xe = XenaGenerate(subset = XenaHostNames == "tcgaHub")
##' hosts(xe)
##' xe_query = XenaQuery(xe)
##' xe_download = XenaDownload(xe_query)
##' }
XenaDownload <- function(xquery,
destdir = tempdir(),
download_probeMap = FALSE,
trans_slash = FALSE,
force = FALSE,
max_try = 3L,
...) {
stopifnot(is.data.frame(xquery), c("url") %in% names(xquery), is.logical(download_probeMap))
if (download_probeMap) {
xquery_probe <- UCSCXenaTools::XenaData %>%
dplyr::filter(XenaDatasets %in% xquery$datasets) %>%
XenaGenerate() %>%
XenaQueryProbeMap()
xquery <- dplyr::bind_rows(xquery, xquery_probe)
}
if (trans_slash) {
xquery$fileNames <- gsub(
pattern = "/",
replacement = "__",
x = xquery$datasets
)
} else {
xquery$fileNames <- xquery$datasets
}
xquery$fileNames <- ifelse(grepl("\\.gz", xquery$url),
paste0(xquery$fileNames, ".gz"),
xquery$fileNames
)
# destdir = paste0(destdir,"/")
xquery$destfiles <- file.path(destdir, xquery$fileNames)
if (!dir.exists(destdir)) {
dir.create(destdir, recursive = TRUE, showWarnings = FALSE)
}
message("All downloaded files will under directory ", destdir, ".")
if (!trans_slash) {
dir_names <- dirname(xquery$destfiles)
message("The 'trans_slash' option is FALSE, keep same directory structure as Xena.")
message("Creating directories for datasets...")
for (i in dir_names) {
dir.create(i, recursive = TRUE, showWarnings = FALSE)
}
}
# Make sure the order is right
xquery <- dplyr::select(xquery, c("hosts", "datasets", "url", "fileNames", "destfiles"), dplyr::everything())
download_dataset <- function(x) { # nocov start
tryCatch(
{
if (!file.exists(x[5]) | force) {
message("Downloading ", x[4])
download.file2(x[3], destfile = x[5], max_try = max_try, ...)
} else {
message(x[5], ", the file has been download!")
}
},
error = function(e) {
message(
"Can not find file",
x[4],
", this file maybe not compressed."
)
x[3] <- gsub(pattern = "\\.gz$", "", x[3])
x[4] <- gsub(pattern = "\\.gz$", "", x[4])
x[5] <- gsub(pattern = "\\.gz$", "", x[5])
message("Try downloading file", x[4], "...")
tryCatch(
{
download.file2(x[3], destfile = x[5], max_try = max_try, ...)
},
error = function(e) {
message("Your network is bad (try again) or the data source is invalid (report to the developer).")
invisible(NULL)
}
)
}
)
}
apply(xquery, 1, download_dataset) # nocov end
if (trans_slash) {
message(
"Note file names inherit from names in datasets column\n and '/' all changed to '__'."
)
}
invisible(xquery)
}
download.file2 <- function(url, destfile,
max_try = 3L,
...) {
Sys.sleep(0.01)
tryCatch(
{
if (abs(max_try - 4L) > 1) {
message("==> Trying #", abs(max_try - 4L))
}
download.file(url, destfile, ...)
},
error = function(e) {
if (max_try == 1) {
message("Tried 3 times but failed, please check your internet connection!")
invisible(NULL)
} else {
download.file2(url, destfile, max_try = max_try - 1L, ...)
}
}
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.