# data-raw/mirror.R

#!/usr/bin/env Rscript
## Mirror all raw, user-downloadable datasets hosted on the UCSC Xena hubs.

# Update to the latest UCSCXenaTools release before running:
# BiocManager::install("UCSCXenaTools")

suppressMessages(library(UCSCXenaTools))
library(furrr)

# ---- Command-line arguments and execution plan ----
# Exactly two trailing arguments are expected: the destination directory of
# the mirror and the number of parallel workers (0 = run sequentially).
args <- commandArgs(trailingOnly = TRUE)

if (length(args) != 2) {
    message("Error: two arguments are required!")
    message()
    message("Usage: ./mirror.R <destdir> <thread>")
    message("Set thread to 0 to run sequentially.")
    quit("no", status = 1)
}

destdir <- path.expand(args[1])
# as.integer() returns NA (with a warning) for non-numeric text; the NA is
# handled explicitly below, so the coercion warning itself is silenced.
threads <- suppressWarnings(as.integer(args[2]))

# Reject anything that is not a usable worker count up front; otherwise the
# `threads != 0` test below would fail on NA with an unhelpful error.
if (is.na(threads) || threads < 0) {
    message("Error: <thread> must be a non-negative integer, got: ", args[2])
    message()
    message("Usage: ./mirror.R <destdir> <thread>")
    message("Set thread to 0 to run sequentially.")
    quit("no", status = 1)
}

# The destination directory must exist before the log file inside it can be
# opened by sink().
if (!dir.exists(destdir)) dir.create(destdir, recursive = TRUE)

# Redirect standard output to the log file. Note that sink() redirects
# standard output only; message() output still goes to stderr.
# on.exit() is intentionally not used here: it only fires when a function
# frame exits, so at script top level it cannot be relied on to close the sink.
sink(file = file.path(destdir, "mirror.txt"))
message("destdir: ", destdir)
message("threads: ", threads)

# Pick the mapping function: parallel (furrr on a multisession plan) when a
# worker count is given, plain sequential purrr::map otherwise.
if (threads != 0) {
    future::plan(future::multisession, workers = threads)
    call_fun <- furrr::future_map
} else {
    call_fun <- purrr::map
}

# Download every dataset described by `x` into `destdir`.
#
# `x` is handed to XenaGenerate(), its result to XenaQuery(), and the
# resulting query to XenaDownload(); the value of XenaDownload() is returned.
download_dataset <- function(x, destdir) {
    hub_object <- XenaGenerate(x)
    hub_query <- XenaQuery(hub_object)
    XenaDownload(
        hub_query,
        destdir = destdir,
        download_probeMap = TRUE,
        trans_slash = FALSE,
        force = TRUE,
        # curl's "-C -" flag resumes interrupted transfers.
        method = "curl",
        extra = "-C -"
    )
}

# ---- Mirror every hub ----
# Dataset table to mirror (presumably the XenaData table shipped with
# UCSCXenaTools -- it is not defined in this script).
access_datasets <- XenaData # XenaDataUpdate(saveTolocal = FALSE)
hubs <- unique(access_datasets$XenaHostNames)

# Download each hub into its own sub-directory of `destdir`; each element of
# the result is whatever download_dataset() returned for that hub.
dataset_list <- call_fun(hubs, function(h) {
    # Append this hub's standard output to the shared log file, and always pop
    # the sink again when the hub is done so sinks do not pile up (sequential
    # mode) or stay unbalanced inside a worker session (parallel mode).
    sink(file = file.path(destdir, "mirror.txt"), append = TRUE)
    on.exit(sink(), add = TRUE)
    x <- subset(access_datasets, XenaHostNames == h)
    message("Mirroring hub: ", h)
    message("================================")
    download_dataset(x, file.path(destdir, h))
})

# Stack the per-hub results into a single table.
mapping_df <- Reduce(rbind, dataset_list)

message("================================")
message("The mirror work has done, please check the mirror.txt.")

# NOTE(review): quote = FALSE writes fields unquoted, so a value containing a
# comma would corrupt the CSV -- confirm no column can contain one.
write.csv(mapping_df,
          file = file.path(destdir, "mapping_dataframe.csv"),
          row.names = FALSE, quote = FALSE)

# Close any output sink that is still open so the log file is flushed and
# released before the script ends.
while (sink.number() > 0) sink()
# ShixiangWang/UCSCXenaTools documentation built on Jan. 17, 2024, 2:19 a.m.