#' Check CMIP5 downloaded files
#'
#' Check whether every file listed in `files_txt` has been downloaded. URLs
#' whose files are still missing are collected, optionally written to txt
#' files in `outdir`, and returned.
#'
#' For each URL list, the scenario and variable are parsed from the file name
#' (after removing a `urls_mon_` or `urls_day_` prefix and the extension), and
#' finished files are looked up in `<indir1>/<scenario>_<variable>` as well as
#' in the archive directory `<indir1>_2`. Files are matched to URLs by
#' basename.
#'
#' @param files_txt Character vector of paths to the URL txt files (one URL
#' per line, no header) generated by `urls_CMIP5_ESGP`. If named, the names
#' are carried over to the result.
#' @param indir1 Temporary download directory. Its subdirectories should be
#' named `<scenario>_<variable>`, matching the names of `files_txt`. The
#' archive directory `indir2` is `paste0(indir1, "_2")`.
#' @param outdir Output directory for the txt files listing the remaining
#' (not yet downloaded) URLs. If missing, `outdir` is set to the directory of
#' the first element of `files_txt`.
#' @param is_union If `TRUE`, the per-list `<scenario>_<variable>_rem.txt`
#' files are not written; instead, all remaining URLs are written to
#' `rem_all.txt` and their file names to `rem_all_filename.txt`.
#' @param is_archive Boolean. Move finished files from `indir1` to `indir2`?
#' @param overwrite Boolean. If `FALSE` and a `<scenario>_<variable>_rem.txt`
#' file already exists in `outdir`, only URLs that are also listed in that
#' file are kept as remaining. If `TRUE`, the existing file is ignored.
#'
#' @return A character vector of the URLs that still need to be downloaded,
#' or `NULL` if nothing is left.
#'
#' @example man/examples/ex-check_download.R
#' @keywords internal
#' @export
check_download <- function(files_txt, indir1, outdir,
                           is_union = FALSE, is_archive = FALSE,
                           overwrite = FALSE) {
  indir2 <- paste0(indir1, "_2") %>% check_dir()
  # `pattern` is a regular expression (not a glob), so the dot is escaped.
  files_finished2 <- dir(indir2, "\\.nc$", recursive = TRUE)

  nfile <- length(files_txt)
  # With an empty `files_txt` there is no first element to derive `outdir`
  # from; it is not needed in that case because nothing gets written.
  if (missing(outdir) && nfile > 0) outdir <- dirname(files_txt[1])

  res <- vector("list", nfile) %>% set_names(names(files_txt))
  width <- 4 # display width of the counts in the progress message

  # seq_along() (instead of 1:nfile) makes the loop a no-op for empty input.
  for (i in seq_along(files_txt)) {
    file <- files_txt[i]
    # TODO: sort according to host speed
    d_url <- fread(file, header = FALSE)[order(V1), ]

    ## get scenario and variable
    pattern <- str_extract(basename(file), ".*(?=\\.)") %>%
      gsub("urls_mon_|urls_day_", "", .)
    scenario <- str_extract(pattern, "(rcp|RCP|his|pi)[a-z,A-Z,0-9]*")
    variable <- gsub(sprintf("%s_|_%s", scenario, scenario), "", pattern)

    outfile <- sprintf("%s/%s_%s_rem.txt", outdir, scenario, variable)
    dir_i <- sprintf("%s/%s_%s", indir1, scenario, variable)
    files_finished <- aria2c_finished(dir_i)

    if (is_archive && length(files_finished) > 0) {
      cat(sprintf(" |%s file finished.\n", num_good(length(files_finished))))
      dirs <- paste(indir2, basename(dirname(files_finished)), sep = "/")
      dirs <- check_dir(dirs)
      files_new <- paste(dirs, basename(files_finished), sep = "/")
      # file.rename() itself warns about any file it fails to move.
      file.rename(files_finished, files_new)
    }

    # Mark every URL whose file name is already on disk. `%in%` (rather than
    # `match(finished, urls)`) also catches the same file listed under several
    # URLs, e.g. on different hosts.
    names_finished <- basename(c(files_finished2, files_finished))
    I_finished <- which(basename(d_url$V1) %in% names_finished)

    d_left <- d_url
    if (length(I_finished) > 0) d_left <- d_url[-I_finished, ]

    # Unless overwriting, keep only URLs that a previous run also recorded as
    # remaining.
    if (file.exists(outfile) && !overwrite) {
      d_rem <- fread(outfile, header = FALSE)
      I <- match(d_left$V1, d_rem$V1) %>% which.notna()
      d_left <- d_left[I, ]
    }

    n_left <- nrow(d_left)
    n_finished <- length(I_finished)
    if (n_left > 0) {
      if (!is_union) fwrite(d_left, outfile, col.names = FALSE)
      cat(sprintf("[m]: %s finished, and %s missing! | %s \n",
                  num_good(n_finished, width),
                  num_bad(n_left, width), basename(file)))
      res[[i]] <- d_left[[1]]
    } else {
      ok(sprintf("[ok]: %s finished.\n", basename(file)))
    }
  }

  urls <- do.call(c, res)
  ## rm archived files
  if (length(urls) > 0) {
    cat(sprintf("[m]: %s files unfinished.\n", num_bad(length(urls))))
    if (is_union) {
      fwrite(data.table(urls), sprintf("%s/rem_all.txt", outdir),
             col.names = FALSE)
      fwrite(data.table(basename(urls)),
             sprintf("%s/rem_all_filename.txt", outdir), col.names = FALSE)
    }
  } else {
    ok("[finished]: ===================\n")
  }
  urls
}
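# NOTE(review): the two lines below look like web-page residue rather than R
# code; they are kept as comments so that the file still parses.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.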