R/temp/check_download.R

Defines functions check_download

#' Check CMIP5 downloaded files
#'
#' Check whether all files listed in `files_txt` have been downloaded. If not,
#' the urls that are not downloaded yet are extracted and saved to txt files.
#'
#' For every url file, the scenario and the variable are parsed from its file
#' name; finished files are looked up in `<indir1>/<scenario>_<variable>` and in
#' the archive directory `indir2` (`paste0(indir1, "_2")`, created by
#' `check_dir()`). Urls are matched to finished files by their base name.
#'
#' @param files_txt Paths of the downloading url txt files generated by
#' `urls_CMIP5_ESGP`.
#' @param indir1 Temporary downloading directory. The subdirectories of
#' `indir1` should be named `<scenario>_<variable>`, in accordance with the
#' names of `files_txt`.
#'
#' @param outdir Output directory for the txt files of the remaining
#' (not downloaded) urls. If missing, `outdir` is set to the directory of the
#' first element of `files_txt`.
#' @param is_union If `TRUE`, only `rem_all.txt` and `rem_all_filename.txt`
#' are written; the per-file `<scenario>_<variable>_rem.txt` files are skipped.
#' @param is_archive Boolean, move finished files from `indir1` to `indir2`?
#' @param overwrite Boolean. If `FALSE` and a `*_rem.txt` file already exists
#' in `outdir`, only the urls that are also listed in that file are kept as
#' remaining.
#'
#' @return A character vector of the urls which are not downloaded yet
#' (`NULL` if everything is finished).
#'
#' @example man/examples/ex-check_download.R
#' @keywords internal
#' @export
check_download <- function(files_txt, indir1, outdir, 
    is_union = FALSE, is_archive = FALSE, overwrite = FALSE){

    indir2 <- paste0(indir1, "_2") %>% check_dir()
    # previously archived files; the dot is escaped so that only real `.nc`
    # files are matched
    files_finished2 <- dir(indir2, "\\.nc$", recursive = TRUE)

    if (missing(outdir)) outdir <- dirname(files_txt[1])

    nfile <- length(files_txt)
    res   <- vector("list", nfile) %>% set_names(names(files_txt))
    width <- 4 # width of the numbers in the progress message

    # `seq_along` (not `1:nfile`) so that an empty `files_txt` skips the loop
    for (i in seq_along(files_txt)){
        file_txt <- files_txt[i]
        # TODO: sort according to host speed
        d_url <- fread(file_txt, header = FALSE)[order(V1),]
        
        ## get scenario and variable
        pattern <- str_extract(basename(file_txt), ".*(?=\\.)") %>% 
            gsub("urls_mon_|urls_day_", "", .)
        scenario <- str_extract(pattern, "(rcp|RCP|his|pi)[a-z,A-Z,0-9]*")
        variable <- gsub(sprintf("%s_|_%s", scenario, scenario), "", pattern)
        outfile  <- sprintf("%s/%s_%s_rem.txt", outdir, scenario, variable)
        
        dir_i  <- sprintf("%s/%s_%s", indir1, scenario, variable)   

        files_finished <- aria2c_finished(dir_i)
        if (is_archive && length(files_finished) > 0) {
            cat(sprintf("  |%s file finished.\n", num_good(length(files_finished))))

            # keep the name of the parent directory in the archive
            dirs <- file.path(indir2, basename(dirname(files_finished)))
            dirs <- check_dir(dirs)
            files_new <- file.path(dirs, basename(files_finished))
            
            # `file.rename` itself warns about the files it fails to move
            file.rename(files_finished, files_new)
        }

        # Mark EVERY url whose file is already on disk. `%in%` is used instead
        # of `match(finished, urls)`, because the latter returns the first hit
        # only and hence leaves duplicated urls of a finished file as missing.
        names_finished <- basename(c(files_finished2, files_finished))
        is_finished    <- basename(d_url$V1) %in% names_finished

        d_left <- d_url[!is_finished, ]
        
        if (file.exists(outfile) && !overwrite) {
            # only keep the urls also listed in the previous remaining file
            d_rem  <- fread(outfile, header = FALSE)
            d_left <- d_left[d_left$V1 %in% d_rem$V1, ]
        }

        n_left     <- nrow(d_left)
        n_finished <- sum(is_finished)
        if (n_left > 0){
            if (!is_union) fwrite(d_left, outfile, col.names = FALSE)
            cat(sprintf("[m]: %s finished, and %s missing! | %s \n", 
                num_good(n_finished, width), 
                num_bad(n_left, width), basename(file_txt)))

            res[[i]] <- d_left[[1]]
        } else {
            ok(sprintf("[ok]: %s finished.\n", basename(file_txt)))
        }
    }
    # finished files leave a `NULL` in `res`, which `c` drops
    urls <- do.call(c, res)

    ## write the union of the remaining urls
    if (length(urls) > 0){
        cat(sprintf("[m]: %s files unfinished.\n", num_bad(length(urls))))
        if (is_union) {
            fwrite(data.table(urls), sprintf("%s/rem_all.txt", outdir), col.names = FALSE)
            fwrite(data.table(basename(urls)), sprintf("%s/rem_all_filename.txt", outdir), col.names = FALSE)    
        }
    } else {
        ok("[finished]: ===================\n")
    }
    urls
}
kongdd/CMIP5tools documentation built on Dec. 17, 2020, 11:03 a.m.