# R/urls_CMIP5_ESGP.R

# Defines functions read_wget_ESGP, urls_CMIP5_ESGP

#' Read file names and urls from an ESGP wget script
#'
#' Parses the download entries of a wget `sh` script, i.e. the lines starting
#' with a single quote, whose space-separated fields are the file name, the
#' url and (optionally) checksum information. Only entries whose file name
#' ends with `.nc` are kept, so `nc4` entries are dropped.
#'
#' A message is emitted when the last line of the script is not
#' `echo "done"`.
#'
#' @param file The path of the wget `sh` script.
#' @param is_save If `TRUE`, the urls are written (one per line, no header)
#' to a txt file next to `file`, named by replacing the `.sh` extension with
#' `.txt`.
#'
#' @return A data.table with the character columns `file` and `url`; it has
#' zero rows when the script contains no matching entry.
#'
#' @export
read_wget_ESGP <- function(file, is_save = FALSE) {
    lines <- readLines(file)

    # identical() also copes with an empty script, which has no last line and
    # would make a plain `!=` comparison fail inside `if`.
    if (!identical(lines[length(lines)], 'echo "done"')) {
        message(sprintf("%s: %s\n", basename(file), "not finished!"))
    }

    # Download entries look like: 'file' 'url' 'MD5' 'md5val'
    entries <- grep("^'", lines, value = TRUE)
    fields <- strsplit(gsub("'", "", entries, fixed = TRUE), " ", fixed = TRUE)
    # Only the first two fields are used, so the number of trailing checksum
    # fields does not matter; entries without a url are skipped.
    fields <- fields[lengths(fields) >= 2L]
    fname <- vapply(fields, `[`, character(1), 1L)
    link <- vapply(fields, `[`, character(1), 2L)

    # nc4 has been removed: keep only names with a literal ".nc" extension.
    is_nc <- grepl("\\.nc$", fname)
    d_url <- as.data.table(list(file = fname[is_nc], url = link[is_nc]))

    if (is_save) {
        outfile <- sub("\\.sh$", ".txt", file)
        # Never overwrite the script itself when it has no ".sh" extension.
        if (identical(outfile, file)) {
            outfile <- paste0(file, ".txt")
        }
        fwrite(d_url[, .(url)], outfile, col.names = FALSE)
    }
    d_url
}

#' http urls of CMIP5 nc files from ESGP
#' 
#' Extract URLs from sh files. A txt file containing the download urls will be
#' written into the parent directory of the directory that holds `file`, with
#' the `.sh` extension replaced by `.txt`.
#' `nc4` files duplicate the `nc` files, and they are removed. Files whose
#' name contains `cfMon` or `cfDay` are removed as well.
#' 
#' For `rcp` scenario, `date_start` later than `2101-01-01` will be ignored.
#' 
#' If [urls_filter()] fails, the error is reported as a message and the
#' unfiltered urls are used instead.
#' 
#' @param file The path of sh file retrieved from ESGP wget.
#' @param type Downloading mode, one of `aria2c` and `wget`. For `aria2c` only
#' the urls are written; for `wget` the file names and the urls are written.
#' Any other value writes nothing and raises a warning.
#' @param ... other parameters to [urls_filter()]
#' 
#' @return A data.table with the columns `file` and `url` of the selected
#' files.
#' 
#' @importFrom plyr alply
#' @importFrom stringr str_detect
#' 
#' @seealso [urls_CMIP5_ceda()]
#' @inherit urls_CMIP5_ceda examples
#' 
#' @export
urls_CMIP5_ESGP <- function(file, type = "aria2c", ...){
    # Only the extension of the base name is rewritten; directory names that
    # happen to contain "sh" must stay untouched.
    outname <- sub("\\.sh$", ".txt", basename(file))
    if (identical(outname, basename(file))) {
        # No ".sh" extension: make sure the script itself is never overwritten.
        outname <- paste0(outname, ".txt")
    }
    outfile <- paste0(dirname(dirname(file)), "/", outname)

    d_url <- read_wget_ESGP(file)

    # del cfMon and cfDay
    keep <- !grepl("cfMon|cfDay", d_url$file)
    d_url <- d_url[keep, ]

    d_sel <- tryCatch({
        files <- d_url$file
        dfile <- urls_filter(files, ...)
        I_sel <- match(basename(dfile$file), basename(files))
        # Unmatched names would otherwise show up as all-NA rows.
        d_url[I_sel[!is.na(I_sel)], ]
    }, error = function(e) {
        # Best effort: fall back to the unfiltered urls.
        message(sprintf("[e] %s", e$message))
        d_url
    })

    if (type == "aria2c") {
        fwrite(d_sel[, .(url)], outfile, col.names = FALSE)
    } else if (type == "wget") {
        fwrite(d_sel, outfile, col.names = FALSE)
    } else {
        warning(sprintf("unknown type '%s': no file written", type),
            call. = FALSE)
    }
    d_sel
}
# kongdd/CMIP5tools documentation built on Dec. 17, 2020, 11:03 a.m.