# Tidy the raw CEDA CMIP5 file list and cache it as "filelist_CMIP5.rda".
#
# Reads columns 5 and 9 of the listing (file size and relative file path),
# converts the size to Mb (assumes the raw size is in bytes -- TODO confirm)
# and splits every path into its directory components.
#
# The listing is published at
# ftp://ftp.ceda.ac.uk/badc/cmip5/data/cmip5/metadata/fileList_20150706.txt
#
# @param file path of the downloaded file list, e.g. "fileList_20150706.txt"
# @return called for its side effect: `info` (path components plus row index
#   `I`) and `df` (`size_mb`, `file`) are saved to "filelist_CMIP5.rda" in the
#   working directory.
tidy_filelist <- function(file) {
  df <- fread(file, select = c(5, 9), nrows = Inf) %>%
    set_colnames(c("size_mb", "file"))
  df[, size_mb := size_mb / 1024^2]

  # `strsplit()` breaks each path at "/"; base `split()` would only group the
  # whole vector by a factor and never yield the path components.
  # Component 9 is dropped (presumably the version directory -- verify).
  info <- strsplit(df$file, "/", fixed = TRUE) %>%
    do.call(rbind, .) %>%
    data.table() %>%
    .[, -9]
  varnames <- c(
    "output", "org", "model", "scenario", "time_scale", "part",
    "time_scale2", "ensemble", "variable", "file"
  )
  colnames(info) <- varnames

  # `I` links every row of `info` back to the same row of `df`
  info %<>% cbind(I = seq_len(nrow(.)), .)
  save(info, df, file = "filelist_CMIP5.rda")
}
# Split a table into a named list with one element per scenario.
#
# @param d table with a `scenario` column
# @return list of sub-tables of `d`, named by the values of `d$scenario`
split_scenario <- function(d) {
  split(d, d$scenario)
}
# Turn the relative paths in `d$file` into full CEDA ftp urls.
#
# @param d table with a character column `file` (paths relative to the CMIP5
#   data root)
# @return `d` with the ftp host prefixed to every element of `file`; a table
#   without any file is returned unchanged.
fix_host <- function(d) {
  host <- "ftp://ftp.ceda.ac.uk/badc/cmip5/data/cmip5/"
  # `paste0(host, character(0))` yields `host` (length 1), which cannot be
  # assigned to a zero-row table, so empty input is skipped.
  if (length(d$file) > 0) {
    d$file <- paste0(host, d$file)
  }
  d
}
# Write the urls of one scenario to a text file.
#
# The first line of `outfile` is a comment header with the scenario name
# (taken from the first row of `d`) and the summed `size_mb` divided by 1024,
# labelled "Gb"; the urls follow, one per line.
#
# @param d table with columns `scenario`, `file` and `size_mb`; the
#   `show.filesize = TRUE` branch relies on data.table `[` evaluation.
# @param outfile path of the text file to write; `check_dir()` is called on
#   its directory first (presumably creating it -- verify).
# @param show.filesize if `TRUE`, every url is followed by a `# <size>Mb`
#   comment line.
#' @importFrom readr write_lines
write_urls_ceda <- function(d, outfile, show.filesize = FALSE) {
  check_dir(dirname(outfile))

  total_gb <- round(sum(d$size_mb) / 1024, 2)
  header <- sprintf("# [%s] %+6s Gb", d$scenario[1], total_gb)
  write_lines(header, outfile)

  url_lines <- if (show.filesize) {
    d[, sprintf("%s\n# %4.1fMb", file, size_mb)]
  } else {
    d$file
  }
  write_lines(url_lines, outfile, append = TRUE)
}
#' ftp urls of CMIP5 nc files from CEDA
#'
#' Select the files of one variable from the CEDA CMIP5 file list, group them
#' by scenario, prefix the ftp host and, optionally, write one url list per
#' scenario to `outdir`. The size of every scenario is printed.
#'
#' For RCP and piControl, only r1i1p1 ensemble returned.
#'
#' @param df data.frame of ceda CMIP5 filelist.
#' - `file` : relative path of nc file
#' - `size_mb`: file size
#' @param info detailed file information. Its column `I` holds the row index
#' of the corresponding file in `df`.
#' @param varname which variable to extract
#' @param TIMESCALE filter time_scale2 in the `info`. e.g. `"Amon", "day", ...`
#' @param PART `NULL` (default, no filtering) or one or more of
#' `"aerosol", "atmos", "land", "landIce", "ocean", "ocnBgchem", "seaIce"`
#' @param scenarios which scenarios to extract
#' @param save boolean. Whether to save the urls to txt files
#' (`<outdir>/<scenario>_<varname>.txt`)?
#' @param outdir output directory
#'
#' @return A named list with one table per scenario: the selected rows of
#' `info`, with `file` replaced by the full ftp url and `size_mb` added.
#'
#' @references
#' 1. ftp://ftp.ceda.ac.uk/badc/cmip5/data/cmip5/
#' 2. ftp://ftp.ceda.ac.uk/badc/cmip5/data/cmip5/metadata/fileList_20150706.txt
#'
#' @example man/examples/ex-urls_CMIP5.R
#' @export
urls_CMIP5_ceda <- function(df, info, varname = "tasmax",
                            TIMESCALE = "Amon",
                            PART = NULL,
                            scenarios = c(
                              "piControl", "historical", "historicalGHG",
                              "historicalMisc", "historicalNat",
                              "rcp26", "rcp45", "rcp60", "rcp85"
                            ),
                            save = TRUE,
                            outdir = ".") {
  info_sel <- info[variable == varname &
    time_scale2 %in% TIMESCALE &
    scenario %in% scenarios, ]
  # `%in%` rather than `==`, so several parts can be requested at once
  # without silent recycling (same convention as TIMESCALE and scenarios)
  if (!is.null(PART)) info_sel <- info_sel[part %in% PART, ]

  # fill into real url
  info_sel$file <- df$file[info_sel$I]
  info_sel$size_mb <- df$size_mb[info_sel$I]

  # ignore: cfDay, cfMon
  l <- info_sel %>%
    split(., .$scenario) %>%
    map(function(d) {
      I_sel <- urls_filter(d$file)
      d[I_sel, ]
    })
  lst <- map(l, fix_host) # link

  if (save) {
    # written for the side effect only, hence a plain loop
    for (k in seq_along(lst)) {
      outfile <- sprintf("%s/%s_%s.txt", outdir, names(lst)[k], varname)
      write_urls_ceda(lst[[k]], outfile)
    }
  }

  # print size_mb
  show_file_size(lst)
  lst
}
#' Print the download size of every scenario
#'
#' Prints one line per scenario (summed `size_mb` divided by 1024, one
#' decimal, labelled "Gb"), followed by the rounded total.
#'
#' @param lst Object returned by [urls_CMIP5_ceda()]
#'
#' @export
show_file_size <- function(lst) {
  scenario_names <- names(lst)
  sizes_gb <- numeric(length(scenario_names))

  for (k in seq_along(scenario_names)) {
    sizes_gb[k] <- round(sum(lst[[k]]$size_mb) / 1024, 1)
    size_str <- sprintf("%+6s", sizes_gb[k])
    cat(sprintf("[%-13s] %s Gb\n", scenario_names[k], green(size_str)))
  }

  cat(sprintf("[Total] %s Gb\n", round(sum(sizes_gb))))
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.