#' Read data from cmip5 nc files and merge into a large array
#'
#' @param d A data.frame at least with the columns of 'start_adj', 'end_adj' and 'file'`.
#' @inheritParams ncread
#' @param verbose whether to show reading progress?
#' @param bigmemory Boolean. If true, 3d array memory will be pre-allocated.
#' To do this, we need to open nc files first than get the dimension information.
#' This step will sacrifice some time. If false, temporary data are stored in
#' list and finally convert to 3d array.
#'
#' @note Only merge a model for each time. Please make sure that no duplicated date
#' in `d`.
#'
#' @return
#' * `grid.origin`: A list with `'lon', 'lat', 'cellsize_x', 'cellsize_y'`
#' of original data.
#' * `grid`: A list with `'lon', 'lat', 'date'` of the read data.
#' * `data`: Merged array value, with the dimension of `[nlon*nlat, ntime]`.
#'
#' @export
#' @importFrom abind abind
#' @importFrom lubridate make_date
#' @import bigmemory
#'
#' @examples
#' \dontrun{
#' library(magrittr)
#' range <- c(15, 55, 70,140)
#'
#' d_files <- CMIP5Files_filter(files)
#' lst_dfiles <- d_files %>% split(., .$model)
#'
#' i = 1
#' d <- lst_dfiles[[i]]
#'
#' l <- nc_merge(d, varname = 'tasmax', range, delta = 2)
#' }
nc_merge <- function(d,
varname = 1L,
range,
delta = 2,
verbose = TRUE,
bigmemory = FALSE,
scale=1,
offset=0,
value_range=NULL,
adjust_lon = TRUE) # , ntime = -1
{
options(bigmemory.typecast.warning=FALSE)
model <- d$model[1]
nfile <- nrow(d)
if (verbose)
ok(sprintf('=========== %s [n=%d] ===========\n', model, nfile))
# convertTo2d <- ifelse(bigmemory, TRUE, FALSE)
if (bigmemory){
# 1. get grid size, [nlon, nlat]
i = 1
DatePeriod <- c(d$start_adj[i], d$end_adj[i]) %>% format(DATE_FORMAT)
file <- d$file[i]
l <- ncread_cmip(file, varname = NULL, range = range, delta = delta,
ntime = -1, DatePeriod = DatePeriod,
grid_type = NULL,
scale = scale, offset = offset, value_range = value_range,
adjust_lon = adjust_lon, check_date = TRUE)
dim <- l$grid$dim
nij <- prod(dim[1:2])
# 2. get ntime_max and allocate memory according to it
ntime_max <- d %$% {
# note: ymd will be error, when year < 100
# start_date <- start_adj %>% pmax(min(start))
# end_date <- end_adj %>% pmin(max(end))
# c(start_date, end_date)
difftime(max(end_adj), min(start_adj), "day") %>% {as.numeric(.)+1}
}
cat(ok(sprintf("\t%s | dim=[%d, %d, %d], about %.1fG\n",
model, dim[1], dim[2], ntime_max, nij*ntime_max*8/1024^3)))
if (nfile == 1) ntime_max <- l$grid$dim[3]
array_val <- big.matrix(nij, ntime_max, "float") # default type `double`
k_start <- 0
}
lst_date <- lst <- list()
for (i in 1:nfile){
DatePeriod <- c(d$start_adj[i], d$end_adj[i]) %>% format(DATE_FORMAT)
file <- d$file[i]
if (verbose) {
cat(sprintf('[%02d] reading %s %s ... \n', i, basename(file),
green(file_size(file))))
}
l <- ncread_cmip(file, varname = varname, range = range, delta = delta,
ntime = -1, DatePeriod = DatePeriod,
grid_type = "mat",
scale = scale, offset = offset, value_range = value_range,
adjust_lon = adjust_lon, check_date = TRUE)
lst_date[[i]] <- l$grid$date
if (bigmemory) {
# note that value are 2d matrix in bigmemory
# fill values int `array_val`
ntime_i <- length(l$grid$date)
I_time <- seq.int(k_start+1, k_start + ntime_i)
## TODO: solve the problem of missing year
array_val[, I_time] <- l$data[[1]]
k_start <- k_start + ntime_i
} else {
lst[[i]] <- l
}
}
if (!bigmemory){
along <- dim(lst[[1]]$data[[1]]) %>% length()
array_val <- map(lst, ~.$data[[1]]) %>% abind(along = along)
rm(lst); gc(); gc()
} else {
array_val <- sub.big.matrix(array_val, firstCol = 1, lastCol = k_start)
}
date_all <- do.call(c, lst_date)
I_dup <- duplicated(date_all) %>% which()
if (!is_empty(I_dup)) {
# date_all %<>% .[-I_dup]
stop(sprintf('[nc] %s, duplicated date found!', model))
# rm duplicated date
# array_val <- array_val[,,I_nodup]
}
# get dimension infomation
grid.origin <- l$grid.origin[c('lon', 'lat', 'dim','cellsize_x', 'cellsize_y')]
grid <- l$grid[c('lon', 'lat', 'date', 'dim')]
grid.origin$dim[3] <- length(date_all)
grid$dim[3] <- length(date_all)
grid$date <- date_all
grid$object_size <- utils:::format.object_size(prod(grid$dim) * 4, "auto") # single in cpp?
# rm(lst, l); gc()
structure(list(grid.origin = grid.origin, grid = grid, data = array_val),
class = "raster2")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.