Nothing
#' Accumulate data of NetCDF file.
#'
#' Computes the accumulation of the given variable over time.
#' The resulting outfile has the same dimensions as the infile.
#'
#' @param var Name of variable in infile (character).
#' @param infile Character containing file name or path of input file.
#' @param outfile Character containing file name or path of output file.
#' If NULL, the input file is directly edited instead.
#' @param nc34 NetCDF version of output file. If \code{nc34 = 3} the output
#' file will be in NetCDFv3 format (numeric). Default output is NetCDFv4.
#' @param overwrite Logical; should existing output file be overwritten?
#' If outfile is NULL, this parameter is ignored.
#' @param na_replace Replacing NA values with either 'mean' or 'previous'
#' for monthly mean or previous value, respectively (character).
#' @param verbose logical; if TRUE, progress messages are shown
#'@param nc Alternatively to \code{infile} you can specify the input as an
#' object of class `ncdf4` (as returned from \code{ncdf4::nc_open}).
#'
#' @export
timcumsum <- function(var,
infile,
outfile,
nc34 = 4,
overwrite = FALSE,
na_replace = "mean",
verbose = FALSE,
nc = NULL) {
calc_time_start <- Sys.time()
check_variable(var)
if (is.null(nc)) check_infile(infile)
check_outfile(outfile)
outfile <- correct_filename(outfile)
check_overwrite(outfile, overwrite)
check_nc_version(nc34)
stopifnot(na_replace %in% c("mean", "previous"))
##### extract data from file #####
file_data <- read_file(infile, var, nc = nc)
stopifnot(length(file_data$dimension_data$t) > 1)
if (file_data$time_info$has_time_bnds) {
time_bnds <- get_time_bounds_from_file(infile, nc = nc)
}
if (file_data$time_info$has_time_bnds) {
vars_data <- list(result = NA, time_bounds = time_bnds)
} else {
vars_data <- list(result = NA)
}
nc_format <- get_nc_version(nc34)
cmsaf_info <-
paste0("cmsafops::timcumsum for variable ", file_data$variable$name)
##### prepare output #####
global_att_list <- names(file_data$global_att)
global_att_list <-
global_att_list[toupper(global_att_list) %in% toupper(GLOBAL_ATT_DEFAULT)]
global_attributes <- file_data$global_att[global_att_list]
dims <- define_dims(
file_data$grid$is_regular,
file_data$dimension_data$x,
file_data$dimension_data$y,
file_data$dimension_data$t,
NB2,
file_data$time_info$units,
with_time_bnds = file_data$time_info$has_time_bnds
)
# Accumulating "short" values can lead to overflow. That's why we use "float".
vars <-
define_vars(
file_data$variable,
dims,
nc_format$compression,
with_time_bnds = file_data$time_info$has_time_bnds,
precision = "float"
)
##### write output file #####
write_output_file(
outfile,
nc_format$force_v4,
vars,
vars_data,
file_data$variable$name,
file_data$grid$vars, file_data$grid$vars_data,
cmsaf_info,
file_data$time_info$calendar,
file_data$variable$attributes,
global_attributes,
with_time_bnds = file_data$time_info$has_time_bnds,
write_result = FALSE
)
##### calculate #####
limit <-
2601 * 2601 * 31 # This value can be ajusted to avoid RAM overflow
dimensionality <-
as.double(length(file_data$dimension_data$x)) *
as.double(length(file_data$dimension_data$y)) * as.double(length(file_data$dimension_data$t))
nc_out <- nc_open(outfile, write = TRUE)
if (dimensionality < limit) {
# Result can directly be calculated.
if (!is.null(nc)) nc_in <- nc
else nc_in <- nc_open(infile)
dum_dat <-
ncvar_get(nc_in, file_data$variable$name, collapse_degen = FALSE)
dum_times <-
get_date_time(ncvar_get(nc_in, "time"), file_data$time_info$units)
nc_close(nc_in)
##### replace NAs by monthly mean #####
dum_na <- which(is.na(dum_dat), arr.ind = TRUE)
if (nrow(dum_na) > 0) {
if (na_replace == "mean") {
of <- tempfile(fileext = ".nc")
cmsafops::monmean(
file_data$variable$name,
infile = infile,
outfile = of,
overwrite = TRUE,
nc = nc
)
nc_of <- nc_open(of)
meandata <- ncvar_get(nc_of, file_data$variable$name, collapse_degen = FALSE)
meantimes <-
get_date_time(ncvar_get(nc_of, "time"),
file_data$time_info$units)
mean_na <- which(is.na(meandata), arr.ind = TRUE)
nc_close(nc_of)
na_ind <-
which(dum_na[, 1] %in% mean_na[, 1] &
dum_na[, 2] %in% mean_na[, 2])
if (length(na_ind) > 0)
dum_na <- dum_na[-na_ind,]
dum_na_timestep <- unique(dum_na[, 3])
dum_na_times <- dum_times[dum_na_timestep, ]
for (k in seq_along(dum_na_timestep)) {
mo <- which(meantimes$month == dum_na_times$month[k])
yr <- which(meantimes$year == dum_na_times$year[k])
dum_dat[, , dum_na_timestep[k]] <-
meandata[,, mo[which(mo %in% yr)]]
}
} else {
ref_na <- which(is.na(dum_dat[,,1]), arr.ind = TRUE)
if (length(ref_na) > 0 && all(is.na(ref_na)))
stop("Reference data invalid. The first record of your data is corrupt. Maybe try na_replace = 'mean' to repair.")
na_ind <-
which(dum_na[, 1] %in% ref_na[, 1] &
dum_na[, 2] %in% ref_na[, 2])
if (length(na_ind) > 0)
dum_na <- dum_na[-na_ind,]
dum_na_timestep <- unique(dum_na[, 3])
for (k in seq_along(dum_na_timestep)) {
dum_dat[, , dum_na_timestep[k]] <-
dum_dat[, , dum_na_timestep[k] - 1]
}
}
}
##### accumulate #####
res <- apply(dum_dat, c(1, 2), cumsum)
result <- aperm(res, c(2, 3, 1))
ncvar_put(nc_out, vars[[1]], result)
} else {
nr_of_possible_x_dims <-
round((limit / length(file_data$dimension_data$x)) / length(file_data$dimension_data$t))
dimsteps_start <-
seq(1,
length(file_data$dimension_data$y),
nr_of_possible_x_dims)
dimsteps_count <-
rep(nr_of_possible_x_dims, length(dimsteps_start))
cor <-
nr_of_possible_x_dims * length(dimsteps_start) - length(file_data$dimension_data$y)
dimsteps_count[length(dimsteps_start)] <-
dimsteps_count[length(dimsteps_start)] - cor
for (i in seq_along(dimsteps_start)) {
if (!is.null(nc)) nc_in <- nc
else nc_in <- nc_open(infile)
dum_dat <- ncvar_get(
nc_in,
file_data$variable$name,
start = c(1, dimsteps_start[i], 1),
count = c(-1, dimsteps_count[i], -1),
collapse_degen = FALSE
)
dum_times <-
get_date_time(ncvar_get(nc_in, "time"), file_data$time_info$units)
if (is.null(nc)) nc_close(nc_in)
##### replace NAs by monthly mean #####
dum_na <- which(is.na(dum_dat), arr.ind = TRUE)
if (nrow(dum_na) > 0) {
if (na_replace == "mean") {
if (i == 1) {
of <- tempfile(fileext = ".nc")
cmsafops::monmean(
file_data$variable$name,
infile = infile,
outfile = of,
overwrite = TRUE,
nc = nc
)
nc_of <- nc_open(of)
meandata <- ncvar_get(nc_of, file_data$variable$name)
meantimes <-
get_date_time(ncvar_get(nc_of, "time"),
file_data$time_info$units)
mean_na <- which(is.na(meandata), arr.ind = TRUE)
nc_close(nc_of)
}
na_ind <-
which(dum_na[, 1] %in% mean_na[, 1] &
dum_na[, 2] %in% mean_na[, 2])
if (length(na_ind) > 0)
dum_na <- dum_na[-na_ind,]
dum_na_timestep <- unique(dum_na[, 3])
dum_na_times <- dum_times[dum_na_timestep, ]
for (k in seq_along(dum_na_timestep)) {
mo <- which(meantimes$month == dum_na_times$month[k])
yr <- which(meantimes$year == dum_na_times$year[k])
dum_dat[, , dum_na_timestep[k]] <-
meandata[, dimsteps_start[i]:(dimsteps_start[i] + dimsteps_count[i] - 1), mo[which(mo %in% yr)]]
}
} else {
ref_na <- which(is.na(dum_dat[,,1]), arr.ind = TRUE)
if (length(ref_na) > 0 && all(is.na(ref_na)))
stop("Reference data invalid. The first record of your data is corrupt. Maybe try na_replace = 'mean' to repair.")
na_ind <-
which(dum_na[, 1] %in% ref_na[, 1] &
dum_na[, 2] %in% ref_na[, 2])
if (length(na_ind) > 0)
dum_na <- dum_na[-na_ind,]
dum_na_timestep <- unique(dum_na[, 3])
for (k in seq_along(dum_na_timestep)) {
dum_dat[, , dum_na_timestep[k]] <-
dum_dat[, , dum_na_timestep[k] - 1]
}
}
}
##### accumulate #####
res <- apply(dum_dat, c(1, 2), cumsum)
result <- aperm(res, c(2, 3, 1))
ncvar_put(
nc_out,
vars[[1]],
result,
start = c(1, dimsteps_start[i], 1),
count = c(-1, dimsteps_count[i], -1)
)
# ncvar_put(nc_out, dims$y, file_data$dimension_data$y[dimsteps_start[i]:(dimsteps_start[i] + dimsteps_count[i] - 1)],
# start = dimsteps_start[i], count = dimsteps_count[i])
}
}
nc_close(nc_out)
calc_time_end <- Sys.time()
if (verbose)
message(get_processing_time_string(calc_time_start, calc_time_end))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.