#!/usr/bin/Rscript
############################################################################################################################
############################ FUNCTIONS TO HANDLE NetCDF FILES ###########################################################
############################################################################################################################
#' Get a Field for NetCDF
#'
#'
#' An internal function that reads data from an NetCDF .nc file.
#'
#' @param source A \linkS4class{Source} containing the meta-data about the NetCDF source
#' @param quant A Quantity object to specify what quantity should be opened.
#' @param layers A character string (or a vector of character strings) specifying which variables from the NetCDF file are to be read.
#' NULL (default) means read all.
#' @param target.sta.info An STAInfo object defining the spatial-temporal-annual extent over which we want the data
#' @param file.name Character string holding the name of the file. This can be left blank, in which case the file name is automatically generated
#' @param calendar Character string, sometimes the calendar string on the time axis can be incorrect or missing. Here you can manually provide it.
#' Note: A common error in paleo files is "standard" instead of "proleptic_gregorian". Specifically, if you have dates with years 1582
#' (the start of the Gregorian calendar) and it includes leap years the calendar needs to be set to "proleptic_gregorian".
#' @param vars.to.ignore A list of character strings specifying which cvariables *not* to read. These are typically metadata about the coordinate data
#' (hence the defaults ""time_bnds", "lon_bnds", "lat_bnds", "lat" and "lon"), but you can also use this argument to ignore a particular data variable from a file.
#' @param verbose A logical, set to true to give progress/debug information
#' @param nc.verbose A logical, set to true to give progress/debug information from the ncdf4 package functions. This can be a lot,
#' so it is handy to control that separately.
#' @return A list containing firstly the data.table containing the data, and secondly the STAInfo for the data that we have
#' @author Matthew Forrest \email{matthew.forrest@@senckenberg.de}
#' @include classes.R
#' @keywords internal
getField_NetCDF <- function(source,
quant,
layers = NULL,
target.STAInfo,
file.name,
verbose = FALSE,
nc.verbose = FALSE,
vars.to.ignore = c("time_bnds", "lon_bnds", "lat_bnds", "lat", "lon"),
calendar = "standard") {
# timing
t1 <- Sys.time()
# first check that ncdf4 netCDF package is installed
if (! requireNamespace("ncdf4", quietly = TRUE)) stop("Please install ncdf4 R package and, if necessary the netCDF libraries, on your system to read NetCDF files.")
# To avoid annoying NOTES when R CMD check-ing
Lon = Lat = Year = Month = Day = Time = Temp =NULL
# supported calendars
supported.calendars <- c("standard", "gregorian", "proleptic_gregorian", "360_day", "366_day", "365_day", "uniform30day", "no_leap", "all_leap", "noleap", "allleap")
# possible dimensions names
# TODO maybe move these to arguments for greater flexibility
possible.lat.dim.names <- c("lat", "Lat", "latitude", "Latitude", "y", "Y")
possible.lon.dim.names <- c("lon", "Lon", "longitude", "Longitude", "x", "X")
possible.time.dim.names <- c("time", "Time", "times", "Times", "year", "Year", "month", "Month", "t", "T")
# handy function to check for attributes, also including the kind of deprecated "DGVMTools_" and "DGVMData_" variants for global attributes
lookupAttribute <- function(nc, var, attname, verbose) {
this_try <- ncdf4::ncatt_get(nc, var, attname, verbose)
if(this_try$hasatt) return(this_try$value)
this_try <- ncdf4::ncatt_get(nc, var, paste0("DGVMData_", attname), verbose)
if(this_try$hasatt) return(this_try$value)
this_try <- ncdf4::ncatt_get(nc, var, paste0("DGVMTools_", attname), verbose)
if(this_try$hasatt) return(this_try$value)
return("Unspecified")
}
# get first and last year for use later on
first.year = target.STAInfo@first.year
last.year = target.STAInfo@last.year
# STAInfo object describing the data
sta.info = new("STAInfo")
# Make the filename (if necessary) and check for the file, gunzip if necessary, fail if not present
if(!is.null(file.name)) file.name.nc <- file.path(source@dir, file.name)
else file.name.nc <- file.path(source@dir, paste(quant@id, "nc", sep = "."))
file.name.nc.gz <- paste(file.name.nc, "gz", sep = ".")
zipped <- FALSE
if(file.exists(file.name.nc)){
if(verbose) message(paste("Found and opening file", file.name.nc, sep = " "))
}
else if(file.exists(file.name.nc.gz)){
zipped <- TRUE
R.utils::gunzip(file.name.nc.gz)
if(verbose) message(paste("Found, gunzipping and opening file", file.name.nc.gz, sep = " "))
}
# Open file
if(verbose) message(paste0("Opening file ", file.name.nc))
if(verbose) this.nc <- ncdf4::nc_open(file.name.nc, readunlim=FALSE, verbose=nc.verbose, suppress_dimvals=FALSE )
else this.nc <- invisible(ncdf4::nc_open(file.name.nc, readunlim=FALSE, verbose=nc.verbose, suppress_dimvals=FALSE ))
#### EXAMINE VARIABLES PRESENT IN FILE ####
# Build a vector of variables which have dimensions associated with them and which aren't flagged as dimension variables
all_vars_names <- c()
for(this_var in this.nc$var){
if(this_var$ndims > 0 && !this_var$id$isdimvar) all_vars_names <- append(all_vars_names, this_var$name)
}
# Get make a list of vars to poentially read, excluding the ones to be ignored
if(verbose) message(paste("* NetCDF file contains potentially useful variables:", paste(all_vars_names, collapse = ", ")))
vars.to.read <- all_vars_names[!all_vars_names %in% vars.to.ignore]
vars.that.will.be.ignored <- all_vars_names[all_vars_names %in% vars.to.ignore]
if(verbose && length(vars.that.will.be.ignored) > 0) message(paste("* However at your request I am ignoring:", paste(vars.that.will.be.ignored, collapse = ", ")))
#### IF SPECIFIED, SELECT ONLY THE VARIABLES REQUIRED BY THE 'layers' ARGUMENT
if(!is.null(layers)) {
# first find any missing layers and warn about them
missing.layers <- layers[which(!layers %in% vars.to.read)]
for(this.layer in missing.layers) {
message(paste0("Layer '", this.layer, "' requested but not found in data."))
warning(paste0("Layer '", this.layer, "' requested but not found in data."))
}
# find the variables that match
vars.to.read <- layers[which(layers %in% vars.to.read)]
# stop if no layers to read because subsequent code will probably fail
if(length(vars.to.read) == 0) stop("When reading netCDF file, none of the layers you requested were present and so subsequent code will almost certainly fail.")
if(verbose) message(paste("* And I will attempt to read:", paste(vars.to.read, collapse = ", ")))
}
else {
if(verbose) message(paste("* And I will attempt to read all of them."))
}
#### FIND DIMENSIONS USED BY VARIABLES ####
# having established which variables we want to read, check which dimensions they have
# and, if we have got multiple variables, check that they have the same dimensions
# potential TODO - this can probably be factored out
# for each variable to read
all_required_dims <- list()
for(this_var in vars.to.read) {
# find the "var" object corresponding to it
for(this_var_obj in this.nc$var) {
if(this_var_obj$name == this_var) break
}
# make a vector of the dimensions required
these_required_dims <- c()
for(this_dim in this_var_obj$dim) {
these_required_dims <- append(these_required_dims, this_dim$name)
}
all_required_dims[[this_var_obj$name]] <- these_required_dims
}
# check all variables have the same dimensions and make the final list of required dimensions
if(length(unique(all_required_dims)) != 1) stop("In a single getField() call (NetCDF Format), trying to read variables with different dimensions. Please read these separately in *different* getField() calls.")
all_required_dims <- unlist(unique(all_required_dims))
if(verbose) message(paste("* All variables indexed by dimensions:", paste(all_required_dims, collapse = ", ")))
#### READ AND VERFIY DIMENSION INFO ####
# set dimensions names and values to zero/zero-length strings until a matching dimension is found
all.lats <- numeric(0)
all.lons <- numeric(0)
all.time.intervals <- numeric(0)
all.layer.vals <- numeric(0)
lat.string <- ""
lon.string <- ""
time.string <- ""
layer.string <- ""
# assign each of the required dimensions to lon, lat, time or 'layer'
for(this.dimension in all_required_dims) {
# pick up Lat
if(this.dimension %in% possible.lat.dim.names) {
lat.string <- this.dimension
all.lats <- ncdf4::ncvar_get(this.nc, lat.string)
if(verbose) message(paste0("** Confirmed latitude dimension: '", lat.string, "', with ", length(all.lats), " values."))
}
# pick up Lon
else if(this.dimension %in% possible.lon.dim.names) {
lon.string <- this.dimension
all.lons <- ncdf4::ncvar_get(this.nc, lon.string)
if(verbose) message(paste0("** Confirmed longitude dimension: '", lon.string, "', with ", length(all.lons), " values."))
}
# pick up Time
else if(this.dimension %in% possible.time.dim.names) {
time.string <- this.dimension
all.time.intervals <- ncdf4::ncvar_get(this.nc, time.string)
if(verbose) message(paste0("** Confirmed time dimension: '", time.string, "', with ", length(all.time.intervals), " values."))
}
# pick up 'layer' dimension (ie not one of the previous)
else if(layer.string == ""){
layer.string <- this.dimension
## SPECIAL CASE: for layer type dimensions in case they they have no corresponding variable defined
# for try with exception handling
all.layer.vals <- tryCatch({
ncdf4::ncvar_get(this.nc, layer.string)
}, warning = function(war) {
print("getField_NetCDF: **** SAVED **** No it won't, exception caught! Boom! But maybe you want to consider adding netcdf 'variables' for each 'dimension' in the file.")
1:this.nc$dim[[this.dimension]]$len
}, error = function(err) {
print("getField_NetCDF: **** SAVED **** No it won't, exception caught! Boom! But maybe you want to consider adding netcdf 'variables' for each 'dimension' in the file.")
1:this.nc$dim[[this.dimension]]$len
}, finally = {
})
if(verbose) message(paste0("** Confirmed layer-type dimension: '", layer.string, "', with ", length(all.layer.vals), " values."))
}
# If two "layer" dimensions found then fail
else {
stop(paste0("Found two 'layer'-type dimensions (i.e. not lon, lat or time): ", this.dimension, " and ", layer.string, ". Can't deal with that, so stopping."))
}
}
# check for at least one dimension
if( lat.string == "" && lon.string == "" && time.string == "" && layer.string == "") {
stop("Not picked up any dimensions, failing...")
}
# check if we have multiple layers from both a "vegtype" style axis *and* multiple requested variable inside the netcdf file
if(length(layer.string) > 0 & length(vars.to.read) > 1) {
}
#### LON AXIS AND LAT AXIS
# default values if no longitude or latitude selection is required
start.lon <- 1
count.lon <- -1
start.lat <- 1
count.lat <- -1
# TODO potential lon/lat cropping here
#### TIME AXIS - calculate the time labels and the start and count values for the time axis
# First, let's determine some details about the time axis if there is one
if(length(all.time.intervals) > 0) {
# extract the details about the time axis, if the units attributes exists
relative.time.axis <- FALSE
time.units.attr <- ncdf4::ncatt_get(this.nc, time.string, "units")
if(time.units.attr$hasatt) {
time.units.string.vec <- unlist(strsplit(time.units.attr$value, " "))
timestep.unit <- time.units.string.vec[1]
if(length(time.units.string.vec) > 1) {
if(tolower(time.units.string.vec[2]) == "since" || tolower(time.units.string.vec[2]) == "after") relative.time.axis <- TRUE
else if(tolower(time.units.string.vec[2]) == "as") stop("Absolute time axis with 'day as ...' not currently implemented. Contact the package author is this becomes important for you.")
}
}
# if no unit attributes make some assumptions (based on the axis name) only or fail
else {
if(tolower(time.string) == "years" || tolower(time.string) == "year") {
timestep.unit <- "year"
}
else if(tolower(time.string) == "months" || tolower(time.string) == "month") {
timestep.unit <- "month"
}
else stop(paste0("Unclear time axis in file ", file.name.nc,", which appears to have a time axis named ", time.string, " but no units attribute to help me interpret it."))
}
#### RELATIVE TIME AXIS ####
if(relative.time.axis) {
# extract the start day, month and year and return it as custom date type (vector with $year, $month and $day)
# the reason for using this custom format is to support years < 0 and > 9999
start.date <- parseStartDate(time.units.attr$value)
# if time step unit is seconds convert to days and proceed
if(timestep.unit == "seconds") {
all.time.intervals <- all.time.intervals/(60*60*24)
timestep.unit <- "days"
}
# if time step unit is days
if(timestep.unit == "days" || timestep.unit == "day") {
# calculate the difference between adjacent time intervals to determine the time resolution of the data
diff.all.time.intervals <- diff(all.time.intervals)
# calculate the mean to guess if is monthly or yearly
mean.timestep.differences <- mean(diff.all.time.intervals)
# if the mean is between 28 and 31 (inclusive) assume monthly
if(mean.timestep.differences >= 28 & mean.timestep.differences <= 31 ) time.res <- "Month"
# if the mean is between 359 and 367 (inclusive) assume yearly
else if(mean.timestep.differences >= 360 & mean.timestep.differences <= 367 ) time.res <- "Year"
# if all the time steps are integer values, assume daily (but potentially with missing days)
else if(isTRUE(all(diff.all.time.intervals == floor(diff.all.time.intervals))) || abs(mean.timestep.differences - 1) < 0.00001) time.res <- "Day"
else stop("Data doesn't appear to be on daily, monthly, or yearly timesteps. Other options are not currently supported by the DGVMTools, but could potentially be. So if you need this please contact the author.")
# lookup the calendar attribute (if not provided)
if(missing(calendar)) {
calendar.att <- ncdf4::ncatt_get(this.nc, time.string, "calendar")
if(calendar.att$hasatt) calendar <- calendar.att$value
else if(!calendar %in% supported.calendars) stop(paste("Unsupported calendar. Supported calendars:", paste(supported.calendars, collapse = ", ")))
else warning("DGVMTools requires a correctly defined 'calendar' attribute on a daily relative time axis. None has been found in the netCDF file, so I am assuming 'standard'. Note that you also can specify one in your getField() call.")
}
# default values if no time selection is required
start.time <- 1
count.time <- -1
# PROCESS RELATIVE DAILY TIME AXIS AND RETURN ALL INFO IN A DATA.TABLE (including cropping to the years that are wanted)
# This function call processes a relative time axis with unit of "days since ..."
# It compares the time axis to the years requested and return the start and count values for reading up the netCDF file
# and the actual years,months and days corresponding to this selection.
axis.info.dt <- processDailyRelativeNCAxis(axis.values = all.time.intervals,
start.year = start.date["year"],
start.month = start.date["month"],
start.day = start.date["day"],
target.STAInfo = target.STAInfo,
calendar = calendar,
year.offset = source@year.offset)
if(verbose) {
message("Time axis information table:")
print(axis.info.dt)
message("You can compare the above to the netCDF time axis in your file to check everything aligns as expected.")
message("(The cdo command 'showdate' and ncdump with the option '-c' are very useful in this regard.)")
}
# pull out start index and count for when reading the netCDF slice
start.time <- axis.info.dt[["NetCDFIndex"]][1]
count.time <- nrow(axis.info.dt)
# also a vector of years for establishing first and last year later on (could be streamlined)
years.vector <- axis.info.dt[["Year"]]
# make the "Time" labels depending on whether data is monthly, yearly or daily
# this is used for labelling the time dimension of the slice once it is read
if(time.res == "Year") {
all.times <- paste(years.vector)
if(verbose) message("Processed time axis as relative time axis with units of days and yearly values.")
}
else if(time.res == "Month") {
all.times <- paste(axis.info.dt[["Year"]], sprintf("%02d", axis.info.dt[["Month"]]), sep = ".")
if(verbose) message("Processed time axis as relative time axis with units of days and monthly values.")
}
else {
all.times <- paste(axis.info.dt[["Year"]], sprintf("%03d", axis.info.dt[["Day"]]), sep = ".")
if(verbose) message("Processed time axis as relative time axis with units of days and daily values.")
}
} # end if time step is days/day
# if time step unit is month
else if(timestep.unit == "months") {
time.res <- "Month"
if(verbose) message("Processing time axis as relative time axis with units of month and monthly values.")
# first enforce that we have all integer months
if(!isTRUE(all(all.time.intervals == floor(all.time.intervals)))) {
warning("Found non-integer months values on relative time axis with unit of months. I am truncating these values to give integer values. Please check this aligns the months as you expect.")
if(verbose) message("WARNING: Found non-integer months values on relative time axis with unit of months. I am truncating these values to give integer values. Please check this aligns the months as you expect.")
all.time.intervals <- floor(all.time.intervals)
}
# This function call processes a time axis which is assumed to be monthly
# It compares the time axis to the years requested and return the start and count values for reading up the netCDF file
# and the actual years and months corresponding to this selection.
month.axis.info <- processMonthlyNCAxis(axis.values = all.time.intervals,
start.year = start.date["year"],
start.month = start.date["month"],
target.STAInfo = target.STAInfo,
year.offset = source@year.offset)
start.time <- month.axis.info$start.time
count.time <- month.axis.info$count.time
years.vector <- month.axis.info$years.vector
months.vector <- month.axis.info$months.vector
# make the vector of all times as a code "year.month" for labelling this dimension once it is read
all.times <- paste(years.vector, months.vector, sep = ".")
}
else if(timestep.unit == "years") {
time.res <- "Year"
# first enforce that we have all integer years
if(!isTRUE(all(all.time.intervals == floor(all.time.intervals)))) {
warning("Found non-integer years values on relative time axis with unit of years. I am truncating these values to give integer values. Please check this aligns the years as you expect.")
if(verbose) message("WARNING: Found non-integer years values on relative time axis with unit of years. I am truncating these values to give integer values. Please check this aligns the years as you expect.")
all.time.intervals <- floor(all.time.intervals)
}
# This function call processes a time axis which is assumed to be yearly
# It compares the time axis to the years requested and return the start and count values for reading up the netCDF file
# and the actual years and months corresponding to this selection.
year.axis.info <- processYearlyNCAxis(axis.values = all.time.intervals,
start.year = start.date["year"],
target.STAInfo = target.STAInfo,
year.offset = source@year.offset)
start.time <- year.axis.info$start.time
count.time <- year.axis.info$count.time
years.vector <- year.axis.info$years.vector
# make the vector of all years as character vectors for labelling this dimension once it is read
all.times <- paste(years.vector)
}
# else catch whatever else
else stop('When reading relative time axes, DGVMTools only accepts unit strings of the form = "seconds/days/months/years since/as <start_time>".')
} # end if relative time axis
# else an absolute time axis
else {
# timestep unit "year(s)", which is pretty easy
if(tolower(timestep.unit) == "year" || tolower(timestep.unit) == "years") {
if(verbose) message("Processing time axis as absolute time axis with units of year and yearly values.")
time.res <- "Year"
# check they are all integer values (ie no confusing fractional year)
if(!isTRUE(all(all.time.intervals == floor(all.time.intervals)))) stop("For an absolute time axis, only integer values of years are supported")
# check the values on time dimension are unique, if not something is wrong and make an assumption to make them unique
if(length(all.time.intervals) != length(unique(all.time.intervals))) {
warning(paste0("Don't have unique values on time (year) dimension in netCDF file ", file.name.nc, ". This should not be, so I am improvising. Please check the years are as expected in the resulting Field and consider setting sensibles values for the time dimension."))
print(paste0("Don't have unique values on time (year) dimension in netCDF file ", file.name.nc, ". This should not be, so I am improvising. Please check the years are as expected in the resulting Field and consider setting sensibles values for the time dimension."))
all.time.intervals <- all.time.intervals:(all.time.intervals+length(all.time.intervals)-1)
}
# apply year offset prior to cropping
if(source@year.offset != 0) all.time.intervals <- all.time.intervals + source@year.offset
# get the indices consider cropping
cropped.year.indices <- calculateYearCroppingIndices(target.STAInfo, all.time.intervals)
start.time <- cropped.year.indices$first
count.time <- cropped.year.indices$last - cropped.year.indices$first + 1
# get the years.vector by cropping the full years
years.vector <- all.time.intervals[cropped.year.indices$first:cropped.year.indices$last]
# make a numeric code of the years for labelling this dimension once it is read
all.times <- paste(years.vector)
}
# timestep unit "month(s)", which is pretty easy
else if(tolower(timestep.unit) == "month" || tolower(timestep.unit) == "months") {
if(verbose) message("Processing time axis as absolute time axis with units of month and monthly values.")
time.res <- "Month"
# check they are all integer values (ie no confusing fractions of a month)
if(!isTRUE(all(all.time.intervals == floor(all.time.intervals)))) stop("For an absolute time axis, only integer values of monthd are supported")
# a value of "0" will mess things up, so offset it
if(min(all.time.intervals) == 0) all.time.intervals <- all.time.intervals +1
# This function call processes a time axis which is assumed to be monthly
# It compares the time axis to the years requested and return the start and count values for reading up the netCDF file
# and the actual years and months corresponding to this selection.
month.axis.info <- processMonthlyNCAxis(axis.values = all.time.intervals,
start.year = 1, # absolute time axis, start in "year 1" because we have no other reference
start.month = all.time.intervals[1], # again absolute time axis, so take the first month as the start month
target.STAInfo = target.STAInfo,
year.offset = source@year.offset)
start.time <- month.axis.info$start.time
count.time <- month.axis.info$count.time
years.vector <- month.axis.info$years.vector
months.vector <- month.axis.info$months.vector
# make the vector of all times as a code "year.month" for labelling this dimension once it is read
all.times <- paste(years.vector, months.vector, sep = ".")
}
else {
stop("Absolute time axes with any unit other than 'year' or 'month' not currently supported")
}
}
# set year and subannual meta-data - note that years.vector (ie. the final years of data that will be selected) must be defined above
# time resolution and start and end years are determined from the file and cropping
sta.info@first.year <- years.vector[1]
sta.info@last.year <- years.vector[length(years.vector)]
sta.info@subannual.resolution <- time.res
# original subannual resolution and subannual aggregation method can be determined from metadata if they exist
sta.info@subannual.aggregate.method <- lookupAttribute(this.nc, 0, "subannual.aggregate.method", nc.verbose)
sta.info@subannual.original <- lookupAttribute(this.nc, 0, "subannual.original", nc.verbose)
if(sta.info@subannual.original == "none") sta.info@subannual.original <- time.res
}
#### RETRIEVE THE DATA
# list of data.tables, one for each netCDF var (which will be interpreted as a 'Layer' in DGVMTools terms)
dt.list <- list()
# keep track of all the units, long names and standard names for later
assumed.units <- c()
all.standard_names <- c()
all.long_names <- c()
for(this.var in vars.to.read) {
if(verbose) message(paste0("* Reading variable: '", this.var, "'"))
for(this_var_obj in this.nc$var) {
if(this_var_obj$name == this.var) {
break
}
}
# set start and count appropriately
# note this is done slightly clumsily with a for loop to make sure the ordering is correct
start <- numeric()
count <- numeric()
# also set up the dimension list for naming the dimensions once the data cube is read
dimension.names <- list()
for(this.dim in this_var_obj$dim) {
if(this.dim$name == lat.string) {
start <- append(start, start.lat)
count <- append(count, count.lat)
if(length(all.lats) > 0) dimension.names[["Lat"]] <- all.lats
if(verbose) message(paste("* Reading data for", count.lat, "latitude values ('-1' means reading the full range available)"))
}
else if(this.dim$name == lon.string) {
start <- append(start, start.lon)
count <- append(count, count.lon)
if(length(all.lons) > 0) dimension.names[["Lon"]] <- all.lons
if(verbose) message(paste("* Reading data for", count.lon, "longitude values ('-1' means reading the full range available)"))
}
else if(this.dim$name == time.string) {
start <- append(start, start.time)
count <- append(count, count.time)
if(length(all.time.intervals) > 0) dimension.names[["Time"]] <- all.times
if(verbose) message(paste("* Reading data for", count.time, "time values ('-1' means reading the full range available)"))
}
else if(this.dim$name == layer.string) {
start <- append(start, 1)
count <- append(count, -1)
if(length(all.layer.vals) > 0) dimension.names[["Layers"]] <- all.layer.vals
if(verbose) message(paste("* Reading data for", -1, "layer values ('-1' means reading the full range available"))
}
}
# Get the actual data and set the dimension names
this.slice <- ncdf4::ncvar_get(this.nc, this_var_obj, start = start, count = count, verbose = nc.verbose, collapse_degen=FALSE)
if(verbose) message(paste0("Read slice of netCDF file with dimensions: ", paste(dim(this.slice), collapse = ", ")))
if(verbose) message(paste("Setting dimension names: ", paste(names(dimension.names), collapse = ", ")))
dimnames(this.slice) <- dimension.names
if(ncdf4::ncatt_get(this.nc, this_var_obj, "units")$hasatt) {
# read the units for this layer
these.units <- ncdf4::ncatt_get(this.nc, this_var_obj, "units")$value
# if first units hasn't been defined take these ones
if(length(assumed.units) == 0) assumed.units <- these.units
# else check if the next units are the same, if not issue a warnings
else {
if(!identical(these.units, assumed.units)) warning(paste0("Multiple units found when reading data from NetCDF file ", file.name.nc, ", using the first one (", paste(assumed.units, collapse = ", "), ") and ignoring the later one (", paste(these.units, collapse = ", "), ")."))
}
}
if(ncdf4::ncatt_get(this.nc, this_var_obj, "standard_name")$hasatt) all.standard_names <- append(all.standard_names, ncdf4::ncatt_get(this.nc, this_var_obj, "standard_name")$value)
if(ncdf4::ncatt_get(this.nc, this_var_obj, "long_name")$hasatt) all.long_names <- append(all.long_names, ncdf4::ncatt_get(this.nc, this_var_obj, "long_name")$value)
# if only a 2-D 'matrix', convert to a 3-D 'array' to that it melts properly with the as.data.table command below
if(length(dim(this.slice)) < 3) {
dim(this.slice) <- c(dim(this.slice), 1)
dimension.names[["Temp"]] <- 1
dimnames(this.slice) <- dimension.names
}
# 'melt' and remove the Temp column if necessary
if(verbose) message(paste("Melting array slice to data.table. This might take a while (depending on the size of the slice)..."))
# TODO This is pretty much the rate-limiting step and it can gobble up a huge amount of memory.
# But there is a possible solution. It should be possible to split up this.slice (totally arbitrarily) in to smaller slices
# which came be melted separated and then rbind-ed together at the end. That will definely help with the memory issue,
# not sure about speed
this.slice.dt <- as.data.table(this.slice, na.rm = TRUE, keep.rownames = TRUE)
if("Temp" %in% names(this.slice.dt)) this.slice.dt[ , Temp := NULL]
if("Lon" %in% names(this.slice.dt)) this.slice.dt[ , Lon := as.numeric(Lon)]
if("Lat" %in% names(this.slice.dt)) this.slice.dt[ , Lat := as.numeric(Lat)]
if("Time" %in% names(this.slice.dt)) this.slice.dt[ , Time := as.numeric(Time)]
if(verbose) message(paste("... done!"))
# clear up
rm(this.slice)
gc()
# if necessary dcast the Layers column back to make separate columns for each layer
if("Layers" %in% names(this.slice.dt)) this.slice.dt <- dcast(this.slice.dt, ... ~ Layers, value.var = "value")
this.slice.dt <- stats::na.omit(this.slice.dt)
if("value" %in% names(this.slice.dt)) setnames(this.slice.dt, "value", this_var_obj$name)
# ### HANDLE TIME AXIS
if(length(all.time.intervals) > 0) {
if(verbose) message("Translating Time axis to Year/Month/Day as required...")
if(time.res == "Year") {
if("Time" %in% names(this.slice.dt)) setnames(this.slice.dt, "Time", "Year")
}
else if(time.res == "Month") {
toYearandMonth <- function(x) {
Year <- as.integer(trunc(x))
return(list("Year" = Year, "Month" = as.integer(round((x-Year)*100))) )
}
this.slice.dt[, c("Year", "Month") := toYearandMonth(Time)]
if(min(this.slice.dt[["Month"]]) < 0) this.slice.dt[, Month := abs(Month)]
this.slice.dt[, Time := NULL]
}
else if(time.res == "Day") {
toYearandDay <- function(x) {
Year <- as.integer(trunc(x))
return(list("Year" = Year, "Day" = abs(as.integer(round((x-Year)*1000)))) )
}
this.slice.dt[, c("Year", "Day") := toYearandDay(Time)]
if(min(this.slice.dt[["Day"]]) < 0) this.slice.dt[, Day := abs(Day)]
this.slice.dt[, Time := NULL]
}
if(verbose) message("... done!")
}
# handle column order and names
# if no layer dimensions
if(length(layer.string) == 0 || layer.string == "") {
# make new column order so that data is last
all.col.names <- names(this.slice.dt)
all.col.names <- all.col.names[-which(all.col.names == this_var_obj$name)]
all.col.names <- append(all.col.names, this_var_obj$name)
setcolorder(this.slice.dt, all.col.names)
# If quantity is a categorical scheme (i.e. more than one entry in the @units slot), convert it to a factor
if(length(quant@units) > 1) {
this.slice.dt[,this_var_obj$name := factor(this.slice.dt[[this_var_obj$name]], labels = quant@units)]
}
}
# for the case of a layer dimension, rename each layer and move it to the end layers
else {
for(this_layer_value in all.layer.vals) {
# look for attribute with name "<layername>_<value>" or "<value>", on either the data variable or the dimension variable
final_layer_name <- tryCatch({
utils::capture.output(invisible(final_layer_name <- lookupAttribute(this.nc, var = layer.string, attname = paste(layer.string, this_layer_value, sep = "_"), verbose = nc.verbose)))
if(final_layer_name == "Unspecified") invisible(utils::capture.output(final_layer_name <- lookupAttribute(this.nc, var = layer.string, attname = paste(this_layer_value), verbose = nc.verbose)))
else if(final_layer_name == "Unspecified") invisible(utils::capture.output(final_layer_name <- lookupAttribute(this.nc, var = this_var_obj, attname = paste(layer.string, this_layer_value, sep = "_"), verbose = nc.verbose)))
else if(final_layer_name == "Unspecified") invisible(utils::capture.output(final_layer_name <- lookupAttribute(this.nc, var = this_var_obj, attname = paste(this_layer_value), verbose = nc.verbose)))
# if didn't find something, return to the original value
else if(final_layer_name == "Unspecified") final_layer_name <- this_layer_value
final_layer_name
}, warning = function(war){
this_layer_value
}, error = function(err){
this_layer_value
}, finally = {
})
# set name its final name (if specified, otherwise keep the original)
if(final_layer_name == "Unspecified") final_layer_name <- paste0(layer.string, this_layer_value)
setnames(this.slice.dt, paste(this_layer_value), paste(final_layer_name))
# and move it to the end
all.col.names <- names(this.slice.dt)
all.col.names <- all.col.names[-which(all.col.names == final_layer_name)]
all.col.names <- append(all.col.names, final_layer_name)
setcolorder(this.slice.dt, all.col.names)
# If quantity is a categorical scheme (i.e. more than one entry in the @units slot), convert it to a factor
if(length(quant@units) > 1) {
this.slice.dt[,this_var_obj$name := factor(this.slice.dt[[final_layer_name]], labels = quant@units)]
}
} # for each value on the layer dimensions
} # if got a layer dimension
# now join this slice to dt.list
setKeyDGVM(this.slice.dt)
dt.list[[length(dt.list)+1]] <- this.slice.dt
} # for each var/layer to read
# join all together and set key
dt <- dt.list[[1]]
if(length(dt.list) > 1) {
for(this.dt in dt.list[2:length(dt.list)]) {
dt <- merge(x = dt, y = this.dt, all = TRUE)
}
}
# Set any NAs introduced by merging process to zero
# This is correct (at least for the ESA Landcover CCI case) because for each gridcell any NAs introduced actually means zere
# fraction of this landcover. This might not be conceptually correct for all datasets, but let's see.
for (j in seq_len(ncol(dt)))set(dt,which(is.na(dt[[j]])),j,0)
# clean up and set key
rm(dt.list); gc()
if(verbose) message("Setting key")
dt <- setKeyDGVM(dt)
# special case for exactly one layer in total, set the name that the Quant ID
if(length(layers(dt)) == 1) setnames(dt, layers(dt), quant@id)
# STInfo
st.info <- getDimInfo(dt)
# Correct lons and lats
# Note: Year offset has been applied above, this is necessary because it needs to be applied *before* cropping.
# Also, if lon and lat cropping is implemented at the stage of reading the NetCDF (for extra efficiency),
# these corrections will also need to be done earlier.
if(verbose) message("Correcting lons and lats with offsets...")
if(length(source@lonlat.offset) == 2 ){
if(source@lonlat.offset[1] != 0) dt[, Lon := Lon + source@lonlat.offset[1]]
if(source@lonlat.offset[2] != 0) dt[, Lat := Lat + source@lonlat.offset[2]]
}
else if(length(source@lonlat.offset) == 1 ){
if(source@lonlat.offset[1] != 0) dt[, Lon := Lon + source@lonlat.offset[1]]
if(source@lonlat.offset[1] != 0) dt[, Lat := Lat + source@lonlat.offset[1]]
}
if(verbose) {
message("Offsets applied. Head of full .out file (after offsets):")
print(utils::head(dt))
}
#### DETERMINE SPATIAL EXTENT AND RELATED METADATA ####
# for the extent, look at the lons and lats originally determined from the NetCDF files
if(length(all.lons) > 1 && length(all.lats) > 1) {
lon.diffs <- diff(all.lons)
lat.diffs <- diff(all.lats)
if(!length(unique(lon.diffs)) == 1) warning(paste("Longitude differences in file", file.name.nc, "are not equal, so guessing the overall longitude bounds, which might not be 100% accurate."))
if(!length(unique(lat.diffs)) == 1) warning(paste("Latitude differences in file", file.name.nc, "are not equal, so guessing the overall latitude bounds, which might not be 100% accurate."))
# calculate the extent (including London centering)
if(source@london.centre && max(all.lons) >= 180) temp.lons <- LondonCentre(all.lons)
else temp.lons <- all.lons
this.extent <- raster::extent(min(temp.lons) - mean(lon.diffs)/2,
max(temp.lons) + mean(lon.diffs)/2,
min(all.lats) - mean(lat.diffs)/2,
max(all.lats) + mean(lat.diffs)/2)
sta.info@spatial.extent <- this.extent
}
else {
warning(paste("No spatial extent determinable from netcdf file", file.name.nc,"file. A filler extent will be set."))
sta.info@spatial.extent <- raster::extent(-1,1,-1,1)
}
# Look up spatial metadata for things that cannot be determined from the lons and lats
sta.info@spatial.extent.id <- lookupAttribute(this.nc, 0, "spatial.extent.id", nc.verbose)
sta.info@spatial.aggregate.method <- lookupAttribute(this.nc, 0, "spatial.aggregate.method", nc.verbose)
if(sta.info@spatial.aggregate.method == "Unspecified") sta.info@spatial.aggregate.method <- "none"
#### PROCESS QUANTITY RELATED METADATA ####
# units
# if units from the requested Quantity and the NetCDF file don't match
if(!identical(assumed.units, quant@units)) {
# if we have valid units from the netCDF files *and* the Quantity units are NOT categorical (ie. length > 1)
# then take the netCDF file units
if(length(assumed.units) > 0 & length(quant@units) <= 1) {
warning(paste0("Overriding requested units when reading NetCDF file ", file.name.nc, ". '", paste(quant@units, collapse = ", "), "' was specified but '", paste(assumed.units, collapse = ", "), "' was found (so using that).\n"))
quant@units <- assumed.units
}
}
# standard names (CF standard_name)
unique.standard_name <- unique(all.standard_names)
final.standard_name <- unique(all.standard_names)[1]
if(length(final.standard_name) > 0 & !identical(tolower(final.standard_name), "unknown")) {
if(length(unique.standard_name) > 1) warning(paste0("Multiple standard_name found when reading data from NetCDF file ", file.name.nc, ", using the first one (", final.standard_name, ").Overriding requested "))
if(final.standard_name != quant@standard_name & quant@standard_name != "undefined unit"){
warning(paste0("Overriding requested standard_name when reading NetCDF file ", file.name.nc, ". '", quant@standard_name, "' was specified but '",final.standard_name, "' was found (so using that).\n"))
quant@standard_name <- final.standard_name
}
}
# long names (CF long_name which is mapped to DGVMTools @name)
unique.long_name <- unique(all.long_names)
final.long_name <- unique(all.long_names)[1]
if(length(final.long_name) > 0 & !identical(tolower(final.long_name), "unknown")) {
if(length(unique.long_name) > 1) warning(paste0("Multiple long_name found when reading data from NetCDF file ", file.name.nc, ", using the first one (", final.long_name, ").Overriding requested "))
if(final.long_name != quant@name & quant@name != "undefined unit"){
warning(paste0("Overriding requested long_name when reading NetCDF file ", file.name.nc, ". '", quant@name, "' was specified but '",final.long_name, "' was found (so using that).\n"))
quant@name <- final.long_name
}
}
#### LONDON CENTRE ####
# if london.centre is requested, make sure all longitudes greater than 180 are shifted to negative
if(source@london.centre){
if(max(all.lons) > 180) {
dt[, Lon := LondonCentre(Lon)]
}
}
# set keys
setKeyDGVM(dt)
# remove any NAs
dt <- stats::na.omit(dt)
# close the netcdf file and if necessary zip again
ncdf4::nc_close(this.nc)
if(zipped) {
R.utils::gzip(file.name.nc)
}
# make the ID and then make and return Field
field.id <- makeFieldID(source = source, quant.string = quant@id, sta.info = sta.info)
message(paste0("Reading of NetCDF file ", file.name.nc, " sucessful!"))
if(verbose) {
message("Reading of file took:")
print(Sys.time() - t1)
}
new("Field",
id = field.id,
data = dt,
quant = quant,
source = source,
sta.info
)
}
######################### LIST ALL LPJ-GUESS OUTPUT VARIABLES (STORED AS *.out FILES) IN AN source DIRECTORY #####################################################################
#' List all LPJ-GUESS *.out files in a source directory
#'
#' Simply lists all LPJ-GUESS output variables (stored as .out files) available in a directory.
#' Also ignores some common red herrings like "guess.out" and "*.out"
#'
#' @param source A \code{\linkS4class{Source}} containing the meta-data about the NetCDF source
#' @param names Logical, if TRUE return a character vector of names of available quantities, if FALSE return a list of the actual Quantities.
#' @return A list of all the .out files present, with the ".out" removed.
#'
#' @keywords internal
#' @author Matthew Forrest \email{matthew.forrest@@senckenberg.de}
availableQuantities_NetCDF <- function(source, names){
warning("availableQuantity() functionality not implemented for NetCDF Format because this format is too flexible to clearly identify them.")
message("availableQuantity() functionality not implemented for NetCDF Format because this format is too flexible to clearly identify them.")
if(names) return(character(0))
else return(list())
}
########################################################
########### NetCDF QUANTITIES ########################
########################################################
#' @format The \code{\linkS4class{Quantity}} class is an S4 class with the slots defined below
#' @rdname Quantity-class
#' @keywords datasets
#'
#'
NetCDF.quantities <- list(
new("Quantity",
id = "fraction",
name = "Fraction",
units = "",
colours = grDevices::colorRampPalette(c("grey85", "black")),
format = c("NetCDF"),
standard_name = "area_fraction"),
new("Quantity",
id = "vegcover_std",
name = "Area Fraction",
units = "%",
colours = grDevices::colorRampPalette(c("white", "darkolivegreen1", "darkolivegreen4", "saddlebrown", "black")),
format = c("NetCDF"),
standard_name = "area_fraction_percent"),
new("Quantity",
id = "landcover_std",
name = "Area Fraction",
units = "%",
colours = grDevices::colorRampPalette(c("white", "darkolivegreen1", "darkolivegreen4", "saddlebrown", "black")),
format = c("NetCDF"),
standard_name = "area_fraction_percent"),
new("Quantity",
id = "vegC_std",
name = "Vegetation Carbon Mass",
units = "kg m-2",
colours = function(n) rev(viridis::viridis(n)),
format = c("NetCDF"),
standard_name = "vegetation_carbon_content"),
new("Quantity",
id = "LAI_std",
name = "LAI",
units = "1",
colours = viridis::viridis,
format = c("NetCDF"),
standard_name = "leaf_area_index"),
new("Quantity",
id = "mGPP_std",
name = "Monthly GPP",
units = "kgC/m^2",
colours = viridis::turbo,
format = c("Standard")),
new("Quantity",
id = "aGPP_std",
name = "Annual GPP",
units = "kgC m-2 year-1",
colours = viridis::inferno,
format = c("NetCDF"),
standard_name = "annual_gross_primary_productivity_of_biomass_expressed_as_carbon"),
new("Quantity",
id = "aNPP_std",
name = "Annual NPP",
units = "kgC/m^2/year",
colours = viridis::turbo,
format = c("NetCDF")),
new("Quantity",
id = "canopyheight_std",
name = "Canopy Height",
units = "m",
colours = function(n) rev(viridis::magma(n)),
format = c("NetCDF"),
standard_name = "canopy_height"),
new("Quantity",
id = "burntfraction_std",
name = "Annual Fraction Burned",
units = "fraction of gridcell",
colours = viridis::turbo,
format = c("NetCDF"),
standard_name = "burned_area_fraction"),
new("Quantity",
id = "FPAR_std",
name = "Fraction absorbed of Photosynthetically Active Radiation",
units = "fraction",
colours = grDevices::colorRampPalette(c("white", "darkolivegreen1", "darkolivegreen4", "saddlebrown", "black")),
format = c("NetCDF"),
standard_name = "fraction_absorbed_of_photosynthetically_active_radiation"),
new("Quantity",
id = "aNEE_std",
name = "Annual land sink (NEE)",
units = "GtC/year",
colours = viridis::turbo,
format = c("NetCDF"))
)
####################################################
########### NetCDF FORMAT ########################
####################################################
#' @description \code{NetCDF} - a Format object defined here for reading NetCDF files.
#' It is essentially CF-compliant netCDF with a couple of extra attributes defined.
#'
#' @format A \code{\linkS4class{Format}} object is an S4 class.
#' @aliases Format-class
#' @rdname Format-class
#' @keywords datasets
#' @export
NetCDF <- new("Format",
# UNIQUE ID
id = "NetCDF",
# FUNCTION TO LIST ALL QUANTIES AVAILABLE IN A RUN
availableQuantities = availableQuantities_NetCDF,
# FUNCTION TO READ A FIELD
getField = getField_NetCDF,
# DEFAULT GLOBAL LAYERS
predefined.layers = list(),
# QUANTITIES THAT CAN BE PULLED DIRECTLY FROM LPJ-GUESS RUNS
quantities = NetCDF.quantities
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.