R/CFObject.R
In ncdfCF: Easy Access to NetCDF Files with CF Metadata Conventions

#' @import methods
#' @import R6
#' @include NCObject.R
NULL

#' CF base object
#'
#' @description This class is a basic ancestor to all classes that represent CF
#'   objects, specifically data variables and axes. More useful classes use this
#'   class as ancestor.
#'
#' @docType class
CFObject <- R6::R6Class("CFObject",
  private = list(
    # The NCVariable instance that this CF object represents if it was read from
    # file.
    .NCvar = NULL,

    # The id and name of this object. They are taken from the NCVariable upon
    # creation, if supplied, otherwise the name should be supplied and the id
    # will be generated.
    .id = -1L,
    .name = "",

    # The values of this object. This class does not manipulate the values in
    # any way - that is the job of descendant classes.
    .values = NULL,

    # List of start and count vectors for reading data from file. The length of
    # the vectors must agree with the array on file or be `NA`.
    .start_count = list(start = NA, count = NA),

    # The data type of the data in the object. Taken from NCvar if set,
    # otherwise the descending class must set it explicitly when receiving its
    # data values.
    .data_type = "NC_NAT", # Not a type

    # Flag to indicate if the data on disk is too large for memory.
    .data_oversized = FALSE,

    # The attributes of the CF object. Upon read from a netCDF resource they are
    # copied from the NCVariable. For a new CF object, just an empty data.frame
    # with the appropriate columns.
    .attributes = data.frame(name = character(0), type = character(0), length = integer(0), value = numeric(0)),

    # Sanitize the attributes argument. Returns a safe data.frame to use.
    check_attributes = function(attributes) {
      if (!is.data.frame(attributes))
        stop("Argument `attributes` must be a valid data.frame.", call. = FALSE) # nocov

      if (!nrow(attributes))
        return(data.frame(name = character(0), type = character(0), length = integer(0), value = numeric(0)))

      # FIXME: Ensure that the appropriate columns and types are present

      attributes
    },

    # Sanitize the start and count values. NAs are converted to numbers and
    # values have to agree with .values and .NCvar. Returns the sanitized start
    # and count vectors as a list.
    check_start_count = function(start, count) {
      d <- as.integer(self$dim())
      if (!length(d)) # When a NC variable does not have any data
        return(list(start = NA, count = NA))

      if (length(start) == 1L && is.na(start))
        start <- rep(1L, length(d))
      else
        start[is.na(start)] <- 1L

      if (any(start > d))
        stop("Start values cannot be larger than the dimensions of the data.", call. = FALSE) # nocov
      if (length(count) == 1L && is.na(count))
        count <- d - start + 1L
      else {
        ndx <- which(is.na(count))
        count[ndx] <- d[ndx] - start[ndx] + 1L
      }

      if (any(count > d - start + 1L))
        stop("Count values cannot extend beyond the dimensions of the data.", call. = FALSE) # nocov

      list(start = start, count = count)
    },

    # Set the values of the object. Perform some basic checks when set programmatically.
    # Values may be NULL
    set_values = function(values) {
      # FIXME: Check that dim(values) agrees with NCvar and data_type

      # Check if we can find a vtype from the NCvar, possibly packed
      if (!is.null(private$.NCvar)) {
        private$.data_type <- private$.NCvar$attribute("scale_factor", "type")
        if (is.na(private$.data_type))
          private$.data_type <- private$.NCvar$attribute("add_offset", "type")
        if (is.na(private$.data_type))
          private$.data_type <- private$.NCvar$vtype
        else
          # Data is packed in the netCDF file: throw away the attributes and let
          # RNetCDF deal with unpacking when reading the data using the
          # attributes in the NCVariable.
          self$delete_attribute(c("_FillValue", "scale_factor", "add_offset",
                                  "valid_range", "valid_min", "valid_max",
                                  "missing_value"))
      } else if (!is.null(values)) {
        if (storage.mode(values) == "double") {
          # If the data is numeric, check attributes to select between NC_DOUBLE and NC_FLOAT
          if (!is.na(dt <- self$attribute("_FillValue", "type"))) private$.data_type <- dt
          else if (!is.na(dt <- self$attribute("missing_value", "type"))) private$.data_type <- dt
          else private$.data_type <- "NC_DOUBLE"
        } else {
          # Get the data_type from the values
          private$.data_type <- switch(storage.mode(values),
                                        "character" = "NC_STRING",
                                        "integer" = "NC_INT",
                                        "logical" = "NC_SHORT",
                                        stop("Unsupported data type for a CF object.", call. = FALSE))
        }
      } else
        private$.data_type <- "NC_NAT"

      # Set the actual_range attribute for the values
      if (is.null(values))
        self$delete_attribute("actual_range")
      else {
        rng <- range(values, na.rm = TRUE)
        if (is.na(rng[1L]))
          self$delete_attribute("actual_range")
        else {
          if (is.numeric(rng))
            rng <- round(rng, CF.options$digits)
          self$set_attribute("actual_range", private$.data_type, rng)
        }
      }

      private$.values <- values
    },

    # Read the data of the CF object from file. The data is cached by `self` so
    # repeated calls do not access the netCDF resource, unless argument
    # `refresh` is `TRUE`.
    # This method will not assess how big the data is before reading it so there
    # is a chance that memory will be exhausted. The calling code should check
    # for this possibility and break up the reading of data into chunks.
    # @param refresh Should the data be read from file if the object is linked?
    #   This will replace current values, if previously loaded. Default `FALSE`.
    # @return An array of data, invisibly, as prescribed by the `start` and
    #   `count` values used to create this object. If the object is not backed
    #   by a netCDF resource, returns `NULL`.
    read_data = function(refresh = FALSE) {
      if ((!is.null(private$.NCvar)) && (is.null(private$.values) || refresh))
        private$set_values(private$.NCvar$get_data(private$.start_count$start, private$.start_count$count))
      invisible(private$.values)
    },

    # Read a chunk of data of the CF object, as defined by the `start` and
    # `count` vectors. Note that these vectors are relative to any subset of the
    # netCDF data variable that this CF object refers to. The data read by this
    # method will not be stored in `self` so the calling code must take a
    # reference to it.
    # @param start Vector of indices where to start reading data along the
    #   dimensions of the array. The vector must be `NA` to read all data,
    #   otherwise it must have agree with the dimensions of the array.
    # @param count Vector of number of elements to read along each dimension of
    #   the array on file. The vector must be `NA` to read to the end of each
    #   dimension, otherwise its value must agree with the corresponding `start`
    #   value and the dimension of the array.
    # @return An array of data, as prescribed by the `start` and `count`
    #   arguments, or `NULL` if there is no data.
    read_chunk = function(start, count) {
      sc <- private$check_start_count(start, count)
      if (is.na(sc$start[1L])) return(NULL)

      if (!is.null(private$.values)) {
        # Extract from loaded data
        cll <- paste0("private$.values[", paste(sc$start, ":", sc$start + sc$count - 1L, sep = "", collapse = ", "), "]")
        eval(parse(text = cll))
      } else {
        # Read from the netCDF resource
        start <- private$.start_count$start + sc$start - 1L
        private$.NCvar$get_data(start, sc$count)
      }
    },

    # Make sure we detach before we poof out.
    finalize = function() {
      if (!is.null(private$.NCvar))
        private$.NCvar$detach(self)
    }
  ),
  public = list(
    #' @description Create a new `CFobject` instance from a variable in a netCDF
    #'   resource. This method is called upon opening a netCDF resource. It is
    #'   rarely, if ever, useful to call this constructor directly. Instead, use
    #'   the methods from higher-level classes such as [CFVariable].
    #'
    #' @param var The [NCVariable] instance upon which this CF object is based
    #'   when read from a netCDF resource, or the name for the new CF object to
    #'   be created.
    #' @param values Optional. The values of the object in an array.
    #' @param start Optional. Vector of indices where to start reading data
    #'   along the dimensions of the array on file. The vector must be `NA` to
    #'   read all data, otherwise it must have agree with the dimensions of the
    #'   array on file. Ignored when argument `var` is not an `NCVariable`
    #'   instance.
    #' @param count Optional. Vector of number of elements to read along each
    #'   dimension of the array on file. The vector must be `NA` to read to the
    #'   end of each dimension, otherwise its value must agree with the
    #'   corresponding `start` value and the dimension of the array on file.
    #'   Ignored when argument `var` is not an `NCVariable` instance.
    #' @param attributes Optional. A `data.frame` with the attributes of the
    #'   object. When argument `var` is an `NCVariable` instance and this
    #'   argument is an empty `data.frame` (default), arguments will be read
    #'   from the resource.
    #' @return A `CFObject` instance.
    initialize = function(var, values, start = 1L, count = NA, attributes = data.frame()) {
      atts <- private$check_attributes(attributes)

      if (is.character(var)) {
        if (!.is_valid_name(var))
          stop("Name is not valid for a CF object.", call. = FALSE) # nocov
        private$.name <- var
        private$.id <- CF$newVarId()
        private$.attributes <- atts
      } else {
        var$CF <- self
        private$.NCvar <- var
        private$.id <- var$id
        private$.name <- var$name
        private$.start_count <- private$check_start_count(start, count)
        private$.attributes <- if (nrow(attributes)) atts
                               else var$attributes[-1L]
      }

      if (missing(values) || (length(values) == 1L && is.na(values)))
        values <- NULL
      private$set_values(values)
    },

    #' @description Detach the current object from its underlying netCDF
    #'   resource. If necessary, data is read from the resource before
    #'   detaching.
    detach = function() {
      if (!is.null(private$.NCvar)) {
        if (is.null(private$.values))
          private$read_data()
        private$.NCvar$detach(self)
        private$.NCvar <- NULL
      }
    },

    #' @description Retrieve the dimensions of the data of this object. This
    #' could be for the data on file or for in-memory data.
    #' @param dimension Optional. The index of the dimension to retrieve the
    #' length for. If omitted, retrieve the lengths of all dimensions.
    #' @return Integer vector with the length of each requested dimension.
    dim = function(dimension) {
      len <- if (!is.null(private$.values)) {
        d <- dim(private$.values)
        if (length(d) == 0L) length(private$.values) else d
      } else if (self$has_resource) {
        if (length(private$.start_count$count) > 1L || !is.na(private$.start_count$count))
          private$.start_count$count - private$.start_count$start + 1L
        else
          private$.NCvar$dim()
      } else
        return(NULL)

      if (missing(dimension))
        len
      else
        len[dimension]
    },

    #' @description Retrieve an attribute of a CF object.
    #'
    #' @param att Single character string of attribute to return.
    #' @param field The field of the attribute to return values from. This must
    #'   be "value" (default) or "type".
    #' @return If the `field` argument is "type", a character string. If `field`
    #'   is "value", a single value of the type of the attribute, or a vector
    #'   when the attribute has multiple values. If no attribute is named with a
    #'   value of argument `att` `NA` is returned.
    attribute = function(att, field = "value") {
      if (length(att) > 1L)
        stop("Can extract only one attribute at a time.", call. = FALSE) # nocov

      atts <- private$.attributes
      if (!nrow(atts)) return(NA)
      val <- atts[atts$name == att, ]
      if (!nrow(val)) return(NA)

      val[[field]][[1L]]
    },

    #' @description Print the attributes of the CF object to the console.
    #'
    #' @param width The maximum width of each column in the `data.frame` when
    #' printed to the console.
    print_attributes = function(width = 30L) {
      if (nrow(private$.attributes)) {
        cat("\nAttributes:\n")
        print(.slim.data.frame(private$.attributes, width), right = FALSE, row.names = FALSE)
      }
    },

    #' @description Add an attribute. If an attribute `name` already exists, it
    #'   will be overwritten.
    #' @param name The name of the attribute. The name must begin with a letter
    #'   and be composed of letters, digits, and underscores, with a maximum
    #'   length of 255 characters. UTF-8 characters are not supported in
    #'   attribute names.
    #' @param type The type of the attribute, as a string value of a netCDF data
    #'   type.
    #' @param value The value of the attribute. This can be of any supported
    #'   type, including a vector or list of values. Matrices, arrays and like
    #'   compound data structures should be stored as a data variable, not as an
    #'   attribute and they are thus not allowed. In general, an attribute
    #'   should be a character value, a numeric value, a logical value, or a
    #'   short vector or list of any of these. Values passed in a list will be
    #'   coerced to their common mode.
    #' @return Self, invisibly.
    set_attribute = function(name, type, value) {
      if (is.na(name) || !is.character(name) || length(name) != 1L)
        stop("Must name one attribute to set values for.", call. = FALSE) # nocov
      if (!type %in% netcdf_data_types)
        stop("Invalid netCDF data type.", call. = FALSE) # nocov

      # Prepare values
      value <- unlist(value, use.names = FALSE)
      if (is.list(value) || is.array(value))
        stop("Unsupported value for attribute (compound list, matrix, array?).", call. = FALSE) # nocov
      if (is.character(value)) {
        if (type == "NC_STRING") len <- length(value)
        else if (type == "NC_CHAR") {
          value <- paste(value, sep = ", ")
          len <- nchar(value)
        } else stop("Wrong attribute type for string value.", call. = FALSE) # nocov
      } else {
        if (is.logical(value)) value <- as.integer(value)
        if (is.numeric(value)) len <- length(value)
        else stop("Unsupported value for attribute.", call. = FALSE) # nocov
      }
      if (type != "NC_CHAR") value <- list(value)

      # Check if the name refers to an existing attribute
      if (nrow(private$.attributes) && nrow(private$.attributes[private$.attributes$name == name, ])) {
        # If so, replace its type and value
        private$.attributes[private$.attributes$name == name, ]$type <- type
        private$.attributes[private$.attributes$name == name, ]$length <- len
        private$.attributes[private$.attributes$name == name, ]$value <- value
      } else {
        # If not, create a new attribute
        if (!.is_valid_name(name))
          stop("Attribute name is not valid.", call. = FALSE) # nocov

        df <- data.frame(name = name, type = type, length = len)
        df$value <- value # Preserve lists
        private$.attributes <- rbind(private$.attributes, df)
      }
      invisible(self)
    },

    #' @description Append the text value of an attribute. If an attribute
    #'   `name` already exists, the `value` will be appended to the existing
    #'   value of the attribute. If the attribute `name` does not exist it will
    #'   be created. The attribute must be of "NC_CHAR" or "NC_STRING" type; in
    #'   the latter case having only a single string value.
    #' @param name The name of the attribute. The name must begin with a letter
    #'   and be composed of letters, digits, and underscores, with a maximum
    #'   length of 255 characters. UTF-8 characters are not supported in
    #'   attribute names.
    #' @param value The character value of the attribute to append. This must be
    #'   a character string.
    #' @param sep The separator to use. Default is `"; "`.
    #' @param prepend Logical to flag if the supplied `value` should be placed
    #'   before the existing value. Default is `FALSE`.
    #' @return Self, invisibly.
    append_attribute = function(name, value, sep = "; ", prepend = FALSE) {
      if (is.na(name) || !is.character(name) || length(name) != 1L)
        stop("Must name one attribute to append values for.", call. = FALSE)
      if (is.na(name) || !is.character(name) || length(name) != 1L)
        stop("Value to append must be a single character string.", call. = FALSE)

      if (nchar(value) > 0L) {
        # Check if the name refers to an existing attribute
        if (nrow(private$.attributes[private$.attributes$name == name, ])) {
          new_val <- if (prepend)
            paste0(value, sep, private$.attributes[private$.attributes$name == name, ]$value)
          else
            paste0(private$.attributes[private$.attributes$name == name, ]$value, sep, value)
          private$.attributes[private$.attributes$name == name, ]$value <- new_val
          private$.attributes[private$.attributes$name == name, ]$length <- nchar(new_val)
        } else # else create a new attribute
          self$set_attribute(name, "NC_STRING", value)
      }

      invisible(self)
    },

    #' @description Delete attributes. If an attribute `name` is not present
    #' this method simply returns.
    #' @param name Vector of names of the attributes to delete.
    #' @return Self, invisibly.
    delete_attribute = function(name) {
      private$.attributes <- private$.attributes[!private$.attributes$name %in% name, ]
      invisible(self)
    },

    #' @description Write the attributes of this object to a netCDF file.
    #' @param nc The handle to the netCDF file opened for writing.
    #' @param nm The NC variable name or "NC_GLOBAL" to write the attributes to.
    #' @return Self, invisibly.
    write_attributes = function(nc, nm) {
      if ((num_atts <- nrow(private$.attributes)) > 0L)
        for (a in 1L:num_atts) {
          attr <- private$.attributes[a,]
          RNetCDF::att.put.nc(nc, nm, attr$name, attr$type, unlist(attr$value, use.names = FALSE))
        }
      invisible(self)
    },

    #' @description Add names of axes or auxiliary coordinates to the
    #'   "coordinates" attribute, avoiding duplicates and retaining previous
    #'   values.
    #' @param crds Vector of axis or auxiliary coordinate names to add to the
    #'   attribute.
    #' @return Self, invisibly.
    update_coordinates_attribute = function(crds) {
      current <- private$.attributes[private$.attributes$name == "coordinates", ]
      if (nrow(current)) {
        # There is a "coordinates" attribute already so append values
        new_val <- paste(unique(c(strsplit(current[[1L, "value"]], " ")[[1L]], crds)), collapse = " ")
        private$.attributes[private$.attributes$name == "coordinates", ]$value <- new_val
        private$.attributes[private$.attributes$name == "coordinates", ]$length <- nchar(new_val)
      } else
        # Make a new "coordinates" attribute
        self$set_attribute("coordinates", "NC_CHAR", paste(crds, collapse = " "))
      invisible(self)
    }
  ),
  active = list(
    #' @field friendlyClassName (read-only) A nice description of the class.
    friendlyClassName = function(value) {
      if (missing(value))
        "Generic CF object"
    },

    #' @field id (read-only) Retrieve the identifier of the CF object.
    id = function(value) {
      if (missing(value))
        private$.id
    },

    #' @field name Set or retrieve the name of the CF object. The name must be a
    #'   valid netCDF name: start with a character, use only characters, numbers
    #'   and the underscore, and be at most 255 characters long.
    name = function(value) {
      if (missing(value))
        private$.name
      else if (.is_valid_name(value))
        private$.name <- value
      else
        stop("Invalid name for a CF object.", call. = FALSE) # nocov
    },

    #' @field fullname (read-only) The fully-qualified name of the CF object.
    fullname = function(value) {
      if (missing(value)) {

        grp <- self$group
        if (is.null(grp))
          private$.name
        else {
          if ((gnm <- grp$name) == "/")
            private$.name
          else
            paste0("/", gnm, "/", private$.name)
        }
      }
    },

    #' @field group (read-only) Retrieve the [NCGroup] that this object is
    #'   located in.
    group = function(value) {
      if (missing(value) && self$has_resource)
        private$.NCvar$group
      else
        NULL
    },

    #' @field attributes (read-only) Retrieve a `data.frame` with the attributes
    #'   of the CF object.
    attributes = function(value) {
      if (missing(value))
        private$.attributes
    },

    #' @field has_resource (read-only) Flag that indicates if this object has an
    #'   underlying netCDF resource.
    has_resource = function(value) {
      !is.null(private$.NCvar)
    },

    #' @field NCvar (read-only) The `NCVariable` object that links to an
    #'   underlying netCDF resource, or `NULL` if not linked.
    NCvar = function(value) {
      if (missing(value))
        private$.NCvar
    },

    #' @field data_type Set or retrieve the data type of the data in the
    #'   object. Setting the data type to a wrong value can have unpredictable
    #'   but catastrophic consequences.
    data_type = function(value) {
      if (missing(value))
        private$.data_type
      else if (self$has_resource)
        stop("Cannot set the data type of a variable present on file.", call. = FALSE) # nocov
      else if (value %in% netcdf_data_types)
        private$.data_type <- value
      else
        stop("Unrecognized data type for a netCDF variable.", call. = FALSE) # nocov
    },

    #' @field ndims (read-only) Retrieve the dimensionality of the data in the
    #'   array, or the netCDF resource.
    ndims = function(value) {
      if (missing(value))
        length(self$dim())
    },

    #' @field array_indices Returns a list with columns "start" and "count"
    #'   giving the indices for reading the data of this object from a netCDF
    #'   resource.
    array_indices = function(value) {
      if (missing(value))
        private$.start_count
    }
  )
)

#' @name dimnames
#' @title Names or axis values of an CF object
#'
#' @description Retrieve the variable or axis names of an `ncdfCF` object. The
#'   `names()` function gives the names of the variables in the data set,
#'   preceded by the path to the group if the resource uses groups. The return
#'   value of the `dimnames()` function differs depending on the type of object:
#' * `CFDataset`, `CFVariable`: The dimnames are returned as a vector of the
#'   names of the axes of the data set or variable, preceded with the path to
#'   the group if the resource uses groups. Note that this differs markedly from
#'   the `base::dimnames()` functionality.
#' * `CFAxisNumeric`, `CFAxisLongitude`, `CFAxisLatitude`, `CFAxisVertical`: The
#'   coordinate values along the axis as a numeric vector.
#' * `CFAxisTime`: The coordinate values along the axis as a
#'   character vector containing timestamps in ISO8601 format. This could be
#'   dates or date-times if time information is available in the axis.
#' * `CFAxisCharacter`: The coordinate values along the axis as
#'   a character vector.
#' * `CFAxisDiscrete`: The index values of the axis, either along the entire
#'   axis, or a portion thereof.
#'
#' @param x An `CFObject` whose axis names to retrieve. This could be
#'   `CFDataset`, `CFVariable`, or a class descending from `CFAxis`.
#'
#' @return A vector as described in the Description section.
#' @examples
#' fn <- system.file("extdata",
#'   "pr_day_EC-Earth3-CC_ssp245_r1i1p1f1_gr_20230101-20231231_vncdfCF.nc",
#'   package = "ncdfCF")
#' ds <- open_ncdf(fn)
#'
#' # Names of data variables
#' names(ds)
#'
#' # CFDataset
#' dimnames(ds)
#'
#' # CFVariable
#' pr <- ds[["pr"]]
#' dimnames(pr)
#'
#' # CFAxisNumeric
#' lon <- ds[["lon"]]
#' dimnames(lon)
#'
#' # CFAxisTime
#' t <- ds[["time"]]
#' dimnames(t)
NULL
Any scripts or data that you put into this service are public.
ncdfCF documentation built on Sept. 13, 2025, 5:07 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
ncdfCF
Easy Access to NetCDF Files with CF Metadata Conventions

R/CFObject.R
In ncdfCF: Easy Access to NetCDF Files with CF Metadata Conventions

Try the ncdfCF package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ncdfCF Easy Access to NetCDF Files with CF Metadata Conventions

R/CFObject.R In ncdfCF: Easy Access to NetCDF Files with CF Metadata Conventions

Try the ncdfCF package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ncdfCF
Easy Access to NetCDF Files with CF Metadata Conventions

R/CFObject.R
In ncdfCF: Easy Access to NetCDF Files with CF Metadata Conventions