R/nc_helper.R

#' @include generics.R
#' @title Wrapper class for working with ncdf4 objects
#'
#' @description
#'
#' Easier I/O for .nc objects
#'
#' @param object Object of class 'remote'
#' @param class Character-string of class to pass to getMethod
#'
#'
#' @details
#'
#' @author Brandon McNellis
#'
#' @name nc_helper
#' @rdname nc_helper
NULL
#'
#' An S4 wrapper class for ncdf4 objects
#'
#' @slot coords List of length-2 numeric vectors specifying c(lon, lat)
#' @slot file_name nc filename where data is stored
#' @slot sample Numeric ID for each unique location
#' @slot time Numeric vector of years
#' @slot time_units Length-1 character describing time units
#' @slot variables Length-n character vector of variable names
#' @slot timestamp POSIXct w/ tz = GMT indicating last time object was modified
#' @slot nc_dir Collated data directory, will be created if nonexistant
#' @slot raw_dir Raw data directory
#' @slot n_fill Tally of how many lat/lon entries have been filled for each year
#'
#' @rdname nc_helper
nc_helper <- setClass(
  'nc_helper',
  slots = list(
    coords = 'list',
    sample = 'numeric',
    time = 'numeric',
    time_units = 'character',
    variables = 'character',
    timestamp = 'POSIXct',
    nc_dir = 'character',
    raw_dir = 'character',
    file_name = 'character',
    n_fill = 'integer'
  )
)
#' @export
setValidity('nc_helper', function(object) {
  errors <- character()

  # coords
  coo <- object@coords
  if (length(coo) > 0) {
    if (!(all(
      is.list(coo),
      length(unique(sapply(coo, length))) == 1,
      length(coo[[1]]) == 2,
      length(unique(sapply(coo, class))) == 1,
      class(coo[[1]]) == 'numeric'
    ))) {
      msg <- paste0('Malformed coordinates.')
      errors <- c(errors, msg)
    }
    l_coo <- length(coo)
    l_lat <- sapply(coo, function(x) x[2])
    l_lon <- sapply(coo, function(x) x[1])
    if (any(is.na(l_lat), is.na(l_lon))) {
      msg <- paste0('Missing coordinate values.')
      errors <- c(errors, msg)
    }
    r_lat <- range(l_lat, na.rm = T)
    r_lon <- range(l_lon, na.rm = T)
    if (max(abs(r_lat[1])) > 90) {
      msg <- paste0('Latitude out of range.')
      errors <- c(errors, msg)
    }
    if (max(abs(r_lon[2])) > 180) {
      msg <- paste0('Longitude out of range.')
      errors <- c(errors, msg)
    }
  }

  # time
  time <- object@time
  if (length(time) > 0) {
    if (object@time_units == 'years') {
      yrs <- time
      cur_yr <- as.POSIXlt(Sys.time())$year + 1900
      if (all(yrs == order(yrs))) {
        msg <- paste0('Years out of order.')
        errors <- c(errors, msg)
      }
    } else if (object@time_units == 'depth') {
      dp <- c(0, 5, 15, 30, 60, 100, 200)
      if (!all(object@time %in% dp)) {
        msg <- paste0('Soil depths must be 0, 5, 15, 30, 60, 100, or 200')
        errors <- c(errors, msg)
      }
    }
  }

  # sample
  if (length(coo) != length(object@sample)) {
    msg <- paste0('Sample ID must be same length as coordinates.')
    errors <- c(errors, msg)
  }

  # n_fill
  if (length(object@n_fill) > 1) {
    msg <- paste0('n_fill should be length 1')
    errors <- c(errors, msg)
  }

  # Returns
  if (length(errors) == 0) {
    TRUE
  } else {
    errors
  }
})
#' @rdname nc_helper
#' @export
setMethod('initialize',
          signature(.Object = 'nc_helper'),
          function (.Object, ...) {
            params <- list(...)

            # coords
            if ('coords' %in% names(params)) {
              .Object@coords <- params$coords
            } else {
              .Object@coords <- list()
            }

            # nc_dir
            if ('nc_dir' %in% names(params)) {
              .Object@nc_dir <- params$nc_dir
            } else {
              .Object@nc_dir <- paste0(getwd(), '/nc_files')
            }

            # variables
            if ('variables' %in% names(params)) {
              .Object@variables <- params$variables
            }

            # time_units
            if ('time_units' %in% names(params)) {
              .Object@time_units <- params$time_units
            } else {
              if (length(.Object@time_units) == 0) {
                .Object@time_units <- 'years'
              }
            }

            # time
            if ('time' %in% names(params)) {
              .Object@time <- params$time
            } else {
              if (length(.Object@time) == 0) {
                .Object@time <- c(2000:2015)
              }
            }

            # timestamp
            st <- Sys.time()
            attr(st, 'tzone') <- 'GMT'
            .Object@timestamp <- st

            # file_name
            if ('file_name' %in% names(params)) {
              .Object@file_name <- params$file_name
            } else {
              .Object@file_name <- paste0(as.character(round(as.numeric(st))), '.nc')
            }

            # sample
            if ('sample' %in% names(params)) {
              .Object@sample <- params$sample
            } else {
              .Object@sample <- as.numeric(seq_along(params$coords))
            }

            # n_fill
            .Object@n_fill <- 1L

            # Returns:
            mt <- validObject(.Object)
            if (isTRUE(mt)) {
              return(.Object)
            } else {
              return(mt)
            }
          }
)
#' @export
setMethod('print',
          signature(x = 'nc_helper'),
          function (x, ...) {
            str(x, max.level = 2)
            invisible()
          }
)
#' @rdname nc_helper
#' @export
setMethod("AddCoords",
          signature(object = "nc_helper"),
          function(object, lon, lat, sample, overwrite = F) {
            if (length(object@coords) != 0) {
              if (length(object@sample) != 0) {
                if (!overwrite) {
                  message('coords present, updating sample')
                  if (length(sample) != length(object@coords)) {
                    if (is.null(sample)) {
                      object@sample <- seq(length(object@coords))
                    } else {
                      stop('bad sample input length')
                    }
                  } else {
                    object@sample <- sample
                  }
                  return(object)
                } else {
                  message('overwriting coords')
                }
              } else {
                stop('coords and sample already present')
              }
            }
            object@coords <- CoordVecsToList(lon, lat)
            if (length(object@sample) == 0) {
              if (is.null(sample)) {
                object@sample <- seq(length(object@coords))
              } else {
                object@sample <- sample
              }
            }

            return(object)
          }
)
#' @rdname nc_helper
#' @docType methods
#' @export
setMethod('SetupDataFile',
          signature(object = 'nc_helper'),
          function(object, overwrite = F, backup = F) {
            dwd <- object@nc_dir
            fname <- paste0(object@nc_dir, '/', object@file_name)

            if (file.exists(fname)) {
              if (overwrite) {
                cat('\nOVERWRITING = T\n')
                if (backup) {
                  cat('\nBacking up .nc file...')
                  tof <- paste0(object@nc_dir, '/backup/', object@file_name)
                  file.copy(from = fname, to = tof)
                }
                file.remove(fname)
              } else {
                stop('.nc file already exists')
              }
            }
            if (!(dir.exists(dwd))) {
              dir.create(dwd)
            }

            # Create .nc object
            in_dim <- list(
              ncdf4::ncdim_def('sample', units = '', vals = object@sample, unlim = T),
              ncdf4::ncdim_def('time', units = object@time_units, vals = object@time, unlim = T)
            )

            vl <- vector('list', length(object@variables))
            for (i in seq_along(vl)) {
              ii <- object@variables[i]
              vl[[i]] <- ncdf4::ncvar_def(name = ii, units = '', dim = in_dim, missval = NULL)
            }

            cat('\nWriting nc file...\n')
            ncdf4::nc_create(fname, vl)
            invisible(object)
          }
)
#' @rdname nc_helper
#' @export
setMethod('ManipulateDataFile',
          signature(object = 'nc_helper'),
          function(object, rename = NULL, move = NULL) {
            stopifnot(validObject(object))
            path <- paste0(object@nc_dir, '/', object@file_name)
            if (!is.null(rename)) {
              stopifnot(is.character(rename), length(rename) == 1)
              if (length(grep('.nc', object@file_name)) < 1) {
                stop('Need the .nc file extension in the new filename.')
              }
              pnew <- paste0(object@nc_dir, '/', rename)
              file.rename(from = path, to = pnew)
              object@file_name <- rename
            }
            if (!is.null(move)) {
              stopifnot(is.character(rename), length(rename) == 1)
              pnew <- paste0(move, '/', object@file_name)
              file.copy(from = path, to = pnew)
              file.remove(path)
              object@nc_dir <- move
            }

            return(object)
          }
)
#' @rdname nc_helper
#' @export
setMethod("FillArray",
          signature(object = "nc_helper"),
          function(object, df, nc = NULL) {
            validObject(object)

            if (is.null(nc)) {
              fname <- paste0(object@nc_dir, '/', object@file_name)
              nc <- ncdf4::nc_open(fname, write = T)
            }

            if (length(na.omit(unique(df$time))) > 1) {
              stop('implement this?')
              t0 <- 1
            } else {
              t0 <- which(object@time == na.omit(unique(df$time)))
            }
            tt <- ifelse(length(t0) == 1, t0, 1)
            s0 <- which(object@sample == na.omit(unique(df$sample)))
            ss <- ifelse(length(s0) == 1, s0, 1)
            st <- c(ss, tt)
            ct <- c(length(na.omit(unique(df$sample))), length(na.omit(unique(df$time))))

            cat('\n')
            cat(' ss:', ss, ' tt:', tt, ' st:', st, ' ct:', ct) # debug

            browser()
            for (i in seq(ncol(df))) {
              ii <- colnames(df)[i]
              if (ii %in% object@variables) {
                vali <- df[, ii]# assumes sample is first column
                ncdf4::ncvar_put(nc = nc, varid = ii, start = st, count = ct, vals = vali)
                #cat(' var:', ii) # debug
                #tvals <- ncdf4::ncvar_get(nc = nc, varid = ii, start = st, count = ct) # debug
                #cat(' ncvar_get:', tvals) # debug
              } else {
                next
              }
            } # end i

            invisible()
          }
)
#' @rdname nc_helper
#' @export
setMethod('PullDataFrame',
          signature(object = 'nc_helper'),
          function(object) {
            validObject(object)
            fname <- paste0(object@nc_dir, '/', object@file_name)
            nc <- ncdf4::nc_open(fname, write = F)
            ndim <- length(nc.get.dim.names(nc))

            dfnc <- length(object@variables) + 4
            df_out <- data.frame(matrix(nrow = 0, ncol = dfnc), stringsAsFactors = F)

            # 3-col df template for i loop
            c0 <- CoordVecsToList(object@coords)
            df0 <- data.frame(matrix(nrow = length(object@sample), ncol = 0), stringsAsFactors = F)
            df0$lon <- c0[, 1]
            df0$lat <- c0[, 2]
            df0$sample <- object@sample

            if (ndim == 2) {
              for (i in seq_along(object@time)) {
                ii <- object@time[i]
                dfi <- df0
                dfi$time <- rep(ii, length(object@sample))

                for (j in seq_along(object@variables)) {
                  jj <- object@variables[j]
                  dfi[[jj]] <- ncdf4::ncvar_get(nc = nc, varid = jj, start = c(1, i), count = c(-1, 1))
                } # end j

                df_out <- rbind(df_out, dfi)
              }
            } else if (ndim == 1) {
              dfi <- df0

              for (j in seq_along(object@variables)) {
                jj <- object@variables[j]
                dfi[[jj]] <- ncdf4::ncvar_get(nc = nc, varid = jj, start = c(1), count = c(-1))
              }

              df_out <- dfi

            } else {
              stop('unsupported ndim')
            }


            df_out[, ] <- lapply(df_out, function(x) {
              x[is.nan(x)] <- NA
              x
            })

            return(df_out)
          }
)
#' @rdname nc_helper
#' @export
setMethod('CoordVecsToList',
          signature(x = 'numeric'),
          function(x, y = NULL) {
            stopifnot(
              is.vector(x),
              is.vector(y),
              length(x) == length(y),
              is.numeric(y)
            )
            z <- cbind(x, y)
            zz <- vector('list', nrow(z))
            for (i in seq_along(zz)) {
              zz[[i]] <- as.vector(z[i, ])
            }
            return(zz)
          }
)
#' @rdname nc_helper
#' @export
setMethod('CoordVecsToList',
          signature(x = 'data.frame'),
          function(x, y = NULL) {
            stopifnot(
              is.null(y),
              ncol(x) == 2
            )
            z <- vector('list', nrow(x))
            for (i in seq_along(z)) {
              z[[i]] <- as.vector(x[i, ])
            }
            return(z)
          }
)#' @rdname nc_helper
#' @export
setMethod('CoordVecsToList',
          signature(x = 'list'),
          function(x, y = NULL) {
            stopifnot(
              is.null(y),
              ncol(x) == 1
            )
            z <- data.frame(matrix(ncol = 2, nrow = length(x)))
            z[, 1] <- as.numeric(sapply(x, function(xx) xx[1]))
            z[, 2] <- as.numeric(sapply(x, function(xx) xx[2]))
            colnames(z) <- c('lon', 'lat')
            return(z)
          }
)
#' @rdname nc_helper
#' @export
setMethod('AggregateByDimension',
          signature(object = 'ncdf4'),
          function(object, dim, FUN, ...) {

            dims0 <- sapply(object$var[[1]]$dim, function(x) x[[1]])
            stopifnot(dim %in% dims0)
            wd0 <- which(dims0 == dim)
            wd1 <- seq_along(dims0)[-wd0]
            dims1 <- vector('list', length(wd1))
            for (i in seq_along(wd1)) {
              ii <- wd1[i]
              dim1[[i]] <- object$var[[1]]$dim[[ii]]
            }
            v0 <- names(object$var)
            u0 <- sapply(object$var, function(x) x$units)
            m0 <- sapply(object$var, function(x) x$missval)
            v1 <- vector('list', length(v0))

            for (i in seq_along(v1)) {
              ii <- v0[i]
              v1[[i]] <- ncdf4::ncvar_def(ii, u0[i], dim1, m0[i])
            }

            fn0 <- paste0('AGG', deparse(substitute(FUN)))
            fnn <- paste0(strsplit(object$filename, '.nc')[[1]], fn0, '.nc')

            nc1 <- ncdf4::nc_create(fnn, vars = v1)

            # find new values

            # return
            nc_close(object)
            nc_close(nc1)
            return(object)

})

#' @rdname nc_helper
#' @export
setMethod('AggregateByDimension',
          signature(object = 'nc_helper'),
          function(object, dim, FUN, ...) {
            validObject(object)
            params <- list(...)

            fname <- paste0(object@nc_dir, '/', object@file_name)
            object@file_name <- paste('AGGREGATE', deparse(substitute(FUN)) ,object@file_name, sep = '_')
            fname_new <- paste0(object@nc_dir, '/', object@file_name)
            nc_old <- ncdf4::nc_open(fname, write = F)
            dims_old <- nc_old$var[[1]]$dim

            if (file.exists(fname_new)) {
              file.remove(fname_new)
            }

            ndim <- length(nc.get.dim.names(nc_old))
            wd0 <- which(nc.get.dim.names(nc_old) == dim)
            dim0 <- dims_old[[wd0]]

            wd1 <- which(nc.get.dim.names(nc_old) != dim)
            dim1 <- vector('list', length(wd1))
            for (i in seq_along(wd1)) {
              ii <- wd1[i]
              dim1[[i]] <- dims_old[[ii]]
            }

            new_vars <- vector('list', length(object@variables))
            for (i in seq_along(new_vars)) {
              ii <- object@variables[i]
              new_vars[[i]] <- ncdf4::ncvar_def(ii, '', dim1, NA)
            }
            new_nc <- ncdf4::nc_create(fname_new, new_vars)

            for (i in seq_along(object@sample)) {
              inc_val <- paste0(format(i / length(object@sample) * 100, digits = 0, nsmall = 2), ' %')
              #cat('\r', format(i / length(object@sample) * 100, digits = 0, nsmall = 2), '%')
              ii <- object@sample[i]

              if (ndim == 3) {
                for (j in seq_along(object@time)) {
                  jj <- object@time[j]
                  st <- c(i, 1, j)
                  st0 <- c(i, j)
                  ct <- c(1, -1, 1)
                  ct0 <- c(1, 1)

                  cat('\r', inc_val, ' time:', jj, ' st:', st, ' ct:', ct, '   ')

                  for (k in seq_along(object@variables)) {
                    kk <- object@variables[k]
                    gv <- ncdf4::ncvar_get(nc_old, kk, start = st, count = ct)
                    params$x <- gv
                    valk <- do.call(FUN, args = params)
                    ncdf4::ncvar_put(new_nc, varid = kk, vals = valk, start = st0, count = ct0)
                  } # end k
                } # end j

              } else if (ndim == 2) {
                st <- c(i, 1)
                st0 <- c(i)
                ct <- c(1, -1)
                ct0 <- c(1)

                cat('\r', inc_val, ' st:', st, ' ct:', ct, '   ')

                for (k in seq_along(object@variables)) {
                  kk <- object@variables[k]
                  gv <- ncdf4::ncvar_get(nc_old, kk, start = st, count = ct)
                  valk <- do.call(FUN, args = list(gv))
                  ncdf4::ncvar_put(new_nc, varid = kk, vals = valk, start = st0, count = ct0)
                } # end k
              } else {
                stop('unsupported dims')
              }

            } # end i

            ncdf4::nc_close(new_nc)
            ncdf4::nc_close(nc_old)
            return(object)
          }
)
#' @rdname nc_helper
#' @export
UpdateTimeStamp <- function(object) {
  stopifnot(validObject(object))
  st <- Sys.time()
  attr(st, 'tzone') <- 'GMT'
  object@timestamp <- st
  return(object)
}
#' @rdname nc_helper
#' @export
setMethod('GetMeta',
          signature(class = 'character'),
          function(class, ...) {
            mf <- selectMethod('GetMeta', signature = class)(...)
          }
)
bmcnellis/RSFIA documentation built on June 1, 2019, 7:40 a.m.