Nothing
#' Tools for Manipulating and Summarizing CMIP5 Data
#'
#' Working with CMIP5 data can be tricky, forcing scientists to
#' write custom scripts and programs. The `RCMIP5` package aims
#' to ease this process, providing a standard, robust, and
#' high-performance set of functions to (i) explore what data
#' have been downloaded, (ii) identify missing data, (iii)
#' average (or apply other mathematical operations) across
#' experimental ensembles, (iv) produce both temporal and spatial
#' statistical summaries, and (v) produce
#' easy-to-work-with graphical and data summaries.
#'
#' ...
#'
#' @references Todd-Brown and Bond-Lamberty, 2014: (in prep).
#' @references Taylor et al., 2012:
#' An overview of CMIP5 and the experiment design, Bulletin of the American
#' Meteorological Society, 93, 485-498.
#' \url{http://dx.doi.org/10.1175/BAMS-D-11-00094.1}
#' @import dplyr digest abind assertthat Matrix
#' @docType package
#' @name RCMIP5
NULL
#' The 'cmip5data' class
#'
#' This constructor has two functions. First, given a list, it makes the list
#' a cmip5data-class object (no check is made that the list has appropriate
#' fields though). Second, if given a numeric value(s), it returns sample/
#' example data in the newly constructed object. This is used extensively by
#' the testing code.
#'
#' @param x A list or numeric. If x is a list then the fields are expected to match those of the returned cmip5data object. If x is a numeric sample data is created where the numeric indicates the years of sample data to return.
#' @param lonlat Boolean indicating whether to create lon and lat dimensions
#' @param lonsize Integer size of longitude dimension
#' @param latsize Integer size of latitude dimension
#' @param Z logical. Create Z dimension?
#' @param Zsize integer. Size of Z dimension
#' @param time logical. Create time dimension?
#' @param monthly logical. Monthly (if not, annual) data?
#' @param randomize logical. Random sample data?
#' @param irregular logical. Irregular lon/lat grid?
#' @param verbose logical. Print info as we go?
#' @param loadAs a string identifying possible structures for values. Currently: 'data.frame' and 'array' the only valid options.
#' @return A cmip5data object, which is a list with the following fields:
#' \item{files}{Array of strings containg the file(s) included in this dataset}
#' \item{variable}{String containg the variable name described by this dataset}
#' \item{model}{String containing the model name of this dataset}
#' \item{experiment}{String containing the experiment name of this dataset}
#' \item{ensembles}{Array of strings containg the ensemble(s) included in this dataset}
#' \item{domain}{String containing the domain name of this dataset}
#' \item{val}{Data frame holding data, with fields lon, lat, Z, time}
#' \item{valUnit}{String containing the value units}
#' \item{lon}{Numeric vector containing longitude values; may be \code{NULL}}
#' \item{lat}{Numeric vector containing latitude values; may be \code{NULL}}
#' \item{Z}{Numeric vector Z values; may be \code{NULL}}
#' \item{time}{Numeric vector containing time values; may be \code{NULL}}
#' \item{dimNames}{Array of strings containing the original (NetCDF) dimension names}
#' \item{calendarStr}{String defining the calendar type; may be \code{NULL}}
#' \item{debug}{List with additional data (subject to change)}
#' \item{provenance}{Data frame with the object's provenance. See \code{\link{addProvenance}}}
#' \item{numPerYear}{Numeric vector; only present after \code{\link{makeAnnualStat}}}
#' \item{numYears}{Numeric vector; only present after \code{\link{makeMonthlyStat}}}
#' \item{numCells}{Numeric vector; only present after \code{\link{makeGlobalStat}}}
#' \item{filtered}{Logical; only present after \code{\link{filterDimensions}}}
#' @docType class
#' @examples
#' cmip5data(1970) # produces monthly sample data for year 1970
#' cmip5data(1970:2014)
#' cmip5data(1970:2014, monthly=FALSE) # annual data
#' cmip5data(1970:2014, randomize=TRUE) # randomized data
#' cmip5data(1970:2014, Z=TRUE) # four-dimensional data
#' cmip5data(0, time=FALSE) # sample 'fx' data, two-dimensional
#' cmip5data(list()) # makes this (here empty) list class into 'cmip5data'
#' @export
cmip5data <- function(x=list(),
# parameters for making sample data
lonlat=TRUE, lonsize=10, latsize=10,
Z=FALSE, Zsize=5,
time=TRUE, monthly=TRUE,
randomize=FALSE, irregular=FALSE,
verbose=FALSE, loadAs='data.frame') {
# Sanity checks
assert_that(is.flag(lonlat))
assert_that(is.numeric(lonsize))
assert_that(is.numeric(latsize))
assert_that(is.flag(Z))
assert_that(is.numeric(Zsize))
assert_that(is.flag(time))
assert_that(is.flag(monthly))
assert_that(is.flag(randomize))
assert_that(is.flag(irregular))
assert_that(is.flag(verbose))
assert_that(loadAs %in% c("data.frame", "array"))
if (is.list(x)) { # If x is a list then cast it directly to a cmip5data object
if(verbose) cat("Casting list to cmip5data\n")
structure(x, class="cmip5data")
} else if(is.numeric(x)) { # Create sample data
if(verbose) cat("Creating new cmip5data\n")
# Construct two lists which will be used to create the sample data:
# ... result and debug.
# result holds the primary data of interest
result <- list(
files=NULL,
variable="var",
model="model",
experiment="experiment",
ensembles="ensemble",
domain="domain",
val=NULL,
valUnit=NULL,
lon=NULL,
lat=NULL,
Z=NULL,
time=NULL,
dimNames=NULL
)
debug <- list()
# If this data will have spatial dimensions, construct
if(lonlat) {
if(verbose) cat("Adding spatial dimensions\n")
# realistic lon (0 to 360) and lat (-90 to 90) numbers
result$lon <- 360/lonsize * c(0:(lonsize-1)) + 360/lonsize/2
result$lat <- 180/latsize * c(0:(latsize-1)) - 90 + 180/latsize/2
# Convert to two dimensions
result$lon <- array(result$lon, dim=c(lonsize, latsize))
result$lat <- array(rep(result$lat, 1, each=lonsize), dim=c(lonsize, latsize))
if(irregular) {
result$lon <- result$lon + round(runif(lonsize * latsize), 1)
result$lat <- result$lat + round(runif(lonsize * latsize), 1)
}
result$dimNames=c("lon", "lat")
debug$lonUnit <- "degrees_east"
debug$latUnit <- "degrees_north"
} else {
result$dimNames <- c(NA, NA)
}
# If this data will have Z dimension, construct
if(Z) {
if(verbose) cat("Adding Z dimensions\n")
result$Z <- c(0:(Zsize-1))
result$dimNames <- c(result$dimNames, "Z")
debug$ZUnit <- "m"
} else {
result$dimNames <- c(result$dimNames, NA)
}
# If this data will have time dimension, construct
if(time) {
if(verbose) cat("Adding time dimensions\n")
years <- x
ppy <- ifelse(monthly, 12, 1) # periods per year
result$calendarStr <- "360_day"
debug$timeFreqStr <- ifelse(monthly, "mon", "yr")
debug$startYr <- years[1]
debug$calendarStr <- "360_day"
debug$timeUnit <- paste0("days since ",years[1],"-01-01")
if(monthly) {
# '+15' initalizes all time stamps to be middle of the month
debug$timeRaw <- (360/ppy*c(0:(length(years)*ppy-1) )+15)
result$time <- debug$timeRaw/360+min(years)
} else {
debug$timeRaw <- result$time <- years
}
# convert day based calandar to year based
result$dimNames <- c(result$dimNames, "time")
} else { # no time
result$dimNames <- c(result$dimNames, NA)
result$domain <- "fx"
}
# Create the data array
finalDim <- c(max(1, length(result$lon[,1])),
max(1, length(result$lat[1,])),
max(1, length(result$Z)),
max(1, length(result$time)))
if(randomize) {
result$val <- runif(n=prod(finalDim))
} else {
result$val <- rep(1, prod(finalDim))
}
dim(result$val) <- finalDim
# Miscellany
result$valUnit <- "unit"
result$debug <- debug
# Add debug info and set class
result <- structure(result, class="cmip5data")
# Convert to data frame representation, if requested
if(loadAs == 'data.frame') {
result$val <- convert_array_to_df(result, verbose)
}
# Initialize provenance and return
addProvenance(result, "Dummy data created")
} else {
stop("Don't know what to do with this class of parameter")
}
}
#' Print a 'cmip5data' class object.
#'
#' @param x A \code{\link{cmip5data}} object
#' @param ... Other parameters passed to cat
#' @details Prints a one-line summary of the object
#' @method print cmip5data
#' @export
#' @keywords internal
print.cmip5data <- function(x, ...) {
if(all(unlist(lapply(x, is.null)))) {
cat("(Empty cmip5data object)")
return()
}
ansStr <- paste0('CMIP5: ', x$variable, ", ", x$model, " ", x$experiment)
spaceStr <- paste(dim(x$lon)[1], "x", dim(x$lat)[2], "x", length(x$Z), "x", length(x$time))
ansStr <- paste0(ansStr, ", ", spaceStr)
timeStr <- "no time"
if(!is.null(x$time) & length(x$time) > 0) {
timeStr <- paste(floor(min(x$time, na.rm=TRUE)), "to",
floor(max(x$time, na.rm=TRUE)))
}
ansStr <- paste0(ansStr, ", ", timeStr)
if(!is.null(x$ensembles)) {
ansStr <- paste0(ansStr, ", from ", length(x$ensembles), " ",
ifelse(length(x$ensembles)==1, "ensemble", "ensembles"))
}
cat(ansStr, "\n")
cat(...)
} # print.cmip5data
#' Summarize a 'cmip5data' class object.
#'
#' @param object A \code{\link{cmip5data}} object
#' @param ... ignored
#' @details Prints a short summary of the object.
#' @return A summary structure of the object.
#' @method summary cmip5data
#' @export
#' @keywords internal
summary.cmip5data <- function(object, ...) {
ans <- list()
class(ans) <- "summary.cmip5data"
# cmip5 objects should always have the following defined:
ans$variable <- object$variable
ans$valUnit <- object$valUnit
ans$domain <- object$domain
ans$model <- object$model
ans$experiment <- object$experiment
ans$ensembles <- object$ensembles
ans$type <- "CMIP5 data"
# if(grepl('makeAnnualStat', rev(object$provenance$caller)[1])) {
# ans$type <- paste(ans$type, "annual summary [",
# paste(unique(object$numPerYear), collapse=', '),
# "]")
# }
if(!is.null(object$numPerYear)) {
ans$type <- paste0(ans$type, " (annual summary of ", mean(object$numPerYear), " times)")
}
if(!is.null(object$numYears)) {
ans$type <- paste0(ans$type, " (monthly summary of ", mean(object$numYears), " years)")
}
if(!is.null(object$numCells)) {
ans$type <- paste0(ans$type, " (spatial summary of ", object$numCells, " cells)")
}
if(!is.null(object$numZs)) {
ans$type <- paste0(ans$type, " (Z summary of ", object$numZs, " levels)")
}
if(!is.null(object$filtered)) {
ans$type <- paste(ans$type, "(filtered)")
}
# if(!is.null(object$area)) {
# ans$type <- paste(ans$type, "(regridded)")
# }
ans$spatial <- paste0("lon [", dim(object$lon)[1],
"] lat [", dim(object$lat)[2],
"] Z [", length(object$Z), "]")
ans$time <- paste0(object$debug$timeFreqStr, " [", length(object$time), "] ", object$debug$timeUnit)
ans$size <- as.numeric(object.size(object))
if(is.array(object$val)) {
ans$valsummary <- c(min(object$val, na.rm=TRUE),
mean(object$val, na.rm=TRUE),
max(object$val, na.rm=TRUE))
} else {
ans$valsummary <- c(min(object$val$value, na.rm=TRUE),
mean(object$val$value, na.rm=TRUE),
max(object$val$value, na.rm=TRUE))
#} else {
# stop('Class of value is not recognized.')
}
ans$provenance <- object$provenance
return(ans)
} # summary.cmip5data
#' Print the summary for a 'cmip5data' class object.
#'
#' @param x A \code{\link{cmip5data}} object
#' @param ... Other parameters passed to cat
#' @details Prints a one-line summary of the object
#' @method print summary.cmip5data
#' @export
#' @keywords internal
print.summary.cmip5data <- function(x, ...) {
cat(x$type, "\n")
cat("Variable: ", x$variable, " (", x$valUnit, ") from model ", x$model, "\n", sep="")
cat(sprintf("Data range: %.2g to %.2g Mean: %.2g\n", x$valsummary[1], x$valsummary[3], x$valsummary[2]))
cat("Experiment:", x$experiment, "-", length(x$ensembles), "ensemble(s)\n")
cat("Spatial dimensions:", x$spatial, "\n")
cat("Time dimension:", x$time, "\n")
cat("Size:", format(round(x$size/1024/1024, 1), nsmall=1), "MB\n")
cat("Provenance has", nrow(x$provenance), "entries\n")
} # print.summary.cmip5data
#' Convert a cmip5data object to a data frame
#'
#' @param x A \code{\link{cmip5data}} object
#' @param ... Other parameters
#' @param originalNames logical. Use original dimension names from file?
#' @return The object converted to a data frame
#' @method as.data.frame cmip5data
#' @export
#' @keywords internal
as.data.frame.cmip5data <- function(x, ..., originalNames=FALSE) {
# Sanity checks
assert_that(is.flag(originalNames))
if(is.array(x$val)) {
convert_array_to_df(x)
} else {
# Suppress stupid NOTEs from R CMD CHECK
lon <- lat <- Z <- time <- NULL
dplyr::arrange(x$val, lon, lat, Z, time)
}
} # as.data.frame.cmip5data
#' Convert a cmip5data object to an array
#'
#' @param x A \code{\link{cmip5data}} object
#' @param ... Other parameters
#' @param drop logical. Drop degenerate dimensions?
#' @return The object converted to an array
#' @method as.array cmip5data
#' @export
#' @keywords internal
as.array.cmip5data <- function(x, ..., drop=TRUE) {
# Sanity checks
assert_that(is.flag(drop))
if(is.array(x$val)) {
adrop(x$val, drop=drop)
} else {
dimList <- c(length(unique(x$val$lon)),
length(unique(x$val$lat)),
length(unique(x$val$Z)),
length(unique(x$val$time)))
# Remove degenerate dimensions
if(drop) {
dimList <- dimList[!dimList %in% 1]
}
# Suppress stupid NOTEs from R CMD CHECK
lon <- lat <- Z <- time <- NULL
# Note we sort data frame before converting to array!
array(dplyr::arrange(x$val, time, Z, lat, lon)$value, dim=dimList)
}
} # as.array.cmip5data
#' Make package datasets and write them to disk.
#'
#' @param path root of directory tree
#' @param maxSize max size (in MB) of dataset to write
#' @param outpath directory to write to
#' @details Writes all available ensembles to disk as Rdata files, subject to
#' a maximum size parameter (CRAN says keep sample data < 5MB).
#' @note This is an internal RCMIP5 function and not exported.
#' @keywords internal
makePackageData <- function(path="./sampledata", maxSize=Inf, outpath="./data") {
# Sanity checks
assert_that(is.dir(path))
assert_that(is.readable(path))
assert_that(is.numeric(maxSize))
if(!file.exists(outpath)) dir.create(outpath)
assert_that(is.dir(outpath))
assert_that(is.writeable(outpath))
datasets <- getFileInfo(path)
if(is.null(datasets)) return()
for(i in 1:nrow(datasets)) {
cat("-----------------------\n", datasets[i, "filename"], "\n")
d <- with(datasets[i,],
loadEnsemble(variable, model, experiment, ensemble, path=path, verbose=T)
)
print(object.size(d), units="MB")
if(object.size(d)/1024/1024 <= maxSize) {
objname <- gsub("_[0-9]{4,}-[0-9]{4,}.nc$", "", basename(d$files[1])) # strip dates
assign(objname, d)
cat("Writing", objname, "\n")
save(list=objname, file=paste0(outpath, "/", objname, ".rda"))
} else {
cat("Too big; skipping\n")
}
}
} # makePackageData
#' Alternative weighted mean
#'
#' @param x vector of data
#' @param w vector of weights
#' @param na.rm Remove NAs in both data and weights?
#' @return Weighted mean of \code{x}, using weights \code{d}.
#' @details The \code{stats} version of weighted.mean doesn't handle weights in a very
#' useful way. Specifically, it will remove missing \code{x} values, but not missing weights.
#' A fair number of CMIP5 models have \code{NA} values in their grid areas, so this will
#' quickly cause problems. This function removes (if \code{na.rm=TRUE}) missing observations
#' and weights before computing the weighted mean.
#' @export
#' @seealso \code{\link{weighted.mean}}
cmip5.weighted.mean <- function(x, w=rep(1, length(x)), na.rm=TRUE) {
assert_that(length(x) == length(w))
na <- FALSE
if(na.rm) na <- is.na(x) | is.na(w)
sum((x*w)[!na]) / sum(w[!na])
} # cmip5.weighted.mean
#' Return data values
#'
#' @param x A \code{\link{cmip5data}} object
#' @return Data values in \code{x}.
#' @details Abstracts away getting values, the method for which
#' varies depending on backend implementation.
#' @keywords internal
#' @note This is an internal RCMIP5 function and not exported.
vals <- function(x) {
assert_that(class(x)=="cmip5data")
if(is.data.frame(x$val)) {
x$val$value
} else if(is.array(x$val)) {
as.numeric(x$val)
} else
stop("Unknown data implementation")
} # vals
#' Return number of data values
#'
#' @param x A \code{\link{cmip5data}} object
#' @return Number of data values in \code{x}.
#' @details Abstracts away getting number of values, the method for which
#' varies depending on backend implementation.
#' @keywords internal
#' @note This is an internal RCMIP5 function and not exported.
nvals <- function(x) {
length(vals(x))
} # nvals
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.