#' Empirical Orthogonal Functions (EOFs).
#'
#' Computes EOFs (a type of principal component analysis) for combinations of
#' data sets, typically from gridded data, reanalysis and climate models
#' results.
#'
#' [ref: Benestad (2001), "A comparison between two empirical downscaling
#' strategies", \emph{Int. J. Climatology}, \bold{vol 21}, Issue 13,
#' pp.1645-1668. DOI 10.1002/joc.703]. and \code{mixFields} prepares for
#' mixed-field EOF analysis [ref. Bretherton et al. (1992) "An Intercomparison
#' of Methods for finding Coupled Patterns in Climate Data", \emph{J. Climate},
#' \bold{vol 5}, 541-560; Benestad et al. (2002), "Empirically downscaled
#' temperature scenarios for Svalbard", \emph{Atm. Sci. Lett.},
#' doi.10.1006/asle.2002.0051].
#'
#' Uncertainty estimates are computed according to North et al. (1982),
#' "Sampling Errors in the Estimation of Empirical Orthogonal Functions",
#' \emph{Mon. Weather Rev.}, \bold{vol 110}, 699-706.
#'
#' The EOFs are based on \code{\link{svd}}.
#'
#' See the course notes from Environmental statistics for climate researchers
#' \url{http://www.gfi.uib.no/~nilsg/kurs/notes/course.html} for a discussion
#' on EOF analysis.
#'
#' The method \code{PCA} is similar to EOF, but designed for parallel station
#' series (e.g. grouped together with \code{\link{merge}}). PCA does not assume
#' gridded values and hence does not weigth according to grid area. PCA is
#' useful for downscaling where the spatial covariance/coherence is important,
#' e.u involving different variables from same site, same variable from
#' different sites, or a mix between these. For instance, PCA can be applied to
#' the two wind components from a specific site and hence extract the most
#' important wind directions/speeds. \code{PCA.matrix} is just a wrapper function
#' for \code{svd} that makes sure that the dimensions of the input are in order.
#'
#' @aliases EOF EOF.default EOF.field EOF.comb eof2field PCA PCA.default
#' PCA.station PCA.matrix pca2station
#'
#' @importFrom stats approx sd acf
#'
#' @seealso as.eof
#'
#' @param X a 'field' or 'pca' object
#' @param it see \code{\link{subset}}
#' @param n number of EOFs
#' @param is Spatial subsetting - see \code{\link{subset.eof}}
#' @param lon set longitude range - see \code{\link{t2m.NCEP}}
#' @param lat set latitude range
#' @param verbose TRUE - clutter the screen with messages
#' @param anomaly When TRUE, subtract the mean before SVD.
#' @param \dots additional arguments
#'
#' @return File containing an 'eof' object which is based on the 'zoo' class.
#' @author R.E. Benestad
#' @keywords spatial ts multivariate
#' @examples
#'
#' # Simple EOF for annual mean SST:
#' sst <- sst.NCEP(lon=c(-90,20),lat=c(0,70))
#' SST <- annual(sst, FUN="mean", nmin=12)
#' eof.sst <- EOF(SST)
#' plot(eof.sst)
#'
#' # EOF of July SST:
#' eof.sst7 <- EOF(sst,it="Jul")
#' plot(eof.sst7)
#'
#' # common EOF for model
#' # Get some sample data, extract regions:
#' GCM <- t2m.NorESM.M()
#' gcm <- subset(GCM,is=list(lon=c(-50,60),lat=c(30,70)))
#' t2m.dnmi <- t2m.DNMI()
#' dnmi <- subset(t2m.dnmi,is=list(lon=c(-50,60),lat=c(30,70)))
#'
#' OBS <- annual(dnmi, FUN="mean", nmin=12)
#' GCM <- annual(gcm, FUN="mean", nmin=12)
#' OBSGCM <- combine(OBS,GCM,dimension='time')
#'
#' ceof <- EOF(OBSGCM)
#' plot(ceof)
#'
#' # Example for using PCA in downscaling
#' ## nacd <- station(src='nacd')
#' ## X <- annual(nacd)
#' X <- station(src='nacd')
#' nv <- function(x) sum(is.finite(x))
#' ok <- (1:dim(X)[2])[apply(X,2,nv) == dim(X)[1]]
#' X <- subset(X,is=ok)
#' pca <- PCA(X)
#' map(pca)
#'
#' slp <- slp.NCEP(lon=c(-20,30),lat=c(30,70))
#' eof <- EOF(slp,it="Jan")
#' ds <- DS(pca,eof)
#' # ds is a PCA-object
#' plot(ds)
#'
#' # Recover the station data:
#' Z <- as.station(pca)
#' plot(Z,plot.type='multiple')
#'
#' @export EOF
EOF <- function(X,...,it=NULL,is=NULL,n=20,lon=NULL,lat=NULL,verbose=FALSE,anomaly=TRUE) { UseMethod("EOF") }
#' @exportS3Method
#' @export
EOF.default <- function(X,...,it=NULL,is=NULL,n=20,lon=NULL,lat=NULL,verbose=FALSE,anomaly=TRUE) {
# Verify Arguments
if (verbose) print("EOF.default")
stopifnot(!missing(X), is.matrix(X),inherits(X,"zoo"))
if (!zeros(inherits(X,c("comb","zoo"),which=TRUE))) {
eof <- EOF.comb(X,it=it,is=is,n=n,anomaly=anomaly,verbose=verbose)
} else if ( !zeros(inherits(X,c("field","zoo"),which=TRUE)) ) {
eof <- EOF.field(X,it=it,is=is,n=n,anomaly=anomaly,verbose=verbose)
}
attr(eof,'dimnames') <- NULL # REB 2016-03-04
return(eof)
}
# Apply EOF analysis to the monthly mean field values:
#' @exportS3Method
#' @export
EOF.field <- function(X,it=NULL,is=NULL,n=20,lon=NULL,lat=NULL,verbose=FALSE,anomaly=TRUE,...) {
SF <- function(x) {sum(is.finite(x))}
if (verbose) print("EOF.field")
attr(X,'dimnames') <- NULL
stopifnot(!missing(X), is.matrix(X),
inherits(X,c("field","zoo")))
# Remove time slices with missing data:
# nok <- !is.finite(rowMeans(X))
# The regridded appendix may contain some NA's if its domain exceeds that of the original field.
# Get rid of time slices with all NAs.
## REB 2022-08-10 The following lines were commented out as they unintentionally removed GCM data
# nok <- apply(X,1,nv) < 0.5*dim(X)[2]
# if (sum(nok)> 0) {
# it <- (1:length(nok))[!nok]
# if (verbose) print(paste('removing ',sum(nok),'NA time slices'))
# }
x <- subset(X,it=it,is=is,verbose=verbose)
x <- sp2np(x)
dates <- index(x)
if (verbose) print(dates)
d <- attr(x,'dimensions')
if ((length(d) != 3) | min(d) == 1) {
stop(paste('EOF.field: too small data dimensions'))
}
cls <- class(x)
Y <- t(coredata(x))
Y[!is.finite(Y)] <- NA
# Apply geographical weighting to account for different grid area at
# different latitudes:
if (verbose) print('svd:')
stdv <- sd(c(Y),na.rm=TRUE) # Account for mixed fields with different
# magnitudes...
#print(d); print(dim(Y)); str(attr(X,'latitude'))
Wght <- matrix(nrow=d[1],ncol=d[2])
for (i in 1:d[1]) Wght[i,]<-sqrt(abs(cos(pi*attr(X,'latitude')/180)))
#plot(attr(X,'latitude'),colMeans(Wght),type="l"); stop("HERE")
dim(Wght) <- c(d[1]*d[2])
#print(length(Wght)); print(dim(Y)); print(d[3])
#for (it in 1:d[3]) Y[,it] <- (Wght/stdv)*Y[,it]
# Exclude the missing values 'NA' and grid points with sd == 0 for all times:
sd0 <- apply(as.matrix(Y),2,sd,na.rm=TRUE)
nf <- apply(as.matrix(Y),2,SF)
if (verbose) print(paste('Exclude the missing values/zero-sd:',
sum(sd0>0.0),sum(nf > 0)))
y <- Y[,(sd0>0.0) & (nf > 0)]
# Exclude the time slices with missing values:
skip <- apply(as.matrix(y),1,SF); npts <- dim(y)[2]
y <- as.matrix(y)[skip == npts,]
# Remove the mean value - center the analysis:
if (anomaly) {
if (verbose) print('center the data')
ave <- rowMeans(y)
#print(c(length(ave),dim(y)))
y <- y - ave
} else ave <- rowMeans(y)*0
npca <- min(dim(y))
ny <- min(c(dim(y),n))
# REB 2015-05-21
# Apply the SVD decomposition: see e.g. Strang (1988) or Press et al. (1989)
#SVD <- svd(y,nu=min(c(20,npca)),nv=min(c(20,npca)))
## KMP 23-11-2015
## When running DSensemble, this convergence error comes up occasionally:
## "Error in La.svd(x, nu, nv) : error code 1 from Lapack routine 'dgesdd'."
## For some reason rounding the matrix before performing svd helps.
## Another alternative is to do svd on the matrix transpose
#y <- round(y,digits=10)
#SVD <- svd(y,nu=min(c(ny,npca)),nv=min(c(ny,npca)))
SVD <- try(svd(y,nu=min(c(ny,npca)),nv=min(c(ny,npca))))
if (inherits(SVD,"try-error")) {
if (verbose) print("svd(x) failed. try svd(t(x))")
SVD <- try(svd(t(y),nu=min(c(ny,npca)),nv=min(c(ny,npca))))
temp <- SVD$u
SVD$u <- SVD$v
SVD$v <- temp
}
if (inherits(SVD,"try-error") & verbose) print("both svd(x) and svd(t(x) failed.")
#print("---"); print(dim(SVD$u)); print(dim(SVD$v)); print("---")
autocor <- 0
if (verbose) print(paste("Find max autocorr in the grid boxes."))
#print(dim(y))
for (i in 1:npts) {
vec <- as.vector(y[,i])
i.bad <- is.na(vec)
if (sum(i.bad) == 0) {
ar1 <- acf(vec[],plot=FALSE)
autocor <- max(c(autocor,ar1$acf[2,1,1]),na.rm=TRUE)
}
}
n <- min(n,length(SVD$d))
#a <- y[,1]; dim(a) <- c(d[1],d[2]); image(a)
eof <- zoo(SVD$v[,1:n],order.by = dates)
invert <- apply(SVD$u[,1:n],2,mean) < 0
# Some data points may have been excluded due to missing values.
# Need to insert the results for valid data onto the original grid.
pattern <- matrix(rep(NA,d[1]*d[2]*n),d[1]*d[2],n)
pattern[skip == npts,] <- SVD$u[,1:n]
Ave <- rep(NA,d[1]*d[2])
Ave[skip == npts] <- ave
# Make all the EOF vectors havine the same sense rather than
# being random:
if (verbose) print(paste("Invert EOF",(1:length(invert))[invert],collapse=' '))
pattern[,invert] <- -pattern[,invert]
eof[,invert] <- -eof[,invert]
dim(pattern) <- c(d[1],d[2],n)
dim(Ave) <- c(d[1],d[2])
eof <- attrcp(X,eof)
#nattr <- softattr(X)
#for (i in 1:length(nattr))
# attr(eof,nattr[i]) <- attr(X,nattr[i])
names(eof) <- paste("X.",1:n,sep="")
attr(eof,'pattern') <- pattern
attr(eof,'dimensions') <- d
attr(eof,'mean') <- Ave
attr(eof,'max.autocor') <- autocor
attr(eof,'eigenvalues') <- SVD$d[1:n]
attr(eof,'sum.eigenv') <- sum(SVD$d)
attr(eof,'tot.var') <- sum(SVD$d^2)
attr(eof,'history') <- history.stamp(X)
attr(eof,'aspect') <- 'anomaly'
attr(eof,'dimnames') <- NULL # REB 2016-03-04
class(eof) <- c("eof",cls)
#str(eof)
return(eof)
}
#' @exportS3Method
#' @export
EOF.comb <- function(X,it=NULL,is=NULL,n=20,lon=NULL,lat=NULL,verbose=FALSE,anomaly=TRUE,...) {
n.app <- attr(X,'n.apps')
if (verbose) print(paste("EOF.comb: ",n.app,"additional field(s)"))
# Extract the data into an ordinary matrix, remove the respective
# mean values from the different data sets, and combine the data into
# one matrix. Also keep track of dates, but this is monthly data, and
# the day is used to reflect the different data set, where day==1 for
# the original (first field), day==2 for the first field appended,
# day ==3, for the second, and so on.
## AM 11-11-2013 added lines begin
#print('subset')
if (!is.null(is) | !is.null(it)) {
X <- subset(X,it=it,is=is,verbose=verbose)
}
#else if (!is.null(it))
# X <- subset(X,it=it,is=is)
## AM 11-11-2013 added lines end
#print(dim(X))
#print('soutpole-to-northpole')
X <- sp2np(X)
YYt <- t(coredata(X));
clim <- rowMeans(YYt,na.rm=TRUE)
clim.0 <- clim
YYt <- YYt - clim
YY <- t(YYt)
d <- attr(X,'dimensions')
## KMP 2016-12-28: time housekeeping in EOF.comb creates problems
## for DS when applied to annual data. The predictand ends up with
## years as time index but the EOF has dates (YYYY-01-01).
## Do the realdates and fakedates need to be in date format?
if (verbose) print('time house keeping')
t <- index(X)
datetype <- class(t)
if (datetype=="Date") {
fakedates <- paste(format(t,'%Y-%m'),'-01',sep='')
realdates <- paste(format(t,'%Y-%m'),'-01',sep='')
endsofar <- max(as.numeric(format(as.Date(fakedates),'%Y')))
} else if (datetype=="numeric") {
fakedates <- t#paste(t,'-01-01',sep='')#
realdates <- t#paste(t,'-01-01',sep='')#
endsofar <- max(t)#max(as.numeric(format(as.Date(fakedates),'%Y')))#
}
#print(realdates)
# Keep track of the different fields:
if (is.null(attr(X,'source'))) attr(X,'source') <- "0"
id.t <- rep(attr(X,'source'),length(index(X)))
ID.t <- attr(X,'source')
for (i in 1:n.app) {
if (verbose) print(paste("Additional field",i,endsofar))
YYY <- attr(X,paste('appendix.',i,sep=""))
ttt <- index(YYY)
##print(class(ttt)); print(ttt[1:5])
if (inherits(ttt,'Date')) {
year <- as.numeric(format(ttt,'%Y'))
month <- format(ttt,'%m')
} else if (inherits(ttt,c('numeric','integer'))) {
year <- ttt
month <- rep(1,length(year))
}
yearf <- year - min(year) + endsofar + 10
if (inherits(ttt,'Date')) {
fakedates <- c(fakedates,paste(yearf,"-",month,'-01',sep=''))
realdates <- c(realdates,paste(year,"-",month,'-01',sep=''))
endsofar <- max(as.numeric(format(as.Date(fakedates),'%Y')))
#print(fakedates)
} else if (inherits(ttt,c('numeric','integer'))) {
fakedates <- c(fakedates,yearf)
realdates <- c(realdates,year)
endsofar <- max(as.numeric(fakedates))
}
#print(year)
#str(YYYt)
d <- rbind(d,attr(YYY,'dimensions'))
Zt <- t(coredata(YYY))
clim <- rowMeans(Zt,na.rm=TRUE)
#str(clim)
eval(parse(text=paste('clim.',i,' <- clim',sep='')))
Zt <- Zt - clim
#print(paste("Temporal-spatial mean values:",
# round(mean(YY,na.rm=TRUE),3),
# round(mean(clim,na.rm=TRUE),3)))
#print(dim(YY)); print(dim(t(Zt)))
YY <- rbind(YY,t(Zt))
#print(length(index(YYY)))
# Keep track of the different fields:
if (verbose) print(paste('EOF.comb',attr(YYY,'source')))
if (is.null(attr(YYY,'source'))) attr(YYY,'source') <- as.character(i) else
if (is.na(attr(YYY,'source')[1])) attr(YYY,'source') <- as.character(i)
src <- paste(attr(YYY,'source'),i,sep="+")
id.t <- c(id.t,rep(src[1],length(index(YYY))))
ID.t <- c(ID.t,src)
#print('YY:'); print(dim(YY)); print("d:"); print(d)
}
# Synthetise a new object with combined data that looks like a
# field object, and then call the ordinary EOF method:
if (verbose) print("combine original and appended fields")
if(is.character(fakedates)){
Y <- zoo(YY,order.by=as.Date(fakedates))
} else {
Y <- zoo(YY,order.by=fakedates)#as.Date(fakedates))
}
#plot(rowMeans(YY,na.rm=TRUE),type="l")
# Discard time slices with no valid data, e.g. DJF in the beginning of the record
ngood <- apply(coredata(Y),1,nv)
if (verbose) print(summary(ngood))
if (verbose) {print("Check:"); print(table(id.t)); print(dim(Y))}
if (verbose) print(paste('Remove missing data gaps: ngood <- apply(coredata(Y),2,nv):',ngood))
realdates <- realdates[ngood>0]
fakedates <- fakedates[ngood>0]
id.t <- id.t[ngood>0]
Y <- Y[ngood>0,]
Y <- attrcp(X,Y)
class(Y) <- class(X)[-1]
attr(Y,'dimensions') <- c(d[1,1],d[1,2],sum(ngood>0))
if (verbose) {print(dim(Y)); print(attr(Y,'dimensions'))}
if (verbose) print('Ordinary EOF')
eof <- EOF.field(Y,it=it,is=is,n=n,lon=lon,lat=lat,anomaly=anomaly,verbose=verbose)
if (verbose) print("Computed the eofs:")
# After the EOF, the results must be reorganised to reflect the different
# data sets.
ceof <- eof
ii <- is.element(id.t,ID.t[1])
if (verbose) {print("Check:"); print(sum(ii)); print(ID.t); print(table(id.t))
print(realdates[ii]); print(dim(eof))}
if(is.character(realdates)) {
ceof <- zoo(eof[ii,],order.by=as.Date(realdates[ii]))
} else {
ceof <- zoo(eof[ii,],order.by=realdates[ii])
}
## Finalise - set the metadata
if (verbose) {print("Copy attributes"); print(names(attributes(eof)))}
ceof <- attrcp(eof,ceof)
clim <- clim.0
dim(clim) <- attr(X,'dimensions')[1:2]
attr(ceof,'mean') <- clim
attr(ceof,'dimensions') <- attr(X,'dimensions')
## Set the metadata for the appended data: climatology etc.
for (i in 1:n.app) {
jj <- is.element(id.t,ID.t[i+1])
if (verbose) print(paste(ID.t[i+1],' -> appendix.',i,' data points=',sum(jj),sep=''))
if(is.character(realdates)) {
z <- zoo(eof[jj,],order.by=as.Date(realdates[jj]))
} else {
z <- zoo(eof[jj,],order.by=realdates[jj])
}
yyy <- NULL
cline1 <- paste("yyy <- attr(X,'appendix.",i,"')",sep="")
if (verbose) print(cline1)
eval(parse(text=cline1))
z <- attrcp(yyy,z)
cline2 <- paste("clim <- clim.",i,sep="")
if (verbose) print(cline2)
eval(parse(text=cline2))
dim(clim) <- attr(X,'dimensions')[1:2]
if (verbose) print('add information about mean')
attr(z,"mean") <- clim
attr(ceof,paste('appendix.',i,sep="")) <- z
}
attr(ceof,'n.apps') <- n.app
attr(ceof,'history') <- history.stamp(X)
attr(ceof,'aspect') <- 'anomaly'
attr(ceof,'dimnames') <- NULL # REB 2016-03-04
class(ceof) <- c("eof",class(X))
invisible(ceof)
}
#' @export
eof2field <- function(x,it=NULL,is=NULL,ip=NULL,anomaly=FALSE,verbose=FALSE) {
if (verbose) {print("eof2field"); if (!is.null(is)) print(is)}
greenwich <- attr(x,'greenwich')
if ( !is.null(it) | !is.null(is) ) {
eof <- subset(x,it=it,is=is,verbose=verbose)
} else {
eof <- x
}
U <- attr(eof,'pattern')
d <- dim(U)
if (verbose) {str(U); print(d)}
dim(U) <- c(d[1]*d[2],d[3])
W <- attr(eof,'eigenvalues')
V <- coredata(eof)
# ==================================================
## KMP 2016-01-15: added selection of patterns (ip)
if(is.null(ip)) {
ip <- seq(length(W))
} else if(any(ip %in% seq(length(W)))) {
ip <- ip[ip>0 & ip<length(W)]
} else {
stop(paste("Error in input ip =",paste(ip,collaps=", ")))
}
# ==================================================
U <- U[,ip]; W <- W[ip]; V <- V[,ip]
y <-U %*% diag(W) %*% t(V)
if (!anomaly) {
if (verbose) print('Anomalies')
y <- y + c(attr(eof,'mean'))
}
y <- t(y)
y <- as.field.default(y,index=index(eof),
lon=attr(eof,'longitude'),lat=attr(eof,'latitude'),
param=attr(eof,'variable'),unit=attr(eof,'unit'),
longname=attr(eof,'longname'),src=attr(eof,'source'),
url=attr(eof,'url'),reference=attr(eof,'reference'),
info=attr(eof,'info'),calendar=attr(eof,'calendar'),
greenwich=attr(eof,'greenwich'),verbose=verbose)
if (!is.null(lon) | !is.null(lat)) {
if (is.null(lon)) lon <- c(-180,180)
if (is.null(lat)) lat <- c(-90,90)
}
attr(y,'history') <- history.stamp(x)
if (anomaly) attr(y,'aspect') <- 'anomaly' else
attr(y,'aspect') <- 'original'
#class(y) <- class(eof)[-1]
cl <- class(eof)
if("eof" %in% cl) cl <- cl[!cl=="eof"]
# KMP 2023-04-25: Replace class 'station' with 'field' (relevant in very few cases - really should not happen)
if("station" %in% cl) cl[cl=="station"] <- "field"
class(y) <- cl
invisible(y)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.