# Computes Empirical Orthogonal Functions (EOFs)
#
# R.E. Benestad,
# rasmus.benestad@met.no
#
#------------------------------------------------------------------------
require(zoo)
EOF<-function(X,it=NULL,n=20,lon=NULL,lat=NULL,verbose=FALSE,...)
UseMethod("EOF")
EOF.default <- function(X,it=NULL,n=20,lon=NULL,lat=NULL,
area.mean.expl=FALSE,verbose=FALSE,...) {
# Verify Arguments
if (verbose) print("EOF.default")
stopifnot(!missing(X), is.matrix(X),inherits(X,"zoo"))
if ( !zeros(inherits(X,c("comb","zoo"),which=TRUE)) )
eof <- EOF.comb(X,it=it,n=n,lon=lon,lat=lat,
area.mean.expl=area.mean.expl,
verbose=verbose) else
if ( !zeros(inherits(X,c("field","zoo"),which=TRUE)) )
eof <- EOF.field(X,it=it,n=n,lon=lon,lat=lat,
area.mean.expl=area.mean.expl,
verbose=verbose)
return(eof)
}
# Apply EOF analysis to the monthly mean field values:
EOF.field <- function(X,it=NULL,n=20,lon=NULL,lat=NULL,
area.mean.expl=FALSE,verbose=FALSE) {
SF <- function(x) {sum(is.finite(x))}
if (verbose) print("EOF.field")
stopifnot(!missing(X), is.matrix(X),
inherits(X,c("field","zoo")))
# REB: 29.04.2014
if (area.mean.expl) {
if (verbose) print('area.mean.expl')
A <- aggregate.area(X,FUN='mean')
x <- X - A
x <- attrcp(X,x)
attr(x,'dimensions') <- attr(X,'dimensions')
class(x) <- class(X)
X <- x
}
X <- sp2np(X)
dates <- index(X)
d <- attr(X,'dimensions')
cls <- class(X)
#print(cls)
# browser()
if (!is.null(it)) {
if (verbose) print(paste('temporal subset: it=',it))
#print(it)
#print(table(as.POSIXlt(dates)$mon+1))
# Select a subset of the months
if ( (min(it) > 0) & (max(it) < 13) & (inherits(X,c("month"))) ) {
#keepm <- as.numeric(format(index(X),"%m"))==it
#print("Monthly aggregated field")
keepm <- is.element(as.POSIXlt(dates)$mon+1,it)
} else
if ( (min(it) > 0) & (max(it) < 5) & (inherits(X,c("season"))) ) {
#print("Seasonally aggregated field")
#print(table(as.POSIXlt(dates)$mon+1))
keepm <- is.element(as.POSIXlt(dates)$mon+1,c(1,4,7,10)[it])
#print(c(it,sum(keepm)))
} else
## Select a range of years (interval)
if ( (length(it)>1) & (sum(is.element(it,1500:2500))>0) ) {
if (length(it)==2) it <- it[1]:it[2]
##print(it); print(as.POSIXlt(dates)$year+1900); print(dates)
keepm <- is.element(as.numeric(format(index(X),"%Y")),it)
## AM replacement 13-11-2013 old line keepm <- is.element(as.POSIXlt(dates)$year+1900,it)
} else ## if (inherits(it,"POSIXt"))
keepm <- is.element(as.Date(dates),as.Date(it))
## AM replacement 13-11-2013 old line keepm <- is.element(as.POSIXlt(dates),as.POSIXlt(it))
dates <- dates[keepm]
x <- zoo(X[keepm,],order.by = dates)
d[3] <- sum(keepm)
#print(d[3])
} else x <- X
Y <- t(coredata(x))
#print(dim(Y)); print(d)
# to select geographicla regions, the zoo aspects are no longer needed:
# expand into lon lat dimensions in addition to time:
dim(Y) <- d
#print(d); A <- Y[,,1]; image(t(A)); dev.new()
if (!is.null(lon)) {
if (length(lon) != 2)
warning("EOF: argument 'lon' must be a range") else {
# Select a subset of the longitudes
keepx <- (attr(X,'longitude') >= min(lon)) &
(attr(X,'longitude') <= max(lon))
attr(X,'longitude') <- attr(X,'longitude')[keepx]
d[1] <- sum(keepx)
Y <- Y[keepx,,]
}
}
if (!is.null(lat)) {
if (length(lat) != 2)
warning("EOF: argument 'lat' must be a range") else {
# Select a subset of the latitudes
keepy <- (attr(X,'latitude') >= min(lat)) &
(attr(X,'latitude') <= max(lat))
attr(X,'latitude') <- attr(X,'latitude')[keepy]
d[2] <- sum(keepy)
Y <- Y[,keepy,]
}
}
d -> attr(X,'dimensions')
dim(Y) <- c(d[1]*d[2],d[3])
# Apply geographical weighting to account for different grid area at
# different latitudes:
if (verbose) print('svd:')
stdv <- sd(c(Y),na.rm=TRUE) # Account for mixed fields with different
# magnitudes...
#print(d); print(dim(Y)); str(attr(X,'latitude'))
Wght <-matrix(nrow=d[1],ncol=d[2])
for (i in 1:d[1]) Wght[i,]<-sqrt(abs(cos(pi*attr(X,'latitude')/180)))
#plot(attr(X,'latitude'),colMeans(Wght),type="l"); stop("HERE")
dim(Wght) <- c(d[1]*d[2])
#print(length(Wght)); print(dim(Y)); print(d[3])
#for (it in 1:d[3]) Y[,it] <- (Wght/stdv)*Y[,it]
# Exclude the missing values 'NA' and grid points with sd == 0 for all times:
sd0 <- apply(Y,2,sd,na.rm=TRUE)
nf <- apply(Y,2,SF)
y <- Y[,(sd0>0.0) & (nf > 0)]
# Exclude the time slices with missing values:
skip <- apply(y,1,SF); npts <- dim(y)[2]
y <- y[skip == npts,]
# Remove the mean value - center the analysis:
if (verbose) print('center the data')
ave <- rowMeans(y)
#print(c(length(ave),dim(y)))
y <- y - ave
npca <- min(dim(y))
# Apply the SVD decomposition: see e.g. Strang (1988) or Press et al. (1989)
SVD <- svd(y,nu=min(c(20,npca)),nv=min(c(20,npca)))
#print("---"); print(dim(SVD$u)); print(dim(SVD$v)); print("---")
autocor <- 0
if (verbose) print(paste("Find max autocorr in the grid boxes."))
#print(dim(y))
for (i in 1:npts) {
vec <- as.vector(y[,i])
i.bad <- is.na(vec)
if (sum(i.bad) == 0) {
ar1 <- acf(vec[],plot=FALSE)
autocor <- max(c(autocor,ar1$acf[2,1,1]),na.rm=TRUE)
}
}
n <- min(n,length(SVD$d))
#a <- y[,1]; dim(a) <- c(d[1],d[2]); image(a)
eof <- zoo(SVD$v[,1:n],order.by = dates)
invert <- apply(SVD$u[,1:n],2,mean) < 0
# Some data points may have been excluded due to missing values.
# Need to insert the results for valid data onto the original grid.
pattern <- matrix(rep(NA,d[1]*d[2]*n),d[1]*d[2],n)
pattern[skip == npts,] <- SVD$u[,1:n]
Ave <- rep(NA,d[1]*d[2])
Ave[skip == npts] <- ave
if (area.mean.expl) {
ave <- ave + mean(A,na.rm=TRUE)
A <- A - mean(A,na.rm=TRUE)
s <- sd(A,na.rm=TRUE)
A <- A/s
n <- n + 1
#print(dim(pattern))
pattern <- cbind(rep(1,d[1]*d[2]),pattern)
SVD$d <- c(d[1]*d[2]*s,SVD$d)
#print(dim(pattern))
eof <- merge(zoo(A,order.by=index(eof)),eof)
}
#print(dim(pattern)); print(d); print(n)
# Make all the EOF vectors havine the same sense rather than
# being random:
pattern[,invert] <- -pattern[,invert]
eof[,invert] <- -eof[,invert]
dim(pattern) <- c(d[1],d[2],n)
dim(Ave) <- c(d[1],d[2])
eof <- attrcp(X,eof)
#nattr <- softattr(X)
#for (i in 1:length(nattr))
# attr(eof,nattr[i]) <- attr(X,nattr[i])
names(eof) <- paste("X.",1:n,sep="")
attr(eof,'pattern') <- pattern
attr(eof,'dimensions') <- d
attr(eof,'mean') <- Ave
attr(eof,'max.autocor') <- autocor
attr(eof,'eigenvalues') <- SVD$d[1:n]
attr(eof,'sum.eigenv') <- sum(SVD$d)
attr(eof,'tot.var') <- sum(SVD$d^2)
attr(eof,'history') <- history.stamp(X)
attr(eof,'aspect') <- 'anomaly'
if (area.mean.expl) attr(eof,'area.mean.expl') <- TRUE else
attr(eof,'area.mean.expl') <- FALSE
class(eof) <- c("eof",cls)
#str(eof)
return(eof)
}
EOF.comb <- function(X,it=NULL,n=20,lon=NULL,lat=NULL,
area.mean.expl=FALSE,verbose=FALSE) {
iv <- function(x) return(sum(is.finite(x)))
n.app <- attr(X,'n.apps')
if (verbose) print(paste("EOF.comb: ",n.app,"additional field(s)"))
# Extract the data into an ordinary matrix, remove the respective
# mean values from the different data sets, and combine the data into
# one matrix. Also keep track of dates, but this is monthly data, and
# the day is used to reflect the different data set, where day==1 for
# the original (first field), day==2 for the first field appended,
# day ==3, for the second, and so on.
## AM 11-11-2013 added lines begin
#print('subset')
if (!is.null(lon) | !is.null(lat))
X <- subset(X,it=it,is=list(lon,lat))
else if (!is.null(it))
X <- subset(X,it=it)
## AM 11-11-2013 added lines end
#print(dim(X))
#print('soutpole-to-northpole')
X <- sp2np(X)
YYt <- t(coredata(X));
clim <- rowMeans(YYt,na.rm=TRUE);
YYt <- YYt - clim
YY <- t(YYt)
d <- attr(X,'dimensions')
#print('time house keeping')
t <- index(X)
datetype <- class(t)
if (datetype=="Date") {
fakedates <- paste(format(t,'%Y-%m'),'-01',sep='')
realdates <- paste(format(t,'%Y-%m'),'-01',sep='')
} else
if (datetype=="numeric") {
fakedates <- paste(t,'-01-01',sep='')
realdates <- paste(t,'-01-01',sep='')
}
#print(realdates)
# Keep track of the different fields:
if (is.null(attr(X,'source'))) attr(X,'source') <- "0"
id.t <- rep(attr(X,'source'),length(index(X)))
ID.t <- attr(X,'source')
endsofar <- max(as.numeric(format(as.Date(fakedates),'%Y')))
for (i in 1:n.app) {
if (verbose) print(paste("Additional field",i,endsofar))
YYY <- attr(X,paste('appendix.',i,sep=""))
ttt <- index(YYY)
##print(class(ttt)); print(ttt[1:5])
if (inherits(ttt,'Date')) {
year <- as.numeric(format(ttt,'%Y'))
month <- format(ttt,'%m')
} else
if (inherits(ttt,'numeric')) {
year <- ttt
month <- rep(1,length(year))
}
yearf <- year - min(year) + endsofar + 10
#print(year)
#str(YYYt)
d <- rbind(d,attr(YYY,'dimensions'))
Zt <- t(coredata(YYY))
clim <- rowMeans(Zt,na.rm=TRUE)
#str(clim)
eval(parse(text=paste('clim.',i,' <- clim',sep='')))
Zt <- Zt - clim
#print(paste("Temporal-spatial mean values:",
# round(mean(YY,na.rm=TRUE),3),
# round(mean(clim,na.rm=TRUE),3)))
#print(dim(YY)); print(dim(t(Zt)))
YY <- rbind(YY,t(Zt))
#print(length(index(YYY)))
# Keep track of the different fields:
if (is.null(attr(YYY,'source'))) attr(YYY,'source') <- as.character(i)
src <- paste(attr(YYY,'source'),i,sep="+")
id.t <- c(id.t,rep(src,length(index(YYY))))
ID.t <- c(ID.t,src)
fakedates <- c(fakedates,paste(yearf,"-",month,'-01',sep=''))
#print(fakedates)
realdates <- c(realdates,paste(year,"-",month,'-01',sep=''))
endsofar <- max(as.numeric(format(as.Date(fakedates),'%Y')))
#print('YY:'); print(dim(YY)); print("d:"); print(d)
}
#print(fakedates)
#print(dates)
# Synthetise a new object with combined data that looks like a
# field object, and then call the ordinary EOF method:
Y <- zoo(YY,order.by=as.Date(fakedates))
#plot(rowMeans(YY,na.rm=TRUE),type="l")
# Discard time slices with no valid data, e.g. DJF in the beginning of the record
ngood <- apply(coredata(Y),1,iv)
realdates <- realdates[ngood>0]
fakedates <- fakedates[ngood>0]
id.t <- id.t[ngood>0]
Y <- Y[ngood>0,]
Y <- attrcp(X,Y)
class(Y) <- class(X)[-1]
#print(class(Y)); print(index(Y)[1:24])
attr(Y,'dimensions') <- c(d[1,1],d[1,2],sum(ngood>0))
#print(dim(Y)); print(attr(Y,'dimensions'))
#browser()
eof <- EOF.field(Y,n=n,lon=lon,lat=lat,
area.mean.expl=area.mean.expl,verbose=verbose)
# print("Computed the eofs...")
# After the EOF, the results must be reorganised to reflect the different
# data sets.
## browser()
ceof <- eof
ii <- is.element(id.t,ID.t[1])
if (verbose) {print("Check:"); print(sum(ii)); print(ID.t); print(table(id.t))
print(realdates[ii]); print(dim(eof))}
ceof <- zoo(eof[ii,],order.by=as.Date(realdates[ii]))
ceof <- attrcp(eof,ceof)
dim(clim) <- attr(X,'dimensions')[1:2]
attr(ceof,'mean') <- clim
attr(ceof,'dimensions') <- attr(X,'dimensions')
for (i in 1:n.app) {
jj <- is.element(id.t,ID.t[i+1])
if (verbose) print(paste(ID.t[i+1],' -> appendix.',i,' data points=',sum(jj),sep=''))
#cline <- paste("Z <- attr(X,'appendix.",i,"')",sep="")
#print(cline)
#eval(parse(text=cline))
#print("Z:"); print(names(attributes(Z)))
z <- zoo(eof[jj,],order.by=as.Date(realdates[jj]))
# eval(parse(text=paste("XXX <- attr(X,'appendix.",i,"')",sep="")))
# z <- zoo(eof[jj,],order.by=index(XXX))
# rownames(z) <- as.Date(realdates[jj])
eval(parse(text=paste("yyy <- attr(X,'appendix.",i,"')",sep="")))
z <- attrcp(yyy,z)
# attr(z,'clim') <- eval(parse(text=paste('clim.',i,sep=""))) # REB attr 'mean'
attr(ceof,paste('appendix.',i,sep="")) <- z
}
attr(ceof,'n.apps') <- n.app
attr(ceof,'history') <- history.stamp(X)
attr(ceof,'aspect') <- 'anomaly'
class(ceof) <- c("eof",class(X))
invisible(ceof)
}
eof2field <- function(x,is=NULL,lon=NULL,lat=NULL,anomaly=FALSE) {
#print("HERE"); print(lon); print(lat)
greenwich <- attr(x,'greenwich')
if (!is.null(lon)) lon.rng <- range(lon) else lon.rng <- NULL
if (!is.null(lat)) lat.rng <- range(lat) else lat.rng <- NULL
if ( !is.null(lon.rng) | !is.null(lat.rng) )
eof <- subset(x,is=list(lon=lon.rng,lat=lat.rng)) else
if (!is.null(is))
eof <- subset(x,is=is) else
eof <- x
#print(c(greenwich,attr(eof,'greenwich')))
# REB 04.12.13 comment below
U <- attr(eof,'pattern')
d <- dim(U); dim(U) <- c(d[1]*d[2],d[3])
W <- attr(eof,'eigenvalues')
V <- coredata(eof)
y <-U %*% diag(W) %*% t(V)
if (!anomaly) y <- y + c(attr(eof,'mean'))
y <- t(y)
y <- as.field.default(y,index(eof),
lon=attr(eof,'longitude'),lat=attr(eof,'latitude'),
param=attr(eof,'variable'),unit=attr(eof,'unit'),
longname=attr(eof,'longname'),src=attr(eof,'source'),
url=attr(eof,'url'),reference=attr(eof,'reference'),
info=attr(eof,'info'),calendar=attr(eof,'calendar'),
greenwich=attr(eof,'greenwich'))
if (!is.null(lon) | !is.null(lat)) {
if (is.null(lon)) lon <- c(-180,180)
if (is.null(lat)) lat <- c(-90,90)
}
attr(y,'history') <- history.stamp(x)
if (anomaly) attr(y,'aspect') <- 'anomaly' else
attr(y,'aspect') <- 'original'
class(y) <- class(eof)[-1]
invisible(y)
}
PCA <- function(X,...) UseMethod("PCA")
PCA.default <- function(X,...) {
stop("Don't know how to handle objects other than station")
}
PCA.station <- function(X,neofs=20,na.action='fill',verbose=FALSE) {
if (na.action=='fill') {
if (verbose) print('Fill missing data gaps')
# Use interpolation to till missing data gaps. OK for small glitches.
d <- dim(X)
for (i in 1:d[2]) {
x <- coredata(X[,i]); ok <- is.finite(x)
X[,i] <- approx(y=x[ok],x=(1:d[1])[ok],xout=1:d[1])$y
}
}
# Re-order dimensions: space x time
x <- t(coredata(X))
D <- dim(x)
neofs <- min(neofs,D[1])
ok.time <- is.finite(colMeans(x))
z <- x[,ok.time]
ok.site <- is.finite(rowMeans(z))
z <- z[ok.site,]
if (verbose) print(paste('Extract',sum(ok.site),'stations',
'and',sum(ok.time),'time steps',
'dimension=',dim(z)[1],'x',dim(z)[2]))
X.clim <- rowMeans(x,na.rm=TRUE)
pca <- svd(z - rowMeans(z))
#str(pca); print(neofs)
autocor <- 0
#print(paste("Find max autocorr in the grid boxes."))
#print(dim(y))
for (i in 1:dim(z)[2]) {
vec <- as.vector(coredata(z[,i]))
ar1 <- acf(vec[],plot=FALSE)
autocor <- max(c(autocor,ar1$acf[2,1,1]),na.rm=TRUE)
}
# Recover the original matrix size, and insert the results
# where there was valid data
U <- matrix(rep(NA,D[1]*neofs),D[1],neofs)
U[ok.site,] <- pca$u[,1:neofs]
V <- matrix(rep(NA,D[2]*neofs),D[2],neofs)
#print(D); print(dim(V)); print(dim(pca$v))
#print(dim(V[ok.time,])); print(dim(pca$v[1:neofs,]))
V[ok.time,] <- pca$v[,1:neofs]
y <- zoo(V,order.by=index(X))
names(y) <- paste("X.",1:neofs,sep="")
invert <- apply(U,2,mean) < 0
U[,invert] <- -U[,invert]
y[,invert] <- -y[,invert]
y <- attrcp(X,y)
#nattr <- softattr(X)
#for (i in 1:length(nattr))
# attr(y,nattr[i]) <- attr(X,nattr[i])
attr(y,'pattern') <- U
attr(y,'dimensions') <- D
attr(y,'mean') <- X.clim
attr(y,'max.autocor') <- autocor
attr(y,'eigenvalues') <- pca$d[1:neofs]
attr(y,'sum.eigenv') <- sum(pca$d)
attr(y,'tot.var') <- sum(pca$d^2)
attr(y,'aspect') <- 'anomaly'
attr(y,'history') <- history.stamp(X)
class(y) <- c("pca",class(X))
invisible(y)
}
# Transfer PCA back to station data
pca2station <- function(X,lon=NULL,lat=NULL,anomaly=FALSE) {
stopifnot(!missing(X), inherits(X,"pca"))
if (inherits(X,'ds')) class(X) <- class(X)[-1]
#print('pca2station')
pca <- X
cls <- class(pca)
U <- attr(pca,'pattern')
d <- dim(U)
W <- attr(pca,'eigenvalues')
V <- coredata(pca)
V[!is.finite(V)] <- 0
#str(U); str(W); str(V)
x <-U %*% diag(W) %*% t(V)
#str(x)
if (!anomaly) x <- x + c(attr(pca,'mean'))
x <- zoo(t(x),order.by=index(pca))
if (anomaly) attr(x,'aspect') <- 'anomaly' else
attr(x,'aspect') <- 'original'
# attr(x,'call') <- match.call()
# class(x) <- class(pca)[-1]
names(x) <- attr(pca,'location') # AM 30.07.2013 added
x <- attrcp(pca,x)
#nattr <- softattr(pca)
#for (i in 1:length(nattr))
# attr(x,nattr[i]) <- attr(pca,nattr[i])
# Add meta data as attributes:
attr(x,'longitude') <- attr(pca,'longitude')
attr(x,'latitude') <- attr(pca,'latitude')
attr(x,'history') <- history.stamp(pca)
class(x) <- cls[-1]
#print("HERE")
invisible(x)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.