# not exported
ar1 <- function(x,...) {
x <- try(acf(x,plot=FALSE,na.action = na.pass)$acf[2])
if (inherits(x,'try-error')) x <- NA
return(x)
}
# not exported
ltp <- function(x,type='exponential',...) {
# Rybski et al. (2006), i:10.1029/2005GL025591
ar <- acf(x,plot=FALSE)$acf
n <- min(11,(1:length(ar))[ar <= 0])-1
gamma <- log(ar[1:n])
l <- 1:length(gamma)
# qfit <- lm(gamma ~ l + I(l^2))
qfit <- lm(gamma ~ l)
gamma <- qfit$coefficients[2]
if (type=='exponential') y <- gamma else
if (type=='hurst') y <- 1 - gamma/2
return(y)
}
#' Downscale ensemble runs
#'
#' Downscales an ensemble of climate model runs, e.g. CMIP5, taking the results
#' to be seasonal climate statistics. For temperature, the result hold the
#' seasonal mean and standard deviation, whereas for precipitation, the results
#' hold the wet-day mean, the wet-day frequency, and the wet/dry-spell
#' statistics. The call assumes that netCDF files containing the climate model
#' ensemble runs are stores in a file structure, linked to the path argument
#' and the rcp argument.
#'
#' These methods are based on \code{\link{DS}}, and \code{DSensemble} is
#' designed to make a number of checks and evaluations in addition to
#' performing the DS on an ensemble of models. It is based on a similar
#' philosophy as the old R-package '\code{clim.pact}', but there is a new
#' default way of handling the predictors. In order to attempt to ensure a
#' degree of consistency between the downscaled results and those of the GCMs,
#' a fist covariate is introduced before the principal components (PCs)
#' describing the \code{\link{EOF}s}.
#'
#' \code{DSensemble.pca} is used to downscale a predictor represented in terms
#' of PCA, and can reduce the computation time significantly. See Benestad et
#' al. (2015) \url{http://dx.doi.org/10.3402/tellusa.v67.28326}.
#'
#' The argument \code{non.stationarity.check} is used to conduct an additional
#' test, taking the GCM results as 'pseudo-reality' where the predictand is
#' replaced by GCM results interpolated to the same location as the provided
#' predictand. The time series with interpolated values are then used as
#' predictor in calibrating the model, and used to predict future values. This
#' set of prediction is then compared with the interpolated value itself to see
#' if the dependency between the large and small scales in the model world is
#' non-stationary.
#'
#' Other chekch include cross-validation (\code{\link{crossval}}) and
#' diagnostics comparing the sample of ensemble results with the observations:
#' number of observations outside the predicted 90-percent conf. int and
#' comparing trends for the past.
#'
#' The 'bias correction' is described in Imbert and Benestad (2005),
#' \emph{Theor. Appl. Clim.} \url{http://dx.doi.org/10.1007/s00704-005-0133-4}.
#'
#'
#' @aliases DSensemble DSensemble.default DSensemble.station DSensemble.t2m
#' DSensemble.precip DSensemble.annual DSensemble.season DSensemble.field
#' DSensemble.mu.worstcase DSensemble.pca DSensemble.eof
#'
#' @importFrom graphics par grid segments text axis abline
#' @importFrom stats ks.test pnorm acf sd na.pass lm rnorm window start end
#' cor.test
#'
#' @param y A station object.
#' @param plot Plot intermediate results if TRUE.
#' @param path The path where the GCM results are stored.
#' @param rcp Which (RCP) scenario
#' @param biascorrect TRUE, apply a bias adjustment using \code{\link{biasfix}}
#' @param predictor The predictor, a field or EOF object
#' @param non.stationarity.check If TRUE perform stationarity test - work in
#' progress
#' @param ip Which EOFs to include in the step-wise multiple regression.
#' @param rmtrend TRUE: detrend before calibrating the regression model.
#' @param lon Longitude range for predictor
#' @param lat Latitude range for predictor
#' @param rel.cord TRUE: use the range relative to predictand; FALSE use
#' absolute range
#' @param it Used to extract months or a time period. See \code{\link{subset}}.
#' @param select GCMs to select, e.g .subsample the ensemble (1:3 selects the
#' three first GCMs)
#' @param FUN Function for aggregating the predictand (daily), e.g. 'mean',
#' 'wetmean'
#' @param threshold Used together with FUN for some functions ('wetmean').
#' @param nmin Minimum number of day used in \code{\link{annual}} used for
#' aggregating the predictand/predictor
#' @param FUNX Function for transforming the predictor, e.g.
#' '\code{\link{C.C.eq}}' to estimate the saturation water vapout
#' @param type Type of netCDF used in \code{\link{retrieve}} for reading GCM
#' data.
#' @param pattern File name pattern for GCM data.
#' @param verbose TRUE for checking and debugging the functions.
#' @param file.ds Name of file saving the results.
#' @param path.ds Path of file saving the results.
#' @param xfuns Names of functions which do not work in \code{annual(x,FUN=f)}.
#' These functions are used using the following code
#' \code{annual(f(x),FUN="mean")}
#' @param mask TRUE mask out land
#' @param ds.interval Default set to NULL, otherwise set the time period for downscaling, e.g. c(1950,2100)
#' @param \dots additional arguments
#'
#' @return A 'dsensembele' object - a list object holding DS-results.
#'
#' @keywords manip
#' @examples
#'
#' \dontrun{
#' # Import historical temperature data from Oslo
#' data(Oslo)
#'
#' ## Download NorESM1-M from 'climexp.knmi.nl' in default directory
#' ## (home directory for linux/mac users)
#' url <-"http://climexp.knmi.nl/CMIP5/monthly/tas"
#' ## Download NorESM1-ME for the emission scenario RCP4.5
#' noresm <- "tas_Amon_NorESM1-M_rcp45_000.nc"
#' if (!file.exists(noresm)) {
#' download.file(url=file.path(url,noresm), destfile=noresm,
#' method="auto", quiet=FALSE, mode="w", cacheOK=TRUE)
#' }
#'
#' ## Download FIO-ESM for the emission scenario RCP4.5
#' fioesm <- "tas_Amon_FIO-ESM_rcp45_000.nc"
#' if (!file.exists(fioesm)) {
#' download.file(url=file.path(url,fioesm), destfile=fioesm,
#' method="auto", quiet=FALSE, mode="w", cacheOK=TRUE)
#' }
#'
#' ## Downscale the predictor (ERA-interim reanalysis 2m temperature)
#' predictor <- "air.2m.mon.mean.nc"
#' if (!file.exists(predictor)) {
#' url <-"http://climexp.knmi.nl/ERA-interim/erai_t2m.nc"
#' download.file(url=url, destfile=predictor,
#' method="auto", quiet=FALSE, mode="w", cacheOK=TRUE)
#' }
#'
#' # Downscale the temperature in Oslo
#' rcp4.5 <- DSensemble.t2m(Oslo, path='~', rcp='', pattern="tas_Amon_",
#' biascorrect=TRUE, predictor = predictor,
#' plot=TRUE, verbose=TRUE)
#'
#' ## Evaluation:
#' ## (1) combare the past trend with downscaled trends for same
#' ## interval by ranking and by fitting a Gaussian to the model ensemble;
#' ## (2) estimate the probability for the counts outside the 90
#' ## percent confidence interval according to a binomial distribution.
#' diagnose(rcp4.5, plot = TRUE, type = "target")
#' }
#'
#' @export DSensemble
DSensemble<-function(y,...) UseMethod("DSensemble")
#' @exportS3Method
#' @export DSensemble.default
DSensemble.default <- function(y,...,path='CMIP5.monthly/',rcp='rcp45') {
##
stopifnot(!missing(y),inherits(y,"station"),
file.exists(paste(file.path(path,rcp,fsep = .Platform$file.sep))))
if (is.null(attr(y,'aspect'))) attr(y,'aspect') <- "original"
if (inherits(y,'pca')) {
z <- DSensemble.pca(y,path=path,rcp=rcp,...)
} else if (inherits(y,c('eof','field'))) {
z <- DSensemble.eof(y,path=path,rcp=rcp,...)
} else if (inherits(y,'annual')) {
z <- DSensemble.annual(y,path=path,rcp=rcp,threshold=1,...)
} else if (inherits(y,'season')) {
z <- DSensemble.season(y,path=path,rcp=rcp,...)
} else if (is.T(y)) {
z <- DSensemble.t2m(y,path=path,rcp=rcp,...)
} else if (is.precip(y)) {
z <- DSensemble.precip(y,path=path,rcp=rcp,threshold=1,...)
} else {
z <- NULL
}
return(z)
}
#' @export DSensemble.t2m
DSensemble.t2m <- function(y,...,plot=TRUE,path="CMIP5.monthly/",predictor="ERA40_t2m_mon.nc",
rcp="rcp45",biascorrect=FALSE,non.stationarity.check=FALSE,
ip=1:6,lon=c(-20,20),lat=c(-10,10),it=NULL,rel.cord=TRUE,
select=NULL,FUN="mean",FUNX="mean",xfuns='C.C.eq',
pattern="tas_Amon_",path.ds=NULL,file.ds="DSensemble.rda",
nmin=NULL,verbose=FALSE,ds.interval=NULL) {
if (!inherits(y,'day')) warning('station is not daily data')
if (verbose) print("predictand")
#if ( (deparse(substitute(FUN))=='sd') | (deparse(substitute(FUN))=='ar1') )
if(verbose) print("DSensemble.t2m")
if ((FUN=='sd') | (FUN =='ar1')) {
y <- anomaly(y)
attr(y,'aspect') <- 'original'
}
if(verbose) print("aggregate time series")
if (is.null(nmin)) nmin4 <- nmin else nmin4 <- nmin*4
ya <- annual(y,FUN=FUN,nmin=nmin4)
y <- as.4seasons(y,FUN=FUN,nmin=nmin)
if(verbose) print("Set lon/lat predictor range")
if ( !is.na(attr(y,'longitude'))[1] & (any(lon>0) & any(lon<0)) & rel.cord)
lon <- round( range(attr(y,'longitude'),na.rm=TRUE) + lon )
if ( !is.na(attr(y,'latitude'))[1] & (any(lat>0) & any(lat<0)) & rel.cord)
lat <- round( range(attr(y,'latitude'),na.rm=TRUE) + lat )
if (sum(!is.finite(lon))>0)
warning(paste('Bad longitude range provided: ',paste(lon,collapse='-')))
if (sum(!is.finite(lat))>0)
warning(paste('Bad latitude range provided: ',paste(lat,collapse='-')))
# The units
if(verbose) print("Arrange units")
if ( (unit(y)[1] == "deg C") | (unit(y)[1] == "degree Celsius") |
(unit(y)[1] == "degC") | (unit(y)[1] == "Celsius") )
unit <- expression(degree*C) else
unit <- attr(y,'unit')
if (verbose) print(paste('Units:',unit))
if (plot) {
if(verbose) print("Plot station data (predictand)")
ylim <- c(0,0)
ylim <- switch(FUN,'mean'=c(-2,8),'sd'=c(-0.5,1),'ar1'=c(-0.5,0.7)) # assuming y is the temperature?
if (verbose) print(paste('set ylim based on "',FUN,'" -> c(',ylim[1],', ',ylim[2],')',sep=''))
par0 <- par()
par(bty="n")
plot.zoo(ya,type="b",pch=19,main=attr(y,'location'),
xlab="year",ylab=unit,
sub=paste('Station: ',attr(y,'station_id'),'; coordinates: ',
round(attr(y,'longitude'),4),'E/',
round(attr(y,'latitude'),4),'N; ',
attr(y,'altitude'),'m.a.s.l',sep=''),
ylim=ylim + range(coredata(ya),na.rm=TRUE),xlim=c(1900,2100))
grid()
par(bty=par0$bty)
}
if(verbose) print("Retrieve predictor data")
if (is.character(predictor))
lsp <- retrieve(file=predictor,lon=lon,lat=lat,
verbose=verbose) else
if (inherits(predictor,'field'))
lsp <- subset(predictor,is=list(lon=lon,lat=lat))
if(verbose) print("Aggregate seasonal values")
LSP <- as.4seasons(lsp,FUN=FUNX,nmin=nmin)
DJF <- subset(as.4seasons(lsp,FUN=FUNX,nmin=nmin),it='djf')
MAM <- subset(as.4seasons(lsp,FUN=FUNX,nmin=nmin),it='mam')
JJA <- subset(as.4seasons(lsp,FUN=FUNX,nmin=nmin),it='jja')
SON <- subset(as.4seasons(lsp,FUN=FUNX,nmin=nmin),it='son')
ok1 <- is.finite(rowSums(DJF))
DJF <- subset(DJF,it=range(year(DJF)[ok1]))
ok2 <- is.finite(rowSums(MAM))
MAM <- subset(MAM,it=range(year(MAM)[ok2]))
ok3 <- is.finite(rowSums(JJA))
JJA <- subset(JJA,it=range(year(JJA)[ok3]))
ok4 <- is.finite(rowSums(SON))
SON <- subset(SON,it=range(year(SON)[ok4]))
rm("lsp"); gc(reset=TRUE)
# Ensemble GCMs
if(verbose) print("Retrieve & arrange GCMs")
path <- file.path(path,rcp,fsep = .Platform$file.sep)
ncfiles <- list.files(path=path,pattern=pattern,full.names=TRUE)
N <- length(ncfiles)
if (N==0) {print('DSensembles: no files found and N=0...'); return()}
if (is.null(select)) select <- 1:N else
select <- select[select<=N]; N <- length(select)
if (verbose) {print('GCMs:'); print(path); print(pattern); print(ncfiles[select])}
if(verbose) print("Set up results matrix & table of diagnostics")
years <- sort(rep(1900:2100,4))
months <- rep(c(1,4,7,10),length(1900:2100))
m <- length(years)
X <- matrix(rep(NA,N*m),N,m)
gcmnm <- rep("",N)
scorestats <- matrix(rep(NA,N*9),N,9)
colnames(scorestats) <- c("1-r.xval","mean.diff","1-sd.ratio",
"1-autocorr.ratio",
"res.trend","res.K-S","res.ar1",'amplitude.ration',
'1-R2')
t <- as.Date(paste(years,months,'01',sep='-'))
if(verbose) print("Quick test")
#load('control.ds.1.rda'); LSP.ctl -> LSP
flog <- file("DSensemble.t2m-log.txt","at")
if (verbose) print("loop...")
for (i in 1:N) {
if (verbose) print(ncfiles[select[i]])
gcm <- retrieve(file = ncfiles[select[i]],
lon=range(lon(LSP))+c(-2,2),
lat=range(lat(LSP))+c(-2,2),verbose=verbose)
if (!is.null(ds.interval)) gcm <- subset(gcm,it=ds.interval)
if (length(index(gcm))<=1) print(paste('Problem selecting GCM results in period',
min(year(y),na.rm=TRUE),'2099'))
nmattsgcm <- names(attributes(gcm))
if (verbose) print('nmattsgcm <- names(attributes(gcm))')
if (length(grep("realization",nmattsgcm)) > 0) nm.r <- attributes(gcm)[grep("realization",nmattsgcm)][[1]] else nm.r <- '_'
if (length(grep("initialization",nmattsgcm)) > 0) nm.i <- attributes(gcm)[grep("initialization",nmattsgcm)][[1]] else nm.i <- '_'
if (length(grep("physics",nmattsgcm)) > 0) nm.p <- attributes(gcm)[grep("physics",nmattsgcm)][[1]] else nm.p <- '_'
rip <- paste0("r",attr(gcm,nm.r),"i",attr(gcm,nm.i),"p",attr(gcm,nm.p))
gcmnm.i <- paste0(attr(gcm,'model_id'),".",rip)
gcmnm[i] <- gcmnm.i
#gcmnm[i] <- paste(attr(gcm,'model_id'),attr(gcm,'parent_experiment_rip'),sep="-")
# REB: 30.04.2014 - new lines...
DJFGCM <- subset(as.4seasons(gcm,FUN=FUNX,nmin=nmin),it='djf')
MAMGCM <- subset(as.4seasons(gcm,FUN=FUNX,nmin=nmin),it='mam')
JJAGCM <- subset(as.4seasons(gcm,FUN=FUNX,nmin=nmin),it='jja')
SONGCM <- subset(as.4seasons(gcm,FUN=FUNX,nmin=nmin),it='son')
rm("gcm"); gc(reset=TRUE)
X.DJF <- combine(DJF,DJFGCM)
X.MAM <- combine(MAM,MAMGCM)
X.JJA <- combine(JJA,JJAGCM)
X.SON <- combine(SON,SONGCM)
if (verbose) print("- - - > EOFs")
Z1 <- try(EOF(X.DJF))
if (inherits(Z1,"try-error")) {
writeLines(gcmnm[i],con=flog)
writeLines(Z1[[1]],con=flog)
}
Z2 <- try(EOF(X.MAM))
if (inherits(Z2,"try-error")) {
writeLines(gcmnm[i],con=flog)
writeLines(Z2[[1]],con=flog)
}
Z3 <- try(EOF(X.JJA))
if (inherits(Z3,"try-error")) {
writeLines(gcmnm[i],con=flog)
writeLines(Z3[[1]],con=flog)
}
Z4 <- try(EOF(X.SON))
if (inherits(Z4,"try-error")) {
writeLines(gcmnm[i],con=flog)
writeLines(Z4[[1]],con=flog)
}
# The test lines are included to assess for non-stationarity
if (non.stationarity.check) {
## KMP 2018-11-12: Not sure if this test works.
## It's only done for DJF here but could be expanded to other seasons
## if it is a useful diagnostic.
testGCM <- subset(DJFGCM,it=range(year(DJF))) # REB 29.04.2014
testy <- as.station(regrid(testGCM,is=DJF)) # REB 29.04.2014
attr(testGCM,'source') <- 'testGCM' # REB 29.04.2014
testZ <- combine(testGCM,DJFGCM) # REB 29.04.2014
rm("testGCM"); gc(reset=TRUE)
}
##
# REB: 30.04.2014 - new lines...
if (verbose) print("- - - > DS (seasonal)")
if (verbose) print(class(attr(Z1,'appendix.1')))
if (biascorrect) try(Z1 <- biasfix(Z1))
ds1 <- try(DS(subset(y,it='djf'),Z1,ip=ip))
if (inherits(ds1,"try-error")) {
writeLines(gcmnm[i],con=flog)
writeLines(ds1[[1]],con=flog)
} else {
## REB 2024-03-08 - reduce the volume of the information
attr(ds,'model') <- summary(attr(ds,'model'))
}
if (biascorrect) try(Z2 <- biasfix(Z2))
ds2 <- try(DS(subset(y,it='mam'),Z2,ip=ip))
if (inherits(ds2,"try-error")) {
writeLines(gcmnm[i],con=flog)
writeLines(ds2[[1]],con=flog)
} else {
## REB 2024-03-08 - reduce the volume of the information
attr(ds,'model') <- summary(attr(ds,'model'))
}
if (biascorrect) try(Z3 <- biasfix(Z3))
ds3 <- try(DS(subset(y,it='jja'),Z3,ip=ip))
if (inherits(ds3,"try-error")) {
writeLines(gcmnm[i],con=flog)
writeLines(ds3[[1]],con=flog)
} else {
## REB 2024-03-08 - reduce the volume of the information
attr(ds,'model') <- summary(attr(ds,'model'))
}
if (biascorrect) try(Z4 <- biasfix(Z4))
ds4 <- try(DS(subset(y,it='son'),Z4,ip=ip))
if (inherits(ds4,"try-error")) {
writeLines(gcmnm[i],con=flog)
writeLines(ds4[[1]],con=flog)
} else {
## REB 2024-03-08 - reduce the volume of the information
attr(ds,'model') <- summary(attr(ds,'model'))
}
if (verbose) print("Combine the 4 seasons")
ds <- try(combine(list(ds1,ds2,ds3,ds4)))
rm("Z1","Z2","Z3","Z4")
gc(reset=TRUE) ##rm("ds1","ds2","ds3","ds4")
if (inherits(ds,"try-error")) {
writeLines(gcmnm[i],con=flog)
writeLines(ds[[1]],con=flog)
} else {
x.val <- c(crossval(ds1),crossval(ds2),crossval(ds3),crossval(ds4))
attr(ds,'evaluation') <- x.val
if (verbose) print("post-processing")
z <- attr(ds,'appendix.1')
#save(file='inside.dsens.1.rda',ds,y,Z1)
if (non.stationarity.check) {
testds <- DS(testy,testZ,biascorrect=biascorrect,ip=ip) # REB 29.04.2014
testz <- attr(testds,'appendix.1') # REB 29.04.2014
difference.z <- testy - testz # REB 29.04.2014
}
i1 <- is.element(paste(years,months,sep='-'),
paste(year(z),month(z),sep='-'))
i2 <- is.element(paste(year(z),month(z),sep='-'),
paste(years,months,sep='-'))
#
X[i,i1] <- z[i2]
# Diagnose the residual: ACF, pdf, trend. These will together with the
# cross-validation and the common EOF diagnostics provide a set of
# quality indicators.
cal <- coredata(attr(ds,"original_data"))
fit <- coredata(attr(ds,"fitted_values"))
res <- as.residual(ds)
res.trend <- 10*diff(range(trend(res)))/diff(range(year(res)))
ks <- round(ks.test(coredata(res),pnorm)$p.value,4)
ar <- as.numeric(acf(trend(cal-fit,result="residual"),plot=FALSE)[[1]][2]) ##ar <- ar1(coredata(res))
if (verbose) print(paste("Examine residuals: trend=",
round(res.trend,3),'D/decade; K.S. p-val',
round(ks,2),'; AR(1)=',round(ar,2)))
# Evaluation: here are lots of different aspects...
# Get the diagnostics: this is based on the analysis of common EOFs...
xval <- attr(ds,'evaluation')
r.xval <- cor(xval[,1],xval[,2])
if (verbose) print(paste("x-validation r=",r.xval))
dsa <- annual(ds) # annual mean value
xy <- merge.zoo(annual(z),ya)
ds.ratio <- sd(xy[,1],na.rm=TRUE)/sd(xy[,2],na.rm=TRUE)
if (verbose) print(paste("sd ratio=",ds.ratio))
#print(names(attributes(ds)))
if (biascorrect) {
diag <- attr(ds,'diagnose')
if ( (verbose) & !is.null(diag)) str(diag)
} else diag <- NULL
# diagnose for ds-objects
##
if (verbose) print('...')
if (is.null(diag)) {
##diag <- diagnose(z,plot=FALSE)
scorestats[i,] <- c(1-r.xval,NA,NA,NA,res.trend,ks,1-ar,1-ds.ratio,
1-round(var(xval[,2])/var(xval[,1]),2))
mdiff <- (mean(subset(ya,it=range(year(dsa))),na.rm=TRUE)-
mean(subset(dsa,it=range(year(ya))),na.rm=TRUE))/sd(ya,na.rm=TRUE)
srati <- sd(subset(dsa,it=range(year(ya))),na.rm=TRUE)/
sd(subset(ya,it=range(year(dsa))),na.rm=TRUE)
arati <- ar1(dsa)/ar1(ya)
} else {
# Extract the mean score for leading EOF from the 4 seasons:
mdiff <- mean(c(diag$s.1$mean.diff[1]/diag$s.1$sd0[1],
diag$s.2$mean.diff[1]/diag$s.2$sd0[1],
diag$s.3$mean.diff[1]/diag$s.3$sd0[1],
diag$s.4$mean.diff[1]/diag$s.4$sd0[1]))
srati <- mean(c(diag$s.1$sd.ratio[1],diag$s.2$sd.ratio[1],
diag$s.3$sd.ratio[1],diag$s.4$sd.ratio[1]))
arati <- mean(c(diag$s.1$autocorr.ratio[1],diag$s.2$autocorr.ratio[1],
diag$s.3$autocorr.ratio[1],diag$s.4$autocorr.ratio[1]))
}
scorestats[i,] <- c(1-r.xval,mdiff,1-srati,1-arati,res.trend,ks,ar,
1-ds.ratio,
1- var(xval[,2])/var(xval[,1]))
if (verbose) print('scorestats')
if (verbose) print(scorestats[i,])
quality <- 100*(1-mean(abs(scorestats[i,]),na.rm=TRUE))
if (plot) {
qcol <- quality
qcol[qcol < 1] <- 1;qcol[qcol > 100] <- 100
cols <- rgb(seq(1,0,length=100),rep(0,100),seq(0,1,length=100),0.15)
lines(annual(z),lwd=2,col=cols[qcol])
lines(ya,type="b",pch=19)
lines(dsa,lwd=2,col="grey")
}
R2 <- round(100*sd(xval[,2])/sd(xval[,1]),2)
print(paste("i=",i,"GCM=",gcmnm[i],' x-valid cor=',round(100*r.xval,2),
"R2=",R2,'% ','Common EOF: bias=',round(mdiff,2),
' sd1/sd2=',round(srati,3),
"mean=",round(mean(coredata(y),na.rm=TRUE),2),'quality=',round(quality)))
}
}
if(verbose) print("Done with downscaling!")
rm("DJFGCM")
X <- zoo(t(X),order.by=t)
colnames(X) <- gcmnm
attr(X,"model_id") <- gcmnm
#X <- attrcp(y,X)
attr(X,'station') <- y
attr(X,'predictor') <- attr(LSP,'source')
attr(X,'domain') <- list(lon=lon,lat=lat)
attr(X,'scorestats') <- scorestats
attr(X,'path') <- path
attr(X,'scenario') <- rcp
attr(X,'history') <- history.stamp(y)
if (non.stationarity.check)
attr(X,'on.stationarity.check') <- difference.z else
attr(X,'on.stationarity.check') <- NULL
class(X) <- c("dsensemble","zoo")
if (!is.null(path.ds)) file.ds <- file.path(path.ds,file.ds)
save(file=file.ds,X)#"DSensemble.rda",X)
print("---")
invisible(X)
}
#save(file=paste("dscmip5_",attr(y,'location'),"_",N,"_rcp4.5.rda",sep=""),rcp4.5)
#' @export DSensemble.precip
DSensemble.precip <- function(y,...,plot=TRUE,path="CMIP5.monthly/",rcp="rcp45",biascorrect=FALSE,
predictor="ERA40_pr_mon.nc",non.stationarity.check=FALSE,
ip=1:6,lon=c(-10,10),lat=c(-10,10),it=NULL,rel.cord=TRUE,
select=NULL,FUN="wetmean",FUNX="sum",xfuns='C.C.eq',threshold=1,
pattern="pr_Amon_",verbose=FALSE,nmin=NULL,ds.interval=NULL) {
# FUN: exceedance, wetfreq, wet, dry
if (verbose) print('DSensemble.precip')
# if (deparse(substitute(FUN))=='spell') {
if ( (FUN=='wet') | (FUN=='dry')) {
y <- spell(y,threshold=threshold)
y <- annual(y,nmin=nmin)
#plot(y);
y <- switch(FUN,'wet'=subset(y,is=1),'dry'=subset(y,is=2))
} else
# y <- annual(y,FUN=FUN,threshold=threshold)
# REB: the two next lines give errors... :(
# if (sum(is.element(names(formals(FUN)),'threshold')==1))
# y <- annual(y,FUN=FUN,threshold=threshold,nmin=nmin) else
y <- annual(y,FUN=FUN,nmin=nmin)
#
index(y) <- year(y)
if (!is.na(attr(y,'longitude')) & rel.cord)
lon <- round( range(attr(y,'longitude'),na.rm=TRUE) + lon )
if (!is.na(attr(y,'latitude')) & rel.cord)
lat <- round( range(attr(y,'latitude'),na.rm=TRUE) + lat )
if (sum(!is.finite(lon))>0)
warning(paste('Bad longitude range provided: ',paste(lon,collapse='-')))
if (sum(!is.finite(lat))>0)
warning(paste('Bad latitude range provided: ',paste(lat,collapse='-')))
# Get the predictor: ERA40
if (verbose) print("predictor")
if (is.character(predictor))
pre <- retrieve(file=predictor,lon=lon,lat=lat,
verbose=verbose) else
if (inherits(predictor,'field')) pre <- predictor
rm("predictor"); gc(reset=TRUE)
attr(pre,"source") <- "ERA40"
# Use proportional variaions
if (verbose) print("Annual mean")
if (!is.annual(pre)) {
if (sum(is.element(FUNX,xfuns))==0)
PREX <- annual(pre,FUN=FUNX,nmin=nmin) else
eval(parse(text=paste('PREX <- annual(',FUNX,'(pre),FUN="mean",nmin=nmin)',sep="")))
}
#print("estimate %-changes")
PRE.ref <- subset(PREX,it=1961:1990)
# PRE <- zoo(100*coredata(PREX)/colMeans(coredata(PRE.ref)),order.by=year(PREX))
if (is.precip(PREX)) {
PRE <- zoo(100*coredata(PREX)/mean(c(coredata(subset(PREX,it=1961:1990)))),
order.by=year(PREX))
PRE <- attrcp(PREX,PRE)
attr(PRE, "unit" ) <- "%"
attr(PRE, "dimensions" ) <- attr(PREX, "dimensions" )
class(PRE) <- class(PREX)
} else PRE <- PREX
rm("PREX"); gc(reset=TRUE)
if (verbose) print("graphics")
unit <- attr(y,'unit')
if (plot) {
ylim <- switch(FUN,
'exceedance'=c(0,10),'wetmean'=c(0,10),
'wetfreq'=c(0,0),'spell'=c(0,0),
'mean'=c(-10,50),'sd'=c(-5,10),'ar1'=c(-0.5,0.7),
'HDD'=c(0,5000),'CDD'=c(0,500),'GDD'=c(0,2000))
if (is.null(ylim)) ylim <- c(0,0)
par0 <- par()
par(bty="n")
plot.zoo(y,type="b",pch=19,main=attr(y,'location'),
xlab="year",ylab=unit,
sub=paste('Station: ',attr(y,'station_id'),'; coordinates: ',
round(attr(y,'longitude'),4),'E/',
round(attr(y,'latitude'),4),'N; ',
attr(y,'altitude'),'m.a.s.l',sep=''),
ylim=ylim + range(coredata(y),na.rm=TRUE),xlim=c(1900,2100))
grid()
par(bty=par0$bty)
}
# Ensemble GCMs
path <- file.path(path,rcp,fsep = .Platform$file.sep)
ncfiles <- list.files(path=path,pattern=pattern,full.names=TRUE)
N <- length(ncfiles)
if (is.null(select)) select <- 1:N else
N <- length(select)
if (verbose) print(ncfiles[select])
# set up results matrix and tables of diagnostics:
years <- sort(1900:2100)
m <- length(years)
X <- matrix(rep(NA,N*m),N,m)
gcmnm <- rep("",N)
scorestats <- matrix(rep(NA,N*9),N,9)
colnames(scorestats) <- c("1-r.xval","mean.diff","1-sd.ratio","1-autocorr.ratio",
"res.trend","res.K-S","res.ar1",'amplitude.ration','1-R2')
flog <- file("DSensemble.precip-log.txt","at")
for (i in 1:N) {
#
gcm <- retrieve(file = ncfiles[select[i]],
lon=range(lon(PRE))+c(-2,2),lat=range(lat(PRE))+c(-2,2),verbose=verbose)
if (!is.null(ds.interval)) gcm <- subset(gcm,it=ds.interval)
if (length(index(gcm))<=1) print(paste('Problem selecting GCM results in period',
min(year(y),na.rm=TRUE),'2099'))
# KMP: 10.03.2017 - pass on additional information about GCM runs (gcm + rip - realization, initialization, physics version)
rip <- NULL
if(any(grepl("rip", names(attributes(gcm))))) {
nm.rip <- names(attributes(gcm))[grepl("rip",names(attributes(gcm)))][[1]]
if(!is.null(attr(gcm, nm.rip))) {
rip <- attr(gcm, nm.rip)
if(!grepl("r[0-9]{1,2}i[0-9]{1,2}p[0-9]{1,2}", rip)) rip <- NULL
}
}
if(is.null(rip)) {
if (verbose) print('is.null(rip)...1')
if (length(grep("realization",nmattsgcm)) > 0) nm.r <- attributes(gcm)[grep("realization",nmattsgcm)][[1]] else nm.r <- '_'
if (length(grep("initialization",nmattsgcm)) > 0) nm.i <- attributes(gcm)[grep("initialization",nmattsgcm)][[1]] else nm.i <- '_'
if (length(grep("physics",nmattsgcm)) > 0) nm.p <- attributes(gcm)[grep("physics",nmattsgcm)][[1]] else nm.p <- '_'
# nm.r <- names(attributes(gcm))[grep("realization",names(attributes(gcm)))][[1]]
# nm.i <- names(attributes(gcm))[grep("initialization",names(attributes(gcm)))][[1]]
# nm.p <- names(attributes(gcm))[grep("physics",names(attributes(gcm)))][[1]]
rip <- paste0("r",attr(gcm,nm.r),"i",attr(gcm,nm.i),"p",attr(gcm,nm.p))
}
gcmnm.i <- paste0(attr(gcm,'model_id'),".",rip)
#gcmnm[i] <- paste(attr(gcm,'model_id'),attr(gcm,'realization'),sep="-")
#gcmnm[i] <- attr(gcm,'model_id')
if (verbose) print(varid(gcm))
if (sum(is.element(FUNX,xfuns))==0)
GCMX <- annual(gcm,FUN=FUNX,nmin=nmin) else
eval(parse(text=paste('GCMX <- annual(',FUNX,'(gcm),FUN="mean",nmin=nmin)',sep="")))
if (is.precip(GCMX)) {
GCM <- zoo(100*coredata(GCMX)/mean(c(coredata(subset(GCMX,it=1961:1990)))),
order.by=year(GCMX))
GCM <- attrcp(GCMX,GCM)
attr(GCM, "unit" ) <- "%"
attr(GCM, "dimensions" ) <- attr(GCMX, "dimensions" )
class(GCM) <- class(GCMX)
} else
GCM <- GCMX
#
#str(GCM)
model.id <- attr(gcm,'model_id')
rm("gcm","GCMX"); gc(reset=TRUE)
if (verbose) print("combine")
#
PREGCM <- combine(PRE,GCM)
if (verbose) print("EOF")
Z <- EOF(PREGCM)
# The test lines are included to assess for non-stationarity
if (non.stationarity.check) {
testGCM <- subset(GCM,it=range(year(PRE))) # REB 29.04.2014
testy <- as.station(regrid(testGCM,is=y)) # REB 29.04.2014
attr(testGCM,'source') <- 'testGCM' # REB 29.04.2014
testZ <- EOF(combine(testGCM,GCM)) # REB 29.04.2014
rm("testGCM"); gc(reset=TRUE)
}
rm("GCM"); gc(reset=TRUE)
# The test lines are included to assess for non-stationarity
if (non.stationarity.check) {
testds <- DS(testy,testZ,biascorrect=biascorrect,ip=ip) # REB 29.04.2014
testz <- attr(testds,'appendix.1') # REB 29.04.2014
difference.z <- testy - testz # REB 29.04.2014
}
if (verbose) print("diagnose")
diag <- diagnose(Z)
if (biascorrect) Z <- biasfix(Z)
if (verbose) print("- - - > DS (precip)")
if (verbose) print(class(attr(Z,'appendix.1')))
ds <- try(DS(y,Z,ip=ip,verbose=verbose))
if (inherits(ds,"try-error")) {
writeLines(gcmnm[i],con=flog)
writeLines(ds[[1]],con=flog)
} else {
if (verbose) print("post-processing")
## REB 2024-03-08 - reduce the volume of the information
attr(ds,'model') <- summary(attr(ds,'model'))
z <- attr(ds,'appendix.1')
i1 <- is.element(years,year(z))
i2 <- is.element(year(z),years)
#
X[i,i1] <- z[i2]
# Diagnose the residual: ACF, pdf, trend. These will together with the
# cross-validation and the common EOF diagnostics provide a set of
# quality indicators.
cal <- coredata(attr(ds,"original_data"))
fit <- coredata(attr(ds,"fitted_values"))
res <- as.residual(ds)
res.trend <- 10*diff(range(trend(res)))/diff(range(year(res)))
ks <- ks.test(coredata(res),pnorm)$p.value
ar <- as.numeric(acf(trend(cal-fit,result="residual"),plot=FALSE)[[1]][2])
## ar <- ar1(coredata(res))
# Evaluation: here are lots of different aspects...
# Get the diagnostics: this is based on the analysis of common EOFs...
xval <- attr(ds,'evaluation')
r.xval <- cor(xval[,1],xval[,2])
#
xy <- merge.zoo(z,y)
ds.ratio <- sd(xy[,1],na.rm=TRUE)/sd(xy[,2],na.rm=TRUE)
# Extract the mean score for leading EOF from the 4 seasons:
mdiff <- diag$mean.diff[1]/diag$sd0[1]
srati <- 1 - diag$sd.ratio[1]
arati <- 1 - diag$autocorr.ratio[1]
scorestats[i,] <- c(1-r.xval,mdiff,srati,arati,res.trend,ks,ar,1-ds.ratio,
1-var(xval[,2])/var(xval[,1]))
if (verbose) print(scorestats[i,])
quality <- 100*(1-mean(abs(scorestats[i,]),na.rm=TRUE))
index(z) <- year(z); index(ds) <- year(ds)
if (plot) {
qcol <- quality
qcol[qcol < 1] <- 1;qcol[qcol > 100] <- 100
cols <- rgb(seq(1,0,length=100),rep(0,100),seq(0,1,length=100),0.15)
lines(z,lwd=2,col=cols[qcol])
lines(y,type="b",pch=19)
lines(ds,lwd=2,col="grey")
}
#
R2 <- round(100*sd(xval[,2])/sd(xval[,1]),2)
print(paste("i=",i,"GCM=",gcmnm[i],' x-valid cor=',round(100*r.xval,2),
"R2=",R2,'% ', 'Common EOF: bias=',round(mdiff,2),
' sd1/sd2=',round(srati,3),
"mean=",round(mean(coredata(y),na.rm=TRUE),2),'quality=',round(quality)))
}
}
#
X <- zoo(t(X),order.by=years)
colnames(X) <- gcmnm
attr(X,"model_id") <- gcmnm
#X <- attrcp(y,X)
attr(X,'station') <- y
attr(X,'predictor') <- attr(PRE,'source')
attr(X,'domain') <- list(lon=lon,lat=lat)
attr(X,'scorestats') <- scorestats
attr(X,'path') <- path
attr(X,'scenario') <- rcp
if (non.stationarity.check)
attr(X,'on.stationarity.check') <- difference.z else
attr(X,'on.stationarity.check') <- NULL
attr(X,'history') <- history.stamp(y)
class(X) <- c("dsensemble","zoo")
save(file="DSensemble.rda",X)
print("---")
invisible(X)
}
#' @exportS3Method
#' @export DSensemble.annual
DSensemble.annual <- function(y,...,plot=TRUE,path="CMIP5.monthly/",rcp="rcp45",biascorrect=FALSE,
predictor="ERA40_t2m_mon.nc",non.stationarity.check=FALSE,
ip=1:6,lon=c(-10,10),lat=c(-10,10),it=NULL,rel.cord=TRUE,
abscoords=FALSE,select=NULL,FUN=NULL,FUNX="mean",xfuns='C.C.eq',threshold=1,
pattern="tas_Amon_",verbose=FALSE,nmin=NULL,ds.interval=NULL) {
# FUN: exceedance, wetfreq, wetmean, mean wet-spell length, mean dry-spell length
if (verbose) print('DSensemble.annual')
# if (deparse(substitute(FUN))=='spell') {
index(y) <- year(y)
if (!abscoords) {
## If relative coordinates:
if (!is.na(attr(y,'longitude')) & rel.cord)
lon <- round( range(attr(y,'longitude'),na.rm=TRUE) + lon )
if (!is.na(attr(y,'latitude')) & rel.cord)
lat <- round( range(attr(y,'latitude'),na.rm=TRUE) + lat )
}
if (sum(!is.finite(lon))>0)
warning(paste('Bad longitude range provided: ',paste(lon,collapse='-')))
if (sum(!is.finite(lat))>0)
warning(paste('Bad latitude range provided: ',paste(lat,collapse='-')))
# Get the predictor: ERA40
if (verbose) print("predictor")
if (is.character(predictor))
pre <- retrieve(file=predictor,lon=lon,lat=lat,
verbose=verbose) else
if (inherits(predictor,'field')) pre <- predictor
rm("predictor"); gc(reset=TRUE)
attr(pre,"source") <- "ERA40"
## KMP 2017-09-12: don't use subset if pre is annual data and it is months or season!
## if it is character, then then extraction of months reduces number of
## months per year.
if ((is.null(nmin)) & (is.character(it))) nmin <- length(it)
if (!is.null(it) & !(is.character(it) & inherits(pre,"annual"))) {
if (verbose) print('Extract some months or a time period')
if (verbose) print(it)
pre <- subset(pre,it=it)
}
# Use proportional variations
## KMP 2017-09-12: don't calculate the annual mean if pre is already annual
if (verbose) print("Annual mean")
if(inherits(pre,"annual")) {
PRE <- pre
} else {
if (sum(is.element(FUNX,xfuns))==0) {
PRE <- annual(pre,FUN=FUNX,nmin=nmin)
} else {
eval(parse(text=paste('PRE <- annual(',FUNX,'(pre),FUN="mean",nmin=nmin)',sep="")))
}
}
if (verbose) print("graphics")
unit <- attr(y,'unit')
if (plot) {
ylim <- c(0,10)
par0 <- par()
par(bty="n")
plot.zoo(y,type="b",pch=19,main=attr(y,'location'),
xlab="year",ylab=unit,
sub=paste('Station: ',attr(y,'station_id'),'; coordinates: ',
round(attr(y,'longitude'),4),'E/',
round(attr(y,'latitude'),4),'N; ',
attr(y,'altitude'),'m.a.s.l',sep=''),
ylim=ylim + range(coredata(y),na.rm=TRUE),xlim=c(1900,2100))
grid()
par(bty=par0$bty)
}
# Ensemble GCMs
path <- file.path(path,rcp,fsep = .Platform$file.sep)
ncfiles <- list.files(path=path,pattern=pattern,full.names=TRUE)
N <- length(ncfiles)
if (is.null(select)) select <- 1:N else
N <- length(select)
if (verbose) print(ncfiles[select])
# set up results matrix and tables of diagnostics:
years <- sort(1900:2100)
m <- length(years)
X <- matrix(rep(NA,N*m),N,m)
gcmnm <- rep("",N)
scorestats <- matrix(rep(NA,N*9),N,9)
colnames(scorestats) <- c("1-r.xval","mean.diff","1-sd.ratio","1-autocorr.ratio",
"res.trend","res.K-S","res.ar1",'amplitude.ration','1-R2')
flog <- file("DSensemble.precip-log.txt","at")
for (i in 1:N) {
#
gcm <- try(retrieve(file = ncfiles[select[i]],
lon=range(lon(PRE))+c(-2,2),lat=range(lat(PRE))+c(-2,2),verbose=verbose))
if(inherits(gcm,"try-error")) {
writeLines(ncfiles[select[i]],con=flog)
} else {
if (!is.null(ds.interval)) gcm <- subset(gcm,it=ds.interval)
if (length(index(gcm))<=1) print(paste('Problem selecting GCM results in period',
min(year(y),na.rm=TRUE),'2099'))
# KMP: 10.03.2017 - pass on additional information about GCM runs (gcm + rip - realization, initialization, physics version)
rip <- NULL
nmattsgcm <- names(attributes(gcm))
if(any(grepl("rip", names(attributes(gcm))))) {
nm.rip <- names(attributes(gcm))[grepl("rip",names(attributes(gcm)))][[1]]
if(!is.null(attr(gcm, nm.rip))) {
rip <- attr(gcm, nm.rip)
if(!grepl("r[0-9]{1,2}i[0-9]{1,2}p[0-9]{1,2}", rip)) rip <- NULL
}
}
if(is.null(rip)) {
if (verbose) print('is.null(rip)...2')
if (length(grep("realization",nmattsgcm)) > 0) nm.r <- attributes(gcm)[grep("realization",nmattsgcm)][[1]] else nm.r <- '_'
if (length(grep("initialization",nmattsgcm)) > 0) nm.i <- attributes(gcm)[grep("initialization",nmattsgcm)][[1]] else nm.i <- '_'
if (length(grep("physics",nmattsgcm)) > 0) nm.p <- attributes(gcm)[grep("physics",nmattsgcm)][[1]] else nm.p <- '_'
# nm.r <- names(attributes(gcm))[grep("realization",names(attributes(gcm)))][[1]]
# nm.i <- names(attributes(gcm))[grep("initialization",names(attributes(gcm)))][[1]]
# nm.p <- names(attributes(gcm))[grep("physics",names(attributes(gcm)))][[1]]
rip <- paste0("r",attr(gcm,nm.r),"i",attr(gcm,nm.i),"p",attr(gcm,nm.p))
}
gcmnm[i] <- paste0(attr(gcm,'model_id'),".",rip)
#gcmnm[i] <- paste(attr(gcm,'model_id'),attr(gcm,'realization'),sep="-")
#gcmnm[i] <- attr(gcm,'model_id')
if (verbose) print(varid(gcm))
if (!is.null(it)) {
if (verbose) print('Extract some months or a time period')
if (verbose) print(it)
gcm <- subset(gcm,it=it)
}
if (sum(is.element(FUNX,xfuns))==0) {
GCM <- annual(gcm,FUN=FUNX,nmin=nmin)
} else {
eval(parse(text=paste('GCM <- annual(',FUNX,'(gcm),FUN="mean",nmin=nmin)',sep="")))
}
model.id <- attr(gcm,'model_id')
if (verbose) print("combine")
PREGCM <- combine(PRE,GCM)
if (verbose) print("EOF")
Z <- EOF(PREGCM)
# The test lines are included to assess for non-stationarity
if (non.stationarity.check) {
testGCM <- subset(GCM,it=range(year(PRE))) # REB 29.04.2014
testy <- as.station(regrid(testGCM,is=y)) # REB 29.04.2014
attr(testGCM,'source') <- 'testGCM' # REB 29.04.2014
testZ <- EOF(combine(testGCM,GCM)) # REB 29.04.2014
rm("testGCM"); gc(reset=TRUE)
}
rm("GCM"); gc(reset=TRUE)
# The test lines are included to assess for non-stationarity
if (non.stationarity.check) {
testds <- DS(testy,testZ,biascorrect=biascorrect,ip=ip) # REB 29.04.2014
testz <- attr(testds,'appendix.1') # REB 29.04.2014
difference.z <- testy - testz # REB 29.04.2014
}
if (verbose) print("diagnose")
diag <- diagnose(Z)
if (biascorrect) Z <- biasfix(Z)
if (verbose) print("- - - > DS (annual)")
if (verbose) print(class(attr(Z,'appendix.1')))
ds <- try(DS(y,Z,ip=ip,verbose=verbose))
if (inherits(ds,"try-error")) {
writeLines(gcmnm[i],con=flog)
writeLines(ds[[1]],con=flog)
} else {
if (verbose) print("post-processing")
## REB 2024-03-08 - reduce the volume of the information
attr(ds,'model') <- summary(attr(ds,'model'))
z <- attr(ds,'appendix.1')
i1 <- is.element(years,year(z))
i2 <- is.element(year(z),years)
#
X[i,i1] <- z[i2]
# Diagnose the residual: ACF, pdf, trend. These will together with the
# cross-validation and the common EOF diagnostics provide a set of
# quality indicators.
cal <- coredata(attr(ds,"original_data"))
fit <- coredata(attr(ds,"fitted_values"))
res <- as.residual(ds)
res.trend <- 10*diff(range(trend(res)))/diff(range(year(res)))
ks <- ks.test(coredata(res),pnorm)$p.value
ar <- as.numeric(acf(trend(cal-fit,result="residual"),plot=FALSE)[[1]][2])
## ar <- ar1(coredata(res))
# Evaluation: here are lots of different aspects...
# Get the diagnostics: this is based on the analysis of common EOFs...
xval <- attr(ds,'evaluation')
r.xval <- cor(xval[,1],xval[,2])
#
xy <- merge.zoo(z,y)
ds.ratio <- sd(xy[,1],na.rm=TRUE)/sd(xy[,2],na.rm=TRUE)
# Extract the mean score for leading EOF from the 4 seasons:
mdiff <- diag$mean.diff[1]/diag$sd0[1]
srati <- 1 - diag$sd.ratio[1]
arati <- 1 - diag$autocorr.ratio[1]
scorestats[i,] <- c(1-r.xval,mdiff,srati,arati,res.trend,ks,ar,1-ds.ratio,
1-var(xval[,2])/var(xval[,1]))
if (verbose) print(scorestats[i,])
quality <- 100*(1-mean(abs(scorestats[i,]),na.rm=TRUE))
index(z) <- year(z); index(ds) <- year(ds)
if (plot) {
qcol <- quality
qcol[qcol < 1] <- 1;qcol[qcol > 100] <- 100
cols <- rgb(seq(1,0,length=100),rep(0,100),seq(0,1,length=100),0.15)
lines(z,lwd=2,col=cols[qcol])
lines(y,type="b",pch=19)
lines(ds,lwd=2,col="grey")
}
#
R2 <- round(100*sd(xval[,2])/sd(xval[,1]),2)
print(paste("i=",i,"GCM=",gcmnm[i],' x-valid cor=',round(100*r.xval,2),
"R2=",R2,'% ','Common EOF: bias=',
round(mdiff,2),' sd1/sd2=',round(srati,3),
"mean=",round(mean(coredata(y),na.rm=TRUE),2),
'quality=',round(quality)))
}
}
}
#
X <- zoo(t(X),order.by=years)
colnames(X) <- gcmnm
attr(X,"model_id") <- gcmnm
#X <- attrcp(y,X)
attr(X,'station') <- y
attr(X,"lon") <- attr(y,"lon")
attr(X,"lat") <- attr(y,"lat")
attr(X,"longname") <- attr(y,"longname")
attr(X,'predictor') <- attr(PRE,'source')
attr(X,'domain') <- list(lon=lon,lat=lat)
attr(X,'scorestats') <- scorestats
attr(X,'path') <- path
attr(X,'scenario') <- rcp
if (non.stationarity.check) {
attr(X,'on.stationarity.check') <- difference.z
} else {
attr(X,'on.stationarity.check') <- NULL
}
attr(X,'history') <- history.stamp(y)
class(X) <- c("dsensemble","zoo")
save(file="DSensemble.rda",X)
print("---")
invisible(X)
}
#' @exportS3Method
#' @export DSensemble.season
DSensemble.season <- function(y,...,season=NULL,plot=TRUE,path="CMIP5.monthly/",predictor="slp.mon.mean.nc",
rcp="rcp45",biascorrect=FALSE,non.stationarity.check=FALSE,
ip=1:6,lon=c(-20,20),lat=c(-10,10),it=NULL,rel.cord=TRUE,
select=NULL,FUN="mean",FUNX="mean",xfuns='C.C.eq',
pattern="psl_Amon_",lev=NULL,levgcm=NULL,path.ds=NULL,file.ds=NULL,
nmin=NULL,verbose=FALSE,ds.interval=NULL) {
if(verbose) print("DSensemble.season")
if(is.null(season)) {
if(inherits(y,"season")) {
season <- unique(season(y))
} else {
season <- "djf"
}
}
if ((FUN=='sd') | (FUN =='ar1')) {
y <- anomaly(y)
attr(y,'aspect') <- 'original'
}
if(verbose) print("Set lon/lat predictor range")
if ( !is.na(attr(y,'longitude'))[1] & (any(lon>0) & any(lon<0)) & rel.cord)
lon <- round( range(attr(y,'longitude'),na.rm=TRUE) + lon )
if ( !is.na(attr(y,'latitude'))[1] & (any(lat>0) & any(lat<0)) & rel.cord)
lat <- round( range(attr(y,'latitude'),na.rm=TRUE) + lat )
if (sum(!is.finite(lon))>0)
warning(paste('Bad longitude range provided: ',paste(lon,collapse='-')))
if (sum(!is.finite(lat))>0)
warning(paste('Bad latitude range provided: ',paste(lat,collapse='-')))
if(verbose) print("Arrange units")
if ( (unit(y)[1] == "deg C") | (unit(y)[1] == "degree Celsius") |
(unit(y)[1] == "degC") | (unit(y)[1] == "Celsius") )
unit <- expression(degree*C) else
unit <- attr(y,'unit')
if (verbose) print(paste('Units:',unit))
if(verbose) print("aggregate time series")
sm <- eval(parse(text=paste("season.abb()$",season,sep="")))
s1 <- as.character(sm[1])
s2 <- as.character(sm[length(sm)])
if (nchar(s1)==1) s1 <- paste("0",s1,sep="")
if (nchar(s2)==1) s2 <- paste("0",s2,sep="")
if (is.null(nmin)) nmin <- length(sm)
ys <- as.4seasons(y,start=paste(s1,"01",sep="-"),
end=paste(s2,"28",sep="-"),FUN=FUN)
if(!is.null(attr(ys,"n.valid"))) ys <- ys[attr(ys,"n.valid")>=nmin]
if (FUN=="sum" & grepl("month",attr(ys,"unit"))) {
attr(ys,"unit") <- gsub("month","season",attr(ys,"unit"))
}
if (plot) {
if(verbose) print("Plot station data (predictand)")
ylim <- c(0,0)
ylim <- switch(FUN,'mean'=c(-2,8),'sd'=c(-0.5,1),'ar1'=c(-0.5,0.7),
'sum'=c(-6,12))
if (verbose) print(paste('set ylim based on "',FUN,
'" -> c(',ylim[1],', ',ylim[2],')',sep=''))
par0 <- par()
par(bty="n")
plot.zoo(ys,type="b",pch=19,main=attr(y,'location'),
xlab="year",ylab=unit,
sub=paste('Station: ',attr(y,'station_id'),'; coordinates: ',
round(attr(ys,'longitude'),4),'E/',
round(attr(ys,'latitude'),4),'N; ',
attr(ys,'altitude'),'m.a.s.l',sep=''),
ylim=ylim + range(coredata(ys),na.rm=TRUE),
xlim=as.Date(c("1900-01-01","2100-12-31")))
grid()
par(bty=par0$bty)
}
if(verbose) print("Retrieve predictor data")
if (is.character(predictor))
slp <- retrieve(file=predictor,lon=lon,lat=lat,lev=lev,
verbose=verbose) else
if (inherits(predictor,'field'))
slp <- subset(predictor,is=list(lon=lon,lat=lat))
if(verbose) print("Aggregate seasonal values")
SLP <- subset(as.4seasons(slp,FUN=FUNX,nmin=nmin),it=season)
ok <- is.finite(rowSums(SLP))
SLP <- subset(SLP,it=range(year(SLP)[ok]))
rm("slp"); gc(reset=TRUE)
# Ensemble GCMs
if(verbose) print("Retrieve & arrange GCMs")
path <- file.path(path,rcp,fsep = .Platform$file.sep)
ncfiles <- list.files(path=path,pattern=pattern,full.names=TRUE)
N <- length(ncfiles)
if (is.null(select)) {
select <- 1:N
} else {
select <- select[select<=N]
N <- length(select)
}
if (verbose) {print('GCMs:'); print(path); print(pattern); print(ncfiles[select])}
if(verbose) print("Set up results matrix & table of diagnostics")
## KMP 2017-10-19: Don't need to keep all seasons in X
months <- switch(season, "djf"=1, "mam"=4, "jja"=7, "son"=10)
years <- sort(rep(1900:2100,length(months)))
months <- rep(months,length(1900:2100))
#years <- sort(rep(1900:2100,4))
#months <- rep(c(1,4,7,10),length(1900:2100))
m <- length(years)
X <- matrix(rep(NA,N*m),N,m)
gcmnm <- rep("",N)
scorestats <- matrix(rep(NA,N*9),N,9)
colnames(scorestats) <- c("1-r.xval","mean.diff","1-sd.ratio",
"1-autocorr.ratio",
"res.trend","res.K-S","res.ar1",'amplitude.ration',
'1-R2')
t <- as.Date(paste(years,months,'01',sep='-'))
if(verbose) print("Quick test")
flog <- file("DSensemble.season-log.txt","at")
if (verbose) print("loop...")
for (i in 1:N) {
if (verbose) print(ncfiles[select[i]])
gcm <- retrieve(file = ncfiles[select[i]],
lon=range(lon(SLP))+c(-2,2),lev=levgcm,
lat=range(lat(SLP))+c(-2,2),verbose=verbose)
if (!is.null(ds.interval)) gcm <- subset(gcm,it=ds.interval)
if (length(index(gcm))<=1) print(paste('Problem selecting GCM results in period',
min(year(y),na.rm=TRUE),'2099'))
# KMP: 10.03.2017 - pass on additional information about GCM runs (gcm + rip - realization, initialization, physics version)
rip <- NULL
if(any(grepl("rip", names(attributes(gcm))))) {
nm.rip <- names(attributes(gcm))[grepl("rip",names(attributes(gcm)))][[1]]
if(!is.null(attr(gcm, nm.rip))) {
rip <- attr(gcm, nm.rip)
if(!grepl("r[0-9]{1,2}i[0-9]{1,2}p[0-9]{1,2}", rip)) rip <- NULL
}
}
nmattsgcm <- names(attributes(gcm))
if (verbose) print('nmattsgcm <- names(attributes(gcm))')
if(is.null(rip)) {
if (verbose) print('is.null(rip)...3')
if (length(grep("realization",nmattsgcm)) > 0) nm.r <- attributes(gcm)[grep("realization",nmattsgcm)][[1]] else nm.r <- '_'
if (length(grep("initialization",nmattsgcm)) > 0) nm.i <- attributes(gcm)[grep("initialization",nmattsgcm)][[1]] else nm.i <- '_'
if (length(grep("physics",nmattsgcm)) > 0) nm.p <- attributes(gcm)[grep("physics",nmattsgcm)][[1]] else nm.p <- '_'
# nm.r <- names(attributes(gcm))[grep("realization",names(attributes(gcm)))][[1]]
# nm.i <- names(attributes(gcm))[grep("initialization",names(attributes(gcm)))][[1]]
# nm.p <- names(attributes(gcm))[grep("physics",names(attributes(gcm)))][[1]]
rip <- paste0("r",attr(gcm,nm.r),"i",attr(gcm,nm.i),"p",attr(gcm,nm.p))
}
gcmnm[i] <- paste0(attr(gcm,'model_id'),".",rip)
#gcmnm[i] <- paste(attr(gcm,'model_id'),attr(gcm,'realization'),sep="-")
GCM <- subset(as.4seasons(gcm,FUN=FUNX,nmin=nmin),it='djf')
rm("gcm"); gc(reset=TRUE)
SLPGCM <- combine(SLP,GCM)
if (verbose) print("- - - > EOFs")
Z <- EOF(SLPGCM)
# The test lines are included to assess for non-stationarity
if (non.stationarity.check) {
testGCM <- subset(GCM,it=range(year(SLP))) # REB 29.04.2014
testy <- as.station(regrid(testGCM,is=ys)) # REB 29.04.2014
attr(testGCM,'source') <- 'testGCM' # REB 29.04.2014
testZ <- combine(testGCM,GCM) # REB 29.04.2014
rm("testGCM"); gc(reset=TRUE)
}
if (verbose) print("- - - > DS (seasonal)")
if (verbose) print(class(attr(Z,'appendix.1')))
if (biascorrect) try(Z <- biasfix(Z))
ds <- try(DS(ys,Z,ip=ip))
if (inherits(ds,"try-error")) {
writeLines(gcmnm[i],con=flog)
writeLines(ds[[1]],con=flog)
} else {
attr(ds,'evaluation') <- crossval(ds)
if (verbose) print("post-processing")
## REB 2024-03-08 - reduce the volume of the information
attr(ds,'model') <- summary(attr(ds,'model'))
z <- attr(ds,'appendix.1')
if (non.stationarity.check) {
testds <- DS(testy,testZ,biascorrect=biascorrect,ip=ip) # REB 29.04.2014
testz <- attr(testds,'appendix.1') # REB 29.04.2014
difference.z <- testy - testz # REB 29.04.2014
}
i1 <- is.element(paste(years,months,sep='-'),
paste(year(z),month(z),sep='-'))
i2 <- is.element(paste(year(z),month(z),sep='-'),
paste(years,months,sep='-'))
X[i,i1] <- z[i2]
# Diagnose the residual: ACF, pdf, trend. These will together with the
# cross-validation and the common EOF diagnostics provide a set of
# quality indicators.
cal <- coredata(attr(ds,"original_data"))
fit <- coredata(attr(ds,"fitted_values"))
res <- as.residual(ds)
res.trend <- 10*diff(range(trend(res)))/diff(range(year(res)))
ks <- round(ks.test(coredata(res),pnorm)$p.value,4)
ar <- as.numeric(acf(trend(cal-fit,result="residual"),plot=FALSE)[[1]][2])
if (verbose) print(paste("Examine residuals: trend=",
round(res.trend,3),'D/decade; K.S. p-val',
round(ks,2),'; AR(1)=',round(ar,2)))
# Evaluation: here are lots of different aspects...
# Get the diagnostics: this is based on the analysis of common EOFs...
xval <- attr(ds,'evaluation')
r.xval <- cor(xval[,1],xval[,2])
if (verbose) print(paste("x-validation r=",r.xval))
xy <- merge.zoo(z,ys)
ds.ratio <- sd(xy[,1],na.rm=TRUE)/sd(xy[,2],na.rm=TRUE)
if (verbose) print(paste("sd ratio=",ds.ratio))
#print(names(attributes(ds)))
if (biascorrect) {
diag <- attr(ds,'diagnose')
if ( (verbose) & !is.null(diag)) str(diag)
} else diag <- NULL
# diagnose for ds-objects
##
if (verbose) print('...')
if (is.null(diag)) {
##diag <- diagnose(z,plot=FALSE)
scorestats[i,] <- c(1-r.xval,NA,NA,NA,res.trend,ks,1-ar,1-ds.ratio,
1-round(var(xval[,2])/var(xval[,1]),2))
mdiff <- (mean(subset(ys,it=range(year(ds))),na.rm=TRUE)-
mean(subset(ds,it=range(year(ys))),na.rm=TRUE))/
sd(ys,na.rm=TRUE)
srati <- sd(subset(ds,it=range(year(ys))),na.rm=TRUE)/
sd(subset(ys,it=range(year(ds))),na.rm=TRUE)
arati <- ar1(zoo(ds,order.by=year(ds)))/ar1(zoo(ys,order.by=year(ys)))
} else {
# Extract the mean score for leading EOF from the 4 seasons:
mdiff <- mean(c(diag$s.1$mean.diff[1]/diag$s.1$sd0[1],
diag$s.2$mean.diff[1]/diag$s.2$sd0[1],
diag$s.3$mean.diff[1]/diag$s.3$sd0[1],
diag$s.4$mean.diff[1]/diag$s.4$sd0[1]))
srati <- mean(c(diag$s.1$sd.ratio[1],diag$s.2$sd.ratio[1],
diag$s.3$sd.ratio[1],diag$s.4$sd.ratio[1]))
arati <- mean(c(diag$s.1$autocorr.ratio[1],diag$s.2$autocorr.ratio[1],
diag$s.3$autocorr.ratio[1],
diag$s.4$autocorr.ratio[1]))
}
scorestats[i,] <- c(1-r.xval,mdiff,1-srati,1-arati,res.trend,ks,ar,
1-ds.ratio,
1- var(xval[,2])/var(xval[,1]))
if (verbose) print('scorestats')
if (verbose) print(scorestats[i,])
quality <- 100*(1-mean(abs(scorestats[i,]),na.rm=TRUE))
if (plot) {
qcol <- quality
qcol[qcol < 1] <- 1;qcol[qcol > 100] <- 100
cols <- rgb(seq(1,0,length=100),rep(0,100),seq(0,1,length=100),0.15)
lines(z,lwd=2,col=cols[qcol])
lines(ys,type="b",pch=19)
lines(ds,lwd=2,col="grey")
}
R2 <- round(100*sd(xval[,2])/sd(xval[,1]),2)
print(paste("i=",i,"GCM=",gcmnm[i],' x-valid cor=',round(100*r.xval,2),
"R2=",R2,'% ','Common EOF: bias=',round(mdiff,2),
' sd1/sd2=',round(srati,3),
"mean=",round(mean(coredata(ys),na.rm=TRUE),2),
'quality=',round(quality)))
}
}
if(verbose) print("Done with downscaling!")
rm("GCM")
X <- zoo(t(X),order.by=t)
colnames(X) <- gcmnm
attr(X,"model_id") <- gcmnm
#X <- attrcp(ys,X)
attr(X,"season") <- season
attr(X,"longname") <- attr(y,"longname")
attr(X,'station') <- ys
attr(X,'predictor') <- attr(SLP,'source')
attr(X,'domain') <- list(lon=lon,lat=lat)
attr(X,'scorestats') <- scorestats
attr(X,'path') <- path
attr(X,'scenario') <- rcp
attr(X,'history') <- history.stamp(y)
if (non.stationarity.check) {
attr(X,'non.stationarity.check') <- difference.z
} else {
attr(X,'non.stationarity.check') <- NULL
}
class(X) <- c("dsensemble","season","zoo")
if (is.null(file.ds)) {
file.ds <- paste("DSensemble",rcp,N,attr(y,"variable"),
season,"rda",sep="")
}
if (!is.null(path.ds)) file.ds <- file.path(path.ds,file.ds)
save(file=file.ds,X)
print("---")
invisible(X)
}
#' @export DSensemble.mu.worstcase
DSensemble.mu.worstcase <- function(y,...,plot=TRUE,path="CMIP5.monthly/",predictor="ERA40_t2m_mon.nc",
rcp="rcp45",biascorrect=FALSE,n=6,lon=c(-20,20),lat=c(-10,10),
it=NULL,rel.cord=TRUE,select=NULL,FUN="wetmean",
pattern="tas_Amon_",mask=FALSE,verbose=FALSE,ds.interval=NULL) {
if (verbose) print('DSensemble.mu.worstcase')
## The predictor is based on the seasonal variations and assumes that the seasnoal cycle in the
## wet-day mean mu is follows a systematic dependency to the seasonal variations in the temperature
## - the calibration uses the Clausius Clapeiron equation to estimate the saturation water vapour
## rather than using the temeprature directly.
if (verbose) print(paste('The predictand: seasonal',FUN))
ys <- aggregate(y,by=month,FUN=FUN)
ya <- aggregate(y,by=year,FUN=FUN)
ns <- 1
## A group of stations - use PCA
if (!is.null(dim(ys))) {
if (verbose) print('Use PCA for multiple stations')
pca.ys <- PCA(ys,n=n)
pca.ya <- PCA(ya)
ns <- n
}
if (!is.na(attr(y,'longitude')) & rel.cord)
lon <- round( range(attr(y,'longitude'),na.rm=TRUE) + lon )
if (!is.na(attr(y,'latitude')) & rel.cord)
lat <- round( range(attr(y,'latitude'),na.rm=TRUE) + lat )
if (sum(!is.finite(lon))>0)
warning(paste('Bad longitude range provided: ',paste(lon,collapse='-')))
if (sum(!is.finite(lat))>0)
warning(paste('Bad latitude range provided: ',paste(lat,collapse='-')))
if (verbose) print("predictor")
if (is.character(predictor)) {
pre <- retrieve(file=predictor,lon=lon,lat=lat,verbose=verbose)
if (mask) pre=mask(pre,land=TRUE)
# KMP 2019-05-28: replaced spatial.avg.field with aggregate.area
#pre <- spatial.avg.field(C.C.eq(pre))
pre <- aggregate.area(C.C.eq(pre), FUN="mean")
} else if (inherits(predictor,'field')) {
# KMP 2019-05-28: replaced spatial.avg.field with aggregate.area
#pre <- spatial.avg.field(predictor)
pre <- aggregate.area(predictor, FUN="mean")
}
rm("predictor"); gc(reset=TRUE)
## Estimate the reference level
normal61.90 <- mean(coredata(subset(pre,it=c(1961,1990))))
x <- aggregate(pre,by=month,FUN="mean")
if (verbose) print('Prepare the calibration data')
## Loop over PCAs if multipple stations
if (n>1) results <- list()
## Ensemble GCMs
path <- file.path(path,rcp,fsep = .Platform$file.sep)
ncfiles <- list.files(path=path,pattern=pattern,full.names=TRUE)
N <- length(ncfiles)
if (is.null(select)) select <- 1:N else
N <- length(select)
if (verbose) print(ncfiles[select])
## set up results matrix and tables of diagnostics:
years <- sort(1900:2100)
m <- length(years)
gcmnm <- rep("",N)
for (is in 1:n) {
X <- matrix(rep(NA,N*m),N,m)
if (verbose) print(paste('is=',is,'n=',n))
if (n==1) cal <- data.frame(y=coredata(ys),x=coredata(x)) else
cal <- data.frame(y=coredata(ys)[,is],x=coredata(x))
attributes(cal$y) <- NULL
if (is.null(dim(ys))) stats <- cor.test(cal$y,cal$x) else
stats <- cor.test(as.matrix(cal)[,1],cal$x)
wc.model <- lm(y ~ x, data=cal)
if (plot) {
par0 <- par()
par(bty='n',cex.sub=0.7,col.sub='grey40')
ylim <- range(cal$y,na.rm=TRUE); xlim=range(cal$x,na.rm=TRUE); dy <- diff(ylim)/25
plot(cal$x,cal$y,pch=19,cex=1.5,col='grey',
ylab=expression(paste(mu,' (mm/day)')),
xlab=expression(paste(e[s],' (Pa)')),
ylim=ylim,xlim=xlim,
main='Worst-case based on seasonal variations',
sub=paste(loc(y),' (',round(lon(y),2),'E/',round(lat(y),2),'N; ',alt(y),'m.a.s.l.)',sep=''))
segments(x0=cal$x,y0=cal$y,x1=cal$x,y1=cal$y+2*attr(ys,'standard.error'),col='grey')
segments(x0=cal$x,y0=cal$y,x1=cal$x,y1=cal$y-2*attr(ys,'standard.error'),col='grey')
segments(x0=cal$x,y0=cal$y,x1=cal$x+2*attr(x,'standard.error'),y1=cal$y,col='grey')
segments(x0=cal$x,y0=cal$y,x1=cal$x-2*attr(x,'standard.error'),y1=cal$y,col='grey')
points(cal$x,cal$y,pch=19,cex=1.5,col='grey')
grid()
abline(wc.model)
text(xlim[1],ylim[2],paste('Correlation=',round(stats$estimate,2),
'(','p-value=',100*round(stats$p.value,4),'%)'),
pos=4,cex=0.7,col='grey')
text(xlim[1],ylim[2]-dy,paste('Regression: y=',round(wc.model$coeff[1],4), '+',
round(wc.model$coeff[2],4), 'x (R2=',
round(summary(wc.model)$r.squared,2),')'),
pos=4,cex=0.7,col='grey')
par(new=TRUE,fig=c(0.5,0.97,0.1,0.5),yaxt='n',xpd=TRUE,cex.axis=0.7,col.axis='grey')
plot((cal$x - mean(cal$x))/sd(cal$x),type='l',lwd=2,ylab='',xlab='',col=rgb(0.6,0.3,0))
axis(1,col='grey')
lines((cal$y - mean(cal$y))/sd(cal$y),type='l',lwd=2,col=rgb(0,0.3,0.6))
#dev.copy2eps(file='DSensemble.mu.worstcase.cal.eps')
par(fig=par0$fig, yaxt=par0$yaxt, xpd=par0$xpd, bty=par0$bty,
cex.sub=par0$cex.sub, col.sub=par0$col.sub,
cex.axis=par0$cex.axis, col.axis=par0$col.axis)
}
if (plot) {
dev.new()
if (n>1) ya <- pca.ya[,is]
plot(ya,xlim=c(1900,2100),
ylim=range(aggregate(y,by=year,FUN='wetmean'),na.rm=TRUE)*c(0.75,1.5))
grid()
}
sd.noise <- max(attr(ys,'standard.error'),sd(wc.model$residuals))
for (i in 1:N) {
if (verbose) print(ncfiles[select[i]])
gcm <- retrieve(file = ncfiles[select[i]],lon=lon,lat=lat,
verbose=FALSE)
if (!is.null(ds.interval)) gcm <- subset(gcm,it=ds.interval)
if (length(index(gcm))<=1) print(paste('Problem selecting GCM results in period',
min(year(y),na.rm=TRUE),'2099'))
if (verbose) print(paste('mask=',mask))
if (mask) gcm <- mask(gcm,land=TRUE)
# KMP: 10.03.2017 - pass on additional information about GCM runs (gcm + rip - realization, initialization, physics version)
rip <- NULL
if(any(grepl("rip", names(attributes(gcm))))) {
nm.rip <- names(attributes(gcm))[grepl("rip",names(attributes(gcm)))][[1]]
if(!is.null(attr(gcm, nm.rip))) {
rip <- attr(gcm, nm.rip)
if(!grepl("r[0-9]{1,2}i[0-9]{1,2}p[0-9]{1,2}", rip)) rip <- NULL
}
}
nmattsgcm <- names(attributes(gcm))
if (verbose) print('nmattsgcm <- names(attributes(gcm))')
if(is.null(rip)) {
if (verbose) print('is.null(rip)...4')
if (length(grep("realization",nmattsgcm)) > 0) nm.r <- attributes(gcm)[grep("realization",nmattsgcm)][[1]] else nm.r <- '_'
if (length(grep("initialization",nmattsgcm)) > 0) nm.i <- attributes(gcm)[grep("initialization",nmattsgcm)][[1]] else nm.i <- '_'
if (length(grep("physics",nmattsgcm)) > 0) nm.p <- attributes(gcm)[grep("physics",nmattsgcm)][[1]] else nm.p <- '_'
# nm.r <- names(attributes(gcm))[grep("realization",names(attributes(gcm)))][[1]]
# nm.i <- names(attributes(gcm))[grep("initialization",names(attributes(gcm)))][[1]]
# nm.p <- names(attributes(gcm))[grep("physics",names(attributes(gcm)))][[1]]
rip <- paste0("r",attr(gcm,nm.r),"i",attr(gcm,nm.i),"p",attr(gcm,nm.p))
}
gcmnm[i] <- paste0(attr(gcm,'model_id'),".",rip)
#gcmnm[i] <- paste(attr(gcm,'model_id'),attr(gcm,'realization'),sep="-")
if (verbose) print('spatial average')
# KMP 2019-05-28: replaced spatial.avg.field with aggregate.area
#GCM <- spatial.avg.field(C.C.eq(gcm))
GCM <- aggregate.area(C.C.eq(gcm), FUN='mean')
z <- annual(GCM,FUN="max")
z <- z - mean(coredata(subset(z,it=c(1961,1990)))) + normal61.90
i1 <- is.element(year(z),years)
i2 <- is.element(years,year(z))
if (verbose) print(c(i,sum(i1),sum(i2)))
prex <- data.frame(x=coredata(z[i1]))
if (verbose) print(summary(prex))
if (verbose) print('prediction')
browser()
z.predict <- predict(wc.model, newdata=prex) +
rnorm(n=sum(i1),sd=sd.noise)
if (length(z.predict) != sum(i2)) {
print('problem discovered')
browser()
}
X[i,i2] <- z.predict
if (verbose) print(paste("i=",i,"GCM=",gcmnm[i],sum(i2)))
#if (sum(i2) != length(years))
if (plot) lines(years[i2],X[i,i2],col=rgb(0,0.3,0.6,0.2))
}
if (plot) {
if (n==1) lines(aggregate(y,by=year,FUN='wetmean'),col='red',lwd=3) else
lines(PCA(aggregate(y,by=year,FUN='wetmean'))[,is],col='red',lwd=3)
}
X <- zoo(t(X),order.by=years)
colnames(X) <- gcmnm
attr(X,"model_id") <- gcmnm
attr(X,'station') <- aggregate(y,by=year,FUN='wetmean')
attr(X,'predictor') <- attr(pre,'source')
attr(X,'domain') <- list(lon=lon,lat=lat)
attr(X,'path') <- path
attr(X,'scenario') <- rcp
attr(X,'history') <- history.stamp(y)
class(X) <- c("dsensemble","zoo")
save(file="DSensemble.rda",X)
if (verbose) print("--- end of iteration")
if (n>1) eval(parse(text=paste('results$pca.',is,' <- X',sep='')))
}
if (n>1) X <- results
if (verbose) print(names(X))
if (verbose) print("--- Exit DSensemble.mu.worstcase")
invisible(X)
}
#' @exportS3Method
#' @export
DSensemble.pca <- function(y,...,plot=TRUE,path="CMIP5.monthly/",rcp="rcp45",biascorrect=FALSE,
predictor="ERA40_t2m_mon.nc",non.stationarity.check=FALSE,
ip=1:16,lon=c(-30,20),lat=c(-20,10), it=NULL,rel.cord=TRUE,
select=NULL,FUN="mean",rmtrend=TRUE,FUNX="mean",xfuns='C.C.eq',
threshold=1,pattern="tas_Amon_",verbose=FALSE,
file.ds="DSensemble.rda",path.ds=NULL,nmin=NULL,
ds.interval=NULL,test=FALSE) {
if (verbose) print('DSensemble.pca')
cls <- class(y)
# This function is for downscaling PCA to represent a group of stations
if (!is.na(attr(y,'longitude'))[1] & (any(lon>0) & any(lon<0)) & rel.cord)
lon <- round( range(attr(y,'longitude'),na.rm=TRUE) + lon )
if (!is.na(attr(y,'latitude'))[1] & (any(lat>0) & any(lat<0)) & rel.cord)
lat <- round( range(attr(y,'latitude'),na.rm=TRUE) + lat )
if (sum(!is.finite(lon))>0)
warning(paste('Bad longitude range provided: ',paste(lon,collapse='-')))
if (sum(!is.finite(lat))>0)
warning(paste('Bad latitude range provided: ',paste(lat,collapse='-')))
if (is.character(predictor)) {
if (verbose) print('retrieve the predictor from netCDF file')
lsp <- retrieve(file=predictor,lon=lon,lat=lat,
verbose=verbose)
if (!is.null(it)) {
if (verbose) print('Extract some months or a time period')
if (verbose) print(it)
lsp <- subset(lsp,it=it,verbose=verbose)
## if it is character, then then extraction of months reduces number of
## days per year.
}
} else if (inherits(predictor,'field')) {
if (verbose) print('use the predictor provided as an argument')
lsp <- predictor
lon <- range(lon(lsp))
lat <- range(lat(lsp))
if (!is.annual(lsp) & !is.null(it)) {
if (verbose) print('Extract months/time period:')
if (verbose) print(it)
lsp <- subset(lsp,it=it,verbose=verbose)
}
}
## If some months are selected, make sure that the minimum number of months
## required in the annual aggregation is updated
if ((is.null(nmin)) & (is.character(it))) nmin <- length(it)
## Apply seasonal aggregation is predictand contains seasonal data
if (inherits(y,'season')) {
if (verbose) print('seasonal data found in the predictand')
if(attr(y,'season.interval')=='4seasons') {
if (FUNX !='C.C.eq') {
if (!inherits(lsp,'season')) {
if (verbose) print(paste('apply',FUNX,'to the predictor'))
LSP <- as.4seasons(lsp,FUN=FUNX,nmin=nmin)
} else {
if (verbose) print('The predictor already contains seasonal statistics')
LSP <- lsp ## REM 2024-03-01.
}
} else {
if (verbose) print('apply C.C.eq to the predictor:')
LSP <- as.4seasons(C.C.eq(lsp),FUN="mean",nmin=nmin)
}
} else {
it.season <- unlist(strsplit(attr(y,'season.interval'), split=' to '))
if (FUNX !='C.C.eq') {
if (verbose) print(paste('apply',FUNX,'to the predictor'))
LSP <- as.seasons(lsp,FUN=FUNX,
start=it.season[1],end=it.season[2],nmin=nmin)
}
}
LSP <- matchdate(LSP,y,verbose=verbose)
# Recursive: do each season separately if there are more than one season
if (length(table(season(y)))>1) {
if (verbose) print('--- Apply DS to seasons seperately ---')
Z <- list(info=paste('DSensemble.pca for different seasons: ',
paste(lon,collapse='-'),'E/',paste(lat,collapse='-'),'N',sep=''))
for (season in names(table(season(y)))) {
if (verbose) print(paste('Select',season))
z <- DSensemble.pca(subset(y,it=season),plot=plot,path=path,
rcp=rcp,biascorrect=biascorrect,predictor=LSP,
non.stationarity.check=non.stationarity.check,
ip=ip,lon=lon,lat=lat,rel.cord=FALSE,
select=select,FUN=FUN,rmtrend=rmtrend,
FUNX=FUNX,xfuns=xfuns,threshold=threshold,
pattern=pattern,verbose=verbose,nmin=nmin)
eval(parse(text=paste('Z$',season,' <- z',sep='')))
}
if (verbose) print('--- Results returned as a list ---')
return(Z)
}
} else if (inherits(y,'annual')) {
if (verbose) print('annual data')
if (!inherits(predictor,'field')) {
LSP <- lsp
} else if (!is.annual(lsp)) {
if (verbose) print(paste('Aggregate annually',FUNX,'for calibration'))
if (sum(is.element(FUNX,xfuns))==0) {
LSP <- annual(lsp,FUN=FUNX,nmin=nmin)
} else {
eval(parse(text=paste('LSP <- annual(',FUNX,'(lsp),FUN="mean",nmin=nmin)',sep="")))
}
} else {
LSP <- lsp
}
## Match the date
LSP <- matchdate(LSP,y)
} else if (inherits(y,'month')) {
if (verbose) print('monthly data')
LSP <- matchdate(lsp,y)
##if (FUNX=='C.C.eq')
## LSP <- mask(LSP,land=TRUE)
}
if (inherits(LSP,"eof")) LSP <- as.field(LSP)
rm("predictor","lsp"); gc(reset=TRUE)
if (verbose) {print('Check LSP:'); print(class(LSP)); print(index(LSP))}
# Ensemble GCMs
path <- file.path(path,rcp,fsep = .Platform$file.sep)
ncfiles <- list.files(path=path,pattern=pattern,full.names=TRUE)
N <- length(ncfiles)
if (!is.null(select)) {
select <- select[select<=N]
N <- length(select)
} else {
select <- 1:N
}
if (verbose) {print('GCMs:'); print(path); print(pattern); print(ncfiles[select])}
d.y <- dim(y)
years <- 1900:2100
m <- length(years)
months <- rep(month(y)[1],m)
X <- matrix(rep(NA,N*m*d.y[2]),N,m*d.y[2])
dim(X) <- c(d.y[2],N,m)
gcmnm <- rep("",N)
scorestats <- matrix(rep(NA,N*11),N,11)
colnames(scorestats) <- c("1-r.xval","mean.diff","1-sd.ratio","1-autocorr.ratio",
"res.trend","res.K-S","res.ar1","amplitude.ratio",
"1-R2","1-sd.ratio.predict","1-autocorr.ratio.predict")
t <- as.Date(paste(years,months,'01',sep='-'))
if (plot) {
par0 <- par()
par(bty='n')
index(y) <- year(y)
plot.zoo(y[,1],lwd=3,main='PC1',ylab='',xlab='',xlim=range(years))
par(bty=par0$bty)
}
flog <- file("DSensemble.pca-log.txt","at")
## Set up a list environment to keep all the results
dse.pca <- list(info=paste('DSensemble.pca for different seasons: ',
paste(lon,collapse='-'),'E/',paste(lat,collapse='-'),'N',sep=''),
pca=y) ## KMP 06-08-2015
if (verbose) print("loop...")
r.xval.all <- matrix(NA, nrow=N, ncol=ncol(y))
colnames(r.xval.all) <- paste0("PC",seq(ncol(y)))
for (i in 1:N) {
if (verbose) print(ncfiles[select[i]])
gcm <- retrieve(file = ncfiles[select[i]],
lon=range(lon(LSP))+c(-2,2),
lat=range(lat(LSP))+c(-2,2),verbose=verbose)
if (!is.null(ds.interval)) gcm <- subset(gcm,it=ds.interval)
if (length(index(gcm))<=1) print(paste('Problem selecting GCM results in period',
min(year(y),na.rm=TRUE),'2099'))
if (!is.null(it)) {
if (verbose) print('Extract some months or a time period')
if ( is.null(nmin) & is.character(it[1]) ) warning(paste("The argument 'it' is set but not 'nmin'; it=",
paste(it,collapse="-")))
if (verbose) print(it)
gcm <- subset(gcm,it=it,verbose=verbose)
}
nc.i <- getncid(filename=ncfiles[select[i]], verbose=verbose)
meta.i <- try(metaextract.cmip(nc.i,verbose=verbose))
if(!inherits(meta.i, "try-error")) {
gcmnm.i <- paste0(meta.i[,"gcm"],".",meta.i[,"gcm_rip"])
} else {
rip <- NULL
if(any(grepl("rip", names(attributes(gcm))))) {
nm.rip <- names(attributes(gcm))[grepl("rip",names(attributes(gcm)))][[1]]
if(!is.null(attr(gcm, nm.rip))) {
rip <- attr(gcm, nm.rip)
if(!grepl("r[0-9]{1,2}i[0-9]{1,2}p[0-9]{1,2}", rip)) rip <- NULL
}
}
if(is.null(rip)) {
if (verbose) print('is.null(rip)...5')
if(any(grepl("realization", names(attributes(gcm)))))
nm.r <- names(attributes(gcm))[grep("realization",names(attributes(gcm)))][[1]] else
nm.r <- 'NA'
if(any(grepl("initialization", names(attributes(gcm)))))
nm.i <- names(attributes(gcm))[grep("initialization",names(attributes(gcm)))][[1]] else
nm.i <- 'NA'
if(any(grepl("physics", names(attributes(gcm)))))
nm.p <- names(attributes(gcm))[grep("physics",names(attributes(gcm)))][[1]] else
nm.p <- 'NA'
rip <- paste0("r",attr(gcm,nm.r),"i",attr(gcm,nm.i),"p",attr(gcm,nm.p))
}
gcmnm.i <- paste0(attr(gcm,'model_id'),".",rip)
}
if(verbose) print(gcmnm.i)
if(!grepl("r[0-9]{1,4}i[0-9]{1,4}p[0-9]{1,4}",gcmnm.i)) {
if(verbose) print(paste("Missing model name and/or rip for file",
i, ncfiles[select[i]]))
}
if (verbose) {
print(paste('Extract month/season/annual data nmin=',nmin))
print(class(y))
print(FUNX)
}
if (inherits(y,'season')) {
if(attr(y,'season.interval')=='4seasons') {
if (sum(is.element(FUNX,xfuns))==0) {
if (verbose) print(paste('->- Aggregate GCM: FUN=',FUNX,'nmin=',nmin,
'dim:',dim(gcm)[1],dim(gcm)[2],
' class:',paste(class(gcm),collapse='-')))
GCM <- as.4seasons(gcm,FUN=FUNX,nmin=nmin)
} else {
if (verbose) print('Need to aggregate FUNX(gcm)')
eval(parse(text=paste('GCM <- as.4seasons(',FUNX,'(gcm),FUN="mean",nmin=nmin)',sep="")))
}
## REB 2024-03-01: make the code more robust. LSP is the aggregated predictor used for calibration
if (verbose) print(paste('index(LSP):',
paste(range(index(LSP)),collapse='-'),'length=',length(index(LSP))))
it.lsp <- season(LSP)[1]
if (verbose) print(paste('subset: it=',it.lsp))
GCM <- subset(GCM,it=it.lsp)
} else {
if (sum(is.element(FUNX,xfuns))==0) {
if (verbose) print(paste('No special transformation (PCA)',FUNX,nmin))
GCM <- as.seasons(gcm,FUN=FUNX,
start=it.season[1],end=it.season[2],nmin=nmin)
} else {
if (verbose) print('Need to aggregate FUNX(gcm)')
eval(parse(text=paste('GCM <- as.seasons(',FUNX,
'(gcm),FUN="mean",start=it.season[1],end=it.season[2],nmin=nmin)',sep="")))
}
}
if (verbose) {print('Check: index(LSP)'); print(index(LSP))}
} else if (inherits(y,'annual')) {
if (verbose) print(paste('Annually aggregated',FUNX,'for GCM'))
if (sum(is.element(FUNX,xfuns))==0)
GCM <- annual(gcm,FUN=FUNX,nmin=nmin) else
eval(parse(text=paste('GCM <- annual(',FUNX,'(gcm),FUN="mean",nmin=nmin)',sep="")))
} else if (inherits(y,'month')) {
if (length(table(month(y)))==1)
GCM <- subset(gcm,it=month.abb[month(y)[1]])
else
GCM <- gcm
if (!is.null(FUNX)) {
GCM <- do.call(FUNX,list(GCM))
}
}
if (verbose) {
print(paste('Estimate common EOFs - combine fields',varid(LSP),esd::unit(LSP),'&',
varid(GCM),esd::unit(GCM)))
print(paste('Magnitudes/scales',paste(range(c(coredata(LSP)[1,]),collapse=' - '),
paste(range(c(coredata(GCM)[1,]),collapse=' - ')))))
}
if (is.null(src(LSP))) attr(LSP,'source') <- 'reanalysis'
if (length(src(GCM))>1) attr(GCM,'source') <- attr(GCM,'source')[1]
LSPGCM <- combine(LSP,GCM)
if (verbose) print("- - - > EOFs")
Z <- try(EOF(LSPGCM,verbose=verbose))
if (verbose) save(Z,file='Z.ceof.DSensemble.temp.rda')
## The test lines are included to assess for non-stationarity
if (non.stationarity.check) {
testGCM <- subset(GCM,it=range(year(LSP))) # REB 29.04.2014
testy <- as.station(regrid(testGCM,is=y)) # REB 29.04.2014
attr(testGCM,'source') <- 'testGCM' # REB 29.04.2014
testZ <- combine(testGCM,GCM) # REB 29.04.2014
rm("testGCM"); gc(reset=TRUE)
}
rm("gcm","GCM"); gc(reset=TRUE)
if (verbose) print("- - - > DS (pca)")
Z0 <- Z
if (verbose) print(class(attr(Z,'appendix.1')))
if (biascorrect) Z <- try(biasfix(Z))
if(inherits(Z,"try-error")) Z <- Z0
ds <- try(DS(y,Z,ip=ip,rmtrend=rmtrend,verbose=verbose))
## REB 2024-03-08 - reduce the volume of the information
attr(ds,'model') <- summary(attr(ds,'model'))
if(inherits(ds,"try-error")) {
print(paste("esd failed for",gcmnm.i))
print(range(index(y)))
print(range(index(Z)))
print(range(index(attr(Z,'appendix.1'))))
} else {
if (verbose) print("post-processing")
## REB 2024-03-08 - reduce the volume of the information
attr(ds,'model') <- try(summary(attr(ds,'model')))
gcmnm[i] <- gcmnm.i
## Keep the results for the projections:
if (verbose) print('Extract the downscaled projection')
z <- attr(ds,'appendix.1') ## KMP 09.08.2015
## REB: 2016-11-29
## REB: 2024-03-08: Only save model summary in DS.
# if (test) {
# ## model takes up too much space! can it be stored more efficiently?
# ## REB 2016-11-29: remove most of the contents and keep only a small part
# if (verbose) print('Add reduced model information')
# for (iii in 1:dim(ds)[2]) {
# print(names(attr(ds,'model')[[iii]]))
# attr(ds,'model')[[iii]]$residuals <- NULL
# attr(ds,'model')[[iii]]$effects <- NULL
# attr(ds,'model')[[iii]]$rank <- NULL
# attr(ds,'model')[[iii]]$fitted.values <- NULL
# attr(ds,'model')[[iii]]$assign <- NULL
# attr(ds,'model')[[iii]]$qr <- NULL
# attr(ds,'model')[[iii]]$df.residual <- NULL
# attr(ds,'model')[[iii]]$xlevels <- NULL
# attr(ds,'model')[[iii]]$model <- NULL
# attr(ds,'model')[[iii]]$terms <- NULL
# print(names(attr(ds,'model')[[iii]]))
# }
# }
attr(z,'predictor.pattern') <- attr(ds,'predictor.pattern')
## REB: 2023-12-08 - add information about the regression/calibration coefficients.
attr(z,'model') <- attr(ds,'model')
attr(z,'evaluation') <- attr(ds,'evaluation')
# REB 2016-11-28: adjust results to have same mean as observations in overlapping period:
if (verbose) print('adjust offset of predicted PCs for overlapping period')
index(z) <- year(z)
zolp <- window(zoo(z),start=start(y),end=end(y))
coredata(z) <- t(t(coredata(z)) - mean(coredata(zolp)) + colMeans(coredata(y)))
## y is a pca with no missing values; z has no NAs.
if (verbose) print(round(colMeans(y),2))
## REB 2021-05-25
#cl <- paste('dse.pca$i',i,'_',gsub('-','.',gcmnm[i]),' <- z',sep='')
#eval(parse(text=cl))
mod.ssp.ripf <- decipher(ncfiles[select[i]])
dse.pca[[paste0('i',i,'_',mod.ssp.ripf[1],'.',mod.ssp.ripf[3])]] <- z
if (verbose) {
print('Test to see if as.station has all information needed.')
test.stations.ds <- as.station(ds)
a <- attrcp(y,z); class(a) <- c("ds",class(y))
test.stations.z <- as.station(a)
}
# Diagnose the residual: ACF, pdf, trend. These will together with the
# cross-validation and the common EOF diagnostics provide a set of
# quality indicators.
## REB 2016-10-20 revised code
if (verbose) print('examine residuals...')
## REB 2021-04-26: revised code - changed the index of cal and res to years.
cal <- attr(ds,"original_data"); index(cal) <- year(cal)
fit <- attr(ds,"fitted_values"); index(fit) <- year(fit)
res <- cal - fit
#REBres <- as.residual(ds)
res.trend <- 10*diff(range(trend(res)))/diff(range(year(res)))
ks <- round(ks.test(coredata(res),pnorm)$p.value,4)
# ar <- as.numeric(acf(trend(cal-fit,result="residual"),
# plot=FALSE)[[1]][2])
ar <- as.numeric(acf(coredata(trend(res,result="residual")[,1]),
plot=FALSE)[[1]][2])
if (verbose) print(paste("Residual trend=",
round(res.trend,3),'D/decade; K.S. p-val',
round(ks,2),'; AR(1)=',round(ar,2)))
# Evaluation: here are lots of different aspects...
# Get the diagnostics: this is based on the analysis of common EOFs...
xval <- attr(ds,'evaluation')
r.xval <- round(cor(xval[,1],xval[,2]),3)
if (verbose) print(paste("x-validation r=",r.xval))
for(j in seq(1,ncol(y))) r.xval.all[i,j] <-
round(cor(xval[,j*2-1],xval[,j*2]),3)
ds.ratio <- round(sd(ds[,1],na.rm=TRUE)/sd(y[,1],na.rm=TRUE),4)
if (verbose) print(paste("sd ratio=",ds.ratio))
if (verbose) print(names(attributes(ds)))
if (biascorrect) {
if (verbose) print('(biascorrect)')
diag <- attr(ds,'diagnose')
if ( (verbose) & !is.null(diag)) str(diag)
} else diag <- NULL
# diagnose for ds-objects
if (verbose) print('<debug milestone>')
## REB 2024-03-01: attempt to make the code more robust and cleaner
## diagnostics based on corresponding interval
if (verbose) print(paste(range(year(y)),collapse='-'))
z.esd <- subset(attr(ds,'appendix.1'),it=range(year(y)))
attr(z.esd,'location') <- loc(ds)
if (verbose) {print(dim(z.esd)); print(range(index(z.esd)))}
z.obs <- subset(ds,it=range(year(y)))
if (verbose) {print(dim(z.obs)); print(range(index(z.obs)))}
if (length(intersect(index(z.esd),index(z.obs))) > 30) {
z.esd <- matchdate(z.esd,z.obs); z.obs <- matchdate(z.obs,z.esd)
if (verbose) {str(z.esd); str(z.obs)}
z.esd <- coredata(z.esd); z.obs <- coredata(z.obs)
if (verbose) cat('coredata() OK')
srati.predict <- sd(z.esd,na.rm=TRUE)/sd(z.obs,na.rm=TRUE)
arati.predict <- ar1(z.esd)/ar1(z.obs)
} else {
if (verbose) {print('Too few matching dates');
print(intersect(index(z.esd),index(z.obs)))}
srati.predict <- NA
arati.predict <- NA
}
if (verbose) print('<std & ar1 OK>')
if (is.null(diag)) {
if (verbose) print('{no diag present}')
z.y <- coredata(subset(y,it=range(year(ds))))
mdiff <- (mean(z.y,na.rm=TRUE)-mean(z.obs,na.rm=TRUE))/sd(y,na.rm=TRUE)
srati <- sd(z.obs,na.rm=TRUE)/sd(z.y,na.rm=TRUE)
## Try to estimate ratio of lag-1 autocorrelations
arati <- ar1(z.esd)/ar1(z.y)
} else {
if (verbose) print('{diag present}')
# Extract the mean score for leading EOF from the 4 seasons:
mdiff <- mean(c(diag$s.1$mean.diff[1]/diag$s.1$sd0[1],
diag$s.2$mean.diff[1]/diag$s.2$sd0[1],
diag$s.3$mean.diff[1]/diag$s.3$sd0[1],
diag$s.4$mean.diff[1]/diag$s.4$sd0[1]))
srati <- mean(c(diag$s.1$sd.ratio[1],diag$s.2$sd.ratio[1],
diag$s.3$sd.ratio[1],diag$s.4$sd.ratio[1]))
arati <- mean(c(diag$s.1$autocorr.ratio[1],
diag$s.2$autocorr.ratio[1],
diag$s.3$autocorr.ratio[1],
diag$s.4$autocorr.ratio[1]))
}
scorestats[i,] <- c(1-r.xval,mdiff,1-srati,1-arati,res.trend,ks,ar,1-ds.ratio,
1-round(var(xval[,2])/var(xval[,1]),2),
1-srati.predict,1-arati.predict)
if (verbose) print('[scorestats]')
if (verbose) print(scorestats[i,])
quality <- 100*(1-mean(sapply(scorestats[i,], function(x) min(1,abs(x))),na.rm=TRUE))
R2 <- round(100*sd(xval[,2])/sd(xval[,1]),2)
print(paste("i=",i,"GCM=",gcmnm[i],' x-valid cor=',round(100*r.xval,2),
"R2=",R2,'% ','Common EOF: bias=',round(mdiff,2),
' sd1/sd2=',round(srati,3),
' sd1(ds.gcm)/sd2(ds.reanalysis)=',round(srati.predict,3),
"mean=",round(mean(coredata(y),na.rm=TRUE),2),
'quality=',
round(quality)))
index(y) <- year(y); index(z) <- year(z)
if (plot) {
qcol <- quality
qcol[qcol < 1] <- 1;qcol[qcol > 100] <- 100
cols <- rgb(seq(1,0,length=100),rep(0,100),seq(0,1,length=100),0.15)
lines(z[,1],lwd=2,col=cols[qcol])
lines(y[,1],lwd=3,main='PC1')
}
}
if (verbose) print('Downscaling finished')
}
## Unpacking the information tangled up in GCMs, PCs and stations:
## Save GCM-wise in the form of PCAs
gcmnm <- gsub('-','.',gcmnm)
#Z <- attrcp(y,Z)
if (verbose) print('Set attributes')
if (test) {
attr(dse.pca,'model') <- attr(ds,'model') ## KMP 09-08-2015
attr(dse.pca,'ceof0') <- Z0
attr(dse.pca,'ceof') <- Z
}
attr(dse.pca,'predictor') <- attr(LSP,'source')
attr(dse.pca,"longname") <- attr(y,"longname")
attr(dse.pca,'domain') <- list(lon=lon,lat=lat)
attr(scorestats, "longname") <- paste(c("1 - cross validation correlation of first PC","mean bias",
"1 - ratio of standard deviations","1 - ratio of autocorrelations",
"residual trend","Kolmogorov-Smirnov Test for normal distribution",
"autocorrelation of the residual",
"1 - ratio of standard deviations for first PC",
"1 - ratio of variance for first PC from cross-validation",
"1 - ratio of standard deviations (predictions from GCM simulations/predictions from reanalysis)",
"1 - ratio of autocorrelations (predictions from GCM simulations/predictions from reanalysis)",
collapse=", "))
attr(dse.pca,'scorestats') <- scorestats
attr(r.xval, "longname") <- "cross validation correlation scores for all PCs"
attr(dse.pca,'r.xval') <- r.xval.all
attr(dse.pca,'path') <- path
attr(dse.pca,'scenario') <- rcp
attr(dse.pca,'model_id') <- gcmnm
attr(dse.pca,'variable') <- attr(y,"variable")[1]
attr(dse.pca,'unit') <- attr(y,"unit")[1]
attr(dse.pca,'history') <- history.stamp(y)
# KMP 2018-11-12: difference.z is not defined in this function
#if (non.stationarity.check) {
# attr(dse.pca,'on.stationarity.check') <- difference.z
#} else {
# attr(dse.pca,'on.stationarity.check') <- NULL
#}
class(dse.pca) <- c("dsensemble","pca","list")
if(inherits(y,"season")) {
class(dse.pca) <- c(class(dse.pca), "season")
attr(dse.pca, "season.interval") <- attr(y,"season.interval")
}
if(!is.null(path.ds)) file.ds <- file.path(path.ds,file.ds)
if (verbose) print(file.ds)
save(file=file.ds,dse.pca)
if (verbose) print("---")
invisible(dse.pca)
}
#' @exportS3Method
#' @export DSensemble.eof
DSensemble.eof <- function(y,...,plot=TRUE,path="CMIP5.monthly",rcp="rcp45",biascorrect=FALSE,
predictor="ERA40_slp_mon.nc",non.stationarity.check=FALSE,
ip=1:5,lon=c(-30,20),lat=c(-20,10),it=NULL,rel.cord=TRUE,nmin=NULL,
lev=NULL,levgcm=NULL,select=NULL,FUN="mean",rmtrend=TRUE,FUNX="mean",
xfuns='C.C.eq',threshold=1,pattern="psl_Amon_",verbose=FALSE,
file.ds="DSensemble.eof.rda",path.ds=NULL,ds.interval=NULL,test=FALSE) {
if(verbose) print("DSensemble.eof")
stopifnot(inherits(y,c("EOF","field")))
cls <- class(y)
if (!is.na(attr(y,'longitude'))[1] & (any(lon>0) & any(lon<0)) & rel.cord)
lon <- round( range(attr(y,'longitude'),na.rm=TRUE) + lon )
if (!is.na(attr(y,'latitude'))[1] & (any(lat>0) & any(lat<0)) & rel.cord)
lat <- round( range(attr(y,'latitude'),na.rm=TRUE) + lat )
if (sum(!is.finite(lon))>0)
warning(paste('Bad longitude range provided: ',paste(lon,collapse='-')))
if (sum(!is.finite(lat))>0)
warning(paste('Bad latitude range provided: ',paste(lat,collapse='-')))
if (is.character(predictor))
slp <- retrieve(file=predictor,lon=lon,lat=lat,lev=lev,
verbose=verbose) else
if (inherits(predictor,'field'))
slp <- subset(predictor,is=list(lon=lon,lat=lat))
if (inherits(y,'season')) {
if (verbose) print('seasonal data')
if (FUNX!='C.C.eq') SLP <- as.4seasons(slp,FUN=FUNX) else
eval(parse(text=paste('SLP <- as.4seasons(',FUNX,'(slp),FUN="mean",nmin=nmin)',sep="")))
SLP <- matchdate(SLP,y)
if (length(table(season(y)))>1) {
if (verbose) print('--- Apply DS to seasons seperately ---')
Z <- list(info=paste('DSensemble.pca for different seasons: ',
paste(lon,collapse='-'),'E/',paste(lat,collapse='-'),'N',sep=''))
## KMP 2016-10-25: Looping over seasons will not work if y is an eof object.
## I added a temporary fix, turning the multi-season eof object into a field
## before selecting a season. This could result in a serious loss of information.
if(inherits(y,"eof")) y <- as.field(y)
for (season in names(table(season(y)))) {
if (verbose) print(paste('Select',season))
z <- DSensemble.eof(subset(y,it=season),plot=plot,path=path,
rcp=rcp,biascorrect=biascorrect,predictor=SLP,
non.stationarity.check=non.stationarity.check,
ip=ip,lon=lon,lat=lat,rel.cord=FALSE,
select=select,FUN=FUN,rmtrend=rmtrend,
FUNX=FUNX,threshold=threshold,
pattern=pattern,verbose=verbose,nmin=nmin)
eval(parse(text=paste('Z$',season,' <- z',sep='')))
}
if (verbose) print('--- Results returned as a list ---')
return(Z)
}
} else if (inherits(y,'annual')) {
if (verbose) print('annual data')
if (!is.null(it)) {
if (verbose) print('Extract some months or a time period')
if (verbose) print(it)
slp <- subset(slp,it=it)
if (is.null(nmin) & is.character(it)) nmin <- length(it)
if (is.null(nmin)) nmin <- 1
}
if (!is.annual(slp)) {
if (FUNX!='C.C.eq')
SLP <- annual(slp,FUN=FUNX,nmin=nmin) else
SLP <- annual(C.C.eq(slp),FUN='mean',nmin=nmin)
} else SLP <- slp
## Synchronise
if (verbose) {print(index(SLP)); print(index(y))}
SLP <- matchdate(SLP,y)
} else if (inherits(y,'month')) {
SLP <- matchdate(slp,y)
}
if (inherits(SLP,"eof")) SLP <- as.field(SLP)
rm("predictor","slp"); gc(reset=TRUE)
if(!inherits(y,"eof")) y <- EOF(y)
# Ensemble GCMs
path <- file.path(path,rcp,fsep = .Platform$file.sep)
ncfiles <- list.files(path=path,pattern=pattern,full.names=TRUE)
N <- length(ncfiles)
if (is.null(select)) select <- 1:N else
N <- length(select)
if (verbose) {print('GCMs:'); print(path); print(pattern); print(ncfiles[select])}
d.y <- dim(y)
years <- 1900:2100
m <- length(years)
months <- rep(month(y)[1],m)
X <- matrix(rep(NA,N*m*d.y[2]),N,m*d.y[2])
dim(X) <- c(d.y[2],N,m)
gcmnm <- rep("",N)
scorestats <- matrix(rep(NA,N*9),N,9)
colnames(scorestats) <- c("1-r.xval","mean.diff","1-sd.ratio","autocorr.diff",
"res.trend","res.K-S","res.ar1",'amplitude.ratio',
"1-R2")
t <- as.Date(paste(years,months,'01',sep='-'))
flog <- file("DSensemble.eof-log.txt","at")
## Set up a list environment to keep all the results
dse.eof <- list(info=paste('DSensemble.pca for different seasons: ',
paste(lon,collapse='-'),'E/',paste(lat,collapse='-'),'N',sep=''),eof=y)
if (verbose) print("loop...")
for (i in 1:N) {
if (verbose) print(ncfiles[select[i]])
gcm <- try(retrieve(file = ncfiles[select[i]],
lon=range(lon(SLP))+c(-2,2),
lat=range(lat(SLP))+c(-2,2),
lev=levgcm,verbose=verbose))
if(inherits(gcm,"try-error")) {
print(paste("retrieve failed for",ncfiles[select[i]]))
} else {
nmattsgcm <- names(attributes(gcm))
if (verbose) print('nmattsgcm <- names(attributes(gcm))')
if (!is.null(ds.interval)) gcm <- subset(gcm,it=ds.interval)
if (length(index(gcm))<=1) print(paste('Problem selecting GCM results in period',
min(year(y),na.rm=TRUE),'2099'))
## KMP 2016-08-09 added separate level input for slp and gcm
## because they can have levels of different units
if(is.null(levgcm) & !is.null(attr(gcm,"level")))
levgcm <- attr(gcm,"level")
if (!is.null(it)) {
if ((nmin==0) & is.character(it)) nmin <- length(it)
if (verbose) print('Extract some months or a time period')
if (verbose) {print(it); print(nmin)}
gcm <- subset(gcm,it=it)
}
#gcmnm[i] <- attr(gcm,'model_id')
#gcmnm.i <- paste(attr(gcm,'model_id'),attr(gcm,'realization'),sep="-r")
# KMP: 10.03.2017 - pass on additional information about GCM runs (gcm + rip - realization, initialization, physics version)
#gcmnm.i <- paste(attr(gcm,'model_id'),attr(gcm,'parent_experiment_rip'),sep="-")
rip <- NULL
if(any(grepl("rip", names(attributes(gcm))))) {
nm.rip <- names(attributes(gcm))[grepl("rip",names(attributes(gcm)))][[1]]
if(!is.null(attr(gcm, nm.rip))) {
rip <- attr(gcm, nm.rip)
if(!grepl("r[0-9]{1,2}i[0-9]{1,2}p[0-9]{1,2}", rip)) rip <- NULL
}
}
if(is.null(rip)) {
if (verbose) print('is.null(rip)...6')
if (length(grep("realization",nmattsgcm)) > 0) nm.r <- attributes(gcm)[grep("realization",nmattsgcm)][[1]] else nm.r <- '_'
if (length(grep("initialization",nmattsgcm)) > 0) nm.i <- attributes(gcm)[grep("initialization",nmattsgcm)][[1]] else nm.i <- '_'
if (length(grep("physics",nmattsgcm)) > 0) nm.p <- attributes(gcm)[grep("physics",nmattsgcm)][[1]] else nm.p <- '_'
# nm.r <- names(attributes(gcm))[grep("realization",names(attributes(gcm)))][[1]]
# nm.i <- names(attributes(gcm))[grep("initialization",names(attributes(gcm)))][[1]]
# nm.p <- names(attributes(gcm))[grep("physics",names(attributes(gcm)))][[1]]
rip <- paste0("r",attr(gcm,nm.r),"i",attr(gcm,nm.i),"p",attr(gcm,nm.p))
}
gcmnm.i <- paste0(attr(gcm,'model_id'),".",rip)
if (verbose) print(class(y))
## REB 2016-10-25
if (inherits(y,'season')) {
if (verbose) print(paste('Seasonally aggregated',FUNX,'for GCM'))
if (sum(is.element(FUNX,xfuns))==0) {
if (verbose) print('No special transformation (EOF)')
GCM <- as.4seasons(gcm,FUN=FUNX,nmin=nmin)
} else {
if (verbose) print('Need to aggregate FUNX(gcm)')
eval(parse(text=paste('GCM <- as.4seasons(',FUNX,'(gcm),FUN="mean",nmin=nmin)',sep="")))
}
if (verbose) {print(season(SLP)[1]); print(dim(GCM))}
GCM <- subset(GCM,it=season(SLP)[1])
} else if (inherits(y,'annual')) {
if (verbose) print(paste('Annually aggregated',FUNX,'for GCM'))
if (sum(is.element(FUNX,xfuns))==0)
GCM <- annual(gcm,FUN=FUNX,nmin=nmin) else
eval(parse(text=paste('GCM <- annual(',FUNX,'(gcm),FUN="mean",nmin=nmin)',sep="")))
} else if (inherits(y,'month')) {
if (length(table(month(y)))==1)
GCM <- subset(gcm,it=month.abb[month(y)[1]])
else
GCM <- gcm
if (!is.null(FUNX)) {
GCM <- do.call(FUNX,list(GCM))
}
}
#ds.parts <- TRUE
#if(ds.parts) {
# GCM <- subset(GCM,it=c(seq(1900,1920),year(SLP),seq(1981,2100)))
#}
## REB 2016-10-25
#if (inherits(y,'season')) {
# if (FUNX!='C.C.eq') GCM <- as.4seasons(gcm,FUN=FUNX,nmin=nmin) else
# GCM <- as.4seasons(C.C.eq(gcm),FUN='mean',nmin=nmin)
# GCM <- subset(GCM,it=season(SLP)[1])
#} else if (inherits(y,'annual')) {
# if (FUNX!='C.C.eq') GCM <- annual(gcm,FUN=FUNX,nmin=nmin) else
# GCM <- annual(C.C.eq(gcm),FUN='mean',nmin=nmin)
#} else if (inherits(y,'month')) {
# if (length(table(month(y)))==1)
# GCM <- subset(gcm,it=month.abb[month(y)[1]]) else
# GCM <- gcm
#}
if (verbose) {str(SLP); str(GCM)}
SLPGCM <- combine(SLP,GCM)
if (verbose) print("- - - > EOFs")
Z <- try(EOF(SLPGCM))
if(inherits(Z,"try-error")) biascorrect <- FALSE
## The test lines are included to assess for non-stationarity
## KMP 2018-11-02: Does the nonstationarity test work for DSensemble.eof?
if (non.stationarity.check) {
testGCM <- subset(GCM,it=range(year(SLP)))
testy <- regrid(testGCM,is=as.field(y))
attr(testGCM,'source') <- 'testGCM'
testZ <- combine(testGCM,GCM)
difference.z <- testy - testZ
rm("testGCM"); gc(reset=TRUE)
}
rm("gcm","GCM"); gc(reset=TRUE)
if (verbose) print("- - - > DS (eof)")
Z0 <- Z
if (verbose) print(class(attr(Z,'appendix.1')))
if (biascorrect) Z <- biasfix(Z)
diag <- diagnose(Z)
ds <- try(DS(y,Z,ip=ip,verbose=verbose))
if(inherits(ds,"try-error")) {
print(paste("esd failed for",gcmnm.i))
print(range(index(y)))
print(range(index(Z)))
print(range(index(attr(Z,'appendix.1'))))
} else {
if (verbose) print("post-processing")
## REB 2024-03-08 - reduce the volume of the information
attr(ds,'model') <- summary(attr(ds,'model'))
gcmnm[i] <- gcmnm.i
## Unpacking the information tangled up in GCMs, PCs and stations:
## Save GCM-wise in the form of PCAs
gcmnm[i] <- gsub('-','.',gcmnm[i])
gcmnm[i] <- gsub('/','.',gcmnm[i])
## Keep the results for the projections:
if (verbose) print('Extract the downscaled projection')
z <- attr(ds,'appendix.1') ## KMP 09.08.2015
##attr(z,'model') <- attr(ds,'model') ## KMP 09-08-2015
## model takes up too much space! can it be stored more efficiently?
## REB: 2016-11-29
# if (test) {
# ## model takes up too much space! can it be stored more efficiently?
# ## REB 2016-11-29: remove most of the contents and keep only a small part
# if (verbose) print('Reduced model information')
# for (iii in 1:dim(ds)[2]) {
# print(names(attr(ds,'model')[[iii]]))
# attr(ds,'model')[[iii]]$residuals <- NULL
# attr(ds,'model')[[iii]]$effects <- NULL
# attr(ds,'model')[[iii]]$rank <- NULL
# attr(ds,'model')[[iii]]$fitted.values <- NULL
# attr(ds,'model')[[iii]]$assign <- NULL
# attr(ds,'model')[[iii]]$qr <- NULL
# attr(ds,'model')[[iii]]$df.residual <- NULL
# attr(ds,'model')[[iii]]$xlevels <- NULL
# attr(ds,'model')[[iii]]$model <- NULL
# attr(ds,'model')[[iii]]$terms <- NULL
# print(names(attr(ds,'model')[[iii]]))
# }
# }
attr(z,'predictor.pattern') <- attr(ds,'predictor.pattern')
attr(z,'evaluation') <- attr(ds,'evaluation')
## Store the results in a list element
cl <- paste('dse.eof$i',i,'_',gcmnm[i],' <- z',sep='')
cl <- sub('=','_',cl)
if (verbose) print(paste('execute this line:',cl))
eval(parse(text=cl))
if (verbose) {
print('Test to see if as.field has all information needed')
test.field.ds <- as.field(ds)
a <- attrcp(y,z); class(a) <- class(y)
test.field.z <- as.field(a)
}
# Diagnose the residual: ACF, pdf, trend. These will together with the
# cross-validation and the common EOF diagnostics provide a set of
# quality indicators.
## REB 2016-10-20
cal <- attr(ds,"original_data"); index(cal) <- year(cal)
fit <- attr(ds,"fitted_values"); index(fit) <- year(fit)
if (verbose) print('examine residuals:')
res <- cal - fit ## REB 2016-10-20 test PCs
res.trend <- 10*diff(range(trend(res)))/diff(range(year(res)))
ks <- round(ks.test(coredata(res),pnorm)$p.value,4)
ar <- as.numeric(acf(coredata(trend(res,result="residual")[,1],
plot=FALSE))[[1]][2])
## REB 2016-10-20 - end of revised code
if (verbose) print(paste("Residual trend=",
round(res.trend,3),'D/decade; K.S. p-val',
round(ks,2),'; AR(1)=',round(ar,2)))
# Evaluation: here are lots of different aspects...
# Get the diagnostics: this is based on the analysis of common EOFs...
xval <- attr(ds,'evaluation')
r.xval <- round(sapply(seq(1,2*ncol(ds),2),function(i) cor(xval[,i],xval[,i+1])),3)
#round(cor(xval[,1],xval[,2]),3)
if (verbose) print(paste("x-validation r=",paste(r.xval,collapse=", ")))
ds.ratio <- round(sapply(seq(1,ncol(ds)),
function(i) sd(ds[,i],na.rm=TRUE)/sd(y[,i],na.rm=TRUE)),3)
#round(sd(ds[,1],na.rm=TRUE)/sd(y[,1],na.rm=TRUE),4)
if (verbose) print(paste("sd ratio=",paste(ds.ratio,collapse=", ")))
if (verbose) print(names(attributes(ds)))
if (biascorrect) {
if (verbose) print('<biascorrect>')
diag <- attr(ds,'diagnose')
if ( (verbose) & !is.null(diag)) str(diag)
} else diag <- NULL
# diagnose for ds-objects
if (verbose) print('...')
#
if (is.null(diag)) {
if (verbose) print('no diag')
mdiff <- (mean(subset(y,it=range(year(y))),na.rm=TRUE)-
mean(subset(ds,it=range(year(ds))),na.rm=TRUE))/
sd(y,na.rm=TRUE)
srati <- 1 - sd(subset(ds,it=range(year(ds))),na.rm=TRUE)/
sd(subset(y,it=range(year(y))),na.rm=TRUE)
adiff <- as.numeric(acf(coredata(y)[,1],plot=FALSE,na.action=na.pass)[[1]][2])-
as.numeric(acf(coredata(ds)[,1],plot=FALSE,na.action=na.pass)[[1]][2])
} else {
if (verbose) print('diag ok')
# Extract the mean score for leading EOF from the 4 seasons:
mdiff <- mean(c(diag$s.1$mean.diff[1]/diag$s.1$sd0[1],
diag$s.2$mean.diff[1]/diag$s.2$sd0[1],
diag$s.3$mean.diff[1]/diag$s.3$sd0[1],
diag$s.4$mean.diff[1]/diag$s.4$sd0[1]))
srati <- mean(1 - c(diag$s.1$sd.ratio[1],diag$s.2$sd.ratio[1],
diag$s.3$sd.ratio[1],diag$s.4$sd.ratio[1]))
adiff <- mean(1 - c(diag$s.1$autocorr.ratio[1],
diag$s.2$autocorr.ratio[1],
diag$s.3$autocorr.ratio[1],
diag$s.4$autocorr.ratio[1]))
}
scorestats[i,] <- c(1-r.xval[1],mdiff,srati,adiff,res.trend,ks,ar,1-ds.ratio[1],
1-round(var(xval[,2])/var(xval[,1]),2))
if (verbose) print('scorestats')
if (verbose) print(scorestats[i,])
quality <- 100*(1-mean(abs(scorestats[i,]),na.rm=TRUE))
R2 <- round(100*sd(xval[,2])/sd(xval[,1]),2)
print(paste("i=",i,"GCM=",gcmnm[i],' x-valid cor=',round(100*r.xval[1],2),
"R2=",R2,'% ','Common EOF: bias=',round(mdiff,2),
' sd1/sd2=',round(srati,3),
"mean=",round(mean(coredata(y),na.rm=TRUE),2),'quality=',
round(quality)))
}
}
if (verbose) print('Downscaling finished')
}
#Z <- attrcp(y,Z)
if (verbose) print('Set attributes')
if (test) {
attr(dse.eof,'model') <- attr(ds,'model') ## KMP 09-08-2015
attr(dse.eof,'ceof0') <- Z0
attr(dse.eof,'ceof') <- Z
}
attr(dse.eof,'predictor') <- attr(SLP,'source')
attr(dse.eof,'domain') <- list(lon=lon,lat=lat)
attr(dse.eof,'scorestats') <- scorestats
attr(dse.eof,'path') <- path
attr(dse.eof,'scenario') <- rcp
attr(dse.eof,'variable') <- attr(y,"variable")[1]
attr(dse.eof,'unit') <- attr(y,"unit")[1]
attr(dse.eof,'unitarea') <- attr(y,"unitarea")
attr(dse.eof,'history') <- history.stamp(y)
if (non.stationarity.check)
attr(dse.eof,'on.stationarity.check') <- difference.z else
attr(dse.eof,'on.stationarity.check') <- NULL
class(dse.eof) <- c("dsensemble","eof","list")
if(!is.null(path.ds)) file.ds <- file.path(path.ds,file.ds)
save(file=file.ds,dse.eof)
if (verbose) print("---")
invisible(dse.eof)
}
#' @exportS3Method
#' @export DSensemble.field
DSensemble.field <- function(y,...,plot=TRUE,path="CMIP5.monthly/",rcp="rcp45",biascorrect=FALSE,
predictor="ERA40_t2m_mon.nc",non.stationarity.check=FALSE,
ip=1:16,lon=c(-30,20),lat=c(-20,10),it=c('djf','mam','jja','son'),
rel.cord=TRUE,select=NULL,FUN="mean",rmtrend=TRUE,FUNX="mean",
xfuns='C.C.eq',threshold=1,pattern="tas_Amon_",verbose=FALSE,
file.ds="DSensemble.rda",path.ds=NULL,nmin=NULL,ds.interval=NULL) {
## For downscaling gridded predictand. This is a wrap-around which extracts the season or aggregates
## to annual values and then calls the other types for the downscaling.
## KMP 2016-10-25: Redirect to DSensemble.eof
if(verbose) print("DSensemble.field")
dse.eof <- DSensemble.eof(y,plot=plot,path=path,rcp=rcp,biascorrect=biascorrect,
predictor=predictor,non.stationarity.check=non.stationarity.check,
ip=ip,lon=lon,lat=lat,it=it,rel.cord=rel.cord,select=select,
FUN=FUN,rmtrend=rmtrend,FUNX=FUNX,xfuns=xfuns,threshold=threshold,
pattern=pattern,verbose=verbose,
file.ds=file.ds,path.ds=path.ds,nmin=nmin)
invisible(dse.eof)
}
#' @exportS3Method
#' @export DSensemble.station
DSensemble.station <- function(y,...,verbose=FALSE) {
if(verbose) print("DSensemble.station")
dse <- DSensemble.default(y=y,...,verbose=verbose)
return(dse)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.