analyseUG <- function(G=NULL, gLON=NULL, gLAT=NULL, fit, prm, X, yls=NULL, gauDA=NULL, gauTS=NULL, aug=NULL, ana=NULL,
dsn='.', debugmode=FALSE, retdy=FALSE, mpi=FALSE, analysis_scn='a0', theta=NULL, lite=TRUE) {
# ensemble contructor as input to assimilation - analyseUG stands for unstructured grids
#
#
# G :: OPT, LIST, gmeta6 class object - structured grids. [$cells, $xseq (W-E longitudes),
# $ryseq (N-S latitudes), $cols (grid dimension: longitude), $rows (grid dimension: latitude)
# gLON,gLAT :: OPT, LIST, Geographical coordinates - unstructured grids [nx,ny] 'matrix'
# fit :: INTEGER, forecast time index [for output data]
# prm :: LIST, DA-specific data
# X :: LIST, state vector, structured as X[[xKIND]][[timelab]][[im]]
# yls :: OPT, LIST, observation list, structured as yls[[yKIND]][[yTYPE]]$s lists.
# gauDA :: OPT, LIST, y <-> HE matches, to include observations by state-vector augmentation. Augments x and y
# gauTS :: OPT, LIST, as gauDA lists, without the $y slot. Augments x, and can be used to obtain a smooth estimate (ETKS)
# at specific locations/variables in the domain.
# aug :: OPT, LIST, augmentation blocks (parameters)
# ana :: LIST, analysis parameter for each variable type
# DEM :: OPT, byrow=TRUE vector representation. Digital Terrain Model - just required for water level variables
# dsn :: CHARACTER != ''
# retdy :: OPT, TRUE for no assimilation, and instead return the innovation vector [just overpass observations]
# mpi :: whether to parallel DA in assimilate()
# analysis_scn :: CHARACTER - appended to background and analysis files
# theta :: OPT, LIST - required for parameter space KF [prm$method]. List components:
# $d :: [ntheta] vector of parameter perturbations - has to comply with X, so the ensemble for X components
# match first the background, then one column for each $d element
# $b :: [ntheta] vector of background parameter values
# $Pb :: [ntheta,ntheta] parameters error background covariance matrix
# lite :: LOGICAL, wheter to use the lite version: no MPI, no localization, no inflation, no rotation for ensemble filters
# revisions:
# 2016-01-16 JGP
# re-engineering from former version
# now flexible assimilation building blocks for any number of variables to assimilate & analyse
# forward operator is only by 0-order nearest neighbourhood [no scaling, no representation, no interpolation]
# 2017-03-15
# For water level observations, the distributed observations have to be passed now as gauDA structures.
# This makes this function more standard at the cost of increasing memory requirements.
#
# Notes: either G, or [gLON,gLAT] input assumes a common grid for all gridded variables, which must match the given dimensions.
# State input via X[[vKIND]]$val[[im]], im=1,...m must then match the given metadata.
# That is a vector of length G$cells indicating coordinates as 1st by nx [longitude, in general],
# starting from the grid ul corner.
# Thus, for staggered grids, variables on each grid should be analysed independently
# 2017-10-18
# include pIKS, pMKS schemes. These are not an ensemble schemes, in the sense that covariances are explicit.
# The method uses finite differences though (or conditional sampling), which are provided as a list of ensemble matrices,
# as input estimates of the sensitivites of the observations to the model inputs, to construct a Kalman update for the inputs
# 2018-11-25
# Modifications to make it amenable to R-CRAN
options(warn=2)
if (lite)
mpi <- FALSE
if (!("package:SpatialGraph" %in% search())) {
pointsToLines <- function(a,b) {
stop('analyseUG: pointsToLines() not available and requested. Load SpatialGraph package')}
}
if (!is.null(G) && !is.null(gLON))
stop('analyseUG: use just either G [regular grids] or gLON,gLAT [irregular grids] for horizontal grid definition')
if (is.null(G))
structG <- FALSE # irregular grid - although it also may be used for regular grids
else
structG <- TRUE # structured regular grid
if (debugmode && !file.exists(file.path(dsn,'results')))
dir.create(file.path(dsn,'results'), recursive=TRUE)
if (prm$method %in% c('pIKS','pMKS')) {
if (is.null(theta))
stop('analyseUG:: ---ERR: theta list not provided for parameter-space method---')
m <- length(theta$d)+1 # extra member for the background in X
} else
m <- prm$m # ensemble size
if (!is.null(G))
ng <- G$cells # cells in each horizontal grid
else
ng <- length(gLON)
x <- NULL # [n] - state vector
xpos <- NULL # [n,2] - geographical positions - [NA,NA] for global
xz <- NULL # [n] - vertical position - [NA] for global
xKIND <- NULL # [n] - variable KIND
xtime <- NULL # [n] CHAR, state variable timestamp
xgrd <- NULL # [n] LOGICAL, TRUE for gridded data | FALSE for sparse in space
p <- 0 # [1] - number of observations
y <- NULL # [p] - observation vector
ypos <- NULL # [p,2] - geographical positions - [NA,NA] global obs
yz <- NULL # [p] - vertical positions - [NA] global obs
yKIND <- NULL # [p] - variable KIND
yTYPE <- NULL # [p] - observation TYPE
ytime <- NULL # [p] CHAR, observation timestamp
ygrd <- NULL # [p] LOGICAL, TRUE for variables mapped from gridded variables QC
E <- NULL # [n,m]
HE <- NULL # [p,m]
R <- NULL # [p,p]
H <- NULL # [p,n]
getCooG <- function(G) {
# get coordinates in a regular lattice as a 2-col matrix
cbind(rep(G$xseq,G$rows),rep(G$ryseq,each=G$cols))}
wSigma <- function(sigma, w) {
# inflates a covariance keeping correlation
diag(sqrt(w)) %*% sigma %*% diag(sqrt(w))
}
## make y, ypos, HE
# 1 :: gridded variables <-> satellite observations
if (structG)
gpos <- getCooG(G) # horizontal grid center positions
else
gpos <- cbind(as.numeric(gLON),as.numeric(gLAT)) # horizontal grid center positions
# just for SpatialGraph-based localisation
if (!is.null(prm$loc_boo)) {
if (prm$loc_boo && !is.null(prm$loc_distype)) {
if (prm$loc_distype == 'SG') { # along-network distance calculations
prm$sglst <- list() # information for SpatialGraph-based distance
prm$sglst$sg <- readRDS(file.path(dsn,prm$sgf)) # 'SpatialGraph'
gposSG <- readRDS(file.path(dsn,prm$gridSGf)) # precalculated along-network distances for grid nodes
}
}
}
if (is.list(X[[1]]$val[[1]])) {
ismatX <- FALSE
} else if (is.matrix(X[[1]]$val[[1]])) {
ismatX <- TRUE
} else {
stop('data in X could not be identified either as list or matrix')
}
getXtimes <- function(x){names(x$val)[which(sapply(x$val,length) != 0)] }
vts <- lapply(X, FUN=getXtimes) # list : CHARACTER timelab / gridded vKIND
nvts <- sapply(vts,length) # vector: number of timesteps per gridded x vKIND
if (!ismatX)
vlen <- sapply(X, function(x){length(x$val[[vts[[1]]]][[1]])}) # vector: state-vector length for this variable
else
vlen <- sapply(X,function(x){nrow(x$val[[1]])})
nzs <- vlen/ng # vector: number of vertical levels / variable
xzl <- lapply(X,FUN=function(x){x$z}) # list: vertical levels / variable
if( !identical(as.numeric(nzs), as.numeric(sapply(xzl,'length'))) )
stop('analyseUG: xzl vector does not match given grid layers')
# stack X as E
E <- matrix(NA, nrow=sum(nvts*vlen), ncol=m) # just gridded data
xpos <- matrix(NA, nrow=sum(nvts*vlen), ncol=2) # init for gridded state vector time-stacks
xpos[,1] <- gpos[,1] # recycle [~F95 spread(gpos,1,sum(nvts))]
xpos[,2] <- gpos[,2]
xKIND <- rep(names(X), times=nvts*vlen)
xtime <- rep(do.call(c,vts),times=rep(vlen,nvts)) # CHARACTER
xgrd <- rep(TRUE,nrow(xpos)) # spatially distributed [i.e. either structured or unstructured grids]
xzlg <- lapply(xzl, function(x,ng){rep(x,each=ng)},ng=ng)
for (iv in 1:length(X)) {
xz <- c(xz,rep(xzlg[[iv]],nvts[iv]))
}; rm(xzlg,xzl)
FUN <- function(x,vlen) {if (length(x)==0)
x <- rep(NA,vlen)
else
x <- as.vector(x)
return(x)}
for (iv in 1:length(X)) { # iterate through variables
vKIND <- names(X)[iv]
for (vit in 1:length(X[[iv]]$val)) { # through times
vtime <- names(X[[iv]]$val)[vit] # CHAR
xboo <- as.logical(xKIND == vKIND & xtime == vtime)
if (sum(xboo) == 0)
next
if (is.list(X[[iv]]$val[[vit]]) && length(X[[iv]]$val[[vit]]) == m) {
E[xboo,] <- vapply(X[[iv]]$val[[vit]],FUN=FUN, FUN.VALUE=rep(0,sum(xboo)), vlen=vlen[iv])
} else if (is.matrix(X[[iv]]$val[[vit]]) && ncol(X[[iv]]$val[[vit]]) == m) {
E[xboo,] <- X[[iv]]$val[[vit]]
} else
stop('X[[iv]]$val[[vit]] not identified as an ensemble matrix or list')
if (!is.null(prm$sglst)) {
if (is.null(prm$sglst$xposSG))
prm$sglst$xposSG <- gposSG # init
else
prm$sglst$xposSG <- rbind(prm$sglst$xposSG,gposSG) # augment
}
}
}
# y, ypos, yKIND, yTYPE, ytime, HE
if (!is.null(yls)) {
for (iv in 1:length(yls)) { # iterate through variable KIND
vKIND <- names(yls)[iv]
if (all(sapply(yls[[vKIND]], function(x){is.null(x$s)}))) # restricted to yFMT=='s'
next
cat('mapping X into',vKIND,'observations\n')
if (!(vKIND %in% names(X)))
stop('yls$',vKIND,' not a valid vKIND in X')
# cat('analyse:: stacking satellite observations|',vKIND,'|vit:',vit,'\n')
for (j in 1:length(yls[[vKIND]])) { # iterate through yTYPE observations
if (is.null(yls[[vKIND]][[j]]$s$data) || !yls[[vKIND]][[j]]$s$use) # matching observation type exists
next
yTYPEo <- names(yls[[vKIND]])[j]
vytimes <- yls[[vKIND]][[yTYPEo]]$s$data$timelab # CHARACTER
yfield <- yls[[vKIND]][[yTYPEo]]$s$data$yfield
if (is.null(yfield))
yfield <- 'y'
#if (!inherits(vytimes,'POSIXct'))
# stop('analyse:: yls:',vKIND,' times not POSIXct')
cat('vytimes:',vytimes,'\n')
y_usew <- yls[[vKIND]][[yTYPEo]]$s$w
if (is.null(y_usew))
ylsusew <- FALSE
for (yit in 1:length(vytimes)) {
cat('OP:',vytimes[yit],'\n')
if (!(vytimes[yit] %in% xtime))
next
y_op <- yls[[vKIND]][[yTYPEo]]$s$data[[vytimes[yit]]][[yfield]] # [p_op]
ypos_op <- as.matrix(yls[[vKIND]][[yTYPEo]]$s$data[[vytimes[yit]]][['pos']]) # [p_op x 2] coordinate matrix
yz_op <- yls[[vKIND]][[yTYPEo]]$s$data[[vytimes[yit]]][['z']] # [p_op]
qced_op <- yls[[vKIND]][[yTYPEo]]$s$data[[vytimes[yit]]][['qced']] # [p_op] flag to assimilate just quality-controlled observations
if (is.null(qced_op))
qced_op <- rep(TRUE,length(y_op))
yposOK <- ypos_op[,1] >= min(gpos[,1]) & ypos_op[,1] <= max(gpos[,1]) & # indices of observations within the domain for the analysis
ypos_op[,2] >= min(gpos[,2]) & ypos_op[,2] <= max(gpos[,2])
qced_op <- qced_op & yposOK
if (y_usew) { # apply observation weights -> inverse multiplier of R
yw_op <- yls[[vKIND]][[yTYPEo]]$s$data[[vytimes[yit]]][['w']]
if (is.null(yw_op))
stop('analyse:: --observation weights requested but not available--')
} else {
yw_op <- rep(1.0, length(y_op))
}
y_op <- y_op[qced_op]
ypos_op <- ypos_op[qced_op,]
yz_op <- yz_op[qced_op]
yw_op <- yw_op[qced_op]
p_op <- length(y_op)
if (!is.null(yls[[vKIND]][[yTYPEo]]$s$data[[vytimes[yit]]][['R']])) { # 1st try: R matrix input a list with as many R-matrices as overpasses
R_op <- yls[[vKIND]][[yTYPEo]]$s$data[[vytimes[yit]]][['R']][qced_op,qced_op]
if (y_usew) {
if (all(R_op[!diag(nrow(R_op))] == 0)) { # observation weight scaling
R_op <- diag(diag(R_op)/yw_op) # if it is diagonal
} else { # full covariance scaling required
R_op <- wSigma(R_op,1/yw_op)
}
}; rm(y_usew)
} else if (!is.null(yls[[vKIND]][[yTYPEo]]$s$data[[vytimes[yit]]][['r']])) { # 2nd try: vector variance, specific to observations - uncorrelated errors
R_op <- Diagonal(x=yls[[vKIND]][[yTYPEo]]$s$data[[vytimes[yit]]][['r']][qced_op]/yw_op)
} else if (is.numeric(yls[[vKIND]][[yTYPEo]]$s$r)) { # 3rd try: common scalar variance for all this [[yTYPEo]][[yFMT]]
if (length(yls[[vKIND]]$s$r) != 1)
stop('analyse:: length of scalar observation error variance != 1')
R_op <- Diagonal(x=rep(yls[[vKIND]][[yTYPEo]]$s$r, p_op)/yw_op)
} else {
stop('analyse:: observation error variance not found for vKIND:',vKIND)
}
if (!all(dim(R_op) == p_op))
stop('analyse:: yls$',vKIND,'[[',yTYPEo,']] R dimensions do not match p_op at ',vytimes[yit])
if (nrow(ypos_op) != p_op || length(yz_op) != p_op)
stop('analyse:: yls$',vKIND,'[[',yTYPEo,']] position dimensions do not match p_op at ',vytimes[yit])
H_op <- calcHnn(G, gLON, gLAT, ypos_op) # [p_op,ng] dgCMatrix
xboo <- as.logical(xKIND == vKIND & xtime == vytimes[yit])
if (sum(xboo) != ng)
stop('analyseUG: full grid not identified for ',vKIND,'.',yTYPEo,'|time:',vytimes[yit])
#HE_op <- H_op %*% E[xboo,] # [p_op x ng].[ng x m] dgeMatrix E-NA columns maps into HE_op NA columns
#HE_op <- as.matrix(HE_op) # [p_op, m] matrix
Ho <- Matrix(0,nrow(H_op),nrow(E)) # [p_op,n]
Ho[,xboo] <- H_op
p <- p + p_op
y <- c(y, y_op)
ypos <- rbind(ypos, ypos_op)
yz <- c(yz, yz_op)
yKIND <- c(yKIND, rep(vKIND,p_op))
yTYPE <- c(yTYPE, rep(yTYPEo,p_op))
ytime <- c(ytime, rep(vytimes[yit],p_op))
ygrd <- c(ygrd, rep(TRUE, p_op))
#HE <- rbind(HE, HE_op) not needed -- recalculated later in the transformed space
if (is.null(R)) { # 1st observation dataset
R <- R_op
H <- Ho # dgCMatrix
} else { # with possible intermediate x augmentation
R <- bdiag(R,R_op)
H <- rbind(H,Ho)
}
rm(p_op, y_op, ypos_op, yz_op, qced_op, yposOK, yw_op, R_op, H_op, Ho, xboo)
} # end for overpass
} # end for yTYPE
} # end for vKIND
} # end if not NULL yls
# 2 :: gauDA [y <-> HE] state-vector and observation vector augmentation
cat('analyse:: time series observations\n')
p_gau <- 0
if (length(gauDA) > 0) {
E_gau <- NULL
y_gau <- NULL
ypos_gau <- NULL
yz_gau <- NULL
yKIND_gau <- NULL # generic observation KIND
yTYPE_gau <- NULL # specific observation TYPE
ytime_gau <- NULL
Rblk <- NULL
for (iv in 1:length(gauDA)) { # variable types
vKIND <- names(gauDA)[iv]
p_this <- length(gauDA[[iv]]$y) # available observations
if (p_this == 0)
next
E_gau <- rbind(E_gau,gauDA[[iv]]$HE)
y_gau <- c(y_gau, gauDA[[iv]]$y)
ypos_gau <- rbind(ypos_gau, gauDA[[iv]]$pos )
yz_gau <- c(yz_gau, gauDA[[iv]]$z)
yKIND_gau <- c(yKIND_gau, rep(vKIND, p_this))
yTYPE_gau <- c(yTYPE_gau, gauDA[[iv]]$yTYPE)
ytime_gau <- c(ytime_gau, gauDA[[iv]]$timelab)
if (!is.null(gauDA[[iv]]$R)) # full R matrix input [e.g. preprocessed online for temporal correlation model]
Rgau <- gauDA[[iv]]$R
else
Rgau <- Diagonal(x=gauDA[[iv]]$r) # ddiMatrix
if (is.null(Rblk))
Rblk <- Rgau
else
Rblk <- bdiag(Rblk,Rgau)
} # end for iv TS assim
p_gau <- length(y_gau)
ygrd <- c(ygrd,rep(FALSE,p_gau))
} # end if(length(gauDA))
if (p_gau > 0) {
E <- rbind(E, E_gau)
xpos <- rbind(xpos, ypos_gau)
xz <- c(xz, yz_gau)
if (!is.null(prm$sglst))
prm$sglst$xposSG <- rbind(prm$sglst$xposSG,
pointsToLines(ypos_gau, prm$sglst$sg@e))
xKIND <- c(xKIND, yKIND_gau)
if (is.null(xtime)) {
xtime <- ytime_gau
} else {
xtime <- c(xtime, ytime_gau)
}
xgrd <- c(xgrd, rep(FALSE,p_gau))
y <- c(y, y_gau)
ypos <- rbind(ypos, ypos_gau)
yz <- c(yz, yz_gau)
yKIND <- c(yKIND, yKIND_gau)
yTYPE <- c(yTYPE, yTYPE_gau)
ytime <- c(ytime, ytime_gau)
#HE <- rbind(HE, E_gau) # as HE_gau == E_gau -- not needed as recalculated later in transformed space
if (is.null(R)) {
H <- Matrix(0,p_gau,nrow(E))
H[,(nrow(E)-p_gau+1):nrow(E)] <- Diagonal(p_gau)
R <- Rblk
} else {
H <- bdiag(H,Diagonal(p_gau))
R <- bdiag(R,Rblk)
}
rm(p_gau, E_gau, y_gau, ypos_gau, yKIND_gau, yTYPE_gau, ytime_gau)
} # end if (p_gau > 0)
if (prm$rfactor != 1) {
cat('analyseUG:: inflating R by rfactor:',prm$rfactor,'\n')
R <- prm$rfactor * R # e.g. for inflating R for fractional assimilation
}
# observations done!
# 3 :: augment state-vector with gauTS to get smoothed time series
cat('analyse:: gauTS augmentation',as.character(Sys.time()),'\n')
if (!is.null(gauTS))
stop('analyse :: code not ready as TS smoother - pending augmentation by gauTS')
# 4 :: augment state-vector with parameters [boundary conditions + model parameters]
if (!is.null(aug)) {
for (iv in 1:length(aug)) {
vKIND <- names(aug)[iv]
E <- rbind(E,aug[[iv]]$E)
xpos <- rbind(xpos, aug[[iv]]$pos)
xz <- c(xz, aug[[iv]]$z)
if (!is.null(prm$sglst))
prm$sglst$xposSG <- rbind(prm$sglst$xposSG,
pointsToLines(matrix(aug[[iv]]$pos,ncol=2), prm$sglst$sg@e)) # note this is meaningless for global parameters
xKIND <- c(xKIND, rep(vKIND,length(aug[[iv]]$timelab)))
xtime <- c(xtime, aug[[iv]]$timelab)
xgrd <- c(xgrd, rep(FALSE, length(aug[[iv]]$timelab)))
}
}
x <- rowMeans(E, na.rm=TRUE) # [n,1] ensemble mean (augmented state-vector) - physical space
if (debugmode) {
cat('analyse:: writing debugmode=TRUE files',as.character(Sys.time()),'\n')
#xvar <- varSparse(E)
xvar <- apply(E,1,var,na.rm=TRUE)
saveRDS(E, file=file.path(dsn,"results",paste("Eaug_",gsub('a','b',analysis_scn),"_",fit,".rds",sep="")))
saveRDS(xvar, file=file.path(dsn,"results",paste("xvar_",gsub('a','b',analysis_scn),"_",fit,".rds",sep="")))
}
if (length(y) == 0 && !retdy)
return(list(E=E, xdf=data.frame(xKIND=xKIND, xgrd=xgrd, xtime=xtime, xpos=xpos)))
if (nrow(E) > ncol(H)) # if 'gauTS' or 'aug' augmentation exists - append columns with 0s
H <- cbind2(H,Matrix(0,nrow(H),nrow(E)-ncol(H))) # [p,n] dgCMatrix :: H does not include transformations here
if (prm$method %in% c('pIKS','pMKS')) {
if (is.null(theta))
stop('theta list not provided for pKs parameter estimation')
theta$R <- R
if (is.null(theta$G)) { # sensitivity matrix: if not externally estimated
if (ncol(E) != length(theta$b) + 1) # get here by simple forward FD
stop('analyseUG:: dim(E) not compliant for finite-difference sensitivity estimation for pKs')
Gx <- (E[,-1] - E[,1]) / rep(theta$d, each=nrow(E)) # [n,ntheta] finite difference approximation to the Jacobian of the observations with respect to the model parameters
theta$G <- H %*% Gx # [p,ntheta] = [p,n][n,nbeta]
}
if (prm$method == 'pMKS') {
dyb <- y - H %*% E[,1] # for a fully nonlinear H, this should be calculated out of analyseUG
theta$PHT <- theta$Pthetab %*% t(theta$G)
} else { # pIKS
dyb <- y - H %*% E[,1] - theta$G %*% (theta$b0 - theta$b) # " " | theta$b0 is theta_b, theta$b is theta_l
theta$PHT <- theta$Pthetab0 %*% t(theta$G)
}
dyb <- as.matrix(dyb) # 'dgeMatrix' from the above operation does not allow for data.frame storage
theta$HPHT <- theta$G %*% theta$PHT
theta$kappa <- kappa(theta$HPHT+R) # condition number
if (theta$kappa > 1.0E06)
stop('analyseUG:: theta$kappa > 1.0E02 [',theta$kappa,'] -- consider prescaling to improve covariance matrix condition')
theta$K <- theta$PHT %*% solve(theta$HPHT + R) # [ntheta,p]
I_KG <- diag(rep(1,length(theta$b))) - theta$K %*% theta$G
if (prm$method == 'pMKS') {
theta$a <- theta$b + theta$K %*% dyb
theta$Pthetaa <- I_KG %*% theta$Pthetab %*% t(I_KG) + theta$K %*% R %*% t(theta$K) # Jazwinski (1970), p. 270 after Aoki0 (1967)
} else { # pIKS
theta$a <- theta$b0 + theta$K %*% dyb
theta$Pthetaa <- I_KG %*% theta$Pthetab0 %*% t(I_KG) + theta$K %*% R %*% t(theta$K) # Jazwinski (1970), p. 270 after Aoki0 (1967)
}
if (debugmode) {
dydf <- data.frame(y=y, dyb=dyb, yKIND=yKIND, yTYPE=yTYPE, ygrd=ygrd, ytime=ytime, ypos.lon=ypos[,1], ypos.lat=ypos[,2],
yz=yz, stringsAsFactors=FALSE)
xdf <- data.frame(xa=NA, xb=E[,1], dx=NA, xKIND=xKIND, xgrd=xgrd, xtime=xtime, xpos.lon=xpos[,1],xpos.lat=xpos[,2],
xz=xz, stringsAsFactors=FALSE) # physical space analysis, increments & metadata
saveRDS(dydf,file=file.path(dsn,"results", paste("dydf_",analysis_scn,"_",fit,".rds",sep="")))
saveRDS(xdf, file=file.path(dsn,"results", paste("xdf_", analysis_scn,"_",fit,".rds",sep="")))
}
#theta$Pthetaa <- I_KG %*% theta$Pthetab # standard
return(theta)
}
#sdB <- sdSparse(E) # pre-transform for eventual inflation
sdB <- apply(E,1,sd,na.rm=TRUE)
# 6 :: adapt data to assimilate()
# get x-based localisation lengths
cat('analyse:: getting localisation lengths',as.character(Sys.time()),'\n')
xKINDs <- unique(xKIND)
if (!all(xKINDs %in% names(ana)))
stop('analyse:: analysis parameters not provided for all xKINDs')
lls <- sapply(ana, function(x){x$ll})
xllen <- rep(0,nrow(E))
for (iv in 1:length(xKINDs)) {
xllen[xKIND == xKINDs[iv]] <- lls[xKINDs[iv]]
}; rm(lls)
#browser()
# forward transform :: individualised functions for specific kind of variables in the ensemble
# note : observations are not transformed - thus for observed vKIND variables, if observations need to be transformed,
# it has to be done prior to call analyse(), and the forward transform function of HE here is into
# the forward transform space of y.
# Also, ana[[vKIND]]$trf should be set to 'none' for this function for vKIND variables in gauDA, and transformations done externally if needed
cat('analyse:: getting forward transforms',as.character(Sys.time()),'\n')
for (iv in 1:length(xKINDs)) {
vKIND <- xKINDs[iv]
if (ana[[vKIND]]$trf == 'none')
next
if (ana[[vKIND]]$trf %in% c('autoGA','GA')) # has to be done externally
next
else {
xids <- which(xKIND == vKIND)
#yids <- which(yKIND == vKIND)
}
if (ana[[vKIND]]$trf == 'qpow35') {
E[xids,] <- qpow35(E[xids,])
#y[yids] <- qpow35(y[yids]) not ready because R would then have to be transformed as well
} else if (ana[[vKIND]]$trf == 'linearScale') {
E[xids,] <- linearScale(E[xids,], b=ana[[vKIND]]$linmul) # warning forced similar for all yTYPES
} else if (ana[[vKIND]]$trf == 'log') { # natural logarithm
E[xids,] <- log(E[xids,])
} else
stop('analyse :: transformation function ',ana[[vKIND]]$trf,' for ',vKIND,' not available')
}
cat('analyse:: adapting data to assimilate',as.character(Sys.time()),'\n')
# HE <- calcHE_Hm(H,E,Hm=NULL) # not necessarily linear - get HE - transformed [observation] space
HE <- as.matrix(H %*% E)
if (length(y) > 0) {
yb <- rowMeans(HE, na.rm=TRUE) # Hunt_al2007 notation \overline{y^b}
dyb <- y - yb # [p] average prior innovation vector - observation space
dydf <- data.frame(y=y, dyb=dyb, yKIND=yKIND, yTYPE=yTYPE, ygrd=ygrd, ytime=ytime, ypos.lon=ypos[,1], ypos.lat=ypos[,2],
yz=yz, stringsAsFactors=FALSE)
if (retdy) # to QC
return(dydf)
if (!is.null(prm$sglst))
prm$sglst$yposSG <- pointsToLines(ypos, prm$sglst$sg@e)
} else {
if (retdy)
return(NULL)
}
HA <- Matrix(HE - rowMeans(HE, na.rm=TRUE)) # [p,m] perturbations of the transformed HE matrix (recycled Hx)
rm(HE) # warning: transformation of observed variables not ready
# this would involve trf(R),trf(y)
# and re-calculate HA, dyb in the transformed space to assimilate()
xf <- rowMeans(E, na.rm=TRUE) # transformed space - state mean
A <- E - xf # [n,m] transformed space - ensemble perturbations
A <- Matrix(A) # 'dgCMatrix', else 'dgeMatrix'
#R <- Matrix(R) # R is 'dtCMatrix'[triangular sparse matrix], or 'ddiMatrix' [diagonal]
sm <- !is.na(E[which(!is.na(sdB))[1],]) # LOGICAL
prm$n <- nrow(E)
rm(E) # release memory for assimilation - recreate later
Kfname <- file.path(dsn,"results", paste("K_",analysis_scn,"_",fit,".rds",sep="")) # filename to save Kalman gain matrix
cat('analyse:: start assimilation |',as.character(Sys.time()),'| p =',length(dyb),'\n')
if (lite)
dxA <- assimilateLite(prm, A[,sm], HA[,sm], dyb, R, debugmode, Kfname)
else {
if (!exists('assimilate')) {
cat('analyseUG: WARNING:: switch to assimilateLite(), as assimilate() not available in this version')
assimilate <- assimilateLite
}
dxA <- assimilate(prm, A[,sm], HA[,sm], dyb, R, debugmode, Kfname,
xpos, xllen, ypos, mpi=mpi, sglst=prm$sglst)
}
cat('analyse:: end assimilation |',as.character(Sys.time()),'\n')
# transformed space - analysis mean
xa <- xf + as.numeric(dxA$dx) # 'dgeMatrix' to standard vector
if (prm$method != "EnOI") {
E <- matrix(NA,length(xa),m)
E[,sm] <- as.matrix(dxA$A + xa) # transformed space - analysis ensemble [cast from dgeMatrix]
}
# Hxa <- rowMeans(calcHE_Hm(H,E,Hm), na.rm=TRUE)
Hxa <- rowMeans(H %*% E, na.rm=TRUE)
dydf$dya <- y - Hxa # [p] average posterior innovation vector - observation space
# inverse transform into model space
xKINDs <- unique(xKIND)
for (iv in 1:length(xKINDs)) {
vKIND <- xKINDs[iv]
if (ana[[vKIND]]$trf == 'none')
next
if (ana[[vKIND]]$trf %in% c('autoGA','GA')) # has to be done externally
next
else
xids <- which(xKIND == vKIND)
if (ana[[vKIND]]$trf == 'qpow35') {
E[xids,] <- qpow35(E[xids,], inverse=TRUE)
} else if (ana[[vKIND]]$trf == 'linearScale') {
E[xids,] <- linearScale(E[xids,], b=ana[[vKIND]]$linmul, inverse=TRUE)
} else if (ana[[vKIND]]$trf == 'log') {
E[xids,] <- exp(E[xids,])
} else
stop('analyse:: transformation function not available')
}
xa <- rowMeans(E, na.rm=TRUE) # physical space - analysis mean
A <- E - xa # physical space - analysis perturbations
# inflate in physical ensemble space
# note: variables with forward/inverse transform external
# to analyseUG are however inflated in the transformed space!
# inflation and rotation
if (prm$method != 'EnOI') {
for (iv in 1:length(xKINDs)) {
vKIND <- xKINDs[iv]
infac <- ana[[vKIND]]$infac
#cat('analyse::vKIND:',vKIND,'|infac:',infac,'\n')
if (infac == 1.0)
next
else
xids <- which(xKIND == vKIND)
if (infac < 1.0) { # pseudo-automatic
if (length(xids) > 1) {
# sdA <- sdSparse(A[xids,])
sdA <- apply(A[xids,],1,sd,na.rm=TRUE)
} else
sdA <- sd(A[xids,])
infav <- (1.0 - infac) * sdB[xids] / sdA + infac
infav <- pmin(infav,10.0) # hard threshold. Consider improving
infav[is.na(infav)] <- 0.0
} else { # fix infac
infav <- rep(infac, length(xids))
}
A[xids,] <- A[xids,] * infav # recycle infav for each col
} # end for inflation
# TODO: randomly rotate the ensemble every prm$rotate steps
#if (prm$rotate && prm$rotate_ampl != 0) {
# if (prm$rotate_ampl == 1)
# A[xids,] <- A[xids,] * genU(m)
# else
# A[xids,] <- A[xids,] * genUeps(m, prm$rotate_ampl)
#}
E <- A + xa # physical space - analysis ensemble
} # end inflation/rotation
xdf <- data.frame(xa=xa, xb=x, dx=xa-x, xKIND=xKIND, xgrd=xgrd, xtime=xtime, xpos.lon=xpos[,1],xpos.lat=xpos[,2],
xz=xz, stringsAsFactors=FALSE) # physical space analysis, increments & metadata
if (debugmode) {
xvar = apply(E,1,'var', na.rm=TRUE)
saveRDS(E, file=file.path(dsn,"results", paste("Eaug_",analysis_scn,"_",fit,".rds",sep="")))
saveRDS(xvar,file=file.path(dsn,"results", paste("xvar_",analysis_scn,"_",fit,".rds",sep="")))
saveRDS(dydf,file=file.path(dsn,"results", paste("dydf_",analysis_scn,"_",fit,".rds",sep="")))
saveRDS(xdf, file=file.path(dsn,"results", paste("xdf_", analysis_scn,"_",fit,".rds",sep="")))
}
return(list(E=E, xdf=xdf, dydf=dydf))
} # end function analyseUG
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.