library(knitr)
opts_chunk$set(out.extra='style="display:block; margin: auto"'
    #, fig.align="center"
    #, fig.width=4.6, fig.height=3.2
    , fig.width=6, fig.height=3.75 #golden ratio 1.6
    , dev.args=list(pointsize=10)
    , dev=c('png','pdf')
    )
knit_hooks$set(spar = function(before, options, envir) {
    if (before){
        par( las=1 )                   #also y axis labels horizontal
        par(mar=c(2.0,3.3,0,0)+0.3 )  #margins
    par(tck=0.02 )                          #axis tick length inside plots
        par(mgp=c(1.1,0.2,0) )  #positioning of axis title, axis labels, axis
     }
})
# genVigs("TwoDenGP")

Accounting for model discrepancy

This vignette details the explicit modelling of the unknown model discrepancy by a Gaussian process (GP) for two data streams.

#isDevelopMode <- TRUE
if(!exists("isDevelopMode")) library(blockDemac)
library(ggplot2)
library(plyr) 
library(grid)
set.seed(0815)      # for reproducible results
baseSize=10         # font for ggplot

The example model and data

In the model, two streams of predictions are generated from the same two parameters. The variance of the first stream is dominated by the first parameter a, while the variance of the second stream is dominated by the second parameter b.

modTwTwoDenEx1 <- function(
        theta           ##<< model parameters a and b
        , xSparse       ##<< numeric vector of Sparse input/output relationship
        , xRich         ##<< numeric vector of rich input/output relationship
        , thresholdCovar=0      ##<< model structural deficiency
){
    list(  y1 = as.numeric(theta[1]*xSparse + theta[2]*mean(xRich)/10)
           ,y2 = as.numeric(theta[1]*xSparse[1] + theta[2]*pmax(0,xRich-thresholdCovar) ))
}

# second model having the discrepancy in calculation of y1 corresponding to sparse data stream
modTwTwoDenEx2 <- function(
        theta           ##<< model parameters a and b
        , xSparse       ##<< numeric vector of Sparse input/output relationship
        , xRich         ##<< numeric vector of rich input/output relationship
        , thresholdCovar=0      ##<< model structural deficiency
){
    list(  y1 = as.numeric(theta[1]*pmax(0,xSparse-thresholdCovar) + theta[2]*mean(xRich)/10)
           ,y2 = as.numeric(theta[1]*xSparse[1] + theta[2]*xRich) )   
}
#modTwTwoDenEx1 <- modTwTwoDenEx2

The synthetic data is generated by running the model. The second stream, y2, with covariate xRich has 100 times as many records as the first stream, y1, with covariate xSparse.

Note that for fitting a Gaussian process to the model discrepancies, the observations need to be ordered by increasing covariate values.

xSparse <- sort(c(1,runif(9,min=0.5,max=1.5)))  # only 10 observations
xRich <- sort(runif(1000,min=.7,max=1))    # 1000 observations    
# for plotting predictions, define a grid
xGridRich <- seq( xRich[1], xRich[length(xRich)], length.out=30)
xGridSparse <- seq( xSparse[1], xSparse[length(xSparse)], length.out=30)

thetaTrue <- c( a=1, b=2 )
theta0 <- thetaTrue
covarTheta0 <- diag(0.2*thetaTrue)
thresholdCovarTrue=0.3
obsTrue <- modTwTwoDenEx1( thetaTrue, xSparse=xSparse,xRich=xRich
        , thresholdCovar=thresholdCovarTrue)
sdObsTrue <- sdObs <- list( 
        #y1=mean(obsTrue$y1)*0.06
        #,y2=mean(obsTrue$y2)*0.02 )
        #y1=mean(obsTrue$y1)*0.04
        #,y2=mean(obsTrue$y2)*0.03 )
        y1=mean(obsTrue$y1)*0.05
        ,y2=mean(obsTrue$y2)*0.03 )
obs <- list(
        y1 = obsTrue$y1 + rnorm(length(obsTrue$y1),sd=sdObsTrue$y1) 
        ,y2 = obsTrue$y2 + rnorm(length(obsTrue$y2),sd=sdObsTrue$y2) )

# locations where model+discrepancy will be sampled        
xPredSparse <- c(range(xSparse), 1.0)   
xPredRich <- c(range(xRich), 0.8)

thresholdCovarBiased <- 0.1
     dsObs <- data.frame(
        obs=c(obs$y1, obs$y2)
        , x=c(xSparse, xRich)
        , stream=rep(c("sparse","rich"), times=c(length(xSparse),length(xRich))) 
     )
    g1 <- ggplot( dsObs, aes(x=x ) ) +
            geom_point(aes(y=obs), col="grey50") +  
            facet_wrap(~stream, scales="free", ncol=1) +
            ylab("y") +
            theme_bw(base_size=baseSize)+
            theme(axis.text.x = element_blank(), axis.text.y = element_blank()
            , axis.ticks.x = element_blank(), axis.ticks.y = element_blank())  
    print(g1)

Generating model predictions

Sampling of model discrepancies requires model predictions for the current parameters. Therefore, an intermediate result of model predictions is set up; it provides the predictions used for both data streams.

    predSpec <- intermediateSpec(
            function(theta, intermediates, xSparseVal, xRichVal, ... ){
                modTwTwoDenEx1(theta, xSparse=xSparseVal, xRich=xRichVal, ...) 
            }
            ,argsUpdateFunction=list(xSparseVal=c(xSparse,xPredSparse), xRichVal=c(xRich,xPredRich)
                , thresholdCovar=thresholdCovarBiased)
            ,parameters = c("a","b")
            ,intermediates = character(0)  
    )
    pred0 <- getUpdateFunction(predSpec)( theta0, list(), xSparse, xRich )

For comparison, we first sample parameters without accounting for model discrepancy. Note that the log-density depends on the intermediate holding the model predictions.

logDenTheta <- function( theta, intermediates, logDensityComponents
    ,xSparse, xRich, obs, sdObs, ...
){
    misfit1 <- obs$y1 - intermediates[["pred"]]$y1[seq_along(xSparse)]
    misfit2 <- obs$y2 - intermediates[["pred"]]$y2[seq_along(xRich)]
    structure( -1/2 * c(sum((misfit1/sdObs$y1)^2), sum((misfit2/sdObs$y2)^2))
        , names=c('y1','y2') )
}
resLogDenTheta0 <- logDenTheta( theta0, list(pred=pred0)
    , xSparse=xSparse, xRich=xRich, obs=obs, sdObs=sdObs )

bTheta0Spec <- blockSpec(c("a","b"),,    
        new("MetropolisBlockUpdater",
                fLogDensity=logDenTheta,
                argsFLogDensity=list( xSparse=xSparse, xRich=xRich, obs=obs, sdObs=sdObs),
                logDensityComponentNames = names(resLogDenTheta0)
        )
        ,intermediatesUsed=c("pred")
)

sampler <- newPopulationSampler( list(bTheta=bTheta0Spec), theta0, covarTheta0
, nPopulation=1L
,intermediateSpecifications=list(pred=predSpec)
)
samplerNoMd <- sampler <- setupAndSample(sampler, nSample=120L)
sampleLogs <- subsetTail(getSampleLogs(sampler), 0.8)
#plot( asMcmc(sampleLogs), smooth=FALSE )

stacked <- stackChainsInPopulation(subsetTail(getSampleLogs(sampler),0.8))
sample1 <- sampleNoMd <- t(getParametersForPopulation(stacked, 1L)[,,1L])
#summary(sample1)
thetaNoMd <- colMeans(sample1)

plot( obs$y2 ~ xRich )
lines( modTwTwoDenEx1( thetaNoMd, xSparse=xGridSparse, xRich=xGridRich, thresholdCovar=thresholdCovarBiased )$y2 ~ xGridRich, col="blue" )

plot( obs$y1 ~ xSparse )
lines( modTwTwoDenEx1( thetaNoMd, xSparse=xGridSparse, xRich=xGridRich, thresholdCovar=thresholdCovarBiased )$y1 ~ xGridSparse, col="blue" )

Note that the threshold model bias (thresholdCovarBiased) has been introduced in the modelling of the rich data stream. After fitting, however, the discrepancy is visible in the sparse data stream.

Model discrepancy sampling

The function compileDiscrepancyBlocksForStream sets up additional intermediates and blocks for sampling the model discrepancies by a Gaussian process. Besides the locations x, the observations, and their uncertainties, it requires an intermediate of model predictions, predProcSpec. In the following, these blocks are set up for each stream; their names are distinguished by the argument streamSuffix.

In addition, the function requires the specification of prior knowledge about the hyperparameters of the Gaussian process. The sampled parameter vector is extended by two hyperparameters for each stream. The first is the logarithm of the correlation length, logPsi, which is larger for more strongly correlated model discrepancies. The second is the logarithm of the signal variance, logSd2Discr, expressed as a factor of the observation variance; it is larger for larger model discrepancies.
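
For intuition only, here is a minimal sketch of how these two hyperparameters could enter a squared-exponential discrepancy covariance (the kernel form used later in this vignette). The function covDiscrSketch is hypothetical; the exact parameterization inside blockDemac is not shown here and may differ.

covDiscrSketch <- function(x, logPsi, logSd2Discr, sd2Obs){
    psi <- exp(logPsi)                       # correlation length
    sd2Discr <- exp(logSd2Discr) * sd2Obs    # signal variance as a factor of observation variance
    sd2Discr * exp(-(outer(x, x, "-")/psi)^2)
}
# a larger logPsi yields stronger correlation between discrepancies at neighbouring locations
round(covDiscrSketch(c(0.5, 0.6, 1.5), logPsi=0.2, logSd2Discr=0.5, sd2Obs=1), 3)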

We use a prior for the correlation length that depends on the spread of the covariate. The prior on the signal variance, priorInvGammaSd2DiscrPars, is kept at its default, a wide, weakly informative prior.

xSparseRange <- diff(range(xSparse))
blocksSparse <- compileDiscrepancyBlocksForStream(x=xSparse, obs=obs$y1, sd2Obs=(sdObs$y1)^2
    , predProcSpec = list(pred=predSpec), predProcSpecEntry="y1"
    , streamSuffix="sparse", xPred=xPredSparse
    ,priorGammaPsiPars = getGammaParsFromMeanAndVariance(xSparseRange/3, (xSparseRange/3.2)^2 )
)

xRichRange <- diff(range(xRich))
blocksRich <- compileDiscrepancyBlocksForStream(x=xRich, obs=obs$y2, sd2Obs=(sdObs$y2)^2
    , predProcSpec = list(pred=predSpec), predProcSpecEntry="y2"
    , streamSuffix="rich", xPred=xPredRich
    ,priorGammaPsiPars = getGammaParsFromMeanAndVariance(xRichRange/3, (xRichRange/3.2)^2)
)
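
As a side note, getGammaParsFromMeanAndVariance presumably performs moment matching of a gamma distribution (mean = shape/rate, variance = shape/rate^2). A hypothetical re-implementation of that idea, for illustration only and not the package function:

gammaParsFromMoments <- function(mean, variance){
    # moment matching: shape = mean^2/variance, rate = mean/variance
    c(shape = mean^2/variance, rate = mean/variance)
}
gammaParsFromMoments(xSparseRange/3, (xSparseRange/3.2)^2)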

The parameter vector is extended by the hyperparameters explained above. In addition, we also sample process predictions, i.e. the sum of model prediction and discrepancy, at the few locations xPred defined above.

namesPred <- c(blocksSparse$namesProcessSamples, blocksRich$namesProcessSamples )
thetaPred <- structure( rep(1,length(namesPred)), names=namesPred)
theta0Md <- c(thetaNoMd, thetaPred
    , logPsi_sparse=0.2, logSd2Discr_sparse=0.5
    , logPsi_rich=0.2, logSd2Discr_rich=0.5)
covarTheta0Md <- diag((0.2*theta0Md)^2)

The observations are modelled as the sum of model prediction, model discrepancy, and observation error: $obs = m(\theta) + \delta + \epsilon$.
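
Written out per stream $k \in \{\text{sparse}, \text{rich}\}$ (using $k_k$ to denote that stream's covariance function and $\sigma_k$ its observation uncertainty sdObs), this reads

$$obs_k = m_k(\theta) + \delta_k + \epsilon_k, \qquad \epsilon_k \sim N(0, \sigma_k^2 I), \qquad \delta_k \sim GP\big(0, k_k(x, x')\big),$$

where each discrepancy $\delta_k$ has its own Gaussian process with hyperparameters logPsi and logSd2Discr for that stream.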

The sampling of the model parameters now also depends on the model discrepancies at all observation locations. They are provided as list entry "deltaA" of the discrepancy intermediates ("deltaA_sparse" and "deltaA_rich"). The discrepancies are resampled whenever the predictions or a hyperparameter of the Gaussian process changes, including when a new proposal of the model parameters is specified.

The residual sum of squares is already computed within the intermediate of model discrepancies. It is provided as list entry "SSQpred" and can be re-used.

In addition to the model discrepancies, the term penalizing large model discrepancies is provided as list entry "logPDeltaO". The log-density function includes this component as an additional term, so that proposed parameters can be rejected when the corresponding model discrepancies become too large.

The log-density function of the model parameters $\theta$ is modified as follows.

logDenThetaMd <- function( theta, intermediates, logDensityComponents
    ,xSparse, xRich, obs, sdObs
    , ...
){
    intSparse <- intermediates[["deltaA_sparse"]]
    intRich <- intermediates[["deltaA_rich"]]
    #misfit1 <- obs$y1 - (intermediates[["pred"]]$y1[seq_along(xSparse)] + intSparse$deltaA)
    #misfit2 <- obs$y2 - (intermediates[["pred"]]$y2[seq_along(xRich)] + intRich$deltaA)
    #logPObs <- structure(-1/2*c(sum((misfit1/sdObs$y1)^2), sum((misfit2/sdObs$y2)^2)), names=c('ySparse','yRich') )   
    logPObs <- structure(-1/2*c(intSparse$SSQpred, intRich$SSQpred), names=c('ySparse','yRich') )       
    res <- c( logPObs, mdSparse=intSparse$logPDeltaO, mdRich=intRich$logPDeltaO)
    res
}

resLogDenThetaMd0 <- logDenThetaMd( theta0, list(pred=pred0
    ,deltaA_rich=list(deltaA=0, logPriorInvGammaSd2Discr=-2, logPDeltaO=-2, SSQpred=13.9)
    , deltaA_sparse=list(deltaA=0, logPriorInvGammaSd2Discr=-2, logPDeltaO=-2, SSQpred=13.9))
    , xSparse=xSparse, xRich=xRich, obs=obs, sdObs=sdObs )

bThetaMdSpec <- blockSpec(
    c("a","b"),,    
    new("MetropolisBlockUpdater",
            #logDenThetaMd,
            fLogDensity=function(...){ logDenThetaMd(...) },    # allows tracing
            argsFLogDensity=list( xSparse=xSparse, xRich=xRich, obs=obs, sdObs=sdObs),
            logDensityComponentNames = names(resLogDenThetaMd0)
    )
    ,intermediatesUsed=c("pred", "deltaA_sparse", "deltaA_rich")
)

The setup of the sampling includes all the new intermediates and blocks. In addition, the sampling space of the hyperparameters needs to be constrained, because with a large correlation length some covariance matrices become nearly singular.
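
To see why, consider the squared-exponential correlation function used later in this vignette: when the correlation length greatly exceeds the range of the covariate, the correlation matrix approaches a matrix of ones and becomes numerically ill-conditioned. A small stand-alone illustration (not part of the sampling setup):

fCorSE <- function(x1, x2, psi){ exp(-((x1 - x2)/psi)^2) }
xDemo <- sort(runif(20))
kappa(outer(xDemo, xDemo, fCorSE, psi=0.1))   # moderate condition number
kappa(outer(xDemo, xDemo, fCorSE, psi=10))    # huge condition number, nearly singular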

cSubSpace <- blocksRich$fConstrainingSubSpace( 
    blocksSparse$fConstrainingSubSpace(new("SubSpace")) )
# we know that the correlation length is of the order of the range of the locations
cSubSpace@lowerParBounds["logPsi_rich"] <- log(diff(range(xRich))/5)      


sampler <- newPopulationSampler( 
    blockSpecifications=c(list(bTheta=bThetaMdSpec)
        ,blocksSparse$blockSpecs, blocksRich$blockSpecs)
    , theta0Md, covarTheta0Md
    , nPopulation=1L
    ,intermediateSpecifications=c(blocksSparse$intermediateSpecs
        , blocksRich$intermediateSpecs[-1L])   #-1L to not include the prediction intermediate twice 
    ,subSpace=cSubSpace
)
#sampler@jumpProposer <- new("DEJumpProposer", deSettings=new("DESettings",DRgamma=0.005))

sampler <- setupAndSample(sampler, nSample=120L)
samplerMd <- sampler <- setupAndSample(sampler, nSample=60L) # extend
sampleLogs <- getSampleLogs(sampler)
plot( asMcmc(sampleLogs), smooth=FALSE )

#aL <- (getAcceptanceTracker(sampler)@acceptanceLog["logDenLogPsi_rich",,])
#computePopulationAcceptanceRates(aL)
#matplot(aL, type="l")

logTail <- subsetTail(getSampleLogs(sampler),0.6)
#logTail <- subsetTail(getSampleLogs(sampler),0.4)
#plot( asMcmc(logTail), smooth=FALSE )
stacked <- stackChainsInPopulation(logTail)
sample1 <- sampleMd <- t(getParametersForPopulation(stacked, 1L)[,,1L])

.tmp.inspectLogDensities <- function(){
    lDen1 <- t(getLogDensityComponentsForPopulation(logTail, 1L)[,,1L]) # first chain
    #lDen1 <- t(getLogDensityComponentsForPopulation(logTail, 1L)[,,3L]) # third chain
    #lDen1 <- getLogDensityComponentsForPopulation(logTail, 1L)["y2",,] # only sparse
    summary(lDen1)
    denMedian <- apply(lDen1, 2, median)
    denAnomalies <- t(t(lDen1)-denMedian)
    #plot( obs_rich ~ priorPsi_rich, data=as.data.frame(denAnomalies))   # no correlation
    plot( obs_rich ~ mD_rich, data=as.data.frame(denAnomalies))         # low probs of obs mostly at low probs of mD
    (denQuantile <- apply( apply(lDen1, 2, quantile, probs=c(0.1,0.9) ), 2, diff))
    matplot( 1:nrow(lDen1), denAnomalies, type="l" )
    legend( "bottomright", colnames(lDen1), lty=1:ncol(lDen1) ,col = 1:ncol(lDen1) )    
    plot( rowSums(lDen1), type="l" )
    plot( lDen1[,"obs_rich"] )
    #sample1 <- t(getParametersForPopulation(subsetTail(getSampleLogs(sampler),0.8), 1L)[,,1L])
 }

thetaMd <- colMeans(sample1)[1:2]
apply(sample1[,1:2], 2, quantile, probs=c(0.025,0.975) )

predSparse <- colMeans(sample1)[grep("^ksi_.*_sparse$",colnames(sample1))]
predRich <- colMeans(sample1)[grep("^ksi_.*_rich$",colnames(sample1))]
predModelSparse <- modTwTwoDenEx1(thetaMd, xSparse=c(xSparse,xPredSparse), xRich=c(xRich,xPredRich), thresholdCovar=thresholdCovarBiased)$y1[-seq_along(xSparse)]

plot( obs$y2 ~ xRich )
lines( modTwTwoDenEx1( thetaMd, xSparse=xGridSparse, xRich=xGridRich, thresholdCovar=thresholdCovarBiased )$y2 ~ xGridRich, col="blue" )
points( predRich ~ xPredRich, col="orange")

plot( obs$y1 ~ xSparse, ylim=c(range(obs$y1,xPredSparse,predModelSparse)) )
lines( modTwTwoDenEx1( thetaMd, xSparse=xGridSparse, xRich=xGridRich, thresholdCovar=thresholdCovarBiased )$y1 ~ xGridSparse, col="blue" )
points( predModelSparse ~ xPredSparse, col="blue", pch="x")
points( predSparse  ~ xPredSparse, col="orange", pch="+")

Note how the model discrepancy in the sparse stream has almost disappeared.

Excursion: Starting at optimum from ignore scenario

Here, the parameters are initialized at the optimum of the ignore scenario. We check that they still converge to the optimum of the GP case and do not get stuck in a local optimum.

ZinitParms <- getParametersForAllChains( subsetTail(getSampleLogs(samplerNoMd),0.4) )
ZinitGP <- computeInitialPopulation( theta0Md[-(1:2)], diag(covarTheta0Md)[-(1:2)]
    , subSpace=cSubSpace)[,,1L]
Zinit <- rbind(ZinitParms[,1:ncol(ZinitGP)], ZinitGP)   
sampler <- newPopulationSampler( 
    blockSpecifications=c(list(bTheta=bThetaMdSpec)
        ,blocksSparse$blockSpecs, blocksRich$blockSpecs)
    , Zinit = Zinit
    , nPopulation=1L
    ,intermediateSpecifications=c(blocksSparse$intermediateSpecs
        , blocksRich$intermediateSpecs[-1L])   #-1L to not include the prediction intermediate twice 
    ,subSpace=cSubSpace
)
#sampler@jumpProposer <- new("DEJumpProposer", deSettings=new("DESettings",DRgamma=0.005))

#sampler <- setupAndSample(sampler, nSample=120L)
samplerMd_startNoMd <- sampler <- setupAndSample(sampler, nSample=60L)
sampleLogs <- getSampleLogs(sampler)
plot( asMcmc(sampleLogs), smooth=FALSE )

#aL <- (getAcceptanceTracker(sampler)@acceptanceLog["logDenLogPsi_rich",,])
#matplot(aL, type="l")

logTail <- subsetTail(getSampleLogs(sampler),0.4)
#plot( asMcmc(logTail), smooth=FALSE )
stacked <- stackChainsInPopulation(logTail)
sample1 <- sampleMd_startNoMd <- t(getParametersForPopulation(stacked, 1L)[,,1L])

apply(sample1[,1:2], 2, quantile, probs=c(0.025,0.975) )

The sampler still converged to the case where the model discrepancy is allocated to the rich data stream. Sometimes, however, some of the chains move to a different optimum.

Comparison of the scenarios

Here we use the sampled parameters for forward predictions and plot the posterior predictive distributions.

    #ss <- sample1
    iSubsetRich <- sort(sample.int(length(xRich), 30))
    xGridRich2 <- xRich[iSubsetRich] 
    obsComb <- c(obs$y1, obs$y2[iSubsetRich])
    doSamplePredObsDiffPost <- function(ss, scenario="ignore", nSample=180 ){
        iRows <- sample.int(nrow(ss), nSample, replace=TRUE)
        predTrue <- do.call(c, modTwTwoDenEx1( thetaTrue, xSparse=xSparse, xRich=xGridRich,
             thresholdCovar=thresholdCovarTrue ))
        tmp <- apply( ss[iRows,1:2], 1, function(theta){
            predMod <- do.call(c,modTwTwoDenEx1( theta, xSparse=xSparse, xRich=xGridRich2,
                thresholdCovar=thresholdCovarBiased ))
            #predMod - predTrue
            predMod - obsComb      # too noisy? -> uses smoothers
        })
        stat <- t(apply(tmp,1,quantile,probs = c(0.025, 0.5, 0.975), names=FALSE))
        colnames(stat) <- c("lower","median","upper")        
        res <- cbind( data.frame( scenario=scenario
            , stream=rep(c("ySparse","yRich"),times=c(length(xSparse),length(xGridRich2)) )
            , x =(c(xSparse,xGridRich2)))
            , as.data.frame(stat)) 
    }
    samplePostDiff <- rbind( doSamplePredObsDiffPost(sampleNoMd,"ignore"),doSamplePredObsDiffPost(sampleMd,"GP"))    

#http://stackoverflow.com/questions/19643234/fill-region-between-two-loess-smoothed-lines-in-r-with-ggplot
dsPlot <- samplePostDiff  #droplevels(subset(samplePostDiff, scenario %in% c("ignore","GP")))
g1 <- ggplot( dsPlot, aes(x=x, col=scenario, linetype=scenario, shape=scenario ) ) + 
        facet_wrap(~stream, scales="free_x") +
        #facet_wrap(~stream, scales="free") +
        #geom_point(aes(y=median)) + 
        geom_hline(yintercept=0, col="grey") +
        stat_smooth(aes(y = upper), method = "lm", formula = y ~ x, se = FALSE, linetype="solid", size=0.2) +
        stat_smooth(aes(y = lower), method = "lm", formula = y ~ x, se = FALSE, linetype="solid", size=0.2) +        
        #geom_ribbon(aes(ymin=lower, ymax=upper), fill="grey", alpha=0.2) +
        #ylab("predicted - true value") +
        ylab("predicted - observed") +
        theme()
gg1 <- ggplot_build(g1)
iSmooth <- 2L
df2 <- data.frame(x = gg1$data[[iSmooth]]$x,
                  ymin = gg1$data[[iSmooth]]$y,
                  ymax = gg1$data[[iSmooth+1L]]$y,
                  scenario = levels(dsPlot$scenario)[(gg1$data[[iSmooth]]$group %/% 2)+1],
                  stream = levels(dsPlot$stream)[gg1$data[[iSmooth]]$PANEL]
                  )
levels(df2$scenario) <- levels(dsPlot$scenario)                  

g1 + geom_ribbon(data = df2, aes(x = x, ymin = ymin, ymax = ymax, linetype=scenario), fill="grey"
        ,alpha = 0.3, colour=NA) +
        stat_smooth(aes(y = median), method = "lm", formula = y ~ x, size = 1, se = FALSE) +
     theme_bw(base_size=baseSize)

Further graphs

Posterior predictive distributions in faceted plots: orange points are the samples of the process (model + discrepancy).

    ss <- sample1
        iSubsetRich <- sort(sample.int(length(xRich), 100))
        xGridRich2 <- xRich[iSubsetRich]
        obsS <- c(obs$y1, obs$y2[iSubsetRich])        
    doSamplePred <- function(ss, scenario="ignore", nSample=180 ){
        iRows <- sample.int(nrow(ss), nSample, replace=TRUE)
        tmp <- apply( ss[iRows,1:2], 1, function(theta){
            predMod <- do.call(c,modTwTwoDenEx1( theta, xSparse=xSparse, xRich=xGridRich2, thresholdCovar=thresholdCovarBiased ))
        })
        stat <- t(apply(tmp,1,quantile,probs = c(0.025, 0.5, 0.975), names=FALSE))
        colnames(stat) <- c("lower","median","upper")
        res <- cbind( data.frame( scenario=scenario
            , stream=rep(c("ySparse","yRich"),times=c(length(xSparse),length(xGridRich2)) )
            , x =(c(xSparse,xGridRich2)))
            , obs = obsS
            , as.data.frame(stat)) 
    }
    samplePost <- rbind( doSamplePred(sampleNoMd,"ignore"),doSamplePred(sampleMd,"GP")
    #,doSamplePred(sampleMd2,"equal")
    #,doSamplePred(sampleMdC,"MdC")
    )    


  doSamplePredProc <- function(ss, scenario="GP"){
        tmp <- ss[,grep("^ksi_",colnames(ss))]
        stat <- t(apply(tmp,2,quantile,probs = c(0.025, 0.5, 0.975), names=FALSE))
        colnames(stat) <- c("lower","median","upper")
        res <- cbind( data.frame( scenario=scenario
            , stream=rep(c("ySparse","yRich"),times=c(length(xPredSparse),length(xPredRich)) )
            , x =(c(xPredSparse,xPredRich)))
            , as.data.frame(stat))  
  }
    samplePostProc <- rbind( doSamplePredProc(sampleMd,"GP")
    #,doSamplePredProc(sampleMd2,"equal")
    #,doSamplePredProc(sampleMdC,"MdC")
    )    
    # Plot the streams independently, so that the y scales of the corresponding panels can match.
    iStream <- "yRich"
    g1 <- ggplot( subset(samplePost, stream==iStream), aes(x=x ) ) +
            geom_point(aes(y=obs), col="grey50") +  
            #facet_wrap(~stream+scenario, scales="free", ncol=2) +
            facet_grid(~scenario, scales="free_x") +  
            geom_line(aes(y=median)) + 
            geom_ribbon(aes(ymin=lower, ymax=upper), fill="blue", alpha=0.4) +
            geom_point(data=subset(samplePostProc, stream==iStream), aes(y=median), col="orange") +
            geom_errorbar(data=subset(samplePostProc, stream==iStream), aes(ymin=lower, ymax=upper), width=0.03, col="orange") +
            #ylab("respiration rate") + xlab("Temperature") +
            ylab("y rich") + xlab("x") +
            theme_bw(base_size=baseSize)+
            theme(axis.title.x=element_blank()) +
            theme(axis.text.x = element_blank(), axis.text.y = element_blank()
            , axis.ticks.x = element_blank(), axis.ticks.y = element_blank())  

    iStream <- "ySparse"
    g2 <- ggplot( subset(samplePost, stream==iStream), aes(x=x ) ) +
            geom_point(aes(y=obs), col="grey50") +  
            #facet_wrap(~stream+scenario, scales="free", ncol=2) +
            facet_grid(~scenario, scales="free_x") +  
            geom_line(aes(y=median)) + 
            geom_ribbon(aes(ymin=lower, ymax=upper), fill="blue", alpha=0.4) +
            geom_point(data=subset(samplePostProc, stream==iStream), aes(y=median), col="orange") +
            geom_errorbar(data=subset(samplePostProc, stream==iStream), aes(ymin=lower, ymax=upper), width=0.03, col="orange") +
            #ylab("cumulated respiration") + xlab("Soil carbon stocks") +
            ylab("y sparse") + xlab("x") +
            theme_bw(base_size=baseSize)+
            theme(axis.text.x = element_blank(), axis.text.y = element_blank()
            , axis.ticks.x = element_blank(), axis.ticks.y = element_blank())
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(2,1)))
    print(g1, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
    print(g2, vp = viewport(layout.pos.row = 2, layout.pos.col = 1))

Comparison of model signal variance

#colnames(sampleMdC)
varColNames <- c("logSd2Discr_sparse","logSd2Discr_rich")
dsPlot <- rbind( 
    data.frame( scenario="GP", stream="sparse", logSigma=sampleMd[,"logSd2Discr_sparse"])
    ,data.frame( scenario="GP", stream="rich", logSigma=sampleMd[,"logSd2Discr_rich"])
    #,data.frame( scenario="equal", stream="sparse", logSigma=sampleMd2[,"logSd2Discr_sparse"])
    #,data.frame( scenario="equal", stream="rich", logSigma=sampleMd2[,"logSd2Discr_rich"])
    #,data.frame( scenario="MdC", stream="sparse", logSigma=sampleMdC[,"logSd2Discr"])
    #,data.frame( scenario="MdC", stream="rich", logSigma=sampleMdC[,"logSd2Discr"])
)

g1 <- ggplot( dsPlot
    #, aes(x=logSigma, col=scenario, fill=scenario, linetype=scenario, group=interaction(scenario,stream)) ) +
    , aes(x=logSigma, col=stream, fill=stream, linetype=stream, group=interaction(scenario,stream)) ) +
    geom_density(alpha=0.1) +
    theme_bw(base_size=baseSize)
print(g1)
ds <- data.frame(modelDiscrepancy=exp(sampleMd[,"logSd2Discr_rich"]))

We sampled the signal variance, i.e. the magnitude of the model discrepancy variance, as a factor of the observation error variance. Inspection of this parameter shows that the model discrepancies in this calibration ranged between r signif(quantile(ds$modelDiscrepancy, p=0.25),2) and r signif(quantile(ds$modelDiscrepancy, p=0.75),2) times the observation uncertainty.
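
The two inline expressions above are the quartiles of the sampled factor; they can be computed directly from the posterior sample:

signif(quantile(ds$modelDiscrepancy, probs=c(0.25, 0.75)), 2)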

suppressWarnings(ggplot(ds
, aes(x=modelDiscrepancy)) + geom_density() + 
xlim(0,2) +
xlab("Model discrepancy") +     
theme_bw(base_size=baseSize))

GP approach with gradient-based optimization

The approach of modelling discrepancy by Gaussian processes can also be used in non-sampling settings, such as gradient-based optimizers. However, the properties of the discrepancy have to be specified in advance; they cannot be estimated from the data as in the sampling-based model inversion.

First, we perform a gradient-based optimization that ignores model discrepancy. A first-order estimate of the parameter uncertainty is obtained from the Hessian matrix at the optimum.
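
In formulas, with $\hat\theta$ the maximizer of the log-density $L(\theta) = \log p(obs \mid \theta)$, the code below approximates the parameter covariance by

$$\Sigma_\theta \approx (-H)^{-1}, \qquad H_{ij} = \left.\frac{\partial^2 L(\theta)}{\partial\theta_i\,\partial\theta_j}\right|_{\theta=\hat\theta},$$

and draws a parameter sample from $N(\hat\theta, \Sigma_\theta)$ for the predictive plots.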

fLogThetaIgnore <- function(theta){
    pred <- getUpdateFunction(predSpec)( theta, list(), xSparse, xRich )
    logDenComp <- logDenTheta( theta, list(pred=pred)
            , xSparse=xSparse, xRich=xRich, obs=obs, sdObs=sdObs )
    sum(logDenComp)
}
fLogThetaIgnore(theta0)
res0 <- optim(theta0, fLogThetaIgnore, control =list(fnscale=-1), hessian=TRUE, method="BFGS")
thetaBest <- res0$par
thetaCov <- -solve(res0$hessian)
sampleGradIgnore <- if( requireNamespace("mvtnorm") ){
  theta <- mvtnorm::rmvnorm( 800, thetaBest, sigma=thetaCov )
}else matrix(thetaBest, nrow=5, ncol=2, byrow=TRUE)
samplePostGrad <- rbind( doSamplePred(sampleGradIgnore,"gradIgnore")  )
g1 <- ggplot( subset(samplePostGrad), aes(x=x ) ) +
        geom_point(aes(y=obs), col="grey50") +  
        facet_wrap(~stream+scenario, scales="free", ncol=2) +
        #facet_grid(~scenario, scales="free_x") +  
        geom_line(aes(y=median)) + 
        geom_ribbon(aes(ymin=lower, ymax=upper), fill="blue", alpha=0.4) +
        #ylab("respiration rate") + xlab("Temperature") +
        xlab("") + ylab("obs") + 
        theme_bw(base_size=baseSize)+
        theme(axis.text.x = element_blank(), axis.text.y = element_blank()
                , axis.ticks.x = element_blank(), axis.ticks.y = element_blank())
print(g1)                

The posterior predictive distribution reveals model discrepancy: its magnitude exceeds the observation uncertainty, and the correlations span the entire range of the data. Based on these observations we specify the parameters of the Gaussian process that models the discrepancy. The results of the ignore-fit do not tell us in which stream the model discrepancy occurs. Hence, we specify the same magnitude, i.e. the same signal variance expressed as a factor of the measurement variance, for both streams.

# fix GP parameters: signal variance and correlation length 
sigma2d1 <- (1.5 * sdObs$y1)^2
sigma2d2 <- (1.5 * sdObs$y2)^2
psi1 <- diff(range(xSparse))/3 
psi2 <- diff(range(xRich))/3
# fix supporting locations, here as index in covariate vector
o1 <- seq(1, length(xSparse), length.out=4)
o2 <- seq(1, length(xRich), length.out=4)
# compute covariance matrix at supporting locations
fCor <- function(x1,x2,psi){  exp(-((x1-x2)/psi)^2) }
Kss1 <- sigma2d1 * outer( xSparse[o1], xSparse[o1], fCor, psi=psi1)
Kss2 <- sigma2d2 * outer( xRich[o2], xRich[o2], fCor, psi=psi2)
Kz1 <- Kss1 + diag(sdObs$y1^2, nrow=nrow(Kss1))
Kz2 <- Kss2 + diag(sdObs$y2^2, nrow=nrow(Kss2))
# compute covariance matrix at remaining locations
Krs1 <- sigma2d1 * outer( xSparse[-o1], xSparse[o1], fCor, psi=psi1)
Krs2 <- sigma2d2 * outer( xRich[-o2], xRich[o2], fCor, psi=psi2)

We are now ready to compute the expected model discrepancies and their likelihood, and to use them in the cost function.
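
With residuals $z = obs - m(\theta)$, and with subscripts $s$ and $r$ denoting the supporting locations (indices o1, o2 above) and the remaining locations, the cost function below computes, for each stream,

$$\delta_s = K_{ss}\,\big(K_{ss} + \sigma_{obs}^2 I\big)^{-1} z_s, \qquad \delta_r = K_{rs}\,\big(K_{ss} + \sigma_{obs}^2 I\big)^{-1} z_s,$$

$$\log p(\delta) = -\tfrac{1}{2}\,\delta_s^\top K_{ss}^{-1}\,\delta_s + \mathrm{const}, \qquad \log p(obs \mid \theta, \delta) = -\tfrac{1}{2}\sum_i \left(\frac{obs_i - m_i(\theta) - \delta_i}{\sigma_{obs}}\right)^2 + \mathrm{const}.$$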

fLogThetaGP <- function(theta){
    pred <- getUpdateFunction(predSpec)( theta, list(), xSparse, xRich )
    # compute expected value of model discrepancies
    z1 <- obs$y1 - pred$y1
    kZ_z1 <- solve(Kz1, z1[o1])
    delta1 <- numeric( length(pred$y1))
    delta1[o1] <- deltaS1 <- Kss1 %*% kZ_z1  
    delta1[-o1] <- deltaR1 <- Krs1 %*% kZ_z1
    z2 <- obs$y2 - pred$y2
    kZ_z2 <- solve(Kz2, z2[o2])
    delta2 <- numeric( length(pred$y2))
    delta2[o2] <- deltaS2 <- Kss2 %*% kZ_z2  
    delta2[-o2] <- deltaR2 <- Krs2 %*% kZ_z2
    # compute Likelihood of model discrepancies (penalizing large discrepancies)
    logPDelta1 <- as.vector( -1/2* delta1[o1] %*% solve(Kss1, delta1[o1]) )    
    logPDelta2 <- as.vector( -1/2* delta2[o2] %*% solve(Kss2, delta2[o2]) )    
    # compute Likelihood of observations given the model and the discrepancies
    misfit1 <- obs$y1 - (pred$y1 + delta1)
    logPObs1 <- -1/2*sum((misfit1/sdObs$y1)^2)
    misfit2 <- obs$y2 - (pred$y2 + delta2)
    logPObs2 <- -1/2*sum((misfit2/sdObs$y2)^2)
    #
    ans <- logPObs1 + logPDelta1 + logPObs2 + logPDelta2 
    ans
}
fLogThetaGP(theta0)
resGP <- optim(theta0, fLogThetaGP, hessian=TRUE, control =list(fnscale=-1), method="BFGS" ) 
thetaBestGP <- resGP$par
thetaCovGP <- -solve(resGP$hessian)
sampleGradGP <- if( requireNamespace("mvtnorm") ){
  theta <- mvtnorm::rmvnorm( 800, thetaBestGP, sigma=thetaCovGP )
}else matrix(thetaBestGP, nrow=5, ncol=2, byrow=TRUE)
samplePostGrad <- rbind( doSamplePred(sampleGradIgnore,"gradIgnore") , doSamplePred(sampleGradGP,"gradGP") )
iStream <- "yRich"
g1 <- ggplot( subset(samplePostGrad, stream==iStream), aes(x=x ) ) +
        geom_point(aes(y=obs), col="grey50") +  
        facet_grid(~scenario, scales="free_x") +  
        geom_line(aes(y=median)) + 
        geom_ribbon(aes(ymin=lower, ymax=upper), fill="blue", alpha=0.4) +
        #ylab("respiration rate") + xlab("Temperature") +
        xlab("") + ylab("y rich") + 
        theme_bw(base_size=baseSize)+
        theme(axis.title.x=element_blank()) +
        theme(axis.text.x = element_blank(), axis.text.y = element_blank()
                , axis.ticks.x = element_blank(), axis.ticks.y = element_blank())
iStream <- "ySparse"
g2 <- ggplot( subset(samplePostGrad, stream==iStream), aes(x=x ) ) +
        geom_point(aes(y=obs), col="grey50") +  
        facet_grid(~scenario, scales="free_x") +  
        geom_line(aes(y=median)) + 
        geom_ribbon(aes(ymin=lower, ymax=upper), fill="blue", alpha=0.4) +
        #ylab("cumulated respiration") + xlab("Soil carbon stocks") +
        xlab("x") + ylab("y sparse") + 
        theme_bw(base_size=baseSize)+
        theme(axis.text.x = element_blank(), axis.text.y = element_blank()
                , axis.ticks.x = element_blank(), axis.ticks.y = element_blank())  
grid.newpage()
pushViewport(viewport(layout = grid.layout(2,1)))
print(g1, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
print(g2, vp = viewport(layout.pos.row = 2, layout.pos.col = 1))

When comparing the Gaussian process (GP) scenario to the ignore scenario, the results are similar to the sampling-based inversion: the estimates of the prediction uncertainty increased, and the model discrepancy in the sparse data stream almost disappeared.


