library(knitr)
opts_chunk$set(out.extra='style="display:block; margin: auto"'
    #, fig.align="center"
    #, fig.width=4.6, fig.height=3.2
    , fig.width=6, fig.height=3.75 #goldener Schnitt 1.6
    , dev.args=list(pointsize=10)
    , dev=c('png','pdf')
    )
knit_hooks$set(spar = function(before, options, envir) {
    if (before){
        par( las=1 )                   #also y axis labels horizontal
        par(mar=c(2.0,3.3,0,0)+0.3 )  #margins
        par(tck=0.02 )                          #axe-tick length inside plots             
        par(mgp=c(1.1,0.2,0) )  #positioning of axis title, axis labels, axis
     }
})

Accounting for model discrepancy

This vignette details the explicit modelling of the unknown function of model discrepancy by a Gaussian Process (GP) for two data streams.

#isDevelopMode <- TRUE
if(!exists("isDevelopMode")) library(blockDemac)
library(ggplot2)
library(plyr) 
library(grid)
set.seed(0815)      # for reproducible results
baseSize=10         # font for ggplot

The example model and data

In the model two streams of predictions are generated, each using the same two parameters. The variance in the first stream is dominated by the first parameter a, while the variance in the second stream is dominated by the second parameter b.

modTwTwoDenEx1 <- function(
        theta           ##<< model parameters a and b
        , xSparse       ##<< numeric vector of Sparse input/output relationship
        , xRich         ##<< numeric vector of rich input/output relationship
        , thresholdCovar=0      ##<< model structural deficiency
){
    list(  y1 = as.numeric(theta[1]*xSparse + theta[2]*mean(xRich)/10)
           ,y2 = as.numeric(theta[1]*xSparse[1] + theta[2]*pmax(0,xRich-thresholdCovar) ))
}

# second model having the discrepancy in calculation of y1 corresponding to sparse data stream
modTwTwoDenEx2 <- function(
        theta           ##<< model parameters a and b
        , xSparse       ##<< numeric vector of Sparse input/output relationship
        , xRich         ##<< numeric vector of rich input/output relationship
        , thresholdCovar=0      ##<< model structural deficiency
){
    list(  y1 = as.numeric(theta[1]*pmax(0,xSparse-thresholdCovar) + theta[2]*mean(xRich)/10)
           ,y2 = as.numeric(theta[1]*xSparse[1] + theta[2]*xRich) )   
}
#modTwTwoDenEx1 <- modTwTwoDenEx2

The synthetic data is generated by running the model. The second stream,y2, with covariate xRich has 100 times as many records as the first stream,y1, with covariate xSparse.

Note that for fitting a Gaussian process to model discrepancies, the observations need to be sorted in increasing order.

xSparse <- sort(c(1,runif(9,min=0.5,max=1.5)))  # only 10 observations
xRich <- sort(runif(1000,min=.7,max=1))    # 1000 observations    
# for plotting predictions, define a grid
xGridRich <- seq( xRich[1], xRich[length(xRich)], length.out=30)
xGridSparse <- seq( xSparse[1], xSparse[length(xSparse)], length.out=30)

thetaTrue <- c( a=1, b=2 )
theta0 <- thetaTrue
covarTheta0 <- diag(0.2*thetaTrue)
thresholdCovarTrue=0.3
obsTrue <- modTwTwoDenEx1( thetaTrue, xSparse=xSparse,xRich=xRich
        , thresholdCovar=thresholdCovarTrue)
sdObsTrue <- sdObs <- list( 
        #y1=mean(obsTrue$y1)*0.06
        #,y2=mean(obsTrue$y2)*0.02 )
        #y1=mean(obsTrue$y1)*0.04
        #,y2=mean(obsTrue$y2)*0.03 )
        y1=mean(obsTrue$y1)*0.05
        ,y2=mean(obsTrue$y2)*0.03 )
obs <- list(
        y1 = obsTrue$y1 + rnorm(length(obsTrue$y1),sd=sdObsTrue$y1) 
        ,y2 = obsTrue$y2 + rnorm(length(obsTrue$y2),sd=sdObsTrue$y2) )

# locations where model+discrepancy will be sampled        
xPredSparse <- c(range(xSparse), 1.0)   
xPredRich <- c(range(xRich), 0.8)

thresholdCovarBiased <- 0.1
     dsObs <- data.frame(
        obs=c(obs$y1, obs$y2)
        , x=c(xSparse, xRich)
        , stream=rep(c("sparse","rich"), times=c(length(xSparse),length(xRich))) 
     )
    g1 <- ggplot( dsObs, aes(x=x ) ) +
            geom_point(aes(y=obs), col="grey50") +  
            facet_wrap(~stream, scales="free", ncol=1) +
            ylab("y") +
            theme_bw(base_size=baseSize)+
            theme(axis.text.x = element_blank(), axis.text.y = element_blank()
            , axis.ticks.x = element_blank(), axis.ticks.y = element_blank())  
    print(g1)

Generating model predictions

Sampling of model discrepancies requires model predictions for current parameters. Therefore, one intermediate result of model predictions is set up for each data stream.

    predSpec <- intermediateSpec(
            function(theta, intermediates, xSparseVal, xRichVal, ... ){
                modTwTwoDenEx1(theta, xSparse=xSparseVal, xRich=xRichVal, ...) 
            }
            ,argsUpdateFunction=list(xSparseVal=c(xSparse,xPredSparse), xRichVal=c(xRich,xPredRich)
                , thresholdCovar=thresholdCovarBiased)
            ,parameters = c("a","b")
            ,intermediates = character(0)  
    )
    pred0 <- getUpdateFunction(predSpec)( theta0, list(), xSparse, xRich )

For comparison we sample parameters without accounting for model discrepancy. Note that the logDensity depends on the intermediates of the predictions.

logDenTheta <- function( theta, intermediates, logDensityComponents
    ,xSparse, xRich, obs, sdObs, ...
){
    misfit1 <- obs$y1 - intermediates[["pred"]]$y1[seq_along(xSparse)]
    misfit2 <- obs$y2 - intermediates[["pred"]]$y2[seq_along(xRich)]
    structure( -1/2 * c(sum((misfit1/sdObs$y1)^2), sum((misfit2/sdObs$y2)^2))
        , names=c('y1','y2') )
}
resLogDenTheta0 <- logDenTheta( theta0, list(pred=pred0)
    , xSparse=xSparse, xRich=xRich, obs=obs, sdObs=sdObs )

bTheta0Spec <- blockSpec(c("a","b"),,    
        new("MetropolisBlockUpdater",
                fLogDensity=logDenTheta,
                argsFLogDensity=list( xSparse=xSparse, xRich=xRich, obs=obs, sdObs=sdObs),
                logDensityComponentNames = names(resLogDenTheta0)
        )
        ,intermediatesUsed=c("pred")
)

sampler <- newPopulationSampler( list(bTheta=bTheta0Spec), theta0, covarTheta0
, nPopulation=1L
,intermediateSpecifications=list(pred=predSpec)
)
samplerNoMd <- sampler <- setupAndSample(sampler, nSample=120L)
sampleLogs <- subsetTail(getSampleLogs(sampler), 0.8)
#plot( asMcmc(sampleLogs), smooth=FALSE )

stacked <- stackChainsInPopulation(subsetTail(getSampleLogs(sampler),0.8))
sample1 <- sampleNoMd <- t(getParametersForPopulation(stacked, 1L)[,,1L])
#summary(sample1)
thetaNoMd <- colMeans(sample1)

plot( obs$y2 ~ xRich )
lines( modTwTwoDenEx1( thetaNoMd, xSparse=xGridSparse, xRich=xGridRich, thresholdCovar=thresholdCovarBiased )$y2 ~ xGridRich, col="blue" )

plot( obs$y1 ~ xSparse )
lines( modTwTwoDenEx1( thetaNoMd, xSparse=xGridSparse, xRich=xGridRich, thresholdCovar=thresholdCovarBiased )$y1 ~ xGridSparse, col="blue" )

Note, that the model (thresholdCovarBiased) is part of the prediction that is compared to the rich data stream. However, after fitting, the discrepancy is visible in the sparse data stream.

Model discrepancy sampling with separate signal variance per stream

Function compileDiscrepancyBlocksForStream sets up additional intermediates and blocks for sampling the model discrepancies by a Gaussian process. Besides, the locations x, observations, and its uncertainties, it required an intermediate of model predictions, predProcSpec. In the following those are set up for each stream. Their names are distinguished by argument streamSuffix.

In addition, the function requires specification of parameters of the prior knowledge of the hyperparameters of the Gaussian process. The parameter vector to sample is extended by two parameters for each stream. The first hyperparameter is the logarithm of correlation length logPsi. It is larger for higher correlated model discrepancies. The logarithm of signal variance logSd2Discr, expressed as a factor of observation variance, is larger for larger model discrepancies.

xSparseRange <- diff(range(xSparse))
blocksSparse <- compileDiscrepancyBlocksForStream(x=xSparse, obs=obs$y1, sd2Obs=(sdObs$y1)^2
    , predProcSpec = list(pred=predSpec), predProcSpecEntry="y1"
    , streamSuffix="sparse", xPred=xPredSparse
    ,priorGammaPsiPars = getGammaParsFromMeanAndVariance(xSparseRange/3, (xSparseRange/3.2)^2 )
)

xRichRange <- diff(range(xRich))
blocksRich <- compileDiscrepancyBlocksForStream(x=xRich, obs=obs$y2, sd2Obs=(sdObs$y2)^2
    , predProcSpec = list(pred=predSpec), predProcSpecEntry="y2"
    , streamSuffix="rich", xPred=xPredRich
    ,priorGammaPsiPars = getGammaParsFromMeanAndVariance(xRichRange/3, (xRichRange/3.2)^2)
)

The parameter vector is extended by the hyperparameters explained above and the samples of process prediction, i.e. of the sum of model and in addition by the discrepancies at xPred defined above.

namesPred <- c(blocksSparse$namesProcessSamples, blocksRich$namesProcessSamples )
thetaPred <- structure( rep(1,length(namesPred)), names=namesPred)
theta0Md <- c(thetaNoMd, thetaPred
    , logPsi_sparse=0.2, logSd2Discr_sparse=0.5
    , logPsi_rich=0.2, logSd2Discr_rich=0.5)
covarTheta0Md <- diag((0.2*theta0Md)^2)

The observations are modelled as the sum of model prediction and model discrepancy and observation uncertainty: $obs = m(\theta) + \delta + \epsilon$.

The sampling of the model parameters, now also depends on the model discrepancies at all observations. They are provided with list entry "deltaA" of intermediate "deltaA". Resampling takes place whenever predictions or a hyperparameter of the Gaussian Process changes. This includes the resampling on specification of a new proposal of model parameters.

In addition to the model discrepancies, the prior probability of the magnitude of these discrepancies is provided. Hence, proposed parameters can be rejected when the corresponding model discrepancies are too high.

The Likelihood hood function of model parameters $\theta$ is modified as follows.

logDenThetaMd <- function( theta, intermediates, logDensityComponents
    ,xSparse, xRich, obs, sdObs
    , priorDiscrFacSparse= length(xSparse)/2
    , priorDiscrFacRich= length(xRich)/2
    , ...
){
    intSparse <- intermediates[["deltaA_sparse"]]
    intRich <- intermediates[["deltaA_rich"]]
    deltaSparse <- intSparse$deltaA
    deltaRich <- intRich$deltaA
    #misfit1 <- obs$y1 - (intermediates[["pred"]]$y1[seq_along(xSparse)] + deltaSparse)
    #misfit2 <- obs$y2 - (intermediates[["pred"]]$y2[seq_along(xRich)] + deltaRich)
    #logPriorSd2DiscrSparse <- priorDiscrFacSparse*intermediates$deltaA_sparse$logPriorInvGammaSd2Discr 
    #logPriorSd2DiscrRich <- priorDiscrFacRich*intermediates$deltaA_rich$logPriorInvGammaSd2Discr
    logPriorSd2DiscrSparse <- intSparse$logPDeltaO  
    logPriorSd2DiscrRich <- intRich$logPDeltaO
    # logPObs <- structure(-1/2*c(sum((misfit1/sdObs$y1)^2), sum((misfit2/sdObs$y2)^2)), names=c('ySparse','yRich') )   
    logPObs <- structure(-1/2*c(intSparse$SSQpred, intRich$SSQpred), names=c('ySparse','yRich') )       
#if( logPObs["yRich"] < -515 ) recover()    
    res <- c( logPObs, mdSparse=logPriorSd2DiscrSparse, mdRich=logPriorSd2DiscrRich)
    res
}

.tmp.f <- function(){
    # code for inspection while tracing logDenThetaMd
    pred <- (intermediates[["predSparse"]][seq_along(xSparse)]) 
    plot(obs$y1 ~ xSparse, ylim=range(obs$y1, pred))
    points(pred ~ xSparse, col="blue")
    points(pred + deltaSparse ~ xSparse, col="orange")
    #
    pred <- (intermediates[["predRich"]][seq_along(xRich)]) 
    plot(obs$y2 ~ xRich, ylim=range(obs$y2, pred))
    points(pred ~ xRich, col="blue")
    points(pred + deltaRich ~ xRich, col="orange")

    with( intermediates[["deltaA_rich"]], {
        points(pred[iO]+zO ~ xO, col="red" )
    }) 

}

resLogDenThetaMd0 <- logDenThetaMd( theta0, list(pred=pred0
    ,deltaA_rich=list(deltaA=0, logPriorInvGammaSd2Discr=-2, logPDeltaO=-2, SSQpred=13.9)
    , deltaA_sparse=list(deltaA=0, logPriorInvGammaSd2Discr=-2, logPDeltaO=-2, SSQpred=13.9))
    , xSparse=xSparse, xRich=xRich, obs=obs, sdObs=sdObs )

bThetaMdSpec <- blockSpec(
c("a","b"),,    
    new("MetropolisBlockUpdater",
            #logDenThetaMd,
            fLogDensity=function(...){ logDenThetaMd(...) },    # allows tracing
            argsFLogDensity=list( xSparse=xSparse, xRich=xRich, obs=obs, sdObs=sdObs),
            logDensityComponentNames = names(resLogDenThetaMd0)
    )
    ,intermediatesUsed=c("pred", "deltaA_sparse", "deltaA_rich")
)

The setup of the sampling includes all the new intermediates and blocks. In addition, the subspace of sampling of the hyperparameters needs to be constrained, because with large correlation length, some matrices can become indeterminate.

cSubSpace <- blocksRich$fConstrainingSubSpace( 
    blocksSparse$fConstrainingSubSpace(new("SubSpace")) )
# know that correlation length is in the magnitude of the locations    
cSubSpace@lowerParBounds["logPsi_rich"] <- log(diff(range(xRich))/5)      


sampler <- newPopulationSampler( 
    blockSpecifications=c(list(bTheta=bThetaMdSpec)
        ,blocksSparse$blockSpecs, blocksRich$blockSpecs)
    , theta0Md, covarTheta0Md
    , nPopulation=1L
    ,intermediateSpecifications=c(blocksSparse$intermediateSpecs
        , blocksRich$intermediateSpecs[-1L])   #-1L to not include the prediction intermediate twice 
    ,subSpace=cSubSpace
)
#sampler@jumpProposer <- new("DEJumpProposer", deSettings=new("DESettings",DRgamma=0.005))

sampler <- setupAndSample(sampler, nSample=120L)
samplerMd <- sampler <- setupAndSample(sampler, nSample=60L)
samplerMd <- sampler <- setupAndSample(sampler, nSample=4L)
sampleLogs <- getSampleLogs(sampler)
plot( asMcmc(sampleLogs), smooth=FALSE )

#aL <- (getAcceptanceTracker(sampler)@acceptanceLog["logDenLogPsi_rich",,])
#computePopulationAcceptanceRates(aL)
#matplot(aL, type="l")

logTail <- subsetTail(getSampleLogs(sampler),0.6)
#logTail <- subsetTail(getSampleLogs(sampler),0.4)
#plot( asMcmc(logTail), smooth=FALSE )
stacked <- stackChainsInPopulation(logTail)
sample1 <- sampleMd <- t(getParametersForPopulation(stacked, 1L)[,,1L])

.tmp.f <- function(){
    lDen1 <- t(getLogDensityComponentsForPopulation(logTail, 1L)[,,1L]) # first chain
    #lDen1 <- t(getLogDensityComponentsForPopulation(logTail, 1L)[,,3L]) # third chain
    #lDen1 <- getLogDensityComponentsForPopulation(logTail, 1L)["y2",,] # only sparse
    summary(lDen1)
    denMedian <- apply(lDen1, 2, median)
    denAnomalies <- t(t(lDen1)-denMedian)
    #plot( obs_rich ~ priorPsi_rich, data=as.data.frame(denAnomalies))   # no correlation
    plot( obs_rich ~ mD_rich, data=as.data.frame(denAnomalies))         # low props of obs mostly at low probs of mD
    (denQuantile <- apply( apply(lDen1, 2, quantile, probs=c(0.1,0.9) ), 2, diff))
    matplot( 1:nrow(lDen1), denAnomalies, type="l" )
    legend( "bottomright", colnames(lDen1), lty=1:ncol(lDen1) ,col = 1:ncol(lDen1) )    
    plot( rowSums(lDen1), type="l" )
    plot( lDen1[,"obs_rich"] )
    #sample1 <- t(getParametersForPopulation(subsetTail(getSampleLogs(sampler),0.8), 1L)[,,1L])
 }

thetaMd <- colMeans(sample1)[1:2]
apply(sample1[,1:2], 2, quantile, probs=c(0.025,0.975) )

predSparse <- colMeans(sample1)[grep("^ksi_.*_sparse$",colnames(sample1))]
predRich <- colMeans(sample1)[grep("^ksi_.*_rich$",colnames(sample1))]
predModelSparse <- modTwTwoDenEx1(thetaMd, xSparse=c(xSparse,xPredSparse), xRich=c(xRich,xPredRich), thresholdCovar=thresholdCovarBiased)$y1[-seq_along(xSparse)]

plot( obs$y2 ~ xRich )
lines( modTwTwoDenEx1( thetaMd, xSparse=xGridSparse, xRich=xGridRich, thresholdCovar=thresholdCovarBiased )$y2 ~ xGridRich, col="blue" )
points( predRich ~ xPredRich, col="orange")

plot( obs$y1 ~ xSparse, ylim=c(range(obs$y1,xPredSparse,predModelSparse)) )
lines( modTwTwoDenEx1( thetaMd, xSparse=xGridSparse, xRich=xGridRich, thresholdCovar=thresholdCovarBiased )$y1 ~ xGridSparse, col="blue" )
points( predModelSparse ~ xPredSparse, col="blue", pch="x")
points( predSparse  ~ xPredSparse, col="orange", pch="+")

plot( density(sample1[,"logSd2Discr_rich"]), col="blue")
lines( density(sample1[,"logSd2Discr_sparse"]), col="red")

#plot( density(sample1[,"logSd2Discr_sparse"]), col="red")
#lines( density(sample1[,"logSd2Discr_rich"]), col="blue")

Starting at optimum from ignore scenario

Here we explore the hypothesis that the optimum of the ignore scenario still and optimum in the GP case. The parameters are set initially to the ignore optimum, and it is checked that they still converge to the GP optimum case.

ZinitParms <- getParametersForAllChains( subsetTail(getSampleLogs(samplerNoMd),0.4) )
ZinitGP <- computeInitialPopulation( theta0Md[-(1:2)], diag(covarTheta0Md)[-(1:2)]
    , subSpace=cSubSpace)[,,1L]
Zinit <- rbind(ZinitParms[,1:ncol(ZinitGP)], ZinitGP)   
sampler <- newPopulationSampler( 
    blockSpecifications=c(list(bTheta=bThetaMdSpec)
        ,blocksSparse$blockSpecs, blocksRich$blockSpecs)
    , Zinit = Zinit
    , nPopulation=1L
    ,intermediateSpecifications=c(blocksSparse$intermediateSpecs
        , blocksRich$intermediateSpecs[-1L])   #-1L to not include the prediction intermediate twice 
    ,subSpace=cSubSpace
)
#sampler@jumpProposer <- new("DEJumpProposer", deSettings=new("DESettings",DRgamma=0.005))

#sampler <- setupAndSample(sampler, nSample=120L)
samplerMd_startNoMd <- sampler <- setupAndSample(sampler, nSample=60L)
sampleLogs <- getSampleLogs(sampler)
plot( asMcmc(sampleLogs), smooth=FALSE )

#aL <- (getAcceptanceTracker(sampler)@acceptanceLog["logDenLogPsi_rich",,])
#matplot(aL, type="l")

logTail <- subsetTail(getSampleLogs(sampler),0.4)
#plot( asMcmc(logTail), smooth=FALSE )
stacked <- stackChainsInPopulation(logTail)
sample1 <- sampleMd_startNoMd <- t(getParametersForPopulation(stacked, 1L)[,,1L])

apply(sample1[,1:2], 2, quantile, probs=c(0.025,0.975) )

It still converged to the case where model discrepancy is allocated to the rich data stream.

Comparison of the scenarios

    #ss <- sample1
    iSubsetRich <- sort(sample.int(length(xRich), 30))
    xGridRich2 <- xRich[iSubsetRich] 
    obsComb <- c(obs$y1, obs$y2[iSubsetRich])
    doSamplePredObsDiffPost <- function(ss, scenario="ignore", nSample=180 ){
        iRows <- sample.int(nrow(ss), nSample, replace=TRUE)
        predTrue <- do.call(c, modTwTwoDenEx1( thetaTrue, xSparse=xSparse, xRich=xGridRich, thresholdCovar=thresholdCovarTrue ))
        tmp <- apply( ss[iRows,1:2], 1, function(theta){
            predMod <- do.call(c,modTwTwoDenEx1( theta, xSparse=xSparse, xRich=xGridRich2, thresholdCovar=thresholdCovarBiased ))
            #predMod - predTrue
            predMod - obsComb      # too noisy? -> uses smoothers
        })
        stat <- t(apply(tmp,1,quantile,probs = c(0.025, 0.5, 0.975), names=FALSE))
        colnames(stat) <- c("lower","median","upper")        
        res <- cbind( data.frame( scenario=scenario
            , stream=rep(c("ySparse","yRich"),times=c(length(xSparse),length(xGridRich2)) )
            , x =(c(xSparse,xGridRich2)))
            , as.data.frame(stat)) 
    }
    samplePostDiff <- rbind( doSamplePredObsDiffPost(sampleNoMd,"ignore"),doSamplePredObsDiffPost(sampleMd,"GP"))    

#http://stackoverflow.com/questions/19643234/fill-region-between-two-loess-smoothed-lines-in-r-with-ggplot
dsPlot <- samplePostDiff  #droplevels(subset(samplePostDiff, scenario %in% c("ignore","GP")))
g1 <- ggplot( dsPlot, aes(x=x, col=scenario, linetype=scenario, shape=scenario ) ) + 
        facet_wrap(~stream, scales="free_x") +
        #facet_wrap(~stream, scales="free") +
        #geom_point(aes(y=median)) + 
        geom_hline(yintercept=0, col="grey") +
        stat_smooth(aes(y = upper), method = "lm", formula = y ~ x, se = FALSE, linetype="solid", size=0.2) +
        stat_smooth(aes(y = lower), method = "lm", formula = y ~ x, se = FALSE, linetype="solid", size=0.2) +        
        #geom_ribbon(aes(ymin=lower, ymax=upper), fill="grey", alpha=0.2) +
        #ylab("predicted - true value") +
        ylab("predicted - observed") +
        theme()
gg1 <- ggplot_build(g1)
iSmooth <- 2L
df2 <- data.frame(x = gg1$data[[iSmooth]]$x,
                  ymin = gg1$data[[iSmooth]]$y,
                  ymax = gg1$data[[iSmooth+1L]]$y,
                  scenario = levels(dsPlot$scenario)[(gg1$data[[iSmooth]]$group %/% 2)+1],
                  stream = levels(dsPlot$stream)[gg1$data[[iSmooth]]$PANEL]
                  )
levels(df2$scenario) <- levels(dsPlot$scenario)                  

g1 + geom_ribbon(data = df2, aes(x = x, ymin = ymin, ymax = ymax, linetype=scenario), fill="grey"
        ,alpha = 0.3, colour=NA) +
        stat_smooth(aes(y = median), method = "lm", formula = y ~ x, size = 1, se = FALSE) +
     theme_bw(base_size=baseSize)

Further graphs

Predictive posterior: orange are the samples of the process (model + discrepancy).

    ss <- sample1
        iSubsetRich <- sort(sample.int(length(xRich), 100))
        xGridRich2 <- xRich[iSubsetRich]
        obsS <- c(obs$y1, obs$y2[iSubsetRich])        
    doSamplePred <- function(ss, scenario="ignore", nSample=180 ){
        iRows <- sample.int(nrow(ss), nSample, replace=TRUE)
        tmp <- apply( ss[iRows,1:2], 1, function(theta){
            predMod <- do.call(c,modTwTwoDenEx1( theta, xSparse=xSparse, xRich=xGridRich2, thresholdCovar=thresholdCovarBiased ))
        })
        stat <- t(apply(tmp,1,quantile,probs = c(0.025, 0.5, 0.975), names=FALSE))
        colnames(stat) <- c("lower","median","upper")
        res <- cbind( data.frame( scenario=scenario
            , stream=rep(c("ySparse","yRich"),times=c(length(xSparse),length(xGridRich2)) )
            , x =(c(xSparse,xGridRich2)))
            , obs = obsS
            , as.data.frame(stat)) 
    }
    samplePost <- rbind( doSamplePred(sampleNoMd,"ignore"),doSamplePred(sampleMd,"GP")
    #,doSamplePred(sampleMd2,"equal")
    #,doSamplePred(sampleMdC,"MdC")
    )    


  doSamplePredProc <- function(ss, scenario="GP"){
        tmp <- ss[,grep("^ksi_",colnames(ss))]
        stat <- t(apply(tmp,2,quantile,probs = c(0.025, 0.5, 0.975), names=FALSE))
        colnames(stat) <- c("lower","median","upper")
        res <- cbind( data.frame( scenario=scenario
            , stream=rep(c("ySparse","yRich"),times=c(length(xPredSparse),length(xPredRich)) )
            , x =(c(xPredSparse,xPredRich)))
            , as.data.frame(stat))  
  }
    samplePostProc <- rbind( doSamplePredProc(sampleMd,"GP")
    #,doSamplePredProc(sampleMd2,"equal")
    #,doSamplePredProc(sampleMdC,"MdC")
    )    


plotScen <- "ignore"
plotScen <- "GP"
for( plotScen in character(0) ){ #levels(samplePost$scenario) ){
    print(plotScen)
    dsPlot <- subset(samplePost, scenario==plotScen )
    dsPlotProc <- subset(samplePostProc, scenario==plotScen )

    g1 <- ggplot( dsPlot, aes(x=x ) ) +
            geom_point(aes(y=obs), col="grey50") +  
            #facet_wrap(~stream, scales="free_x") +
            facet_wrap(~stream, scales="free") +
            geom_line(aes(y=median)) + 
            geom_ribbon(aes(ymin=lower, ymax=upper), fill="blue", alpha=0.4) +
            geom_point(data=dsPlotProc, aes(y=median), col="orange") +
            {if( plotScen == "ignore") list() else geom_errorbar(data=dsPlotProc, aes(ymin=lower, ymax=upper), width=0.05, col="orange")} +  
            #geom_errorbar(data=dsPlotProc, aes(ymin=lower, ymax=upper), width=0.05, col="orange") +
            ylab("y") +
     theme_bw(base_size=baseSize)
    print(g1)      
}  
    g1 <- ggplot( samplePost, aes(x=x ) ) +
            geom_point(aes(y=obs), col="grey50") +  
            facet_wrap(~stream+scenario, scales="free", ncol=2) +
            #facet_grid(stream~scenario, scales="free_x") +  
            geom_line(aes(y=median)) + 
            geom_ribbon(aes(ymin=lower, ymax=upper), fill="blue", alpha=0.4) +
            geom_point(data=samplePostProc, aes(y=median), col="orange") +
            geom_errorbar(data=samplePostProc, aes(ymin=lower, ymax=upper), width=0.03, col="orange") +
            ylab("y") +
            theme_bw(base_size=baseSize)+
            theme(axis.text.x = element_blank(), axis.text.y = element_blank()
            , axis.ticks.x = element_blank(), axis.ticks.y = element_blank())  
    print(g1)
    iStream <- "yRich"
    g1 <- ggplot( subset(samplePost, stream==iStream), aes(x=x ) ) +
            geom_point(aes(y=obs), col="grey50") +  
            facet_wrap(~stream+scenario, scales="free", ncol=2) +
            facet_grid(~scenario, scales="free_x") +  
            geom_line(aes(y=median)) + 
            geom_ribbon(aes(ymin=lower, ymax=upper), fill="blue", alpha=0.4) +
            #geom_point(data=subset(samplePostProc, stream==iStream), aes(y=median), col="orange") +
            #geom_errorbar(data=subset(samplePostProc, stream==iStream), aes(ymin=lower, ymax=upper), width=0.03, col="orange") +
            ylab("respiration rate") + xlab("Temperature") +
            theme_bw(base_size=baseSize)+
            theme(axis.text.x = element_blank(), axis.text.y = element_blank()
            , axis.ticks.x = element_blank(), axis.ticks.y = element_blank())  
    print(g1)

    iStream <- "ySparse"
    g2 <- ggplot( subset(samplePost, stream==iStream), aes(x=x ) ) +
            geom_point(aes(y=obs), col="grey50") +  
            facet_wrap(~stream+scenario, scales="free", ncol=2) +
            facet_grid(~scenario, scales="free_x") +  
            geom_line(aes(y=median)) + 
            geom_ribbon(aes(ymin=lower, ymax=upper), fill="blue", alpha=0.4) +
            #geom_point(data=subset(samplePostProc, stream==iStream), aes(y=median), col="orange") +
            #geom_errorbar(data=subset(samplePostProc, stream==iStream), aes(ymin=lower, ymax=upper), width=0.03, col="orange") +
            ylab("cumulated respiration") + xlab("Soil carbon stocks") +
            theme_bw(base_size=baseSize)+
            theme(axis.text.x = element_blank(), axis.text.y = element_blank()
            , axis.ticks.x = element_blank(), axis.ticks.y = element_blank())  
    print(g2)    

Comparison of model signal variance

#colnames(sampleMdC)
varColNames <- c("logSd2Discr_sparse","logSd2Discr_rich")
dsPlot <- rbind( 
    data.frame( scenario="GP", stream="sparse", logSigma=sampleMd[,"logSd2Discr_sparse"])
    ,data.frame( scenario="GP", stream="rich", logSigma=sampleMd[,"logSd2Discr_rich"])
    #,data.frame( scenario="equal", stream="sparse", logSigma=sampleMd2[,"logSd2Discr_sparse"])
    #,data.frame( scenario="equal", stream="rich", logSigma=sampleMd2[,"logSd2Discr_rich"])
    #,data.frame( scenario="MdC", stream="sparse", logSigma=sampleMdC[,"logSd2Discr"])
    #,data.frame( scenario="MdC", stream="rich", logSigma=sampleMdC[,"logSd2Discr"])
)

g1 <- ggplot( dsPlot
    #, aes(x=logSigma, col=scenario, fill=scenario, linetype=scenario, group=interaction(scenario,stream)) ) +
    , aes(x=logSigma, col=stream, fill=stream, linetype=stream, group=interaction(scenario,stream)) ) +
    geom_density(alpha=0.1) +
    theme_bw(base_size=baseSize)
print(g1)
ds <- data.frame(modelDiscrepancy=exp(sampleMd[,"logSd2Discr_rich"]))

We sampled the signal variance, i.e. the magnitude of the model discrepancy variance, as a factor of the variance of observation uncertainty. Inspection of this parameter shows that overall model discrepancies in this calibration ranged between r signif(quantile(ds$modelDiscrepancy, p=0.25),2) times to r signif(quantile(ds$modelDiscrepancy, p=0.75),2) times the observation uncertainty.

suppressWarnings(ggplot(ds
, aes(x=modelDiscrepancy)) + geom_density() + 
xlim(0,2) +
xlab("Model discrepancy") +     
theme_bw(base_size=baseSize))


Try the blockDemac package in your browser

Any scripts or data that you put into this service are public.

blockDemac documentation built on May 2, 2019, 4:52 p.m.