# knitr setup: default chunk output styling, figure geometry, and graphics devices.
library(knitr)
opts_chunk$set(out.extra='style="display:block; margin: auto"'
    #, fig.align="center"
    #, fig.width=4.6, fig.height=3.2
    , fig.width=6, fig.height=3.75 # golden ratio ~1.6
    , dev.args=list(pointsize=10)
    , dev=c('png','pdf')
)
# Chunk hook 'spar': chunks that set spar=TRUE get compact base-graphics defaults
# applied before the chunk code runs.
knit_hooks$set(spar = function(before, options, envir) {
    if (before){
        par( las=1 )                  # y-axis labels horizontal
        par(mar=c(2.0,3.3,0,0)+0.3 )  # tight margins
        par(tck=0.02 )                # axis-tick length, ticks inside plot
        par(mgp=c(1.1,0.2,0) )        # positioning of axis title, labels, line
    }
})
# genVigs("TwoDenGP")
This vignette details the explicit modelling of the unknown function of model discrepancy by a Gaussian Process (GP) for two data streams.
# Load required packages and set defaults for reproducibility and plotting.
# In develop mode the package sources are assumed to be loaded already.
#isDevelopMode <- TRUE
if (!exists("isDevelopMode")) library(blockDemac)
library(ggplot2)
library(plyr)
library(grid)
set.seed(0815)  # for reproducible results
baseSize <- 10  # base font size for ggplot themes (was `baseSize=10`; use `<-` for assignment)
In the model two streams of predictions are generated, each using the same two parameters. The variance in the first stream is dominated by the first parameter a
, while the variance in the second stream is dominated by the second parameter b
.
# Toy model with two output streams sharing the parameters a and b.
# Variant 1 puts the threshold deficiency into the rich stream y2.
modTwTwoDenEx1 <- function(
    theta              ##<< model parameters a and b
    , xSparse          ##<< numeric vector of sparse input/output relationship
    , xRich            ##<< numeric vector of rich input/output relationship
    , thresholdCovar=0 ##<< model structural deficiency
){
    # y1 driven mainly by a; b enters only via the mean of xRich scaled down
    sparseStream <- theta[1]*xSparse + theta[2]*mean(xRich)/10
    # y2 driven mainly by b; only the first sparse covariate enters
    richStream <- theta[1]*xSparse[1] + theta[2]*pmax(0, xRich - thresholdCovar)
    list(
        y1 = as.numeric(sparseStream)
        , y2 = as.numeric(richStream)
    )
}
# Variant 2 moves the threshold deficiency into the sparse stream y1.
modTwTwoDenEx2 <- function(
    theta              ##<< model parameters a and b
    , xSparse          ##<< numeric vector of sparse input/output relationship
    , xRich            ##<< numeric vector of rich input/output relationship
    , thresholdCovar=0 ##<< model structural deficiency
){
    sparseStream <- theta[1]*pmax(0, xSparse - thresholdCovar) + theta[2]*mean(xRich)/10
    richStream <- theta[1]*xSparse[1] + theta[2]*xRich
    list(
        y1 = as.numeric(sparseStream)
        , y2 = as.numeric(richStream)
    )
}
#modTwTwoDenEx1 <- modTwTwoDenEx2
The synthetic data is generated by running the model. The second stream, y2
, with covariate xRich
has 100 times as many records as the first stream, y1
, with covariate xSparse
.
Note that for fitting a Gaussian process to model discrepancies, the observations need to be sorted in increasing order.
# Generate the synthetic observations by running the true model and adding
# Gaussian noise. Locations are sorted in increasing order as required for
# fitting the discrepancy Gaussian process.
# NOTE: the statement order is preserved so the RNG stream (and thus the
# vignette results) are unchanged.
xSparse <- sort(c(1, runif(9, min=0.5, max=1.5)))  # only 10 observations
xRich <- sort(runif(1000, min=.7, max=1))          # 1000 observations
# for plotting predictions, define a grid
xGridRich <- seq(xRich[1], xRich[length(xRich)], length.out=30)
xGridSparse <- seq(xSparse[1], xSparse[length(xSparse)], length.out=30)
thetaTrue <- c(a=1, b=2)
theta0 <- thetaTrue                  # start sampling at the true parameters
covarTheta0 <- diag(0.2*thetaTrue)   # initial proposal covariance
thresholdCovarTrue <- 0.3            # true structural threshold (was `=`; use `<-`)
# True (noise-free) model output at the observation locations.
obsTrue <- modTwTwoDenEx1(thetaTrue, xSparse=xSparse, xRich=xRich
    , thresholdCovar=thresholdCovarTrue)
# Observation-error standard deviations, as fractions of the mean signal.
sdObsTrue <- sdObs <- list(
    #y1=mean(obsTrue$y1)*0.06
    #,y2=mean(obsTrue$y2)*0.02 )
    #y1=mean(obsTrue$y1)*0.04
    #,y2=mean(obsTrue$y2)*0.03 )
    y1=mean(obsTrue$y1)*0.05
    , y2=mean(obsTrue$y2)*0.03)
# Noisy observations for both streams.
obs <- list(
    y1 = obsTrue$y1 + rnorm(length(obsTrue$y1), sd=sdObsTrue$y1)
    , y2 = obsTrue$y2 + rnorm(length(obsTrue$y2), sd=sdObsTrue$y2))
# locations where model+discrepancy will be sampled
xPredSparse <- c(range(xSparse), 1.0)
xPredRich <- c(range(xRich), 0.8)
# Threshold used during fitting: deliberately biased vs. thresholdCovarTrue.
thresholdCovarBiased <- 0.1
# Combine both streams into one long data.frame for faceted plotting.
dsObs <- data.frame( obs=c(obs$y1, obs$y2)
    , x=c(xSparse, xRich)
    , stream=rep(c("sparse","rich"), times=c(length(xSparse),length(xRich))) )
# Scatter plot of the synthetic observations, one facet per stream;
# axis annotation is suppressed because only the pattern matters here.
g1 <- ggplot( dsObs, aes(x=x ) ) + geom_point(aes(y=obs), col="grey50") +
    facet_wrap(~stream, scales="free", ncol=1) +
    ylab("y") +
    theme_bw(base_size=baseSize)+
    theme(axis.text.x = element_blank(), axis.text.y = element_blank()
        , axis.ticks.x = element_blank(), axis.ticks.y = element_blank())
print(g1)
Sampling of model discrepancies requires model predictions for current parameters. Therefore, one intermediate result of model predictions is set up for each data stream.
# Intermediate providing model predictions for the current parameters.
# Predictions are computed at the observation locations plus the extra
# prediction locations xPredSparse/xPredRich, using the biased threshold.
# 'parameters' declares the dependency on a and b.
predSpec <- intermediateSpec(
    function(theta, intermediates, xSparseVal, xRichVal, ... ){
        modTwTwoDenEx1(theta, xSparse=xSparseVal, xRich=xRichVal, ...)
    }
    ,argsUpdateFunction=list(xSparseVal=c(xSparse,xPredSparse), xRichVal=c(xRich,xPredRich)
        , thresholdCovar=thresholdCovarBiased)
    ,parameters = c("a","b")
    ,intermediates = character(0)
)
# Evaluate once at the initial parameters; reused below to name density components.
pred0 <- getUpdateFunction(predSpec)( theta0, list(), xSparse, xRich )
For comparison we sample parameters without accounting for model discrepancy. Note that the logDensity depends on the intermediates of the predictions.
# Log-density of the observations given parameters, IGNORING model discrepancy:
# one independent Gaussian misfit component per stream ('y1', 'y2').
logDenTheta <- function( theta, intermediates, logDensityComponents
    ,xSparse, xRich, obs, sdObs, ...
){
    # entries of pred beyond seq_along(x*) belong to the extra xPred locations
    misfit1 <- obs$y1 - intermediates[["pred"]]$y1[seq_along(xSparse)]
    misfit2 <- obs$y2 - intermediates[["pred"]]$y2[seq_along(xRich)]
    structure( -1/2 * c(sum((misfit1/sdObs$y1)^2), sum((misfit2/sdObs$y2)^2))
        , names=c('y1','y2') )
}
# Evaluate once to obtain the names of the log-density components.
resLogDenTheta0 <- logDenTheta( theta0, list(pred=pred0)
    , xSparse=xSparse, xRich=xRich, obs=obs, sdObs=sdObs )
# Metropolis block updating parameters a and b; depends on the 'pred' intermediate.
bTheta0Spec <- blockSpec(c("a","b"),, new("MetropolisBlockUpdater",
    fLogDensity=logDenTheta,
    argsFLogDensity=list( xSparse=xSparse, xRich=xRich, obs=obs, sdObs=sdObs),
    logDensityComponentNames = names(resLogDenTheta0)
    )
    ,intermediatesUsed=c("pred")
)
sampler <- newPopulationSampler( list(bTheta=bTheta0Spec), theta0, covarTheta0
    , nPopulation=1L
    ,intermediateSpecifications=list(pred=predSpec)
)
# Run the no-discrepancy reference calibration.
samplerNoMd <- sampler <- setupAndSample(sampler, nSample=120L)
sampleLogs <- subsetTail(getSampleLogs(sampler), 0.8)
#plot( asMcmc(sampleLogs), smooth=FALSE )
stacked <- stackChainsInPopulation(subsetTail(getSampleLogs(sampler),0.8))
sample1 <- sampleNoMd <- t(getParametersForPopulation(stacked, 1L)[,,1L])
#summary(sample1)
# Posterior-mean parameter estimate of the ignore-discrepancy fit.
thetaNoMd <- colMeans(sample1)
# Quick base-graphics check of fit quality in both streams.
plot( obs$y2 ~ xRich )
lines( modTwTwoDenEx1( thetaNoMd, xSparse=xGridSparse, xRich=xGridRich, thresholdCovar=thresholdCovarBiased )$y2 ~ xGridRich, col="blue" )
plot( obs$y1 ~ xSparse )
lines( modTwTwoDenEx1( thetaNoMd, xSparse=xGridSparse, xRich=xGridRich, thresholdCovar=thresholdCovarBiased )$y1 ~ xGridSparse, col="blue" )
Note that the threshold model bias (thresholdCovarBiased) has been introduced in the modelling of the rich data stream. However, after fitting, the discrepancy is visible in the sparse data stream.
Function compileDiscrepancyBlocksForStream
sets up additional intermediates and blocks for sampling the model discrepancies by a Gaussian process. Besides, locations x
, observations, and their uncertainties, it requires an intermediate of model predictions, predProcSpec
. In the following those are set up for each stream. Their names are distinguished by argument streamSuffix
.
In addition, the function requires specification of parameters of the prior knowledge of the hyperparameters of the Gaussian process.
The parameter vector to sample is extended by two parameters for each stream. The first hyperparameter is the logarithm of correlation length logPsi
. It is larger for higher correlated model discrepancies. The logarithm of signal variance logSd2Discr
, expressed as a factor of observation variance, is larger for larger model discrepancies.
We use a prior of correlation length that depends on the spread of the covariate. The prior on signal variance, priorInvGammaSd2DiscrPars
, is kept at a default wide low-informative prior.
# Set up GP-discrepancy intermediates and sampling blocks for each stream.
# The prior on the correlation length (psi) is a Gamma distribution whose mean
# is a third of the covariate range; the variance is chosen similarly wide.
xSparseRange <- diff(range(xSparse))
blocksSparse <- compileDiscrepancyBlocksForStream(x=xSparse, obs=obs$y1, sd2Obs=(sdObs$y1)^2
    , predProcSpec = list(pred=predSpec), predProcSpecEntry="y1"
    , streamSuffix="sparse", xPred=xPredSparse
    ,priorGammaPsiPars = getGammaParsFromMeanAndVariance(xSparseRange/3, (xSparseRange/3.2)^2 )
)
# Same setup for the rich stream, distinguished by streamSuffix="rich".
xRichRange <- diff(range(xRich))
blocksRich <- compileDiscrepancyBlocksForStream(x=xRich, obs=obs$y2, sd2Obs=(sdObs$y2)^2
    , predProcSpec = list(pred=predSpec), predProcSpecEntry="y2"
    , streamSuffix="rich", xPred=xPredRich
    ,priorGammaPsiPars = getGammaParsFromMeanAndVariance(xRichRange/3, (xRichRange/3.2)^2)
)
The parameter vector is extended by the hyperparameters explained above. In addition we also sample process predictions, i.e. of the sum of model and discrepancy at a few locations xPred
defined above.
# Extend the parameter vector: process samples at the prediction locations,
# plus two GP hyperparameters (log correlation length, log signal variance)
# per stream. Initial proposal covariance is diagonal, 20% of each value.
namesPred <- c(blocksSparse$namesProcessSamples, blocksRich$namesProcessSamples)
thetaPred <- setNames(rep(1, length(namesPred)), namesPred)
theta0Md <- c(
    thetaNoMd
    , thetaPred
    , logPsi_sparse = 0.2, logSd2Discr_sparse = 0.5
    , logPsi_rich = 0.2, logSd2Discr_rich = 0.5
)
covarTheta0Md <- diag((0.2 * theta0Md)^2)
The observations are modelled as the sum of model prediction and model discrepancy and observation uncertainty: $obs = m(\theta) + \delta + \epsilon$.
The sampling of the model parameters now also depends on the model discrepancies at all observations. They are provided as list entry "deltaA" of intermediate "deltaA". Resampling takes place whenever the predictions or a hyperparameter of the Gaussian Process change. This includes resampling whenever a new proposal of model parameters is specified.
The computation of the residual sums of squares is already done by the intermediate of model discrepancies. It is provided with list entry "SSQpred" and can be re-used.
In addition to the model discrepancies, the term penalizing high model discrepancies is provided with list entry "logPDeltaO". The logDensity function adds this component as an additional term. Hence, proposed parameters can be rejected when the corresponding model discrepancies are too high.
The Log-Density function of model parameters $\theta$ is modified as follows.
# Log-density of model parameters WITH model discrepancy: the observation
# misfit (as precomputed SSQ in the delta intermediates) plus a penalty term
# 'logPDeltaO' that rejects proposals implying too-large discrepancies.
logDenThetaMd <- function( theta, intermediates, logDensityComponents
    ,xSparse, xRich, obs, sdObs
    , ...
){
    intSparse <- intermediates[["deltaA_sparse"]]
    intRich <- intermediates[["deltaA_rich"]]
    # equivalent explicit computation, kept for reference:
    #misfit1 <- obs$y1 - (intermediates[["pred"]]$y1[seq_along(xSparse)] + intSparse$deltaA)
    #misfit2 <- obs$y2 - (intermediates[["pred"]]$y2[seq_along(xRich)] + intRich$deltaA)
    #logPObs <- structure(-1/2*c(sum((misfit1/sdObs$y1)^2), sum((misfit2/sdObs$y2)^2)), names=c('ySparse','yRich') )
    # reuse the SSQ already computed by the discrepancy intermediates
    logPObs <- structure(-1/2*c(intSparse$SSQpred, intRich$SSQpred), names=c('ySparse','yRich') )
    res <- c( logPObs, mdSparse=intSparse$logPDeltaO, mdRich=intRich$logPDeltaO)
    res
}
# Evaluate once with dummy intermediates to obtain the component names.
resLogDenThetaMd0 <- logDenThetaMd( theta0, list(pred=pred0
        ,deltaA_rich=list(deltaA=0, logPriorInvGammaSd2Discr=-2, logPDeltaO=-2, SSQpred=13.9)
        , deltaA_sparse=list(deltaA=0, logPriorInvGammaSd2Discr=-2, logPDeltaO=-2, SSQpred=13.9))
    , xSparse=xSparse, xRich=xRich, obs=obs, sdObs=sdObs )
# Metropolis block for a and b, now also using both discrepancy intermediates.
bThetaMdSpec <- blockSpec( c("a","b"),, new("MetropolisBlockUpdater",
    #logDenThetaMd,
    fLogDensity=function(...){ logDenThetaMd(...) }, # allows tracing
    argsFLogDensity=list( xSparse=xSparse, xRich=xRich, obs=obs, sdObs=sdObs),
    logDensityComponentNames = names(resLogDenThetaMd0)
    )
    ,intermediatesUsed=c("pred", "deltaA_sparse", "deltaA_rich")
)
The setup of the sampling includes all the new intermediates and blocks. In addition, the sampling space of the hyperparameters needs to be constrained, because with a large correlation length some covariance matrices can become nearly singular and cannot be inverted reliably.
# Constrain the hyperparameter space: combine the subspaces required by both
# stream setups, then bound the rich-stream correlation length from below.
cSubSpace <- blocksRich$fConstrainingSubSpace( blocksSparse$fConstrainingSubSpace(new("SubSpace")) )
# know that correlation length is in the magnitude of the locations
cSubSpace@lowerParBounds["logPsi_rich"] <- log(diff(range(xRich))/5)
# Sampler with the parameter block plus all GP discrepancy blocks.
sampler <- newPopulationSampler( blockSpecifications=c(list(bTheta=bThetaMdSpec)
        ,blocksSparse$blockSpecs, blocksRich$blockSpecs)
    , theta0Md, covarTheta0Md
    , nPopulation=1L
    ,intermediateSpecifications=c(blocksSparse$intermediateSpecs
        , blocksRich$intermediateSpecs[-1L]) #-1L to not include the prediction intermediate twice
    ,subSpace=cSubSpace
)
#sampler@jumpProposer <- new("DEJumpProposer", deSettings=new("DESettings",DRgamma=0.005))
sampler <- setupAndSample(sampler, nSample=120L)
samplerMd <- sampler <- setupAndSample(sampler, nSample=60L) # extend
sampleLogs <- getSampleLogs(sampler)
plot( asMcmc(sampleLogs), smooth=FALSE )
#aL <- (getAcceptanceTracker(sampler)@acceptanceLog["logDenLogPsi_rich",,])
#computePopulationAcceptanceRates(aL)
#matplot(aL, type="l")
logTail <- subsetTail(getSampleLogs(sampler),0.6)
#logTail <- subsetTail(getSampleLogs(sampler),0.4)
#plot( asMcmc(logTail), smooth=FALSE )
stacked <- stackChainsInPopulation(logTail)
sample1 <- sampleMd <- t(getParametersForPopulation(stacked, 1L)[,,1L])
# Interactive helper (not called automatically): inspect per-component
# log-density anomalies to diagnose which components limit acceptance.
.tmp.inspectLogDensities <- function(){
    lDen1 <- t(getLogDensityComponentsForPopulation(logTail, 1L)[,,1L]) # first chain
    #lDen1 <- t(getLogDensityComponentsForPopulation(logTail, 1L)[,,3L]) # third chain
    #lDen1 <- getLogDensityComponentsForPopulation(logTail, 1L)["y2",,] # only sparse
    summary(lDen1)
    denMedian <- apply(lDen1, 2, median)
    denAnomalies <- t(t(lDen1)-denMedian)
    #plot( obs_rich ~ priorPsi_rich, data=as.data.frame(denAnomalies)) # no correlation
    plot( obs_rich ~ mD_rich, data=as.data.frame(denAnomalies)) # low props of obs mostly at low probs of mD
    (denQuantile <- apply( apply(lDen1, 2, quantile, probs=c(0.1,0.9) ), 2, diff))
    matplot( 1:nrow(lDen1), denAnomalies, type="l" )
    legend( "bottomright", colnames(lDen1),
        lty=1:ncol(lDen1)
        ,col = 1:ncol(lDen1) )
    plot( rowSums(lDen1), type="l" )
    plot( lDen1[,"obs_rich"] )
    #sample1 <- t(getParametersForPopulation(subsetTail(getSampleLogs(sampler),0.8), 1L)[,,1L])
}
# Posterior summaries: parameter means/quantiles and the sampled process
# predictions (model + discrepancy) at the xPred locations (columns ksi_*).
thetaMd <- colMeans(sample1)[1:2]
apply(sample1[,1:2], 2, quantile, probs=c(0.025,0.975) )
predSparse <- colMeans(sample1)[grep("^ksi_.*_sparse$",colnames(sample1))]
predRich <- colMeans(sample1)[grep("^ksi_.*_rich$",colnames(sample1))]
# Pure model predictions at the extra sparse prediction locations.
predModelSparse <- modTwTwoDenEx1(thetaMd, xSparse=c(xSparse,xPredSparse), xRich=c(xRich,xPredRich), thresholdCovar=thresholdCovarBiased)$y1[-seq_along(xSparse)]
plot( obs$y2 ~ xRich )
lines( modTwTwoDenEx1( thetaMd, xSparse=xGridSparse, xRich=xGridRich, thresholdCovar=thresholdCovarBiased )$y2 ~ xGridRich, col="blue" )
points( predRich ~ xPredRich, col="orange")
# NOTE(review): ylim includes xPredSparse (x-values) in a y-range — looks
# suspicious; verify whether predSparse was intended here.
plot( obs$y1 ~ xSparse, ylim=c(range(obs$y1,xPredSparse,predModelSparse)) )
lines( modTwTwoDenEx1( thetaMd, xSparse=xGridSparse, xRich=xGridRich, thresholdCovar=thresholdCovarBiased )$y1 ~ xGridSparse, col="blue" )
points( predModelSparse ~ xPredSparse, col="blue", pch="x")
points( predSparse ~ xPredSparse, col="orange", pch="+")
Note how the model discrepancy in the sparse stream almost disappeared.
Here, the parameters are initially set to the optimum of the ignore scenario. It is checked that they still converge to the optimum of the GP scenario and do not get stuck in a local minimum.
# Restart experiment: initialize a and b from the no-discrepancy posterior,
# and the GP-related parameters from a fresh initial population.
ZinitParms <- getParametersForAllChains( subsetTail(getSampleLogs(samplerNoMd),0.4) )
ZinitGP <- computeInitialPopulation( theta0Md[-(1:2)], diag(covarTheta0Md)[-(1:2)]
    , subSpace=cSubSpace)[,,1L]
Zinit <- rbind(ZinitParms[,1:ncol(ZinitGP)], ZinitGP)
sampler <- newPopulationSampler( blockSpecifications=c(list(bTheta=bThetaMdSpec)
        ,blocksSparse$blockSpecs, blocksRich$blockSpecs)
    , Zinit = Zinit
    , nPopulation=1L
    ,intermediateSpecifications=c(blocksSparse$intermediateSpecs
        , blocksRich$intermediateSpecs[-1L]) #-1L to not include the prediction intermediate twice
    ,subSpace=cSubSpace
)
#sampler@jumpProposer <- new("DEJumpProposer", deSettings=new("DESettings",DRgamma=0.005))
#sampler <- setupAndSample(sampler, nSample=120L)
samplerMd_startNoMd <- sampler <- setupAndSample(sampler, nSample=60L)
sampleLogs <- getSampleLogs(sampler)
plot( asMcmc(sampleLogs), smooth=FALSE )
#aL <- (getAcceptanceTracker(sampler)@acceptanceLog["logDenLogPsi_rich",,])
#matplot(aL, type="l")
logTail <- subsetTail(getSampleLogs(sampler),0.4)
#plot( asMcmc(logTail), smooth=FALSE )
stacked <- stackChainsInPopulation(logTail)
sample1 <- sampleMd_startNoMd <- t(getParametersForPopulation(stacked, 1L)[,,1L])
# Credible intervals of a and b for comparison with the direct GP fit.
apply(sample1[,1:2], 2, quantile, probs=c(0.025,0.975) )
It still converged to the case where model discrepancy is allocated to the rich data stream. Sometimes, however, some of the chains go to a different optimum.
Here we use the sampled parameters to do forward predictions and plot the predictive posterior distributions.
#ss <- sample1
# Compare predictive posteriors (predicted - observed) of the ignore and GP
# scenarios on a random subset of the rich stream.
iSubsetRich <- sort(sample.int(length(xRich), 30))
xGridRich2 <- xRich[iSubsetRich]
obsComb <- c(obs$y1, obs$y2[iSubsetRich])
# Draw parameter vectors from the sample and summarize prediction-observation
# differences by 2.5%/50%/97.5% quantiles per location.
doSamplePredObsDiffPost <- function(ss, scenario="ignore", nSample=180 ){
    iRows <- sample.int(nrow(ss), nSample, replace=TRUE)
    predTrue <- do.call(c, modTwTwoDenEx1( thetaTrue, xSparse=xSparse, xRich=xGridRich, thresholdCovar=thresholdCovarTrue ))
    tmp <- apply( ss[iRows,1:2], 1, function(theta){
        predMod <- do.call(c,modTwTwoDenEx1( theta, xSparse=xSparse, xRich=xGridRich2, thresholdCovar=thresholdCovarBiased ))
        #predMod - predTrue
        predMod - obsComb # too noisy? -> uses smoothers
    })
    stat <- t(apply(tmp,1,quantile,probs = c(0.025, 0.5, 0.975), names=FALSE))
    colnames(stat) <- c("lower","median","upper")
    res <- cbind( data.frame( scenario=scenario
        , stream=rep(c("ySparse","yRich"),times=c(length(xSparse),length(xGridRich2)) )
        , x =(c(xSparse,xGridRich2)))
        , as.data.frame(stat))
}
samplePostDiff <- rbind( doSamplePredObsDiffPost(sampleNoMd,"ignore"),doSamplePredObsDiffPost(sampleMd,"GP"))
# Ribbon-between-smoothers technique, see:
#http://stackoverflow.com/questions/19643234/fill-region-between-two-loess-smoothed-lines-in-r-with-ggplot
dsPlot <- samplePostDiff #droplevels(subset(samplePostDiff, scenario %in% c("ignore","GP")))
g1 <- ggplot( dsPlot, aes(x=x, col=scenario, linetype=scenario, shape=scenario ) ) +
    facet_wrap(~stream, scales="free_x") +
    #facet_wrap(~stream, scales="free") +
    #geom_point(aes(y=median)) +
    geom_hline(yintercept=0, col="grey") +
    stat_smooth(aes(y = upper), method = "lm", formula = y ~ x, se = FALSE, linetype="solid", size=0.2) +
    stat_smooth(aes(y = lower), method = "lm", formula = y ~ x, se = FALSE, linetype="solid", size=0.2) +
    #geom_ribbon(aes(ymin=lower, ymax=upper), fill="grey", alpha=0.2) +
    #ylab("predicted - true value") +
    ylab("predicted - observed") +
    theme()
# Extract the fitted smoother coordinates from the built plot so a ribbon can
# be drawn between the upper and lower smoothed quantile lines.
gg1 <- ggplot_build(g1)
iSmooth <- 2L # index of the first stat_smooth layer in the built data
df2 <- data.frame(x = gg1$data[[iSmooth]]$x,
    ymin = gg1$data[[iSmooth]]$y,
    ymax = gg1$data[[iSmooth+1L]]$y,
    scenario = levels(dsPlot$scenario)[(gg1$data[[iSmooth]]$group %/% 2)+1],
    stream = levels(dsPlot$stream)[gg1$data[[iSmooth]]$PANEL] )
levels(df2$scenario) <- levels(dsPlot$scenario)
g1 + geom_ribbon(data = df2, aes(x = x, ymin = ymin, ymax = ymax, linetype=scenario), fill="grey"
        ,alpha = 0.3, colour=NA) +
    stat_smooth(aes(y = median), method = "lm", formula = y ~ x, size = 1, se = FALSE) +
    theme_bw(base_size=baseSize)
Predictive posterior in faceted plots: orange are the samples of the process (model + discrepancy).
ss <- sample1
# Larger rich-stream subset for the predictive-posterior facet plots.
iSubsetRich <- sort(sample.int(length(xRich), 100))
xGridRich2 <- xRich[iSubsetRich]
obsS <- c(obs$y1, obs$y2[iSubsetRich])
# Quantile summary (2.5%/50%/97.5%) of model predictions across parameter draws.
doSamplePred <- function(ss, scenario="ignore", nSample=180 ){
    iRows <- sample.int(nrow(ss), nSample, replace=TRUE)
    tmp <- apply( ss[iRows,1:2], 1, function(theta){
        predMod <- do.call(c,modTwTwoDenEx1( theta, xSparse=xSparse, xRich=xGridRich2, thresholdCovar=thresholdCovarBiased ))
    })
    stat <- t(apply(tmp,1,quantile,probs = c(0.025, 0.5, 0.975), names=FALSE))
    colnames(stat) <- c("lower","median","upper")
    res <- cbind( data.frame( scenario=scenario
        , stream=rep(c("ySparse","yRich"),times=c(length(xSparse),length(xGridRich2)) )
        , x =(c(xSparse,xGridRich2)))
        , obs = obsS
        , as.data.frame(stat))
}
samplePost <- rbind( doSamplePred(sampleNoMd,"ignore"),doSamplePred(sampleMd,"GP")
    #,doSamplePred(sampleMd2,"equal")
    #,doSamplePred(sampleMdC,"MdC")
)
# Quantile summary of the sampled process (model + discrepancy) at the xPred
# locations; these were sampled directly as parameters (columns ksi_*).
doSamplePredProc <- function(ss, scenario="GP"){
    tmp <- ss[,grep("^ksi_",colnames(ss))]
    stat <- t(apply(tmp,2,quantile,probs = c(0.025, 0.5, 0.975), names=FALSE))
    colnames(stat) <- c("lower","median","upper")
    res <- cbind( data.frame( scenario=scenario
        , stream=rep(c("ySparse","yRich"),times=c(length(xPredSparse),length(xPredRich)) )
        , x =(c(xPredSparse,xPredRich)))
        , as.data.frame(stat))
}
samplePostProc <- rbind( doSamplePredProc(sampleMd,"GP")
    #,doSamplePredProc(sampleMd2,"equal")
    #,doSamplePredProc(sampleMdC,"MdC")
)
# Plotting for different streams independently, so that the y-scales of the
# graphs can match across scenarios within each stream.
iStream <- "yRich"
g1 <- ggplot( subset(samplePost, stream==iStream), aes(x=x ) ) + geom_point(aes(y=obs), col="grey50") +
    #facet_wrap(~stream+scenario, scales="free", ncol=2) +
    facet_grid(~scenario, scales="free_x") +
    geom_line(aes(y=median)) +
    geom_ribbon(aes(ymin=lower, ymax=upper), fill="blue", alpha=0.4) +
    # orange: process samples (model + discrepancy) at the xPred locations
    geom_point(data=subset(samplePostProc, stream==iStream), aes(y=median), col="orange") +
    geom_errorbar(data=subset(samplePostProc, stream==iStream), aes(ymin=lower, ymax=upper), width=0.03, col="orange") +
    #ylab("respiration rate") + xlab("Temperature") +
    ylab("y rich") + xlab("x") +
    theme_bw(base_size=baseSize)+
    theme(axis.title.x=element_blank()) +
    theme(axis.text.x = element_blank(), axis.text.y = element_blank()
        , axis.ticks.x = element_blank(), axis.ticks.y = element_blank())
iStream <- "ySparse"
g2 <- ggplot( subset(samplePost, stream==iStream), aes(x=x ) ) + geom_point(aes(y=obs), col="grey50") +
    #facet_wrap(~stream+scenario, scales="free", ncol=2) +
    facet_grid(~scenario, scales="free_x") +
    geom_line(aes(y=median)) +
    geom_ribbon(aes(ymin=lower, ymax=upper), fill="blue", alpha=0.4) +
    geom_point(data=subset(samplePostProc, stream==iStream), aes(y=median), col="orange") +
    geom_errorbar(data=subset(samplePostProc, stream==iStream), aes(ymin=lower, ymax=upper), width=0.03, col="orange") +
    #ylab("cumulated respiration") + xlab("Soil carbon stocks") +
    ylab("y sparse") + xlab("x") +
    theme_bw(base_size=baseSize)+
    theme(axis.text.x = element_blank(), axis.text.y = element_blank()
        , axis.ticks.x = element_blank(), axis.ticks.y = element_blank())
# Stack the two stream plots vertically using grid viewports.
grid.newpage()
pushViewport(viewport(layout = grid.layout(2,1)))
print(g1, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
print(g2, vp = viewport(layout.pos.row = 2, layout.pos.col = 1))
Comparison of model signal variance
#colnames(sampleMdC)
# NOTE(review): varColNames is assigned but not used below.
varColNames <- c("logSd2Discr_sparse","logSd2Discr_rich")
# Posterior densities of the log signal variance, one entry per scenario/stream
# (further scenarios kept as commented-out alternatives).
dsPlot <- rbind(
    data.frame( scenario="GP", stream="sparse", logSigma=sampleMd[,"logSd2Discr_sparse"])
    ,data.frame( scenario="GP", stream="rich", logSigma=sampleMd[,"logSd2Discr_rich"])
    #,data.frame( scenario="equal", stream="sparse", logSigma=sampleMd2[,"logSd2Discr_sparse"])
    #,data.frame( scenario="equal", stream="rich", logSigma=sampleMd2[,"logSd2Discr_rich"])
    #,data.frame( scenario="MdC", stream="sparse", logSigma=sampleMdC[,"logSd2Discr"])
    #,data.frame( scenario="MdC", stream="rich", logSigma=sampleMdC[,"logSd2Discr"])
)
g1 <- ggplot( dsPlot
    #, aes(x=logSigma, col=scenario, fill=scenario, linetype=scenario, group=interaction(scenario,stream)) ) +
    , aes(x=logSigma, col=stream, fill=stream, linetype=stream, group=interaction(scenario,stream)) ) +
    geom_density(alpha=0.1) +
    theme_bw(base_size=baseSize)
print(g1)
# Back-transform the rich-stream log signal variance: the factor by which the
# model discrepancy variance exceeds the observation variance.
ds <- data.frame(modelDiscrepancy=exp(sampleMd[,"logSd2Discr_rich"]))
We sampled the signal variance, i.e. the magnitude of the model discrepancy variance, as a factor of the variance of observation uncertainty. Inspection of this parameter shows that overall model discrepancies in this calibration ranged between r signif(quantile(ds$modelDiscrepancy, p=0.25),2)
times to r signif(quantile(ds$modelDiscrepancy, p=0.75),2)
times the observation uncertainty.
# Density of the discrepancy-variance factor; xlim(0,2) cuts the long tail,
# which produces removed-rows warnings that are suppressed deliberately.
suppressWarnings(ggplot(ds
    , aes(x=modelDiscrepancy)) + geom_density() +
    xlim(0,2) +
    xlab("Model discrepancy") +
    theme_bw(base_size=baseSize))
The approach of modelling discrepancy by Gaussian Processes can be also used with non-sampling settings, such as gradient based optimizers. However, the properties of the discrepancy have to be specified in advance. They cannot be estimated from the data as with the sampling based model inversion.
First we perform a gradient based optimization with ignoring model discrepancy. A first order estimate of parameter uncertainty is obtained from the Hessian matrix at the optimum.
# Objective for gradient-based optimization IGNORING model discrepancy:
# the sum of the per-stream log-density components defined above.
fLogThetaIgnore <- function(theta){
    pred <- getUpdateFunction(predSpec)( theta, list(), xSparse, xRich )
    logDenComp <- logDenTheta( theta, list(pred=pred)
        , xSparse=xSparse, xRich=xRich, obs=obs, sdObs=sdObs )
    sum(logDenComp)
}
fLogThetaIgnore(theta0)
# Maximize (fnscale=-1) and keep the Hessian for a first-order uncertainty
# estimate: covariance = negative inverse Hessian at the optimum.
res0 <- optim(theta0, fLogThetaIgnore, control =list(fnscale=-1), hessian=TRUE, method="BFGS")
thetaBest <- res0$par
thetaCov <- -solve(res0$hessian)
# Draw a parameter sample from the Gaussian approximation; without mvtnorm
# fall back to repeating the optimum (5 identical rows).
sampleGradIgnore <- if( requireNamespace("mvtnorm") ){
    theta <- mvtnorm::rmvnorm( 800, thetaBest, sigma=thetaCov )
}else matrix(thetaBest, nrow=5, ncol=2, byrow=TRUE)
# Predictive posterior of the gradient-based ignore fit, summarized and
# plotted with the same helper as the sampling-based scenarios.
samplePostGrad <- rbind(
    doSamplePred(sampleGradIgnore,"gradIgnore")
)
g1 <- ggplot( subset(samplePostGrad), aes(x=x ) ) + geom_point(aes(y=obs), col="grey50") +
    facet_wrap(~stream+scenario, scales="free", ncol=2) +
    #facet_grid(~scenario, scales="free_x") +
    geom_line(aes(y=median)) +
    geom_ribbon(aes(ymin=lower, ymax=upper), fill="blue", alpha=0.4) +
    #ylab("respiration rate") + xlab("Temperature") +
    xlab("") + ylab("obs") +
    theme_bw(base_size=baseSize)+
    theme(axis.text.x = element_blank(), axis.text.y = element_blank()
        , axis.ticks.x = element_blank(), axis.ticks.y = element_blank())
print(g1)
From the predictive posterior we see model discrepancy. Its magnitude exceeds the observation uncertainty and the correlations span the entire range of the data. Based on these observations we specify the parameters of the Gaussian process that models discrepancy. The results of the ignore-fit do not tell us in which stream the model discrepancy is. Hence, we specify the same magnitude, i.e. the signal variance parameter as a factor of measurement variance, for both streams.
# Fixed Gaussian-process settings for the non-sampling (optimizer) scenario.
# Signal standard deviation is assumed 1.5 times the observation error.
sigma2d1 <- (1.5 * sdObs$y1)^2
sigma2d2 <- (1.5 * sdObs$y2)^2
# Correlation length: one third of the covariate range of each stream.
psi1 <- diff(range(xSparse)) / 3
psi2 <- diff(range(xRich)) / 3
# Supporting locations, given as indices into the covariate vectors.
o1 <- seq(1, length(xSparse), length.out = 4)
o2 <- seq(1, length(xRich), length.out = 4)
# Squared-exponential correlation between two locations.
fCor <- function(x1, x2, psi) {
    scaledDist <- (x1 - x2) / psi
    exp(-scaledDist^2)
}
# Covariance among supporting locations ...
Kss1 <- sigma2d1 * outer(xSparse[o1], xSparse[o1], fCor, psi = psi1)
Kss2 <- sigma2d2 * outer(xRich[o2], xRich[o2], fCor, psi = psi2)
# ... plus observation-error variance on the diagonal.
Kz1 <- Kss1 + diag(sdObs$y1^2, nrow = nrow(Kss1))
Kz2 <- Kss2 + diag(sdObs$y2^2, nrow = nrow(Kss2))
# Cross-covariance between the remaining and the supporting locations.
Krs1 <- sigma2d1 * outer(xSparse[-o1], xSparse[o1], fCor, psi = psi1)
Krs2 <- sigma2d2 * outer(xRich[-o2], xRich[o2], fCor, psi = psi2)
We are now ready to compute the expected model discrepancies and their likelihood. Hence, we can use them in the cost function.
# Objective for gradient-based optimization WITH a fixed-hyperparameter GP
# discrepancy: conditions the GP on residuals at the supporting locations,
# interpolates the discrepancy everywhere, and penalizes large discrepancies.
fLogThetaGP <- function(theta){
    pred <- getUpdateFunction(predSpec)( theta, list(), xSparse, xRich )
    # compute expected value of model discrepancies (GP conditional mean)
    z1 <- obs$y1 - pred$y1
    kZ_z1 <- solve(Kz1, z1[o1])
    delta1 <- numeric( length(pred$y1))
    delta1[o1] <- deltaS1 <- Kss1 %*% kZ_z1    # at supporting locations
    delta1[-o1] <- deltaR1 <- Krs1 %*% kZ_z1   # at remaining locations
    z2 <- obs$y2 - pred$y2
    kZ_z2 <- solve(Kz2, z2[o2])
    delta2 <- numeric( length(pred$y2))
    delta2[o2] <- deltaS2 <- Kss2 %*% kZ_z2
    delta2[-o2] <- deltaR2 <- Krs2 %*% kZ_z2
    # compute likelihood of model discrepancies (penalizing large discrepancies)
    logPDelta1 <- as.vector( -1/2* delta1[o1] %*% solve(Kss1, delta1[o1]) )
    logPDelta2 <- as.vector( -1/2* delta2[o2] %*% solve(Kss2, delta2[o2]) )
    # compute likelihood of observations given the model and the discrepancies
    misfit1 <- obs$y1 - (pred$y1 + delta1)
    logPObs1 <- -1/2*sum((misfit1/sdObs$y1)^2)
    misfit2 <- obs$y2 - (pred$y2 + delta2)
    logPObs2 <- -1/2*sum((misfit2/sdObs$y2)^2)
    #
    ans <- logPObs1 + logPDelta1 + logPObs2 + logPDelta2
    ans
}
fLogThetaGP(theta0)
# Maximize and approximate the posterior by a Gaussian at the optimum.
resGP <- optim(theta0, fLogThetaGP, hessian=TRUE, control =list(fnscale=-1), method="BFGS" )
thetaBestGP <- resGP$par
thetaCovGP <- -solve(resGP$hessian)
# Parameter sample from the Gaussian approximation; mvtnorm optional.
sampleGradGP <- if( requireNamespace("mvtnorm") ){
    theta <- mvtnorm::rmvnorm( 800, thetaBestGP, sigma=thetaCovGP )
}else matrix(thetaBestGP, nrow=5, ncol=2, byrow=TRUE)
# Compare predictive posteriors of the two gradient-based fits (ignore vs GP),
# plotted per stream and stacked vertically with grid viewports.
samplePostGrad <- rbind(
    doSamplePred(sampleGradIgnore,"gradIgnore")
    , doSamplePred(sampleGradGP,"gradGP")
)
iStream <- "yRich"
g1 <- ggplot( subset(samplePostGrad, stream==iStream), aes(x=x ) ) + geom_point(aes(y=obs), col="grey50") +
    facet_grid(~scenario, scales="free_x") +
    geom_line(aes(y=median)) +
    geom_ribbon(aes(ymin=lower, ymax=upper), fill="blue", alpha=0.4) +
    #ylab("respiration rate") + xlab("Temperature") +
    xlab("") + ylab("y rich") +
    theme_bw(base_size=baseSize)+
    theme(axis.title.x=element_blank()) +
    theme(axis.text.x = element_blank(), axis.text.y = element_blank()
        , axis.ticks.x = element_blank(), axis.ticks.y = element_blank())
iStream <- "ySparse"
g2 <- ggplot( subset(samplePostGrad, stream==iStream), aes(x=x ) ) + geom_point(aes(y=obs), col="grey50") +
    facet_grid(~scenario, scales="free_x") +
    geom_line(aes(y=median)) +
    geom_ribbon(aes(ymin=lower, ymax=upper), fill="blue", alpha=0.4) +
    #ylab("cumulated respiration") + xlab("Soil carbon stocks") +
    xlab("x") + ylab("y sparse") +
    theme_bw(base_size=baseSize)+
    theme(axis.text.x = element_blank(), axis.text.y = element_blank()
        , axis.ticks.x = element_blank(), axis.ticks.y = element_blank())
grid.newpage()
pushViewport(viewport(layout = grid.layout(2,1)))
print(g1, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
print(g2, vp = viewport(layout.pos.row = 2, layout.pos.col = 1))
When comparing the Gaussian process (GP) scenario to the ignore scenario, results are similar to the sampling-based inversion. The estimates of the prediction uncertainty increased and the model discrepancy in the sparse data stream almost disappeared.
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.