library(knitr) opts_chunk$set(out.extra='style="display:block; margin: auto"' #, fig.align="center" #, fig.width=4.6, fig.height=3.2 , fig.width=6, fig.height=3.75 #goldener Schnitt 1.6 , dev.args=list(pointsize=10) , dev=c('png','pdf') ) knit_hooks$set(spar = function(before, options, envir) { if (before){ par( las=1 ) #also y axis labels horizontal par(mar=c(2.0,3.3,0,0)+0.3 ) #margins par(tck=0.02 ) #axe-tick length inside plots par(mgp=c(1.1,0.2,0) ) #positioning of axis title, axis labels, axis } })
This vignette details the explicit modelling of the unknown function of model discrepancy by a Gaussian Process (GP) for two data streams.
#isDevelopMode <- TRUE if(!exists("isDevelopMode")) library(blockDemac) library(ggplot2) library(plyr) library(grid) set.seed(0815) # for reproducible results baseSize=10 # font for ggplot
In the model two streams of predictions are generated, each using the same two parameters. The variance in the first stream is dominated by the first parameter a
, while the variance in the second stream is dominated by the second parameter b
.
modTwTwoDenEx1 <- function( theta ##<< model parameters a and b , xSparse ##<< numeric vector of Sparse input/output relationship , xRich ##<< numeric vector of rich input/output relationship , thresholdCovar=0 ##<< model structural deficiency ){ list( y1 = as.numeric(theta[1]*xSparse + theta[2]*mean(xRich)/10) ,y2 = as.numeric(theta[1]*xSparse[1] + theta[2]*pmax(0,xRich-thresholdCovar) )) } # second model having the discrepancy in calculation of y1 corresponding to sparse data stream modTwTwoDenEx2 <- function( theta ##<< model parameters a and b , xSparse ##<< numeric vector of Sparse input/output relationship , xRich ##<< numeric vector of rich input/output relationship , thresholdCovar=0 ##<< model structural deficiency ){ list( y1 = as.numeric(theta[1]*pmax(0,xSparse-thresholdCovar) + theta[2]*mean(xRich)/10) ,y2 = as.numeric(theta[1]*xSparse[1] + theta[2]*xRich) ) } #modTwTwoDenEx1 <- modTwTwoDenEx2
The synthetic data is generated by running the model. The second stream,y2
, with covariate xRich
has 100 times as many records as the first stream,y1
, with covariate xSparse
.
Note that for fitting a Gaussian process to model discrepancies, the observations need to be sorted in increasing order.
xSparse <- sort(c(1,runif(9,min=0.5,max=1.5))) # only 10 observations xRich <- sort(runif(1000,min=.7,max=1)) # 1000 observations # for plotting predictions, define a grid xGridRich <- seq( xRich[1], xRich[length(xRich)], length.out=30) xGridSparse <- seq( xSparse[1], xSparse[length(xSparse)], length.out=30) thetaTrue <- c( a=1, b=2 ) theta0 <- thetaTrue covarTheta0 <- diag(0.2*thetaTrue) thresholdCovarTrue=0.3 obsTrue <- modTwTwoDenEx1( thetaTrue, xSparse=xSparse,xRich=xRich , thresholdCovar=thresholdCovarTrue) sdObsTrue <- sdObs <- list( #y1=mean(obsTrue$y1)*0.06 #,y2=mean(obsTrue$y2)*0.02 ) #y1=mean(obsTrue$y1)*0.04 #,y2=mean(obsTrue$y2)*0.03 ) y1=mean(obsTrue$y1)*0.05 ,y2=mean(obsTrue$y2)*0.03 ) obs <- list( y1 = obsTrue$y1 + rnorm(length(obsTrue$y1),sd=sdObsTrue$y1) ,y2 = obsTrue$y2 + rnorm(length(obsTrue$y2),sd=sdObsTrue$y2) ) # locations where model+discrepancy will be sampled xPredSparse <- c(range(xSparse), 1.0) xPredRich <- c(range(xRich), 0.8) thresholdCovarBiased <- 0.1
dsObs <- data.frame( obs=c(obs$y1, obs$y2) , x=c(xSparse, xRich) , stream=rep(c("sparse","rich"), times=c(length(xSparse),length(xRich))) ) g1 <- ggplot( dsObs, aes(x=x ) ) + geom_point(aes(y=obs), col="grey50") + facet_wrap(~stream, scales="free", ncol=1) + ylab("y") + theme_bw(base_size=baseSize)+ theme(axis.text.x = element_blank(), axis.text.y = element_blank() , axis.ticks.x = element_blank(), axis.ticks.y = element_blank()) print(g1)
Sampling of model discrepancies requires model predictions for current parameters. Therefore, one intermediate result of model predictions is set up for each data stream.
predSpec <- intermediateSpec( function(theta, intermediates, xSparseVal, xRichVal, ... ){ modTwTwoDenEx1(theta, xSparse=xSparseVal, xRich=xRichVal, ...) } ,argsUpdateFunction=list(xSparseVal=c(xSparse,xPredSparse), xRichVal=c(xRich,xPredRich) , thresholdCovar=thresholdCovarBiased) ,parameters = c("a","b") ,intermediates = character(0) ) pred0 <- getUpdateFunction(predSpec)( theta0, list(), xSparse, xRich )
For comparison we sample parameters without accounting for model discrepancy. Note that the logDensity depends on the intermediates of the predictions.
logDenTheta <- function( theta, intermediates, logDensityComponents ,xSparse, xRich, obs, sdObs, ... ){ misfit1 <- obs$y1 - intermediates[["pred"]]$y1[seq_along(xSparse)] misfit2 <- obs$y2 - intermediates[["pred"]]$y2[seq_along(xRich)] structure( -1/2 * c(sum((misfit1/sdObs$y1)^2), sum((misfit2/sdObs$y2)^2)) , names=c('y1','y2') ) } resLogDenTheta0 <- logDenTheta( theta0, list(pred=pred0) , xSparse=xSparse, xRich=xRich, obs=obs, sdObs=sdObs ) bTheta0Spec <- blockSpec(c("a","b"),, new("MetropolisBlockUpdater", fLogDensity=logDenTheta, argsFLogDensity=list( xSparse=xSparse, xRich=xRich, obs=obs, sdObs=sdObs), logDensityComponentNames = names(resLogDenTheta0) ) ,intermediatesUsed=c("pred") ) sampler <- newPopulationSampler( list(bTheta=bTheta0Spec), theta0, covarTheta0 , nPopulation=1L ,intermediateSpecifications=list(pred=predSpec) ) samplerNoMd <- sampler <- setupAndSample(sampler, nSample=120L) sampleLogs <- subsetTail(getSampleLogs(sampler), 0.8) #plot( asMcmc(sampleLogs), smooth=FALSE ) stacked <- stackChainsInPopulation(subsetTail(getSampleLogs(sampler),0.8)) sample1 <- sampleNoMd <- t(getParametersForPopulation(stacked, 1L)[,,1L]) #summary(sample1) thetaNoMd <- colMeans(sample1) plot( obs$y2 ~ xRich ) lines( modTwTwoDenEx1( thetaNoMd, xSparse=xGridSparse, xRich=xGridRich, thresholdCovar=thresholdCovarBiased )$y2 ~ xGridRich, col="blue" ) plot( obs$y1 ~ xSparse ) lines( modTwTwoDenEx1( thetaNoMd, xSparse=xGridSparse, xRich=xGridRich, thresholdCovar=thresholdCovarBiased )$y1 ~ xGridSparse, col="blue" )
Note, that the model (thresholdCovarBiased) is part of the prediction that is compared to the rich data stream. However, after fitting, the discrepancy is visible in the sparse data stream.
Function compileDiscrepancyBlocksForStream
sets up additional intermediates and blocks for sampling the model discrepancies by a Gaussian process. Besides, the locations x
, observations, and its uncertainties, it required an intermediate of model predictions, predProcSpec
. In the following those are set up for each stream. Their names are distinguished by argument streamSuffix
.
In addition, the function requires specification of parameters of the prior knowledge of the hyperparameters of the Gaussian process.
The parameter vector to sample is extended by two parameters for each stream. The first hyperparameter is the logarithm of correlation length logPsi
. It is larger for higher correlated model discrepancies. The logarithm of signal variance logSd2Discr
, expressed as a factor of observation variance, is larger for larger model discrepancies.
xSparseRange <- diff(range(xSparse)) blocksSparse <- compileDiscrepancyBlocksForStream(x=xSparse, obs=obs$y1, sd2Obs=(sdObs$y1)^2 , predProcSpec = list(pred=predSpec), predProcSpecEntry="y1" , streamSuffix="sparse", xPred=xPredSparse ,priorGammaPsiPars = getGammaParsFromMeanAndVariance(xSparseRange/3, (xSparseRange/3.2)^2 ) ) xRichRange <- diff(range(xRich)) blocksRich <- compileDiscrepancyBlocksForStream(x=xRich, obs=obs$y2, sd2Obs=(sdObs$y2)^2 , predProcSpec = list(pred=predSpec), predProcSpecEntry="y2" , streamSuffix="rich", xPred=xPredRich ,priorGammaPsiPars = getGammaParsFromMeanAndVariance(xRichRange/3, (xRichRange/3.2)^2) )
The parameter vector is extended by the hyperparameters explained above and the samples of process prediction, i.e. of the sum of model and in addition by the discrepancies at xPred
defined above.
namesPred <- c(blocksSparse$namesProcessSamples, blocksRich$namesProcessSamples ) thetaPred <- structure( rep(1,length(namesPred)), names=namesPred) theta0Md <- c(thetaNoMd, thetaPred , logPsi_sparse=0.2, logSd2Discr_sparse=0.5 , logPsi_rich=0.2, logSd2Discr_rich=0.5) covarTheta0Md <- diag((0.2*theta0Md)^2)
The observations are modelled as the sum of model prediction and model discrepancy and observation uncertainty: $obs = m(\theta) + \delta + \epsilon$.
The sampling of the model parameters, now also depends on the model discrepancies at all observations. They are provided with list entry "deltaA" of intermediate "deltaA". Resampling takes place whenever predictions or a hyperparameter of the Gaussian Process changes. This includes the resampling on specification of a new proposal of model parameters.
In addition to the model discrepancies, the prior probability of the magnitude of these discrepancies is provided. Hence, proposed parameters can be rejected when the corresponding model discrepancies are too high.
The Likelihood hood function of model parameters $\theta$ is modified as follows.
logDenThetaMd <- function( theta, intermediates, logDensityComponents ,xSparse, xRich, obs, sdObs , priorDiscrFacSparse= length(xSparse)/2 , priorDiscrFacRich= length(xRich)/2 , ... ){ intSparse <- intermediates[["deltaA_sparse"]] intRich <- intermediates[["deltaA_rich"]] deltaSparse <- intSparse$deltaA deltaRich <- intRich$deltaA #misfit1 <- obs$y1 - (intermediates[["pred"]]$y1[seq_along(xSparse)] + deltaSparse) #misfit2 <- obs$y2 - (intermediates[["pred"]]$y2[seq_along(xRich)] + deltaRich) #logPriorSd2DiscrSparse <- priorDiscrFacSparse*intermediates$deltaA_sparse$logPriorInvGammaSd2Discr #logPriorSd2DiscrRich <- priorDiscrFacRich*intermediates$deltaA_rich$logPriorInvGammaSd2Discr logPriorSd2DiscrSparse <- intSparse$logPDeltaO logPriorSd2DiscrRich <- intRich$logPDeltaO # logPObs <- structure(-1/2*c(sum((misfit1/sdObs$y1)^2), sum((misfit2/sdObs$y2)^2)), names=c('ySparse','yRich') ) logPObs <- structure(-1/2*c(intSparse$SSQpred, intRich$SSQpred), names=c('ySparse','yRich') ) #if( logPObs["yRich"] < -515 ) recover() res <- c( logPObs, mdSparse=logPriorSd2DiscrSparse, mdRich=logPriorSd2DiscrRich) res } .tmp.f <- function(){ # code for inspection while tracing logDenThetaMd pred <- (intermediates[["predSparse"]][seq_along(xSparse)]) plot(obs$y1 ~ xSparse, ylim=range(obs$y1, pred)) points(pred ~ xSparse, col="blue") points(pred + deltaSparse ~ xSparse, col="orange") # pred <- (intermediates[["predRich"]][seq_along(xRich)]) plot(obs$y2 ~ xRich, ylim=range(obs$y2, pred)) points(pred ~ xRich, col="blue") points(pred + deltaRich ~ xRich, col="orange") with( intermediates[["deltaA_rich"]], { points(pred[iO]+zO ~ xO, col="red" ) }) } resLogDenThetaMd0 <- logDenThetaMd( theta0, list(pred=pred0 ,deltaA_rich=list(deltaA=0, logPriorInvGammaSd2Discr=-2, logPDeltaO=-2, SSQpred=13.9) , deltaA_sparse=list(deltaA=0, logPriorInvGammaSd2Discr=-2, logPDeltaO=-2, SSQpred=13.9)) , xSparse=xSparse, xRich=xRich, obs=obs, sdObs=sdObs ) bThetaMdSpec <- blockSpec( c("a","b"),, new("MetropolisBlockUpdater", #logDenThetaMd, fLogDensity=function(...){ logDenThetaMd(...) }, # allows tracing argsFLogDensity=list( xSparse=xSparse, xRich=xRich, obs=obs, sdObs=sdObs), logDensityComponentNames = names(resLogDenThetaMd0) ) ,intermediatesUsed=c("pred", "deltaA_sparse", "deltaA_rich") )
The setup of the sampling includes all the new intermediates and blocks. In addition, the subspace of sampling of the hyperparameters needs to be constrained, because with large correlation length, some matrices can become indeterminate.
cSubSpace <- blocksRich$fConstrainingSubSpace( blocksSparse$fConstrainingSubSpace(new("SubSpace")) ) # know that correlation length is in the magnitude of the locations cSubSpace@lowerParBounds["logPsi_rich"] <- log(diff(range(xRich))/5) sampler <- newPopulationSampler( blockSpecifications=c(list(bTheta=bThetaMdSpec) ,blocksSparse$blockSpecs, blocksRich$blockSpecs) , theta0Md, covarTheta0Md , nPopulation=1L ,intermediateSpecifications=c(blocksSparse$intermediateSpecs , blocksRich$intermediateSpecs[-1L]) #-1L to not include the prediction intermediate twice ,subSpace=cSubSpace ) #sampler@jumpProposer <- new("DEJumpProposer", deSettings=new("DESettings",DRgamma=0.005)) sampler <- setupAndSample(sampler, nSample=120L) samplerMd <- sampler <- setupAndSample(sampler, nSample=60L) samplerMd <- sampler <- setupAndSample(sampler, nSample=4L) sampleLogs <- getSampleLogs(sampler) plot( asMcmc(sampleLogs), smooth=FALSE ) #aL <- (getAcceptanceTracker(sampler)@acceptanceLog["logDenLogPsi_rich",,]) #computePopulationAcceptanceRates(aL) #matplot(aL, type="l") logTail <- subsetTail(getSampleLogs(sampler),0.6) #logTail <- subsetTail(getSampleLogs(sampler),0.4) #plot( asMcmc(logTail), smooth=FALSE ) stacked <- stackChainsInPopulation(logTail) sample1 <- sampleMd <- t(getParametersForPopulation(stacked, 1L)[,,1L]) .tmp.f <- function(){ lDen1 <- t(getLogDensityComponentsForPopulation(logTail, 1L)[,,1L]) # first chain #lDen1 <- t(getLogDensityComponentsForPopulation(logTail, 1L)[,,3L]) # third chain #lDen1 <- getLogDensityComponentsForPopulation(logTail, 1L)["y2",,] # only sparse summary(lDen1) denMedian <- apply(lDen1, 2, median) denAnomalies <- t(t(lDen1)-denMedian) #plot( obs_rich ~ priorPsi_rich, data=as.data.frame(denAnomalies)) # no correlation plot( obs_rich ~ mD_rich, data=as.data.frame(denAnomalies)) # low props of obs mostly at low probs of mD (denQuantile <- apply( apply(lDen1, 2, quantile, probs=c(0.1,0.9) ), 2, diff)) matplot( 1:nrow(lDen1), denAnomalies, type="l" ) legend( "bottomright", colnames(lDen1), lty=1:ncol(lDen1) ,col = 1:ncol(lDen1) ) plot( rowSums(lDen1), type="l" ) plot( lDen1[,"obs_rich"] ) #sample1 <- t(getParametersForPopulation(subsetTail(getSampleLogs(sampler),0.8), 1L)[,,1L]) } thetaMd <- colMeans(sample1)[1:2] apply(sample1[,1:2], 2, quantile, probs=c(0.025,0.975) ) predSparse <- colMeans(sample1)[grep("^ksi_.*_sparse$",colnames(sample1))] predRich <- colMeans(sample1)[grep("^ksi_.*_rich$",colnames(sample1))] predModelSparse <- modTwTwoDenEx1(thetaMd, xSparse=c(xSparse,xPredSparse), xRich=c(xRich,xPredRich), thresholdCovar=thresholdCovarBiased)$y1[-seq_along(xSparse)] plot( obs$y2 ~ xRich ) lines( modTwTwoDenEx1( thetaMd, xSparse=xGridSparse, xRich=xGridRich, thresholdCovar=thresholdCovarBiased )$y2 ~ xGridRich, col="blue" ) points( predRich ~ xPredRich, col="orange") plot( obs$y1 ~ xSparse, ylim=c(range(obs$y1,xPredSparse,predModelSparse)) ) lines( modTwTwoDenEx1( thetaMd, xSparse=xGridSparse, xRich=xGridRich, thresholdCovar=thresholdCovarBiased )$y1 ~ xGridSparse, col="blue" ) points( predModelSparse ~ xPredSparse, col="blue", pch="x") points( predSparse ~ xPredSparse, col="orange", pch="+") plot( density(sample1[,"logSd2Discr_rich"]), col="blue") lines( density(sample1[,"logSd2Discr_sparse"]), col="red") #plot( density(sample1[,"logSd2Discr_sparse"]), col="red") #lines( density(sample1[,"logSd2Discr_rich"]), col="blue")
Here we explore the hypothesis that the optimum of the ignore scenario still and optimum in the GP case. The parameters are set initially to the ignore optimum, and it is checked that they still converge to the GP optimum case.
ZinitParms <- getParametersForAllChains( subsetTail(getSampleLogs(samplerNoMd),0.4) ) ZinitGP <- computeInitialPopulation( theta0Md[-(1:2)], diag(covarTheta0Md)[-(1:2)] , subSpace=cSubSpace)[,,1L] Zinit <- rbind(ZinitParms[,1:ncol(ZinitGP)], ZinitGP) sampler <- newPopulationSampler( blockSpecifications=c(list(bTheta=bThetaMdSpec) ,blocksSparse$blockSpecs, blocksRich$blockSpecs) , Zinit = Zinit , nPopulation=1L ,intermediateSpecifications=c(blocksSparse$intermediateSpecs , blocksRich$intermediateSpecs[-1L]) #-1L to not include the prediction intermediate twice ,subSpace=cSubSpace ) #sampler@jumpProposer <- new("DEJumpProposer", deSettings=new("DESettings",DRgamma=0.005)) #sampler <- setupAndSample(sampler, nSample=120L) samplerMd_startNoMd <- sampler <- setupAndSample(sampler, nSample=60L) sampleLogs <- getSampleLogs(sampler) plot( asMcmc(sampleLogs), smooth=FALSE ) #aL <- (getAcceptanceTracker(sampler)@acceptanceLog["logDenLogPsi_rich",,]) #matplot(aL, type="l") logTail <- subsetTail(getSampleLogs(sampler),0.4) #plot( asMcmc(logTail), smooth=FALSE ) stacked <- stackChainsInPopulation(logTail) sample1 <- sampleMd_startNoMd <- t(getParametersForPopulation(stacked, 1L)[,,1L]) apply(sample1[,1:2], 2, quantile, probs=c(0.025,0.975) )
It still converged to the case where model discrepancy is allocated to the rich data stream.
#ss <- sample1 iSubsetRich <- sort(sample.int(length(xRich), 30)) xGridRich2 <- xRich[iSubsetRich] obsComb <- c(obs$y1, obs$y2[iSubsetRich]) doSamplePredObsDiffPost <- function(ss, scenario="ignore", nSample=180 ){ iRows <- sample.int(nrow(ss), nSample, replace=TRUE) predTrue <- do.call(c, modTwTwoDenEx1( thetaTrue, xSparse=xSparse, xRich=xGridRich, thresholdCovar=thresholdCovarTrue )) tmp <- apply( ss[iRows,1:2], 1, function(theta){ predMod <- do.call(c,modTwTwoDenEx1( theta, xSparse=xSparse, xRich=xGridRich2, thresholdCovar=thresholdCovarBiased )) #predMod - predTrue predMod - obsComb # too noisy? -> uses smoothers }) stat <- t(apply(tmp,1,quantile,probs = c(0.025, 0.5, 0.975), names=FALSE)) colnames(stat) <- c("lower","median","upper") res <- cbind( data.frame( scenario=scenario , stream=rep(c("ySparse","yRich"),times=c(length(xSparse),length(xGridRich2)) ) , x =(c(xSparse,xGridRich2))) , as.data.frame(stat)) } samplePostDiff <- rbind( doSamplePredObsDiffPost(sampleNoMd,"ignore"),doSamplePredObsDiffPost(sampleMd,"GP")) #http://stackoverflow.com/questions/19643234/fill-region-between-two-loess-smoothed-lines-in-r-with-ggplot dsPlot <- samplePostDiff #droplevels(subset(samplePostDiff, scenario %in% c("ignore","GP"))) g1 <- ggplot( dsPlot, aes(x=x, col=scenario, linetype=scenario, shape=scenario ) ) + facet_wrap(~stream, scales="free_x") + #facet_wrap(~stream, scales="free") + #geom_point(aes(y=median)) + geom_hline(yintercept=0, col="grey") + stat_smooth(aes(y = upper), method = "lm", formula = y ~ x, se = FALSE, linetype="solid", size=0.2) + stat_smooth(aes(y = lower), method = "lm", formula = y ~ x, se = FALSE, linetype="solid", size=0.2) + #geom_ribbon(aes(ymin=lower, ymax=upper), fill="grey", alpha=0.2) + #ylab("predicted - true value") + ylab("predicted - observed") + theme() gg1 <- ggplot_build(g1) iSmooth <- 2L df2 <- data.frame(x = gg1$data[[iSmooth]]$x, ymin = gg1$data[[iSmooth]]$y, ymax = gg1$data[[iSmooth+1L]]$y, scenario = levels(dsPlot$scenario)[(gg1$data[[iSmooth]]$group %/% 2)+1], stream = levels(dsPlot$stream)[gg1$data[[iSmooth]]$PANEL] ) levels(df2$scenario) <- levels(dsPlot$scenario) g1 + geom_ribbon(data = df2, aes(x = x, ymin = ymin, ymax = ymax, linetype=scenario), fill="grey" ,alpha = 0.3, colour=NA) + stat_smooth(aes(y = median), method = "lm", formula = y ~ x, size = 1, se = FALSE) + theme_bw(base_size=baseSize)
Predictive posterior: orange are the samples of the process (model + discrepancy).
ss <- sample1 iSubsetRich <- sort(sample.int(length(xRich), 100)) xGridRich2 <- xRich[iSubsetRich] obsS <- c(obs$y1, obs$y2[iSubsetRich]) doSamplePred <- function(ss, scenario="ignore", nSample=180 ){ iRows <- sample.int(nrow(ss), nSample, replace=TRUE) tmp <- apply( ss[iRows,1:2], 1, function(theta){ predMod <- do.call(c,modTwTwoDenEx1( theta, xSparse=xSparse, xRich=xGridRich2, thresholdCovar=thresholdCovarBiased )) }) stat <- t(apply(tmp,1,quantile,probs = c(0.025, 0.5, 0.975), names=FALSE)) colnames(stat) <- c("lower","median","upper") res <- cbind( data.frame( scenario=scenario , stream=rep(c("ySparse","yRich"),times=c(length(xSparse),length(xGridRich2)) ) , x =(c(xSparse,xGridRich2))) , obs = obsS , as.data.frame(stat)) } samplePost <- rbind( doSamplePred(sampleNoMd,"ignore"),doSamplePred(sampleMd,"GP") #,doSamplePred(sampleMd2,"equal") #,doSamplePred(sampleMdC,"MdC") ) doSamplePredProc <- function(ss, scenario="GP"){ tmp <- ss[,grep("^ksi_",colnames(ss))] stat <- t(apply(tmp,2,quantile,probs = c(0.025, 0.5, 0.975), names=FALSE)) colnames(stat) <- c("lower","median","upper") res <- cbind( data.frame( scenario=scenario , stream=rep(c("ySparse","yRich"),times=c(length(xPredSparse),length(xPredRich)) ) , x =(c(xPredSparse,xPredRich))) , as.data.frame(stat)) } samplePostProc <- rbind( doSamplePredProc(sampleMd,"GP") #,doSamplePredProc(sampleMd2,"equal") #,doSamplePredProc(sampleMdC,"MdC") ) plotScen <- "ignore" plotScen <- "GP" for( plotScen in character(0) ){ #levels(samplePost$scenario) ){ print(plotScen) dsPlot <- subset(samplePost, scenario==plotScen ) dsPlotProc <- subset(samplePostProc, scenario==plotScen ) g1 <- ggplot( dsPlot, aes(x=x ) ) + geom_point(aes(y=obs), col="grey50") + #facet_wrap(~stream, scales="free_x") + facet_wrap(~stream, scales="free") + geom_line(aes(y=median)) + geom_ribbon(aes(ymin=lower, ymax=upper), fill="blue", alpha=0.4) + geom_point(data=dsPlotProc, aes(y=median), col="orange") + {if( plotScen == "ignore") list() else geom_errorbar(data=dsPlotProc, aes(ymin=lower, ymax=upper), width=0.05, col="orange")} + #geom_errorbar(data=dsPlotProc, aes(ymin=lower, ymax=upper), width=0.05, col="orange") + ylab("y") + theme_bw(base_size=baseSize) print(g1) }
g1 <- ggplot( samplePost, aes(x=x ) ) + geom_point(aes(y=obs), col="grey50") + facet_wrap(~stream+scenario, scales="free", ncol=2) + #facet_grid(stream~scenario, scales="free_x") + geom_line(aes(y=median)) + geom_ribbon(aes(ymin=lower, ymax=upper), fill="blue", alpha=0.4) + geom_point(data=samplePostProc, aes(y=median), col="orange") + geom_errorbar(data=samplePostProc, aes(ymin=lower, ymax=upper), width=0.03, col="orange") + ylab("y") + theme_bw(base_size=baseSize)+ theme(axis.text.x = element_blank(), axis.text.y = element_blank() , axis.ticks.x = element_blank(), axis.ticks.y = element_blank()) print(g1)
iStream <- "yRich" g1 <- ggplot( subset(samplePost, stream==iStream), aes(x=x ) ) + geom_point(aes(y=obs), col="grey50") + facet_wrap(~stream+scenario, scales="free", ncol=2) + facet_grid(~scenario, scales="free_x") + geom_line(aes(y=median)) + geom_ribbon(aes(ymin=lower, ymax=upper), fill="blue", alpha=0.4) + #geom_point(data=subset(samplePostProc, stream==iStream), aes(y=median), col="orange") + #geom_errorbar(data=subset(samplePostProc, stream==iStream), aes(ymin=lower, ymax=upper), width=0.03, col="orange") + ylab("respiration rate") + xlab("Temperature") + theme_bw(base_size=baseSize)+ theme(axis.text.x = element_blank(), axis.text.y = element_blank() , axis.ticks.x = element_blank(), axis.ticks.y = element_blank()) print(g1) iStream <- "ySparse" g2 <- ggplot( subset(samplePost, stream==iStream), aes(x=x ) ) + geom_point(aes(y=obs), col="grey50") + facet_wrap(~stream+scenario, scales="free", ncol=2) + facet_grid(~scenario, scales="free_x") + geom_line(aes(y=median)) + geom_ribbon(aes(ymin=lower, ymax=upper), fill="blue", alpha=0.4) + #geom_point(data=subset(samplePostProc, stream==iStream), aes(y=median), col="orange") + #geom_errorbar(data=subset(samplePostProc, stream==iStream), aes(ymin=lower, ymax=upper), width=0.03, col="orange") + ylab("cumulated respiration") + xlab("Soil carbon stocks") + theme_bw(base_size=baseSize)+ theme(axis.text.x = element_blank(), axis.text.y = element_blank() , axis.ticks.x = element_blank(), axis.ticks.y = element_blank()) print(g2)
Comparison of model signal variance
#colnames(sampleMdC) varColNames <- c("logSd2Discr_sparse","logSd2Discr_rich") dsPlot <- rbind( data.frame( scenario="GP", stream="sparse", logSigma=sampleMd[,"logSd2Discr_sparse"]) ,data.frame( scenario="GP", stream="rich", logSigma=sampleMd[,"logSd2Discr_rich"]) #,data.frame( scenario="equal", stream="sparse", logSigma=sampleMd2[,"logSd2Discr_sparse"]) #,data.frame( scenario="equal", stream="rich", logSigma=sampleMd2[,"logSd2Discr_rich"]) #,data.frame( scenario="MdC", stream="sparse", logSigma=sampleMdC[,"logSd2Discr"]) #,data.frame( scenario="MdC", stream="rich", logSigma=sampleMdC[,"logSd2Discr"]) ) g1 <- ggplot( dsPlot #, aes(x=logSigma, col=scenario, fill=scenario, linetype=scenario, group=interaction(scenario,stream)) ) + , aes(x=logSigma, col=stream, fill=stream, linetype=stream, group=interaction(scenario,stream)) ) + geom_density(alpha=0.1) + theme_bw(base_size=baseSize) print(g1)
ds <- data.frame(modelDiscrepancy=exp(sampleMd[,"logSd2Discr_rich"]))
We sampled the signal variance, i.e. the magnitude of the model discrepancy variance, as a factor of the variance of observation uncertainty. Inspection of this parameter shows that overall model discrepancies in this calibration ranged between r signif(quantile(ds$modelDiscrepancy, p=0.25),2)
times to r signif(quantile(ds$modelDiscrepancy, p=0.75),2)
times the observation uncertainty.
suppressWarnings(ggplot(ds , aes(x=modelDiscrepancy)) + geom_density() + xlim(0,2) + xlab("Model discrepancy") + theme_bw(base_size=baseSize))
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.