data-raw/stats-testdata.R

#### Run from package top level to regenerate test data.

#### Simulate data from a simple linear model to use in testing the Bayesian
#### inference functions in this package.
####
#### Our test model will be this:
####    y ~ Normal(mu, sig)
####    mu <- af + afnp*x1 + afc*x2
####    sig <- sqrt(xi*var(y))
####    af ~ Uniform(0, 6)
####    afnp ~ Uniform(0, 6)
####    afc ~ Uniform(0, 6)
####    xi ~ Uniform(0, 1)
####
#### Procedure:
#### 1. Pick a reference set of parameters for af, afnp, and afc, and use these to
#### simulate historical observations.
#### 2. Create a quasi-random uniform sampling of the parameter space.  For each
#### sample create a ScenarioInfo object with the appropriate parameters.
#### 3. Make predictions using the test model and write them into the
#### appropriate file in a directory structure that looks like the directory
#### structure generated by the actual land model.
#### 4. Run the Bayesian analysis as normal.  The ScenarioInfo objects generated
#### in step 2 will pull their data from the test simulation, and the rest of
#### the package will work as normal.
####
#### In practice we will do steps 1-3 once and store the results in an rda file
#### in the tests directory.  Then we can load that data and use it to test the
#### Bayesian functions.
####
#### To validate the results, we can compare our results to comparable results
#### from the rethinking package.  The numbers won't match exactly, due to
#### sampling variance, but we should be able to tell if our calculations are in
#### the right ballpark.

library(readr)
library(tibble)

gen_stats_testdata <- function()
{
    set.seed(8675309)
    oldwd <- getwd()
    setwd('tests/testthat')

### Generate observed data
    params <- c(2.25, 3.16, 3.24)
    xi <- 0.4

    years <- seq(1975, 2010)
    N <- length(years)
    x1m <- 0.5 + 0.025*(years-min(years))
    x2m <- 1 + 0.01*(years-min(years))
    x1 <- rnorm(N, x1m, 0.5)
    x2 <- rnorm(N, x2m, 0.5)


    ymeans <- params[1] + params[2]*x1 + params[3]*x2

    sig <- sqrt(var(ymeans) / (1/xi - 1))   # sig^2 should be approximately xi *
                                        # var(final values)
    message('sig = ', sig)

    yvals <- rnorm(length(ymeans), mean=ymeans, sd=sig)
    message('var(yvals)= ', var(yvals))

    obs.test <- tibble(region="USA", land.type="Corn", variable="Land Area",
                       year=years, obs=yvals, obsvar=var(yvals))
    xvals.test <- tibble(year=years, x1=x1, x2=x2)


    outdir <- 'data'
    write_csv(obs.test, file.path(outdir, 'obs-test.csv'))
    write_csv(xvals.test, file.path(outdir, 'xvals-test.csv'))

### Generate model predictions

    NPARAM <- 3
    Nsample <- 500
    limits <- c(0, 6)
    scentype <- 'Hindcast'
    exptype1 <- 'Perfect'
    exptype2 <- 'Adaptive'

    rn <- randtoolbox::sobol(Nsample, NPARAM)
    p <- limits[1] + rn*(limits[2]-limits[1])

    scenObjects <-
        unlist(
            lapply(1:Nsample,
                   function(i) {
                       scenname1 <- gcamland:::getScenName(scentype, exptype1, NULL, p[i,1],
                                                           p[i,2], p[i,3])
                       scenname2 <- gcamland:::getScenName(scentype, exptype2, NULL, p[i,1],
                                                           p[i,2], p[i,3])
                       list(
                           ScenarioInfo(aScenarioType = scentype,
                                        aExpectationType = exptype1,
                                        aLinearYears = NA,
                                        aLaggedShareOld = NA,
                                        aLogitUseDefault = FALSE,
                                        aLogitAgroForest = p[i,1],
                                        aLogitAgroForest_NonPasture = p[i,2],
                                        aLogitCropland = p[i,3],
                                        aScenarioName = scenname1,
                                        aFileName = "ensemble",
                                        aOutputDir = outdir),
                           ScenarioInfo(aScenarioType = scentype,
                                        aExpectationType = exptype2,
                                        aLinearYears = NA,
                                        aLaggedShareOld = NA,
                                        aLogitUseDefault = FALSE,
                                        aLogitAgroForest = p[i,1],
                                        aLogitAgroForest_NonPasture = p[i,2],
                                        aLogitCropland = p[i,3],
                                        aScenarioName = scenname2,
                                        aFileName = "ensemble",
                                        aOutputDir = outdir)
                           )
                   }),
            recursive=FALSE)

    model_output <-
        lapply(scenObjects,
               function(obj) {
                   ymod <- obj$mLogitAgroForest + x1*obj$mLogitAgroForest_NonPasture +
                     x2*obj$mLogitCropland
                   mdata <- tibble(name='CornAEZ000', land.allocation=ymod, year=years,
                                   scenario=paste(scentype, obj$mExpectationType,
                                   paste0('AgroForest', obj$mLogitAgroForest),
                                   paste0('AgroForestNonPasture',
                                          obj$mLogitAgroForest_NonPasture),
                                   paste0('Cropland', obj$mLogitCropland),
                                   sep='_'))
               }) %>%
          dplyr::bind_rows()

### Construct the filename and write out merged data set.
    fn <- file.path(outdir, "output_ensemble.rds")
    saveRDS(model_output, fn, compress='xz')

    ## Read the model data back in and process it so that we can calculate
    ## posteriors
    model_output <- get_scenario_land_data(scenObjects)

    ## Calculate posteriors for all scenarios
    scenObjects <-
        lapply(scenObjects,
               function(obj) {
                   sname <- obj$mScenarioName
                   mdata <- dplyr::filter(model_output, scenario==sname)
                   ## calculate the posterior PDF and likelihood
                   obj <- calc_post(obj, obs.test, mdata, get_lpdf(Inf))
               })

### Save model objects
    scenfile <- file.path(outdir, 'scenario-info.rds')
    saveRDS(scenObjects, scenfile, compress='xz')


    setwd(oldwd)

    invisible(list(obs.test=obs.test, scens=scenObjects, model.test=model_output))
}


## Run the model we've designed through another package, and verify that the EV,
## HDPI, WAIC, etc, look right.
run_comparison_analysis <- function()
{
    m1spec <- alist(
        obs ~ dnorm(mu, sig),
        mu <- af + afnp*x1 + afc*x2,
        ##        sig2 <- xi*9.22845317107775,
        ##        xi ~ dunif(0, 1),
        sig ~ dunif(0, 3.037837),
        af ~ dunif(0, 6),
        afnp ~ dunif(0, 6),
        afc ~ dunif(0, 6))

    d <- read_csv('tests/testthat/data/obs-test.csv')
    x <- read_csv('tests/testthat/data/xvals-test.csv')
    d <- as.data.frame(dplyr::left_join(d, x, by='year'))
    d <- d[,c('x1','x2','obs')]

    m1 <- rethinking::map2stan(m1spec, data=d)

    m1
}
JGCRI/gcamland documentation built on Oct. 6, 2020, 5:30 p.m.