inst/doc/IncludingDrivers.R

## ----setup_lies, warning=FALSE, message=FALSE, silent = TRUE------------------
library(EcoEnsemble)
library(mgcv)
library(ggplot2)
library(cowplot)
set.seed(5678)

## ----configure_priors, eval = FALSE-------------------------------------------
#  d <- 3 # The number of variables.
#  
#  priors <- EnsemblePrior(d,
#                          ind_st_params = IndSTPrior(AR_params=c(10,2)),
#                          ind_lt_params = IndLTPrior("beta",
#                                                     cor_params = list(matrix(5, d, d),
#                                                                       matrix(1, d, d)),var_params = list(1,0.5)),
#                          sha_st_params = ShaSTPrior(AR_params=c(10,2)),
#  )

## ----sampling priors, eval = FALSE--------------------------------------------
#  M <- 3 # The number of simulators we are considering.
#  MM <- 2 # The number of drivers we are considering.
#  prior_density <- prior_ensemble_model(priors, M = M, MM = MM)
#  ex.fit <- rstan::extract(prior_density$samples) # Extracted samples

## ----truth_best_guesses, eval = FALSE-----------------------------------------
#  Time <- 50
#  
#  true_par <-which.max(ex.fit$lp__)
#  
#  latent <- matrix(NA, Time, (M+MM+2)*d)
#  
#  #Priors on initial values
#  latent[1, 1:d] <- rnorm(d, 0, 1)
#  latent[1, -(1:d)] <- t(chol(ex.fit$SIGMA_init[true_par,-(1:d) ,-(1:d)])) %*% rnorm((M+MM+1)*d, 0, 1)
#  
#  #Find all the latent values of the dynamic linear model
#  SIGMA <- ex.fit$SIGMA[true_par,,]
#  SIGMA_chol <- t(chol(SIGMA))
#  for (t in 2:Time) {
#    latent[t,] <- ex.fit$AR_params[true_par,] * latent[t-1,] + SIGMA_chol %*% rnorm((M+MM+2)*d, 0, 1)
#  }
#  
#  #The truth is simply the first d values
#  truth_latent <- latent[,1:d]
#  
#  #The best guesses are sums of the truth and discrepancies
#  simulator_best_guesses <- array(NA,dim=c(Time,d,M*MM))
#  for(i in 1:M){
#    for (j in 1:MM){
#      simulator_best_guesses[,,MM*(i-1)+j] <- t(
#        t(latent[,1:d] + latent[,(d+1):(2*d)] + latent[,(i + 1)*d + (1:d)] + latent[,(M + j + 1)*d + (1:d)]) +
#          ex.fit$ind_lt[true_par,i,] + ex.fit$ind_lt_dri[true_par,j,] + ex.fit$sha_lt[true_par,])
#    }
#  }

## ----plot_data, eval = FALSE--------------------------------------------------
#  plotlist <- list()
#  for (sim in 1:M){
#    for (dri in 1:MM){
#      plotlist[[MM*(sim-1)+dri]] <- cbind(c(1:Time),simulator_best_guesses[,1,MM*(sim-1)+dri],rep(sim,Time),rep(dri,Time),rep("sim",Time))
#    }
#  }
#  plot_df <- data.frame(rbind(do.call(rbind, plotlist), cbind(c(1:Time), truth_latent[,1], rep(0,Time), rep(0,Time), rep("truth",Time))))
#  names(plot_df) <- c("Year", "Value", "Simulator", "Driver", "Type")
#  plot_df$Year <- as.numeric(plot_df$Year)
#  plot_df$Value <- as.numeric(plot_df$Value)
#  bgplot <- ggplot2::ggplot(plot_df) + geom_line(data = plot_df[which(plot_df$Type == "sim"),], aes(x = Year, y = Value, color = Simulator, linetype = Driver), linewidth = 0.8) + geom_line(data = plot_df[which(plot_df$Type == "truth"),], aes(x = Year, y = Value), linewidth = 1, color = "purple")

## ---- echo = FALSE, out.width="500px", out.height="500px"---------------------
knitr::include_graphics("data/bgplot.png")

## ----add_noise, eval = FALSE--------------------------------------------------
#  Times_obs <- round(Time * 0.8)
#  obs <- matrix(NA,Times_obs,d)
#  for(i in 1:d){
#    g1 <- gam(y~s(x,k = 15),data=data.frame(x=1:Times_obs,y = truth_latent[1:Times_obs,i]))
#    obs[,i] <- predict(g1)
#  }
#  
#  obs.cov <- cov(obs - truth_latent[1:Times_obs,])
#  
#  
#  model.cov <- array(0,dim=c(M*MM,d,d))
#  models_output <- array(NA,dim=c(M*MM,Time,d))
#  for (j in 1:(M*MM)){
#    for(i in 1:d){
#      g1 <- gam(y~s(x, k = 15),data=data.frame(x=1:Time,y=simulator_best_guesses[,i,j]))
#      models_output[j,,i] <- predict(g1)
#    }
#    model.cov[j,,] <- cov(models_output[j,,] - simulator_best_guesses[,,j])
#  }

## ----plot_obs, eval = FALSE---------------------------------------------------
#  #Observations and model outputs
#  plotlist <- list()
#  for (sim in 1:M){
#    for (dri in 1:MM){
#      plotlist[[MM*(sim-1)+dri]] <- cbind(c(1:Time),models_output[MM*(sim-1)+dri,,1],rep(sim,Time),rep(dri,Time),rep("sim",Time),rep("observed", Time))
#    }
#  }
#  plot_df <- data.frame(rbind(do.call(rbind, plotlist), cbind(c(1:Time), c(obs[,1],rep(0, Time - max(Times_obs))), rep(0,Time), rep(0,Time), rep("truth",Time), c(rep("observed", max(Times_obs)),rep("unobserved", Time - max(Times_obs))))))
#  names(plot_df) <- c("Year", "Value", "Simulator", "Driver", "Type", "Obs_Status")
#  plot_df$Year <- as.numeric(plot_df$Year)
#  plot_df$Value <- as.numeric(plot_df$Value)
#  obsplot <- ggplot(plot_df) + geom_line(data=plot_df[which(plot_df$Type == "sim"),], aes(x = Year, y = Value, color = Simulator, linetype = Driver), linewidth = 0.8) + geom_point(data = plot_df[intersect(which(plot_df$Type == "truth"),which(plot_df$Obs_Status == "observed")),], aes(x = Year, y = Value), color = "purple")

## ---- echo = FALSE, out.width="500px", out.height="500px"---------------------
knitr::include_graphics("data/obsplot.png")

## ---- eval = FALSE------------------------------------------------------------
#  #Create the data frames that we'll use for EcoEnsemble
#  val_obs <- data.frame(obs); cov_obs <- obs.cov
#  val_model_11 <- data.frame(models_output[1,,]); cov_model_11 <- model.cov[1,,]
#  val_model_12 <- data.frame(models_output[2,,]); cov_model_12 <- model.cov[2,,]
#  val_model_21 <- data.frame(models_output[3,,]); cov_model_21 <- model.cov[3,,]
#  val_model_22 <- data.frame(models_output[4,,]); cov_model_22 <- model.cov[4,,]
#  val_model_31 <- data.frame(models_output[5,,]); cov_model_31 <- model.cov[5,,]
#  val_model_32 <- data.frame(models_output[6,,]); cov_model_32 <- model.cov[6,,]
#  
#  #Set the dimnames to ensure EcoEnsemble can identify the information.
#  SPECIES_NAMES <- c("Species 1", "Species 2", "Species 3")
#  dimnames(val_obs) <- list(paste(1:Times_obs), SPECIES_NAMES)
#  dimnames(val_model_11) <- list(paste(1:Time), SPECIES_NAMES)
#  dimnames(val_model_21) <- list(paste(1:Time), SPECIES_NAMES)
#  dimnames(val_model_12) <- list(paste(1:Time), SPECIES_NAMES)
#  dimnames(val_model_22) <- list(paste(1:Time), SPECIES_NAMES)
#  dimnames(val_model_31) <- list(paste(1:Time), SPECIES_NAMES)
#  dimnames(val_model_32) <- list(paste(1:Time), SPECIES_NAMES)
#  
#  dimnames(cov_obs) <- list(SPECIES_NAMES, SPECIES_NAMES)
#  dimnames(cov_model_11) <- list(SPECIES_NAMES, SPECIES_NAMES)
#  dimnames(cov_model_21) <- list(SPECIES_NAMES, SPECIES_NAMES)
#  dimnames(cov_model_12) <- list(SPECIES_NAMES, SPECIES_NAMES)
#  dimnames(cov_model_22) <- list(SPECIES_NAMES, SPECIES_NAMES)
#  dimnames(cov_model_31) <- list(SPECIES_NAMES, SPECIES_NAMES)
#  dimnames(cov_model_32) <- list(SPECIES_NAMES, SPECIES_NAMES)
#  
#  val_model_1 <- list(val_model_11, val_model_12)
#  val_model_2 <- list(val_model_21, val_model_22)
#  val_model_3 <- list(val_model_31, val_model_32)
#  cov_model_1 <- list(cov_model_11, cov_model_12)
#  cov_model_2 <- list(cov_model_21, cov_model_22)
#  cov_model_3 <- list(cov_model_31, cov_model_32)

## ----fitting, eval = FALSE----------------------------------------------------
#  fit <- fit_ensemble_model(observations = list(val_obs, cov_obs),
#                                               simulators = list(list(val_model_1, cov_model_1, "Simulator 1", c("Driver 1", "Driver 2")),
#                                                                 list(val_model_2, cov_model_2, "Simulator 2", c("Driver 1", "Driver 2")),
#                                                                 list(val_model_3, cov_model_3, "Simulator 3", c("Driver 1", "Driver 2"))
#                                               ),
#                                               priors = EnsemblePrior(d),
#                                               control = list(adapt_delta = 0.9),drivers=T)
#  samples <- generate_sample(fit)

## ----results, eval = FALSE----------------------------------------------------
#  df_list <- list()
#  for (var_index in 1:3){
#    df <- data.frame("Year" = paste(1:Time),
#                     "Ensemble" = apply(samples@mle[, var_index, ], 1, median),
#                     "Lower"  = apply(samples@samples[, var_index, ], 1, quantile, 0.1),
#                     "Upper"  = apply(samples@samples[, var_index, ], 1, quantile, 0.9),
#                     "Actual" = truth_latent[,var_index])
#    df$Year <- as.numeric(df$Year)
#    df <-  reshape2::melt(df, id.vars=c("Year", "Lower", "Upper"), variable.name="Simulator")
#    # We only want uncertainty bands for the Ensemble values, else we set zero width bands
#    df[df$Simulator == "Actual", c("Lower", "Upper")] <- df[df$Simulator != "Actual", "value"]
#    df$Species = rep(SPECIES_NAMES[var_index], Time)
#    df_list[[var_index]] <- ggplot(df, aes(x=`Year`, y=`value`)) +
#    geom_line(aes(group=`Simulator`,colour=`Simulator`)) +
#    geom_ribbon(aes(ymin=`Lower`, ymax =`Upper`, fill = `Simulator`), alpha=0.2) + ggplot2::ggtitle(SPECIES_NAMES[var_index]) + ggplot2::theme(legend.position = "right")
#  }
#  
#  legend <- cowplot::get_plot_component(df_list[[1]], "guide-box-right")
#  df_list <- lapply(df_list, function(x) {x + ggplot2::theme(legend.position = "none")})
#  output_plot_nolegend <- cowplot::plot_grid(plotlist=df_list, nrow = 3, ncol = 1)
#  output_plot <- cowplot::plot_grid(output_plot_nolegend, legend, rel_widths = c(4,1), nrow = 1, ncol = 2)

## ----plot_truth, echo = FALSE, out.width="600px", out.height="600px"----------
knitr::include_graphics("data/output_plot.png")

## ---- eval = FALSE------------------------------------------------------------
#  samples_array <- as.array(fit@samples)
#  #Drop last element of third dimension as this is "lp__"
#  max(apply(samples_array[,,-dim(samples_array)[3]], 3, rstan::Rhat), na.rm = TRUE)
#  min(apply(samples_array[,,-dim(samples_array)[3]], 3, rstan::ess_bulk), na.rm = TRUE)
#  min(apply(samples_array[,,-dim(samples_array)[3]], 3, rstan::ess_tail), na.rm = TRUE)

## ----echo=FALSE---------------------------------------------------------------
1.007311; 836.737; 559.6022

## ---- eval = FALSE------------------------------------------------------------
#  rstan::check_divergences(fit@samples)

## ---- echo = FALSE------------------------------------------------------------
message(sprintf("%s of %s iterations ended with a divergence (%s%%).", 
            5, 4000, 100 * 5/4000), "\nTry increasing 'adapt_delta' to remove the divergences.")

## ---- eval = FALSE, fig.dim = c(7,4)------------------------------------------
#  rstan::traceplot(fit@samples, pars = "ind_st_sd[3,2]")

## ---- echo = FALSE, out.width = "500px", out.height = "300px"-----------------
knitr::include_graphics("data/drivers_trace.png")

Try the EcoEnsemble package in your browser

Any scripts or data that you put into this service are public.

EcoEnsemble documentation built on April 4, 2025, 1:55 a.m.