## tests/test_simulate_gam.R

# Testing simulate_gam
require(ggplot2)
require(nlraa)
require(nlme)
require(mgcv)

if(Sys.info()[["user"]] == "fernandomiguez"){

  y <- c(12, 14, 33, 50, 67, 74, 123, 141, 165, 204, 253, 246, 240)
  t <- 1:13

  dat <- data.frame(y = y, t = t)

  ggplot(data = dat, aes(x = t, y = y)) + geom_point()

  ## From page 132 of the GAM book by Simon Wood
  m1 <- gam(y ~ t + I(t^2), data = dat, family = poisson)

  ggplot(data = dat, aes(x = t, y = y)) + 
    geom_point() + 
    geom_line(aes(y = fitted(m1)))

  m1.sim <- simulate_gam(m1, nsim = 1e3)
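
  ## A quick sanity check (assuming simulate_gam returns one row per
  ## observation and one column per simulated response)
  stopifnot(nrow(m1.sim) == nrow(dat), ncol(m1.sim) == 1e3)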

  m1.sims <- summary_simulate(m1.sim)
  datA <- cbind(dat, m1.sims)

  ggplot(data = datA, aes(x = t, y = y)) + 
    geom_point() + 
    geom_line(aes(y = Estimate)) + 
    geom_ribbon(aes(ymin = Q2.5, ymax = Q97.5), fill = "purple", alpha = 0.3) + 
    ggtitle("Simulate + summary_simulate method")

  ## Compare with the built-in method from the mgcv package
  m1.prd <- predict(m1, se.fit = TRUE, type = "response")

  m1.prdd <- data.frame(dat, prd = m1.prd$fit, 
                        lwr = m1.prd$fit - 1.96 * m1.prd$se.fit, 
                        upr = m1.prd$fit + 1.96 * m1.prd$se.fit)

  ggplot(data = m1.prdd, aes(x = t, y = y)) + 
    geom_point() + 
    geom_line(aes(y = prd)) + 
    geom_ribbon(aes(ymin = lwr, ymax = upr), fill = "purple", alpha = 0.3) + 
    ggtitle("Built-in predict.gam method")

  ## Prediction. The default method uses 'simulate.lm' under the hood
  m1.simP <- simulate_gam(m1, nsim = 1e3, psim = 2)

  m1.simPs <- summary_simulate(m1.simP)
  datAP <- cbind(dat, m1.simPs)

  ggplot(data = datAP, aes(x = t, y = y)) + 
    geom_point() + 
    geom_line(aes(y = Estimate)) + 
    geom_ribbon(aes(ymin = Q2.5, ymax = Q97.5), fill = "purple", alpha = 0.3)

  ## This method includes the uncertainty in the coefficients and the residuals
  m1.simP2 <- simulate_gam(m1, nsim = 1e3, psim = 2, resid.type = "resample")

  m1.simP2s <- summary_simulate(m1.simP2)
  datAP2 <- cbind(dat, m1.simP2s)

  ggplot(data = datAP2, aes(x = t, y = y)) + 
    geom_point() + 
    geom_line(aes(y = Estimate)) + 
    geom_ribbon(aes(ymin = Q2.5, ymax = Q97.5), fill = "purple", alpha = 0.3)

  ## Wild residuals 
  m1.simPW <- simulate_gam(m1, nsim = 1e3, psim = 2, resid.type = "wild")

  m1.simPWs <- summary_simulate(m1.simPW)
  datAPW <- cbind(dat, m1.simPWs)

  ggplot(data = datAPW, aes(x = t, y = y)) + 
    geom_point() + 
    geom_line(aes(y = Estimate)) + 
    geom_ribbon(aes(ymin = Q2.5, ymax = Q97.5), fill = "purple", alpha = 0.3)
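
  ## Prediction bands (psim = 2) should be wider than the confidence-type band
  ## obtained earlier with the default psim, regardless of the residual type.
  ## A rough check on the average band width (Monte Carlo error aside):
  mean(datAP$Q97.5 - datAP$Q2.5) > mean(datA$Q97.5 - datA$Q2.5)
  mean(datAPW$Q97.5 - datAPW$Q2.5) > mean(datA$Q97.5 - datA$Q2.5)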
  
  ## Dataset Soybean
  data(Soybean)
  
  fms.G <- gam(weight ~ Time + s(Time), data = Soybean)
  fms.C <- lm(weight ~ Time + I(Time^2) + I(Time^3), data = Soybean)
  fms.B <- nls(weight ~ SSbgrp(Time, w.max, lt.e, ldt), data = Soybean)

  ## AIC strongly favors the GAM and BIC strongly favors the beta growth model
  IC_tab(fms.G, fms.C, fms.B, criteria = "AIC")
  IC_tab(fms.G, fms.C, fms.B, criteria = "BIC")
  
  ggplot(data = Soybean, aes(x = Time, y = weight)) + 
    geom_point() + 
    geom_line(aes(y = fitted(fms.C), color = "Cubic")) + 
    geom_line(aes(y = fitted(fms.G), color = "GAM")) + 
    geom_line(aes(y = fitted(fms.B), color = "Beta"))
  
  ## Prediction based on GAM
  prd <- predict_gam(fms.G, interval = "confidence")
  
  SoybeanAG <- cbind(Soybean, prd)
  
  ggplot(data = SoybeanAG, aes(x = Time, y = weight)) + 
    geom_point() + 
    geom_line(aes(y = fitted(fms.G), color = "GAM")) + 
    geom_ribbon(aes(ymin = Q2.5, ymax = Q97.5), fill = "purple", alpha = 0.3)

  ## Prediction based on beta
  prdc <- predict_nls(fms.B, interval = "confidence")
  prdp <- predict_nls(fms.B, interval = "prediction")
  colnames(prdp) <- paste0("p", c("Estimate", "Est.Error", "Q2.5", "Q97.5"))
  
  SoybeanAB <- cbind(Soybean, prdc, prdp)
  
  ggplot(data = SoybeanAB, aes(x = Time, y = weight)) + 
    geom_point() + 
    geom_line(aes(y = fitted(fms.B), color = "beta")) + 
    geom_ribbon(aes(ymin = Q2.5, ymax = Q97.5), fill = "purple", alpha = 0.3) + 
    geom_ribbon(aes(ymin = pQ2.5, ymax = pQ97.5), fill = "purple", alpha = 0.2)
  
  ## Clearly a gnls model would be better
  fms.Bg <- gnls(weight ~ SSbgrp(Time, w.max, lt.e, ldt), data = Soybean,
                 weights = varPower())

  IC_tab(fms.G, fms.C, fms.B, fms.Bg, criteria = "AIC")    
  
  prdc.bg <- predict_nlme(fms.Bg, interval = "confidence")
  prdp.bg <- predict_nlme(fms.Bg, interval = "prediction")  
  colnames(prdp.bg) <- paste0("p", c("Estimate", "Est.Error", "Q2.5", "Q97.5"))

  SoybeanBG <- cbind(Soybean, prdc.bg, prdp.bg)
  
  ggplot(data = SoybeanBG, aes(x = Time, y = weight)) + 
    geom_point() + 
    geom_line(aes(y = fitted(fms.Bg), color = "mean")) + 
    geom_ribbon(aes(ymin = Q2.5, ymax = Q97.5, color = "confidence"), fill = "purple", alpha = 0.3) + 
    geom_ribbon(aes(ymin = pQ2.5, ymax = pQ97.5, color = "prediction"), fill = "purple", alpha = 0.2) + 
    ggtitle("Beta growth with increasing variance is the best model")
  
  ## Looking at the barley dataset
  data(barley)
  fgb0 <- gam(yield ~ s(NF, k = 5), data = barley)
  
  fgb0p <- predict_gam(fgb0, interval = "conf")
  barleyA <- cbind(barley, fgb0p)

  prd <- predict(fgb0, se = TRUE)
  barleyG <- barley
  barleyG$fit <- prd$fit
  barleyG$lwr <- prd$fit - 1.96 * prd$se.fit
  barleyG$upr <- prd$fit + 1.96 * prd$se.fit
  
  barleyAG <- merge(barleyA, barleyG)
  
  ## It is hard to see, but the two sets of bands are nearly identical
  ggplot(data = barleyAG, aes(x = NF, y = yield)) + 
    geom_point() + 
    geom_line(aes(y = fit)) + 
    geom_line(aes(y = Estimate), linetype = 2) + 
    geom_ribbon(aes(ymin = Q2.5, ymax = Q97.5), fill = "purple", alpha = 0.2) + 
    geom_ribbon(aes(ymin = lwr, ymax = upr), fill = "yellow", alpha = 0.1)
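
  ## Numerical check of the claim above: differences between the simulation-based
  ## and the analytical bands should be small relative to the scale of yield
  ## (they are not exactly zero because of Monte Carlo error)
  max(abs(barleyAG$fit - barleyAG$Estimate))
  max(abs(barleyAG$upr - barleyAG$Q97.5))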
  
  ## What if we include a random effect of year?
  barley$year.f <- as.factor(barley$year)
  
  fgb1 <- gam(yield ~ s(NF, k = 5) + s(year.f, bs = "re"), data = barley)    

  fgb1p <- predict_gam(fgb1, interval = "conf")
  barleyA <- cbind(barley, fgb1p)

  prd <- predict(fgb1, se = TRUE)
  barleyG <- barley
  barleyG$fit <- prd$fit
  barleyG$lwr <- prd$fit - 1.96 * prd$se.fit
  barleyG$upr <- prd$fit + 1.96 * prd$se.fit
  
  barleyAG <- merge(barleyA, barleyG)
  
  ## It is hard to see, but the two sets of bands are nearly identical
  ggplot(data = barleyAG, aes(x = NF, y = yield)) + 
    facet_wrap(~year.f) + 
    geom_point() + 
    geom_line(aes(y = fit)) + 
    geom_line(aes(y = Estimate), linetype = 2) + 
    geom_ribbon(aes(ymin = Q2.5, ymax = Q97.5), fill = "blue", alpha = 0.3) + 
    geom_ribbon(aes(ymin = lwr, ymax = upr), fill = "white", alpha = 0.3)
  
  f1 <- function(){
    dat <- data.frame(x = rnorm(10), y = rnorm(10))
    fm00 <- mgcv::gam(y ~ x, data = dat)
    ans <- simulate_gam(fm00)
    ans
  }
  ## For GAM objects this works even when the model is fitted inside a function,
  ## because I use 'predict.gam' under the hood with type = "lpmatrix"
  res1 <- f1()
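
  ## Minimal check that simulating from a model fitted inside a function
  ## returned usable values (assuming the default nsim and 10 observations)
  stopifnot(is.numeric(as.matrix(res1)), nrow(as.matrix(res1)) == 10)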

}
