tests/test_simulate_nlme.R

## Testing simulate_nlme levels
require(nlme)
require(nlraa)
require(ggplot2)
require(car)

run.test.simulate.nlme <- Sys.info()[["user"]] == "fernandomiguez" && FALSE

if(run.test.simulate.nlme){

  data(Orange)

  fitL <- nlsList(circumference ~ SSlogis(age, Asym, xmid, scal), data = Orange)

  fmm <- nlme(fitL, random = pdDiag(Asym + xmid + scal ~ 1))

  ## This first simulation is at the level of population (fixed, level = 0)
  ## It also means that psim = 0, so there is no sampling from mvrnorm
  sim00 <- simulate_nlme(fmm, nsim = 100, psim = 0, level = 0, value = "data.frame")
  ## Level one means that we simlate at the level of individual Tree (or BLUP in this case)
  ## But psim = 0, so this is the same as predict(x, level = 0)
  sim01 <- simulate_nlme(fmm, nsim = 100, psim = 0, level = 1, value = "data.frame")

  ## What if I simulate?
  sim10 <- simulate_nlme(fmm, nsim = 100, psim = 1, level = 0, value = "data.frame")
  sim11 <- simulate_nlme(fmm, nsim = 100, psim = 1, level = 1, value = "data.frame")

  ## Level 2?
  ## Not sure if it is correct, but it seems to work as intended
  ## The line below does not make sense because we are adding error
  ## at the population level
  ## sim20 <- simulate_nlme(fmm, nsim = 100, psim = 2, level = 0)
  sim21 <- simulate_nlme(fmm, nsim = 100, psim = 2, level = 1, value = "data.frame")

  ggplot(data = sim00, aes(x = age, y = circumference)) + 
    geom_point() + 
    geom_line(aes(y = sim.y, group = ii)) + 
    ggtitle("psim = 0, level = 0")

  ggplot(data = sim01) + 
    geom_point(aes(x = age, y = circumference, color = Tree)) + 
    geom_line(aes(y = sim.y, x = age, color = Tree)) + 
    theme(legend.position = "none") + 
    ggtitle("psim = 0, level = 1")
    
  ggplot(data = sim10) + 
    geom_line(aes(y = sim.y, x = age, group = ii), color = "gray", alpha = 0.4) + 
    geom_point(aes(x = age, y = circumference)) + 
    theme(legend.position = "none") + 
    ggtitle("psim = 1, level = 0")

  sim11$Tree_ID <- with(sim11, paste0(Tree,"_",ii))
  
  ggplot(data = sim11) + 
    facet_wrap(~ Tree) + 
    geom_line(aes(y = sim.y, x = age, color = Tree, group = Tree_ID), alpha = 0.4) + 
    geom_point(aes(x = age, y = circumference)) + 
    theme(legend.position = "none") + 
    ggtitle("psim = 1, level = 1")
  
  ## Test with Soybean example
  data(Soybean)
  
  fmsL <- nlsList(SSlogis, Soybean)
  fmsm <- nlme(fmsL)  

  fxf <- fixef(fmsm)
  fmsm2 <- update(fmsm, fixed = list(Asym + xmid + scal ~ Variety),
                  start = c(fxf[1], 0, fxf[2], 0, fxf[3], 0),
                  random = pdDiag(Asym + xmid + scal ~ 1))
  
  fmsm.s <- simulate_nlme(fmsm2, nsim = 100, value = "data.frame")

  fmsm.s$ID <- with(fmsm.s, paste0(ii,"_",Plot))
  
  ggplot(data = fmsm.s, aes(x = Time, y = weight)) + 
    facet_grid(Variety  ~ Year) + 
    geom_line(aes(x = Time, y = sim.y, group = ID, color = Variety), alpha = 0.5) + 
    geom_point() 

  ## Test with CO2 example
  data(CO2)
  
  fmL <- nlsList(SSasymp, CO2)
  fmcm <- nlme(fmL, random = pdDiag(Asym + R0 ~ 1))
  
  fxf <- fixef(fmcm)
  fmcm2 <- update(fmcm, fixed = list(Asym + R0 + lrc ~ Treatment),
                  start = c(fxf[1], 0, fxf[2], 0, fxf[3], 0))

  fmcm2.s <- simulate_nlme(fmcm2, nsim = 100, value = "data.frame")
 
  fmcm2.s$ID <- with(fmcm2.s, paste0(ii,"_",Plant))
  
  ggplot(data = fmcm2.s, aes(x = conc, y = uptake)) + 
    facet_grid(Type ~ Treatment) + 
    geom_line(aes(x = conc, y = sim.y, group = ID, color = Treatment), alpha = 0.5) + 
    geom_point() 
  
  ## What about bootstrap?
  system.time(fmcm2.bt <- boot_nlme(fmcm2, cores = 4)) ## 35 seconds on my laptop MacBook Pro
  
  hist(fmcm2.bt, 2) 

  ## predictions
  ## Incorporate the fixed effect of type
  fxf2 <- fixef(fmcm2)
  fmcm3 <- update(fmcm2, fixed = list(Asym + R0 + lrc ~ Treatment + Type),
                  start = c(fxf2[1:2], 0, fxf2[3:4], 0, fxf2[5:6], 0),
                  weights = varPower())  
  
  fxf3 <- fixef(fmcm3)
  fmcm4 <- update(fmcm3, fixed = list(Asym + R0 + lrc ~ Treatment + Type + Treatment:Type),
                  start = c(fxf3[1:3], 0, fxf3[4:6], 0, fxf3[7:9], 0))
  
  prd_fun <- function(x) predict(x, level = 0)
  
  fmcm4.bt2 <- boot_nlme(fmcm4, prd_fun, cores = 4)
  
  CO2$lwr <- apply(t(fmcm4.bt2$t), 1, quantile, probs = 0.05, na.rm = TRUE)
  CO2$upr <- apply(t(fmcm4.bt2$t), 1, quantile, probs = 0.95, na.rm = TRUE)

  CO2$ID <- with(CO2, paste0(Treatment,"_",Type))
  
  ggplot(data = CO2, aes(x = conc, y = uptake)) + 
    facet_grid( Type ~ Treatment) + 
    geom_ribbon(aes(x = conc, ymin = lwr, ymax = upr, group = ID, fill = Treatment), alpha = 0.5) + 
    geom_point() 
  
  ## Trying the nested approach
  fmcm4 <- update(fmcm2, random = list(Type = pdDiag(Asym + R0 ~ 1),
                                       Plot = list(Asym + R0 ~ 1)),
                  groups = ~Type/Plant)
  
  prd0 <- predict(fmcm4, newdata = CO2, level = 1)

  fmcm4.s <- simulate_nlme(fmcm4, nsim = 100, value = "data.frame")
  ## This shows that while predict.nlme fails, simulate_nlme works 
  ## (or seems to work)
  
  fmcm4.s$ID <- with(fmcm4.s, paste0(ii,"_",Plant))
  
  ggplot(data = fmcm4.s, aes(x = conc, y = uptake)) + 
    facet_grid( Type ~ Treatment) + 
    geom_line(aes(x = conc, y = sim.y, group = ID, color = Treatment), alpha = 0.5) + 
    geom_point() 
  
  ## GAMS don't quite get it right
  library(mgcv)
  CO2$id <- as.factor(with(CO2, paste0(Type, "_", Treatment)))
  fit.gam <- gam(uptake ~ Type*Treatment + s(conc, k = 3, by = id), data = CO2)
  # 
  ggplot(data = CO2, aes(x = conc, y = uptake)) + 
     facet_grid( Type ~ Treatment) + 
     geom_line(aes(y = fitted(fit.gam))) + 
     geom_point()
  
  ## Compared to gnls model
  fit.lis <- nlsLMList(uptake ~ SSnrh(conc, asym, phi, theta, rd), data = CO2)
  
  plot(fit.lis)
  
  fmco2 <- nlme(fit.lis, random = pdDiag(asym + phi + rd ~ 1))
  
  fmco2.2 <- update(fmco2, fixed = asym + phi + theta + rd ~ Type,
                    start = c(fixef(fmco2)[1], 0, 
                              fixef(fmco2)[2], 0, 
                              fixef(fmco2)[3], 0,
                              fixef(fmco2)[4], 0))

  fmco2.3 <- update(fmco2, fixed = asym + phi + theta + rd ~ Type + Treatment,
                    start = c(fixef(fmco2.2)[1:2], 0, 
                              fixef(fmco2.2)[3:4], 0, 
                              fixef(fmco2.2)[5:6], 0,
                              fixef(fmco2.2)[7:8], 0))
  
  ## Turns out this model has too many parameters and they are not stable
  ## for this reason the predictions are all over the place
  fmco2.4 <- update(fmco2, fixed = asym + phi + theta + rd ~ Type + Treatment + Type:Treatment,
                    start = c(fixef(fmco2.3)[1:3], 0, 
                              fixef(fmco2.3)[4:6], 0, 
                              fixef(fmco2.3)[7:9], 0,
                              fixef(fmco2.3)[10:12], 0))
  
  prd <- predict_nlme(fmco2.4, interval = "conf", level = 0.9)
  CO2A <- cbind(CO2, prd)
  
  CO2AS <- subset(CO2A, Type == "Quebec")
  ggplot(data = CO2AS, aes(x = conc, y = uptake)) + 
    facet_grid( Type ~ Treatment) + 
    geom_line(aes(y = Estimate)) +
    geom_ribbon(aes(ymin = Q5, ymax = Q95), fill = "purple", alpha = 0.3) + 
    geom_point()
  
  ## A simpler model is better
  fmco2.5 <- update(fmco2.4, fixed = list(asym + phi ~ Type + Treatment + Type:Treatment,
                                        theta + rd ~ 1),
                    start = c(fixef(fmco2.3)[1:3], 0, 
                              fixef(fmco2.3)[4:6], 0, 
                              fixef(fmco2.3)[7],
                              fixef(fmco2.3)[10]))
  
  prd <- predict_nlme(fmco2.5, interval = "conf", level = 0.9)
  CO2A <- cbind(CO2, prd)
  
  ggplot(data = CO2A, aes(x = conc, y = uptake)) + 
    facet_grid( Type ~ Treatment) + 
    geom_line(aes(y = Estimate)) +
    geom_ribbon(aes(ymin = Q5, ymax = Q95), fill = "purple", alpha = 0.3) + 
    geom_point()
  
  fit.gnls <- gnls(uptake ~ SSnrh(conc, asym, phi, theta, rd), data = CO2,
                   params = list(asym + phi ~ Type*Treatment, theta + rd ~ 1),
                   start = c(35.35, 0, 0, 0, 0.1577, 0, 0, 0, 0.955, 2.35))

  ## This shows that a 'good' nls model is better than a 'GAM' but 'gams'
  ## are much, much more flexible
  IC_tab(fit.gnls, fit.gam)
  
  ## Testing the implementation of psim = 3 for nlme objects
  ## Picking up the previous Soybean model
  data(Soybean)
  
  fmsL <- nlsList(SSlogis, Soybean)
  fmsm <- nlme(fmsL)  
  
  fxf <- fixef(fmsm)
  fmsm2 <- update(fmsm, fixed = list(Asym + xmid + scal ~ Variety),
                  start = c(fxf[1], 0, fxf[2], 0, fxf[3], 0),
                  random = pdDiag(Asym + xmid + scal ~ 1))
  
  ## Ok, this seems to work
  sim2.psim2 <- simulate_nlme(fmsm2, nsim = 1, 
                              psim = 2, level = 1,
                              value = "data.frame")
  
  sim2.psim2$id <- with(sim2.psim2, paste(Plot, Variety, sep = "_"))
  
  ggplot(data = sim2.psim2, aes(x = Time, y = weight)) + 
    geom_line(aes(x = Time, y = sim.y, group = id), color = "gray", alpha = 0.4) + 
    geom_point() 
  
  ## psim = 3 is not specific for a given subject
  sim2.psim3 <- simulate_nlme(fmsm2, nsim = 10, 
                              psim = 3, level = 1,
                              value = "data.frame")

  sim2.psim3$id <- with(sim2.psim3, paste(Plot, Variety, ii, sep = "_"))
  
  ggplot(data = sim2.psim3, aes(x = Time, y = weight)) + 
    geom_line(aes(x = Time, y = sim.y, group = id), color = "gray", alpha = 0.3) + 
    geom_point() 
  
  ## Since psim = 3 is not specific to an individual subject
  ## we should not expect for the lines to vary around the observed data
  ## in each individual panel. If that is what you want then
  ## you need to use psim = 2
  ggplot(data = sim2.psim3, aes(x = Time, y = weight)) + 
    facet_wrap(~ Variety * Plot) + 
    geom_line(aes(x = Time, y = sim.y, group = ii), color = "gray", alpha = 0.3) + 
    geom_point() 
  
  fmsm3 <- update(fmsm2, random = list(Year = pdDiag(Asym + xmid + scal ~ 1),
                                       Plot = pdDiag(Asym + xmid + scal ~ 1)),
                  groups = ~Year/Plot)
  
  ## I won't check if this gives you the right answer at the moment, but it runs
  sim3.psim3 <- simulate_nlme(fmsm3, nsim = 10, psim = 3, value = "data.frame")
  
  sim3.psim3$id <- with(sim3.psim3, paste(Plot, Variety, Year, ii, sep = "_"))
  
  ggplot(data = sim3.psim3, aes(x = Time, y = weight)) + 
    geom_point() + 
    geom_line(aes(x = Time, y = sim.y, group = id), alpha = 0.1)
  
  ggplot(data = sim3.psim3, aes(x = Time, y = weight)) + 
    facet_wrap(~ Year * Variety) + 
    geom_point() + 
    geom_line(aes(x = Time, y = sim.y, group = id), alpha = 0.1)

  varT <- var(Soybean$weight)  
  
  sim3.psim2.2 <- simulate_nlme(fmsm3, nsim = 2e3, psim = 2)
  sim3.psim3.2 <- simulate_nlme(fmsm3, nsim = 2e3, psim = 3)
  
  varT.psim2 <- apply(sim3.psim2.2, 2, var, na.rm = TRUE)  
  varT.psim3 <- apply(sim3.psim3.2, 2, var, na.rm = TRUE)  
  
  ggplot() + 
    geom_density(aes(x = varT.psim2, color = "psim = 2")) + 
    geom_density(aes(x = varT.psim3, color = "psim = 3")) + 
    geom_vline(xintercept = varT)
  ## Unexpectedly, this shows that the variance for psim = 3
  ## is lower than the variance for psim2, perhaps I need to do more simulations?
  ## Try a different dataset
  
  data(Orange)
  fitL <- nlsList(circumference ~ SSlogis(age, Asym, xmid, scal), data = Orange)
  fmm <- nlme(fitL, random = pdDiag(Asym + xmid + scal ~ 1))
  
  varT <- var(Orange$circumference)
  fmm.sim.psim2 <- simulate_nlme(fmm, nsim = 1e3, psim = 2)  
  fmm.sim.psim3 <- simulate_nlme(fmm, nsim = 1e3, psim = 3)  
 
  varT.psim2 <- apply(fmm.sim.psim2, 2, var, na.rm = TRUE)  
  varT.psim3 <- apply(fmm.sim.psim3, 2, var, na.rm = TRUE)  
  
  ggplot() + 
    geom_density(aes(x = varT.psim2, color = "psim = 2")) + 
    geom_density(aes(x = varT.psim3, color = "psim = 3")) + 
    geom_vline(xintercept = varT) 
  
  ## Prediction Interval for the mean of a NEW Tree
  nprd <- simulate_nlme(fmm, nsim = 1e3, psim = 3, value = "data.frame")  
  
  ## First step: remove the observation-level variation
  nprdA1 <- aggregate(sim.y ~ ii + age + Tree, data = nprd, FUN = mean)
  
  nprdA <- aggregate(sim.y ~ age, data = nprdA1, FUN = median)
  nprdA$lwr <- aggregate(sim.y ~ age, data = nprdA1, FUN = quantile, probs = 0.05)$sim.y
  nprdA$upr <- aggregate(sim.y ~ age, data = nprdA1, FUN = quantile, probs = 0.95)$sim.y

  ggplot() + 
    geom_point(data = Orange, aes(x = age, y = circumference, color = Tree)) + 
    geom_line(data = nprdA, aes(x = age, y = sim.y)) + 
    geom_ribbon(data = nprdA, aes(x = age, ymin = lwr, ymax = upr), alpha = 0.3) + 
    ggtitle("90% Prediction Interval for a NEW Tree regression")
}

Try the nlraa package in your browser

Any scripts or data that you put into this service are public.

nlraa documentation built on May 29, 2024, 2:01 a.m.