tests/test_optim_apsimx.R

## Testing the capability of optimizing parameters
require(apsimx)
require(ggplot2)
apsimx_options(warn.versions = FALSE)

## 1. Simulate data from a model with known parameters
## 2. Change the parameter values and some some random "noise"
## 3. Run the optimization algorithm
## 4. Compare optimized parameter values with original ones

extd.dir <- system.file("extdata", package = "apsimx")

## extd.dir <- "~/Dropbox/apsimx/inst/extdata"

## I won't routinely run this
#### First set of tests ----
if(FALSE){
  
  ## One model run takes ~4 seconds
  ## The original values for these were 1.5 for RUE and 90 for BasePhyllochron
  system.time(sim0 <- apsimx("Wheat-opt-ex.apsimx", src.dir = extd.dir))

  pp1 <- inspect_apsimx_replacement("Wheat-opt-ex.apsimx", 
                                    src.dir = extd.dir, 
                                    node = "Wheat", 
                                    node.child = "Leaf",
                                    node.subchild = "Photosynthesis",
                                    node.subsubchild = "RUE",
                                    parm = "FixedValue",
                                    print.path = TRUE,
                                    display.available = FALSE, 
                                    verbose = FALSE)
  
  pp2 <- inspect_apsimx_replacement("Wheat-opt-ex.apsimx", 
                                    src.dir = extd.dir, 
                                    node = "Wheat", 
                                    node.child = "Cultivars",
                                    node.subchild = "USA",
                                    node.subsubchild = "Yecora",
                                    parm = "BasePhyllochron",
                                    print.path = TRUE,
                                    display.available = FALSE, 
                                    verbose = FALSE)
  
  ## edit_apsimx_replacement("Wheat-opt-ex.apsimx")
  
  ## Visualize the data
  ggplot(sim0, aes(Date, Wheat.AboveGround.Wt)) + 
    geom_line()
  ggplot(sim0, aes(Date, Wheat.Leaf.LAI)) + 
    geom_line()
  ggplot(sim0, aes(Date, Wheat.Phenology.Stage)) + 
    geom_line()

  ## Create 'observed' data
  sample.dates <- seq(from = as.Date("2016-10-01"), to = as.Date("2017-06-01"), length.out = 10)

  set.seed(12345)
  obs0 <- sim0[sim0$Date %in% sample.dates, c("Date","Wheat.Phenology.Stage","Wheat.Leaf.LAI","Wheat.AboveGround.Wt")]
  obs0$Wheat.Phenology.Stage <- obs0$Wheat.Phenology.Stage + rnorm(10, 0, sd = 0.1)
  obs0$Wheat.Leaf.LAI[5:10] <- obs0$Wheat.Leaf.LAI[5:10] + rnorm(6, 0, sd = 0.5)
  obs0$Wheat.AboveGround.Wt[3:10] <- obs0$Wheat.AboveGround.Wt[3:10] + rnorm(8, 0, sd = 3)

  obsWheat <- obs0
  save(obsWheat, file = "obsWheat.rda")

  data(obsWheat)
  ## We will try to optimize only 2 parameters
  ## RUE: default 1.5
  ## Phyllochron for cultivar Yecora Fixed Value 90
  inspect_apsimx_replacement("Wheat-opt-ex.apsimx", 
                             src.dir = extd.dir, 
                             node = "Wheat", 
                             node.child = "Leaf",
                             node.subchild = "Photosynthesis",
                             node.subsubchild = "RUE",
                             parm = "FixedValue",
                             print.path = TRUE,
                             display.available = FALSE, 
                             verbose = FALSE)

  inspect_apsimx_replacement("Wheat-opt-ex.apsimx", 
                             src.dir = extd.dir, 
                             node = "Wheat", 
                             node.child = "Cultivars",
                             node.subchild = "USA",
                             node.subsubchild = "Yecora",
                             parm = "BasePhyllochron",
                             print.path = TRUE,
                             display.available = FALSE, 
                             verbose = FALSE)
  
  ## Run the model with incorrect parameters
  ## RUE = 1.2
  ## Phyllochron = 120
  tmp <- tempdir()
  setwd(tmp)
  file.copy(file.path(extd.dir, "Ames.met"), ".")
  file.copy(file.path(extd.dir, "Wheat-opt-ex.apsimx"), ".")
  
  system.time(sim.b4 <- apsimx("Wheat-opt-ex.apsimx", src.dir = tmp)) ## This takes 4-5 seconds
  
  ## write.csv(sim.b4, "wheat-sim-b4-opt.csv", row.names = FALSE)

  sim.b4.s <- subset(sim.b4, Date > as.Date("2016-09-30") & Date < as.Date("2017-07-01"))
  ## phenology
  ggplot() + 
    geom_point(data = obsWheat, aes(x = Date, y = Wheat.Phenology.Stage)) +
    geom_line(data = sim.b4.s, aes(x = Date, y = Wheat.Phenology.Stage)) + 
    ggtitle("Phenology")
  ## LAI
  ggplot() + 
    geom_point(data = obsWheat, aes(x = Date, y = Wheat.Leaf.LAI)) +
    geom_line(data = sim.b4.s, aes(x = Date, y = Wheat.Leaf.LAI)) + 
    ggtitle("LAI")
  ## Biomass
  ggplot() + 
    geom_point(data = obsWheat, aes(x = Date, y = Wheat.AboveGround.Wt)) +
    geom_line(data = sim.b4.s, aes(x = Date, y = Wheat.AboveGround.Wt)) + 
    ggtitle("Biomass (g/m2)")
  
  ## Optimization
  pp1 <- "Wheat.Leaf.Photosynthesis.RUE.FixedValue"
  pp2 <- "Wheat.Cultivars.USA.Yecora.BasePhyllochron"
  
  # edit_apsimx_replacement("Wheat-b4-opt.apsimx",
  #                         src.dir = ".",
  #                         overwrite = TRUE,
  #                         node = "Wheat", node.child = "Cultivars",
  #                         node.subchild = "USA", node.subsubchild = "Yecora",
  #                         parm = "BasePhyllochron", value = 110)
  
  ## This takes about ~7.5 minutes
  start <- Sys.time()
  wop <- optim_apsimx("Wheat-opt-ex.apsimx", 
                       parm.paths = c(pp1, pp2),
                       data = obsWheat, 
                       weights = "mean",
                       replacement = c(TRUE, TRUE),
                       initial.values = c(1.2, 120))
  end <- Sys.time()
  
  ## This took 8.8 minutes
  start <- Sys.time()
  wop.h <- optim_apsimx("Wheat-opt-ex.apsimx", 
                          src.dir = extd.dir, 
                          parm.paths = c(pp1, pp2),
                          data = obsWheat, 
                          weights = "mean",
                          replacement = c(TRUE, TRUE),
                          initial.values = c(1.2, 120),
                          hessian = TRUE)
  end <- Sys.time()
  
  sim.opt <- apsimx("Wheat-opt-ex.apsimx", src.dir = extd.dir, value = "report")  

  sim.opt.s <- subset(sim.opt, Date > as.Date("2016-09-30") & Date < as.Date("2017-07-01"))
  
  ## For vignette
  ## write.csv(sim.opt.s, file = "wheat-sim-opt.csv", row.names = FALSE)
  
  ## phenology
  ggplot() + 
    geom_point(data = obsWheat, aes(x = Date, y = Wheat.Phenology.Stage)) +
    geom_line(data = sim.opt.s, aes(x = Date, y = Wheat.Phenology.Stage)) + 
    ggtitle("Phenology")
  ## LAI
  ggplot() + 
    geom_point(data = obsWheat, aes(x = Date, y = Wheat.Leaf.LAI)) +
    geom_line(data = sim.opt.s, aes(x = Date, y = Wheat.Leaf.LAI)) + 
    ggtitle("LAI")
  ## Biomass
  ggplot() + 
    geom_point(data = obsWheat, aes(x = Date, y = Wheat.AboveGround.Wt)) +
    geom_line(data = sim.opt.s, aes(x = Date, y = Wheat.AboveGround.Wt)) + 
    ggtitle("Biomass (g/m2)")

  ## What about nloptr?
  start <- Sys.time()
  wop.n <- optim_apsimx("Wheat-opt-ex.apsimx", 
                        src.dir = extd.dir, 
                        parm.paths = c(pp1, pp2),
                        data = obsWheat, 
                        weights = "mean",
                        replacement = c(TRUE, TRUE),
                        initial.values = c(1.2, 120),
                        type = "nloptr",
                        opts = list("algorithm" = "NLOPT_LN_NELDERMEAD"))
  end <- Sys.time()
  
  ## Test BayesianTools??? Would need to embbed it inside optim
  ## How long does this take in Windows laptop? 500 iter, 9 chains: 3464 seconds ~ 1 hour
  ## 500 iter, 3 chains: 18 minutes
  ## 5000 iter, 3 chains: 4 hours, but still not good enough
  start <- Sys.time()
  wop.mcmc <- optim_apsimx("Wheat-opt-ex.apsimx", 
                           src.dir = extd.dir, 
                           parm.paths = c(pp1, pp2),
                           data = obsWheat, 
                           weights = "mean",
                           replacement = c(TRUE, TRUE),
                           initial.values = c(1.2, 120),
                           type = "mcmc",
                           parallel = FALSE,
                           settings = list(iterations = 5000))
  end <- Sys.time()
  
}

#### Classic #### 
if(FALSE){

  tmp <- tempdir()
  setwd(tmp)
  
  apsim_options(warn.versions = FALSE)
  
  ## Testing optimization for an APSIM Classic example
  ## Maize
  if(file.exists("Maize.apsim")) file.remove("Maize.apsim")
  if(file.exists("Maize.xml")) file.remove("Maize.xml")
  if(file.exists("Maize.out")) file.remove("Maize.out")
  if(file.exists("Maize.sum")) file.remove("Maize.sum")
  
  extd.dir <- system.file("extdata", package = "apsimx")
  file.copy(file.path(extd.dir, "Maize-op.apsim"), "./Maize.apsim")
  file.copy("C:/Program Files (x86)/APSIM710-r4207/Model/Maize.xml", ".")

  inspect_apsim("Maize.apsim")
  
  inspect_apsim("Maize.apsim", node = "Crop",
                parm = list("Sow", 3))
  
  edit_apsim("Maize.apsim", node = "Clock",
             parm = "start_date", value = "01/01/1987",
             overwrite = TRUE)
  
  sim0 <- apsim("Maize.apsim")
  
  obsMaize <- data.frame(Date = as.Date(c("1988-02-23", "1989-02-25")),
                         biomass = c(4600, 5000),
                         yield = c(350, 570))
  
  pp1 <- inspect_apsim_xml("Maize.xml", parm = "sc401/rue")
  pp2 <- inspect_apsim_xml("Maize.xml", parm = "sc401/GNmaxCoef")
  
  ## Optimize a single parameter
  op01 <- optim_apsim("Maize.apsim", 
                     crop.file = "Maize.xml",
                     parm.paths = pp2,
                     data = obsMaize,
                     hessian = TRUE,
                     weights = "mean")
  
  
  op1 <- optim_apsim("Maize.apsim", 
                     crop.file = "Maize.xml",
                     parm.paths = c(pp1, pp2),
                     data = obsMaize,
                     weights = "mean")

  file.remove("Maize.xml")
  file.copy("C:/Program Files (x86)/APSIM710-r4207/Model/Maize.xml", ".")
  
  op2 <- optim_apsim("Maize.apsim", 
                     crop.file = "Maize.xml",
                     parm.paths = c(pp1, pp2),
                     data = obsMaize,
                     hessian = TRUE,
                     weights = "mean")
  
  # op2.u <- optim_apsim("Maize.apsim", 
  #                      crop.file = "Maize.xml",
  #                      parm.paths = c(pp1, pp2),
  #                      type = "ucminf",
  #                      data = obsMaize,
  #                      hessian = TRUE,
  #                      weights = "mean")
  
  op2nl <- optim_apsim("Maize.apsim", 
                     crop.file = "Maize.xml",
                     parm.paths = c(pp1, pp2),
                     data = obsMaize,
                     type = "nloptr",
                     weights = "mean",
                     opts = list("algorithm" = "NLOPT_LN_NELDERMEAD"))
  
  sim2 <- apsim("Maize.apsim")
  
  ## Can I also optimize management?
  pp3 <- inspect_apsim("Maize.apsim", node = "Crop",
                       parm = list("Sow", 17),
                       print.path = TRUE)

  pp4 <- inspect_apsim("Maize.apsim", node = "Crop",
                       parm = list("Sow", 28),
                       print.path = TRUE)

  ## The goal of this is to show that you 
  ## can try to optimize parameters which are in the
  ## .apsim and .xml file
  op3 <- optim_apsim("Maize.apsim", 
                     crop.file = "Maize.xml",
                     parm.paths = c(pp2, pp4),
                     xml.parm = c(TRUE, FALSE),
                     data = obsMaize,
                     weights = "mean",
                     hessian = TRUE)

  inspect_apsim("Maize.apsim", node = "Crop",
                parm = list("Sow", 17),
                print.path = TRUE)
  
  inspect_apsim("Maize.apsim", node = "Crop",
                 parm = list("Sow", 28),
                 print.path = TRUE)

  
  if(file.exists("Maize.apsim")) file.remove("Maize.apsim")
  if(file.exists("Maize.xml")) file.remove("Maize.xml")
  if(file.exists("Maize.out")) file.remove("Maize.out")
  if(file.exists("Maize.sum")) file.remove("Maize.sum")
}

## Testing the optimization of a single parameter 
## comparing methods
if(FALSE){
  
  tmp <- tempdir()
  setwd(tmp)
  
  extd.dir <- system.file("extdata", package = "apsimx")
  
  file.copy(file.path(extd.dir, "Ames.met"), ".")
  file.copy(file.path(extd.dir, "Wheat-opt-ex.apsimx"), ".")
  
  sim0 <- apsimx("Wheat-opt-ex.apsimx")

  pp1 <- "Wheat.Leaf.Photosynthesis.RUE.FixedValue"
  pp2 <- "Wheat.Cultivars.USA.Yecora.BasePhyllochron"
  
  ## Testing the "unreliable" method
  start <- Sys.time()
  wop.1 <- optim_apsimx("Wheat-opt-ex.apsimx", 
                        parm.paths = pp1,
                        data = obsWheat, 
                        replacement = TRUE,
                        initial.values = 1.2)
  end <- Sys.time() ## It took 3.25 minutes
  
  ## Plus hessian
  file.remove("Wheat-opt-ex.apsimx")
  file.copy(file.path(extd.dir, "Wheat-opt-ex.apsimx"), ".")
  
  start <- Sys.time()
  wop.h.1 <- optim_apsimx("Wheat-opt-ex.apsimx", 
                          parm.paths = pp1,
                          data = obsWheat, 
                          replacement = TRUE,
                          hessian = TRUE,
                          initial.values = 1.2)
  end <- Sys.time() ## It took 3.4 minutes
 
  ## Erase and try Brent 
  file.remove("Wheat-opt-ex.apsimx")
  file.copy(file.path(extd.dir, "Wheat-opt-ex.apsimx"), ".")
 
  start <- Sys.time()
  wop.b.1 <- optim_apsimx("Wheat-opt-ex.apsimx", 
                          parm.paths = pp1,
                          data = obsWheat, 
                          method = "Brent",
                          lower = 0.5, upper = 2,
                          replacement = TRUE,
                          initial.values = 1.2)
  end <- Sys.time() ## It took 2.18 minutes 
  ## Brent, naturally, will not provide the best solution if
  ## the solution is outside the interval (lower, upper)
  
  ## Erase and try L-BFGS-B
  file.remove("Wheat-opt-ex.apsimx")
  file.copy(file.path(extd.dir, "Wheat-opt-ex.apsimx"), ".")
  
  start <- Sys.time()
  wop.bfgs.1 <- optim_apsimx("Wheat-opt-ex.apsimx", 
                          parm.paths = pp1,
                          data = obsWheat, 
                          method = "L-BFGS-B",
                          lower = 0.5,
                          hessian = TRUE,
                          replacement = TRUE,
                          initial.values = 1.2,
                          control = list(trace = 3))
  end <- Sys.time() ## It took 7.02 minutes 
  
  ## Next step is to test whether weighting affects the estimate of the vcov
  ## and how different it is to the Bayesian analysis
  
  ## Erase and try L-BFGS-B
  file.remove("Wheat-opt-ex.apsimx")
  file.copy(file.path(extd.dir, "Wheat-opt-ex.apsimx"), ".")
  
  start <- Sys.time()
  wop.bfgs.2 <- optim_apsimx("Wheat-opt-ex.apsimx", 
                             parm.paths = pp1,
                             data = obsWheat,
                             weights = "mean",
                             method = "L-BFGS-B",
                             lower = 0.5,
                             hessian = TRUE,
                             replacement = TRUE,
                             initial.values = 1.2,
                             control = list(trace = 3))
  end <- Sys.time() ## It took 4.16 minutes 
  
  ## Testing with two parameters
  file.remove("Wheat-opt-ex.apsimx")
  file.copy(file.path(extd.dir, "Wheat-opt-ex.apsimx"), ".")
  ## Testing with two parameters
  start <- Sys.time()
  wop.2 <- optim_apsimx("Wheat-opt-ex.apsimx", 
                        parm.paths = c(pp1, pp2),
                        data = obsWheat, 
                        replacement = c(TRUE, TRUE),
                        hessian = TRUE,
                        initial.values = c(1.2, 120))
  end <- Sys.time() ## It took 15.7 minutes
  
  ## Testing with two parameters, without the hessian
  file.remove("Wheat-opt-ex.apsimx")
  file.copy(file.path(extd.dir, "Wheat-opt-ex.apsimx"), ".")
  ## Testing with two parameters
  start <- Sys.time()
  wop.nh.2 <- optim_apsimx("Wheat-opt-ex.apsimx", 
                        parm.paths = c(pp1, pp2),
                        data = obsWheat, 
                        replacement = c(TRUE, TRUE),
                        initial.values = c(1.2, 120))
  end <- Sys.time() ## It took 13.97 minutes
  
  file.remove("Wheat-opt-ex.apsimx")
  file.copy(file.path(extd.dir, "Wheat-opt-ex.apsimx"), ".")
  ## Testing with two parameters
  start <- Sys.time()
  wop.3 <- optim_apsimx("Wheat-opt-ex.apsimx", 
                        parm.paths = c(pp1, pp2),
                        data = obsWheat, 
                        type = "ucminf",
                        weights = "mean",
                        replacement = c(TRUE, TRUE),
                        hessian = TRUE,
                        initial.values = c(1.2, 120))
  end <- Sys.time() ## It took 28.97 minutes and it did not find the right answer
}

#### Testing grid method ----
if(FALSE){
  
  data(obsWheat)
  
  system.time(sim0 <- apsimx("Wheat-opt-ex.apsimx", src.dir = extd.dir)) 
  
  cmp.b4 <- compare_apsim(obsWheat, sim0, labels = c("obs", "sim"), cRSS = TRUE)
  ### pre-optimized log-RSS
  log(cmp.b4$cRSS)
  
  ## Creating a grid for two parameters
  pp1 <- inspect_apsimx_replacement("Wheat-opt-ex.apsimx", 
                                    src.dir = extd.dir, 
                                    node = "Wheat", 
                                    node.child = "Leaf",
                                    node.subchild = "Photosynthesis",
                                    node.subsubchild = "RUE",
                                    parm = "FixedValue",
                                    print.path = TRUE,
                                    display.available = FALSE, 
                                    verbose = FALSE)
  
  pp2 <- inspect_apsimx_replacement("Wheat-opt-ex.apsimx", 
                                    src.dir = extd.dir, 
                                    node = "Wheat", 
                                    node.child = "Cultivars",
                                    node.subchild = "USA",
                                    node.subsubchild = "Yecora",
                                    parm = "BasePhyllochron",
                                    print.path = TRUE,
                                    display.available = FALSE, 
                                    verbose = FALSE)

  pgrd <- expand.grid(RUE = c(0.8, 1, 1.2, 1.4, 1.6, 1.8, 2.0), 
                      BasePhyllochron = c(40, 60, 80, 100, 120, 140, 160))  
  
  tmp <- tempdir()
  setwd(tmp)
  file.copy(file.path(extd.dir, "Ames.met"), ".")
  file.copy(file.path(extd.dir, "Wheat-opt-ex.apsimx"), ".")
  
  system.time(sim.b4 <- apsimx("Wheat-opt-ex.apsimx", src.dir = tmp)) ## This takes 4-5 seconds
  xrgs <- xargs_apsimx(single.threaded = TRUE)
  system.time(sim.b4 <- apsimx("Wheat-opt-ex.apsimx", src.dir = tmp, xargs = xrgs)) ## This takes 4-5 seconds
  

  start <- Sys.time()
  wop.grd <- optim_apsimx("Wheat-opt-ex.apsimx",
                          src.dir = tmp,
                          parm.paths = c(pp1, pp2),
                          data = obsWheat, 
                          type = "grid",
                          replacement = c(TRUE, TRUE),
                          initial.values = list(RUE = 1.2, BasePhyllochron = 120),
                          verbose = TRUE,
                          grid = pgrd,
                          cores = 2)
  end <- Sys.time() ## It took 6.1 minutes 
                    ## Took 3.67 minutes using two cores and single-thread (Mac 2017)
  
  ggplot(wop.grd$res, aes(x = RUE, y = lrss)) + 
    facet_wrap(~BasePhyllochron) + 
    geom_point() + 
    geom_line()
  
  pp1 <- inspect_apsimx_replacement("Wheat-opt-ex.apsimx", 
                                    src.dir = ".", 
                                    node = "Wheat", 
                                    node.child = "Leaf",
                                    node.subchild = "Photosynthesis",
                                    node.subsubchild = "RUE",
                                    parm = "FixedValue",
                                    print.path = TRUE,
                                    display.available = FALSE, 
                                    verbose = FALSE)
 
  pp2 <- inspect_apsimx_replacement("Wheat-opt-ex.apsimx", 
                                    src.dir = ".", 
                                    node = "Wheat", 
                                    node.child = "Cultivars",
                                    node.subchild = "USA",
                                    node.subsubchild = "Yecora",
                                    parm = "BasePhyllochron",
                                    print.path = TRUE,
                                    display.available = FALSE, 
                                    verbose = FALSE)
  
 ## Compare to optim
  start <- Sys.time()
  wop.nm <- optim_apsimx("Wheat-opt-ex.apsimx",
                          src.dir = tmp,
                          parm.paths = c(pp1, pp2),
                          data = obsWheat, 
                          replacement = c(TRUE, TRUE),
                          initial.values = list(RUE = 1.8, BasePhyllochron = 120))
  end <- Sys.time() ## It took 5.7 minutes 
  
  ### Maybe we need a fine grid around the true values?
  pgrd3 <- expand.grid(RUE = c(1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9), 
                      BasePhyllochron = c(70, 75, 80, 85, 90, 95, 100))  
  
  start <- Sys.time()
  wop.grd3 <- optim_apsimx("Wheat-opt-ex.apsimx",
                          src.dir = tmp,
                          parm.paths = c(pp1, pp2),
                          data = obsWheat, 
                          type = "grid",
                          replacement = c(TRUE, TRUE),
                          initial.values = list(RUE = 1.5, BasePhyllochron = 90),
                          verbose = TRUE,
                          grid = pgrd3,
                          cores = 3)
  end <- Sys.time() ## It took 6.63 minutes 
  
  ggplot(wop.grd3$res, aes(x = RUE, y = lrss)) + 
    facet_wrap(~BasePhyllochron) + 
    geom_point() + 
    geom_line()

  system.time(sim.a4 <- apsimx("Wheat-opt-ex.apsimx", src.dir = tmp)) ## This takes 4-5 seconds
  wop.grd3  
  cmp <- compare_apsim(obsWheat, sim.a4, labels = c("obs", "sim"), cRSS = TRUE)
}

Try the apsimx package in your browser

Any scripts or data that you put into this service are public.

apsimx documentation built on Sept. 11, 2024, 5:42 p.m.