knitr::opts_chunk$set(dpi=100, out.width="700px", out.height="500px") library(apsimx) library(ggplot2) library(lubridate) library(dplyr) library(mgcv) library(tidyr) library(caret) library(ggplot2) library(apsimo) library(gganimate)
Statistical models can be used to emulate (approximate) complex computer models, such as APSIM.
Predict the $i^{th}$ observation by modeling the previous $i-1$ observations, then update the model and predict the $(i+1)^{th}$ observation.
#Load datasets from package data("training") data("testing") ggplot(data = (testing %>% filter(year == 2005))[120:275,]) + geom_line(aes(x=Date, y=Maize.AboveGround.Wt)) + geom_line(aes(x=Date, y=online_emulate_maize(training, testing %>% filter(year == 2005), pred_var = "Maize.AboveGround.Wt", method = "ar", pred_type = "online_total_local")[120:275]), col = "red", linetype = "dashed") + transition_reveal(yday)
It may seem silly to predict the next observation using a statistical model when we are only running a single APSIM simulation...
But, online prediction can be seen as a first step toward full emulation of APSIM.
I work for the National Resources Inventory, which conducts land-use / erosion monitoring through complex geographical survey techniques over all non-Federal lands in the United States.
knitr::include_graphics("/Users/labuzzetta/Downloads/NRI.png")
I used a simple APSIM simulation, as learning the basics of APSIM was one of my goals for this project.
Covariates:
data("simulation") print(colnames(simulation[c(5:11,22)]))
Outputs:
data("simulation") print(colnames(simulation[c(12:21)]))
data("simulation") report <- simulation #Training Data Yield Figure ggplot(data = report %>% filter(year(Date) < 2005) %>% mutate(Year = as.factor(lubridate::year(Date)), `Above Ground Biomass (g/m^2)` = Maize.AboveGround.Wt, `Day of Year` = lubridate::yday(Date)), aes(x = `Day of Year`, y = `Above Ground Biomass (g/m^2)`, col = Year)) + geom_line()
data("simulation") report <- simulation #Testing and Validation Data Yield Figure ggplot(data = report %>% filter(year(Date) >= 2005) %>% mutate(Year = as.factor(lubridate::year(Date)), `Above Ground Biomass (g/m^2)` = Maize.AboveGround.Wt, `Day of Year` = lubridate::yday(Date)), aes(x = `Day of Year`, y = `Above Ground Biomass (g/m^2)`, col = Year)) + geom_line()
data("testing") data("training") trial1 <- apsimo::online_emulate_maize(training, testing, pred_var = "Maize.AboveGround.Wt", pred_type = "online_total_full", method = "lm") `Maize AboveGround Wt` <- trial1 ggplot(data = testing) + geom_line(aes(x=Date, y=`Maize AboveGround Wt`), col = "red", linetype = "dashed") + geom_line(aes(x=Date, y=Maize.AboveGround.Wt))
Definitely violate the independence assumption... why?
The residuals here are also ugly.
data("testing") data("training") trial2 <- apsimo::online_emulate_maize(training, testing, pred_var = "Maize.AboveGround.Wt", pred_type = "online_change_full", method = "lm") ggplot(data = testing) + geom_line(aes(x=Date, y=Maize.AboveGround.Wt)) + geom_line(aes(x=Date, y=trial2 %>% purrr::accumulate(`+`)), col = "red", linetype = "dashed")
data("testing") data("training") trial3 <- apsimo::online_emulate_maize(training, testing, pred_var = "Maize.AboveGround.Wt", pred_type = "online_total_full", method = "ar") ggplot(data = testing) + geom_line(aes(x=Date, y=Maize.AboveGround.Wt)) + geom_line(aes(x=Date, y=trial3), col = "red", linetype = "dashed")
data("testing") data("training") trial4 <- apsimo::online_emulate_maize(training, testing, pred_var = "Maize.AboveGround.Wt", pred_type = "online_total_full", method = "gam") ggplot(data = testing) + geom_line(aes(x=Date, y=Maize.AboveGround.Wt)) + geom_line(aes(x=Date, y=trial4), col = "red", linetype = "dashed")
data("testing") data("training") trial5 <- apsimo::online_emulate_maize(training, testing, pred_var = "Maize.AboveGround.Wt", pred_type = "online_total_local", method = "gam") ggplot(data = testing) + geom_line(aes(x=Date, y=Maize.AboveGround.Wt)) + geom_line(aes(x=Date, y=trial5), col = "red", linetype = "dashed")
data("testing") data("training") trial6 <- apsimo::online_emulate_maize(training, testing, pred_var = "Maize.AboveGround.Wt", pred_type = "no_update", method = "rf") ggplot(data = testing) + geom_line(aes(x=Date, y=Maize.AboveGround.Wt)) + geom_line(aes(x=Date, y=trial6), col = "red", linetype = "dashed")
methods <- c("LM: total", "LM: daily change", "AR", "GAM: full", "GAM: local", "RF") rmse <- c(RMSE(trial1, testing$Maize.AboveGround.Wt), RMSE(trial2 %>% purrr::accumulate(`+`), testing$Maize.AboveGround.Wt), RMSE(trial3, testing$Maize.AboveGround.Wt), RMSE(trial4, testing$Maize.AboveGround.Wt), RMSE(trial5, testing$Maize.AboveGround.Wt), RMSE(trial6, testing$Maize.AboveGround.Wt)) knitr::kable(cbind(methods, rmse=trunc(rmse)))
data("testing") data("training") trial7 <- apsimo::online_emulate_maize(training, testing, pred_var = "Maize.Leaf.Transpiration", pred_type = "online_total_local", method = "gam") ggplot(data = testing[1:365,]) + geom_line(aes(x=Date, y=Maize.Leaf.Transpiration)) + geom_line(aes(x=Date, y=trial7[1:365]), col = "red", linetype = "dashed")
data("testing") data("training") trial7 <- apsimo::online_emulate_maize(training, testing, pred_var = "Soil.SoilWater.Runoff", pred_type = "no_update", method = "rf") ggplot(data = testing) + geom_point(aes(x=Date, y=Soil.SoilWater.Runoff)) + geom_point(aes(x=Date, y=trial7), col = "red")
Online prediction can be used to approximate daily APSIM output.
Complex techniques might be needed to fully emulate APSIM
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.