library(tidyverse) library(fst) requireNamespace("udunits2", quietly = TRUE) requireNamespace("knitr", quietly = TRUE) requireNamespace("data.table", quietly = TRUE) requireNamespace("here", quietly = TRUE) knitr::opts_chunk$set(cache = TRUE, cache.lazy = FALSE)
This code has two requjred input datasets.
The first -- ed_in
-- contains the half-hourly ED2 outputs by variable and ensemble member.
It is huge (~137 million rows).
It is generated by the analysis/scratch/read_pecan_output.R
script.
ed_in_file <- here::here("analysis/data/derived-data/ed-ensemble-out.fst") ed_in <- read_fst(ed_in_file) %>% as_tibble() ed_in
The second -- params
-- contains ED2 input parameters for each ensemble member.
It is generated by the analysis/scratch/read_trait_data.R
script.
params_file <- here::here("analysis/data/derived-data/ed-params.fst") params <- read_fst(params_file) %>% as_tibble() params
Both are currently tied to a specific workflow ID (99000000066).
Eventually, I will modify this to use drake
to improve reproducibility.
The data are stored in fst
format for fast IO.
Most of the parameters are not currently varied by PEcAn, so there isn't much use in looking at them in the context of an ensemble analysis. The following code removes any parameters that are constant across ensemble members.
params_sub <- params %>% semi_join( params %>% group_by(pft, variable) %>% filter(sd(value) > 0) %>% ungroup() %>% distinct(variable) ) %>% rename( parameter = variable, parameter_value = value )
Since we will be using these parameters as dependent variables, let's create a form of these that is wide.
params_wide <- spread(params_sub, parameter, parameter_value) params_wide
To make the ED2 output more manageable, let's calculate the annual total GPP and NPP (and convert them to more sensible units), the max annual LAI, and the max annual water table depth.
(We'll use data.table
to make this a bit faster.)
.data.table.aware <- TRUE data.table::setDT(ed_in) ed_wide <- data.table::dcast(ed_in, time + run_id ~ variable) ed_wide <- ed_wide[, year := lubridate::year(time)] ed_summary_wide <- ed_wide[, .( GPP = udunits2::ud.convert(sum(gpp), "kg m-2", "Mg ha-1") * 60 * 30, NPP = udunits2::ud.convert(sum(npp), "kg m-2", "Mg ha-1") * 60 * 30, LAI = max(lai), water_table = min(watertable) ), by = .(run_id, year)] ed_summary <- data.table::melt( ed_summary_wide, id.vars = c("run_id", "year") ) %>% as_tibble()
Now, let's have a look at the results. For reference, I also include values from @hardiman_2013_maintaining in red (min-max as dashed lines, mean as solid line).
hardiman <- tribble( ~variable, ~low, ~mean, ~hi, "LAI", 1.8, 4.14, 6.56, "NPP", 1.68, 3.11, 7.26 ) ggplot(ed_summary) + aes(x = year, y = value, group = run_id) + geom_line(alpha = 0.2) + geom_hline(aes(yintercept = low), data = hardiman, color = "red", linetype = "dashed") + geom_hline(aes(yintercept = mean), data = hardiman, color = "red") + geom_hline(aes(yintercept = hi), data = hardiman, color = "red", linetype = "dashed") + facet_wrap(~variable, scales = "free_y")
Compute the average across the time series (excluding establishment years 1902-1912), and join with parameter values.
time_average <- ed_summary %>% filter(year > 1912) %>% group_by(variable, run_id) %>% summarize(value = mean(value)) %>% left_join(params_sub)
Calculate linear regression between parameter values and time-series averages. Note that there because parameters are PFT-specific, I have to fit regressions for each PFT parameter (so 4 regressions per variable-parameter combination).
linmod <- time_average %>% group_by(variable, pft, parameter) %>% nest() %>% mutate( fit = suppressWarnings(map( data, possibly(lm, NULL), formula = value ~ parameter_value )), failed_fit = map_lgl(fit, is.null), ) %>% filter(!failed_fit) %>% mutate( r2 = map2_dbl( fit, data, ~suppressWarnings(modelr::rsquare(.x, .y)) ), slope = map_dbl(fit, ~coefficients(.x)[[2]]) )
Now, let's look at the top predictors:
linmod %>% select(variable, pft, parameter, r2, slope) %>% arrange(desc(r2)) %>% print(n = 20)
Load the LAI output (generated by script analysis/scratch/read_ed_lai.R
).
lai_file <- here::here("analysis/data/derived-data/ed-lai-output.fst") all_lai <- fst::read_fst(lai_file)
Calculate annual maximum and plot.
all_lai %>% group_by(year = lubridate::floor_date(month, "year"), pft, ensemble) %>% summarize(lai = max(lai)) %>% ungroup() %>% filter(pft != "Late conifer") %>% ggplot() + aes(x = year, y = lai, color = pft) + geom_line() + facet_wrap(~ensemble)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.