Setup

library(tidyverse)
library(fst)

requireNamespace("udunits2", quietly = TRUE)
requireNamespace("knitr", quietly = TRUE)
requireNamespace("data.table", quietly = TRUE)
requireNamespace("here", quietly = TRUE)

knitr::opts_chunk$set(cache = TRUE, cache.lazy = FALSE)

This code has two required input datasets. The first -- ed_in -- contains the half-hourly ED2 outputs by variable and ensemble member. It is huge (~137 million rows) and is generated by the analysis/scratch/read_pecan_output.R script.

ed_in_file <- here::here("analysis/data/derived-data/ed-ensemble-out.fst")
ed_in <- read_fst(ed_in_file) %>% as_tibble()
ed_in
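
Because this table is so large, it can help to avoid reading all of it at once. If memory is tight, fst can read a subset of columns or rows directly from disk; for example (the column selection below is illustrative, not a prescription):

ed_in_preview <- read_fst(
  ed_in_file,
  columns = c("time", "run_id", "variable", "value"),
  from = 1,
  to = 1000000
) %>% as_tibble()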

The second -- params -- contains ED2 input parameters for each ensemble member. It is generated by the analysis/scratch/read_trait_data.R script.

params_file <- here::here("analysis/data/derived-data/ed-params.fst")
params <- read_fst(params_file) %>% as_tibble()
params

Both are currently tied to a specific workflow ID (99000000066). Eventually, I will modify this to use drake to improve reproducibility. The data are stored in fst format for fast I/O.
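
As a rough sketch of what the drake version might look like (none of this is implemented yet), the two input files could be declared as file targets so that downstream results are rebuilt only when the inputs change:

plan <- drake::drake_plan(
  ed_in = fst::read_fst(file_in("analysis/data/derived-data/ed-ensemble-out.fst")),
  params = fst::read_fst(file_in("analysis/data/derived-data/ed-params.fst"))
)
drake::make(plan)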

Most of the parameters are not currently varied by PEcAn, so there isn't much use in looking at them in the context of an ensemble analysis. The following code drops any parameter that is constant across ensemble members for every PFT.

params_sub <- params %>%
  # Keep only parameters that vary across ensemble members for at least one PFT
  semi_join(
    params %>%
      group_by(pft, variable) %>%
      filter(sd(value) > 0) %>%
      ungroup() %>%
      distinct(variable),
    by = "variable"
  ) %>%
  rename(
    parameter = variable,
    parameter_value = value
  )
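
Because the filter above keeps a parameter as long as it varies for at least one PFT, a quick sanity check (not part of the original workflow) can list any PFT-parameter combinations that are still constant:

params_sub %>%
  group_by(pft, parameter) %>%
  summarize(param_sd = sd(parameter_value)) %>%
  ungroup() %>%
  filter(param_sd == 0)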

Since we will be using these parameters as predictors, let's also create a wide version of this table.

params_wide <- spread(params_sub, parameter, parameter_value)
params_wide
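
In tidyr 1.0.0 and later, the same reshape can be written with pivot_wider(), which supersedes spread():

params_wide <- tidyr::pivot_wider(
  params_sub,
  names_from = parameter,
  values_from = parameter_value
)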

To make the ED2 output more manageable, let's calculate the annual total GPP and NPP (and convert them to more sensible units), the annual maximum LAI, and the annual maximum water table depth (taken as the minimum of the watertable variable). (We'll use data.table to make this a bit faster.)

.datatable.aware <- TRUE
data.table::setDT(ed_in)

ed_wide <- data.table::dcast(ed_in, time + run_id ~ variable)
ed_wide[, year := lubridate::year(time)]  # := adds the year column by reference

ed_summary_wide <- ed_wide[, .(
  # Annual totals: sum the half-hourly fluxes, convert kg m-2 to Mg ha-1,
  # and multiply by 60 * 30 = 1800 seconds per half-hourly time step
  GPP = udunits2::ud.convert(sum(gpp), "kg m-2", "Mg ha-1") * 60 * 30,
  NPP = udunits2::ud.convert(sum(npp), "kg m-2", "Mg ha-1") * 60 * 30,
  # Annual extremes of LAI and water table
  LAI = max(lai),
  water_table = min(watertable)
), by = .(run_id, year)]

ed_summary <- data.table::melt(
  ed_summary_wide,
  id.vars = c("run_id", "year")
) %>% as_tibble()
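
For reference on the unit arithmetic: if the fluxes are stored per second, as PEcAn's standard output convention (kg C m-2 s-1) suggests, then summing the half-hourly values and multiplying by 60 * 30 = 1800 seconds per time step integrates them over the year, and the ud.convert call rescales the result from kg m-2 to Mg ha-1 (the two multiplications commute, so their order doesn't matter):

# 1 kg m-2 is equivalent to 10 Mg ha-1
udunits2::ud.convert(1, "kg m-2", "Mg ha-1")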

Summary of output

Now, let's have a look at the results. For reference, I also include values from @hardiman_2013_maintaining in red (min-max as dashed lines, mean as solid line).

hardiman <- tribble(
  ~variable, ~low, ~mean, ~hi,
  "LAI", 1.8, 4.14, 6.56,
  "NPP", 1.68, 3.11, 7.26
)

ggplot(ed_summary) +
  aes(x = year, y = value, group = run_id) +
  geom_line(alpha = 0.2) +
  geom_hline(aes(yintercept = low), data = hardiman,
             color = "red", linetype = "dashed") +
  geom_hline(aes(yintercept = mean), data = hardiman,
             color = "red") +
  geom_hline(aes(yintercept = hi), data = hardiman,
             color = "red", linetype = "dashed") +
  facet_wrap(~variable, scales = "free_y")

Compute the average across the time series (excluding establishment years 1902-1912), and join with parameter values.

time_average <- ed_summary %>%
  filter(year > 1912) %>%
  group_by(variable, run_id) %>%
  summarize(value = mean(value)) %>%
  left_join(params_sub)

Calculate linear regressions between parameter values and time-series averages. Note that because parameters are PFT-specific, I have to fit a separate regression for each PFT (so 4 regressions per variable-parameter combination).

linmod <- time_average %>%
  group_by(variable, pft, parameter) %>%
  nest() %>%
  mutate(
    fit = suppressWarnings(map(
      data,
      possibly(lm, NULL),
      formula = value ~ parameter_value
    )),
    failed_fit = map_lgl(fit, is.null)
  ) %>%
  filter(!failed_fit) %>%
  mutate(
    r2 = map2_dbl(
      fit,
      data,
      ~suppressWarnings(modelr::rsquare(.x, .y))
    ),
    slope = map_dbl(fit, ~coefficients(.x)[[2]])
  )

Now, let's look at the top predictors:

linmod %>%
  select(variable, pft, parameter, r2, slope) %>%
  arrange(desc(r2)) %>%
  print(n = 20)
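
As a quick visual check (the object names below are just illustrative), the nested data for the strongest fit can be pulled out of linmod and plotted directly:

top_fit <- linmod %>%
  ungroup() %>%
  arrange(desc(r2)) %>%
  slice(1)

ggplot(top_fit$data[[1]]) +
  aes(x = parameter_value, y = value) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    x = paste(top_fit$pft, top_fit$parameter),
    y = top_fit$variable
  )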

LAI, by ensemble member

Load the by-PFT LAI output (generated by the analysis/scratch/read_ed_lai.R script).

lai_file <- here::here("analysis/data/derived-data/ed-lai-output.fst")
all_lai <- fst::read_fst(lai_file)

Calculate the annual maximum LAI for each PFT and ensemble member, and plot.

all_lai %>%
  group_by(year = lubridate::floor_date(month, "year"),
           pft, ensemble) %>%
  summarize(lai = max(lai)) %>%
  ungroup() %>%
  filter(pft != "Late conifer") %>%
  ggplot() +
  aes(x = year, y = lai, color = pft) +
  geom_line() +
  facet_wrap(~ensemble)

