FluxSynthU: Suite of functions to analyze eddy covariance data

Documented in fGPPandNEELightRespCoeff

#' This function takes a dataframe containing PPFD, GPP, and NEE variable columns and returns the fit coefficients for GPP light response curves.
#'
#' @export
#' @title Estimate light response coefficients
#' @param dat a dataframe containing timestamped columns of PPFD, GPP, and NEE
#' @param PPFD_col the name of the column containing PPFD data.
#' @param GPP_col the name of the column containing GPP data.
#' @param NEE_col the name of the column containing NEE data.


# fGPPandNEELightRespCoeff
# 200826, KS
# This function has been updated to accept a dataframe where the user specifies which columns are PPFD, GPP, and NEE, respectively (prev versions would only accept these as vectors)
# This version also tries to determine measures of data availability around each GPPsat estimate. Rationale below:
# Take a window of n = 5 days, and say that we're subsetting half-hourly data, this will give us 5 days x 48 half-hours = 240 rows of data
# Now, let's say we only have 197 observations of GPP (82.1%). We also need to subset our GPP observations so that we are omitting nighttime data, and let's say we omit 61 rows.
# Now, we only have 136 GPP datapoints out of 240 (56.7%).
# But in reality, we need to also omit the same number of points from the total interval size, so 240 - 61 = 179.
# Now, the total percentage of available GPP data in each time interval is 136/179 = 76.0%
# Based on this rationale, this update now calculates the total number of GPP/NEE datapoints before and after subsetting for daytime data.

fGPPandNEELightRespCoeff <- function(dat, PPFD_col, GPP_col, NEE_col) {
  # arguments
  # PPFD_col
  # GPP_col
  # NEE_col


  # -------------- GPP analysis --------------------
  # assign x and y for GPP curve fitting

  PPFD <- dat[,colnames(dat) %in% PPFD_col]
  GPP <- dat[,colnames(dat) %in% GPP_col]
  NEE <- dat[,colnames(dat) %in% NEE_col]

  # omit all 3 variables if one is NA
  test <- is.na(PPFD)|is.na(GPP)|is.na(NEE)
  PPFD[test] <- NA
  GPP[test] <- NA
  NEE[test] <- NA


  x <- PPFD
  y <- GPP

  # Calculate the total number of GPP observations in the interval
  n_GPP_tot <- sum(!is.na(y))

  # omit night data for GPP analysis (PPFD x < 5)
  test <- x < 5
  x <- x[!test]
  y <- y[!test]

  # Calculate the remaining number of GPP observations in the interval (as well as the number omitted)
  n_GPP_included <- sum(!is.na(y))
  n_GPP_omitted <- sum(test)


  # linear model for GPP
  lin_formula <- lm(y~x-1) # linear model of y as a function of x (Note: the '-1' suppresses the intercept term)
  sum.lin <- summary(lin_formula) # provides summary of model fit

  # linear fit parameters for GPP
  m_lin_GPP <- sum.lin$coefficients[1]     # m = slope
  R2_lin_GPP <- sum.lin$r.squared          # R2
  adj.R2_lin_GPP <- sum.lin$adj.r.squared  # adjusted R2
  RMSE_lin_GPP <- sum.lin$sigma            # root mean square error
  pval_lin_GPP <- pf(sum.lin$fstatistic[1L], sum.lin$fstatistic[2L], sum.lin$fstatistic[3L], lower.tail = FALSE)  # p-value

  # nonlinear least squares model for GPP (2 fit coeffs)
  nls_formula <- formula(y ~ (a*x)/(1 + b*x))   # non-linear least squares model
  mod <- nls(nls_formula, start = c(a=1, b=1))  # performs fit
  sum.nls <- summary(mod)                       # provides summary of model fit

  # nls fit parameters for GPP (2 fit coeffs)
  a_nls_GPP <- sum.nls$coefficients[1]              # a parameter
  b_nls_GPP <- sum.nls$coefficients[2]              # b parameter
  SSE_nls_GPP <- sum((y - predict(mod))^2)          # sum of squares due to error
  SST_nls_GPP <- sum((y - mean(y))^2)               # sum of squares about the mean
  R2_nls_GPP <- 1 - SSE_nls_GPP/SST_nls_GPP                 # R2
  adj.R2_nls_GPP <- 1 - (SSE_nls_GPP*(length(y) - 1))/(SST_nls_GPP * df.residual(mod))  # adjusted R2
  RMSE_nls_GPP <- sum.nls$sigma                     # root mean square error

  # nls fit parameters for GPP (fixed b coeff)
  # nonlinear least squares model (modified for constant b parameter)
  nls_formula_fixed <- formula(y ~ (a*x)/(1 + 0.002*x))   # non-linear least squares model
  mod_fixed <- nls(nls_formula_fixed, start = c(a=1))     # performs fit
  sum.nls_fixed <- summary(mod_fixed)                     # provides summary of model fit
  # nls fit parameters for GPP (fixed b coeff)
  a_nls_fixed_GPP <- sum.nls_fixed$coefficients[1]        # a parameter
  b_nls_fixed_GPP <- 0.002                                # fixed b parameter
  SSE_nls_fixed_GPP <- sum((y - predict(mod_fixed))^2)    # sum of squares due to error
  SST_nls_fixed_GPP <- sum((y - mean(y))^2)               # sum of squares about the mean
  R2_nls_fixed_GPP <- 1 - SSE_nls_fixed_GPP/SST_nls_fixed_GPP     # R2
  adj.R2_nls_fixed_GPP <- 1 - (SSE_nls_fixed_GPP*(length(y) - 1))/(SST_nls_fixed_GPP * df.residual(mod_fixed))  # adjusted R2
  RMSE_nls_fixed_GPP <- sum.nls_fixed$sigma               # root mean square error





  # -------------- end of GPP analysis --------------------

  # -------------- beginning of NEE analysis --------------------


  PPFD <- dat[,colnames(dat) %in% PPFD_col]
  GPP <- dat[,colnames(dat) %in% GPP_col]
  NEE <- dat[,colnames(dat) %in% NEE_col]

  # omit all 3 variables if one is NA
  test <- is.na(PPFD)|is.na(GPP)|is.na(NEE)
  PPFD[test] <- NA
  GPP[test] <- NA
  NEE[test] <- NA


  # assign x and y for NEE curve fitting
  x <- PPFD
  y <- NEE

  # Calculate the total number of NEE observations in the interval
  n_NEE_tot <- sum(!is.na(y))

  # omit night data for GPP analysis (PPFD x < 5)
  test <- x < 5

  if (sum(test != 0)) {
    Reco_NEE <- mean(y[test], na.rm = TRUE)
  } else {
    Reco_NEE <- 0
  }


  x <- x[!test]
  y <- -1*(y[!test] - Reco_NEE)

  # Calculate the remaining number of NEE observations in the interval (as well as the number omitted)
  n_NEE_included <- sum(!is.na(y))
  n_NEE_omitted <- n_NEE_tot - n_NEE_included


  # linear model for NEE fit
  lin_formula <- lm(y~x-1) # linear model of y as a function of x (Note: the '-1' suppresses the intercept term)
  sum.lin <- summary(lin_formula) # provides summary of model fit

  # linear fit parameters for NEE fit
  m_lin_NEE <- sum.lin$coefficients[1]     # m = slope
  R2_lin_NEE <- sum.lin$r.squared          # R2
  adj.R2_lin_NEE <- sum.lin$adj.r.squared  # adjusted R2
  RMSE_lin_NEE <- sum.lin$sigma            # root mean square error
  pval_lin_NEE <- pf(sum.lin$fstatistic[1L], sum.lin$fstatistic[2L], sum.lin$fstatistic[3L], lower.tail = FALSE)  # p-value

  # nonlinear least squares model for NEE fit (2 fit coeffs)
  nls_formula <- formula(y ~ (a*x)/(1 + b*x))   # non-linear least squares model
  mod <- nls(nls_formula, start = c(a=1, b=1))  # performs fit
  sum.nls <- summary(mod)                       # provides summary of model fit

  # nls fit parameters for NEE fit (2 fit coeffs)
  a_nls_NEE <- sum.nls$coefficients[1]              # a parameter
  b_nls_NEE <- sum.nls$coefficients[2]              # b parameter
  SSE_nls_NEE <- sum((y - predict(mod))^2)          # sum of squares due to error
  SST_nls_NEE <- sum((y - mean(y))^2)               # sum of squares about the mean
  R2_nls_NEE <- 1 - SSE_nls_NEE/SST_nls_NEE                 # R2
  adj.R2_nls_NEE <- 1 - (SSE_nls_NEE*(length(y) - 1))/(SST_nls_NEE * df.residual(mod))  # adjusted R2
  RMSE_nls_NEE <- sum.nls$sigma                     # root mean square error

  # nls fit parameters for NEE fit (fixed b coeff)
  # nonlinear least squares model (modified for constant b parameter)
  nls_formula_fixed <- formula(y ~ (a*x)/(1 + 0.002*x))   # non-linear least squares model
  mod_fixed <- nls(nls_formula_fixed, start = c(a=1))     # performs fit
  sum.nls_fixed <- summary(mod_fixed)                     # provides summary of model fit
  # nls fit parameters for NEE fit (fixed b coeff)
  a_nls_fixed_NEE <- sum.nls_fixed$coefficients[1]        # a parameter
  b_nls_fixed_NEE <- 0.002                                # fixed b parameter
  SSE_nls_fixed_NEE <- sum((y - predict(mod_fixed))^2)    # sum of squares due to error
  SST_nls_fixed_NEE <- sum((y - mean(y))^2)               # sum of squares about the mean
  R2_nls_fixed_NEE <- 1 - SSE_nls_fixed_NEE/SST_nls_fixed_NEE     # R2
  adj.R2_nls_fixed_NEE <- 1 - (SSE_nls_fixed_NEE*(length(y) - 1))/(SST_nls_fixed_NEE * df.residual(mod_fixed))  # adjusted R2
  RMSE_nls_fixed_NEE <- sum.nls_fixed$sigma               # root mean square error


  # -------------- end of NEE analysis --------------------

  # summary information is returned, including the information on each line below
  #            1) linear fit for GPP (line 1 below)
  #            2) nonlinear fit for GPP with a,b (line 2 below)
  #            3) nonlinear fit for GPP with fixed b (line 3 below)
  #            4) linear fit for NEE (line 1 below)
  #            5) nonlinear fit for NEE with a,b (line 2 below)
  #            6) nonlinear fit for NEE with fixed b (line 3 below)
  summary <- as.numeric(c(n_GPP_tot, n_GPP_included, m_lin_GPP, R2_lin_GPP, adj.R2_lin_GPP, RMSE_lin_GPP, pval_lin_GPP,
                          a_nls_GPP, b_nls_GPP, R2_nls_GPP, adj.R2_nls_GPP, RMSE_nls_GPP,
                          a_nls_fixed_GPP, b_nls_fixed_GPP, R2_nls_fixed_GPP, adj.R2_nls_fixed_GPP, RMSE_nls_fixed_GPP,
                          n_NEE_tot, n_NEE_included, m_lin_NEE, R2_lin_NEE, adj.R2_lin_NEE, RMSE_lin_NEE, pval_lin_NEE,
                          a_nls_NEE, b_nls_NEE, R2_nls_NEE, adj.R2_nls_NEE, RMSE_nls_NEE,
                          a_nls_fixed_NEE, b_nls_fixed_NEE, R2_nls_fixed_NEE, adj.R2_nls_fixed_NEE, RMSE_nls_fixed_NEE))
  names(summary) <- c("n_GPP_tot", "n_GPP_included", "m_lin_GPP", "R2_lin_GPP", "adj.R2_lin_GPP", "RMSE_lin_GPP", "pval_lin_GPP",
                      "a_nls_GPP", "b_nls_GPP", "R2_nls_GPP", "adj.R2_nls_GPP", "RMSE_nls_GPP",
                      "a_nls_fixed_GPP", "b_nls_fixed_GPP", "R2_nls_fixed_GPP", "adj.R2_nls_fixed_GPP", "RMSE_nls_fixed_GPP",
                      "n_NEE_tot", "n_NEE_included", "m_lin_NEE", "R2_lin_NEE", "adj.R2_lin_NEE", "RMSE_lin_NEE", "pval_lin_NEE",
                      "a_nls_NEE", "b_nls_NEE", "R2_nls_NEE", "adj.R2_nls_NEE", "RMSE_nls_NEE",
                      "a_nls_fixed_NEE", "b_nls_fixed_NEE", "R2_nls_fixed_NEE", "adj.R2_nls_fixed_NEE", "RMSE_nls_fixed_NEE")
  summary
}