RapidACi: Automated Rapid A-Ci curve correction for Li-Cor system

Documented in Rapid_aci_correction

#' Rapid_aci_correction function
#'
#' @description Main function of the package. From a list of files, as produced by the
#'   buid_list function, the function will first extract recalculated gas exchange values
#'   when new leaf surface areas are provided (or original values if leafArea_mm2 = NA).
#'   The values will then be corrected according to the paired matching empty chamber
#'   measurements. Finally, data are prepared for further modelling analysis in an R list
#'   object.
#'
#' @param list_files A dataframe as generated by the build_list function
#' @param delta_max Difference between two measure points below which we consider
#'   measurements to be at steady state. If not sure, verify with the diagnostic plots
#'   produced by the diagnose_sample function.
#' @param max_degree  The maximum polynomial order to consider for fitting the empty
#'   chamber measurement curve in order to correct the Rapid A-Ci measurements. This
#'   parameter takes an integer value from 1 up to 5 (as suggested by Stinziano _et al._,
#'   2017). Default max_degree = 3.
#' @param priority_curve For the very specific case when coefficients were retrieved from
#'   both an up and down ramp of the empty chamber A-Ci curve, a priority can be set. This
#'   argument can only take "positive" or "negative" (default = "postive").
#'
#' @return The function returns a list of elements per A-Ci measurement file that contains
#'   the sample ID, the leaf area used, the timestamp, the paths to the A-Ci and empty
#'   chamber file used, important data variables from these files (recalculated variables
#'   if leaf area was provided), the coefficients of the fitted curve(s) used to correct
#'   values, as well as corrected A and Ci. The formatted data for plantecophys can be
#'   found under the Raci element.
#'
#' @export


Rapid_aci_correction <- function(list_files,
                                 delta_max = 0.05,
                                 max_degree = 3,
                                 priority_curve = "positive") {

  # If dark files are found, include Rd in list_files
  # multiple dark files ok, but not duplicated sample_ID names
  dark_files <- dplyr::filter(list_files, chamber == "DARK")
  
  if(dim(dark_files)[1] > 0) {
    merged_dark <- vector("list", length = nrow(dark_files))
    LA <- dplyr::filter(list_files, chamber != "EMPTY", sample_ID != "none") %>% 
      select(sample_ID, leafArea_mm2)
    for(i in 1:nrow(dark_files)) {
      merged_dark[[i]] <- list_files %>%
                          left_join(correct_dark(dark_files$path[i], leafArea_df = LA,
                                                 LiCor_system = dark_files$LiCor_system[i]))
    }
    
    list_files <- do.call(bind_rows, merged_dark) %>% unique()
  } else {
    message("No DARK files detected")
    list_files$Rd <- NA
  }

  # Files with no START_time are more than likely useless for the rest of analysis
  list_files <- list_files[complete.cases(list_files[, "START_time"]),]
  message("\nThe following list of files is being processed :\n")
  list_files %>% print(n = Inf)
  
  # Initialization of results list 
  mts <- unique(list_files[, "timestamp"])
  lst <- rep(list(list()), nrow(dplyr::filter(list_files, chamber != "EMPTY")))
  
  # Input of basic information into the results list: for each raci file of each timestamp 
  z = 0
  for(i in 1:length(mts$timestamp)) {

    match_gr <- dplyr::filter(list_files, timestamp == mts$timestamp[i])
    empty_chamber <- match_gr[match_gr$chamber == "EMPTY", ]

    if(dim(empty_chamber)[1] == 0) {
      stop(paste("\nThere is NO empty chamber file for the timestamp",
                 mts$timestamp[i]))
    } else if(dim(empty_chamber)[1] != 1) {
      stop(paste("\nThere is more than one empty chamber files with the timestamp",
                 mts$timestamp[i]))
    }

    raci_files <- match_gr[match_gr$chamber == "FAST", ]

    if(length(raci_files) < 1) {
      message(paste("There is no raci file corresponding to timestamp",
                    mts$timestamp[i], "\n"))
    }

    for(j in 1:nrow(raci_files)) {
      
      xx <- best_coefs(empty_file = empty_chamber$path,
                       leafArea_cm2 = raci_files$leafArea_mm2[j] / 100,
                       delta_max = delta_max, max_degree = max_degree)
      
      lst[[j+i-1+z]]$Sample_ID <- raci_files$sample_ID[j]
      lst[[j+i-1+z]]$leafArea_cm2 <- raci_files$leafArea_mm2[j] / 100
      lst[[j+i-1+z]]$Rd <- raci_files$Rd[j]
      lst[[j+i-1+z]]$match_timestamp <- as.character(empty_chamber$timestamp)
      lst[[j+i-1+z]]$ACi_file <- raci_files$path[j]
      lst[[j+i-1+z]]$ACi_data <-  get_fromExcel(raci_files$path[j], 
                                               leafArea_cm2 = raci_files$leafArea_mm2[j] / 100,
                                               variables = c("GasEx_E", "GasEx_A", "GasEx_Ca", 
                                                             "GasEx_Ci", "GasEx_gtc", "GasEx_gsw",
                                                             "GasEx_TleafCnd","Meas_CO2_r", 
                                                             "Meas_Tleaf", "Meas_Tleaf2", 
                                                             "Meas_Qamb_in")) %>%
                                  mutate(deltaA  = c(0, diff(.$GasEx_A)), 
                                         deltaCi = c(0, diff(.$GasEx_Ci)), n = 1:n(),
                                         directn = ifelse(deltaA > 0, "positive", "negative"))
      lst[[j+i-1+z]]$empty_chamber_file <- empty_chamber$path
      lst[[j+i-1+z]]$empty_chamber_data <- xx[[1]]$empty_data
     #lst[[j+i-1+z]]$lag_between_curves <- xx[[1]]$lag
      lst[[j+i-1+z]]$delta_max <- delta_max
      lst[[j+i-1+z]]$max_degree <- max_degree
      lst[[j+i-1+z]]$priority_curve <- priority_curve
      lst[[j+i-1+z]]$posCurve_coefs <- xx[[1]]$positive
      lst[[j+i-1+z]]$negCurve_coefs <- xx[[1]]$negative
    }
    z = z + j - 1
  }


  for(i in seq_along(lst)) {

    if(priority_curve == "positive") {
      if(sum(is.na(lst[[i]]$posCurve_coefs)) == 0) {
        x <- correct_raci(lst[[i]], "positive")
        lst[[i]]$correction_curve_used <- "positive"
        lst[[i]]$correction_factor <- x[1]
        lst[[i]]$Aleaf <- x[2]
        lst[[i]]$Ci_corrected <- x[3]  
      } else if(sum(is.na(lst[[i]]$negCurve_coefs)) == 0) {
        x <- correct_raci(lst[[i]], "negative")
        lst[[i]]$correction_curve_used <- "negative"
        lst[[i]]$correction_factor <- x[1]
        lst[[i]]$Aleaf <- x[2]
        lst[[i]]$Ci_corrected <- x[3]
      } else {
        lst[[i]]$correction_curve_used <- lst[[i]]$correction_factor <- lst[[i]]$Aleaf <- lst[[i]]$Ci_corrected <- NA
      }
    } else if(priority_curve == "negative") {
      if(sum(is.na(lst[[i]]$negCurve_coefs)) == 0) {
        x <- correct_raci(lst[[i]], "negative")
        lst[[i]]$correction_curve_used <- "negative"
        lst[[i]]$correction_factor <- x[1]
        lst[[i]]$Aleaf <- x[2]
        lst[[i]]$Ci_corrected <- x[3] 
      } else if(sum(is.na(lst[[i]]$posCurve_coefs)) == 0) {
        x <- correct_raci(lst[[i]], "positive")
        lst[[i]]$correction_curve_used <- "positive"
        lst[[i]]$correction_factor <- x[1]
        lst[[i]]$Aleaf <- x[2]
        lst[[i]]$Ci_corrected <- x[3]
      } else {
        lst[[i]]$correction_curve_used <- lst[[i]]$correction_factor <- lst[[i]]$Aleaf <- lst[[i]]$Ci_corrected <- NA
      }
    } else {
      stop("Priority_curve argument is incorrect, it only takes one of 'positive' or 'negative' as value")
    }
  } 
  
  for(i in seq_along(lst)) {
    
    lst[[i]]$Raci <- bind_cols(lst[[i]]$ACi_data, lst[[i]]$Aleaf, lst[[i]]$Ci_corrected) %>%
      select(Photo = V1, Ci = V2, Tleaf = Meas_Tleaf, PARi = Meas_Qamb_in, everything()) %>%
      dplyr::filter(Photo > 0 & deltaA >= 0 & deltaCi >= 0 & Ci >= 0) %>%
      mutate(Rd = lst[[i]]$Rd)
  }
  
  names(lst) <- lapply(lst, `[[`, 1)
  return(lst)
}