R/Gap_filling.R

gapfill_met <- function(data, var, new_col = FALSE) {
  # fails if observation is near the beginning or end of dataset, need to fix
  # assign data, var
  cat(paste0("\n", "Gap filling: ", var, "\n"))
  df <- data
  var.na <- df$timestamp[which(is.na(df[, var]))]
  if (length(var.na) > 0) {
    locs <- match(var.na, df$timestamp)

    #pbar <- progress_estimated(length(locs))
    for (i in 1:length(locs)) {
      if (locs[i] > 24 & locs[i] < (nrow(df) - 24)) {
        point <- df[locs[i], ]
        period <- df[(locs[i] - 24):(locs[i] + 24), ]
        #browser()
        # expand range if too many NA values
        if (length(period[, var][is.na(period[, var])]) > 24) {
          if (locs[i] > 48 & locs[i] < (nrow(df) - 48)) {
            period <- df[(locs[i] - 48):(locs[i] + 48), ]
            # make sure there are enough non-NA values in range
            if (length(period[, var][is.na(period[, var])]) >= 48) {
              next
            }
          }
        }
        # return NA if no conditions are met
      } else if (locs[i] > 12 & locs[i] < (nrow(df) - 12)) {
        point <- df[locs[i], ]
        period <- df[(locs[i] - 12):(locs[i] + 12), ]
      } else {
        df[locs[i], var] <- NA
        next
      }
      # define spline function
      func <-
        suppressWarnings(
          splinefun(
            x = period[, "timestamp"],
            y = period[, var],
            method = "periodic"
          )
        )
      # execute spline function
      pred <- func(point$timestamp)
      df[locs[i], var] <- pred
      #pbar$tick()$print()
    }
  }
  return(df)
}

## =============================================================================
#' Gap-fill a Variable Using the MDS Algorithm
#'
#' Original name: sEddyProc_sMDSGapFill
#' @author AMM, TW
#'
#' @description
#' MDS gap filling algorithm adapted after the PV-Wave code and paper by Markus
#' Reichstein.
#'
#' @param var A variable to be filled.
#' @param qc_var The quality flag of the variable to be filled
#' @param qc_val The value of quality flag for _good_ (original) data, other
#'   data is set to missing
#' @param v1 The 1st condition variable (default: Global radiation "Rg" in
#'   W m-2)
#' @param t1 The tolerance interval for the 1st condition variable (default: 50
#'   W m-2)
#' @param v2 The 2nd condition variable (default: Vapor pressure deficit "VPD"
#'   in hPa)
#' @param t2 The tolerance interval for the 2nd condition variable (default: 5
#'   hPa)
#' @param v3 The 3rd condition variable (default: Air temperature "Tair" in
#'   degC)
#' @param t3 The tolerance interval for the 3rd condition variable (default:
#'   2.5 degC)
#' @param fill_all A logical value whether to fill all values to estimate
#'   uncertainties.
#' @param verbose A logical value whether to print status information to
#'   screen.
#' @param max_run A scalar integer indicating how many subsequent numerically
#'   equal values to allow until a warning is produced. Set to Inf or NA for no
#'   warnings. Defaults for "NEE" to records across 4 hours and no warning for
#'   others.
#' @param only_fill A logical value whether to only output a vector of the gap-
#'   filled variable.
#'
#' @details
#'
#' Runs of numerically equal numbers hint to problems of the data and cause
#' unreasonable estimates of uncertainty. The user is warned if this occurs.
#'
#' MDS gap filling algorithm calls the subroutines Look Up Table
#' \code{\link{sEddyProc_sFillLUT}} and Mean Diurnal Course
#' \code{\link{sEddyProc_sFillMDC}} with different window sizes as described in
#' the reference. To run dataset only with MDC algorithm
#' \code{\link{sEddyProc_sFillMDC}}, set condition variable v1 to "none".
#'
#' If meteo condition variables (v1, v2, v3) are at default values but do not
#' exist as columns, they are set to "none" (= disabled identifier). This allows
#' running MDS with less variables than prescribed in the default setting. If
#' meteo condition variable are same as variable to fill, also set to "none".
#' This prevents filling artificial gaps (for uncertainty estimates) with itself
#' as meteo condition variable.
#'
#' @return Gap filling results in sTEMP data frame (with renamed columns).
#'
#' @references Reichstein, M. et al. (2005) On the separation of net ecosystem
#' exchange into assimilation and ecosystem respiration: review and improved
#' algorithm. Global Change Biology, 11, 1424-1439.
#'
#' @export
fill_mds <- function(data, var, qc_var = "none", qc_val = NA, v1 = "Rg",
                     t1 = 50, v2 = "VPD", t2 = 5, v3 = "Tair", t3 = 2.5,
                     fill_all = TRUE, verbose = TRUE, max_run = NULL,
                     only_fill = FALSE) {
  time_start <- Sys.time()
  if (is.null(max_run)) {
    dts <- 48
    max_run <- if (var == "NEE") 4 * dts / 24 else NA
  }
  # Initialize temporary data frame for newly generated gap filled data and qualifiers
  temp <- fill_init(data, var, qc_var, qc_val, fill_all)
  if (is.null(temp)) {
    # Abort gap filling if initialization failed
    stop("Initialization of gap-filling data frame failed.")
  }
  if (is.finite(max_run) & (nrow(temp) >= max_run)) {
    rl <- check_runs(as.vector(temp$var_orig), min_length = max_run)
    if (length(rl$index)) {
      rl_sorted <- rl[rev(order(rl$nRep)), , drop = FALSE]
      warning(
        "Variable ", var,
        " contains long runs of numerically equal numbers.",
        " Longest of ", rl_sorted$nRep[1],
        " repeats of value ", temp$var_orig[rl_sorted$index[1]],
        " starts at index ", rl_sorted$index[1]
      )
    }
  }
  if ((v1 == "Rg" && !(v1 %in% c(colnames(data)))) || (v1 == var)) {
    v1 <- "none"
  }
  if ((v2 == "VPD" && !(v2 %in% c(colnames(data)))) || (v2 == var)) {
    v2 <- "none"
  }
  if ((v3 == "Tair" && !(v3 %in% c(colnames(data)))) || (v3 == var)) {
    v3 <- "none"
  }
  # Check column names (with "none" as dummy)
  check_names(cbind(data, temp), c(v1, v2, v3))
  # Check tolerance entries (if condition variable is not "none")
  none_cols <- c(v1, v2, v3) %in% "none"
  if (!check_value(t1) || !check_value(t2) || !check_value(t3)) {
    stop(
      "Tolerance intervals must be numeric ",
      "(if not specified, set to NA.)"
    )
  }
  if (sum(is.na(c(t1, t2, t3)[!none_cols]))) {
    stop(
      "If condition variable is specified (dummy name is 'none'), ",
      "the tolerance interval must be specified."
    )
  }
  # Run gap filling scheme depending on auxiliary meteo data availability
  # Check availablility of meteorological data for LUT
  met <-
    if (
      v1 != "none" && v2 != "none" && v3 != "none"
      && sum(!is.na(data[, v1])) != 0 && sum(!is.na(data[, v2])) != 0 &&
      sum(!is.na(data[, v3])) != 0
    ) {
      # All three meteo conditions are available and valid to use:
      message(
        "Full MDS algorithm for gap filling of '",
        attr(temp$var_f, "varnames"),
        "' with LUT(", v1, ", ", v2, ", ", v3, ") and MDC."
      )
      3
    } else if (v1 != "none" && sum(!is.na(data[, v1])) != 0) {
      # Only one meteo condition available for LUT
      message(
        "Limited MDS algorithm for gap filling of '",
        attr(temp$var_f, "varnames"), "' with LUT(", v1, " only) and MDC."
      )
      1
    } else {
      # No meteo condition available (use MDC only)
      message(
        "Restricted MDS algorithm for gap filling of '",
        attr(temp$var_f, "varnames"),
        "' with no meteo conditions and hence only MDC."
      )
      if (!var %in% c("Rg", "PAR", "PPFD")) {
        warning("No meteo available for MDS gap filling.")
      }
      0
    }
  # Full MDS algorithm =========================================================
  # Each call to a gap-filling algorithm updates the temp data
  # Step 1: LUT (method 1) with window size +-7 days
  if (met == 3) {
    temp <- fill_lut(data, temp, 7, v1, t1, v2, t2, v3, t3, verbose = verbose)
  }
  # Step 2: LUT (method 1) with window size +-14 days
  if (met == 3) {
    temp <- fill_lut(data, temp, 14, v1, t1, v2, t2, v3, t3, verbose = verbose)
  }
  # Step 3: LUT, Rg only (method 2) with window size +-7 days,
  if (met == 3 || met == 1) {
    temp <- fill_lut(data, temp, 7, v1, t1, verbose = verbose)
  }
  # Step 4: MDC (method 3) with window size 0 (same day)
  temp <- fill_mdc(temp, 0, verbose = verbose)
  # Step 5: MDC (method 3) with window size +-1, +-2 days
  temp <- fill_mdc(temp, 1, verbose = verbose)
  temp <- fill_mdc(temp, 2, verbose = verbose)
  # Step 6: LUT (method 1) with window size +-21, +-28, ..., +-70
  if (met == 3) {
    for (i in seq(21, 70, 7)) {
      temp <- fill_lut(data, temp, i, v1, t1, v2, t2, v3, t3, verbose = verbose)
    }
  }
  # Step 7: LUT (method 2) with window size +-14, +-21, ..., +-70
  if (met == 3 || met == 1) {
    for (i in seq(14, 70, 7)) {
      temp <- fill_lut(data, temp, i, v1, t1, verbose = verbose)
    }
  }
  # Step 8: MDC (method 3) with window size +-7, +-14, ..., +-210 days
  for (i in seq(7, 210, 7)) {
    temp <- fill_mdc(temp, i, verbose = verbose)
  }
  # Set long gaps again to NA
  temp$var_fall <- suppressMessages(convert_gaps(temp$var_fall))
  # Message on gap filling
  time_diff <- as.numeric(Sys.time()) - as.numeric(time_start)
  message(
    "Finished gap filling of '", var, "' in ",
    floor(time_diff),
    " seconds. Artificial gaps filled: ",
    length(temp$var_fall) - sum(is.na(temp$var_fall)), ", real gaps filled: ",
    sum(is.na(temp$var_orig)), ", unfilled (long) gaps: ",
    sum(is.na(temp$var_fall)), "."
  )
  # Return only the gap-filled data (if requested)
  if (only_fill) {
    return(temp[, "var_f"])
  }
  # Rename new columns generated during gap filling
  colnames(temp) <- gsub("var_", paste0(var, "_"), colnames(temp))
  return(temp)
}

#' =============================================================================
#' Initalize Gap Filling
#' =============================================================================
#'
#' Original name: sEddyProc_sFillInit
#' @author AMM
#'
#' @description
#' Initializes temporary data frame for newly generated gap filled data and
#' qualifiers.
#'
#' @param data A data frame.
#' @param var variable to be filled
#' @param qc_var quality flag of variable to be filled
#' @param qc_val value of quality flag for _good_ (original) data, other data
#'   is set to missing
#' @param fill_all Fill all values to estimate uncertainties
#'
#' @section Details
#' Long gaps (larger than 60 days) are not filled.
#'
#' Newly created variables with gap-filled data and qualifiers:
#' * VAR\emph{_orig} Original values of variable VAR used for gap filling
#' * VAR\emph{_f   } Original values and filled gaps
#' * VAR\emph{_fqc} Quality flag assigned depending on gap filling method and
#'   window length (0 = original data, 1 = most reliable, 2 = medium, 3 = least
#'   reliable)
#' * VAR\emph{_fall} All values considered as gaps (for uncertainty estimates)
#' * VAR\emph{_fall_qc} Quality flag assigned depending on gap filling method
#'   and window length (1 = most reliable, 2 = medium, 3 = least reliable)
#' * VAR\emph{_fnum} Number of datapoints used for gap-filling
#' * VAR\emph{_fsd} Standard deviation of datapoints used for gap filling
#'   (uncertainty)
#' * VAR\emph{_fmeth} Method used for gap filling (1 = similar meteo condition
#'   (fill_lut with Rg, VPD, Tair), 2 = similar meteo (fill_lut with Rg only),
#'   3 = mean diurnal course (fill_mdc))
#' * VAR\emph{_fwin} Full window length used for gap filling
#'
#' @return A temporary data frame ready to hold results of gap filling.
#'
#' @export
fill_init <- function(data, var, qc_var = "none", qc_val = NA,
                      fill_all = TRUE) {
  'Initializes data frame "temp" for newly generated gap filled data and
  qualifiers.'
  # Check variable to fill and apply quality flag
  check_names(data, c(var, qc_var))
  gf_var <- apply_qc(data, var, qc_var, qc_val)
  # Abort if variable to be filled contains no data
  if (sum(!is.na(gf_var)) == 0) {
    warning("Variable to be filled (", var, ") contains no data.")
    return(-111)
  }
  temp <- data.frame(
    var_orig = gf_var,
    var_f = NA,
    var_fqc = NA,
    var_fall = NA,
    var_fall_qc = NA,
    var_fnum = NA,
    var_fsd = NA,
    var_fmeth = NA,
    var_fwin = NA
  )
  # Set fqc to zero for original values
  temp$var_f <- temp$var_orig
  temp$var_fqc <- ifelse(!is.na(temp$var_orig), 0, NA)
  # Set filling of only gaps
  if (fill_all == FALSE) temp$var_fall <- temp$var_orig # "prefill" with original data
  # Add units
  attr(temp$var_f, "units") <- attr(gf_var, "units")
  attr(temp$var_fall, "units") <- attr(gf_var, "units")
  attr(temp$var_fsd, "units") <- attr(gf_var, "units")
  attr(temp$var_fwin, "units") <- "days"
  # Gaps longer than 60 days are not filled
  gap_length <- calc_gaplength(temp$var_orig)
  dts <- 48
  max_gap <- dts * 60 # half-hours in 60 days
  while (max(gap_length) > max_gap) {
    # Flag long gap with -9999.0
    end <- which(gap_length == max(gap_length))
    start <- end - max(gap_length) + 1
    temp$var_fall[start:end] <- -9999.0 # Set to -9999.0 as a flag for long gaps
    gap_length[start:end] <- -1 # Set to -1 since accounted for
    warning(
      "The long gap between position ", start, " and ",
      end, " will not be filled."
    )
  }
  if (fill_all == TRUE) {
    message(
      "Initialized variable '", var, "' with ", sum(is.na(temp$var_orig)),
      " real gaps for gap filling of all ", sum(is.na(temp$var_fall)),
      " values (to estimate uncertainties)."
    )
  } else {
    message(
      "Initialized variable '", var, "' with ", sum(is.na(temp$var_orig)),
      " real gaps for gap filling."
    )
  }
  return(temp)
}

## =============================================================================
#' Execute the Mean Diurnal Course (MDC) Algorithm
#'
#' Original name: sEddyProc_sFillMDC
#' @author AMM
#'
#' @description
#' Mean Diurnal Course (MDC) algorithm based on average values within +/- one
#' hour of adjacent days.
#'
#' @param temp
#' @param win_days The window size for filling in number of days.
#' @param verbose A logical value whether to print status information to
#'   screen.
#'
#' @details
#' Quality flags:
#' * 1: nDay <= 1
#' * 2: nDay [2,5)
#' * 3: nDay > 5
#'
#' @return MDC filling results in sTEMP data frame.
#'
#' @export
fill_mdc <- function(temp, win_days, verbose = TRUE) {
  lgf <- matrix(NA, nrow = 0, ncol = 7, dimnames = list(NULL, c(
    "index", "mean", "fnum", "fsd", "fmeth", "fwin", "fqc"
  )))
  # Determine gap positions
  gap_psns <- which(is.na(temp$var_fall))
  if (length(gap_psns) > 0) {
    for (i in 1:length(gap_psns)) {
      # Message on progress if wanted
      if (verbose && i == 1) {
        message(
          "Mean diurnal course with window size of ", win_days,
          " days: .", sep = ""
        )
      }
      # Set index within window size
      dts <- 48
      gap <- gap_psns[i]
      index <- numeric(0)
      for (j in (0:win_days)) {
        if (j == 0) {
          index <- c(index, gap + (-2:2))
        } else {
          index <- c(
            index, gap + c(-j * dts + (-2:2)),
            gap + c(j * dts + (-2:2))
          )
        }
      }
      index <- index[index > 0 & index <= nrow(temp)]
      # If enough available data, fill gap
      mdc <- subset(temp$var_orig[index], !is.na(temp$var_orig[index]))
      if (length(mdc) > 1) {
        lvar_index <- gap
        lvar_mean <- mean(mdc)
        lvar_fnum <- length(mdc)
        lvar_fsd <- sd(mdc)
        lvar_fmeth <- 3
        # Set window size and quality flag
        # Window calculation changes depending on size
        lvar_fwin <- if (win_days < 7) {
          2 * win_days + 1
        } else {
          win_days + 1
        }
        if (lvar_fwin <= 1) lvar_fqc <- 1
        if (lvar_fwin > 1 & lvar_fwin <= 5) lvar_fqc <- 2
        if (lvar_fwin > 5) lvar_fqc <- 3
        lgf <- rbind(lgf, c(
          lvar_index, lvar_mean, lvar_fnum,
          lvar_fsd, lvar_fmeth, lvar_fwin, lvar_fqc
        ))
      }
      if (verbose && i %% 100 == 0) message(".", appendLF = FALSE)
      if (verbose && i %% 6000 == 0) message("\n.", appendLF = FALSE)
    }
    if (verbose) message("", nrow(lgf))
  }
  # Copy gap filled values and properties to temp
  if (nrow(lgf) > 0) {
    # Fill all rows in var_fall and co
    temp[lgf[, "index"], c(
      "var_fall", "var_fnum", "var_fsd", "var_fmeth", "var_fwin", "var_fall_qc"
    )] <- lgf[, c("mean", "fnum", "fsd", "fmeth", "fwin", "fqc")]
    # Only fill gaps in var_f and var_fqc
    gaps <- is.na(temp[lgf[, "index"], "var_f"])
    temp[lgf[, "index"], c("var_f", "var_fqc")][gaps, ] <-
      as.data.frame(lgf[, c("mean", "fqc"), drop = FALSE])[gaps, ]
  }
  # Other columns are specific for full MR MDS algorithm
  return(temp)
}

## =============================================================================
#' Execute the Look-Up Table (LUC) Algorithm
#'
#' Original name: sEddyProc_sFillLUT
#' @author AMM
#'
#' @description
#' Look-Up Table (LUT) algorithm of up to five conditions within prescribed
#' window size.
#'
#' @param win_days Window size for filling in days.
#' @param v1 Condition variable 1.
#' @param t1 Tolerance interval 1.
#' @param v2 Condition variable 2.
#' @param t2 Tolerance interval 2.
#' @param v3 Condition variable 3.
#' @param t3 Tolerance interval 3.
#' @param v4 Condition variable 4.
#' @param t4 Tolerance interval 4.
#' @param v5 Condition variable 5.
#' @param t5 Tolerance interval 5.
#' @param verbose Print status information to screen.
#'
#' @details
#' Quality flags:
#' * 1: at least one variable and nDay <= 14
#' * 2: three variables and nDay in [14,56)
#'   or one variable and nDay in  [14,28)
#' * 3: three variables and nDay > 56
#'   or one variable and nDay > 28
#'
#' @return LUT filling results in temporary data frame.
#'
#' @export
fill_lut <- function(data, temp, win_days, v1 = "none", t1 = NA, v2 = "none",
                     t2 = NA, v3 = "none", t3 = NA, v4 = "none", t4 = NA,
                     v5 = "none", t5 = NA, verbose = TRUE) {
  lgf <- matrix(NA, nrow = 0, ncol = 7, dimnames = list(
    NULL, c("index", "mean", "fnum", "fsd", "fmeth", "fwin", "fqc")
  ))
  # Check if temp has been initialized with new var_ columns
  if (!exists("var_f", temp)) {
    stop(
      "Temporary data frame temp for processing results has not ",
      "been initalized with fill_init."
    )
  }
  # Determine gap positions
  gap_psns <- which(is.na(temp$var_fall))
  if (length(gap_psns) > 0) {
    for (i in 1:length(gap_psns)) {
      # Message on progress if wanted
      none_cols <- c(v1, v2, v3, v4, v5) %in% "none"
      if (verbose && i == 1) {
        message(
          "Look up table with window size of ", win_days, " days with ",
          paste(c(v1, v2, v3, v4, v5)[!none_cols], collapse = " ")
        )
      }
      # Set window size
      dts <- 48
      gap <- gap_psns[i]
      start <- gap - (win_days * dts - 1)
      end <- gap + (win_days * dts - 1)
      if (start <= 0) start <- 1
      if (end > nrow(temp)) end <- nrow(temp)
      t1red <- if (grepl("Rg", v1)) {
        # Reduce tolerance of radiation if variable name contains 'Rg' to
        # [20, 50] depending on measurement
        max(min(t1, data[gap, v1], na.rm = T), 20, na.rm = T)
      } else {
        t1
      }
      # For performance reasons, write sDATA subrange into vectors
      v1n <- data[start:end, v1]
      v2n <- data[start:end, v2]
      v3n <- data[start:end, v3]
      v4n <- data[start:end, v4]
      v5n <- data[start:end, v5]
      sub_gap <- gap - (start - 1)
      # Set LUT fill condition
      rows <- !is.na(temp$var_orig[start:end])
      if (v1 != "none") {
        rows <- rows & abs(v1n - v1n[sub_gap]) < t1red & !is.na(v1n)
      }
      if (v2 != "none") {
        rows <- rows & abs(v2n - v2n[sub_gap]) < t2 & !is.na(v2n)
      }
      if (v3 != "none") {
        rows <- rows & abs(v3n - v3n[sub_gap]) < t3 & !is.na(v3n)
      }
      if (v4 != "none") {
        rows <- rows & abs(v4n - v4n[sub_gap]) < t4 & !is.na(v4n)
      }
      if (v5 != "none") {
        rows <- rows & abs(v5n - v5n[sub_gap]) < t5 & !is.na(v5n)
      }
      lut <- subset(temp$var_orig[start:end], rows)
      # If enough available data, fill gap
      if (length(lut) > 1) {
        lvar_index <- gap
        lvar_mean <- mean(lut)
        lvar_fnum <- length(lut)
        lvar_fsd <- sd(lut)
        # Set window size and quality flag
        lvar_fwin <- 2 * win_days
        lvar_fmeth <- NA
        lvar_fqc <- NA
        if (v1 != "none" && v2 != "none" && v3 != "none") {
          # Three conditions
          lvar_fmeth <- 1
          if (lvar_fwin <= 14) lvar_fqc <- 1
          if (lvar_fwin > 14 & lvar_fwin <= 56) lvar_fqc <- 2
          if (lvar_fwin > 56) lvar_fqc <- 3
        }
        if (v1 != "none" && v2 == "none" && v3 == "none") {
          # One condition
          lvar_fmeth <- 2
          if (lvar_fwin <= 14) lvar_fqc <- 1
          if (lvar_fwin > 14 & lvar_fwin <= 28) lvar_fqc <- 2
          if (lvar_fwin > 28) lvar_fqc <- 3
        }
        lgf <- rbind(lgf, c(
          lvar_index, lvar_mean, lvar_fnum, lvar_fsd,
          lvar_fmeth, lvar_fwin, lvar_fqc
        ))
      }
      if (verbose && i %% 100 == 0) message(".", appendLF = FALSE)
      if (verbose && i %% 6000 == 0) message("\n.", appendLF = FALSE)
    }
    if (verbose) message("", nrow(lgf))
  }
  # Copy gap filled values and properties to temp
  if (nrow(lgf) > 0) {
    # Fill all rows in var_fall and co
    temp[lgf[, "index"], c(
      "var_fall", "var_fnum", "var_fsd", "var_fmeth", "var_fwin", "var_fall_qc"
    )] <- lgf[, c("mean", "fnum", "fsd", "fmeth", "fwin", "fqc")]
    # Only fill gaps in var_f and var_fqc
    gaps <- is.na(temp[lgf[, "index"], "var_f"])
    temp[lgf[, "index"], c("var_f", "var_fqc")][gaps, ] <-
      as.data.frame(lgf[, c("mean", "fqc"), drop = FALSE])[gaps, ]
  }

  # Other columns are specific for full MR MDS algorithm
  return(temp)
}

#' =============================================================================
#' Calculate Length of Gaps in a Vector
#' =============================================================================
#'
#' Original name: fCalcLengthOfGaps
#' @author AMM
#'
#' @section Description
#' Calculate length of gaps (with flag NA) in specified vector.
#'
#' @param x numeric vector with gaps (missing values, NAs)
#'
#' @section Value
#' An integer vector with length of gap since start of gap.
#'
calc_gaplength <- function(x) {
  x_na <- is.na(x)
  # Determine cumulative sums of gaps
  sum <- cumsum(x_na)
  # Calculate sum from start of gap
  out <- sum - cummax((!x_na) * sum)
  return(out)
}

## =============================================================================
#' Convert All Gap Flags to NA
#'
#' Original name: fConvertGapsToNA
#' @author AMM
#'
#' @description Converts all gap flags to NA.
#'
#' @param data data frame to be converted
#' @param flag flag value used for gaps, defaults to -9999.0
#'
#' @return A data frame with NAs instead of gaps.
#'
convert_gaps <- function(data, flag = -9999.0) {
  gaps <- sum(data == flag, na.rm = T)
  data[data == flag] <- NA_real_
  message('Number of \'', flag, '\' convertered to NA: ', gaps)
  return(data)
}

## =============================================================================
#' Detect Runs of Equal Values
#'
#' Original name: .runLength
#' @author
#'
#' @param x vector to check for runs
#' @param min_length minimum run length to report
#'
#' @section Value
#' Data frame with columns:
#' \itemize{
#' \item{index} starting index of runs
#' \item{nRep} length of the run
#' }
#'
check_runs <- function(x, min_length = 2) {
  # TODO implement na.rm = TRUE to not interreput runs by NA
  # Problems:
  # with na.omit() indices are hard to transfer back
  # with fillNAFoward, NAs after a value are counted as repeats, even if
  # the value after NA is different
  rl <- rle(x)$lengths
  rlc <- cumsum(rl) + 1 - rl
  iiRun <- which(rl >= min_length)
  iRun <- rlc[iiRun]
  out <- data.frame(index = iRun, nRep = rl[iiRun])
  return(out)
}

## =============================================================================
#' Title
#'
#' @description
#' Original name: sEddyProc_sMDSGapFillAfterUstar
#'
#' @param fluxVar
#' @param uStarVar
#' @param uStarTh
#' @param uStarSuffix
#' @param isFlagEntryAfterLowTurbulence
#' @param isFilterDayTime
#' @param swThr
#' @param RgColName
#' @param ...
#'
#' @return
#' @export
#'
#' @examples
sEddyProc_sMDSGapFillAfterUstar <- function(
  ### sEddyProc$sMDSGapFillAfterUstar - MDS gap filling algorithm after u* filtering
  fluxVar                ##<< Flux variable to gap fill after ustar filtering
  , uStarVar = 'Ustar'   ##<< Column name of friction velocity u * (ms-1),
  ## default 'Ustar'
  , uStarTh =        ##<< data.frame with
    ##  first column, season names, and second column estimates of uStar Threshold.
    ## Alternatively, a single value to be used as threshold for all records
    ## If only one value is given, it is used for all records.
    usGetAnnualSeasonUStarMap(sUSTAR_DETAILS$uStarTh)
  , uStarSuffix = 'uStar'   ##<< Different suffixes required are for
  ## different u * scenarios
  , isFlagEntryAfterLowTurbulence = FALSE  ##<< Set to TRUE for flagging the
  ## first entry after low turbulance as bad condition (by value of 2).
  , isFilterDayTime = FALSE		##<< Set to TRUE to also filter day-time values,
  ## default only filters night-time data
  , swThr = 10			  ##<< threshold of solar radiation below which data is
  ## marked as night time respiration.
  , RgColName = "Rg" ##<< Column name of incoming short wave radiation
  , ...              ##<< Other arguments passed to \code{\link{sEddyProc_sMDSGapFill}}
) {
  'Calling sMDSGapFill after filtering for (provided) friction velocity u * '
  ##author<<
  ## AMM, TW
  ##details<<
  ## Calling \code{\link{sEddyProc_sMDSGapFill}} after filtering for
  ## (provided) friction velocity u*
  ##
  ## The u* threshold(s) are provided with argument \code{uStarTh} for
  ## filtering the conditions of low turbulence.
  ## After filtering, the data is gap filled using the MDS algorithm
  ## \code{\link{sEddyProc_sMDSGapFill}}.
  #
  ##seealso<<
  ## \itemize{
  ## \item \code{\link{sEddyProc_sEstimateUstarScenarios}} and
  ## \code{link{sEddyProc_sEstUstarThold}} for estimating the
  ## u* threshold from the data.
  ## \item \code{\link{sEddyProc_sMDSGapFillUStarScens}} for
  ## automated gapfilling for several scenarios of u* threshold estimates.
  ## }
  #
  uStarThresVec <- if (is.numeric(uStarTh) ) {
    if (length(uStarTh) != 1L) stop(
      "Without seasons, only a single uStarThreshold can be provided,"
      , " but got a vector.")
    uStarThresVec <- rep(uStarTh, nrow(.self$sDATA) )
  } else {
    if (!("season" %in% colnames(sTEMP)) ) stop(
      "Seasons not defined yet. Provide argument seasonFactor to sEstUstarThold.")
    # make sure merge will work
    colnames(uStarTh) <- c("season", "uStarThreshold")
    if (any(!is.finite(uStarTh$uStarThreshold))) stop(
      "must provide finite uStarThresholds")
    iMissingLevels <- which(!(levels(.self$sTEMP$season) %in% uStarTh$season))
    if (length(iMissingLevels) ) stop(
      "missing uStarTrheshold for seasons "
      , paste(levels(.self$sTEMP$season)[iMissingLevels], collapse = ", "))
    tmpDs <- merge(subset(sTEMP, select = "season"), uStarTh, all.x = TRUE)
    uStarThresVec <- tmpDs[, 2L]
  }
  # Check column names (with 'none' as dummy)
  # (Numeric type and plausibility have been checked on initialization of sEddyProc)
  fCheckColNames(sDATA, c(fluxVar, uStarVar), 'sMDSGapFillAfterUstar')

  # Filter data
  uStar <- sDATA[, uStarVar]
  qfUStar <- integer(nrow(sDATA) )	# 0L
  # if not filtering dayTimeValues, create a vector that is TRUE only for nightTime
  isRowFiltered <- if (isFilterDayTime) TRUE else
    (!is.finite(sDATA[, RgColName]) | sDATA[, RgColName] < swThr)
  # mark low uStar or bad uStar as 1L
  qfUStar[
    isRowFiltered &
      !is.na(uStarThresVec) &
      (sDATA[[uStarVar]] < uStarThresVec)
    ] <- 1L
  if (isTRUE(isFlagEntryAfterLowTurbulence) ) {
    ##details<<
    ## With \code{isFlagEntryAfterLowTurbulence set to TRUE}, to be more
    ## conservative, in addition
    ## to the data acquired when uStar is below the threshold,
    ## the first half hour measured with good turbulence conditions
    ## after a period with low turbulence is also removed (Papaple et al. 2006).
    qfUStar[which(diff(qfUStar) == 1) + 1] <- 2L
  }
  # mark those conditions as bad, when no threshold is defined
  qfUStar[isRowFiltered & !is.finite(uStarThresVec) ]	<- 3L
  # mark those recods as bad, where uStar is not defined
  qfUStar[isRowFiltered & !is.finite(uStar) ]	<- 4L
  message(
    'Ustar filtering (u * Th_1 = ', uStarThresVec[1], '), marked '
    , (signif(sum(qfUStar != 0) / length(qfUStar), 2)) * 100
    , '% of the data as gap'  )
  if (isTRUE(isFlagEntryAfterLowTurbulence) ) {
    message(
      '(including removal of the first half-hour after a '
      , 'period of low turbulence).')
  }

  # Add filtering step to (temporal) results data frame
  suffixDash.s <- paste(
    (if (fCheckValString(uStarSuffix)) '_' else ''), uStarSuffix, sep = '')
  attr(uStarThresVec, 'varnames') <- paste(
    'Ustar', suffixDash.s, '_Thres', sep = '')
  attr(uStarThresVec, 'units') <- 'ms-1'
  attr(qfUStar, 'varnames') <- paste(
    'Ustar', suffixDash.s, '_fqc', sep = '')
  attr(qfUStar, 'units') <- '-'
  sTEMP$USTAR_Thres <<- uStarThresVec
  sTEMP$USTAR_fqc <<- qfUStar
  colnames(sTEMP) <<- gsub(
    'USTAR_', paste('Ustar', suffixDash.s, '_', sep = ''), colnames(.self$sTEMP))
  # Check for duplicate columns (to detect if different processing setups
  # were executed without different suffix)
  if (length(names(which(table(colnames(.self$sTEMP)) > 1))) )  {
    warning('sMDSGapFillAfterUstar::: Duplicated columns found!'
            , ' Please specify different suffix when processing different"
            , " setups on the same dataset!')
  }
  # Gap fill data after applying ustar filtering
  sMDSGapFill(
    fluxVar, QFVar = attr(qfUStar, 'varnames'), QFValue = 0, ...
    , suffix = uStarSuffix)
  ##value<<
  ## Vector with quality flag from filtering (here 0: good data
  ## , 1: low turbulence, 2: first half hour after low turbulence
  ## , 3: no threshold available, 4: missing uStar value)
  ## Gap filling results are in sTEMP data frame (with renamed columns)
  ## that can be retrieved by \code{\link{sEddyProc_sExportResults}}.
  return(invisible(qfUStar))
  # example in Eddy.R sEddyProc.example
  }

## =============================================================================
#' Title
#'
#' @param ...
#' @param uStarTh
#' @param uStarSuffixes
#'
#' @return
#' @export
#'
#' @examples
sEddyProc_sMDSGapFillAfterUStarDistr <- function(
  ### GapFilling for several filters of estimated friction velocity Ustar thresholds.
  ...                 ##<< other arguments to
  ## \code{\link{sEddyProc_sMDSGapFillAfterUstar}} and \code{\link{sEddyProc_sMDSGapFill}}
  ## such as \code{fluxVar}
  , uStarTh		  ##<< data.frame with first column, season names,
  ## and remaining columns different estimates of uStar Threshold.
  ## If the data.frame has only one row, then each uStar threshold estimate
  ## is applied to the entire dataset.
  ## Entries in first column must match levels in argument \code{seasonFactor}
  , uStarSuffixes = colnames(uStarTh)[-1]  ##<< String vector
  ## to distinguish result columns for different ustar values.
  ## Its length must correspond to column numbers in \code{UstarThres.m.n}.
  # return value function \code{\link{sEddyProc_sEstUstarThresholdDistribution}}
) {
  ##details<< This method is superseedec by
  ##\code{\link{sEddyProc_sMDSGapFillUStarScens}} and only there
  ## for backward portability.
  warning(
    "Method sEddyProc_sMDSGapFillAfterUStarDistr has been deprecated. "
    , "Please, replace it by calls to 'sEddyProc_sMDSGapFillUStarScens' "
    , "and if applicable calling before 'sEddyProc_sSetUstarScenarios'."
    , "see ?sEddyProc_sMDSGapFillUStarScens and vignette('uStarCases')"
  )
  if (missing(uStarTh)) stop(
    "Need to provide argument uStarTh. Since version 1.1.6 "
    , " Please, use new methods sEddyProc_sSetUstarScenarios and"
    , " sEddyProc_sMDSGapFillUStarScens instead of this method."
    , " see ?sEddyProc_sMDSGapFillUStarScens and ?")
  .self$sSetUstarScenarios(uStarTh, uStarSuffixes)
  .self$sMDSGapFillUStarScens(...)
}

## =============================================================================
#' Title
#'
#' @param ...
#'
#' @return
#' @export
#'
#' @examples
sEddyProc_sMDSGapFillUStarScens <- function(
  ### GapFilling for several filters of estimated friction velocity Ustar thresholds.
  ...                 ##<< other arguments to
  ## \code{\link{sEddyProc_sMDSGapFillAfterUstar}} and
  ## \code{\link{sEddyProc_sMDSGapFill}}
  ## such as \code{fluxVar}
) {
  ##author<< TW
  ##details<<
  ## sEddyProc$sMDSGapFillUStarDistr: calling
  ## \code{\link{sEddyProc_sMDSGapFillAfterUstar}} for several filters of
  ## friction velocity Ustar.
  ##
  ## The scenarios need to be set before by
  ## \code{\link{sEddyProc_sSetUstarScenarios}} or accepting the defaults
  ## annual estimates of \code{link{sEddyProc_sEstimateUstarScenarios}}.
  ##
  ## Then the difference between output columns NEE_U05_f and NEE_U95_f
  ## corresponds to the uncertainty
  ## introduced by the uncertain estimate of the u* threshold.
  #
  ##seealso<<
  ## \href{useCase vignette}{../doc/useCase.html}
  uStarTh <- .self$sUSTAR_SCEN
  uStarSuffixes <- colnames(.self$sUSTAR_SCEN)[-1]
  nEstimates <- ncol(uStarTh) - 1L
  #iCol <- 1L
  filterCols <- lapply(seq(1L:nEstimates), function(iCol) {
    .self$sMDSGapFillAfterUstar(
      ...
      , uStarTh = uStarTh[, c(1L, 1L + iCol)]
      , uStarSuffix = uStarSuffixes[iCol]
    )
  })
  filterMat <- do.call(cbind, filterCols)
  return(invisible(filterMat))
  ##value<<
  ## Matrix (columns correspond to u* Scenarios) with quality flag from
  ## filtering ustar (0 - good data, 1 - filtered data)
  ##
  ## Gap filling results in sTEMP data frame (with renamed columns), that
  ## can be retrieved by \code{\link{sEddyProc_sExportResults}}.
  ## Each of the outputs is calculated for several u* r-estimates and
  ## distinguished by a suffix after the variable.
  ## E.g. with an an entry "U05" in \code{uStarSuffixes} in
  ## \code{\link{sEddyProc_sSetUstarScenarios}}
  ## the corresponding filled NEE can be found in output column "NEE_U05_f".
}
grahamstewart12/tidyflux documentation built on June 4, 2019, 7:44 a.m.