FluxSynthU: Suite of functions to analyze eddy covariance data

Documented in fParseFLX2015

#' Reads (half-)hourly datafiles from FLUXNET2015 and prepares an export dataframe to be used by REddyProc.
#'
#' @export
#' @title Prepare FLUXNET2015 data for REddyProc
#' @param dat a dataframe containing raw (half-)hourly data from the FLUXNET2015 Network.
#' @param tz_name specify the timezone NAME (not abbrev) (e.g. "America/Winnipeg")
#' @param UTC_offset numeric, indicate the current timezone's number of hours offset from UTC
#' @param flag specify which qc flag level to screen out (e.g. a value of 3 will assign NA to all data that are flagged with a level 3 qc tag.)




# fParseFLX2015 - reads (half-)hourly datafiles from FLUXNET2015 and prepares an export dataframe to be used by REddyProc.
# args: "dat", "qc"
# dat - a FLUXNET 2015 dataframe
# qc - if TRUE, looks for data quality flags that are bad (equal to 3) and assigns NA to the corresponding datapoints.

# Note: As of v8.1, the following changes have been added to fParseFLX2015:
# This routine now grabs day- and night-partitioned products (GPP, RECO) as well.
# These are used later to compare with our partitioned results.

# Note: As of v8.0, the following changes have been added to fParseFLX2015:
# flag: specify which qc flag level to screen out
# Example: a value of 3 will assign NA to all data that are flagged with a level 3 qc tag.
# A value of c(2:3) will assign NA to med and bad gap-filled data.

fParseFLX2015 <- function(dat, tz_name, UTC_offset, flag) {
  # Convert datetime to be read by REddyProc
  # parse *end* time of averaging interval - this is required by REddyProc

  # Convert serial date to vectors for each datetime format (YY, MM, DD, HH, MIN)
  tmp <- dat[,"TIMESTAMP_END"]
  YY <-  floor(tmp/1e8)
  tmp <- tmp - YY*1e8
  MM <- floor(tmp/1e6)
  tmp <- tmp - MM*1e6
  DD <- floor(tmp/1e4)
  tmp <- tmp - DD*1e4
  HH <- floor(tmp/1e2)
  MIN <- tmp - HH*100

  ## The fDatenum function below is equivalent to the 'datenum' function in MatLab.
  # calculates serial dates using the same origin (0000-01-01) as MatLab. This may be trivial, but it works.
  k_datenum <- fDatenum(YY, MM, DD, HH, MIN, 0)

  # First, take the supplied 'Year', 'DoY', and 'Hour' columns from the processed NEON data set (note: these are local time for each site)
  # Next, create a datetime object, using 'UTC' as the default timezone (we'll change this later).
  # We now a vector of datetime values that match the original data, but are in the wrong timezone.
  k_POSIXdate_null <- strptime(paste(YY, MM, DD, HH, MIN, sep = "-"), format = "%Y-%m-%d-%H-%M", tz = "UTC")

  # Using a lookup table, determine the number of hours to offset from UTC
  # NOTE: '+' means behind UTC, '-' means ahead of UTC, so we need a negative sign to ensure these are in the correct zone.
  k_POSIXdate_UTC <- k_POSIXdate_null - (UTC_offset*60*60)

  # Finally, we format the UTC offset corrected time to be back in local time, using 'tz_name' from a lookup table.
  k_POSIXdate_local <- format(k_POSIXdate_UTC, tz=tz_name, usetz=TRUE) # character class

  # You'll notice that the time strings match the original data, and are now set to the appropriate timezone (by coordinates).
  # NOTE: As of 4/29/2020, this routine does NOT account for daylight savings time.


  k_week <- lubridate::week(k_POSIXdate_local)

  k_POSIXdate_plotting <- as.Date(make_datetime(YY,MM,DD,HH,MIN,0))

  k_datenum <- fDatenum(YY, MM, DD, HH, MIN, 0)
  k_dd <- k_datenum - fDatenum(YY, 1, 1, 0, 0, 0) + 1    # equates to 1 for the 1st day of the year, 2 for the 2nd and so on
  k_fracyr <- lubridate::decimal_date(k_POSIXdate_plotting)
  years_of_record <- as.numeric(unique(as.character(YY), 'rows'))   # one representation of each year in the record


  # Time variables for REddyProc
  k_YY <- YY
  k_HH <- HH + MIN/60
  k_doy <- floor(k_dd) # day of year

  rm(YY,MM,DD,HH,MIN, k_week, k_POSIXdate_null, k_POSIXdate_UTC)




  # Parse flux and PPFD data

  # Function below looks for the appropriate response variable column and extracts it to a new object.
  # If the column cannot be found, it assigns a value of zero to the new object and gives a warning.

  # NEE_VUT_USTAR50 Net Ecosystem Exchange, using Variable Ustar Threshold (VUT) for each year, from 50 percentile of USTAR threshold
  # LE_F_MDS Latent heat flux, gapfilled using MDS method
  # H_F_MDS Sensible heat flux, gapfilled using MDS method
  # SW_IN_F Shortwave radiation, incoming consolidated from SW_IN_F_MDS and SW_IN_ERA (negative values set to zero)
  # SW_OUT Shortwave radiation, outgoing
  # LW_IN_F Longwave radiation, incoming, consolidated from LW_IN_F_MDS and LW_IN_ERA
  # LW_OUT Longwave radiation, outgoing
  # TA_F Air temperature, consolidated from TA_F_MDS and TA_ERA
  # TS_F_MDS_1 Soil temperature, gapfilled with MDS (numeric index “#” increases with the depth, 1 is shallowest)
  # SWC_F_MDS_1 Soil water content, gapfilled with MDS (numeric index “#” increases with the depth, 1 is shallowest)
  # RH Relative humidity, range 0-100
  # VPD_F Vapor Pressure Deficit consolidated from VPD_F_MDS and VPD_ERA
  # USTAR Friction velocity
  # PPFD_IN Photosynthetic photon flux density, incoming


  # Flux vars
  k_NEE <- fCheckFLXCols(dat, "NEE_VUT_USTAR50")

  # Energy balance vars
  k_LE <- fCheckFLXCols(dat, "LE_F_MDS")
  k_H <- fCheckFLXCols(dat, "H_F_MDS")
  k_SW_in <- fCheckFLXCols(dat, "SW_IN_F")
  k_SW_out <- fCheckFLXCols(dat, "SW_OUT")
  k_LW_in <- fCheckFLXCols(dat, "LW_IN_F")
  k_LW_out <- fCheckFLXCols(dat, "LW_OUT")

  # Meteorological vars
  k_Tair <- fCheckFLXCols(dat, "TA_F")
  k_Tsoil <- fCheckFLXCols(dat, "TS_F_MDS_1")
  k_SWC <- fCheckFLXCols(dat, "SWC_F_MDS_1")
  k_RH <- fCheckFLXCols(dat, "RH")
  k_VPD <- fCheckFLXCols(dat, "VPD_F")
  k_ustar <- fCheckFLXCols(dat, "USTAR")
  k_PPFD <- fCheckFLXCols(dat, "PPFD_IN")

  # Gapfilled products
  k_GPP_NT <- fCheckFLXCols(dat, "GPP_NT_VUT_USTAR50")
  k_RECO_NT <- fCheckFLXCols(dat, "RECO_NT_VUT_USTAR50")
  k_GPP_DT <- fCheckFLXCols(dat, "GPP_DT_VUT_USTAR50")
  k_RECO_DT <- fCheckFLXCols(dat, "RECO_DT_VUT_USTAR50")


  # Synthetic vars
  #++++ This needs to be checked!
  if (!all(is.na(k_SW_in)) & !all(is.na(k_SW_out)) & !all(is.na(k_LW_in)) & !all(is.na(k_LW_out))) {
    k_Rnet <- (k_SW_in - k_SW_out) - (k_LW_in - k_LW_out)
  } else {
    k_Rnet <- NA
  }

  if (!all(is.na(k_SW_in)) & !all(is.na(k_SW_out))) {
    k_albedo <- k_SW_out/k_SW_in
  } else {
    k_albedo <- NA
  }


  # We also want to carry over the quality check flags for each variable of interest
  # Flux vars
  qc_NEE <- fCheckFLXCols(dat, "NEE_VUT_USTAR50_QC")  # Quality flag for NEE_VUT_USTAR50

  # Energy balance vars
  qc_LE <- fCheckFLXCols(dat, "LE_F_MDS_QC")          # Quality flag for LE_F_MDS
  qc_H <- fCheckFLXCols(dat, "H_F_MDS_QC")            # Quality flag for H_F_MDS
  qc_SW_in <- fCheckFLXCols(dat, "SW_IN_F_QC")        # Quality flag for SW_IN_F
  qc_SW_out <- fCheckFLXCols(dat, "SW_OUT_QC")        # Quality flag for SW_OUT
  qc_LW_in <- fCheckFLXCols(dat, "LW_IN_F_QC")        # Quality flag for LW_IN_F
  qc_LW_out <- fCheckFLXCols(dat, "LW_OUT_QC")        # Quality flag for LW_OUT

  # Meteorological vars
  qc_Tair <- fCheckFLXCols(dat, "TA_F_QC")		        # Quality flag for TA_F
  qc_Tsoil <- fCheckFLXCols(dat, "TS_F_MDS_1_QC") 	  # Quality flag for TS_F_MDS_#
  qc_SWC <- fCheckFLXCols(dat, "SWC_F_MDS_1_QC")      # Quality flag for SWC_F_MDS_#
  # no quality flag for RH
  qc_VPD <- fCheckFLXCols(dat, "VPD_F_QC")            # Quality flag for VPD_F
  qc_ustar <- fCheckFLXCols(dat, "USTAR_QC")          # Quality flag of USTAR
  qc_PPFD <- fCheckFLXCols(dat, "PPFD_IN_QC")         # Quality flag of PPFD_IN



  # Build output matrix for all years
  # columns needed for REddyProc
  #     k_YY
  #     k_doy
  #     k_HH
  #     k_NEE# including storage
  #     k_LE
  #     k_H
  #     k_SW_in
  #     k_Tair
  #     k_Tsoil
  #     k_RH
  #     k_VPD
  #     k_ustar
  #     k_PPFD  # not needed for original REddyProc

  # Build output matrix for REddyProc for all years
  k_full <- data.frame(k_YY, k_doy, k_HH, k_POSIXdate_local, k_POSIXdate_plotting, k_datenum, k_dd, k_fracyr,
                       k_NEE, k_LE, k_H, k_SW_in, k_SW_out, k_LW_in, k_LW_out, k_Rnet, k_albedo,
                       k_Tair, k_Tsoil, k_SWC, k_RH, k_VPD, k_ustar, k_PPFD,
                       qc_NEE, qc_LE, qc_H, qc_SW_in, qc_SW_out, qc_LW_in, qc_LW_out,
                       qc_Tair, qc_Tsoil, qc_SWC, qc_VPD, qc_ustar, qc_PPFD,
                       k_GPP_NT, k_RECO_NT, k_GPP_DT, k_RECO_DT)

  # Convert any logical columns to numeric
  k_full <- fLogic2Numeric(k_full)


  # Screen out bad data using the specified flag (0 = measured data, 1 = good gapfill, 2 = medium gapfill, 3 = bad gapfill)
  k_out <- k_full %>%
    select_all() %>%
    mutate(

      k_NEE = fCheckforQCFlags(k_NEE, qc_NEE, flag = flag),
      k_LE = fCheckforQCFlags(k_LE, qc_LE, flag = flag),
      k_H = fCheckforQCFlags(k_H, qc_H, flag = flag),
      k_SW_in = fCheckforQCFlags(k_SW_in, qc_SW_in, flag = flag),
      k_SW_out = fCheckforQCFlags(k_SW_out, qc_SW_out, flag = flag),
      k_LW_in = fCheckforQCFlags(k_LW_in, qc_LW_in, flag = flag),
      k_LW_out = fCheckforQCFlags(k_LW_out, qc_LW_out, flag = flag),
      k_Tair = fCheckforQCFlags(k_Tair, qc_Tair, flag = flag),
      k_Tsoil = fCheckforQCFlags(k_Tsoil, qc_Tsoil, flag = flag),
      k_SWC = fCheckforQCFlags(k_SWC, qc_SWC, flag = flag),
      # k_RH = fCheckforQCFlags(k_RH, qc_RH), ## As of 2020-03-24, no quality flags for RH exist
      k_VPD = fCheckforQCFlags(k_VPD, qc_VPD, flag = flag),
      k_ustar = fCheckforQCFlags(k_ustar, qc_ustar, flag = flag),
      k_PPFD = fCheckforQCFlags(k_PPFD, qc_PPFD, flag = flag)) %>%
    select_all()



  # Convert any logical columns to numeric
  k_out <- fLogic2Numeric(k_out)

  # k_out and k_full are equivalent datasets, except k_out contains NAs wherever a specified qc flag threshold is reached
  k_FLX_products <- k_full %>%
    select(k_YY, k_doy, k_HH, k_NEE, qc_NEE, k_GPP_NT, k_RECO_NT, k_GPP_DT, k_RECO_DT)


  k_plots <- k_out # Retains QC columns as well as data columns AFTER assigning NA to gapfilled data


  k_out <- k_out %>%
    select(k_YY, k_doy, k_HH,
           k_NEE, k_LE, k_H, k_SW_in, k_SW_out,
           k_LW_in, k_LW_out, k_Rnet, k_albedo,
           k_Tair, k_Tsoil, k_SWC, k_RH, k_VPD, k_ustar, k_PPFD)


  newlist <- list(a=years_of_record, b=k_out, c=k_plots, d=k_FLX_products)
  names(newlist) <- c("years_of_record", "k_out", "k_plots", "k_FLX_products")
  return(newlist)

  rm(k_YY, k_doy, k_HH, k_POSIXdate_plotting, k_datenum, k_fracyr, k_dd, k_NEE, k_LE, k_H, k_SW_in, k_SW_out, k_LW_in,
     k_LW_out, k_Rnet, k_albedo, k_Tair, k_Tsoil, k_SWC, k_RH, k_VPD, k_ustar, k_PPFD, k_out, k_plots, years_of_record, k_FLX_products)
}