R/FS_fProcessingThruREddyProc.R

Defines functions FS_fProcessingThruREddyProc

Documented in FS_fProcessingThruREddyProc

#' This routine uses a modified version of the REddyProc package to perform gap-filling and flux partitioning on AmeriFlux, FLUXNET2015, and NEON (half-)hourly flux data.

#' @export
#' @title Perform gap-filling and flux partitioning using REddyProc (modified)
#' @param method character, specify which partitioning method you'd like to use: 'night' (default), 'day', or 'both' which runs both partitioning routines
#' @param inpath character string, navigate to the directory containing REddyProc input files
#' @param outpath character string, set the directory by which gap-filled data will be exported
#' @param site.meta dataframe, site metadata file contained in FluxSynthU
#' @param batch_process logical, if TRUE, batch processes all sites within the inpath directory (defaults to FALSE).

#' @importFrom REddyProc fConvertTimeToPosix sEddyProc usGetAnnualSeasonUStarMap fWriteDataframeToFile



# Author: Kenny Smith

# Version notes -----------------------------------------------------------
# v2.1.KS
# 200826 KS
# Added minor tweaks to how site metadata is displayed

# v1.0.KS
# 200512
# FS_fProcessingThruREddyProc is a function that processes the files created from './Source/1a_RawDataProcessing_v1.0.KS.R/'
# (located in './Data/Processed data/REddyProc Inputs/') and performs gap-filling and flux partitioning using either the day or night method.

# This function executes fingerprint plots in real time, then exports gap-filled data to './Data/Processed data/REddyProc Outputs/'


FS_fProcessingThruREddyProc <- function(method = 'night', inpath, outpath, site.meta, batch_process = FALSE) {

  if (!dir.exists(outpath)) {
    message(paste('Warning! No such directory exists!'))
    x <- askYesNo(paste("Would you like to create a new directory in: ",outpath))

    if (x) {
      dir.create(file.path(paste0(outpath, "1_after-night-part/")), showWarnings = FALSE, recursive = TRUE)
      dir.create(file.path(paste0(outpath, "2_after-day-part/")), showWarnings = FALSE, recursive = TRUE)
    }
  }


  # find all raw data files (including rejected sites)
  all.files <- list.files(paste(inpath), recursive = TRUE)

  # extract paths for FLUXNET2015 and AMERIFLUX (half-) hourly data
  fluxnet.files <- all.files[grep(pattern = '\\FLX', all.files)]
  amf.files <- all.files[grep(pattern = '\\AMF', all.files)]
  neon.files <- all.files[grep(pattern = '\\NEO', all.files)]

  # concatenate the two flux datasets
  files <- c(fluxnet.files, amf.files, neon.files)

  # list of all available sites with database type (eg, 'AMF_CA-Qcu')
  db.and.sites <- unique(substr(x = files, start = 1,stop = 10))

  # matching database for each
  all.site.db <- substr(x = files, start = 1,stop = 3)

  neon_index <- all.site.db %in% "NEO"

  # all available site names
  flx.amf.site.names <- substr(x = files[!neon_index], start = 5,stop = 10)
  neon.site.names <- substr(x = files[neon_index], start = 5,stop = 8)
  all.site.names <- c(flx.amf.site.names, neon.site.names)

  # update db.and.sites to fix NEON names
  db.and.sites[neon_index] <- gsub("_N", "", db.and.sites[neon_index])



  # Select site(s) or batch process all sites at once
  if (batch_process) {

    selected.db_sites <- db.and.sites
    selected.sites <- all.site.names

  } else {

    # give a description for your routine:
    routine_description <- "Perform gap-filling and flux partitioning with REddyProc"

    # select one or more sites
    choices <- fSiteSelect(db.and.sites, descrip = routine_description)

    if (length(choices[["AMF"]]) > 0) {
      AMF_selected <- paste0("AMF_", choices[["AMF"]])
    } else {AMF_selected <- NULL}

    if (length(choices[["FLX"]]) > 0) {
      FLX_selected <- paste0("FLX_", choices[["FLX"]])
    } else {FLX_selected <- NULL}

    if (length(choices[["NEO"]]) > 0) {
      NEO_selected <- paste0("NEO_", choices[["NEO"]])
    } else {NEO_selected <- NULL}

    # Includes database name in site selection
    selected.db_sites <- c(AMF_selected, FLX_selected, NEO_selected)

    # Omits database name in site selection
    selected.sites <- c(choices[["AMF"]], choices[["FLX"]], choices[["NEO"]])

  }








  for (i in 1:length(selected.db_sites)) {

    # The current site (and database)
    db_site <- selected.db_sites[i]

    # Extract the database name and site name from the 'db_site' string
    db <- substr(x = db_site, start = 1, stop = 3)
    site <- str_remove(db_site, paste0(db, "_"))

    logical_test_for_selected_sites <- all.site.names %in% site & all.site.db %in% db
    site.files <- files[logical_test_for_selected_sites]

    years_of_record <- as.numeric(gsub(".*-(\\d{4})-.+", "\\1", site.files)) ## Looks for 4-char numeric strings, followed by a hyphen
    start_year <- years_of_record[1]
    end_year <- tail(years_of_record, 1)


    # Get site metadata
    site.info <- fGetSiteInfo(dat = site.meta,
                              site = site,
                              db = db,
                              site_col = "site_ID",
                              db_col = "database",
                              site_name_col = "site_Name",
                              start_yr_col = "start_year",
                              end_yr_col = "end_year",
                              exclude_yr_col = "exclude_years",
                              lat_col = "lat",
                              long_col = "long",
                              country_col = "country",
                              IGBP_col = "IGBP",
                              elev_col = "elev_orig",
                              MAT_col = "MAT_orig",
                              MAP_col = "MAP_orig")


    message('')
    message('Processing site (', min(which(unique(selected.sites) %in% site)), '/',
            length(unique(selected.sites)), '):\t', site, ', ', db, " (", site.info$country, ")")
    message('\t\tName: ', site.info$info)
    message('\t\tlat: ', round(site.info$lat,3), '\tlong: ', round(site.info$long,3))
    message('\t\tMAT: ', round(site.info$MAT,1), '°C\tMAP: ', round(site.info$MAP,0),  'mm')
    message('\t\tForest Type: ', site.info$IGBP)

    message('')
    message('  ', 'Years of record: ', site.info$start_year, '-', site.info$end_year)

    message('')
    message('  ', 'Navigating to data directory:  ')
    message('    ', paste0(inpath))



    if (method == 'both') {
      partition.methods  <-  c('night', 'day')

      for (k in seq_along(partition.methods)) {
        current.method <- partition.methods[k]

        message('')
        message('  ', 'Starting REddyProc (', paste0(current.method), ' method)...')


        # The following routine loops through each available year of data and performs gap-filling and flux partitioning.
        # Note: It is known to crash when large datagaps are encountered.
        # As of version 1.4 of this script, this routine should be able to handle hourly and half-hourly datatypes.
        for (process_year in start_year:end_year) {

          tryCatch(
            expr = {
              # Set infile name for data
              infile <- sprintf("%s_%s-%d-ToBePartitioned.txt", db, site, process_year)

              message('')
              message('  ', 'Reading site data: ', paste0(site, sep=', ', db, sep=' (Year = ', process_year, sep=")"))
              message('')


              #+++ Load data with 1 header and 1 unit row from (tab-delimited) text file
              EddyData.F <- read.table(paste(inpath, infile, sep=""), header=TRUE, na.strings = -9999, stringsAsFactors = FALSE)
              EddyData.F <- fCharacter2Numeric(EddyData.F)
              EddyData.F <- fLogic2Numeric(EddyData.F)

              #+++ Add time stamp in POSIX time format
              # the routine fConvertTimeToPosix() is a part of the REddyProc package,
              # but not called using EddyProc.C$fConvertTimeToPosix as with other examples below - why not?
              for (j in 1:ncol(EddyData.F)) {
                if (sum(is.na(EddyData.F[,j])) == nrow(EddyData.F)) {
                  EddyData.F[,j] = -9999
                }
              }


              EddyDataWithPosix.F <- fConvertTimeToPosix(EddyData.F, 'YDH', Year = 'Year', Day = 'DoY', Hour = 'Hour')


              # Logical test to determine whether the hour column contains any half-hours (hourly datafiles will return a vector of all TRUE)
              test_for_hourly_dat <- fCheckInteger(unique(EddyDataWithPosix.F$Hour))

              if (!FALSE %in% test_for_hourly_dat) {
                # TRUE for hourly data
                # Set DTS to 24 for hourly data

                # #+++ Initalize R5 reference class sEddyProc for post-processing of eddy data
                #+++ with the variables needed for post-processing later
                # note EddyProc.C has class "package"

                EddyProc.C <- sEddyProc$new(site, EddyDataWithPosix.F, c('NEE','Rg','Tair','Tsoil','VPD', 'Ustar', 'PPFD'), DTS = 24)
              } else {
                # ELSE for half-hourly data
                # Set DTS to 48 for half-hourly data

                EddyProc.C <- sEddyProc$new(site, EddyDataWithPosix.F, c('NEE','Rg','Tair','Tsoil','VPD', 'Ustar', 'PPFD'), DTS = 48)
              }




              # fingerprint plot for NEE, with gaps
              EddyProc.C$sPlotFingerprintY('NEE', Year = process_year)

              # get u* thresholds
              uStarTh <- EddyProc.C$sEstUstarThresholdDistribution(nSample = 100L, probs = c(0.05, 0.5, 0.95))
              # the [-2] omits the second element of any object
              uStarThAnnual <- usGetAnnualSeasonUStarMap(uStarTh)[-2]
              uStarSuffixes <- colnames(uStarThAnnual)[-1]

              # gap fill NEE
              EddyProc.C$sMDSGapFillAfterUStarDistr('NEE',
                                                    uStarTh = uStarThAnnual,
                                                    uStarSuffixes = uStarSuffixes,
                                                    FillAll = TRUE
              )

              # plot of gap-filled NEE (note year)
              EddyProc.C$sPlotFingerprintY('NEE_U50_f', Year = process_year)

              # specify location for site (needed to distinguish night and day)
              EddyProc.C$sSetLocationInfo(LatDeg = site.info$lat, LongDeg = site.info$long, TimeZoneHour = site.info$UTC_offset)
              # gap fill met
              EddyProc.C$sMDSGapFill('Tair', FillAll = FALSE)
              EddyProc.C$sMDSGapFill('VPD', FillAll = FALSE)
              EddyProc.C$sMDSGapFill('Tsoil', FillAll = FALSE)
              EddyProc.C$sMDSGapFill('Rg', FillAll = FALSE)
              EddyProc.C$sMDSGapFill('PPFD', T1 = 100, FillAll = FALSE)

              # partition NEE using night-time method, using default parameters
              # note that lapply is similar to putting a function call in a for loop
              if (current.method == 'night') {
                resPart <- lapply(uStarSuffixes, function(suffix){
                  EddyProc.C$sMRFluxPartition(Suffix = suffix)
                })
              }

              # partition NEE, daytime method, Tair
              # original
              if (current.method == 'day') {
                resPart <- lapply(uStarSuffixes, function(suffix) {
                  EddyProc.C$sGLFluxPartition(Suffix = suffix)
                })
              }

              # stack results on to the original data file
              FilledEddyData.F <- EddyProc.C$sExportResults()
              CombinedData.F <- cbind(EddyData.F, FilledEddyData.F)



              if (current.method == 'night') {
                # set outpath_directory and outfile names
                outpath_directory <-  paste0(outpath, "1_after-night-part/")
                outfile <- sprintf('%s_%s-%d-after-night-part.txt', db, site, process_year)
              } else {
                outpath_directory <-  paste0(outpath, "2_after-day-part/")
                outfile <- sprintf('%s_%s-%d-after-day-part.txt', db, site, process_year)
              }


              message('')
              message('')
              message('  ', 'Exporting output file for site: ', paste(site, sep=', ', db), ' (Year = ', paste(process_year), ')')
              message('    ', paste0(outpath_directory))
              message('')
              message('')

              fWriteDataframeToFile(CombinedData.F, outfile, Dir = outpath_directory)

            },

            # If loop fails for a particular year,
            error = function(e){
              message("    * Caught an error on process year: ", process_year)
              print(e)
            }

          ) # end TryCatch


        } # end year loop

        message('')
        message('  ', 'Finishing current REddyProc method (', paste0(current.method), ' method)...')
        message('')
        message('')

        rm(current.method)



      }


    } else {

      current.method <- method

      message('')
      message('  ', 'Starting REddyProc (', paste0(current.method), ' method)...')


      # The following routine loops through each available year of data and performs gap-filling and flux partitioning.
      # Note: It is known to crash when large datagaps are encountered.
      # As of version 1.4 of this script, this routine should be able to handle hourly and half-hourly datatypes.
      for (process_year in start_year:end_year) {

        tryCatch(
          expr = {
            # Set infile name for data
            infile <- sprintf("%s_%s-%d-ToBePartitioned.txt", db, site, process_year)

            message('')
            message('  ', 'Reading site data: ', paste0(site, sep=', ', db, sep=' (Year = ', process_year, sep=")"))
            message('')


            #+++ Load data with 1 header and 1 unit row from (tab-delimited) text file
            EddyData.F <- read.table(paste(inpath, infile, sep=""), header=TRUE, na.strings = -9999, stringsAsFactors = FALSE)
            EddyData.F <- fCharacter2Numeric(EddyData.F)
            EddyData.F <- fLogic2Numeric(EddyData.F)

            #+++ Add time stamp in POSIX time format
            # the routine fConvertTimeToPosix() is a part of the REddyProc package,
            # but not called using EddyProc.C$fConvertTimeToPosix as with other examples below - why not?
            for (j in 1:ncol(EddyData.F)) {
              if (sum(is.na(EddyData.F[,j])) == nrow(EddyData.F)) {
                EddyData.F[,j] = -9999
              }
            }


            EddyDataWithPosix.F <- fConvertTimeToPosix(EddyData.F, 'YDH', Year = 'Year', Day = 'DoY', Hour = 'Hour')


            # Logical test to determine whether the hour column contains any half-hours (hourly datafiles will return a vector of all TRUE)
            test_for_hourly_dat <- fCheckInteger(unique(EddyDataWithPosix.F$Hour))

            if (!FALSE %in% test_for_hourly_dat) {
              # TRUE for hourly data
              # Set DTS to 24 for hourly data

              # #+++ Initalize R5 reference class sEddyProc for post-processing of eddy data
              #+++ with the variables needed for post-processing later
              # note EddyProc.C has class "package"

              EddyProc.C <- sEddyProc$new(site, EddyDataWithPosix.F, c('NEE','Rg','Tair','Tsoil','VPD', 'Ustar', 'PPFD'), DTS = 24)
            } else {
              # ELSE for half-hourly data
              # Set DTS to 48 for half-hourly data

              EddyProc.C <- sEddyProc$new(site, EddyDataWithPosix.F, c('NEE','Rg','Tair','Tsoil','VPD', 'Ustar', 'PPFD'), DTS = 48)
            }




            # fingerprint plot for NEE, with gaps
            EddyProc.C$sPlotFingerprintY('NEE', Year = process_year)

            # get u* thresholds
            uStarTh <- EddyProc.C$sEstUstarThresholdDistribution(nSample = 100L, probs = c(0.05, 0.5, 0.95))
            # the [-2] omits the second element of any object
            uStarThAnnual <- usGetAnnualSeasonUStarMap(uStarTh)[-2]
            uStarSuffixes <- colnames(uStarThAnnual)[-1]

            # gap fill NEE
            EddyProc.C$sMDSGapFillAfterUStarDistr('NEE',
                                                  uStarTh = uStarThAnnual,
                                                  uStarSuffixes = uStarSuffixes,
                                                  FillAll = TRUE
            )

            # plot of gap-filled NEE (note year)
            EddyProc.C$sPlotFingerprintY('NEE_U50_f', Year = process_year)

            # specify location for site (needed to distinguish night and day)
            EddyProc.C$sSetLocationInfo(LatDeg = site.info$lat, LongDeg = site.info$long, TimeZoneHour = site.info$UTC_offset)
            # gap fill met
            EddyProc.C$sMDSGapFill('Tair', FillAll = FALSE)
            EddyProc.C$sMDSGapFill('VPD', FillAll = FALSE)
            EddyProc.C$sMDSGapFill('Tsoil', FillAll = FALSE)
            EddyProc.C$sMDSGapFill('Rg', FillAll = FALSE)
            EddyProc.C$sMDSGapFill('PPFD', T1 = 100, FillAll = FALSE)

            # partition NEE using night-time method, using default parameters
            # note that lapply is similar to putting a function call in a for loop
            if (current.method == 'night') {
              resPart <- lapply(uStarSuffixes, function(suffix){
                EddyProc.C$sMRFluxPartition(Suffix = suffix)
              })
            }

            # partition NEE, daytime method, Tair
            # original
            if (current.method == 'day') {
              resPart <- lapply(uStarSuffixes, function(suffix) {
                EddyProc.C$sGLFluxPartition(Suffix = suffix)
              })
            }

            # stack results on to the original data file
            FilledEddyData.F <- EddyProc.C$sExportResults()
            CombinedData.F <- cbind(EddyData.F, FilledEddyData.F)



            if (current.method == 'night') {
              # set outpath_directory and outfile names
              outpath_directory <-  paste0(outpath, "1_after-night-part/")
              outfile <- sprintf('%s_%s-%d-after-night-part.txt', db, site, process_year)
            } else {
              outpath_directory <-  paste0(outpath, "2_after-day-part/")
              outfile <- sprintf('%s_%s-%d-after-day-part.txt', db, site, process_year)
            }


            message('')
            message('')
            message('  ', 'Exporting output file for site: ', paste(site, sep=', ', db), ' (Year = ', paste(process_year), ')')
            message('    ', paste0(outpath_directory))
            message('')
            message('')

            fWriteDataframeToFile(CombinedData.F, outfile, Dir = outpath_directory)

          },

          # If loop fails for a particular year,
          error = function(e){
            message("    * Caught an error on process year: ", process_year)
            print(e)
          }

        ) # end TryCatch


      } # end year loop

    }

    message('')
    message('Finalizing site (', min(which(unique(selected.sites) %in% site)), '/',
            length(unique(selected.sites)), '): ', site, ', ', db)
    message('')
    message('')



  } # end site loop


} # end function
ksmiff33/FluxSynthU documentation built on Dec. 15, 2020, 10:29 p.m.