R/format_data.R

## =============================================================================
#' Correct Character Vector
#'
#' Substitute given characters or strings by their alternatives.
#'
#' String \code{"(z-d)/L"} is renamed to \code{"zeta"}. Substitute:
#' \itemize{\item \code{"co2_flux"} by \code{"NEE"} \item \code{"*"} by
#' \code{"star"} \item \code{"\%"} by \code{"perc"} \item \code{"-"} and
#' \code{"/"} by \code{"_"} \item round and square brackets by empty string}
#' using regular expression patterns.
#'
#' @param x A character vector.
#' @param type A character string ("varnames" or "units"). It determines the
#'   specific corrections that are made based on which attributes are being
#'   corrected.
#'
#' @return
#' A corrected character vector.
#'
#' @examples
#' correct(c("[m]", "qc_co2_flux", "(z-d)/L", "x_70%", "*[-(z-d)/L"))
correct <- function(x, type = c("varnames", "units")) {
  type <- match.arg(type)
  names <- names(x)
  x <- x %>% stringr::str_replace_all("[\\[|\\]|\\(|\\)]", "") # remove brackets
  if (type == "varnames") {
    x <- x %>%
      # Suffix & prefix corrections
      stringr::str_replace_all("rand_err", "unc") %>%
      stringr::str_replace_all("strg", "st") %>%
      stringr::str_replace_all("_spikes", "_sphf") %>%
      stringr::str_replace_all("_mixing_ratio", "") %>%
      stringr::str_replace_all("_unrot", "unrot") %>%
      stringr::str_replace_all("_rot", "rot") %>%
      stringr::str_replace_all("timelag|time_lag", "tlag") %>%
      stringr::str_replace_all("v_adv", "vadv") %>%
      # Specific variable corrections
      stringr::str_replace_all("co2_flux", "fco2") %>%
      stringr::str_replace_all("h2o_flux", "fh2o") %>%
      stringr::str_replace_all("ch4_flux", "fch4") %>%
      stringr::str_replace_all("z\\-d\\/L", "zeta") %>%
      stringr::str_replace_all("air_pressure", "pair") %>%
      stringr::str_replace_all("air_temperature", "tair") %>%
      stringr::str_replace_all("sonic_temperature", "ts") %>%
      stringr::str_replace_all("specific_humidity", "q") %>%
      stringr::str_replace_all("wind_speed", "ws") %>%
      stringr::str_replace_all("wind_dir", "wd") %>%
      stringr::str_replace_all("co2_signal_strength_7500_mean", "rssi75") %>%
      stringr::str_replace_all("rssi_77_mean", "rssi77") %>%
      # Biomet variable corrections
      stringr::str_replace_all("_1_1", "") %>%
      stringr::str_replace_all("SHF", "g") %>%
      stringr::str_replace_all("P_RAIN", "p") %>%
      stringr::str_replace_all("TS_", "tsoil_") %>%
      stringr::str_replace_all("TA_", "tair_") %>%
      # Syntax corrections
      stringr::str_replace_all("\\*", "star") %>%
      stringr::str_replace_all("\\%", "p") %>%
      stringr::str_replace_all("\\-|\\/", "_") %>%
      stringr::str_to_lower() # everything in lowercase
    names(x) <- names
  } else if (type == "units") {
    x <- x %>%
      stringr::str_replace_all("µmol\\/m\\^2\\/s", "µmol+1s-1m-2") %>%
      stringr::str_replace_all("W\\/m\\^2", "W+1m-2") %>%
      stringr::str_replace_all("m\\^3\\/m\\^3", "%")
    names(x) <- names
  }
  return(x)
}

## =============================================================================
#' Convert Character to Regular Date-time Sequence
#'
#' Converts character vector to class \code{"POSIXct"} using
#' \code{\link{strptime}} and validates the result. The input has to represent a
#' regular date-time sequence with given frequency. Additional attributes
#' \code{varnames} and \code{units} are assigned to returned vector with fixed
#' strings \code{"timestamp"} and \code{"-"}, respectively.
#'
#' Eddy covariance related measurements are usually stored with a timestamp
#' representing the end of the averaging period (typicaly 1800 s) in standard
#' time. This can however cause difficulties during data aggregation or
#' plotting. Therefore it is recommended to shift the date-time information
#' using \code{shift.by} to represent the center of averaging period prior to
#' any computations. It is also recommended to change the date-time information
#' to its original state before saving to a file (see Examples section).
#'
#' Any unsuccessful attempt to convert date-time information is considered to be
#' unexpected behaviour and returns an error message instead of \code{NA} value.
#' In case that multiple formats are present in the timestamp, it has to be
#' corrected prior using \code{strptime_eddy}. It is expected that time series
#' are continuous although no valid measurements are available for given time
#' interval. Therefore \code{freq} value is checked against the lagged
#' differences (\code{\link{diff}}) applied to the converted date-time vector
#' and returns an error message if mismatch is found.
#' @param x A character vector containing date-time information to be converted
#'   to class \code{"POSIXct"}.
#' @param freq A numeric value specifying the frequency (in seconds) of the
#'   input date-time vector.
#' @param shift_by A numeric value specifying the time shift (in seconds) to be
#'   applied to the date-time information.
#' @param tz A time zone (see \code{\link{time zones}}) specification to be used
#'   for the conversion.
#' @param ... Further arguments to be passed from or to other methods.
#' @seealso \code{\link{strptime}} provides the details about conversions
#'   between date-time character representation and \code{"POSIXct"} or
#'   \code{"POSIXlt"} classes. It also includes information about \code{format}
#'   \emph{conversion specification}.
#'
#'   \code{\link{DateTimeClasses}} further inform about the date-time classes.
#'
#'   See \code{\link{locales}} to query or set a locale.
#' @examples
#' xx <- c("01.01.2014  00:30:00", "01.01.2014  01:00:00",
#' "01.01.2014  01:30:00", "01.01.2014  02:00:00")
#' varnames(xx) <- "timestamp"
#' units(xx) <- "-"
#' str(xx)
#' (yy <- strptime_eddy(xx, "%d.%m.%Y %H:%M", shift.by = -900))
#' ## Convert to original format
#' format(yy + 900, format = "%d.%m.%Y %H:%M", tz = "GMT")
#' attributes(yy)
#' \dontrun{
#' zz <- xx[-3]
#' ## This is not a regular date-time sequence
#' strptime_eddy(zz, "%d.%m.%Y %H:%M")
#' ## freq argument provided incorrectly
#' strptime_eddy(xx, "%d.%m.%Y %H:%M", freq = 3600)}
strptime_eddy <- function(x, freq = 1800, shift_by = NULL, tz = "GMT", ...) {
  # change function name to "set_timestamp" ??
  orders <- c("Ymd HM", "mdy HM", "Ymd HMS", "mdy HMS")
  # Flexibility for class of x and parsing method based on input
  if (is.data.frame(x) & !"timestamp" %in% colnames(x)) {
    if ("timestamp" %in% colnames(x)) {
      out <- x %>%
        dplyr::mutate(
          timestamp = lubridate::parse_date_time(timestamp, orders, tz)
        )  %>%
        dplyr::select(timestamp, dplyr::everything())
    } else if (!"timestamp" %in% colnames(x)) {
      if (!c("date", "time") %in% colnames(x)) {
        stop(
          "If building timestamp from data frame components, ",
          "columns 'date' and 'time' must exist in 'x'.", call. = FALSE
        )
      }
      out <- x %>%
        dplyr::mutate(
          timestamp = lubridate::parse_date_time(paste(date, time), orders, tz)
        )  %>%
        dplyr::select(timestamp, dplyr::everything())
    }
  } else if (is.atomic(x)) {
    out <- lubridate::parse_date_time(x, orders, tz)
  }
  # Produce errors if timestamp fails to parse
  if (!length(out)) stop("'x' not supplied correctly.", call. = FALSE)
  if (anyNA(out)) {
    stop("'x' and/or 'format' not supplied correctly.", call. = FALSE)
  }
  # Produce warning if timestamp does not form a regular sequence
  if (any(diff(as.numeric(out)) != freq)) {
    warning(
      "Timestamp does not form regular sequence with specified 'freq'.",
      call. = FALSE
    )
  }
  if (!is.null(shift_by)) out <- out + shift_by
  varnames(out) <- "timestamp"
  units(out) <- "-"
  return(out)
}

get_vars <- function(
  # essential variables
  bowen = "bowen", # bowen ratio of heat fluxes
  ch4 = "ch4", # methane mixing ratio
  co2 = "co2", # carbon dioxide mixing ratio
  er = "er", # ecosystem respiration
  et = "et", # evapotranspiration
  fch4 = "fch4", # methane flux
  fco2 = "fco2", # carbon dioxide flux
  fh2o = "fh2o", # water vapor flux
  gpp = "gpp", # gross primary production
  h = "h", # sensible heat flux
  h2o = "h2o", # water vapor mixing ratio
  l = "l", # monin-obhukov length
  le = "le", # latent heat flux
  nee = "nee", # net ecosystem exchange
  pair = "pair", # air pressure
  q = "q", # specific humidity
  rssi75 = "rssi75", # LI-7500 relative signal strength
  rssi77 = "rssi77", # LI-7700 relative signal strength
  tdew = "tdew", # dewpoint temperature
  tau = "tau", # momentum flux
  tke = "tke", # turbulent kenetic energy
  ts = "ts", # sonic temperature
  urot = "urot", # rotated horizontal wind speed
  uunrot = "uunrot", # unrotated horizontal wind speed
  ustar = "ustar", # friction velocity
  vrot = "vrot", # rotated cross wind speed
  vunrot = "vunrot", # unrotated cross wind speed
  vpd = "vpd", # vapor pressure deficit
  wrot = "wrot", # rotated vertical wind speed
  wunrot = "wunrot", # unrotated vertical wind speed
  wd = "wd", # wind direction
  ws = "ws", # wind speed
  wsmax = "wsmax", # maximum wind speed
  xpeak = "xpeak", # peak fetch length
  x70p = "x70p", # 70% fetch length
  x90p = "x90p", # 90% fetch length
  zeta = "zeta", # monin-obhukov stability parameter

  # biomet variables
  g = "g", # soil heat flux
  lwin = "lwin", # incoming longwave radiation
  lwout = "lwout", # outgoing longwave radiation
  p = "p", # precipitation
  ppfd = "ppfd", # photosynthetic photon flux density
  rg = "rg", # global radiation
  rh = "rh", # relative humidity
  rn = "rn", # net radiation
  swin = "swin", # incoming shortwave radiation
  swout = "swout", # outgoing shortwave radiation
  swc = "swc", # soil water content
  tair = "tair", # air temperature
  tsoil = "tsoil", # soil temperature
  twater = "twater", # water column temperature
  wtd = "wtd" # water table depth
) {
  as.list(environment())
}

get_full_names <- function(
  # essential variables
  bowen = "Bowen ratio",
  ch4 = "CH4 mixing ratio",
  co2 = "CO2 mixing ratio",
  er = "Ecosystem respiration",
  et = "Evapotranspiration",
  fch4 = "CH4 flux",
  fco2 = "CO2 flux",
  fh2o = "H2O flux",
  gpp = "Gross primary production",
  h = "Sensible heat flux",
  h2o = "H2O mixing ratio",
  l = "Monin-Obukhov length",
  le = "Latent heat flux",
  nee = "Net ecosystem exchange",
  pair = "Air pressure",
  q = "Specific humidity",
  rssi75 = "LI-7500 relative signal strength",
  rssi77 = "LI-7700 relative signal strength",
  tdew = "Dew point temperature",
  tau = "Momentum flux",
  tke = "Turbulent kenetic energy",
  ts = "Sonic temperature",
  urot = "Rotated horizontal wind speed",
  uunrot = "Unrotated horizontal wind speed",
  ustar = "Friction velocity",
  vrot = "Rotated cross wind speed",
  vunrot = "Unrotated cross wind speed",
  vpd = "Vapor pressure deficit",
  wrot = "Rotated vertical wind speed",
  wunrot = "Unrotated vertical wind speed",
  wd = "Wind direction",
  ws = "Wind speed",
  wsmax = "Maximum wind speed",
  xpeak = "Peak fetch length",
  x70p = "70% fetch length",
  x90p = "90% fetch length",
  zeta = "Monin-Obukhov stability parameter",

  # biomet variables
  g = "Soil heat flux",
  lwin = "Incoming longwave radiation",
  lwout = "Outgoing longwave radiation",
  p = "Precipitation",
  ppfd = "Photosynthetic photon flux density",
  rg = "Global radiation",
  rh = "Relative humidity",
  rn = "Net radiation",
  swin = "Incoming shortwave radiation",
  swout = "Outgoing shortwave radiation",
  swc = "Soil water content",
  tair = "Air temperature",
  tsoil = "Soil temperature",
  twater = "Water column temperature",
  wtd = "Water table depth"
) {
  as.list(environment())
}

get_deps <- function(
  # separating instruments and fluxes prevents double-counting additive tests
  sa = c("u", "v", "w", "ts"),
  irga75 = c("irga75", "co2", "h2o"),
  irga77 = c("irga77", "ch4"),
  fco2 = c("fco2", "irga75", "sa", "all"),
  fch4 = c("fch4", "irga77", "sa", "all"),
  le = c("le", "irga75", "sa", "all"),
  h = c("h", "sa", "all"),
  tau = c("tau", "sa")
) {
  as.list(environment())
}

get_plaus <- function(
  co2 = c(220, 770),
  fco2 = c(-50, 100),
  fh2o = c(-2, 14),
  g = c(-110, 220),
  E_0 = c(0, 600),
  le = c(-100, 700),
  p = c(0, 0.05),
  pair = c(70000, 120000),
  ppfd = c(0, 2200),
  rg = c(0, 1200),
  rh = c(0, 100),
  rn = c(-200, 1000),
  rpot = c(0, 3000),
  swc = c(0, 0.7),
  tair = c(-50, 50),
  tau = c(-100, 100),
  tsoil = c(-20, 50),
  ustar = c(0, 6),
  vpd = c(0, 50),
  wd = c(0, 360),
  ws = c(0, 40)
) {

}
grahamstewart12/tidyflux documentation built on June 4, 2019, 7:44 a.m.