## R/utilities.R

## =============================================================================
#' Convert Temperature between Kelvin and Celsius
#'
#' Converts temperature from degrees K to degrees C and vice versa.
#'
#' @param x A numeric vector of temperatures.
#' @param method Direction of conversion: "KtoC" (Kelvin to Celsius, default)
#'   or "CtoK" (Celsius to Kelvin).
#'
#' @return A numeric vector of converted temperatures with updated units.
#'
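#' @examples
#' \dontrun{
#' # Minimal sketch, assuming a plain numeric vector of temperatures in Kelvin;
#' # 'varnames' and 'units' attributes are carried over when present.
#' convert_temperature(c(273.15, 283.15, 293.15), method = "KtoC")  # 0, 10, 20
#' }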
#' @export
convert_temperature <- function(x, method = c("KtoC", "CtoK")) {
  method <- match.arg(method)
  if (method == "KtoC") {
    out <- x - 273.15
    varnames(out) <- attr(x, "varnames")
    tidyflux::units(out) <- "C"
  } else if (method == "CtoK") {
    out <- x + 273.15
    varnames(out) <- attr(x, "varnames")
    tidyflux::units(out) <- "K"
  }
  return(out)
}

## =============================================================================
#' Convert Pressure between Pa and kPa
#'
#' Converts pressure from Pa to kPa and vice versa.
#'
#' @param x A numeric vector of pressures.
#' @param method Direction of conversion: "PatokPa" (Pa to kPa, default) or
#'   "kPatoPa" (kPa to Pa).
#'
#' @return A numeric vector of converted pressures with updated units.
#'
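#' @examples
#' \dontrun{
#' # Minimal sketch, assuming a plain numeric vector of pressures in Pa.
#' convert_pressure(c(101325, 99000), method = "PatokPa")  # 101.325, 99
#' }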
#' @export
convert_pressure <- function(x, method = c("PatokPa", "kPatoPa")) {
  method <- match.arg(method)
  if (method == "PatokPa") {
    out <- x / 1000
    varnames(out) <- attr(x, "varnames")
    tidyflux::units(out) <- "kPa"
  } else if (method == "kPatoPa") {
    out <- x * 1000
    varnames(out) <- attr(x, "varnames")
    tidyflux::units(out) <- "Pa"
  }
  return(out)
}

## =============================================================================
#' Calculate Vapor Pressure Deficit
#'
#' Uses relative humidity (rh) and air temperature (tair) to calculate the
#' vapor pressure deficit (vpd).
#'
#' @param rh A numeric vector of relative humidity (%).
#' @param tair A numeric vector of air temperature (deg C).
#'
#' @details The saturation vapor pressure is estimated from air temperature
#'   with a Magnus-type equation, and vpd is the fraction of it not reached at
#'   the given relative humidity:
#'
#'   \deqn{vpd = 0.61078 (1 - rh/100) exp(17.08085 tair / (234.175 + tair))}
#'
#' @return A numeric vector of vapor pressure deficit (kPa).
#'
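#' @examples
#' \dontrun{
#' # Minimal sketch, assuming tair carries the "C" units attribute that
#' # check_units() looks for.
#' tair <- c(15, 20, 25)
#' tidyflux::units(tair) <- "C"
#' calculate_vpd(rh = c(60, 70, 80), tair = tair)
#' }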
#' @export
calculate_vpd <- function(rh, tair) {
  check_units(tair, "C")
  out <- 0.61078 * (1 - rh / 100) * exp(17.08085 * tair / (234.175 + tair))
  varnames(out) <- paste0(
    "calculate_vpd(", attr(rh, "varnames"), ", ", attr(tair, "varnames"), ")"
  )
  #varnames(out) <- deparse(match.call())
  tidyflux::units(out) <- "kPa"
  return(out)
}

## =============================================================================
#' Conversions between Global Radiation and Photosynthetic Photon Flux Density
#'
#' Converts radiation from W m-2 to µmol m-2 s-1 and vice versa.
#'
#' @param x A numeric vector of global radiation (rg, W m-2) or photosynthetic
#'   photon flux density (ppfd, µmol m-2 s-1), depending on \code{method}.
#' @param frac_ppfd Fraction of incoming solar irradiance that is
#'   photosynthetically active radiation (ppfd); defaults to 0.5.
#' @param calc_frac Not yet implemented; if \code{TRUE}, the conversion factor
#'   would be estimated from a regression between rg and ppfd.
#' @param method Direction of conversion: "to_rg" (ppfd to rg, default) or
#'   "to_ppfd" (rg to ppfd).
#'
#' @details
#' The conversion is given by:
#'
#'   \deqn{ppfd = rg * frac_ppfd * J_to_mol}
#'
#' where J_to_mol = 4.6 µmol J-1 converts from J m-2 s-1 (= W m-2) to
#' µmol m-2 s-1. With the default \code{frac_ppfd} of 0.5, the combined
#' conversion factor (\code{frac_ppfd * J_to_mol}) is 2.3.
#'
#' @return A numeric vector of converted radiation with updated units.
#'
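#' @examples
#' \dontrun{
#' # Minimal sketch, assuming a ppfd vector carrying the units attribute the
#' # function checks for.
#' ppfd <- c(460, 920, 1380)
#' tidyflux::units(ppfd) <- "µmol+1s-1m-2"
#' convert_radiation(ppfd, method = "to_rg")  # 200, 400, 600 W m-2
#' }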
convert_radiation <- function(x, frac_ppfd = 0.5, calc_frac = FALSE,
                              method = c("to_rg", "to_ppfd")) {
  method <- match.arg(method)
  # TODO if calc_frac do regression between rg and ppfd to get conversion factor
  if (method == "to_rg") {
    if (tidyflux::units(x) != "µmol+1s-1m-2") {
      stop("Cannot perform ppfd to rg conversion if units not µmol+1s-1m-2.")
    }
    out <- x / frac_ppfd / 4.6
    varnames(out) <- paste0("convert_radiation(", varnames(x), ")")
    tidyflux::units(out) <- "W+1m-2"
  } else if (method == "to_ppfd") {
    if (tidyflux::units(x) != "W+1m-2") {
      stop("Cannot perform rg to ppfd conversion if units not W+1m-2.")
    }
    out <- x * frac_ppfd * 4.6
    varnames(out) <-  paste0("convert_radiation(", varnames(x), ")")
    tidyflux::units(out) <- "µmol+1s-1m-2"
  }
  out
}

## =============================================================================
#' Biochemical Energy
#'
#' Radiant energy absorbed in photosynthesis or heat release by respiration
#' calculated from net ecosystem exchange of CO2 (NEE).
#'
#' @param x Net ecosystem exchange (µmol CO2 m-2 s-1).
#' @param alpha Energy taken up/released by photosynthesis/respiration per
#'   amount of CO2 fixed/respired (J µmol-1).
#'
#' @details The following sign convention is employed: NEE is negative when
#'   carbon is taken up by the ecosystem. Positive values of the resulting
#'   biochemical energy mean that energy (heat) is taken up by the ecosystem,
#'   negative ones that heat is released. The value of alpha is taken from Nobel
#'   1974 (see Meyers & Hollinger 2004).
#'
#' @return Biochemical energy (Sp, W m-2).
#'
#' @references
#'   Meyers, T.P., Hollinger, S.E. 2004: An assessment of storage terms in the
#'   surface energy balance of maize and soybean. Agricultural and Forest
#'   Meteorology 125, 105-115.
#'
#'   Nobel, P.S., 1974: Introduction to Biophysical Plant Physiology. Freeman,
#'   New York.
#'
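#' @examples
#' \dontrun{
#' # Minimal sketch, assuming NEE carries the units attribute checked below;
#' # negative NEE (uptake) yields positive biochemical energy.
#' nee <- c(-20, 0, 10)
#' tidyflux::units(nee) <- "µmol+1s-1m-2"
#' calculate_sp(nee)  # 8.44, 0, -4.22 W m-2
#' }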
#' @export
calculate_sp <- function(x, alpha = 0.422) {
  if (tidyflux::units(x) != "µmol+1s-1m-2") {
    stop("'x' must be in expected units µmol+1s-1m-2.")
  }
  out <- alpha * -x
  varnames(out) <- paste0("calculate_sp(", varnames(x), ")")
  tidyflux::units(out) <- "W+1m-2"
  out
}

## =============================================================================
#' Energy Balance Closure
#'
#' Calculates the degree of the energy balance non-closure for individual
#' records, or the entire time span based on the ratio of two sums (energy
#' balance ratio), and ordinary least squares (OLS).
#'
#' @param data A data.frame or matrix containing all required variables.
#' @param Rn Net radiation (W m-2).
#' @param G Ground heat flux (W m-2); optional.
#' @param S Sum of all storage fluxes (W m-2); optional.
#' @param LE Latent heat flux (W m-2).
#' @param H Sensible heat flux (W m-2).
#' @param instantaneous Should the energy balance be calculated at the time step
#'   of the observations (\code{TRUE}, the default), or over the entire time
#'   period provided as input (\code{FALSE})?
#' @param name_out Name assigned to the output variable when
#'   \code{instantaneous = TRUE}; defaults to "EBR".
#' @param missing.G.as.NA If \code{TRUE}, missing G are treated as \code{NA}s,
#'   otherwise set to 0.
#' @param missing.S.as.NA If \code{TRUE}, missing S are treated as \code{NA}s,
#'   otherwise set to 0.
#'
#' @details The energy balance ratio (EBR) is calculated as:
#'
#'   \deqn{EBR = sum(LE + H)/sum(Rn - G - S)}
#'
#'   where the sum is taken for all time steps with complete observations (i.e.
#'   all energy balance terms are available).
#'
#' @return A named vector containing:
#' \item{n}{Number of complete (all energy balance terms available)
#'   observations.}
#' \item{intercept}{Intercept of the OLS regression.}
#' \item{slope}{Slope of the OLS regression.}
#' \item{r_squared}{R^2 of the OLS regression.}
#' \item{EBR}{Energy balance ratio.}
#'
#' If \code{instantaneous = TRUE}, only \code{EBR} is returned.
#'
#' @references Wilson K., et al. 2002: Energy balance closure at FLUXNET sites.
#'             Agricultural and Forest Meteorology 113, 223-243.
#'
#' @examples
#' \dontrun{
#' ## characterize energy balance closure for DE-Tha in June 2014
#' energy_closure(DE_Tha_Jun_2014, instantaneous = FALSE)
#'
#' ## look at half-hourly closure
#' EBR_inst <- energy_closure(DE_Tha_Jun_2014, instantaneous = TRUE)
#' summary(EBR_inst)
#' }
#'
#' @importFrom stats complete.cases lm
#' @export
energy_closure <- function(data, Rn = "Rn", G = NULL, S = NULL, LE = "LE",
                           H = "H", instantaneous = TRUE, name_out = "EBR",
                           missing.G.as.NA = FALSE, missing.S.as.NA = FALSE) {
  check_input(data, Rn, LE, H, G, S)
  # check_range(Rn, c(-200, 1000))
  # check_range(LE, c(-100, 700))
  # check_range(H, c(-300, 700))
  if (!is.null(G)) {
    # check_range(G, c(-110, 220))
    if (!missing.G.as.NA) G[is.na(G)] <- 0
  } else {
    cat("Ground heat flux G is not provided and set to 0.", fill = TRUE)
    G <- rep(0, nrow(data))
  }
  if (!is.null(S)) {
    # check_range(S, c(-200, 300))
    if (!missing.S.as.NA) S[is.na(S)] <- 0
  } else {
    cat("Energy storage fluxes S are not provided and set to 0.", fill = TRUE)
    S <- rep(0, nrow(data))
  }
  if (!instantaneous) {
    comp <- complete.cases(Rn, G, S, LE, H)
    n <- sum(comp)
    EBR <- sum(LE[comp] + H[comp]) / sum(Rn[comp] - G[comp] - S[comp])
    emod <- lm(c(LE + H) ~ c(Rn - G - S))
    intercept <- summary(emod)$coef[1, 1]
    slope <- summary(emod)$coef[2, 1]
    r_squared <- summary(emod)$r.squared
    out <- c(
      "n" = n, "intercept" = round(intercept, 3),
      "slope" = round(slope, 3), "r^2" = round(r_squared, 3),
      "EBR" = round(EBR, 3)
    )
  } else {
    out <- (LE + H) / (Rn - G - S)
    varnames(out) <- name_out
    tidyflux::units(out) <- "-"
  }
  return(out)
}

## =============================================================================
#' Fix Negative Incoming Radiation
#'
#' Looks for negative and missing incoming radiation values, and checks against
#' potential incoming radiation.
#'
#' @param x A numeric vector of incoming radiation (W m-2), or ppfd if
#'   \code{ppfd = TRUE}.
#' @param rpot A numeric vector of potential incoming radiation (W m-2).
#' @param ppfd If \code{TRUE}, \code{x} is converted from ppfd to W m-2 before
#'   comparison against \code{rpot}.
#'
#' @return A numeric vector of corrected incoming radiation, in the units of
#'   \code{x}.
#'
#' @details
#' Checks for negative and missing incoming radiation data in relation to
#' potential incoming radiation (rpot). Negative values are set to 0 where rpot
#' is 0, and remaining negative values are set to \code{NA}. This is needed
#' because negative radiation affects partitioning quality, and to increase the
#' number of nighttime records to be used for ustar threshold calculation.
#'
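#' @examples
#' \dontrun{
#' # Minimal sketch with made-up values: the -5 at night (rpot == 0) is set to
#' # 0, the -5 during the day (rpot > 0) is set to NA.
#' fix_rad(x = c(-5, 300, -5), rpot = c(0, 450, 450))
#' }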
fix_rad <- function(x, rpot, ppfd = FALSE) {
  rad <- x # copy of x to allow for internal conversions
  if (ppfd) rad <- convert_radiation(rad)
  out <- dplyr::if_else(rad < 0 & rpot == 0, 0, x)
  out[out < 0] <- NA  # set remaining negatives to NA
  message(
    "Fixed ", sum(x < 0, na.rm = TRUE),
    " cases of negative incoming radiation."
  )
  varnames(out) <- paste0("fix_rad(", varnames(x), ")")
  # Output keeps the units of the input
  tidyflux::units(out) <- if (ppfd) "µmol+1s-1m-2" else "W+1m-2"
  out
}

## =============================================================================
#' Check Incoming Radiation
#'
#'
#' @param x A numeric vector of incoming short-wave radiation (sw_in, W m-2).
#' @param rpot A numeric vector of potential incoming radiation (W m-2).
#' @param ppfd A numeric vector of photosynthetic photon flux density, used for
#'   the optional rg-ppfd cross-check.
#' @param check_ppfd If \code{TRUE}, additionally flag values based on a
#'   regression between sw_in and ppfd.
#' @param min_val Minimum number of complete sw_in-ppfd pairs required for the
#'   regression check; defaults to 11000.
#'
#' @return An integer vector of flags (0 = not flagged, 2 = flagged).
#'
#' @details
#' Values are flagged when:
#' \itemize{
#' \item rpot = 0 and sw_in > 50
#' \item rpot > 200.0 and sw_in - rpot > 50
#' \item sw_in is > 15% greater than rpot
#' }
#'
#' Optionally, values can be flagged based on a regression between sw_in (rg)
#' and ppfd. If there are at least \code{min_val} pairwise records of sw_in and
#' ppfd, a simple linear regression is constructed and values differing by
#' greater than +/-5*sd of the residuals are flagged. This option is selected by
#' setting \code{check_ppfd} to \code{TRUE}.
#'
#' From OneFlux:
#' https://github.com/LI-COR/ONEFlux/blob/master/oneflux_steps/qc_auto/src
#'
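#' @examples
#' \dontrun{
#' # Minimal sketch with made-up values: 100 W m-2 at night (rpot == 0) and a
#' # value far above rpot are flagged with 2; the last value passes.
#' flag_rad(x = c(100, 500, 300), rpot = c(0, 400, 320))  # 2, 2, 0
#' }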
#' @export
flag_rad <- function(x, rpot, ppfd = NULL, check_ppfd = FALSE,
                     min_val = 11000) {
  out <- dplyr::case_when(
    rpot == 0 & x > 50 ~ 2,
    rpot > 200 & (x - rpot) > 50 & (x - rpot) / rpot > 0.15 ~ 2,
    (x - rpot) / rpot > 0.15 ~ 2,
    TRUE ~ 0
  )
  if (check_ppfd) {
    comp <- data.frame(x, ppfd)
    valid <- tidyr::drop_na(comp)
    if (nrow(valid) < min_val) {
      message(
        "Skipping rg-ppfd validation: ",
        "the number of complete rg and ppfd records, ", nrow(valid),
        ", does not fulfill the required amount of ", min_val, "."
      )
    } else {
      # Regress sw_in on ppfd; na.exclude keeps residuals aligned with x
      reg <- lm(x ~ ppfd, data = comp, na.action = na.exclude)
      res <- unname(resid(reg))
      sd_res <- sd(res, na.rm = TRUE)
      if (sd_res < 0.01) {
        message(
          "Skipping rg-ppfd validation: ",
          "the standard deviation of residuals is less than 0.01."
        )
      } else {
        # Threshold is +/- 5 * sd of the residuals
        lim <- 5 * sd_res
        out <- dplyr::case_when(
          res < -lim ~ 2,
          res > lim ~ 2,
          TRUE ~ out
        )
      }
    }
  }
  attributes(out) <- list(check_ppfd = check_ppfd, additive = FALSE, na_as = NA)
  out
}

## =============================================================================
#' Calculate Potential Radiation
#'
#' Calculate the potential radiation from solar elevation and extraterrestrial
#' solar radiation. REddyProc name: fCalcPotRadiation.
#'
#' @param timestamp A POSIXct vector of timestamps in the local time zone; day
#'   of year and decimal hour are derived from it.
#' @param lat Latitude in decimal degrees.
#' @param long Longitude in decimal degrees.
#' @param tz Time zone (in hours).
#' @param solar_time Should hour (given in local winter time) be corrected for
#'   latitude to solar time where noon is exactly at 12:00 (default)? Set to
#'   \code{FALSE} to directly use local winter time.
#'
#' @return A numeric vector of potential radiation (rpot, W+1m-2).
#'
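#' @examples
#' \dontrun{
#' # Minimal sketch for a hypothetical site at 51 deg N, 13.5 deg E (UTC+1).
#' times <- seq(
#'   as.POSIXct("2014-06-01 00:00", tz = "Etc/GMT-1"),
#'   as.POSIXct("2014-06-01 23:30", tz = "Etc/GMT-1"),
#'   by = "30 min"
#' )
#' calculate_rpot(times, lat = 51, long = 13.5, tz = 1)
#' }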
#' @export
calculate_rpot <- function(timestamp, lat, long, tz, solar_time = TRUE) {
  doy <- lubridate::yday(timestamp)
  hour <- lubridate::hour(timestamp) + lubridate::minute(timestamp) / 60
  sol_elev <- calc_sun_pos(doy, hour, lat, long, tz, solar_time)$sol_elev
  ext_rad <- calc_ext_rad(doy)
  out <- dplyr::if_else(sol_elev <= 0, 0, ext_rad * sin(sol_elev))
  attr(out, "varnames") <- "calculate_rpot()"
  attr(out, "units") <- attr(ext_rad, "units")
  out
}

## =============================================================================
#' Calculate Sun Position
#'
#' Calculate the position of the sun. REddyProc name: fCalcSunPosition.
#'
#' @param doy A vector with day of year, same length as hour or length 1.
#' @param hour A vector with time as decimal hour of local time zone.
#' @param lat Latitude in (decimal) degrees.
#' @param long Longitude in (decimal) degrees.
#' @param tz Time zone (in hours).
#' @param solar_time Should hour (given in local winter time) be corrected for
#'   latitude to solar time where noon is exactly at 12:00 (default)? Set to
#'   \code{FALSE} to directly use local winter time.
#'
#' @details
#' Assumes that hour is given in local winter time zone, and corrects it by
#' longitude to solar time (where noon is exactly at 12:00). Set argument
#' \code{solar_time} to FALSE to use the local winter time instead.
#'
#' @return A list with vectors:
#' \describe{
#' \item{sol_time}{Solar time in hours}
#' \item{sol_decl}{Solar declination in radians}
#' \item{sol_elev}{Solar elevation in radians with 0 at horizon}
#' \item{sol_azim}{Solar azimuth in radians with 0 at North}
#' }
#'
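#' @examples
#' \dontrun{
#' # Minimal sketch for a hypothetical site at 51 deg N, 13.5 deg E (UTC+1):
#' # solar position at noon local winter time on day of year 152.
#' calc_sun_pos(doy = 152, hour = 12, lat = 51, long = 13.5, tz = 1)
#' }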
#' @export
calc_sun_pos <- function(doy, hour, lat, long, tz, solar_time = TRUE) {
  # Fractional year in radians
  frac_year <- 2 * pi * (doy - 1) / 365.24
  # Time equation in hours, accounting for changes in the time of solar noon
  time_eq <- (
    0.0072 * cos(frac_year) - 0.0528 * cos(2 * frac_year) -
    0.0012 * cos(3 * frac_year) - 0.1229 * sin(frac_year) -
    0.1565 * sin(2 * frac_year) - 0.0041 * sin(3 * frac_year)
  )
  # Solar time
  sol_time <- if (solar_time) {
    # Correction for local time and equation of time
    hour + (long / 15 - tz) + time_eq
  } else {
    warning(
      "Solar position calculated without correction for local time ",
      "and equation of time.",
      call. = FALSE
    )
    hour
  }
  attr(sol_time, "varnames") <- "sol_time"
  attr(sol_time, "units") <- "hour"
  # Conversion to radians
  st <- (sol_time - 12) * pi / 12.0
  # Correction for solar time < -pi to positive, important for sol_azim below
  st <- dplyr::if_else(st < -pi, st + 2 * pi, st)
  # Solar declination in radians, accounting for the earth axis tilt
  sol_decl <- (
    (0.33281 - 22.984 * cos(frac_year) - 0.34990 * cos(2 * frac_year) -
    0.13980 * cos(3 * frac_year) + 3.7872 * sin(frac_year) +
    0.03205 * sin(2 * frac_year) + 0.07187 * sin(3 * frac_year)) / 180 * pi
  )
  attr(sol_decl, "varnames") <- "sol_decl"
  attr(sol_decl, "units") <- "rad"
  # Solar elevation (vertical, zenithal angle) in radians with zero for horizon
  sol_elev <- asin((
    sin(sol_decl) * sin(lat / 180 * pi) +
    cos(sol_decl) * cos(lat / 180 * pi) * cos(st)
  ))
  attr(sol_elev, "varnames") <- "sol_elev"
  attr(sol_elev, "units") <- "rad"
  # Solar azimuth (horizontal angle) with zero for North
  sol_azim <- (
    cos(sol_decl) * cos(st) + sin(sol_elev) * cos(lat / 180 * pi)
  ) / (sin(lat / 180 * pi) * cos(sol_elev))
  # Correction if off edge values
  sol_azim <- dplyr::if_else(!dplyr::between(sol_azim, -1, 1), 1, sol_azim)
  # Conversion to radians
  sol_azim <- acos(sol_azim)
  # Determine if solar azimuth is East or West depending on solar time
  sol_azim <- dplyr::if_else(st < 0, pi - sol_azim, pi + sol_azim)
  attr(sol_azim, "varnames") <- "sol_azim"
  attr(sol_azim, "units") <- "rad"
  # Return data list
  sol_pos <- list(
    sol_time = sol_time,
    sol_decl = sol_decl,
    sol_elev = sol_elev,
    sol_azim = sol_azim
  )
  return(sol_pos)
}

## =============================================================================
#' Calculate Extraterrestrial Solar Radiation
#'
#' Calculate the extraterrestrial solar radiation with the eccentricity
#' correction after Lanini 2010 (MSc Thesis, Bern University).
#' REddyProc name: fCalcExtRadiation.
#'
#' @param doy A numeric vector with day of year.
#'
#' @return A numeric vector with extraterrestrial radiation (ext_rad, W+1m-2)
#'
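#' @examples
#' \dontrun{
#' # Extraterrestrial radiation for a few days across the year, showing the
#' # effect of the eccentricity correction.
#' calc_ext_rad(c(1, 80, 172, 266, 355))
#' }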
#' @export
calc_ext_rad <- function(doy) {
  # Fractional year in radians
  frac_year <- 2 * pi * (doy - 1) / 365.24
  # Total solar irradiance
  sol_irr <- 1366.1
  # Eccentricity correction
  ext_rad <- sol_irr * (
    1.00011 + 0.034221 * cos(frac_year) + 0.00128 * sin(frac_year) +
    0.000719 * cos(2 * frac_year) + 0.000077 * sin(2 * frac_year)
  )
  attr(ext_rad, "varnames") <- "ext_rad"
  attr(ext_rad, "units") <- "W+1m-2"
  return(ext_rad)
}

## =============================================================================
#' Split Variable by Wind Direction Bins
#'
#' @param data A data frame containing \code{var} and a wind direction column
#'   \code{wd} (degrees).
#' @param var Name of the variable to summarize.
#' @param by Bin width in degrees; defaults to 10.
#'
#' @return A data frame with bin midpoints (wd) and the mean and standard
#'   deviation of \code{var} within each bin.
#' @export
#'
#' @examples
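#' \dontrun{
#' # Minimal sketch, assuming a hypothetical data frame 'flux_data' with a wind
#' # direction column 'wd' and a flux column 'fc'.
#' split_wd(flux_data, var = "fc", by = 10)
#' }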
split_wd <- function(data, var, by = 10) {
  l <- split(data[[var]], cut(data$wd, seq(0, 360, by = by)))
  seq1 <- by / 2
  seq2 <- 360 - seq1 + 1
  data.frame(
    wd = seq(seq1, seq2, by = by),
    mean = sapply(l, mean, na.rm = TRUE),
    sd = sapply(l, sd, na.rm = TRUE)
  )
}

## =============================================================================
#' Average Replicate Measurements of One Variable
#'
#' @param ... Numeric vectors of replicate measurements of the same variable.
#'
#' @return A numeric vector with the element-wise mean of the replicates.
#' @export
#'
#' @examples
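#' \dontrun{
#' # Minimal sketch with two made-up replicate series; means are taken over the
#' # available (non-missing) replicates in each position.
#' average_reps(c(1, 2, NA), c(3, NA, NA))  # 2, 2, NA
#' }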
average_reps <- function(...) {
  # Create data frame from reps vectors
  rep_df <- dplyr::bind_cols(rlang::dots_list(...))
  # Average reps
  out <- rep_df %>% rowMeans(na.rm = TRUE)
  out[is.na(out)] <- NA # NaN -> NA
  # Set new varname
  varnames(out) <- paste0(
    "mean(", paste(varnames(rep_df), collapse = ", "), ", na.rm = TRUE)"
  )
  # Set new units
  tidyflux::units(out) <- tidyflux::units(rep_df[[1]])
  return(out)
}

## =============================================================================
#' Convert Variance to Standard Deviation
#'
#' Takes the square root of a variance vector to obtain the standard deviation.
#'
#' @param x A numeric vector of variances.
#'
#' @return A numeric vector of standard deviations.
#' @export
#'
#' @examples
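#' \dontrun{
#' # Minimal sketch with made-up variances.
#' convert_variance(c(4, 9, 16))  # 2, 3, 4
#' }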
convert_variance <- function(x) {
  # Take square root to get sd
  out <- sqrt(x)
  # Define new units, and varnames
  #varnames(out) <- paste0("sqrt(", deparse(substitute(x)), ")")
  varnames(out) <- paste0("sqrt(", attr(x, "varnames"), ")")
  tidyflux::units(out) <- "-"
  return(out)
}

## =============================================================================
#' Mean Sunrise and Sunset Times
#'
#' Estimates the mean sunrise and sunset hours from a light variable.
#'
#' @param hour A numeric vector with time as decimal hour.
#' @param light A numeric vector of a light variable (e.g. sw_in or ppfd).
#' @param thr Threshold of \code{light} at or below which it is considered
#'   dark; defaults to 0.
#'
#' @return A numeric vector of length two: the mean sunrise hour and the mean
#'   sunset hour.
#' @export
#'
#' @examples
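#' \dontrun{
#' # Minimal sketch, assuming a hypothetical data frame 'df' with a decimal
#' # hour column and a ppfd column spanning several days.
#' mean_suntimes(hour = df$hour, light = df$ppfd)
#' }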
mean_suntimes <- function(hour, light, thr = 0) {
  light <- dplyr::if_else(light <= thr, thr, light)
  rises <- hour[light > thr & dplyr::lag(light) == thr]
  sets <- hour[light == thr & dplyr::lag(light) > thr]
  c(mean(rises, na.rm = TRUE), mean(sets, na.rm = TRUE))
}

## =============================================================================
# Evaluate an expression while suppressing its printed output
quiet <- function(x) {
  sink(tempfile())
  on.exit(sink())
  invisible(force(x))
}

## =============================================================================
# Fractional day of year (day plus fraction of the day elapsed)
doy <- function(x) {
  day <- lubridate::yday(x)
  hour <- lubridate::hour(x)
  minute <- lubridate::minute(x)
  out <- day + ((hour + (minute / 60)) / 24)
  out
}

## =============================================================================
#' Rescale One Variable to the Range of Another
#'
#' Rescales \code{x} linearly so that its range matches the range of \code{y},
#' optionally computing the ranges from group means.
#'
#' @param x A numeric vector to rescale.
#' @param y A numeric vector whose range \code{x} is rescaled to.
#' @param group1 Optional grouping vector applied before the ranges are
#'   computed.
#' @param group2 Optional second grouping vector.
#'
#' @return A numeric vector of \code{x} rescaled to the range of \code{y}.
#' @export
#'
#' @examples
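#' \dontrun{
#' # Minimal sketch with made-up values: x is rescaled to the range of y.
#' normalize(x = c(0, 500, 1000, 2000), y = c(5, 10, 15, 25))  # 5, 10, 15, 25
#' }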
normalize <- function(x, y, group1 = NULL, group2 = NULL) {
  df <- data.frame(x, y)
  if (!is.null(group1)) df$group1 <- group1
  if (!is.null(group2)) df$group2 <- group2
  df <- df %>%
  {if (!is.null(group1) & !is.null(group2)) {
    dplyr::group_by(., group1, group2)
  } else .} %>%
  {if (!is.null(group1) & is.null(group2)) {
    dplyr::group_by(., group1)
  } else .} %>%
    tidyr::drop_na() %>%
    {if (!is.null(group1)) {
      dplyr::summarize(
        ., x = mean(x, na.rm = TRUE),
        y = mean(y, na.rm = TRUE)
      )
    } else .} %>%
    dplyr::summarize(
      xmin = min(x, na.rm = TRUE),
      xmax = max(x, na.rm = TRUE),
      ymin = min(y, na.rm = TRUE),
      ymax = max(y, na.rm = TRUE)
    ) %>%
    {if (!is.null(group2)) {
      dplyr::summarize(
        ., xmin = min(x, na.rm = TRUE),
        xmax = max(x, na.rm = TRUE),
        ymin = min(y, na.rm = TRUE),
        ymax = max(y, na.rm = TRUE)
      )
    } else .}
  (df$ymax - df$ymin) * ((x - df$xmin) / (df$xmax - df$xmin)) + df$ymin
}

## =============================================================================
#' Bin Wind Direction
#'
#' Assigns wind direction values to bins and returns the bin midpoints.
#'
#' @param wd A numeric vector of wind direction (degrees, 0-360).
#' @param bin_size Bin width in degrees; defaults to 10.
#'
#' @return A numeric vector of bin midpoints.
#' @export
#'
#' @examples
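#' \dontrun{
#' # Made-up wind directions assigned to 10-degree bin midpoints.
#' bin_wd(c(3, 47, 359))  # 5, 45, 355
#' }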
bin_wd <- function(wd, bin_size = 10) {
  breaks <- seq(0, 360, by = bin_size)
  labels <- breaks[-length(breaks)] + bin_size / 2
  as.numeric(as.character(cut(wd, breaks, labels = labels)))
}

## =============================================================================
#' Moving Average
#'
#' Computes a centered moving average with a simple rolling mean filter.
#'
#' @param x A numeric vector.
#' @param n Window width (number of observations); defaults to 5.
#'
#' @return A numeric vector of the same length as \code{x}, with \code{NA}
#'   where the window is incomplete at the ends.
#' @export
#'
#' @examples
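#' \dontrun{
#' # 3-point centered moving average of a made-up series.
#' moving_avg(c(1, 2, 3, 4, 5), n = 3)  # NA, 2, 3, 4, NA
#' }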
moving_avg <- function(x, n = 5) {
  out <- stats::filter(x, rep(1 / n, n), sides = 2)
  as.numeric(out)
}

## =============================================================================
#' Flag Outliers
#'
#' Flags outliers in a single variable, or in the relationship between two
#' variables, using one of several methods.
#'
#' @param x A numeric vector.
#' @param y An optional second numeric vector; if provided, bivariate methods
#'   are used.
#' @param z Threshold value; if missing, a method-specific default is used.
#' @param method Outlier detection method. Univariate (y = NULL): "mad"
#'   (default), "doublemad", "iqr", "spikes". Bivariate: "cooks" (default),
#'   "mahalanobis", "aq", "pcout" (the latter two are not yet implemented).
#'
#' @return A logical vector, \code{TRUE} where a value is flagged as an
#'   outlier.
#' @export
#'
#' @examples
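#' \dontrun{
#' # Minimal sketch: the large spike added to a made-up series is flagged by
#' # the default mad-based filter.
#' get_outliers(c(rnorm(100), 50))
#' }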
get_outliers <- function(x, y = NULL, z,
                         method = c("default", "mad", "doublemad", "iqr",
                                    "spikes", "cooks", "mahalanobis", "aq",
                                    "pcout")) {
  method <- match.arg(method)
  if (is.null(y)) {
    if (method %in% c("default", "mad")) {
      # default mad z = 7
      if (missing(z)) z <- 7
      min <- median(x, na.rm = TRUE) - mad(x, na.rm = TRUE) * z
      max <- median(x, na.rm = TRUE) + mad(x, na.rm = TRUE) * z
      !dplyr::between(x, min, max)
    } else if (method == "doublemad") {
      # default mad z = 7
      if (missing(z)) z <- 7
      abs_dev <- abs(x[!is.na(x)] - median(x[!is.na(x)]))
      left <- median(abs_dev[x[!is.na(x)] <= median(x[!is.na(x)])])
      right <- median(abs_dev[x[!is.na(x)] >= median(x[!is.na(x)])])
      x_mad <- dplyr::if_else(x < median(x, na.rm = TRUE), left, right)
      mad_dist <- abs(x - median(x, na.rm = TRUE)) / x_mad
      mad_dist > z
    } else if (method == "iqr") {
      # AKA Tukey's rule: default iqr z = 1.5
      if (missing(z)) z <- 1.5
      quant <- quantile(x, probs = c(.25, .75), na.rm = TRUE)
      iqr <- z * IQR(x, na.rm = TRUE)
      min <- quant[1] - iqr
      max <- quant[2] + iqr
      !dplyr::between(x, min, max)
    } else if (method == "spikes") {
      # x is flagged if it spikes beyond what is expected given variability for
      # time period
      if (missing(z)) z <- 7
      df <- data.frame(x = x)
      n <- 624
      #browser()
      df <- df %>%
        # calculate immediate differences, cut out 2-week windows
        dplyr::mutate(
          di = (x - dplyr::lag(x, 1)) - (dplyr::lead(x, 1) - x),
          window = ggplot2::cut_interval(dplyr::row_number(), length = n),
          window = forcats::fct_explicit_na(window)
        ) %>%
        # separate daytime and nighttime fluxes
        dplyr::group_by(window) %>%
        # calculate mad and median of the differences
        dplyr::mutate(
          med = median(di, na.rm = TRUE),
          mad = median(abs(di - med), na.rm = TRUE) * z / 0.6745,
          #mad = mad(di, na.rm = TRUE) * z / 0.6745,
          min = med - mad,
          max = med + mad
        )
      #!dplyr::between(df$di, df$min, df$max)
      (df$di < df$min | df$di > df$max) & !is.na(df$di)
    }
  } else {
    if (method %in% c("default", "cooks")) {
      # default cooks z = 4
      if (missing(z)) z <- 4
      cooksd <- cooks.distance(lm(x ~ y, na.action = na.exclude))
      lim <- mean(cooksd, na.rm = TRUE) * z
      cooksd > lim
    } else if (method == "mahalanobis") {
      # default mahalanobis z = 12
      if (missing(z)) z <- 12
      m_df <- dplyr::bind_cols(list(x, y))
      m_dist <- mahalanobis(
        m_df,
        colMeans(m_df, na.rm = TRUE),
        cov(m_df, use = "pairwise.complete.obs")
      )
      m_dist > z
    } else if (method == "aq") {
      # default z = 0.05
      # mvoutlier::aq()
    } else if (method == "pcout") {
      # mvoutlier::pcout() - $wfinal01: 0 = outlier, $wfinal: smaller values =
      # more likely to be outliers
    }
  }
}


# Rolling correlation between x and y over windows of n observations; windows
# with fewer than zmax complete pairs are set to NA.
roll_corr <- function(x, y, n, zmax = 0.25 * n, ...) {
  df <- data.frame(x, y)
  # Count complete (x, y) pairs in each window
  df$z <- zoo::rollapply(
    df[, c("x", "y")], n,
    function(x) length(x[!is.na(x[, 1]) & !is.na(x[, 2])]) / 2,
    by.column = FALSE, fill = NA
  )
  df$x <- dplyr::if_else(df$z < zmax, NA_real_, df$x)
  df$y <- dplyr::if_else(df$z < zmax, NA_real_, df$y)
  df$z <- NULL
  out <- zoo::rollapply(
    df, n, function(x) cor(x[, 1], x[, 2], use = "pairwise.complete.obs"),
    by.column = FALSE, fill = NA
  )
  out <- dplyr::if_else(out == 1, NA_real_, out)
  out[which(!is.na(out))][1:4] <- NA
  out
}

# Summarize numeric variables: mean, sd, range, and NA count
summarize_vars <- function(data) {
  data %>%
    dplyr::select_if(is.numeric) %>%
    dplyr::summarize_all(
      list(
        mean = ~ mean(., na.rm = TRUE),
        sd = ~ sd(., na.rm = TRUE),
        min = ~ min(., na.rm = TRUE),
        max = ~ max(., na.rm = TRUE),
        n_na = ~ sum(is.na(.))
      )
    )
}

# Read pressure transducer (PT) logger data: parse timestamps, optionally
# aggregate to 'diff_out' minutes, and convert pressure/temperature to kPa/C
read_pt_data <- function(file, skip = 1, header = TRUE, tz, diff_out = 30,
                         tz_out = "Etc/GMT+5", type = c("water", "baro")) {
  type <- match.arg(type)
  data <- read.csv(file, skip = skip, header = header)
  data <- data[2:4]
  p_units <- names(data)[2]
  t_units <- names(data)[3]
  names(data) <- c("timestamp", "pres", "temp")
  # Parse timestamp
  data <- data %>%
    dplyr::mutate(
      timestamp = lubridate::parse_date_time(
        timestamp, c("mdyI!MSp!", "mdyHMS"), tz = tz
      ),
      timestamp = lubridate::with_tz(timestamp, tz_out)
    )
  # Get input time difference
  diff_in <- as.numeric(diff(data$timestamp)[1])
  # Temporary hack for converting time difference
  if (diff_in != diff_out) {
    if (diff_out == 30) {
      data <- data %>%
        dplyr::mutate(
          minute = lubridate::minute(timestamp),
          minute = dplyr::if_else(dplyr::between(minute, 0, 29), 0, 30)
        )
      lubridate::minute(data$timestamp) <- data$minute
      data <- data %>%
        dplyr::group_by(timestamp) %>%
        dplyr::summarize(
          pres = mean(pres, na.rm = TRUE),
          temp = mean(temp, na.rm = TRUE)
        )
    }
  }
  # Assign units to pabs
  if (stringr::str_detect(p_units, "psi")) {
    data$pres <- data$pres * 6.8947572931783
  } else if (!stringr::str_detect(p_units, "kPa")) {
    warning("Pressure units were not recognized.")
  }
  attr(data[["pres"]], "units") <- "kPa"
  # Assign units to twater
  if (stringr::str_detect(t_units, "F")) {
    data$temp <- (data$temp - 32) * 5 / 9
  } else if (!stringr::str_detect(t_units, "C")) {
    warning("Temperature units were not recognized.")
  }
  attr(data[["temp"]], "units") <- "C"
  # Rename columns according to type of pt
  if (type == "water") {
    names(data) <- c("timestamp", "pabs", "twater")
  } else if (type == "baro") {
    names(data) <- c("timestamp", "pbaro", "tbaro")
  }
  for (i in 1:3) varnames(data[[i]]) <- names(data)[i]
  data
}

calculate_wtd <- function(pabs, pair, twater) {
  # pabs & pair units should be in kPa, twater units should be in C
  # 1 kPa of water pressure corresponds to ~0.10197 m of water column
  out <- (pabs - pair) * 0.10197162
  # Set new varname
  #vars <- c(varnames(pabs), varnames(pair), varnames(twater))
  #varnames(out) <- paste0("calculate_wtd(", paste(vars, collapse = ", "), ")")
  # Set new units
  tidyflux::units(out) <- "m"
  out
}

calculate_swater <- function(twater, wtd) {
  # Heat storage flux of the water column (W m-2): specific heat of water
  # (4180 J kg-1 K-1) * depth (m) * density (1000 kg m-3) * dT per 30-min step
  out <- 4180 * wtd * 1000 * (twater - dplyr::lag(twater)) * (1 / 1800)
  out
}

# Transmute columns from 'data' and append the result to 'data_to', assigning
# the modified 'data_to' back to the global environment; returns 'data'
# unchanged so the function can be used inside a pipe
mutate_to <- function(data, data_to, ...) {
  dots <- rlang::enexprs(...)
  out <- dplyr::transmute(data, !!!dots)
  df_out <- dplyr::bind_cols(data_to, out)
  arg_name <- deparse(substitute(data_to))
  assign(arg_name, df_out, envir = .GlobalEnv)
  data
}