## =============================================================================
#' Convert Temperature between Kelvin and Celsius
#'
#' Converts temperature from degrees Kelvin to degrees Celsius and vice versa.
#'
#' @param x A numeric vector of temperatures.
#' @param method Direction of conversion: \code{"KtoC"} (Kelvin to Celsius, the
#'   default) or \code{"CtoK"} (Celsius to Kelvin).
#'
#' @return A numeric vector of converted temperatures with updated
#'   \code{varnames} and \code{units} attributes.
#'
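#' @examples
#' # Minimal sketch with made-up values; assumes the varnames/units attribute
#' # setters used throughout this package are available.
#' \dontrun{
#' tair_k <- c(273.15, 283.15, 293.15)
#' convert_temperature(tair_k, method = "KtoC")  # 0, 10, 20 degrees C
#' }
#'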
#' @export
convert_temperature <- function(x, method = c("KtoC", "CtoK")) {
method <- match.arg(method)
if (method == "KtoC") {
out <- x - 273.15
varnames(out) <- attr(x, "varnames")
tidyflux::units(out) <- "C"
} else if (method == "CtoK") {
out <- x + 273.15
varnames(out) <- attr(x, "varnames")
tidyflux::units(out) <- "K"
}
return(out)
}
## =============================================================================
#' Convert Pressure between Pa and kPa
#'
#' Converts pressure from Pa to kPa and vice versa.
#'
#' @param x A numeric vector of pressures.
#' @param method Direction of conversion: \code{"PatokPa"} (Pa to kPa, the
#'   default) or \code{"kPatoPa"} (kPa to Pa).
#'
#' @return A numeric vector of converted pressures with updated
#'   \code{varnames} and \code{units} attributes.
#'
#' @export
convert_pressure <- function(x, method = c("PatokPa", "kPatoPa")) {
method <- match.arg(method)
if (method == "PatokPa") {
out <- x / 1000
varnames(out) <- attr(x, "varnames")
tidyflux::units(out) <- "kPa"
} else if (method == "kPatoPa") {
out <- x * 1000
varnames(out) <- attr(x, "varnames")
tidyflux::units(out) <- "Pa"
}
return(out)
}
## =============================================================================
#' Calculate Vapor Pressure Deficit
#'
#' Uses relative humidity (rh) and air temperature (tair) to calculate the
#' vapor pressure deficit (vpd).
#'
#' @param rh A numeric vector of relative humidity (%).
#' @param tair A numeric vector of air temperature (degrees C).
#'
#' @details
#' The saturation vapor pressure is estimated with a Magnus-type equation and
#' scaled by the relative humidity:
#'
#' \deqn{vpd = 0.61078 * (1 - rh/100) * exp(17.08085 * tair / (234.175 + tair))}
#'
#' @return A numeric vector of vapor pressure deficit (kPa).
#'
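#' @examples
#' # Minimal sketch with made-up values; `tair` must carry units "C" for the
#' # internal check_units() call, so the attribute is set directly here.
#' \dontrun{
#' tair <- c(15, 20, 25)
#' attr(tair, "units") <- "C"
#' rh <- c(60, 70, 80)
#' calculate_vpd(rh, tair)  # vpd in kPa
#' }
#'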
#' @export
calculate_vpd <- function(rh, tair) {
check_units(tair, "C")
out <- 0.61078 * (1 - rh / 100) * exp(17.08085 * tair / (234.175 + tair))
varnames(out) <- paste0(
"calculate_vpd(", attr(rh, "varnames"), ", ", attr(tair, "varnames"), ")"
)
#varnames(out) <- deparse(match.call())
tidyflux::units(out) <- "kPa"
return(out)
}
## =============================================================================
#' Conversions between Global Radiation and Photosynthetic Photon Flux Density
#'
#' Converts radiation from W m-2 to µmol m-2 s-1 and vice versa.
#'
#' @param x A numeric vector of radiation, either global radiation (incoming
#'   short-wave radiation at the surface, W m-2) or PPFD (µmol m-2 s-1).
#' @param frac_ppfd Fraction of incoming solar irradiance that is
#'   photosynthetically active radiation (PAR); defaults to 0.5.
#' @param calc_frac Currently unused; placeholder for estimating the conversion
#'   factor from a regression between rg and ppfd.
#' @param method Direction of conversion: \code{"to_rg"} (PPFD to global
#'   radiation) or \code{"to_ppfd"} (global radiation to PPFD).
#'
#' @details
#' The conversion is given by:
#'
#' \deqn{ppfd = rg * frac_ppfd * J_to_mol}
#'
#' where \code{J_to_mol} = 4.6 converts J m-2 s-1 (= W m-2) to µmol m-2 s-1.
#' With the default \code{frac_ppfd} = 0.5, the combined conversion factor
#' (\code{frac_ppfd * J_to_mol}) is 2.3.
#'
#' @return A numeric vector of converted radiation with updated
#'   \code{varnames} and \code{units} attributes.
#'
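#' @examples
#' # Minimal sketch with made-up values; the input must carry the expected
#' # units string ("W+1m-2" here), so the attribute is set directly.
#' \dontrun{
#' rg <- c(100, 400, 800)
#' attr(rg, "units") <- "W+1m-2"
#' convert_radiation(rg, method = "to_ppfd")  # 230, 920, 1840 µmol m-2 s-1
#' }
#'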
convert_radiation <- function(x, frac_ppfd = 0.5, calc_frac = FALSE,
method = c("to_rg", "to_ppfd")) {
method <- match.arg(method)
# TODO if calc_frac do regression between rg and ppfd to get conversion factor
if (method == "to_rg") {
if (tidyflux::units(x) != "µmol+1s-1m-2") {
stop("Cannot perform ppfd to rg conversion if units not µmol+1s-1m-2.")
}
out <- x / frac_ppfd / 4.6
varnames(out) <- paste0("convert_radiation(", varnames(x), ")")
tidyflux::units(out) <- "W+1m-2"
} else if (method == "to_ppfd") {
if (tidyflux::units(x) != "W+1m-2") {
stop("Cannot perform rg to ppfd conversion if units not W+1m-2.")
}
out <- x * frac_ppfd * 4.6
varnames(out) <- paste0("convert_radiation(", varnames(x), ")")
tidyflux::units(out) <- "µmol+1s-1m-2"
}
out
}
## =============================================================================
#' Biochemical Energy
#'
#' Radiant energy absorbed in photosynthesis or heat release by respiration
#' calculated from net ecosystem exchange of CO2 (NEE).
#'
#' @param x Net ecosystem exchange (µmol CO2 m-2 s-1).
#' @param alpha Energy taken up/released by photosynthesis/respiration per mol
#' CO2 fixed/respired (J umol-1).
#'
#' @details The following sign convention is employed: NEE is negative when
#' carbon is taken up by the ecosystem. Positive values of the resulting
#' biochemical energy mean that energy (heat) is taken up by the ecosystem,
#' negative ones that heat is released. The value of alpha is taken from Nobel
#' 1974 (see Meyers & Hollinger 2004).
#'
#' @return A numeric vector of biochemical energy, Sp (W m-2).
#'
#' @references
#' Meyers, T.P., Hollinger, S.E. 2004: An assessment of storage terms in the
#' surface energy balance of maize and soybean. Agricultural and Forest
#' Meteorology 125, 105-115.
#'
#' Nobel, P.S., 1974: Introduction to Biophysical Plant Physiology. Freeman,
#' New York.
#'
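#' @examples
#' # Minimal sketch with made-up NEE values; the input must carry the expected
#' # units string ("µmol+1s-1m-2"), so the attribute is set directly.
#' \dontrun{
#' nee <- c(-20, -5, 3)
#' attr(nee, "units") <- "µmol+1s-1m-2"
#' calculate_sp(nee)  # 8.44, 2.11, -1.27 W m-2 (default alpha = 0.422)
#' }
#'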
#' @export
calculate_sp <- function(x, alpha = 0.422) {
if (tidyflux::units(x) != "µmol+1s-1m-2") {
stop("'x' must be in expected units µmol+1s-1m-2.")
}
out <- alpha * -x
varnames(out) <- paste0("calculate_sp(", varnames(x), ")")
tidyflux::units(out) <- "W+1m-2"
out
}
## =============================================================================
#' Energy Balance Closure
#'
#' Calculates the degree of the energy balance non-closure for individual
#' records, or the entire time span based on the ratio of two sums (energy
#' balance ratio), and ordinary least squares (OLS).
#'
#' @param data A data frame or matrix containing all required variables.
#' @param Rn Net radiation (W m-2).
#' @param G Ground heat flux (W m-2); optional.
#' @param S Sum of all storage fluxes (W m-2); optional.
#' @param LE Latent heat flux (W m-2).
#' @param H Sensible heat flux (W m-2).
#' @param instantaneous Should the energy balance be calculated at the time step
#' of the observations (\code{TRUE}), or over the entire time period
#' provided as input (\code{FALSE}).
#' @param missing.G.as.NA If \code{TRUE}, missing G values are treated as
#'   \code{NA}s, otherwise they are set to 0.
#' @param missing.S.as.NA If \code{TRUE}, missing S values are treated as
#'   \code{NA}s, otherwise they are set to 0.
#' @param name_out Name assigned to the output variable when
#'   \code{instantaneous = TRUE}; defaults to \code{"EBR"}.
#'
#' @details The energy balance ratio (EBR) is calculated as:
#'
#' \deqn{EBR = sum(LE + H)/sum(Rn - G - S)}
#'
#' where the sum is taken for all time steps with complete observations (i.e.
#' all energy balance terms are available).
#'
#' @return A named vector with the following elements:
#' \item{n}{Number of complete (all energy balance terms available)
#'   observations.}
#' \item{intercept}{Intercept of the OLS regression.}
#' \item{slope}{Slope of the OLS regression.}
#' \item{r_squared}{R^2 of the OLS regression.}
#' \item{EBR}{Energy balance ratio.}
#'
#' If \code{instantaneous = TRUE}, only \code{EBR} is returned.
#'
#' @references Wilson K., et al. 2002: Energy balance closure at FLUXNET sites.
#' Agricultural and Forest Meteorology 113, 223-243.
#'
#' @examples
#' ## characterize energy balance closure for DE-Tha in June 2014
#' energy_closure(DE_Tha_Jun_2014, instantaneous = FALSE)
#'
#' ## look at half-hourly closure
#' EBR_inst <- energy_closure(DE_Tha_Jun_2014, instantaneous = TRUE)
#' summary(EBR_inst)
#'
#' @importFrom stats complete.cases lm
#' @export
energy_closure <- function(data, Rn = "Rn", G = NULL, S = NULL, LE = "LE",
H = "H", instantaneous = TRUE, name_out = "EBR",
missing.G.as.NA = FALSE, missing.S.as.NA = FALSE) {
check_input(data, Rn, LE, H, G, S)
# check_range(Rn, c(-200, 1000))
# check_range(LE, c(-100, 700))
# check_range(H, c(-300, 700))
if (!is.null(G)) {
# check_range(G, c(-110, 220))
if (!missing.G.as.NA) G[is.na(G)] <- 0
} else {
cat("Ground heat flux G is not provided and set to 0.", fill = TRUE)
    G <- rep(0, nrow(data))
}
if (!is.null(S)) {
    # check_range(S, c(-200, 300))
if (!missing.S.as.NA) S[is.na(S)] <- 0
} else {
cat("Energy storage fluxes S are not provided and set to 0.", fill = TRUE)
S <- rep(0, nrow(data))
}
if (!instantaneous) {
comp <- complete.cases(Rn, G, S, LE, H)
n <- sum(comp)
EBR <- sum(LE[comp] + H[comp]) / sum(Rn[comp] - G[comp] - S[comp])
emod <- lm(c(LE + H) ~ c(Rn - G - S))
intercept <- summary(emod)$coef[1, 1]
slope <- summary(emod)$coef[2, 1]
r_squared <- summary(emod)$r.squared
out <- c(
"n" = n, "intercept" = round(intercept, 3),
"slope" = round(slope, 3), "r^2" = round(r_squared, 3),
"EBR" = round(EBR, 3)
)
} else {
out <- (LE + H) / (Rn - G - S)
varnames(out) <- name_out
tidyflux::units(out) <- "-"
}
return(out)
}
## =============================================================================
#' Fix Negative Incoming Radiation
#'
#' Looks for negative and missing incoming radiation values, and checks against
#' potential incoming radiation.
#'
#' @param x A numeric vector of incoming radiation (sw_in or ppfd).
#' @param rpot A numeric vector of potential incoming radiation (W m-2), e.g.
#'   as returned by \code{calculate_rpot()}.
#' @param ppfd Logical; set to \code{TRUE} if \code{x} is PPFD rather than
#'   short-wave radiation, so it is converted before comparison with rpot.
#'
#' @details
#' Checks for negative and missing incoming radiation data in relation to
#' potential incoming radiation (Rpot). Negative values are set to 0 where Rpot
#' is 0, and remaining negative values are set to \code{NA}. This is needed
#' because negative radiation affects partitioning quality, and it increases
#' the number of nighttime records available for ustar threshold calculation.
#'
#' @return A numeric vector of corrected incoming radiation with updated
#'   \code{varnames} and \code{units} attributes.
#'
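#' @examples
#' # Minimal sketch with made-up values: a negative nighttime reading is
#' # zeroed, a negative daytime reading becomes NA.
#' \dontrun{
#' sw_in <- c(-2, -5, 300)
#' rpot <- c(0, 250, 600)
#' fix_rad(sw_in, rpot)  # 0, NA, 300
#' }
#'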
fix_rad <- function(x, rpot = rpot, ppfd = FALSE) {
rad <- x # copy of x to allow for internal conversions
if (ppfd) rad <- convert_radiation(rad)
out <- dplyr::if_else(rad < 0 & rpot == 0, 0, x)
out[out < 0] <- NA # set remaining negatives to NA
message(
"Fixed ", sum(out == x, na.rm = TRUE),
" cases of negative incoming radiation."
)
varnames(out) <- paste0("fix_rad(", varnames(x), ")")
  tidyflux::units(out) <- tidyflux::units(x)
out
}
## =============================================================================
#' Check Incoming Radiation
#'
#' Flags incoming short-wave radiation (sw_in) values that are implausible
#' relative to potential radiation (rpot), and optionally relative to PPFD.
#'
#' @param x A numeric vector of incoming short-wave radiation (sw_in, W m-2).
#' @param rpot A numeric vector of potential incoming radiation (W m-2).
#' @param ppfd A numeric vector of photosynthetic photon flux density, used
#'   only if \code{check_ppfd = TRUE}.
#' @param check_ppfd Logical; if \code{TRUE}, values are additionally checked
#'   against a regression between sw_in and ppfd.
#' @param min_val Minimum number of complete sw_in-ppfd pairs required for the
#'   regression check; defaults to 11000.
#'
#' @return A numeric vector of flags (0 = OK, 2 = flagged).
#'
#' @details
#' Values are flagged when:
#' \itemize{
#' \item rpot = 0 and sw_in > 50
#' \item rpot > 200.0 and sw_in - rpot > 50
#' \item sw_in is > 15% greater than rpot
#' }
#'
#' Optionally, values can be flagged based on a regression between sw_in (rg)
#' and ppfd. If there are at least \code{min_val} pairwise records of sw_in and
#' ppfd, a simple linear regression is constructed and values differing by
#' greater than +/-5*sd of the residuals are flagged. This option is selected by
#' setting \code{check_ppfd} to \code{TRUE}.
#'
#' From OneFlux:
#' https://github.com/LI-COR/ONEFlux/blob/master/oneflux_steps/qc_auto/src
#'
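#' @examples
#' # Minimal sketch with made-up values: the first record is flagged because
#' # sw_in > 50 while potential radiation is zero.
#' \dontrun{
#' sw_in <- c(80, 300, 500)
#' rpot <- c(0, 600, 520)
#' flag_rad(sw_in, rpot)  # 2, 0, 0
#' }
#'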
#' @export
flag_rad <- function(x, rpot = rpot, ppfd = ppfd, check_ppfd = FALSE,
min_val = 11000) {
out <- rep(0, length(x))
out <- dplyr::case_when(
rpot == 0 & x > 50 ~ 2,
rpot > 200 & (x - rpot) > 50 & (x - rpot) / rpot > 0.15 ~ 2,
(x - rpot) / rpot > 0.15 ~ 2,
TRUE ~ 0
)
if (check_ppfd) {
comp <- data.frame(x, ppfd)
valid <- tidyr::drop_na(comp)
if (nrow(valid) < min_val) {
message(
"Skipping rg-ppfd validation: ",
"the number of complete rg and ppfd records, ", nrow(valid),
", does not fulfill the required amount of ", min_val, "."
)
    } else {
      # Regress sw_in on ppfd using the columns of comp; na.exclude keeps the
      # residuals aligned with the input records
      reg <- lm(x ~ ppfd, data = comp, na.action = stats::na.exclude)
      resids <- unname(resid(reg))
      resid_sd <- sd(resids, na.rm = TRUE)
      if (resid_sd < 0.01) {
        message(
          "Skipping rg-ppfd validation: ",
          "the standard deviation of residuals is less than 0.01."
        )
      } else {
        # Threshold is +/- 5 * sd of the residuals
        lim <- 5 * resid_sd
        out <- dplyr::case_when(
          resids < -lim ~ 2,
          resids > lim ~ 2,
          TRUE ~ out
        )
      }
    }
}
attributes(out) <- list(check_ppfd = check_ppfd, additive = FALSE, na_as = NA)
out
}
## =============================================================================
#' Calculate Potential Radiation
#'
#' Calculate the potential radiation from solar elevation and extraterrestrial
#' solar radiation. REddyProc name: fCalcPotRadiation.
#'
#' @param timestamp A POSIXct vector of timestamps in the local (winter) time
#'   zone.
#' @param lat Latitude in decimal degrees.
#' @param long Longitude in decimal degrees.
#' @param tz Time zone (in hours).
#' @param solar_time Should hour (given in local winter time) be corrected for
#' latitude to solar time where noon is exactly at 12:00 (default)? Set to
#' \code{FALSE} to directly use local winter time.
#'
#' @return A numeric vector of potential radiation (rpot, W+1m-2).
#'
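#' @examples
#' # Minimal sketch for one day of half-hourly timestamps (UTC+1, tz = 1);
#' # the site coordinates are made up.
#' \dontrun{
#' ts <- seq(
#'   as.POSIXct("2014-06-01 00:00", tz = "Etc/GMT-1"),
#'   by = "30 min", length.out = 48
#' )
#' rpot <- calculate_rpot(ts, lat = 51.0, long = 13.6, tz = 1)
#' }
#'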
#' @export
calculate_rpot <- function(timestamp, lat, long, tz, solar_time = TRUE) {
doy <- lubridate::yday(timestamp)
hour <- lubridate::hour(timestamp) + lubridate::minute(timestamp) / 60
sol_elev <- calc_sun_pos(doy, hour, lat, long, tz, solar_time)$sol_elev
ext_rad <- calc_ext_rad(doy)
out <- dplyr::if_else(sol_elev <= 0, 0, ext_rad * sin(sol_elev))
attr(out, "varnames") <- "calculate_rpot()"
attr(out, "units") <- attr(ext_rad, "units")
out
}
## =============================================================================
#' Calculate Sun Position
#'
#' Calculate the position of the sun. REddyProc name: fCalcSunPosition.
#'
#' @param doy A vector with day of year, same length as hour or length 1.
#' @param hour A vector with time as decimal hour of local time zone.
#' @param lat Latitude in (decimal) degrees.
#' @param long Longitude in (decimal) degrees.
#' @param tz Time zone (in hours).
#' @param solar_time Should hour (given in local winter time) be corrected for
#' latitude to solar time where noon is exactly at 12:00 (default)? Set to
#' \code{FALSE} to directly use local winter time.
#'
#' @details
#' Assumes that hour is given in local winter time zone, and corrects it by
#' longitude to solar time (where noon is exactly at 12:00). Set argument
#' \code{solar_time} to FALSE to use the local winter time instead.
#'
#' @return A list with vectors:
#' \describe{
#'   \item{sol_time}{Solar time in hours}
#'   \item{sol_decl}{Solar declination in radians}
#'   \item{sol_elev}{Solar elevation in radians with 0 at horizon}
#'   \item{sol_azim}{Solar azimuth in radians with 0 at North}
#' }
#'
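#' @examples
#' # Minimal sketch: solar position at local noon on day 172 for a made-up
#' # site at 51 deg N, 13.6 deg E in time zone UTC+1.
#' \dontrun{
#' pos <- calc_sun_pos(doy = 172, hour = 12, lat = 51, long = 13.6, tz = 1)
#' pos$sol_elev * 180 / pi  # solar elevation in degrees
#' }
#'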
#' @export
calc_sun_pos <- function(doy, hour, lat, long, tz, solar_time = TRUE) {
# Fractional year in radians
frac_year <- 2 * pi * (doy - 1) / 365.24
# Time equation in hours, accounting for changes in the time of solar noon
time_eq <- (
0.0072 * cos(frac_year) - 0.0528 * cos(2 * frac_year) -
0.0012 * cos(3 * frac_year) - 0.1229 * sin(frac_year) -
0.1565 * sin(2 * frac_year) - 0.0041 * sin(3 * frac_year)
)
# Solar time
sol_time <- if (solar_time) {
# Correction for local time and equation of time
hour + (long / 15 - tz) + time_eq
} else {
warning(
"Solar position calculated without correction for local time ",
"and equation of time.",
call. = FALSE
)
hour
}
attr(sol_time, "varnames") <- "sol_time"
attr(sol_time, "units") <- "hour"
# Conversion to radians
st <- (sol_time - 12) * pi / 12.0
# Correction for solar time < -pi to positive, important for sol_azim below
st <- dplyr::if_else(st < -pi, st + 2 * pi, st)
# Solar declination in radians, accounting for the earth axis tilt
sol_decl <- (
(0.33281 - 22.984 * cos(frac_year) - 0.34990 * cos(2 * frac_year) -
0.13980 * cos(3 * frac_year) + 3.7872 * sin(frac_year) +
0.03205 * sin(2 * frac_year) + 0.07187 * sin(3 * frac_year)) / 180 * pi
)
attr(sol_decl, "varnames") <- "sol_decl"
attr(sol_decl, "units") <- "rad"
  # Solar elevation angle in radians with zero at the horizon
sol_elev <- asin((
sin(sol_decl) * sin(lat / 180 * pi) +
cos(sol_decl) * cos(lat / 180 * pi) * cos(st)
))
attr(sol_elev, "varnames") <- "sol_elev"
attr(sol_elev, "units") <- "rad"
# Solar azimuth (horizontal angle) with zero for North
sol_azim <- (
cos(sol_decl) * cos(st) + sin(sol_elev) * cos(lat / 180 * pi)
) / (sin(lat / 180 * pi) * cos(sol_elev))
# Correction if off edge values
sol_azim <- dplyr::if_else(!dplyr::between(sol_azim, -1, 1), 1, sol_azim)
# Conversion to radians
sol_azim <- acos(sol_azim)
# Determine if solar azimuth is East or West depending on solar time
sol_azim <- dplyr::if_else(st < 0, pi - sol_azim, pi + sol_azim)
attr(sol_azim, "varnames") <- "sol_azim"
attr(sol_azim, "units") <- "rad"
# Return data list
sol_pos <- list(
sol_time = sol_time,
sol_decl = sol_decl,
sol_elev = sol_elev,
sol_azim = sol_azim
)
return(sol_pos)
}
## =============================================================================
#' Calculate Extraterrestrial Solar Radiation
#'
#' Calculate the extraterrestrial solar radiation with the eccentricity
#' correction after Lanini 2010 (MSc Thesis, Bern University).
#' REddyProc name: fCalcExtRadiation.
#'
#' @param doy A numeric vector with day of year.
#'
#' @return A numeric vector with extraterrestrial radiation (ext_rad, W+1m-2).
#'
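#' @examples
#' # Around the June solstice (doy 172) the value is near its annual minimum,
#' # roughly 1320 W m-2.
#' calc_ext_rad(172)
#'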
#' @export
calc_ext_rad <- function(doy) {
# Fractional year in radians
frac_year <- 2 * pi * (doy - 1) / 365.24
# Total solar irradiance
sol_irr <- 1366.1
# Eccentricity correction
ext_rad <- sol_irr * (
1.00011 + 0.034221 * cos(frac_year) + 0.00128 * sin(frac_year) +
0.000719 * cos(2 * frac_year) + 0.000077 * sin(2 * frac_year)
)
attr(ext_rad, "varnames") <- "ext_rad"
attr(ext_rad, "units") <- "W+1m-2"
return(ext_rad)
}
## =============================================================================
#' Split Variable by Wind Direction Bins
#'
#' Computes the mean and standard deviation of a variable within wind direction
#' bins.
#'
#' @param data A data frame containing \code{var} and a wind direction column
#'   \code{wd} (degrees, 0-360).
#' @param var Name of the variable to summarize.
#' @param by Width of the wind direction bins in degrees; defaults to 10.
#'
#' @return A data frame with bin midpoints (wd), means, and standard
#'   deviations.
#' @export
#'
#' @examples
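#' # Minimal sketch with a made-up data frame holding wind direction (wd) and
#' # air temperature (tair).
#' \dontrun{
#' df <- data.frame(wd = runif(100, 0, 360), tair = rnorm(100, 15, 5))
#' split_wd(df, "tair", by = 30)
#' }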
split_wd <- function(data, var, by = 10) {
  # Split var into wind direction bins of width `by`
  l <- split(data[[var]], cut(data$wd, seq(0, 360, by = by)))
  # Bin midpoints
  seq1 <- by / 2
  seq2 <- 360 - seq1
  data.frame(
    wd = seq(seq1, seq2, by = by),
    mean = sapply(l, mean, na.rm = TRUE),
    sd = sapply(l, sd, na.rm = TRUE)
  )
}
## =============================================================================
#' Average Replicate Measurements of One Variable
#'
#' @param ... Numeric vectors of replicate measurements, all of the same
#'   length.
#'
#' @return A numeric vector of row-wise means with combined \code{varnames} and
#'   the \code{units} of the first replicate.
#' @export
#'
#' @examples
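#' # Minimal sketch with two made-up replicate series.
#' \dontrun{
#' ts1 <- c(1.2, NA, 3.0)
#' ts2 <- c(1.0, 2.4, NA)
#' average_reps(ts1, ts2)  # 1.1, 2.4, 3.0
#' }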
average_reps <- function(...) {
# Create data frame from reps vectors
rep_df <- dplyr::bind_cols(rlang::dots_list(...))
# Average reps
out <- rep_df %>% rowMeans(na.rm = TRUE)
out[is.na(out)] <- NA # NaN -> NA
# Set new varname
varnames(out) <- paste0(
"mean(", paste(varnames(rep_df), collapse = ", "), ", na.rm = TRUE)"
)
# Set new units
  tidyflux::units(out) <- tidyflux::units(rep_df[[1]])
return(out)
}
## =============================================================================
#' Convert Variance to Standard Deviation
#'
#' Takes the square root of a variance variable to obtain its standard
#' deviation.
#'
#' @param x A numeric vector of variances.
#'
#' @return A numeric vector of standard deviations.
#' @export
#'
#' @examples
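#' # Minimal sketch with made-up variance values.
#' \dontrun{
#' convert_variance(c(0.25, 1, 4))  # 0.5, 1, 2
#' }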
convert_variance <- function(x) {
# Take square root to get sd
out <- sqrt(x)
# Define new units, and varnames
#varnames(out) <- paste0("sqrt(", deparse(substitute(x)), ")")
varnames(out) <- paste0("sqrt(", attr(x, "varnames"), ")")
tidyflux::units(out) <- "-"
return(out)
}
## =============================================================================
#' Mean Sunrise and Sunset Times
#'
#' Estimates mean sunrise and sunset times from a light variable (e.g. incoming
#' radiation) by detecting when it rises above or falls to a threshold.
#'
#' @param hour A numeric vector of decimal hours.
#' @param light A numeric vector of a light variable (e.g. sw_in or ppfd).
#' @param thr Threshold at or below which it is considered dark; defaults to 0.
#'
#' @return A numeric vector of length two: mean sunrise hour and mean sunset
#'   hour.
#' @export
#'
#' @examples
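#' # Minimal sketch with two made-up days of hourly radiation.
#' \dontrun{
#' hour <- rep(0:23, 2)
#' light <- rep(c(rep(0, 6), rep(400, 13), rep(0, 5)), 2)
#' mean_suntimes(hour, light)  # c(6, 19)
#' }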
mean_suntimes <- function(hour, light, thr = 0) {
light <- dplyr::if_else(light <= thr, thr, light)
rises <- hour[light > thr & dplyr::lag(light) == thr]
sets <- hour[light == thr & dplyr::lag(light) > thr]
c(mean(rises, na.rm = TRUE), mean(sets, na.rm = TRUE))
}
## =============================================================================
# Evaluate x while diverting console output to a temporary file
quiet <- function(x) {
sink(tempfile())
on.exit(sink())
invisible(force(x))
}
## =============================================================================
# Decimal day of year (day plus fraction of day) from a timestamp
doy <- function(x) {
day <- lubridate::yday(x)
hour <- lubridate::hour(x)
minute <- lubridate::minute(x)
out <- day + ((hour + (minute / 60)) / 24)
out
}
## =============================================================================
#' Rescale One Variable to the Range of Another
#'
#' Linearly rescales \code{x} so that its range matches the range of \code{y},
#' optionally after averaging within one or two grouping variables.
#'
#' @param x A numeric vector to rescale.
#' @param y A numeric vector whose range defines the target scale.
#' @param group1 Optional grouping vector.
#' @param group2 Optional second grouping vector.
#'
#' @return A numeric vector of \code{x} rescaled to the range of \code{y}.
#' @export
#'
#' @examples
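#' # Minimal sketch with made-up vectors on different scales.
#' \dontrun{
#' x <- rnorm(100, 20, 5)   # e.g. air temperature
#' y <- rnorm(100, 400, 50) # e.g. ppfd
#' normalize(x, y)          # x mapped onto the range of y
#' }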
normalize <- function(x, y, group1 = NULL, group2 = NULL) {
df <- data.frame(x, y)
if (!is.null(group1)) df$group1 <- group1
if (!is.null(group2)) df$group2 <- group2
df <- df %>%
{if (!is.null(group1) & !is.null(group2)) {
dplyr::group_by(., group1, group2)
} else .} %>%
{if (!is.null(group1) & is.null(group2)) {
dplyr::group_by(., group1)
} else .} %>%
tidyr::drop_na() %>%
{if (!is.null(group1)) {
dplyr::summarize(
., x = mean(x, na.rm = TRUE),
y = mean(y, na.rm = TRUE)
)
} else .} %>%
dplyr::summarize(
xmin = min(x, na.rm = TRUE),
xmax = max(x, na.rm = TRUE),
ymin = min(y, na.rm = TRUE),
ymax = max(y, na.rm = TRUE)
) %>%
{if (!is.null(group2)) {
dplyr::summarize(
., xmin = min(x, na.rm = TRUE),
xmax = max(x, na.rm = TRUE),
ymin = min(y, na.rm = TRUE),
ymax = max(y, na.rm = TRUE)
)
} else .}
(df$ymax - df$ymin) * ((x - df$xmin) / (df$xmax - df$xmin)) + df$ymin
}
## =============================================================================
#' Bin Wind Direction
#'
#' Assigns wind direction values to bins of a given width and returns the bin
#' midpoints.
#'
#' @param wd A numeric vector of wind direction (degrees, 0-360).
#' @param bin_size Width of the bins in degrees; defaults to 10.
#'
#' @return A numeric vector of bin midpoints.
#' @export
#'
#' @examples
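#' # Made-up wind directions mapped to 10-degree bin midpoints
#' bin_wd(c(3, 47, 355))  # 5, 45, 355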
bin_wd <- function(wd, bin_size = 10) {
  breaks <- seq(0, 360, by = bin_size)
  # Bin midpoints: drop the last break and shift by half a bin width
  labels <- breaks[-length(breaks)] + bin_size / 2
  as.numeric(as.character(cut(wd, breaks, labels = labels)))
}
## =============================================================================
#' Moving Average
#'
#' Computes a centered moving average using a simple rolling mean filter.
#'
#' @param x A numeric vector.
#' @param n Width of the averaging window; defaults to 5.
#'
#' @return A numeric vector of the same length as \code{x}, with \code{NA} at
#'   the edges where the window is incomplete.
#' @export
#'
#' @examples
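#' # Three-point moving average of a short made-up series
#' moving_avg(c(1, 2, 3, 4, 5), n = 3)  # NA, 2, 3, 4, NA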
moving_avg <- function(x, n = 5) {
out <- stats::filter(x, rep(1 / n, n), sides = 2)
as.numeric(out)
}
## =============================================================================
#' Flag Outliers
#'
#' Flags outliers in a single vector or in the relationship between two vectors
#' using one of several methods.
#'
#' @param x A numeric vector.
#' @param y An optional second numeric vector; if supplied, bivariate methods
#'   (e.g. Cook's distance, Mahalanobis distance) are used.
#' @param z Threshold controlling the sensitivity of the chosen method; each
#'   method has its own default.
#' @param method Outlier detection method. With only \code{x}, one of
#'   \code{"mad"} (default), \code{"doublemad"}, \code{"iqr"}, or
#'   \code{"spikes"}; with \code{x} and \code{y}, one of \code{"cooks"}
#'   (default) or \code{"mahalanobis"}. The \code{"aq"} and \code{"pcout"}
#'   methods are not yet implemented.
#'
#' @return A logical vector: \code{TRUE} indicates a flagged outlier.
#' @export
#'
#' @examples
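#' # Minimal sketch: a single extreme value flagged by the MAD method.
#' \dontrun{
#' x <- c(rnorm(100), 50)
#' get_outliers(x, method = "mad")
#' }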
get_outliers <- function(x, y = NULL, z,
method = c("default", "mad", "doublemad", "iqr",
"spikes", "cooks", "mahalanobis", "aq",
"pcout")) {
method <- match.arg(method)
if (is.null(y)) {
if (method %in% c("default", "mad")) {
# default mad z = 7
if (missing(z)) z <- 7
min <- median(x, na.rm = TRUE) - mad(x, na.rm = TRUE) * z
max <- median(x, na.rm = TRUE) + mad(x, na.rm = TRUE) * z
!dplyr::between(x, min, max)
} else if (method == "doublemad") {
# default mad z = 7
if (missing(z)) z <- 7
abs_dev <- abs(x[!is.na(x)] - median(x[!is.na(x)]))
left <- median(abs_dev[x[!is.na(x)] <= median(x[!is.na(x)])])
right <- median(abs_dev[x[!is.na(x)] >= median(x[!is.na(x)])])
x_mad <- dplyr::if_else(x < median(x, na.rm = TRUE), left, right)
mad_dist <- abs(x - median(x, na.rm = TRUE)) / x_mad
mad_dist > z
} else if (method == "iqr") {
# AKA Tukey's rule: default iqr z = 1.5
if (missing(z)) z <- 1.5
quant <- quantile(x, probs = c(.25, .75), na.rm = TRUE)
iqr <- z * IQR(x, na.rm = TRUE)
min <- quant[1] - iqr
max <- quant[2] + iqr
!dplyr::between(x, min, max)
} else if (method == "spikes") {
# x is flagged if it spikes beyond what is expected given variability for
# time period
if (missing(z)) z <- 7
df <- data.frame(x = x)
n <- 624
#browser()
df <- df %>%
# calculate immediate differences, cut out 2-week windows
dplyr::mutate(
di = (x - dplyr::lag(x, 1)) - (dplyr::lead(x, 1) - x),
window = ggplot2::cut_interval(dplyr::row_number(), length = n),
window = forcats::fct_explicit_na(window)
) %>%
# separate daytime and nighttime fluxes
dplyr::group_by(window) %>%
# calculate mad and median of the differences
dplyr::mutate(
med = median(di, na.rm = TRUE),
mad = median(abs(di - med), na.rm = TRUE) * z / 0.6745,
#mad = mad(di, na.rm = TRUE) * z / 0.6745,
min = med - mad,
max = med + mad
)
#!dplyr::between(df$di, df$min, df$max)
      (df$di < df$min | df$di > df$max) & !is.na(df$di)
}
} else {
if (method %in% c("default", "cooks")) {
# default cooks z = 4
if (missing(z)) z <- 4
cooksd <- cooks.distance(lm(x ~ y, na.action = na.exclude))
lim <- mean(cooksd, na.rm = TRUE) * z
cooksd > lim
} else if (method == "mahalanobis") {
# default mahalanobis z = 12
if (missing(z)) z <- 12
m_df <- dplyr::bind_cols(list(x, y))
m_dist <- mahalanobis(
m_df,
colMeans(m_df, na.rm = TRUE),
cov(m_df, use = "pairwise.complete.obs")
)
m_dist > z
} else if (method == "aq") {
# default z = 0.05
# mvoutlier::aq()
} else if (method == "pcout") {
# mvoutlier::pcout() - $wfinal01: 0 = outlier, $wfinal: smaller values =
# more likely to be outliers
}
}
}
# Rolling correlation between x and y over windows of n records; windows with
# fewer than zmax complete (x, y) pairs are masked
roll_corr <- function(x, y, n, zmax = 0.25 * n, ...) {
  df <- data.frame(x, y)
  # Count complete (x, y) pairs in each window
  df$z <- zoo::rollapply(
    df[, c("x", "y")], n,
    function(x) length(x[!is.na(x[, 1]) & !is.na(x[, 2])]) / 2,
    by.column = FALSE, fill = NA
  )
df$x <- dplyr::if_else(df$z < zmax, NA_real_, df$x)
df$y <- dplyr::if_else(df$z < zmax, NA_real_, df$y)
df$z <- NULL
out <- zoo::rollapply(
df, n, function(x) cor(x[, 1], x[, 2], use = "pairwise.complete.obs"),
by.column = FALSE, fill = NA
)
out <- dplyr::if_else(out == 1, NA_real_, out)
out[which(!is.na(out))][1:4] <- NA
out
}
summarize_vars <- function(data) {
  # Summarize each numeric variable: mean, sd, range, and NA count
  data %>%
    dplyr::select_if(is.numeric) %>%
    dplyr::summarize(dplyr::across(
      dplyr::everything(),
      list(
        mean = ~ mean(.x, na.rm = TRUE),
        sd = ~ sd(.x, na.rm = TRUE),
        min = ~ min(.x, na.rm = TRUE),
        max = ~ max(.x, na.rm = TRUE),
        n_na = ~ sum(is.na(.x))
      )
    ))
}
read_pt_data <- function(file, skip = 1, header = TRUE, tz, diff_out = 30,
tz_out = "Etc/GMT+5", type = c("water", "baro")) {
type <- match.arg(type)
data <- read.csv(file, skip = skip, header = header)
data <- data[2:4]
p_units <- names(data)[2]
t_units <- names(data)[3]
names(data) <- c("timestamp", "pres", "temp")
# Parse timestamp
data <- data %>%
dplyr::mutate(
timestamp = lubridate::parse_date_time(
timestamp, c("mdyI!MSp!", "mdyHMS"), tz = tz
),
timestamp = lubridate::with_tz(timestamp, tz_out)
)
# Get input time difference
diff_in <- as.numeric(diff(data$timestamp)[1])
# Temporary hack for converting time difference
if (diff_in != diff_out) {
if (diff_out == 30) {
data <- data %>%
dplyr::mutate(
minute = lubridate::minute(timestamp),
minute = dplyr::if_else(dplyr::between(minute, 0, 29), 0, 30)
)
lubridate::minute(data$timestamp) <- data$minute
data <- data %>%
dplyr::group_by(timestamp) %>%
dplyr::summarize(
pres = mean(pres, na.rm = TRUE),
temp = mean(temp, na.rm = TRUE)
)
}
}
# Assign units to pabs
if (stringr::str_detect(p_units, "psi")) {
data$pres <- data$pres * 6.8947572931783
} else if (!stringr::str_detect(p_units, "kPa")) {
warning("Pressure units were not recognized.")
}
attr(data[["pres"]], "units") <- "kPa"
# Assign units to twater
if (stringr::str_detect(t_units, "F")) {
data$temp <- (data$temp - 32) * 5 / 9
} else if (!stringr::str_detect(t_units, "C")) {
warning("Temperature units were not recognized.")
}
attr(data[["temp"]], "units") <- "C"
# Rename columns according to type of pt
if (type == "water") {
names(data) <- c("timestamp", "pabs", "twater")
} else if (type == "baro") {
names(data) <- c("timestamp", "pbaro", "tbaro")
}
for (i in 1:3) varnames(data[[i]]) <- names(data)[i]
data
}
calculate_wtd <- function(pabs, pair, twater) {
# pabs & pair units should be in kPa, twater units should be in C
out <- (pabs - pair) * 0.10197162
# Set new varname
#vars <- c(varnames(pabs), varnames(pair), varnames(twater))
#varnames(out) <- paste0("calculate_wtd(", paste(vars, collapse = ", "), ")")
# Set new units
tidyflux::units(out) <- "m"
out
}
calculate_swater <- function(twater, wtd) {
  # Heat storage flux of the water column (W m-2): heat capacity of water
  # (4180 J kg-1 K-1 * 1000 kg m-3) times depth and the temperature change per
  # half-hourly (1800 s) time step
  4180 * wtd * 1000 * (twater - dplyr::lag(twater)) * (1 / 1800)
}
mutate_to <- function(data, data_to, ...) {
  # Transmute columns from `data` and append them to `data_to`, which is
  # reassigned in the global environment; returns `data` unchanged
  dots <- rlang::enexprs(...)
  out <- dplyr::transmute(data, !!!dots)
  df_out <- dplyr::bind_cols(data_to, out)
  arg_name <- deparse(substitute(data_to))
  assign(arg_name, df_out, envir = .GlobalEnv)
  data
}