### File/data management utilities =============================================
#' Find the latest version of an output file
#'
#' @param dir A string indicating the directory that contains the output files
#' @param name A string indicating the name (prefix) of the file
#' @param ext A string indicating the expected file extension
#'
#' @return
#' @export
#'
#' @examples
latest_version <- function(dir, name, ext = ".csv") {
# Add path separator to dir (if needed)
if (stringr::str_sub(dir, -1) != "/") dir <- stringr::str_c(dir, "/")
# Add string separator to name (if needed)
if (stringr::str_sub(name, -1) != "_") name <- stringr::str_c(name, "_")
# Get files matching name and extension
files <- list.files(dir) %>%
stringr::str_subset(paste0("\\b", name)) %>%
stringr::str_subset(ext)
# Find file with the latest date tag
ext_len <- stringr::str_length(ext)
dates <- files %>%
stringr::str_sub(-10 - ext_len, -1 - ext_len) %>%
lubridate::ymd()
date <- dates[which(dates == dplyr::last(sort(dates)))]
out <- stringr::str_subset(files, as.character(date))
# Choose longest file name if multiple files still remain
if (length(out) > 1) out <- dplyr::first(out, stringr::str_length(out))
paste0(dir, out)
}
#' Read exported HOBOware data
#'
#' @param file
#' @param skip
#' @param header
#' @param tz
#' @param diff_out
#' @param tz_out
#' @param type
#'
#' @return
#' @export
#'
#' @examples
read_pt_data <- function(file, skip = 1, header = TRUE, tz, diff_out = 30,
tz_out = "Etc/GMT+5", type = c("water", "baro")) {
type <- match.arg(type)
data <- read.csv(file, skip = skip, header = header)
data <- data[2:4]
p_units <- names(data)[2]
t_units <- names(data)[3]
names(data) <- c("timestamp", "pres", "temp")
# Parse timestamp
data <- data %>%
dplyr::mutate(
timestamp = lubridate::parse_date_time(
timestamp, c("mdyI!MSp!", "mdyHMS", "mdyHM"), tz = tz
),
timestamp = lubridate::with_tz(timestamp, tz_out)
)
# Get input time difference
diff_in <- as.numeric(diff(data$timestamp)[1])
# Temporary hack for converting time difference
if (diff_in != diff_out) {
if (diff_out == 30) {
data <- data %>%
dplyr::mutate(
minute = lubridate::minute(timestamp),
minute = dplyr::if_else(dplyr::between(minute, 0, 29), 0, 30)
)
lubridate::minute(data$timestamp) <- data$minute
data <- data %>%
dplyr::group_by(timestamp) %>%
dplyr::summarize(
pres = mean(pres, na.rm = TRUE),
temp = mean(temp, na.rm = TRUE)
)
}
}
# Assign units to pabs
if (stringr::str_detect(p_units, "psi")) {
data$pres <- data$pres * 6.8947572931783
} else if (!stringr::str_detect(p_units, "kPa")) {
warning("Pressure units were not recognized.")
}
attr(data[["pres"]], "units") <- "kPa"
# Assign units to twater
if (stringr::str_detect(t_units, "F")) {
data$temp <- (data$temp - 32) * 5 / 9
} else if (!stringr::str_detect(t_units, "C")) {
warning("Temperature units were not recognized.")
}
attr(data[["temp"]], "units") <- "C"
# Rename columns according to type of pt
if (type == "water") {
names(data) <- c("timestamp", "pabs", "twater")
} else if (type == "baro") {
names(data) <- c("timestamp", "pbaro", "tbaro")
}
for (i in 1:3) varnames(data[[i]]) <- names(data)[i]
data
}
check_units <- function(x, units) {
x_name <- rlang::enquo(x)
inherits <- openeddy::units(x)
if (inherits != units) {
warning(
"Units for variable ", x_name, " are not as expected (", units, ")."
)
}
}
#' Combine Multiple EddyPro Output Files into One Dataset
#'
#' Takes files created by different EddyPro processing sessions and merges into
#' one dataset without dropping any columns.
#'
#' @return A data frame with columns bound by name each with the attributes
#' 'units' and 'varnames'.
#'
#' @param ... Zero or more objects of class data.frame that have been read using
#' \code{\link{read_data}}.
#' @param fill A character string indicating placeholder for missing units or
#' varnames.
#'
#' @export
combine_data <- function(..., fill = "-") {
inputs <- rlang::list2(...)
# Store full units and varnames attribute since they will be dropped
units <- list()
varnames <- list()
for (i in seq_along(inputs)) {
units[[i]] <- openeddy::units(inputs[[i]], names = TRUE)
varnames[[i]] <- varnames(inputs[[i]], names = TRUE)
}
names(units) <- rlang::ensyms(...)
units_full <- units[[1]]
names(varnames) <- rlang::ensyms(...)
varnames_full <- varnames[[1]]
for (i in 2:length(units)) {
units_full <- suppressWarnings(dplyr::bind_rows(units_full, units[[i]]))
varnames_full <- suppressWarnings(
dplyr::bind_rows(varnames_full, varnames[[i]])
)
}
# Replace NA units/varnames with "-"
units_full <- units_full %>% replace(is.na(.), fill)
varnames_full <- varnames_full %>% replace(is.na(.), fill)
# Test for equality among input data frames in units/varnames of each variable
for (i in seq_along(units_full)) {
if (!length(unique(units_full[, i])) == 1) {
warning(
"Units of variable ", names(units_full[i]),
" are not identical across all input data frames."
)
}
if (!length(unique(varnames_full[, i])) == 1) {
warning(
"Varnames of variable ", names(varnames_full[i]),
" are not identical across all input data frames."
)
}
}
units_full <- units_full %>% dplyr::slice(1) %>% unlist()
varnames_full <- varnames_full %>% dplyr::slice(1) %>% unlist()
# Bind all data frames together
out <- suppressWarnings(dplyr::bind_rows(...))
# Define helper function for getting a count of duplicate timestamps
dupes <- function(data) {
data %>%
dplyr::add_count(timestamp) %>%
dplyr::ungroup() %>%
dplyr::summarize(n = sum(n > 1, na.rm = TRUE)) %>%
dplyr::pull(n)
}
# If there are duplicate timestamps, choose which ones to keep
if (dupes(out) > 0) {
# First remove records that explicitly do not contain data
if ("file_records" %in% names(out)) {
out <- out %>%
dplyr::filter(!is.na(file_records))
} else if ("vin_1" %in% names(out)) {
out <- out %>%
dplyr::filter(!is.na(vin_1))
}
# For any remaining duplicates, keep the one with the most non-NA data
if (dupes(out) > 0) {
out <- out %>%
dplyr::mutate(n_na = rowSums(is.na(.))) %>%
dplyr::arrange(n_na) %>%
dplyr::group_by(timestamp) %>%
dplyr::slice(1) %>%
dplyr::select(-n_na) %>%
dplyr::ungroup()
if (dupes(out) > 0) {
# Ensure no duplicates are left
dupes_left <- out %>%
dplyr::add_count(timestamp) %>%
dplyr::filter(n > 1) %>%
dplyr::distinct(timestamp) %>%
dplyr::pull(timestamp) %>%
dplyr::ungroup()
out <- out %>%
dplyr::distinct(timestamp, .keep_all = TRUE)
# Produce warning since this arbitrarily chooses which records to keep
warning(
"Duplicate records with timestamp ", dupes_left,
"could not be removed using logical methods. ",
"Kept records were chosen arbitrarily."
)
}
}
}
# Assign units and varnames back to the combined data frame
for (i in seq_len(ncol(out))) {
varnames(out[[i]]) <- varnames_full[[i]]
openeddy::units(out[[i]]) <- units_full[[i]]
}
out
}
#' Title
#'
#' @param x
#'
#' @return
#' @export
#'
#' @examples
combine_summaries <- function(x) {
files <- list.files(x)
files <- files[grep(files, pattern = "\\.txt")]
files <- paste(x, files, sep = "/")
data_list <- list()
pbar <- dplyr::progress_estimated(length(files))
for (i in seq_along(files)) {
data_list[[i]] <- openeddy::read_eddy(
files[i], sep = "", stringsAsFactors = FALSE,
na.strings = c("NA", "-9999.0", "-9999", "", " ", "NaN")
)
pbar$tick()$print()
}
data <- dplyr::bind_rows(data_list)
data
}
### Date-time utilities ========================================================
#' Get decimal day-of-year component of a date-time
#'
#' @param x
#'
#' @return
#' @export
#'
#' @examples
doy <- function(x) {
day <- lubridate::yday(x)
hour <- lubridate::hour(x)
minute <- lubridate::minute(x)
day + ((hour + (minute / 60)) / 24)
}
#' Title
#'
#' @param x
#'
#' @return
#' @export
#'
#' @examples
decimal_hour <- function(x) {
lubridate::hour(x) + lubridate::minute(x) / 60
}
#' Get season component of a date-time
#'
#' @param x a date-time object
#' @param label logical. TRUE displays season as a character string such as
#' "Winter". False displays the season as a number, with "Winter" as 1.
#' @param abbr logical. FALSE displays label as its full character string. TRUE
#' displays a three-letter abbreviation of the label such as "Win".
#' @param by one of "month" or "date". Determines whether seasons are assigned
#' according to the month (e.g. "Spring" is Apr--Jun) or the date (e.g.
#' "Spring" is Mar 20--Jun 20) component of the date-time.
#'
#' @return
#' @export
#'
#' @examples
season <- function(x, label = FALSE, abbr = TRUE, by = c("month", "date")) {
by <- match.arg(by)
# Seasons defined by month
if (by == "month") {
m <- lubridate::month(x) %>% as.numeric()
out <- dplyr::case_when(
dplyr::between(m, 1, 3) ~ 1,
dplyr::between(m, 4, 6) ~ 2,
dplyr::between(m, 7, 9) ~ 3,
dplyr::between(m, 10, 12) ~ 4
)
}
# Seasons defined by date
if (by == "date") {
d <- lubridate::yday(x) %>% as.numeric()
out <- dplyr::case_when(
dplyr::between(d, 1, 78) ~ 1,
dplyr::between(d, 79, 171) ~ 2,
dplyr::between(d, 172, 264) ~ 3,
dplyr::between(d, 265, 354) ~ 4,
dplyr::between(d, 355, 366) ~ 1
)
}
# Label if specified
if (label) {
out <- dplyr::recode_factor(
out, `1` = "Winter", `2` = "Spring", `3` = "Summer", `4` = "Autumn"
)
if (abbr) {
out <- substr(out, 1, 3)
}
}
out
}
### QA/QC utilities ============================================================
#' Fix Negative Incoming Radiation
#'
#' Looks for negative and missing incoming radiation values, and checks against
#' potential incoming radiation.
#'
#' @param x
#' @param rpot
#'
#' @details
#' Checks for negative and missing incoming radiation data in relation to
#' potential incoming radiation (Rpot). For negative values, if Rpot is 0, the
#' values are set to 0. This is needed because negative radiation affects
#' partitioning quality, and to increase the number of nightime records to be
#' used for ustar threshold calculation.
#'
fix_rad <- function(x, rpot = rpot, ppfd = FALSE) {
rad <- x # copy of x to allow for internal conversions
if (ppfd) rad <- convert_radiation(rad)
out <- dplyr::if_else(rad < 0 & rpot == 0, 0, x)
out[out < 0] <- NA # set remaining negatives to NA
message(
"Fixed ", sum(out == x, na.rm = TRUE),
" cases of negative incoming radiation."
)
out
}
#' Check Incoming Radiation
#'
#'
#' @param x
#' @param rpot
#' @param ppfd
#' @param check_ppfd
#' @param min_val
#'
#' @return
#'
#' @details
#' Values are flagged when:
#' \itemize{
#' \item rpot = 0 and sw_in > 50
#' \item rpot > 200.0 and sw_in - rpot > 50
#' \item sw_in is > 15% greater than rpot
#' }
#'
#' Optionally, values can be flagged based on a regression between sw_in (rg)
#' and ppfd. If there are at least \code{min_val} pairwise records of sw_in and
#' ppfd, a simple linear regression is constructed and values differing by
#' greater than +/-5*sd of the residuals are flagged. This option is selected by
#' setting \code{check_ppfd} to \code{TRUE}.
#'
#' From OneFlux:
#' https://github.com/LI-COR/ONEFlux/blob/master/oneflux_steps/qc_auto/src
#'
#' @export
flag_rad <- function(x, rpot = rpot, ppfd = ppfd, check_ppfd = FALSE,
min_val = 11000) {
out <- rep(0, length(x))
value <- x - rpot
out <- dplyr::case_when(
value > 0 & rpot == 0 & x > 50 ~ 2,
value > 50 & rpot > 200 & value / rpot > 0.15 ~ 2,
TRUE ~ 0
)
if (check_ppfd) {
comp <- data.frame(x, ppfd)
valid <- tidyr::drop_na(comp)
if (nrow(valid) < min_val) {
message(
"Skipping Rg-PPFD validation: ",
"the number of complete Rg and PPFD records, ", nrow(valid),
", does not fulfill the required amount of ", min_val, "."
)
} else {
f <- paste(rg_name, "~", paste(ppfd_name, collapse = " + "))
reg <- do.call(
"lm",
list(as.formula(f), data = as.name("comp"), na.action = "na.exclude")
)
resid <- unname(resid(reg))
sd <- sd(resid, na.rm = TRUE)
if (sd < 0.01) {
message(
"Skipping Rg-PPFD validation: ",
"the standard deviation of residuals is less than 0.01."
)
}
# Threshold is +/- 5 * sd of the residuals
lim <- 5 * sd
out <- dplyr::case_when(
resid < -lim ~ 2,
resid > lim ~ 2,
TRUE ~ out
)
}
}
attributes(out) <- list(check_ppfd = check_ppfd, additive = FALSE, na_as = NA)
out
}
simple_despike <- function(x, max_diff, max_lag = 1, na.as = 0,
look = c("backward", "forward", "both")) {
look <- match.arg(look)
if (look == "forward") max_lag <- -max_lag
# A good way to get max_diff is: mad(diff(x), na.rm = TRUE) * 7
flag <- rep(0L, length(x))
for (i in 0:max_lag) {
diff <- abs(as.vector(diff(zoo::zoo(x), lag = i, na.pad = TRUE)))
out <- dplyr::if_else(diff < max_diff, flag, 2L)
if (look == "both") {
diff2 <- abs(as.vector(diff(zoo::zoo(x), lag = -i, na.pad = TRUE)))
out <- dplyr::if_else(diff2 < max_diff, out, 2L)
}
}
out <- tidyr::replace_na(out, na.as)
out
}
flag_ends <- function(x, n = 1) {
df <- data.frame(x = x)[, 0]
for (i in 1:n) {
df[, paste0("lag_", i)] <- dplyr::if_else(is.na(dplyr::lag(x, i)), 2L, 0L)
df[, paste0("lead_", i)] <- dplyr::if_else(is.na(dplyr::lead(x, i)), 2L, 0L)
}
openeddy::combn_QC(df, names(df))
}
rollapply_thr <- function(x, roll_fun, thr_fun, z = 1, width, check_runs = TRUE) {
.w <- parse_width(width)
if (check_runs) {
run_flag <- openeddy::flag_runs(x)
.x <- zoo::zoo(clean(x, run_flag))
} else .x <- zoo::zoo(x)
roll <- zoo::rollapplyr(.x, .w, FUN = roll_fun, na.rm = TRUE, fill = NA)
thr <- zoo::rollapplyr(.x, .w, FUN = thr_fun, na.rm = TRUE, fill = NA)
low <- roll - thr * z
high <- roll + thr * z
out <- dplyr::if_else(.x > low & .x < high, 0L, 2L)
tidyr::replace_na(out, 0)
}
coalesce_flags <- function(data, ..., prefix = "qc_[[:alnum:]]+\\_") {
data %>%
tidyr::gather("flag", "value", ...) %>%
dplyr::mutate(
flag = stringr::str_remove(flag, prefix),
value = dplyr::if_else(value > 1, flag, NA_character_)
) %>%
tidyr::spread(flag, value) %>%
dplyr::mutate(
flag = dplyr::coalesce(!!! dplyr::select(., ...)),
flag = tidyr::replace_na(flag, "none")
)
}
simple_clean <- function(x, max_diff, max_lag = 1, na.as = 0) {
clean <- x
for (i in 1:max_lag) {
diff <- as.vector(abs(diff(zoo::zoo(clean), i, na.pad = TRUE)))
while (max(diff, na.rm = TRUE) > max_diff) {
clean <- dplyr::if_else(diff > max_diff, NA_real_, clean)
clean <- fill_linear(clean)
diff <- as.vector(abs(diff(zoo::zoo(clean), i, na.pad = TRUE)))
}
}
clean
}
#' Clean
#'
#' Simple wrapper around dplyr::if_else() to clean a variable according to an
#' existing QC flag.
#'
#' @param x
#' @param flag
#' @param value Integer indicating the minimum flag value at which to reject x
#' @param replace_with
#'
#' @return
#' @export
#'
#' @examples
clean <- function(x, flag, value = 2, replace_with = NA_real_, na.as = 0) {
if (is.character(value)) {
flag <- dplyr::if_else(flag == value, 2, 0)
value <- 2
}
attrs <- attributes(x)
flag <- tidyr::replace_na(flag, na.as)
out <- dplyr::if_else(flag >= value, replace_with, x)
attributes(out) <- attrs
out
}
clean_all <- function(data, ...) {
names <- names(data)
qc_names <- stringr::str_subset(names, "qc_")
varnames <- stringr::str_remove(qc_names, "qc_")
exists <- which(varnames %in% names)
varnames <- varnames[exists]
qc_names <- qc_names[exists]
if (length(varnames) != length(qc_names)) {
stop("Cannot match each flag with one unique variable.", call. = FALSE)
}
out <- data
for (i in seq_along(varnames)) {
var <- out[, varnames[i]]
qc <- out[, qc_names[i]]
clean <- clean(var, qc, ...)
out[, varnames[i]] <- clean
}
out
}
## =============================================================================
#' Split Variable by Wind Direction Bins
#'
#' @param data
#' @param var
#' @param by
#'
#' @return A data frame
#' @export
#'
#' @examples
split_wd <- function(x, wd = wd, by = 10) {
l <- split(data[, var], cut(data$wd, seq(0, 360, by = by)))
seq1 <- by / 2
seq2 <- 360 - seq1 + 1
data.frame(
wd = seq(seq1, seq2, by = by),
mean = sapply(l, mean, na.rm = TRUE),
sd = sapply(l, sd, na.rm = TRUE)
)
}
## =============================================================================
#' Title
#'
#' @param wd
#' @param bin_size
#'
#' @return
#' @export
#'
#' @examples
bin_wd <- function(wd, bin_size = 10) {
seq <- seq(0, 360, by = bin_size)
labels <- seq[1:length(seq) - 1] + bin_size / 2
as.numeric(as.character(cut(wd, seq, labels = labels)))
}
## =============================================================================
#' Average Replicate Measurements of One Variable
#'
#' @param ... numeric vectors
#'
#' @return
#' @export
#'
#' @examples
average_reps <- function(...) {
# Create data frame from reps vectors
rep_df <- dplyr::bind_cols(rlang::dots_list(...))
# Average reps
out <- rep_df %>% rowMeans(na.rm = TRUE)
out[is.na(out)] <- NA # NaN -> NA
# Set new varname
varnames(out) <- paste0(
"mean(", paste(varnames(rep_df), collapse = ", "), ", na.rm = TRUE)"
)
# Set new units
openeddy::units(out) <- openeddy::units(rep_df[1])
out
}
## =============================================================================
#' Title
#'
#' @param hour
#' @param light
#' @param thr
#'
#' @return
#' @export
#'
#' @examples
mean_suntimes <- function(hour, light, thr = 0) {
light <- dplyr::if_else(light <= thr, thr, light)
rises <- hour[light > thr & dplyr::lag(light) == thr]
sets <- hour[light == thr & dplyr::lag(light) > thr]
c(mean(rises, na.rm = TRUE), mean(sets, na.rm = TRUE))
}
quiet <- function(x) {
sink(tempfile())
on.exit(sink())
invisible(force(x))
}
## =============================================================================
#' Title
#'
#' @param pabs
#' @param pair
#' @param twater
#' @param name_out
#'
#' @return
#' @export
#'
#' @examples
calculate_wtd <- function(pabs, pair, twater, name_out = "WTD") {
# pabs & pair units should be in kPa, twater units should be in C
check_units(pabs, "kPa")
check_units(pair, "kPa")
check_units(twater, "C")
out <- (pabs - pair) * 0.10197162
# Set new units
varnames(out) <- name_out
openeddy::units(out) <- "m"
out
}
calculate_swater <- function(twater, wtd) {
out <- 4180 * wtd * 1000 * (twater - dplyr::lag(twater)) * (1 / 1800)
}
## =============================================================================
#' Generate Year-long Half-hourly Time Step Vector
#'
#' Generate vector with half-hourly time steps of full year, stamped in the
#' center of time unit.
#' Adapted from: REddyProc - fFullYearTimeSteps
#'
#' @param data A data frame to be converted.
#' @param DTS The number of daily time steps (24 or 48).
#' @param tz The time zone used to store data (advised to keep GMT to avoid
#' daytime shifting issues).
#'
#' @return
#' A vector with time steps of full year in POSIX format.
#'
create_timesteps <- function(year, dts = 48, tz = "GMT", shift_by = 720 / dts) {
if (!dts %in% c(24, 48)) {
stop("Only implemented for 24 or 48 daily time steps, not ", dts, ".")
}
format <- "%Y-%m-%d-%H-%M"
start <- paste(year, 1, 1, 0, shift_by, sep = "-")
end <- paste(year + 1, 1, 1, 0, 30 - shift_by, sep = "-")
# timestamp vector with half-hourly timestamps
out <- seq(
strptime(start, format, tz), strptime(end, format, tz), (24 / dts * 60 * 60)
)
out
}
fill_single_gaps <- function(x, n_fill = 1) {
y <- seq_along(x)
out <- approx(y, x, y)$y
# Use the gap-filled data iff gap is of length 1
out <- dplyr::if_else(
!is.na(dplyr::lag(x, 1)) & !is.na(dplyr::lead(x, 1)),
out, x
)
out
}
fill_linear <- function(x, n_max = NULL) {
y <- seq_along(x)
out <- approx(y, x, y)$y
if (!is.null(n_max)) {
.x <- tidyr::replace_na(x, 9999)
rl <- rle(c(.x))
n <- rep(rl$lengths, times = rl$lengths)
out <- dplyr::if_else(n > n_max & is.na(x), NA_real_, out)
}
out
}
#' Title
#'
#' @param x A data frame
#' @param attr
#' @param values
#'
#' @return
#' @export
#'
#' @examples
add_attr <- function(x, attr, values) {
#purrr::assign_in(x, list(1, purrr::attr_getter("units")))
#purrr::pluck(purrr::attr_getter("units"))
values <- tidyr::replace_na(values, "-")
names <- colnames(x)
l <- purrr::map2(names, values, ~ (x[[.x]] <-`attr<-`(x[[.x]], attr, .y)))
names(l) <- names
as.data.frame(l)
}
keep_attrs <- function(x, ..., keep = "all") {
inherits(x, "data.frame")
attrs <- attributes(x)
f <- rlang::enquo(...)
}
apply_thr_around <- function(x, thr, n_before, n_after, name_out = "-",
flag = c("higher", "outside", "between", "lower")) {
flag <- match.arg(flag)
len <- length(x)
df <- data.frame("t" = openeddy::apply_thr(x, thr, flag = flag))
if (n_after > 0) {
for (i in 1:n_after) {
x_lag <- dplyr::lag(x, i) # 'lag' flags record ahead of present
df[, paste0("b", i)] <- openeddy::apply_thr(x_lag, thr, flag = flag)
}
}
if (n_before > 0) {
for (i in 1:n_before) {
x_lead <- dplyr::lead(x, i) # 'lead' flags record behind present
df[, paste0("a", i)] <- openeddy::apply_thr(x_lead, thr, flag = flag)
}
}
#browser()
openeddy::combn_QC(df, names(df), name_out = name_out)
}
lag_at <- function(x, n) {
len <- length(x)
out <- rep(NA, len)
for (i in 1:len) {
out[i] <- dplyr::lag(x, n[i])[i]
}
out
}
expand_list <- function(list, pull = NULL) {
# Neat little function that "expands" a reference list
names <- names(list)
out <- list
for (i in seq_along(names)) {
flags <- out[[names[i]]]
while (any(flags %in% names)) {
conds <- flags[which(flags %in% names)]
for (j in seq_along(conds)) {
replace <- out[[conds[j]]]
flags <- flags[!flags == conds[j]]
flags <- c(flags, replace)
}
out[[names[i]]] <- flags
if (!any(flags %in% names)) break
}
}
if (!is.null(pull)) out <- out[[pull]]
out
}
map_behind <- function(x, fun, n_behind) {
var <- x
behind_seq <- 1:behind
lags <- purrr::map(behind_seq, ~ dplyr::lag(var, .x)) %>%
rlang::set_names(-behind_seq) %>%
dplyr::bind_rows() %>% rev()
out <- dplyr::mutate_all(lags, fun)
out
}
parse_width <- function(x) {
#browser()
if (is.character(x)) {
time <- purrr::flatten(stringr::str_split(x, " "))
n <- as.numeric(time[1])
unit <- time[2]
if (stringr::str_detect(unit, "hour")) out <- n * 2
if (stringr::str_detect(unit, "day")) out <- n * 2 * 24
if (stringr::str_detect(unit, "week")) out <- n * 2 * 24 * 7
if (stringr::str_detect(unit, "month")) out <- n * 2 * 24 * 7 * 4
} else if (is.numeric(x)) out <- x
out
}
predict_along <- function(.x, .y, .p) {
if (!missing(.p)) {
p <- as.formula(.p)
.x <- if_else(p, .x, NA_real_)
.y <- if_else(p, .y, NA_real_)
}
len <- length(.y)
y_int <- approx(1:len, .y, 1:len)$.y
y <- dplyr::if_else(is.na(.y), y_int, .y)
df <- data.frame(
x = .x, y = .y,
y_dif = diff(zoo::zoo(y), na.pad = TRUE),
x_lag = dplyr::lag(.x, 1)
)
mod_df <- dplyr::filter(df, y_dif != 0)
coefs <- unname(coef(lm(x ~ y + y_dif + x_lag, data = mod_df)))
#browser()
gaps <- which(is.na(df$x))
out <- df$x
for (i in seq_along(gaps)) {
t <- gaps[i]
out[t] <- coefs[1] + coefs[2] * df$y[t] + coefs[3] * df$y_dif[t] + coefs[4] * dplyr::lag(out, 1)[t]
}
out
}
#' Simple loop ticker
#'
#' @param i An integer, set to the loop index
#' @param n An integer, set to the length of the loop (optional)
#'
#' @return
#' @export
#'
#' @examples
progress_n <- function(i, n = NULL) {
msg <- as.character(i)
flush.console()
if (!is.null(n)) msg <- paste0(msg, " | ", round(i / n * 100, 2), "pct")
message(msg, "\r", appendLF = FALSE)
}
write_every <- function(n, i, x, file, type = c("table", "raster"), ...) {
type = match.arg(type)
if (i %% n == 0) {
if (type == "table") {
write.table(x, file, row.names = FALSE, ...)
} else if (type == "raster") {
raster::writeRaster(x, file, ...)
}
}
}
rescale_by <- function(x, y) {
x_mean <- mean(x, na.rm = TRUE)
x_sd <- sd(x, na.rm = TRUE)
y_mean <- mean(y, na.rm = TRUE)
y_sd <- sd(y, na.rm = TRUE)
y_range <- range(y, na.rm = TRUE)
.x <- (x - x_mean) / x_sd
(.x * y_sd) + y_mean
#scales::rescale(x, y_range)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.