R/utilities.R

Defines functions latest_version read_pt_data check_units combine_data

### File/data management utilities =============================================

#' Find the latest version of an output file
#'
#' @param dir A string indicating the directory that contains the output files
#' @param name A string indicating the name (prefix) of the file
#' @param ext A string indicating the expected file extension
#'
#' @return
#' @export
#'
#' @examples
latest_version <- function(dir, name, ext = ".csv") {

  # Add path separator to dir (if needed)
  if (stringr::str_sub(dir, -1) != "/") dir <- stringr::str_c(dir, "/")
  # Add string separator to name (if needed)
  if (stringr::str_sub(name, -1) != "_") name <- stringr::str_c(name, "_")

  # Get files matching name and extension
  files <- list.files(dir) %>%
    stringr::str_subset(paste0("\\b", name)) %>%
    stringr::str_subset(ext)

  # Find file with the latest date tag
  ext_len <- stringr::str_length(ext)
  dates <- files %>%
    stringr::str_sub(-10 - ext_len, -1 - ext_len) %>%
    lubridate::ymd()
  date <- dates[which(dates == dplyr::last(sort(dates)))]
  out <- stringr::str_subset(files, as.character(date))

  # Choose longest file name if multiple files still remain
  if (length(out) > 1) out <- dplyr::first(out, stringr::str_length(out))
  paste0(dir, out)

}

#' Read exported HOBOware data
#'
#' @param file
#' @param skip
#' @param header
#' @param tz
#' @param diff_out
#' @param tz_out
#' @param type
#'
#' @return
#' @export
#'
#' @examples
read_pt_data <- function(file, skip = 1, header = TRUE, tz, diff_out = 30,
                         tz_out = "Etc/GMT+5", type = c("water", "baro")) {
  type <- match.arg(type)
  data <- read.csv(file, skip = skip, header = header)
  data <- data[2:4]
  p_units <- names(data)[2]
  t_units <- names(data)[3]
  names(data) <- c("timestamp", "pres", "temp")
  # Parse timestamp
  data <- data %>%
    dplyr::mutate(
      timestamp = lubridate::parse_date_time(
        timestamp, c("mdyI!MSp!", "mdyHMS", "mdyHM"), tz = tz
      ),
      timestamp = lubridate::with_tz(timestamp, tz_out)
    )
  # Get input time difference
  diff_in <- as.numeric(diff(data$timestamp)[1])
  # Temporary hack for converting time difference
  if (diff_in != diff_out) {
    if (diff_out == 30) {
      data <- data %>%
        dplyr::mutate(
          minute = lubridate::minute(timestamp),
          minute = dplyr::if_else(dplyr::between(minute, 0, 29), 0, 30)
        )
      lubridate::minute(data$timestamp) <- data$minute
      data <- data %>%
        dplyr::group_by(timestamp) %>%
        dplyr::summarize(
          pres = mean(pres, na.rm = TRUE),
          temp = mean(temp, na.rm = TRUE)
        )
    }
  }
  # Assign units to pabs
  if (stringr::str_detect(p_units, "psi")) {
    data$pres <- data$pres * 6.8947572931783
  } else if (!stringr::str_detect(p_units, "kPa")) {
    warning("Pressure units were not recognized.")
  }
  attr(data[["pres"]], "units") <- "kPa"
  # Assign units to twater
  if (stringr::str_detect(t_units, "F")) {
    data$temp <- (data$temp - 32) * 5 / 9
  } else if (!stringr::str_detect(t_units, "C")) {
    warning("Temperature units were not recognized.")
  }
  attr(data[["temp"]], "units") <- "C"
  # Rename columns according to type of pt
  if (type == "water") {
    names(data) <- c("timestamp", "pabs", "twater")
  } else if (type == "baro") {
    names(data) <- c("timestamp", "pbaro", "tbaro")
  }
  for (i in 1:3) varnames(data[[i]]) <- names(data)[i]
  data
}

check_units <- function(x, units) {
  x_name <- rlang::enquo(x)
  inherits <- openeddy::units(x)
  if (inherits != units) {
    warning(
      "Units for variable ", x_name, " are not as expected (", units, ")."
    )
  }
}

#' Combine Multiple EddyPro Output Files into One Dataset
#'
#' Takes files created by different EddyPro processing sessions and merges into
#' one dataset without dropping any columns.
#'
#' @return A data frame with columns bound by name each with the attributes
#'   'units' and 'varnames'.
#'
#' @param ... Zero or more objects of class data.frame that have been read using
#'   \code{\link{read_data}}.
#' @param fill A character string indicating placeholder for missing units or
#'   varnames.
#'
#' @export
combine_data <- function(..., fill = "-") {
  inputs <- rlang::list2(...)
  # Store full units and varnames attribute since they will be dropped
  units <- list()
  varnames <- list()
  for (i in seq_along(inputs)) {
    units[[i]] <- openeddy::units(inputs[[i]], names = TRUE)
    varnames[[i]] <- varnames(inputs[[i]], names = TRUE)
  }
  names(units) <- rlang::ensyms(...)
  units_full <- units[[1]]
  names(varnames) <- rlang::ensyms(...)
  varnames_full <- varnames[[1]]
  for (i in 2:length(units)) {
    units_full <- suppressWarnings(dplyr::bind_rows(units_full, units[[i]]))
    varnames_full <- suppressWarnings(
      dplyr::bind_rows(varnames_full, varnames[[i]])
    )
  }
  # Replace NA units/varnames with "-"
  units_full <- units_full %>% replace(is.na(.), fill)
  varnames_full <- varnames_full %>% replace(is.na(.), fill)
  # Test for equality among input data frames in units/varnames of each variable
  for (i in seq_along(units_full)) {
    if (!length(unique(units_full[, i])) == 1) {
      warning(
        "Units of variable ", names(units_full[i]),
        " are not identical across all input data frames."
      )
    }
    if (!length(unique(varnames_full[, i])) == 1) {
      warning(
        "Varnames of variable ", names(varnames_full[i]),
        " are not identical across all input data frames."
      )
    }
  }
  units_full <- units_full %>% dplyr::slice(1) %>% unlist()
  varnames_full <- varnames_full %>% dplyr::slice(1) %>% unlist()
  # Bind all data frames together
  out <- suppressWarnings(dplyr::bind_rows(...))
  # Define helper function for getting a count of duplicate timestamps
  dupes <- function(data) {
    data %>%
      dplyr::add_count(timestamp) %>%
      dplyr::ungroup() %>%
      dplyr::summarize(n = sum(n > 1, na.rm = TRUE)) %>%
      dplyr::pull(n)
  }
  # If there are duplicate timestamps, choose which ones to keep
  if (dupes(out) > 0) {
    # First remove records that explicitly do not contain data
    if ("file_records" %in% names(out)) {
      out <- out %>%
        dplyr::filter(!is.na(file_records))
    } else if ("vin_1" %in% names(out)) {
      out <- out %>%
        dplyr::filter(!is.na(vin_1))
    }
    # For any remaining duplicates, keep the one with the most non-NA data
    if (dupes(out) > 0) {
      out <- out %>%
        dplyr::mutate(n_na = rowSums(is.na(.))) %>%
        dplyr::arrange(n_na) %>%
        dplyr::group_by(timestamp) %>%
        dplyr::slice(1) %>%
        dplyr::select(-n_na) %>%
        dplyr::ungroup()
      if (dupes(out) > 0) {
        # Ensure no duplicates are left
        dupes_left <- out %>%
          dplyr::add_count(timestamp) %>%
          dplyr::filter(n > 1) %>%
          dplyr::distinct(timestamp) %>%
          dplyr::pull(timestamp) %>%
          dplyr::ungroup()
        out <- out %>%
          dplyr::distinct(timestamp, .keep_all = TRUE)
        # Produce warning since this arbitrarily chooses which records to keep
        warning(
          "Duplicate records with timestamp ", dupes_left,
          "could not be removed using logical methods. ",
          "Kept records were chosen arbitrarily."
        )
      }
    }
  }
  # Assign units and varnames back to the combined data frame
  for (i in seq_len(ncol(out))) {
    varnames(out[[i]]) <- varnames_full[[i]]
    openeddy::units(out[[i]]) <- units_full[[i]]
  }
  out
}

#' Title
#'
#' @param x
#'
#' @return
#' @export
#'
#' @examples
combine_summaries <- function(x) {
  files <- list.files(x)
  files <- files[grep(files, pattern = "\\.txt")]
  files <- paste(x, files, sep = "/")
  data_list <- list()
  pbar <- dplyr::progress_estimated(length(files))
  for (i in seq_along(files)) {
    data_list[[i]] <- openeddy::read_eddy(
      files[i], sep = "", stringsAsFactors = FALSE,
      na.strings = c("NA", "-9999.0", "-9999", "", " ", "NaN")
    )
    pbar$tick()$print()
  }
  data <- dplyr::bind_rows(data_list)
  data
}

### Date-time utilities ========================================================

#' Get decimal day-of-year component of a date-time
#'
#' @param x
#'
#' @return
#' @export
#'
#' @examples
doy <- function(x) {
  day <- lubridate::yday(x)
  hour <- lubridate::hour(x)
  minute <- lubridate::minute(x)
  day + ((hour + (minute / 60)) / 24)
}

#' Title
#'
#' @param x
#'
#' @return
#' @export
#'
#' @examples
decimal_hour <- function(x) {
  lubridate::hour(x) + lubridate::minute(x) / 60
}

#' Get season component of a date-time
#'
#' @param x a date-time object
#' @param label logical. TRUE displays season as a character string such as
#'   "Winter". False displays the season as a number, with "Winter" as 1.
#' @param abbr logical. FALSE displays label as its full character string. TRUE
#'   displays a three-letter abbreviation of the label such as "Win".
#' @param by one of "month" or "date". Determines whether seasons are assigned
#'   according to the month (e.g. "Spring" is Apr--Jun) or the date (e.g.
#'   "Spring" is Mar 20--Jun 20) component of the date-time.
#'
#' @return
#' @export
#'
#' @examples
season <- function(x, label = FALSE, abbr = TRUE, by = c("month", "date")) {

  by <- match.arg(by)

  # Seasons defined by month
  if (by == "month") {
    m <- lubridate::month(x) %>% as.numeric()
    out <- dplyr::case_when(
      dplyr::between(m, 1, 3) ~ 1,
      dplyr::between(m, 4, 6) ~ 2,
      dplyr::between(m, 7, 9) ~ 3,
      dplyr::between(m, 10, 12) ~ 4
    )
  }

  # Seasons defined by date
  if (by == "date") {
    d <- lubridate::yday(x) %>% as.numeric()
    out <- dplyr::case_when(
      dplyr::between(d, 1, 78) ~ 1,
      dplyr::between(d, 79, 171) ~ 2,
      dplyr::between(d, 172, 264) ~ 3,
      dplyr::between(d, 265, 354) ~ 4,
      dplyr::between(d, 355, 366) ~ 1
    )
  }

  # Label if specified
  if (label) {
    out <- dplyr::recode_factor(
      out, `1` = "Winter", `2` = "Spring", `3` = "Summer", `4` = "Autumn"
    )
    if (abbr) {
      out <- substr(out, 1, 3)
    }
  }

  out

}


### QA/QC utilities ============================================================

#' Fix Negative Incoming Radiation
#'
#' Looks for negative and missing incoming radiation values, and checks against
#' potential incoming radiation.
#'
#' @param x
#' @param rpot
#'
#' @details
#' Checks for negative and missing incoming radiation data in relation to
#' potential incoming radiation (Rpot). For negative values, if Rpot is 0, the
#' values are set to 0. This is needed because negative radiation affects
#' partitioning quality, and to increase the number of nightime records to be
#' used for ustar threshold calculation.
#'
fix_rad <- function(x, rpot = rpot, ppfd = FALSE) {
  rad <- x # copy of x to allow for internal conversions
  if (ppfd) rad <- convert_radiation(rad)
  out <- dplyr::if_else(rad < 0 & rpot == 0, 0, x)
  out[out < 0] <- NA  # set remaining negatives to NA
  message(
    "Fixed ", sum(out == x, na.rm = TRUE),
    " cases of negative incoming radiation."
  )
  out
}

#' Check Incoming Radiation
#'
#'
#' @param x
#' @param rpot
#' @param ppfd
#' @param check_ppfd
#' @param min_val
#'
#' @return
#'
#' @details
#' Values are flagged when:
#' \itemize{
#' \item rpot = 0 and sw_in > 50
#' \item rpot > 200.0 and sw_in - rpot > 50
#' \item sw_in is > 15% greater than rpot
#' }
#'
#' Optionally, values can be flagged based on a regression between sw_in (rg)
#' and ppfd. If there are at least \code{min_val} pairwise records of sw_in and
#' ppfd, a simple linear regression is constructed and values differing by
#' greater than +/-5*sd of the residuals are flagged. This option is selected by
#' setting \code{check_ppfd} to \code{TRUE}.
#'
#' From OneFlux:
#' https://github.com/LI-COR/ONEFlux/blob/master/oneflux_steps/qc_auto/src
#'
#' @export
flag_rad <- function(x, rpot = rpot, ppfd = ppfd, check_ppfd = FALSE,
                     min_val = 11000) {
  out <- rep(0, length(x))
  value <- x - rpot
  out <- dplyr::case_when(
    value > 0 & rpot == 0 & x > 50 ~ 2,
    value > 50 & rpot > 200 & value / rpot > 0.15 ~ 2,
    TRUE ~ 0
  )
  if (check_ppfd) {
    comp <- data.frame(x, ppfd)
    valid <- tidyr::drop_na(comp)
    if (nrow(valid) < min_val) {
      message(
        "Skipping Rg-PPFD validation: ",
        "the number of complete Rg and PPFD records, ", nrow(valid),
        ", does not fulfill the required amount of ", min_val, "."
      )
    } else {
      f <- paste(rg_name, "~", paste(ppfd_name, collapse = " + "))
      reg <- do.call(
        "lm",
        list(as.formula(f), data = as.name("comp"), na.action = "na.exclude")
      )
      resid <- unname(resid(reg))
      sd <- sd(resid, na.rm = TRUE)
      if (sd < 0.01) {
        message(
          "Skipping Rg-PPFD validation: ",
          "the standard deviation of residuals is less than 0.01."
        )
      }
      # Threshold is +/- 5 * sd of the residuals
      lim <- 5 * sd
      out <- dplyr::case_when(
        resid < -lim ~ 2,
        resid > lim ~ 2,
        TRUE ~ out
      )
    }
  }
  attributes(out) <- list(check_ppfd = check_ppfd, additive = FALSE, na_as = NA)
  out
}

simple_despike <- function(x, max_diff, max_lag = 1, na.as = 0,
                           look = c("backward", "forward", "both")) {
  look <- match.arg(look)
  if (look == "forward") max_lag <- -max_lag
  # A good way to get max_diff is: mad(diff(x), na.rm = TRUE) * 7
  flag <- rep(0L, length(x))
  for (i in 0:max_lag) {
    diff <- abs(as.vector(diff(zoo::zoo(x), lag = i, na.pad = TRUE)))
    out <- dplyr::if_else(diff < max_diff, flag, 2L)
    if (look == "both") {
      diff2 <- abs(as.vector(diff(zoo::zoo(x), lag = -i, na.pad = TRUE)))
      out <- dplyr::if_else(diff2 < max_diff, out, 2L)
    }
  }
  out <- tidyr::replace_na(out, na.as)
  out
}

flag_ends <- function(x, n = 1) {
  df <- data.frame(x = x)[, 0]
  for (i in 1:n) {
    df[, paste0("lag_", i)] <- dplyr::if_else(is.na(dplyr::lag(x, i)), 2L, 0L)
    df[, paste0("lead_", i)] <- dplyr::if_else(is.na(dplyr::lead(x, i)), 2L, 0L)
  }
  openeddy::combn_QC(df, names(df))
}

rollapply_thr <- function(x, roll_fun, thr_fun, z = 1, width, check_runs = TRUE) {
  .w <- parse_width(width)

  if (check_runs) {
    run_flag <- openeddy::flag_runs(x)
    .x <- zoo::zoo(clean(x, run_flag))
  } else .x <- zoo::zoo(x)

  roll <- zoo::rollapplyr(.x, .w, FUN = roll_fun, na.rm = TRUE, fill = NA)
  thr <- zoo::rollapplyr(.x, .w, FUN = thr_fun, na.rm = TRUE, fill = NA)
  low <- roll - thr * z
  high <- roll + thr * z
  out <- dplyr::if_else(.x > low & .x < high, 0L, 2L)
  tidyr::replace_na(out, 0)
}

coalesce_flags <- function(data, ..., prefix = "qc_[[:alnum:]]+\\_") {
  data %>%
    tidyr::gather("flag", "value", ...) %>%
    dplyr::mutate(
      flag = stringr::str_remove(flag, prefix),
      value = dplyr::if_else(value > 1, flag, NA_character_)
    ) %>%
    tidyr::spread(flag, value) %>%
    dplyr::mutate(
      flag = dplyr::coalesce(!!! dplyr::select(., ...)),
      flag = tidyr::replace_na(flag, "none")
    )
}

simple_clean <- function(x, max_diff, max_lag = 1, na.as = 0) {
  clean <- x
  for (i in 1:max_lag) {
    diff <- as.vector(abs(diff(zoo::zoo(clean), i, na.pad = TRUE)))
    while (max(diff, na.rm = TRUE) > max_diff) {
      clean <- dplyr::if_else(diff > max_diff, NA_real_, clean)
      clean <- fill_linear(clean)
      diff <- as.vector(abs(diff(zoo::zoo(clean), i, na.pad = TRUE)))
    }
  }

  clean
}

#' Clean
#'
#' Simple wrapper around dplyr::if_else() to clean a variable according to an
#' existing QC flag.
#'
#' @param x
#' @param flag
#' @param value Integer indicating the minimum flag value at which to reject x
#' @param replace_with
#'
#' @return
#' @export
#'
#' @examples
clean <- function(x, flag, value = 2, replace_with = NA_real_, na.as = 0) {
  if (is.character(value)) {
    flag <- dplyr::if_else(flag == value, 2, 0)
    value <- 2
  }
  attrs <- attributes(x)
  flag <- tidyr::replace_na(flag, na.as)
  out <- dplyr::if_else(flag >= value, replace_with, x)
  attributes(out) <- attrs
  out
}

clean_all <- function(data, ...) {
  names <- names(data)
  qc_names <- stringr::str_subset(names, "qc_")
  varnames <- stringr::str_remove(qc_names, "qc_")

  exists <- which(varnames %in% names)
  varnames <- varnames[exists]
  qc_names <- qc_names[exists]
  if (length(varnames) != length(qc_names)) {
    stop("Cannot match each flag with one unique variable.", call. = FALSE)
  }

  out <- data
  for (i in seq_along(varnames)) {
    var <- out[, varnames[i]]
    qc <- out[, qc_names[i]]
    clean <- clean(var, qc, ...)
    out[, varnames[i]] <- clean
  }

  out
}

## =============================================================================
#' Split Variable by Wind Direction Bins
#'
#' @param data
#' @param var
#' @param by
#'
#' @return A data frame
#' @export
#'
#' @examples
split_wd <- function(x, wd = wd, by = 10) {
  l <- split(data[, var], cut(data$wd, seq(0, 360, by = by)))
  seq1 <- by / 2
  seq2 <- 360 - seq1 + 1
  data.frame(
    wd = seq(seq1, seq2, by = by),
    mean = sapply(l, mean, na.rm = TRUE),
    sd = sapply(l, sd, na.rm = TRUE)
  )
}

## =============================================================================
#' Title
#'
#' @param wd
#' @param bin_size
#'
#' @return
#' @export
#'
#' @examples
bin_wd <- function(wd, bin_size = 10) {
  seq <- seq(0, 360, by = bin_size)
  labels <- seq[1:length(seq) - 1] + bin_size / 2
  as.numeric(as.character(cut(wd, seq, labels = labels)))
}


## =============================================================================
#' Average Replicate Measurements of One Variable
#'
#' @param ... numeric vectors
#'
#' @return
#' @export
#'
#' @examples
average_reps <- function(...) {
  # Create data frame from reps vectors
  rep_df <- dplyr::bind_cols(rlang::dots_list(...))
  # Average reps
  out <- rep_df %>% rowMeans(na.rm = TRUE)
  out[is.na(out)] <- NA # NaN -> NA
  # Set new varname
  varnames(out) <- paste0(
    "mean(", paste(varnames(rep_df), collapse = ", "), ", na.rm = TRUE)"
  )
  # Set new units
  openeddy::units(out) <- openeddy::units(rep_df[1])
  out
}

## =============================================================================
#' Title
#'
#' @param hour
#' @param light
#' @param thr
#'
#' @return
#' @export
#'
#' @examples
mean_suntimes <- function(hour, light, thr = 0) {
  light <- dplyr::if_else(light <= thr, thr, light)
  rises <- hour[light > thr & dplyr::lag(light) == thr]
  sets <- hour[light == thr & dplyr::lag(light) > thr]
  c(mean(rises, na.rm = TRUE), mean(sets, na.rm = TRUE))
}


quiet <- function(x) {
  sink(tempfile())
  on.exit(sink())
  invisible(force(x))
}

## =============================================================================
#' Title
#'
#' @param pabs
#' @param pair
#' @param twater
#' @param name_out
#'
#' @return
#' @export
#'
#' @examples
calculate_wtd <- function(pabs, pair, twater, name_out = "WTD") {
  # pabs & pair units should be in kPa, twater units should be in C
  check_units(pabs, "kPa")
  check_units(pair, "kPa")
  check_units(twater, "C")
  out <- (pabs - pair) * 0.10197162
  # Set new units
  varnames(out) <- name_out
  openeddy::units(out) <- "m"
  out
}

calculate_swater <- function(twater, wtd) {
  out <- 4180 * wtd * 1000 * (twater - dplyr::lag(twater)) * (1 / 1800)
}



## =============================================================================
#' Generate Year-long Half-hourly Time Step Vector
#'
#' Generate vector with half-hourly time steps of full year, stamped in the
#' center of time unit.
#' Adapted from: REddyProc - fFullYearTimeSteps
#'
#' @param data A data frame to be converted.
#' @param DTS The number of daily time steps (24 or 48).
#' @param tz The time zone used to store data (advised to keep GMT to avoid
#'   daytime shifting issues).
#'
#' @return
#' A vector with time steps of full year in POSIX format.
#'
create_timesteps <- function(year, dts = 48, tz = "GMT", shift_by = 720 / dts) {
  if (!dts %in% c(24, 48)) {
    stop("Only implemented for 24 or 48 daily time steps, not ", dts, ".")
  }
  format <- "%Y-%m-%d-%H-%M"
  start <- paste(year, 1, 1, 0, shift_by, sep = "-")
  end <- paste(year + 1, 1, 1, 0, 30 - shift_by, sep = "-")
  # timestamp vector with half-hourly timestamps
  out <- seq(
    strptime(start, format, tz), strptime(end, format, tz), (24 / dts * 60 * 60)
  )
  out
}




fill_single_gaps <- function(x, n_fill = 1) {
  y <- seq_along(x)
  out <- approx(y, x, y)$y
  # Use the gap-filled data iff gap is of length 1
  out <- dplyr::if_else(
    !is.na(dplyr::lag(x, 1)) & !is.na(dplyr::lead(x, 1)),
    out, x
  )
  out
}

fill_linear <- function(x, n_max = NULL) {
  y <- seq_along(x)
  out <- approx(y, x, y)$y
  if (!is.null(n_max)) {
    .x <- tidyr::replace_na(x, 9999)
    rl <- rle(c(.x))
    n <- rep(rl$lengths, times = rl$lengths)
    out <- dplyr::if_else(n > n_max & is.na(x), NA_real_, out)
  }
  out
}


#' Title
#'
#' @param x A data frame
#' @param attr
#' @param values
#'
#' @return
#' @export
#'
#' @examples
add_attr <- function(x, attr, values) {
  #purrr::assign_in(x, list(1, purrr::attr_getter("units")))
  #purrr::pluck(purrr::attr_getter("units"))
  values <- tidyr::replace_na(values, "-")
  names <- colnames(x)
  l <- purrr::map2(names, values, ~ (x[[.x]] <-`attr<-`(x[[.x]], attr, .y)))
  names(l) <- names
  as.data.frame(l)
}

keep_attrs <- function(x, ..., keep = "all") {
  inherits(x, "data.frame")
  attrs <- attributes(x)
  f <- rlang::enquo(...)
}

apply_thr_around <- function(x, thr, n_before, n_after, name_out = "-",
                             flag = c("higher", "outside", "between", "lower")) {
  flag <- match.arg(flag)
  len <- length(x)
  df <- data.frame("t" = openeddy::apply_thr(x, thr, flag = flag))
  if (n_after > 0) {
    for (i in 1:n_after) {
      x_lag <- dplyr::lag(x, i) # 'lag' flags record ahead of present
      df[, paste0("b", i)] <- openeddy::apply_thr(x_lag, thr, flag = flag)
    }
  }
  if (n_before > 0) {
    for (i in 1:n_before) {
      x_lead <- dplyr::lead(x, i) # 'lead' flags record behind present
      df[, paste0("a", i)] <- openeddy::apply_thr(x_lead, thr, flag = flag)
    }
  }
  #browser()
  openeddy::combn_QC(df, names(df), name_out = name_out)
}

lag_at <- function(x, n) {
  len <- length(x)
  out <- rep(NA, len)
  for (i in 1:len) {
    out[i] <- dplyr::lag(x, n[i])[i]
  }
  out
}

expand_list <- function(list, pull = NULL) {
  # Neat little function that "expands" a reference list
  names <- names(list)
  out <- list
  for (i in seq_along(names)) {
    flags <- out[[names[i]]]
    while (any(flags %in% names)) {
      conds <- flags[which(flags %in% names)]
      for (j in seq_along(conds)) {
        replace <- out[[conds[j]]]
        flags <- flags[!flags == conds[j]]
        flags <- c(flags, replace)
      }
      out[[names[i]]] <- flags
      if (!any(flags %in% names)) break
    }
  }
  if (!is.null(pull)) out <- out[[pull]]
  out
}

map_behind <- function(x, fun, n_behind) {
  var <- x
  behind_seq <- 1:behind
  lags <- purrr::map(behind_seq, ~ dplyr::lag(var, .x)) %>%
    rlang::set_names(-behind_seq) %>%
    dplyr::bind_rows() %>% rev()
  out <- dplyr::mutate_all(lags, fun)
  out
}

parse_width <- function(x) {
  #browser()
  if (is.character(x)) {
    time <- purrr::flatten(stringr::str_split(x, " "))
    n <- as.numeric(time[1])
    unit <- time[2]
    if (stringr::str_detect(unit, "hour")) out <- n * 2
    if (stringr::str_detect(unit, "day")) out <- n * 2 * 24
    if (stringr::str_detect(unit, "week")) out <- n * 2 * 24 * 7
    if (stringr::str_detect(unit, "month")) out <- n * 2 * 24 * 7 * 4
  } else if (is.numeric(x)) out <- x
  out
}

predict_along <- function(.x, .y, .p) {
  if (!missing(.p)) {
    p <- as.formula(.p)
    .x <- if_else(p, .x, NA_real_)
    .y <- if_else(p, .y, NA_real_)
  }

  len <- length(.y)
  y_int <- approx(1:len, .y, 1:len)$.y
  y <- dplyr::if_else(is.na(.y), y_int, .y)

  df <- data.frame(
    x = .x, y = .y,
    y_dif = diff(zoo::zoo(y), na.pad = TRUE),
    x_lag = dplyr::lag(.x, 1)
  )

  mod_df <- dplyr::filter(df, y_dif != 0)
  coefs <- unname(coef(lm(x ~ y + y_dif + x_lag, data = mod_df)))
  #browser()

  gaps <- which(is.na(df$x))
  out <- df$x
  for (i in seq_along(gaps)) {
    t <- gaps[i]
    out[t] <- coefs[1] + coefs[2] * df$y[t] + coefs[3] * df$y_dif[t] + coefs[4] * dplyr::lag(out, 1)[t]
  }
  out
}

#' Simple loop ticker
#'
#' @param i An integer, set to the loop index
#' @param n An integer, set to the length of the loop (optional)
#'
#' @return
#' @export
#'
#' @examples
progress_n <- function(i, n = NULL) {
  msg <- as.character(i)
  flush.console()
  if (!is.null(n)) msg <- paste0(msg, " | ", round(i / n * 100, 2), "pct")
  message(msg, "\r", appendLF = FALSE)
}

write_every <- function(n, i, x, file, type = c("table", "raster"), ...) {
  type = match.arg(type)
  if (i %% n == 0) {
    if (type == "table") {
      write.table(x, file, row.names = FALSE, ...)
    } else if (type == "raster") {
      raster::writeRaster(x, file, ...)
    }
  }
}

rescale_by <- function(x, y) {
  x_mean <- mean(x, na.rm = TRUE)
  x_sd <- sd(x, na.rm = TRUE)
  y_mean <- mean(y, na.rm = TRUE)
  y_sd <- sd(y, na.rm = TRUE)
  y_range <- range(y, na.rm = TRUE)
  .x <- (x - x_mean) / x_sd
  (.x * y_sd) + y_mean
  #scales::rescale(x, y_range)
}
grahamstewart12/fluxtools documentation built on Dec. 29, 2019, 10:53 a.m.