R/short_term.R

Defines functions short_term

Documented in short_term

#' Calculate short-term changes for binned data
#'
#' \code{short_term} computes the bin-to-bin change of a specified value using linear regression.
#'
#' @param data (data.frame) A data frame or tibble containing the value column
#'   and the bin column
#' @param value (unquoted expression) Variable name of the value for which the
#'   short-term changes should be calculated.
#' @param bin (unquoted expression) Variable name of the discrete bin numbers.
#' @param bin.one (character) Either "oldest" when the first bin is the oldest
#'   out of all bins or "youngest" when the first bin is the most recent one.
#'   "oldest" is the default and corresponds to the convention that stage 1 is
#'   the oldest geologic stage.
#' @param mult.observations (logical) Are there more observations per bin?
#'   Defaults to one observation per bin.
#' @param print.result (character) Either "vector" for output given in a named
#'   vector or "df" for output given within a tibble (default).
#'
#' @section Details:
#' To calculate the short-term change for each row, the coefficient of a linear
#' regression between the value and bin of interest and each corresponding lag
#' (the value and bin before the focal bin) is used. This corresponds to a unit
#' change of the value compared to the previous bin. For example, if we have
#' 20°C at bin 10, and 22°C at bin 11, the short-term temperature change from
#' bin 10 to 11 is 2. If bin 11 is older than bin 10, one needs to specify this
#' by setting bin.one = "youngest" and the corresponding short-term temperature
#' change would hence be calculated from bin 11 to bin 10. If there is more than
#' one observation per bin, and mult.observations is set to TRUE, the regression
#' is based on all values per bin. The default output is a data frame (tibble)
#' with the first column corresponding to the raw value of interest, the second
#' column the bins, and the third columns to the calculated short-term change.
#' The data frame is ordered starting with the oldest bin in the first row. If
#' there are more observations per bin (mult.observations = "TRUE"), the output
#' is a nested data frame with all observations per bin grouped together. If
#' print.result is set to "vector", the output is a named vector with the
#' short-term change as values and each corresponding bin as name.
#'
#' @return The output is a tibble data frame (default) containing the value column, the
#'   bin column and the calculated short-term change.
#' @export
#'
#' @examples
#' dfr <- data.frame(x = rnorm(10), stg = 1:10)
#' short_term(data = dfr, value = x, bin = stg)
short_term <- function(data, value, bin, bin.one = "oldest", mult.observations = FALSE, print.result = "df") {
  lag.bin <- NULL
  lag.val <- NULL
  comb.val <- NULL
  comb.bin <- NULL
  model <- NULL
  val.com <- NULL

  if (!tibble::is_tibble(data)) {
    data <- data %>%
      tibble::as_tibble()
  }

  ori.data <- data

  if (bin.one != "oldest" && bin.one != "youngest") {
    stop("bin.one can either be 'earliest' or 'latest'")
  } else if (bin.one == "oldest") {
    data <- data %>%
      dplyr::arrange({{ bin }})
  } else if (bin.one == "youngest") {
    data <- data %>%
      dplyr::arrange(dplyr::desc({{ bin }}))
  }

  if (mult.observations == FALSE) {
    suppressMessages(
      output <- data %>%
        dplyr::mutate(
          lag.val = dplyr::lag({{ value }}),
          lag.bin = dplyr::lag({{ bin }})
        ) %>%
        dplyr::group_by({{ bin }}) %>%
        dplyr::mutate(
          comb.val = list(c({{ value }}, lag.val)),
          comb.bin = list(c({{ bin }}, lag.bin))
        ) %>%
        tidyr::drop_na() %>%
        dplyr::ungroup() %>%
        tidyr::unnest(cols = c(comb.val, comb.bin)) %>%
        dplyr::nest_by({{ bin }}) %>%
        dplyr::mutate(model = list(stats::lm(comb.val ~ comb.bin, data = data))) %>%
        dplyr::ungroup() %>%
        dplyr::mutate(
          short_term = purrr::map(model, "coefficients"),
          short_term = purrr::map_dbl(short_term, purrr::pluck, 2)) %>%
        dplyr::select({{ bin }}, short_term) %>%
        dplyr::full_join(ori.data) %>%
        dplyr::select({{ value }}, {{ bin }}, short_term) %>%
        dplyr::arrange({{ bin }})
    )
  } else if (mult.observations == TRUE) {
    data.mult <- data %>%
      dplyr::group_by({{ bin }}) %>%
      tidyr::nest()

    suppressMessages(
      output <- data %>%
        dplyr::group_by({{ bin }}) %>%
        tidyr::nest() %>%
        dplyr::mutate(val.com = purrr::map(data, rlang::as_label(rlang::enquo(value)))) %>%
        dplyr::ungroup() %>%
        dplyr::select(val.com, {{ bin }}) %>%
        dplyr::mutate(
          lag.val = dplyr::lag(val.com),
          lag.bin = dplyr::lag({{ bin }})
        ) %>%
        dplyr::group_by({{ bin }}) %>%
        dplyr::mutate(
          comb.val = purrr::map2(val.com, lag.val, c),
          comb.bin = list(rep(c({{ bin }}, lag.bin), each = 2))
        ) %>%
        tidyr::drop_na() %>%
        dplyr::ungroup() %>%
        dplyr::select({{ bin }}, comb.val, comb.bin) %>%
        tidyr::unnest(cols = c(comb.val, comb.bin)) %>%
        dplyr::nest_by({{ bin }}) %>%
        dplyr::mutate(model = list(stats::lm(comb.val ~ comb.bin, data = data))) %>%
        dplyr::ungroup() %>%
        dplyr::mutate(
          short_term = purrr::map(model, "coefficients"),
          short_term = purrr::map_dbl(short_term, purrr::pluck, 2)) %>%
        dplyr::select({{ bin }}, short_term) %>%
        dplyr::full_join(data.mult) %>%
        dplyr::select(data, {{ bin }}, short_term) %>%
        dplyr::arrange({{ bin }})
    )
  }

  if (bin.one == "oldest") {
    output <- output %>%
      dplyr::arrange({{ bin }})
  } else if (bin.one == "youngest") {
    output <- output %>%
      dplyr::arrange(dplyr::desc({{ bin }})) %>%
      dplyr::mutate(short_term = short_term * -1)
  }

  if (print.result != "df" && print.result != "vector") {
    stop("print.result can either be 'df' or 'vector'")
  } else if (print.result == "df") {
    return(output)
  } else if (print.result == "vector") {
    output <- output %>%
      dplyr::select({{ bin }}, short_term) %>%
      tibble::deframe()
    return(output)
  }
}
Ischi94/palint documentation built on Feb. 7, 2021, 4:37 a.m.