R/lazarus.R

#' Replace traced values in an inventory table.
#'
#' `lazarus()` performs a backwards replacement of values starting from a
#' transition pattern.
#'
#' This function acts on a standard inventory table for an IDENT site. For a
#' specified `variable`, it traces chains of values equal to `backtrace_val`,
#' starting at entries that are the beginning of a `transition` pattern. The
#' entries in the chains are then replaced with `replace_val`. The standard
#' output contains the original data set and to two additional columns:
#' \itemize{
#'   \item `*variable*_replace` indicates whether an entry was replaced.
#'   \item `*variable*_new` holds the new sequence of values after performing
#'     replacements.
#' }
#' The performance of the function should remain stable for several hundred
#' thousand rows.
#'
#' @param data Data frame containing measurements for an IDENT site (wide
#'     format). It must contain the index columns `Block`, `Plot`, and `Pos`.
#' @param variable Variable (i.e. column) in the data set in which values are
#'     to be replaced.
#' @param sort_var The data set is automatically partitioned according to
#'     `Block`, `Plot`, and `Pos` (i.e. position of the individual). For each
#'     position, the sequence of values is then sorted (in ascending order) by the
#'     variable (i.e. column the data set) specified with `sort_var`. In most
#'     cases, this column should be of type `numeric` or `integer`. If it is a
#'     factor, it will be sorted according to its `levels`. If it is of type
#'     `character`, it will be sorted alphabetically, which means that a value
#'     of `10` would be placed after `1` and before `2`.
#' @param transition A vector of length 2, specifying the first and second value
#'     of a transition. These transitions will mark the start from which values
#'     equal to `backtrace_val` are traced backwards.
#' @param backtrace_val Values that are to be traced, starting at a
#'     `transition`.
#' @param replace_val New value that will replace `backtrace_val`. Replacement
#'     will only take place if the entry belongs to a traced chain that starts
#'     at a `transition`.
#' @param append If `TRUE`, the resulting data frame is joined with the input
#'     `data`, that is all columns will be returned. Otherwise, only index
#'     columns, the original `*variable*` column, and the new columns,
#'     `*variable*_replace` and `*variable*_new`, are returned.
#' @return A data frame with index columns, `*variable*`, `*variable*_new`
#'     holding the new values, and `*variable*_replace` indicating whether a
#'     value in the row has been replaced (`1`) or not (`0`).
#' @examples
#' # Backwards replace a chain of subsequent "Dead" values that occur before a
#' # transition from "Dead" to "Alive".
#' data(mortality)
#' lazarus(mortality,
#'         variable = "StateDesc",
#'         sort_var = "YearInv",
#'         transition = c("Dead", "Alive"),
#'         backtrace_val = "Dead", replace_val = "Alive",
#'         append = TRUE)
#' @export
lazarus <- function(data,
                    variable,
                    sort_var,
                    transition,
                    backtrace_val,
                    replace_val,
                    append = TRUE) {

    # Table to hold new values
    variable_replace <- paste0(variable,"_replace")
    variable_new <- paste0(variable, "_new")

    data_replace <- tibble::tibble(Block = character(0),
                                   Plot = character(0),
                                   Pos = character(0),
                                   !!sort_var,
                                   !!variable := character(0),
                                   !!variable_replace := integer(0),
                                   !!variable_new := character(0))

    # Get combinations from data
    filter_t <- expand.grid("Pos" = unique(data$Pos),
                            "Plot" = unique(data$Plot),
                            "Block" = unique(data$Block))

    # Detect and replace per individual
    for(i in 1:nrow(filter_t)) {
      data_subset <- data %>%
            dplyr::select(Plot, Pos, Block, !!sort_var, !!variable) %>%
            dplyr::filter(Plot == filter_t$Plot[i],
                          Pos == filter_t$Pos[i],
                          Block == filter_t$Block[i])

        ord_sub <- order(data_subset$Block,
                     data_subset$Plot,
                     data_subset$Pos,
                     data_subset[[sort_var]])
        data_subset <- data_subset[ord_sub, ]

        x <- dplyr::pull(data_subset, variable)

        starts <- detect_transitions(x = x, transition = transition)
        backtrace <- backtrace_values(x = x, backtrace_val = backtrace_val, starts = starts)
        replaced <- replace_backtraced(x = x, backtrace = backtrace, replace_val = replace_val)

        data_subset_replace <- data_subset %>%
            dplyr::mutate(!!variable_replace := backtrace,
                          !!variable_new := replaced)

        data_replace <- rbind(data_replace, data_subset_replace)
    }

    n_replaced <- sum(data_replace[[variable_replace]])
    print(paste("Replaced", n_replaced, "of", nrow(data_replace), "entries."))

    if(append) {
        data_replace <- data %>%
            dplyr::select(-!!variable) %>%
            dplyr::left_join(data_replace,
                             by = c("Block", "Plot", "Pos", sort_var))
    }
    ord <- order(data_replace$Block,
                 data_replace$Plot,
                 data_replace$Pos,
                 data_replace[[sort_var]])
    data_replace <- data_replace[ord, ]

    return(data_replace)
}
dschoenig/IDENTcc documentation built on May 16, 2019, 4:07 a.m.