R/caliper_relevance_dynamic.R

Defines functions caliper_relevance_dynamic

Documented in caliper_relevance_dynamic

#' @title Dynamic caliper width determination function
#'
#' @description
#' This function dynamically choses the caliper width and calculates
#' weights.
#'
#' @details
#' Details go here.
#'
#' @param atomic_df Data frame of atomic predictions.
#' @param sotw Data frame of smoothing variables. First column should 
#'   be called "t", and should contain an unbroken sequence of integers.
#' @param start_agg For which time point to start aggregating (what
#'   does this even mean??).
#' @param calip_data A data table that contains two columns: time (t) 
#'   and the optimal caliper width based on all predictions previous
#'   to this time (cw). That is, cw is the caliper width the decision
#'   maker should use at time t to pick the best caliper method based
#'   on historical data (from BERFORE t).
#' @param mvc The minimum viable cluster parameter used by the caliper
#'   method.
#'
#' @return A similarity data frame
#' @export


caliper_relevance_dynamic <- function(
        atomic_df,
        sotw,
        start_agg = 401,
        calip_data,
        mvc = 1
) {
    
    similarity <- t2 <- NULL # NSE NOTES from R CMD check

    T <- max(atomic_df$t)
    start <- min(atomic_df$t)
    p <- (T - start_agg + 1)
    rows <- sum(seq_len(p))

    sim_df <- data.frame(matrix(ncol = 3, nrow = rows))
    x <- c("t", "t2", "similarity")
    colnames(sim_df) <- x
    sotw <- data.table::data.table(sotw)

    j <- 0
    for (i in start_agg:T) {
        print(sprintf(
            "caliper_relevance_dynamic %i of %i",
            i + 1 - start_agg,
            T + 1 - start_agg))
        for (k in start:(i - 1)) {
            j <- j + 1
            sim_df[j, 1] <- i
            sim_df[j, 2] <- k
            # max sum of predabil BERFORE i:
            cw <- calip_data$cw[calip_data$t ==  i] 
            if (sum((sotw[t == (i - 1), -1] - sotw[t == (k - 1), -1])^2) < cw) {
                sim_df[j, 3] <- 1
            } else {
                sim_df[j, 3] <- 0
            }
        }
    }

    sim_df <- data.table::data.table(sim_df)
    sim_df <- sim_df[
        order(-t, -t2), 
        .(t2, similarity = 
            similarity + 
            (1 - similarity) * 
                max((mvc - sum(similarity)), 0) /
                ifelse(
                    .N - sum(similarity) > 0,
                    .N - sum(similarity),
                    1
                )
        ),
        by = .(t)
    ]

    return(sim_df)
}
ooelrich/oscbvar documentation built on Sept. 8, 2021, 3:31 p.m.