R/caliper_relevance_new.R

Defines functions caliper_relevance_new

Documented in caliper_relevance_new

#' @title Caliper method for local weights, second iteration
#'
#' @description
#' Calculates weights for which previous log scores to use based on the
#' caliper method. This is an implementation of the simpel caliper
#' method where local predictive ability is the sum of relevant log 
#' scores. In case there are no relevant data to base the measure on, 
#' each observation gets weight zero, and so each model will have equal
#' weight.
#'
#' @param atomic_df Data frames with agent predictions.
#' @param sotw Data frame containing the state of the world at each
#'   time point, which can include decision maker variables not in any
#'   of the atomic models. The first column of this data frame should
#'   be t (as in time).
#' @param start_agg From which value of t to start aggregating, ie 
#'   producing aggregate predictions.
#' @param cw The caliper width.
#' @param matching_vars Data frame with matching variables, ie pooling
#'   variables we want to fully match. First column should be t (and
#'   correspond in time to the other t columns).
#' 
#' @return A data table that consists of several stacked data tables
#'   (each indexed by the column t). Each subtable has a value t2 for
#'   each previous observation, and to each of those a corresponding
#'   similarity.
#'
#' @import data.table
#' @export

caliper_relevance_new <- function(
        atomic_df,
        sotw,
        start_agg = 161,
        cw = 5,
        matching_vars
) {

    t2 <- similarity <- NULL # to silence NSE NOTEs in R CMD check

    if(!(is.null(matching_vars))) {
        stopifnot(colnames(matching_vars[[1]])== "t")
    }
    
    T <- max(atomic_df$t)
    start <- min(atomic_df$t)
    p <- (T - start_agg + 1)
    rows <- sum(seq_len(p))

    sim_df <- data.frame(matrix(ncol = 3, nrow = rows))
    x <- c("t", "t2", "similarity")
    colnames(sim_df) <- x
    sotw <- data.table::data.table(sotw)

    j <- 0
    for (i in start_agg:T) {
        print(sprintf(
            "caliper_relevance iter %i of %i",
            i+1-start_agg,
            T-start_agg+1))
        
        for (k in start:(i - 1)) {
            j <- j + 1
            sim_df[j, 1] <- i
            sim_df[j, 2] <- k
            if(is.null(matching_vars)) {
                if (sum((sotw[t == (i ), -1] - sotw[t == (k ), -1])^2) < cw) {
                sim_df[j, 3] <- 1
                } else {
                    sim_df[j, 3] <- 0
                }
            } else {
                if (
                    (sum((sotw[t == (i ), -1] - sotw[t == (k ), -1])^2) < cw) &
                    (all(
                        matching_vars[matching_vars$t == i, -1] ==
                        matching_vars[matching_vars$t == k, -1]
                        ))) {
                        sim_df[j, 3] <- 1
                } else {
                    sim_df[j, 3] <- 0
                }
            }
            
        }
    }

    sim_df <- data.table::data.table(sim_df)
    return(sim_df)
}
ooelrich/oscbvar documentation built on Sept. 8, 2021, 3:31 p.m.