#' @title Generate baseline aggregations
#'
#' @description Generates baseline aggregations to compare with the relevance
#' adjusted ones.
#'
#' @details
#' Generates the following baseline aggregations:
#' * Equal weight: gives equal weight to all models.
#' * Gewisano: gives weight using linear prediction pools (GewIsano).
#'
#' @param atomic_df A data frame consisting of atomic predictions.
#' @param start_agg Which time point to start creating aggregate
#' predictions for. Note that this is based on the variable t in the
#' supplied data frame, and not row number.,
#' @import data.table
gen_baseline <- function(atomic_df, start_agg) {
lpdens <- pmean <- NULL
baseline_df <- gen_atomic_df()
T <- max(atomic_df$t)
# Giving all models equal weight
df_all <- data.table::data.table(atomic_df)
df_equal_wt <- df_all[
t >= start_agg,
list(
pmean = mean(pmean),
lpdens = log(mean(exp(lpdens))),
method = "equal_wt",
t
),
by = list(t)][, 2:5]
baseline_df <- rbind(baseline_df, df_equal_wt)
ytrue <- rep(NA, nrow(baseline_df))
baseline_df <- cbind(baseline_df, ytrue)
# Optimal prediction pools (Geweke & Amisano)
df_gewisano <- gen_gewisano(data = atomic_df, start_agg)
baseline_df <- rbind(baseline_df, df_gewisano)
return(baseline_df)
}
#' @title Generates local aggregations
#'
#' @description
#' Generates relevance adjusted aggregations given a data set with
#' relevance adjusted log scores. This is just a switcher function that
#' sends the data on either to the propto or the "select best" (which
#' is kind of not in use atm) function.
#'
#' @param RAL_data Data set containing relevance adjusted logscores.
#' @param agg_meth Which method should be used to aggregate the agents
#' based on their relevance-adjusted logscores. Currently available
#' are propto (which uses a softmax transformation) and select_best
#' which picks the model with the best RAL at each time point.
#' @param sim_measure Which method was used to calculate the weights
#' (caliper or mahala(nobis)).
#'
#' @import data.table
gen_RAA <- function(RAL_data, agg_meth, sim_measure) {
df_RAL <- switch(
agg_meth,
"propto" = propto_weighting(RAL_data, sim_measure),
"select_best" = selbest_weighting(RAL_data, sim_measure),
stop("Unknown aggregation method.")
)
return(df_RAL)
}
#' @title Weighting proportional to LPA (local predictive ability)
#'
#' @param data Dataset to use.
#' @param sim_measure Caliper or mahala.
#'
#' @import data.table
propto_weighting <- function(data, sim_measure) {
pmean <- RAL <- lpdens <- NULL
method_name <- sprintf("%s_propto", sim_measure)
df_RAL <- data[,
list(
pmean = sum(pmean * exp(RAL)) / sum(exp(RAL)),
lpdens = log(
sum(
exp(lpdens) * exp(RAL) / sum(exp(RAL))
)
),
method = method_name,
t,
ytrue = NA
),
by = list(t)
][, -1]
return(df_RAL)
}
#' @title Weighting by picking the best model
#'
#' @param data Which dataset to use.
#' @param sim_measure Caliper or mahala.
#'
#' @import data.table
selbest_weighting <- function(data, sim_measure) {
RAL <- NULL
method_name <- sprintf("%s_selbest", sim_measure)
df_RAL <- data[data[, .I[RAL == max(RAL)], by = t]$V1]
df_RAL$method <- method_name
df_RAL[, RAL := NULL]
return(df_RAL)
}
#' @title Local logscore calculator
#'
#' @description
#' Calculates the local logscores given a weight data frame and a data
#' frame of atomic predictions.
#'
#' @details
#' Starts with a weight data frame on the form
#' \tabular{rrr}{
#' \strong{t} \tab \strong{t2} \tab \strong{similar} \cr
#' 730 \tab 729 \tab 1 \cr
#' 730 \tab 728 \tab 0 \cr
#' 730 \tab 727 \tab 1 \cr
#' }
#'
#' @param weight_df Data frame containing relevance adjusted weights.
#' Generated by a relevance function (caliper relevance or
#' mahalanobis atm).
#' @param atomic_df Data frame with atomic predictions from any number
#' of models.
#' @param pratig Use TRUE to run the function in a debuggy mode where a
#' bunch of extra information is returned and printed.
#'
#' @import data.table
RAL_calculator <- function(weight_df, atomic_df, pratig = FALSE) {
t2 <- similarity <- lpdens <- method <- adj_lpdens <- NULL
start_agg <- min(weight_df$t)
stop_agg <- max(weight_df$t)
atomic_df <- data.table::data.table(atomic_df)
dfdf <- data.table::data.table(matrix(ncol = 3, nrow = 0)) # df to store RAL
colnames(dfdf) <- c("method", "RAL", "t")
for (i in start_agg:stop_agg) {
sim_df <- weight_df[t == i, list(t = t2, similarity)]
atomic_sub <- atomic_df[t < i, .(lpdens, method, t)]
#data.table::setkey(sim_df, t)
#data.table::setkey(atomic_sub, t)
#df_all <- atomic_sub[sim_df]
df_all <- merge(atomic_sub, sim_df, by.x = "t", by.y = "t")
df_all[, `:=`(adj_lpdens = lpdens * similarity)]
collapsed <- df_all[, list(RAL = sum(adj_lpdens)), by = list(method)]
collapsed[, `:=`(t = i)]
dfdf <- rbind(dfdf, collapsed)
}
keycols <- c("method", "t")
data.table::setkeyv(dfdf, keycols)
data.table::setkeyv(atomic_df, keycols)
joined_df <- atomic_df[dfdf]
return(joined_df)
}
#' @title Generate GewiSano Weights
#'
#' @description
#' Generates weights according to Geweke & Amisano 2011/2012, also
#' known as linear predicition pools. Obviously it takes data on a
#' specific form, which you should describe..
#'
#' @param data Data set of atomic predictions.
#' @param start_t Which timepoint to start generating weights for.
#' Obs! This is based on the variable t in the data, not the row
#' number!
#' @param pratig If true, the function prints a bunch of information
#' when run. It also returns a data frame that contains the weights
#' at each time, so pratig has to be FALSE when running from one of
#' the meta-funktions that generate predictions. This is because the
#' object returned changes from a single data frame to a list with the
#' original data frame returned as the first object, and a matrix of
#' weights as the second object.
#' @import data.table
#' @importFrom pracma fmincon
#' @export
gen_gewisano <- function(data, start_t, pratig = FALSE) {
fun_to_opt <- function(x, dataopt) {
-sum(log(exp(dataopt) %*% x))
}
data <- data.table::data.table(data)
df_fat <- data.table::dcast(data, t ~ method, value.var = "lpdens")
# print(colnames(df_fat)) # Debugging to check the order
k <- ncol(df_fat) - 1
start_val <- rep((1 / k), k)
tt <- seq(start_t, max(data$t))
wts <- matrix(ncol = k, nrow = length(tt))
j <- 0
for (i in start_t:(max(data$t))) {
j <- j + 1
vecop <- as.matrix(df_fat[df_fat$t <= i, 2:(k + 1)])
opt_sol <- pracma::fmincon(start_val, fun_to_opt, Aeq = matrix(1, 1, k),
beq = 1, lb = rep(0.0001, k), ub = rep(1, k), dataopt = vecop)
wts[j, ] <- opt_sol$par
if (pratig) print(wts[j, ])
}
lpdens <- log(rowSums(exp(df_fat[df_fat$t >= start_t, 2:(k + 1)]) * wts))
df_pmean <- data.table::dcast(data, t ~ method, value.var = "pmean")
pmean <- rowSums(df_pmean[df_pmean$t >= start_t, 2:(k + 1)] * wts)
method <- rep("gewisano", length(tt))
ytrue <- rep(NA, length(tt))
df_gew_res <- data.frame(pmean, lpdens, method, tt, ytrue)
colnames(df_gew_res) <- c("pmean", "lpdens", "method", "t", "ytrue")
df_gew <- gen_atomic_df()
df_gew <- rbind(df_gew, df_gew_res)
wts <- data.frame(tt, wts)
if (pratig) {
return(list(df_gew, wts))
} else {
return(df_gew)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.