# set the knitr chunk option `echo` to TRUE as the default for the chunks that follow
knitr::opts_chunk$set(echo = TRUE)


Read in the results

# open the SQLite database under output/ that backs the drake cache
# NOTE(review): no DBI::dbDisconnect(db) is visible in this section -- confirm
# the connection is closed once the cache is no longer needed
db <- DBI::dbConnect(
    drv = RSQLite::SQLite(),
    dbname = here::here("output", "drake-cache.sqlite")
)

# wrap the connection as a storr cache using the data/keys table names
cache <- storr::storr_dbi(
    tbl_data = "datatable",
    tbl_keys = "keystable",
    con = db
)

# pull the full_results target out of that cache
loadd(full_results, cache = cache)

Process results together

We do some cleaning of the dataset names in full_results:

# drop the literal "data_" that precedes the rest of each dataset name;
# names that do not match the pattern are left untouched by sub()
full_results <- mutate(
    full_results,
    dataset = sub(pattern = "data_(.+)$", replacement = "\\1", x = dataset)
)


Again, we combine the species_table from within the metadata column, and join it with the results:

# Flatten one row of full_results into a single data frame.
#
# Arguments (one per column of full_results when called through pmap):
#   results  - data frame of results; needs an `id` column for the join
#   metadata - list-like object holding a `species_table` data frame that
#              also has an `id` column
#   dataset  - value recorded in a `dataset` column on every row
#   method   - value recorded in a `method` column on every row
#   args     - stored, wrapped in a list, in an `args` list-column
#
# Returns `results` with the three columns above added and the columns of
# `species_table` joined on by `id`.
process_row <- function(results, metadata, dataset, method, args) {
    # the join key is cast to character on the species-table side
    # (presumably because results$id is character -- confirm)
    species_table <- mutate(metadata$species_table, id = as.character(id))

    results %>%
        mutate(dataset = dataset,
               method = method,
               args = list(args)) %>%
        left_join(species_table, by = "id")
}

# apply process_row to each row of full_results, then stack the pieces into a
# single tibble
# NOTE(review): the chain previously ended in a dangling `%>%`; it now stops at
# bind_rows() -- confirm no further step was intended
results <- full_results %>%
    pmap(process_row) %>%
    bind_rows()

# what is the structure of results?
str(results)

Prepare for plotting

What we want to plot is a summary of the results for each time series (represented by unique combinations of id x dataset). Since the observed and predicted values are going to have very different scales across each time series, let's just count the fraction of times the observed value fell within the predicted 95% range:

# one row per time series (id x dataset) and method:
#   frac_correct - share of rows whose observed value lies strictly between
#                  lower_CI and upper_CI
#   species, class - taken from the first row of each group
to_plot <- results %>%
    group_by(id, dataset, method) %>%
    summarize(frac_correct = sum(observed > lower_CI & observed < upper_CI) / n(),
              species = first(species),
              class = first(class)) %>%
    # summarize() only peels off the last grouping variable; drop the rest so
    # later steps do not silently operate per id x dataset
    ungroup()


For each level of class, plot the density of frac_correct (stacked, with one panel per method):

# stacked density of frac_correct, filled by class, with one panel per method
# (y axis free to vary between panels)
# NOTE(review): the original ended in a dangling `+`; confirm no further layer
# or theme was intended
ggplot(data = to_plot,
       mapping = aes(x = frac_correct, fill = class)) +
    geom_density(position = "stack") +
    facet_wrap(~method, scales = "free_y")

