knitr::opts_chunk$set(echo = TRUE)

# Set root dir to project directory to ensure that code is always run relative to the project directory, no matter if it is run using `knitr` or interactively.
knitr::opts_knit$set(root.dir = rprojroot::find_root(rprojroot::has_file("DESCRIPTION")))

# Attach tideverse package to enable access to pipe (%>%)
require(tidyverse)

Overview

Preliminaries

notebook_name <- 
   stringr::str_to_lower(stringr::str_replace_all(params$title, " ", "_"))
# Data from computational modeling will be read from here
model_comparison_group_dir <- 
  file.path("data","derivatives", "06_model_comparison_group")

bic_aggregate_winning_model_data_dir <- 
  file.path(model_comparison_group_dir, "winning_models_bic_aggregate")

bic_count_winning_model_data_dir <- 
  file.path(model_comparison_group_dir, "winning_models_bic_count")

# Derivatives will be written here
derivatives_dir <- 
  file.path("data","derivatives", notebook_name) 

# Figures will be written here
figures_dir <- 
  file.path("figures", notebook_name)

# Create non-existing dirs if they don't exist
cmdsddfeitc::check_dir(all_dirs = c(derivatives_dir, figures_dir))

Read data

get_pattern <- function(dir_path, data_type) {
  if (data_type == "auc") {
    sprintf("^auc_task-%s.*%s_BIC.*.csv$",
            params$task[[1]],
            params$algorithm[[1]]
    )
  } else if (data_type == "par_vals") {
    sprintf("^best_fitting_params_task-%s.*%s_BIC.*.csv$",
            params$task[[1]],
            params$algorithm[[1]]
    )
  }
}

read_data <- function(dir_path, file_pattern, best_model_type, col_types, key_name) {

  # if (key_name == "auc") {
  #   group_cols <- c("participant_id", "model", "parameterization", "bound_settings", "algorithm")
  # } else if (key_name == "params") {
  #   group_cols <- c("participant_id", "model_name", "parameterization", "bound_settings", "algorithm")
  # }

  group_cols <- c("participant_id", "model_name", "parameterization", "bound_settings", "algorithm")

  tibble::tibble(best_model_type = factor(levels = c("overall", "individual")),
                 path = character()) %>%
  tibble::add_row(best_model_type = best_model_type,
                  path = dir_path) %>%
  dplyr::mutate(files = purrr::map(path, .f = list.files,
                                   pattern = file_pattern,
                                   full.names = TRUE
                                   )) %>%
  dplyr::select(-path) %>%
  tidyr::unnest() %>%
  dplyr::mutate(data = purrr::map(files, readr::read_csv, col_types = col_types)) %>%
  dplyr::mutate(data = purrr::pmap(.l = list(in1 = .$data,
                                             in2 = key_name),
                                    .f = function(in1, in2) {


                                      if (in2 == "auc") {
                                        in1 %>%
                                          dplyr::rename(model_name = model) %>%
                                          dplyr::group_by_at(group_cols) %>%
                                          dplyr::select(-auc) %>% 
                                          tidyr::spread(key = frame, value = norm_auc) %>%
                                          tidyr::nest(.key = !!dplyr::sym(key_name))
                                      } else if (in2 == "params") {
                                        in1 %>%
                                          dplyr::group_by_at(group_cols) %>%
                                          tidyr::nest(.key = !!dplyr::sym(key_name))
                                      }
                                    })) %>%
  tidyr::unnest()
}


# File pattern
pattern_auc_bic_aggregate <- 
  get_pattern(dir_path = bic_aggregate_winning_model_data_dir,
              data_type = "auc")

pattern_auc_bic_count <- 
  get_pattern(dir_path = bic_count_winning_model_data_dir,
              data_type = "auc")

pattern_best_fit_params_bic_aggregate <- 
  get_pattern(dir_path = bic_aggregate_winning_model_data_dir,
              data_type = "par_vals")

pattern_best_fit_params_bic_count <- 
  get_pattern(dir_path = bic_count_winning_model_data_dir,
              data_type = "par_vals")

# Column types
col_types_best_fitting_pars <- 
  cmdsddfeitc::get_col_types("best_fitting_params")

col_types_auc <- 
  cmdsddfeitc::get_col_types(stringr::str_c("auc_", params$task[[1]]))

# Read data

data_best_fit_params_bic_aggregate <- 
  read_data(dir_path = bic_aggregate_winning_model_data_dir, 
            file_pattern = pattern_best_fit_params_bic_aggregate, 
            best_model_type = "overall", 
            col_types = col_types_best_fitting_pars,
            key_name = "params")

data_auc_bic_aggregate <- 
  read_data(dir_path = bic_aggregate_winning_model_data_dir, 
            file_pattern = pattern_auc_bic_aggregate,
            best_model_type = "overall", 
            col_types = col_types_auc,
            key_name = "auc")

Preprocess data

Parameter values of the best-fitting model overall

best_fit_params_overall <- 
  data_best_fit_params_bic_aggregate %>%
  dplyr::filter(best_model_type == "overall") %>%
  # Reorder to facilitate inspection
  dplyr::select(best_model_type, participant_id, model_name, parameterization, params) %>%
  # dplyr::mutate(params = purrr::map(params, gather)) %>%
  tidyr::unnest() %>%
  print()

auc_overall <- 
  data_auc_bic_aggregate %>%
  dplyr::filter(best_model_type == "overall") %>%
  # Reorder to facilitate inspection
  dplyr::select(best_model_type, participant_id, model_name, parameterization, auc) %>%
  # dplyr::mutate(auc = purrr::map(auc, gather)) %>%
  tidyr::unnest() %>%
  print()

auc_best_fit_params_data <- 
  dplyr::left_join(auc_overall, best_fit_params_overall,
                   by = c("best_model_type", "participant_id", "model_name", "parameterization"))

Pairs plot

tibb <- auc_best_fit_params_data

GGally::ggpairs(tibb,
                  progress = FALSE,
                  columns = dplyr::select(tibb, - best_model_type, -participant_id, -model_name, -parameterization, -mu, -kappa1) %>% colnames(),
                  # Plots on the diagonal
                  # diag = list(continuous = "barDiag"),
                  diag = list(continuous = GGally::wrap('barDiag', bins = 10)),
                  # Plots below the diagonal
                  lower = list(continuous = "smooth"),
                  # Plots above the diagonal
                  upper = list(continuous = GGally::wrap('cor', method = "spearman")))


bramzandbelt/cmdsddfeitc documentation built on June 28, 2019, 8:19 a.m.