# Set root dir to project directory to ensure that code is always run relative to the project directory, no matter if it is run using `knitr` or interactively.
knitr::opts_knit$set(root.dir = rprojroot::find_root(rprojroot::has_file("DESCRIPTION")))
knitr::opts_knit$set(eval.after = 'fig.cap')

# Attach tideverse package to enable access to pipe (%>%)
require(tidyverse)

Overview

Preliminaries

Before reading the data the following is specified:

notebook_name <- 
   stringr::str_to_lower(stringr::str_replace_all(params$title, " ", "_"))
# Data from computational modeling will be read from here

# Experiment data
preprocessed_data_dir <- 
  file.path("data","derivatives", "01_preprocessing", "included")

model_comparison_group_dir <- 
  file.path("data","derivatives", "06_model_comparison_group")

bic_aggregate_winning_model_data_dir <- 
  file.path(model_comparison_group_dir, "winning_models_bic_aggregate")

bic_count_winning_model_data_dir <- 
  file.path(model_comparison_group_dir, "winning_models_bic_count")

# Derivatives will be written here
derivatives_dir <- 
  file.path("data","derivatives", notebook_name)

# Figures will be written here
figures_dir <- 
  file.path("figures", notebook_name)

# Create non-existing dirs if they don't exist
cmdsddfeitc::check_dir(all_dirs = c(derivatives_dir, figures_dir))
# Determine which files should be read

# Experiment data
expt_standard_trials_filename <- 
  list.files(path = preprocessed_data_dir, 
             pattern = sprintf("^experiment_standard_trials_task-%s.*.csv$",
                               params$task[[1]])
             )

# Best-fitting models
best_fit_params_bic_aggregate_filenames <- 
  list.files(path = bic_aggregate_winning_model_data_dir, 
             pattern = sprintf("best_fitting_params_task-%s.*%s_BIC.*.csv",
                               params$task[[1]],
                               params$algorithm[[1]])
             )

best_fit_params_bic_count_filenames <- 
  list.files(path = bic_count_winning_model_data_dir, 
             pattern = sprintf("best_fitting_params_task-%s.*%s_BIC.*.csv",
                               params$task[[1]],
                               params$algorithm[[1]])
             )

Read data

Having specified all relevant variables, the AUC data and optimization statistics are read:

# Experiment data
expt_trials <- tibble::tibble()

for (i_file in 1:length(expt_standard_trials_filename)) {
  expt_trials <- 
    dplyr::bind_rows(expt_trials,
                     readr::read_csv(file = file.path(preprocessed_data_dir, expt_standard_trials_filename[[i_file]]),
                                     col_types = cmdsddfeitc::get_col_types(stringr::str_c("expt_standard_trials_",
                                                                          params$task[[1]])))
                     )
}

# Best-fitting parameters - BIC aggregate
best_fit_params_bic_aggregate <- tibble::tibble()

for (i_file in 1:length(best_fit_params_bic_aggregate_filenames)) {
  best_fit_params_bic_aggregate <- 
    dplyr::bind_rows(best_fit_params_bic_aggregate,
                     readr::read_csv(file = file.path(bic_aggregate_winning_model_data_dir, best_fit_params_bic_aggregate_filenames[[i_file]]),
                                     col_types = cmdsddfeitc::get_col_types("best_fitting_params"))
                     )


}


# Best-fitting parameters - based on BIC count
best_fit_params_bic_count <- tibble::tibble()

for (i_file in 1:length(best_fit_params_bic_count_filenames)) {
  best_fit_params_bic_count <- 
    dplyr::bind_rows(best_fit_params_bic_count,
                     readr::read_csv(file = file.path(bic_count_winning_model_data_dir,
                                                      best_fit_params_bic_count_filenames[[i_file]]),
                                     col_types = cmdsddfeitc::get_col_types("best_fitting_params"))
                     )


}

Preprocess data

get_obs_prd_performance_one_participant <- 
  function(pid, expt_data, params_data, collapse_t_l = TRUE) {

  # Preliminaries ==============================================================

  # Filter data for participant pid
  params_data_pid <- 
    params_data %>% 
    dplyr::filter(participant_id == pid)

  model_name <- dplyr::pull(params_data_pid, model_name) %>% as.character()
  pmz <- 
    dplyr::pull(params_data_pid, parameterization) %>% as.character() %>%
    stringr::str_c(params$task[[1]], ., sep = "_")

  # Clean parameter data =======================================================

  # Remove parameters containing NA values (which happens when params_data comes 
  # from multiple models, each with a different set of parameters)
  nonparcols <- c("participant_id", "model_name", "parameterization", "bound_settings", "algorithm")

  params_data_pid <- 
    params_data_pid %>% 
    dplyr::group_by_at(nonparcols) %>%
    tidyr::nest(.key = "pars") %>%
    dplyr::mutate(pars = 
                    purrr::pmap(.l = list(x = .$pars,
                                          model_name = as.character(.$model_name),
                                          pmz = as.character(.$parameterization),
                                          task = params$task),
                                .f = function(x, model_name, pmz, task) {
                                  task_pmz <- 
                                    stringr::str_c(task, pmz, sep = "_")      

                                  cor_col_order <- 
                                    itchmodel::get_par_names(model = model_name, 
                                                             parameterization = task_pmz)
                                  x %>%
                                    tidyr::gather() %>%
                                    tidyr::drop_na() %>% 
                                    tidyr::spread(key = "key", value = "value") %>%
                                    # Reorder, as specified by itchmodel code
                                    dplyr::select(cor_col_order)
                                    })
                  ) %>%
    tidyr::unnest()

  # Retrieve observed and predicted performance ================================
  obs_and_prds <- 
    cmdsddfeitc::tidy_obs_prd_choice_rt(return_var = 'all',
                                        obs = expt_data %>% dplyr::filter(subject_ix == pid), 
                                        model = model_name, 
                                        pmz = pmz,
                                        parameters = params_data_pid)

  # Retrieve parameter data per frame ==========================================
  pars_per_frame <- 
    obs_and_prds$all_nested %>% 
    dplyr::select(participant_id, frame, parameters) %>% 
    tidyr::unnest()

  # Clean data =================================================================

  # Observations ---------------------------------------------------------------
  if (params$task == "defer_speedup") {
    frame_levels <- c("neutral", "defer", "speedup")
  } else if (params$task == "date_delay") {
    frame_levels <- c("delay", "date")
  }

  obs_performance <- 
    obs_and_prds$obs_nested %>%
    dplyr::select(-summary_stats) %>%
    tidyr::unnest() %>%
    # Make sure data types of obs and prd are identical
    dplyr::mutate(participant_id = as.integer(participant_id),
                  frame = factor(frame, 
                                 levels = frame_levels, 
                                 ordered = TRUE),
                  t_l = as.integer(t_l),
                  t_s = as.integer(t_s),
                  m_s_grp = as.integer(round(m_s / 0.17))) %>%
    dplyr::select(-m_s)


  # Predictions ----------------------------------------------------------------

  # Make tibble containing stimuli and model-predicted variables
  stim_model_vars_per_frame <- 
    obs_and_prds$all_nested %>% 
    dplyr::select(-parameters) %>% 
    tidyr::unnest() %>%
    dplyr::mutate(participant_id = as.integer(participant_id),
                  t_s = as.integer(t_s),
                  m_s_grp = as.integer(round(m_s / 0.17))) %>%
    dplyr::select(-m_s) %>%
    dplyr::left_join(pars_per_frame, 
                     by = c("participant_id", "frame"))

  # Merge with observations
  obs_prd_performance <- 
    dplyr::left_join(x = obs_performance, 
                     y = stim_model_vars_per_frame, 
                     by = c("participant_id", "frame", "m_s_grp", "t_s", "t_l","m_l"))


  # Compute predicted median RT
  if (model_name == "DDM") {
    obs_prd_performance <- 
      obs_prd_performance %>%
      dplyr::mutate(med_rt = 
                      purrr::pmap_dbl(.l = list(a = .$a,
                                                v = .$d,
                                                t0 = .$t0,
                                                z = 0.5 * .$a),
                                      .f = rtdists::qdiffusion,
                                      response = "upper", 
                                      s = 1,
                                      p = 0.5,
                                      scale_p = TRUE)
                    )
    # Note: for typical values, predictions for median RTs were found not to 
    # differ between "lower" and "upper" responses, hence only "upper"

    } else if (model_name == "DFT_CRT") {

      obs_prd_performance <- 
      obs_prd_performance %>%
      dplyr::mutate(med_rt = 
                      purrr::pmap_dbl(.l = list(s = .$s,
                                                a = .$theta_star * .$s,
                                                v = .$d,
                                                t0 = .$t0,
                                                z = 0.5 * .$theta_star * .$s),
                                      .f = rtdists::qdiffusion,
                                      response = "upper", 
                                      p = 0.5,
                                      scale_p = TRUE)
                    )
      # Note: for typical values, predictions for median RTs were found not to 
      # differ between "lower" and "upper" responses, hence only "upper"
    }

  # Group obs_prd_performance & # compute observed and predicted performance =
  if (collapse_t_l) {
    by_cols <- c("participant_id", "frame")
  } else {
    by_cols <- c("participant_id", "frame", "t_l")
  }

  # Compute descriptive statistics and return ===================================
  obs_prd_performance %>%
    dplyr::group_by_at(by_cols) %>%
    dplyr::summarize(med_rt_obs = median(rt),
                     p_ll_obs = sum((response == "upper")) / n(),
                     med_rt_prd = median(med_rt),
                     p_ll_prd = mean(p_ll)) %>%
    dplyr::ungroup()

}

get_obs_prd_performance_all_participants <- function(pids, expt_data, params_data, collapse_t_l = FALSE) {

  tibb <- tibble::tibble()

  for (pid in pids) {

    # Since this step can take a while, print progress
    print(sprintf("Processing data from participant %.0f", pid))
    tibb <-
      dplyr::bind_rows(tibb,
                       get_obs_prd_performance_one_participant(pid = pid,
                                                        expt_data = expt_data,
                                                        params_data = params_data,
                                                        collapse_t_l = collapse_t_l))
  }

  # Return
  tibb
}

Get the data

pids <- unique(expt_trials$subject_ix)

Best model overall

obs_prd_performance_overall_by_frame <- 
  get_obs_prd_performance_all_participants(pids = pids, 
                                           expt_data = expt_trials, 
                                           params_data = best_fit_params_bic_aggregate, 
                                           collapse_t_l = TRUE)
obs_prd_performance_overall_by_frame_t_l <- 
  get_obs_prd_performance_all_participants(pids = pids, 
                                           expt_data = expt_trials, 
                                           params_data = best_fit_params_bic_aggregate, 
                                           collapse_t_l = FALSE)

Best model per individual

obs_prd_performance_per_individual_by_frame <- 
  get_obs_prd_performance_all_participants(pids = pids, 
                                           expt_data = expt_trials, 
                                           params_data = best_fit_params_bic_count, 
                                           collapse_t_l = TRUE)
obs_prd_performance_per_individual_by_frame_t_l <- 
  get_obs_prd_performance_all_participants(pids = pids, 
                                           expt_data = expt_trials, 
                                           params_data = best_fit_params_bic_count, 
                                           collapse_t_l = FALSE)

Visualize data

Best model overall

plt_obs_prd_p_ll_bic_overall_by_frame <- 
  cmdsddfeitc::plot_obs_prd_performance(tibb = obs_prd_performance_overall_by_frame, 
                                        plotvar = "p_ll")

plt_obs_prd_p_ll_bic_overall_by_frame
plt_obs_prd_med_rt_bic_overall_by_frame <- 
  cmdsddfeitc::plot_obs_prd_performance(tibb = obs_prd_performance_overall_by_frame, 
                                        plotvar = "med_rt")

plt_obs_prd_med_rt_bic_overall_by_frame
plt_obs_prd_p_ll_bic_overall_by_frame_t_l <- 
  cmdsddfeitc::plot_obs_prd_performance(tibb = obs_prd_performance_overall_by_frame_t_l, 
                                        plotvar = "p_ll")

plt_obs_prd_p_ll_bic_overall_by_frame_t_l
plt_obs_prd_med_rt_bic_overall_by_frame_t_l <- 
  cmdsddfeitc::plot_obs_prd_performance(tibb = obs_prd_performance_overall_by_frame_t_l, 
                                        plotvar = "med_rt")

plt_obs_prd_med_rt_bic_overall_by_frame_t_l

Best model per participant

plt_obs_prd_p_ll_bic_idv_by_frame <- 
  cmdsddfeitc::plot_obs_prd_performance(tibb = obs_prd_performance_per_individual_by_frame, 
                                        plotvar = "p_ll") %>%
  print()
plt_obs_prd_med_rt_bic_idv_by_frame <- 
  cmdsddfeitc::plot_obs_prd_performance(tibb = obs_prd_performance_per_individual_by_frame, 
                                        plotvar = "med_rt") %>%
  print()
plt_obs_prd_p_ll_bic_idv_by_frame_t_l <- 
  cmdsddfeitc::plot_obs_prd_performance(tibb = obs_prd_performance_per_individual_by_frame_t_l, 
                                        plotvar = "p_ll") %>%
  print()
plt_obs_prd_med_rt_bic_idv_by_frame_t_l <- 
  cmdsddfeitc::plot_obs_prd_performance(tibb = obs_prd_performance_per_individual_by_frame_t_l, 
                                        plotvar = "med_rt") %>%
  print()

Write data and plots

Data

# Observed and predicted performance data by frame from BIC aggregate model 
readr::write_csv(x = obs_prd_performance_overall_by_frame,
                 path = file.path(derivatives_dir,
                                  sprintf("obs_prd_performance_bic_overall_by_frame_task-%s.csv",
                                          params$task)
                                  )
                 )

# Observed and predicted performance data by frame and t_l from BIC aggregate model 
readr::write_csv(x = obs_prd_performance_overall_by_frame_t_l,
                 path = file.path(derivatives_dir,
                                  sprintf("obs_prd_performance_bic_overall_by_frame_t_l_task-%s.csv", 
                                          params$task)
                                  )
                 )

# Observed and predicted performance data by frame from BIC count model 
readr::write_csv(x = obs_prd_performance_per_individual_by_frame,
                 path = file.path(derivatives_dir,
                                  sprintf("obs_prd_performance_bic_idv_by_frame_task-%s.csv", 
                                          params$task)
                                  )
                 )

# Observed and predicted performance data by frame and t_l from BIC count model 
readr::write_csv(x = obs_prd_performance_per_individual_by_frame_t_l,
                 path = file.path(derivatives_dir,
                                  sprintf("obs_prd_performance_bic_idv_by_frame_t_l_task-%s.csv", 
                                          params$task)
                                  )
                 )

Plots

# Save plots to disk ===========================================================

# Resolution (for raster images)
dpi <- 300

# Make function for saving
save_figure <- function(fn, plt) {
  # As raster image ------------------------------------------------------------
  ggplot2::ggsave(path = figures_dir,
                  filename = paste0(fn, ".png"),
                  plot = plt,
                  width = 8.5,
                  units = "cm",
                  dpi = dpi)

  # As vector image ------------------------------------------------------------
  ggplot2::ggsave(path = figures_dir,
                  filename = paste0(fn, ".pdf"),
                  plot = plt_obs_prd_p_ll_bic_overall_by_frame,
                  width = 8.5,
                  units = "cm")
}

# Save the figures
save_figure(fn = sprintf("obs_prd_p_ll_bic_overall_by_frame_task-%s_dpi-%d", 
                         params$task, 
                         dpi), 
            plt = plt_obs_prd_p_ll_bic_overall_by_frame)

save_figure(fn = sprintf("obs_prd_med_rt_bic_overall_by_frame_task-%s_dpi-%d",
                         params$task, 
                         dpi), 
            plt = plt_obs_prd_med_rt_bic_overall_by_frame)

save_figure(fn = sprintf("obs_prd_p_ll_bic_overall_by_frame_t_l_task-%s_dpi-%d",
                         params$task, 
                         dpi), 
            plt = plt_obs_prd_p_ll_bic_overall_by_frame_t_l)

save_figure(fn = sprintf("obs_prd_med_rt_bic_overall_by_frame_t_l_task-%s_dpi-%d",
                         params$task, 
                         dpi), 
            plt = plt_obs_prd_med_rt_bic_overall_by_frame_t_l)

save_figure(fn = sprintf("obs_prd_p_ll_bic_idv_by_frame_task-%s_dpi-%d",
                         params$task, 
                         dpi), 
            plt = plt_obs_prd_p_ll_bic_idv_by_frame)

save_figure(fn = sprintf("obs_prd_med_rt_bic_idv_by_frame_task-%s_dpi-%d",
                         params$task, 
                         dpi), 
            plt = plt_obs_prd_med_rt_bic_idv_by_frame)

save_figure(fn = sprintf("obs_prd_p_ll_bic_idv_by_frame_t_l_task-%s_dpi-%d",
                         params$task, 
                         dpi), 
            plt = plt_obs_prd_p_ll_bic_idv_by_frame_t_l)

save_figure(fn = sprintf("obs_prd_med_rt_bic_idv_by_frame_t_l_task-%s_dpi-%d",
                         params$task, 
                         dpi), 
            plt = plt_obs_prd_med_rt_bic_idv_by_frame_t_l)


bramzandbelt/cmdsddfeitc documentation built on June 28, 2019, 8:19 a.m.