```r
knitr::opts_chunk$set(echo = TRUE)
require(tidyverse)
require(rlang)
source("../R/utils-sdt.R")

super_spread <- function(data, key, ..., sep = "_", fill = NA,
                         convert = FALSE, drop = TRUE) {
  dots <- exprs(...)
  key <- enquo(key)

  output <- data %>%
    gather(gkey, value, !!!dots) %>%
    unite(ukey, !!key, gkey, sep = sep) %>%
    spread(ukey, value, fill = fill, convert = convert, drop = drop)

  return(output)
}
```
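Because `super_spread()` is used throughout the simulations below, here is a small illustration of what it does: it gathers several value columns, prefixes each with the value of the key column, and spreads the result back out so that one row per group carries columns like `hit_rate` and `fa_rate`. The toy data frame here is invented purely for this example.

```r
# Illustrative only: a made-up data frame, not output from the simulations below
toy <- tibble(
  iteration = c(1, 1),
  type = c("hit", "fa"),
  rate = c(0.80, 0.10),
  rate_snodgrass = c(0.79, 0.12)
)

toy %>%
  super_spread(type, starts_with("rate"))
#> One row per iteration, with columns
#> fa_rate, fa_rate_snodgrass, hit_rate, hit_rate_snodgrass
```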
Signal detection theory provides a framework for determining the ability of an observer to discriminate "signal" from "noise".
It stands to reason that a given observer, with some underlying sensitivity, would not perform identically every time they completed a signal detection task; instead, their performance would follow a distribution centered on their most likely performance. So when we administer a signal detection task to an observer once, our goal is to estimate the center of that distribution, that is, their most likely performance.
It also makes sense that administering more trials to an observer should yield a less noisy estimate of their performance. Crucially, however, the standard signal detection measures are point estimates: they come with no standard error that reflects the number of trials an observer completed. The simulations below make visible how strongly the sampling variability of these measures nevertheless depends on trial count.
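The simulation function below relies on the helpers sourced from `../R/utils-sdt.R`, which is not reproduced here. As a reference point, the sketch below shows the standard textbook formulas these helpers presumably implement; the actual implementations in that file may differ in detail.

```r
# Hypothetical sketch of the helpers in ../R/utils-sdt.R (file not shown);
# standard definitions, not the author's actual code.
sdt_dprime <- function(hit_rate, fa_rate) {
  # d' = z(hit rate) - z(false alarm rate)
  qnorm(hit_rate) - qnorm(fa_rate)
}

sdt_c <- function(hit_rate, fa_rate) {
  # c = -(z(hit rate) + z(false alarm rate)) / 2
  -(qnorm(hit_rate) + qnorm(fa_rate)) / 2
}

sdt_aprime <- function(hit_rate, fa_rate) {
  # Nonparametric sensitivity A' (Grier, 1971)
  ifelse(hit_rate >= fa_rate,
         0.5 + ((hit_rate - fa_rate) * (1 + hit_rate - fa_rate)) /
           (4 * hit_rate * (1 - fa_rate)),
         0.5 - ((fa_rate - hit_rate) * (1 + fa_rate - hit_rate)) /
           (4 * fa_rate * (1 - hit_rate)))
}

sdt_b2prime <- function(hit_rate, fa_rate) {
  # Nonparametric bias B'' (Grier, 1971); sign conventions vary across sources
  (hit_rate * (1 - hit_rate) - fa_rate * (1 - fa_rate)) /
    (hit_rate * (1 - hit_rate) + fa_rate * (1 - fa_rate))
}
```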
```r
big_fat_do <- function(df, n_iterations) {
  out <- df %>%
    do(sim = data.frame(type = rep(c("signal", "noise"),
                                   times = .$n_trials_per_iteration / 2),
                        param_dprime = .$this_dprime,
                        param_c = .$this_c) %>%
         slice(rep(1:n(), times = n_iterations)) %>%
         mutate(iteration = rep(1:n_iterations, each = n() / n_iterations)) %>%
         group_by(iteration, type) %>%
         mutate(n_trials = n(),
                # centering d' so c = 0 is unbiased
                latent = if_else(type == "noise",
                                 rnorm(n(), -param_dprime / 2, 1),
                                 rnorm(n(), param_dprime / 2, 1)),
                resp = if_else(latent >= param_c, "signal", "noise")) %>%
         group_by(iteration, param_dprime, param_c, n_trials, type, resp) %>%
         # snodgrass correction
         summarize(rate = n() / unique(n_trials),
                   rate_snodgrass = (n() + .5) / (unique(n_trials) + 1)) %>%
         complete(iteration, type, resp) %>%
         distinct() %>%  # not sure why but complete() is creating duplicate rows
         group_by(iteration, param_dprime, param_c, n_trials) %>%
         # need to populate 0 false alarms row in case of perfect performance
         mutate(rate = coalesce(rate, 0),
                rate_snodgrass = coalesce(rate_snodgrass,
                                          .5 / (unique(n_trials) + 1))) %>%
         filter(resp == "signal") %>%
         select(-resp) %>%
         mutate(type = recode(type, signal = "hit", noise = "fa")) %>%
         super_spread(type, starts_with("rate")) %>%
         summarize(dprime = sdt_dprime(hit_rate_snodgrass, fa_rate_snodgrass),
                   sdt_c = sdt_c(hit_rate_snodgrass, fa_rate_snodgrass),
                   aprime = sdt_aprime(hit_rate_snodgrass, fa_rate_snodgrass),
                   b2prime = sdt_b2prime(hit_rate_snodgrass, fa_rate_snodgrass)))

  return(out)
}
```
```r
# ASSUME equal number of signal and noise trials presented
params <- crossing(this_dprime = c(0.5, 1:3),
                   this_c = c(-0.5, 0, 0.5),
                   n_trials_per_iteration = c(20, 100, 200)) %>%
  group_by(this_dprime, this_c, n_trials_per_iteration) %>%
  big_fat_do(n_iterations = 1000)

sims <- bind_rows(params$sim)

sims_summary <- sims %>%
  group_by(param_dprime, param_c, n_trials) %>%
  summarize_at(vars(dprime, sdt_c, aprime, b2prime), .funs = c("median", "mad"))
```
```r
sims %>%
  ggplot(aes(x = dprime, color = factor(n_trials), fill = factor(n_trials))) +
  # geom_histogram(bins = 20, position = "dodge", size = 0.2, alpha = 0.3) +
  geom_freqpoly(bins = 20) +
  geom_vline(aes(xintercept = dprime_median, color = factor(n_trials)),
             data = sims_summary) +
  geom_vline(aes(xintercept = param_dprime), linetype = 3) +
  facet_grid(param_c ~ param_dprime) +
  theme_bw()
```
```r
sims %>%
  ggplot(aes(x = sdt_c, color = factor(n_trials), fill = factor(n_trials))) +
  # geom_histogram(bins = 20, position = "dodge", size = 0.2, alpha = 0.3) +
  geom_freqpoly(bins = 20) +
  geom_vline(aes(xintercept = sdt_c_median, color = factor(n_trials)),
             data = sims_summary) +
  geom_vline(aes(xintercept = param_c), linetype = 3) +
  facet_grid(param_c ~ param_dprime) +
  theme_bw()
```
```r
sims %>%
  ggplot(aes(x = aprime, color = factor(n_trials), fill = factor(n_trials))) +
  # geom_histogram(bins = 20, position = "dodge", size = 0.2, alpha = 0.3) +
  geom_freqpoly(bins = 20) +
  geom_vline(aes(xintercept = aprime_median, color = factor(n_trials)),
             data = sims_summary) +
  facet_grid(param_c ~ param_dprime, scales = "free_x") +
  theme_bw()
```
```r
n_iterations <- 300

within_subjs <- tibble(subj_num = 1:10) %>%
  mutate(good = rnorm(n(), 1.5, .5),
         bad = good - 0.5,
         param_c = 0) %>%
  gather("condition", "param_dprime", good, bad) %>%
  slice(rep(1:n(), each = 40)) %>%  # each = total number of trials!!
  group_by(subj_num, condition) %>%
  mutate(type = rep(c("signal", "noise"), times = n() / 2)) %>%
  ungroup() %>%
  slice(rep(1:n(), times = n_iterations)) %>%
  mutate(iteration = rep(1:n_iterations, each = n() / n_iterations)) %>%
  group_by(iteration, subj_num, condition, type) %>%
  mutate(n_trials = n(),
         # centering d' so c = 0 is unbiased
         latent = if_else(type == "noise",
                          rnorm(n(), -param_dprime / 2, 1),
                          rnorm(n(), param_dprime / 2, 1)),
         resp = if_else(latent >= param_c, "signal", "noise")) %>%
  group_by(iteration, subj_num, condition, param_dprime, type, resp) %>%
  # snodgrass correction
  summarize(n_trials = unique(n_trials),
            rate = n() / n_trials,
            rate_snodgrass = (n() + .5) / (n_trials + 1)) %>%
  complete(iteration, type, resp) %>%
  distinct() %>%  # not sure why but complete() is creating duplicate rows
  group_by(iteration, subj_num, condition, param_dprime) %>%
  # need to populate 0 false alarms row in case of perfect performance
  mutate(rate = coalesce(rate, 0),
         rate_snodgrass = coalesce(rate_snodgrass, .5 / (n_trials + 1))) %>%
  filter(resp == "signal") %>%
  select(-resp) %>%
  mutate(type = recode(type, signal = "hit", noise = "fa")) %>%
  super_spread(type, starts_with("rate")) %>%
  summarize(dprime = sdt_dprime(hit_rate_snodgrass, fa_rate_snodgrass),
            sdt_c = sdt_c(hit_rate_snodgrass, fa_rate_snodgrass))

within_subjs_sims_summary <- within_subjs %>%
  select(-sdt_c) %>%
  group_by(subj_num, condition, param_dprime) %>%
  summarize_at(vars(dprime), .funs = c("median", "mad"), na.rm = TRUE)

within_subjs_sims_summary_spread <- within_subjs %>%
  select(-sdt_c) %>%
  super_spread(condition, param_dprime, dprime) %>%
  filter(!is.na(bad_dprime), !is.na(good_dprime)) %>%
  mutate(param_dprime_diff = good_param_dprime - bad_param_dprime,
         dprime_diff = good_dprime - bad_dprime) %>%
  group_by(subj_num) %>%
  summarize_at(vars(dprime_diff), .funs = c("median", "mad"), na.rm = TRUE) %>%
  mutate(ci_95_lower = median - 2 * mad,
         ci_95_upper = median + 2 * mad)
```
```r
within_subjs_sims_summary %>%
  ggplot(aes(x = condition, y = median, group = factor(subj_num),
             color = factor(subj_num), fill = factor(subj_num))) +
  geom_ribbon(aes(ymin = median - mad, ymax = median + mad),
              alpha = 0.1, size = 0) +
  geom_line() +
  geom_point() +
  theme_bw() +
  guides(color = FALSE, fill = FALSE)
```
```r
within_subjs %>%
  select(-sdt_c) %>%
  super_spread(condition, param_dprime, dprime) %>%
  filter(!is.na(bad_dprime), !is.na(good_dprime)) %>%
  mutate(param_dprime_diff = good_param_dprime - bad_param_dprime,
         dprime_diff = good_dprime - bad_dprime) %>%
  ggplot(aes(x = dprime_diff)) +
  geom_histogram() +
  geom_vline(aes(xintercept = ci_95_lower),
             data = within_subjs_sims_summary_spread, linetype = 3) +
  geom_vline(aes(xintercept = ci_95_upper),
             data = within_subjs_sims_summary_spread, linetype = 3) +
  geom_vline(aes(xintercept = param_dprime_diff)) +
  facet_wrap(~ subj_num) +
  theme_bw()
```
```r
within_subjs %>%
  select(-sdt_c) %>%
  super_spread(condition, param_dprime, dprime) %>%
  filter(!is.na(bad_dprime), !is.na(good_dprime)) %>%
  mutate(param_dprime_diff = good_param_dprime - bad_param_dprime,
         dprime_diff = good_dprime - bad_dprime) %>%
  group_by(iteration) %>%
  summarize(group_diff = mean(dprime_diff),
            se_group_diff = sd(dprime_diff) / sqrt(n())) %>%
  ggplot(aes(x = iteration, y = group_diff)) +
  geom_errorbar(aes(ymin = group_diff - 2 * se_group_diff,
                    ymax = group_diff + 2 * se_group_diff),
                width = 0) +
  theme_bw()
```