knitr::opts_chunk$set(echo = TRUE) ## shortcut for pretty printing for integers .pi <- function(x) prettyNum(x, big.mark=',') ## write out results of statistical test report <- function(betas, ser, chisq, df, p, eff, alpha = .05) { part1 <- if (p < alpha) { paste0("* There was a significant ", eff) } else { paste0("* The ", eff, " was not significant") } part2 <- paste(paste(paste0("$\\hat{\\beta}_{", names(betas), "}"), sprintf("%0.3f$ $(SE=%0.3f)$", betas, ser), sep = "="), collapse = ", ") part3 <- sprintf("$\\chi^2(%d) = %0.3f$", df, chisq) part4 <- if (p < .001) { "$p < .001$." } else if (p > .999) { "$p > .999$." } else { sprintf("$p = %0.3f$.", p) } paste(part1, part2, part3, part4, sep = ", ") } needed_pkgs <- c("truthiness", "ez", "emmeans", "ordinal", "dplyr", "tibble", "tidyr", "readr", "ggplot2", "forcats") .junk <- lapply(needed_pkgs, function(.x) { if (!requireNamespace(.x)) { stop("You must install the '", .x, "' package to run this analysis.") } }) parallel_installed <- if (requireNamespace("parallel")) { TRUE } else { warning("You can speed up computation by installing the 'parallel' package.") FALSE }
## Load in the required add-on packages. library("ez") library("emmeans") library("ordinal") library("truthiness") library("dplyr") library("tibble") library("tidyr") library("readr") library("ggplot2") library("forcats")
For information about the structure of the anonymized data files please see help("truth_trajectory_data", "truthiness")
, which documents the data objects sessions
, phases
, ratings
, and cjudgments
.
The anonymized files contain r .pi(nrow(ratings))
truth ratings of
r .pi(nrow(stimulus_materials))
statements from r .pi(nrow(sessions))
participants.
There were eight main exclusion criteria for removing participants, applied in the following order:
n_total <- sessions %>% filter(!keep) %>% nrow() sessions %>% filter(!keep) %>% count(excl_reason)
In total, the exclusion criteria resulted in the removal of
r .pi(n_total)
participants.
## remove truth ratings from excluded participants ratings2 <- ratings %>% semi_join(sessions %>% filter(keep), "ID") ## remove category judgments while we're at it cjudgments2 <- cjudgments %>% semi_join(sessions %>% filter(keep), "ID")
Removing data from these
r .pi(n_total)
participants resulted in the removal of
r .pi(nrow(ratings) - nrow(ratings2))
truth ratings
and
r .pi(nrow(cjudgments) - nrow(cjudgments2))
category judgments.
excluded <- sessions %>% filter(!keep) %>% count(Gender, .drop = FALSE) %>% rename(excluded = n) recruited <- sessions %>% count(Gender, .drop = FALSE) %>% rename(recruited = n) .r1 <- recruited %>% inner_join(excluded, "Gender") %>% mutate(analyzed = recruited - excluded, Gender = as.character(Gender)) .rtot <- summarize(.r1, recruited = sum(recruited), excluded = sum(excluded), analyzed = sum(analyzed)) %>% mutate(Gender = "**TOTAL**") %>% select(Gender, everything()) bind_rows(.r1, .rtot) %>% knitr::kable()
The only exclusion criteria applied at the phase-level were (1) lack of consent for that phase; (2) failure to complete all of the ratings in the phase; (3) any other reason specified by the researcher (manual exclusion).
phase1_excl <- phases %>% filter(phase_id == "1") %>% anti_join(sessions %>% filter(excl_phase == "1"), "ID") %>% filter(!(chk_consent & chk_finished & chk_notmanex)) phase2_excl <- phases %>% filter(phase_id == "2") %>% anti_join(sessions %>% filter(excl_phase == "2"), "ID") %>% filter(!(chk_consent & chk_finished & chk_notmanex)) phase3_excl <- phases %>% filter(phase_id == "3") %>% anti_join(sessions %>% filter(excl_phase == "3"), "ID") %>% filter(!(chk_consent & chk_finished & chk_notmanex)) phase4_excl <- phases %>% filter(phase_id == "4") %>% anti_join(sessions %>% filter(excl_phase == "4"), "ID") %>% filter(!(chk_consent & chk_finished & chk_notmanex)) excluded_p <- bind_rows(phase1_excl, phase2_excl, phase3_excl, phase4_excl) tots_phs <- excluded_p %>% mutate(p_excl_reason = factor(p_excl_reason) %>% fct_relevel("Did not give consent for phase (or consent missing)", "Did not complete phase")) %>% count(p_excl_reason) n_phs_removed <- tots_phs %>% summarize(n = sum(n)) %>% pull() ## now remove the truth ratings ratings_keep <- ratings2 %>% anti_join(excluded_p, c("ID", "phase_id")) ## also remove the category judgments cjudgments_keep <- cjudgments2 %>% anti_join(excluded_p %>% filter(phase_id == "1"), "ID") tots_phs %>% rename(reason = p_excl_reason)
Applying these criteria resulted in the removal of
r .pi(n_phs_removed)
phases
(r .pi(nrow(ratings2) - nrow(ratings_keep))
truth ratings).
subj_excl <- sessions %>% filter(!keep) %>% mutate(phase_id = factor(excl_phase, levels = 1:4)) %>% count(phase_id, Gender, .drop = FALSE, name = "n_subj_excluded") n_excluded_p <- excluded_p %>% inner_join(sessions %>% select(ID, Gender), "ID") %>% count(phase_id, Gender, .drop = FALSE) %>% rename(n_phases_excluded = n) n_starting <- phases %>% inner_join(sessions %>% select(ID, Gender), "ID") %>% count(phase_id, Gender, .drop = FALSE, name = "n_recruited") analyzed_gender <- phases %>% filter(keep) %>% inner_join(sessions, "ID") %>% count(phase_id, Gender, .drop = FALSE, name = "n_analysed") tots <- inner_join(n_starting, subj_excl, c("phase_id", "Gender")) %>% inner_join(n_excluded_p, c("phase_id", "Gender")) %>% mutate(n_retained = n_recruited - n_subj_excluded - n_phases_excluded) %>% inner_join(analyzed_gender, c("phase_id", "Gender")) tots2 <- tots %>% group_by(phase_id) %>% summarize(n_recruited = sum(n_recruited), n_subj_excluded = sum(n_subj_excluded), n_phases_excluded = sum(n_phases_excluded), n_retained = sum(n_retained), n_analysed = sum(n_analysed)) %>% mutate(Gender = "**TOTAL**") alltots <- bind_rows(tots %>% mutate(Gender = factor(Gender, levels = c(levels(Gender), "**TOTAL**"))), tots2) %>% mutate(Gender = fct_relevel(Gender, "Female", "Male", "Gender variant", "Prefer not to say", "(Missing)", "**TOTAL**")) %>% mutate(n_excluded = n_subj_excluded + n_phases_excluded) %>% select(phase_id, Gender, n_recruited, n_excluded, n_retained, n_analysed) alltots %>% arrange(phase_id, Gender) %>% knitr::kable(caption = "Participants and Phases Recruited, Excluded, and Retained by Experimental Phase and Gender")
## do the final stages of pre-processing and ## prepare the data for modeling ## lookup which condition each rating belongs to
First, let's collapse over interval and look at the means and SDs for repetition.
ratecond <- ratings_keep %>% inner_join(sessions %>% select(ID, list_id), "ID") %>% inner_join(stimulus_conditions, c("list_id", "stim_id")) ratecond %>% group_by(repetition) %>% summarise(mean = round(mean(trating), 2), `standard deviation` = round(sd(trating), 2)) %>% ungroup()
Now let's look at the repetition effects for each interval.
rcagg <- ratecond %>% group_by(repetition, interval) %>% summarise(mean = round(mean(trating), 2), sd = round(sd(trating), 2)) %>% ungroup() rcagg_subj <- ratecond %>% group_by(ID, repetition, interval) %>% summarise(mean = mean(trating)) %>% ungroup() rcagg_stim <- ratecond %>% group_by(stim_id, repetition, interval) %>% summarise(mean = mean(trating)) %>% ungroup() rcagg %>% pivot_wider(names_from = c(repetition), values_from = c(mean, sd)) %>% mutate(`difference (repeated - new)` = round(mean_repeated - mean_new, 2)) %>% select(interval, `mean repeated` = mean_repeated, `mean new` = mean_new, `difference (repeated - new)`, `SD repeated` = sd_repeated, `SD new` = sd_new)
rcagg_both <- bind_rows( rcagg_subj %>% mutate(unit = "participant") %>% select(ID, unit, everything()), rcagg_stim %>% mutate(unit = "stimulus", stim_id = as.character(stim_id)) %>% select(ID = stim_id, unit, everything())) g <- ggplot(rcagg_both, aes(repetition, mean, color = repetition, fill = repetition)) + geom_violin(alpha = .2) + geom_jitter(alpha = .1) + geom_point(data = rcagg, color = "black", size = 3, alpha = .5) + geom_line(aes(group = interval), data = rcagg, alpha = .5, color = "black") + facet_grid(unit~interval) + theme(legend.position = "none") + labs(y = "mean rating") if (params$savefig) { ggsave("means_plot.png", g, width = 8, height = 6) } g
We analyzed the data using cumulative logit mixed models via the clmm()
function from the ordinal
package [@ordinal]. We tested the repetition main effect and the repetition-by-interval interaction using likelihood ratio tests (LRTs), using the anova()
function. Each test consisted of a comparison of two models, a base model containing the fixed effect (or effects) of interest and a comparison model identical to the base model except that the fixed effect (or effects) of interest had been removed. Each test was conducted with an $\alpha$ level of .05.
mod1 <- T ~ R + I1 + I2 + I3 + R:I1 + R:I2 + R:I3 + (R | subj_id) + (R | stim_id) mod2 <- T ~ I1 + I2 + I3 + R:I1 + R:I2 + R:I3 + (R | subj_id) + (R | stim_id) mod3 <- T ~ R + I1 + I2 + I3 + R:I1 + R:I2 + R:I3 + (R:I1 + R:I2 + R:I3 | subj_id) + (R:I1 + R:I2 + R:I3 | stim_id) mod4 <- T ~ R + I1 + I2 + I3 + (R:I1 + R:I2 + R:I3 | subj_id) + (R:I1 + R:I2 + R:I3 | stim_id) ## prepare data for clmm modeling: ## make ID into a factor (subj_id); ## make T into a factor; ## add numerical (deviation-coded) predictors moddata <- ratecond %>% mutate(subj_id = factor(ID), T = factor(trating, levels = 1:7, ordered = TRUE), R = if_else(repetition == "repeated", 1/2, -1/2), I1 = if_else(interval == "1 day", 3/4, -1/4), I2 = if_else(interval == "1 week", 3/4, -1/4), I3 = if_else(interval == "1 month", 3/4, -1/4)) %>% select(subj_id, stim_id, repetition, interval, R, I1, I2, I3, T) ## need to re-fit the model with predictors as factors ## so that we can use emmeans ## for the planned comparisons or for the equivalence test mod5 <- T ~ Rep * Int + (R:I1 + R:I2 + R:I3 | subj_id) + (R:I1 + R:I2 + R:I3 | stim_id) mod6 <- T ~ Rep * Int + (Rep | subj_id) + (Rep | stim_id) moddata2 <- moddata %>% mutate(Rep = C(repetition, matrix(c(.5, -.5), nrow = 2, dimnames = list(c("repeated", "new")))), Int = C(interval, matrix(c(-1/4, -1/4, -1/4, 3/4, -1/4, -1/4, -1/4, 3/4, -1/4, -1/4, -1/4, 3/4), nrow = 4, byrow = TRUE, dimnames = list(c("immediate", "1 day", "1 week", "1 month"), c("I1", "I2", "I3")))))
cat("* Model 1: ", "`", as.character(as.expression(mod1)), "`\n", sep = "") cat("* Model 2: ", "`", as.character(as.expression(mod2)), "`\n", sep = "") cat("* Model 3: ", "`", as.character(as.expression(mod3)), "`\n", sep = "") cat("* Model 4: ", "`", as.character(as.expression(mod4)), "`\n", sep = "")
where:
T
: individual truth rating;R
: deviation-coded repetition effect (repeated = .5, new = -.5);I1
: deviation-coded contrast of interval, 1 day (3/4) vs. immediate (-1/4);I2
: deviation-coded contrast of interval, 1 week (3/4) vs. immediate (-1/4);I3
: deviation-coded contrast of interval, 1 month (3/4) vs. immediate (-1/4).The comparison of models 1 and 2 test the significance of the main effect of repetition (R
). The comparison of models 3 and 4 test the significance of the repetition-by-interval interaction (coded by R:I1
, R:I2
, and R:I3
).
mods <- truth_trajectory_models cat("<div class=\"warn\">", "**WARNING! Because of the computationally intensive nature of fitting ordinal models (with fitting times on a multicore machine of ~24 hours), the results below use fitted models data objects built into the truthiness package. To re-fit the models and re-compute the results, re-compile this script with the parameter `refit` set to `TRUE`.**", "</div>", sep = "\n")
## fit models using serial version (no multi-core) mods <- list() doFit <- TRUE ## NOTE: moddata is the main dataset that we'll use for analysis mt1 <- system.time( mods[["main_base"]] <- clmm(mod1, data = moddata, Hess = TRUE, model = FALSE, doFit = doFit)) mt2 <- system.time( mods[["main_comp"]] <- clmm(mod2, data = moddata, Hess = FALSE, model = FALSE, doFit = doFit)) mt3 <- system.time( mods[["ix_base"]] <- clmm(mod3, data = moddata, Hess = TRUE, model = FALSE, doFit = doFit)) mt4 <- system.time( mods[["ix_comp"]] <- clmm(mod4, data = moddata, Hess = FALSE, model = FALSE, doFit = doFit)) mt5 <- system.time( mods[["ix2"]] <- clmm(mod5, data = moddata2, Hess = TRUE, model = FALSE, doFit = doFit)) mt6 <- system.time( mods[["main2"]] <- clmm(mod6, data = moddata2, Hess = TRUE, model = FALSE, doFit = doFit)) dur <- mt1[["elapsed"]] + mt2[["elapsed"]] + mt3[["elapsed"]] + mt4[["elapsed"]] + mt5[["elapsed"]] + mt6[["elapsed"]] cat("* Total computation time: ", sprintf("%0.2f minutes", dur / 60), "\n")
## fit models using parallelized version (multi-core) ncores <- parallel::detectCores() - 1L ncores <- if (ncores < 1L) 1L else ncores ncores <- if (ncores > 6L) 6L else ncores cl <- parallel::makeCluster(ncores) dur <- system.time( mods <- parallel::parLapply( cl, list(list(m = mod1, h = TRUE, d = moddata), list(m = mod2, h = FALSE, d = moddata), list(m = mod3, h = TRUE, d = moddata), list(m = mod4, h = FALSE, d = moddata), list(m = mod5, h = TRUE, d = moddata2), list(m = mod6, h = TRUE, d = moddata2)), function(.x) { ordinal::clmm(.x[["m"]], data = .x[["d"]], Hess = .x[["h"]], doFit = TRUE) }))[["elapsed"]] names(mods) <- c("main_base", "main_comp", "ix_base", "ix_comp", "ix2", "main2") parallel::stopCluster(cl) cat("* Total computation time: ", sprintf("%0.2f minutes", dur / 60), "\n")
## run likelihood ratio tests ## main effect chisq_m <- as.numeric(-2 * logLik(mods[["main_comp"]]) - -2 * logLik(mods[["main_base"]])) df_m <- length(coefficients(mods[["main_base"]])) - length(coefficients(mods[["main_comp"]])) p_m <- pchisq(chisq_m, df_m, lower.tail = FALSE) b_m <- coefficients(mods[["main_base"]])["R"] se_m <- sqrt(diag(vcov(mods[["main_base"]])))["R"] ## interaction chisq_i <- as.numeric(-2 * logLik(mods[["ix_comp"]]) - -2 * logLik(mods[["ix_base"]])) df_i <- length(coefficients(mods[["ix_base"]])) - length(coefficients(mods[["ix_comp"]])) p_i <- pchisq(chisq_i, df_i, lower.tail = FALSE) b_i <- coefficients(mods[["ix_base"]])[c("R:I1", "R:I2", "R:I3")] se_i <- sqrt(diag(vcov(mods[["ix_base"]])))[c("R:I1", "R:I2", "R:I3")]
cat(report(b_m, se_m, chisq_m, df_m, p_m, "main effect of repetition"), "\n")
if ((p_m > .05) && (p_i > .05)) { cat("\n", "### Equivalence test", "\n", "$p$-values less than $\\alpha$ = .05 imply equivalence to raw effect of .14 log odds (1/10 of a scale point on a 7 point scale)", "", sep = "\n") }
## only run in the case of nonsignificant main effect and interaction ## perform equivalence test using emmeans main_emm <- emmeans(mods[["main2"]], pairwise ~ Rep, data = moddata2) test(main_emm, delta = .14, side = "equivalence")$contrasts %>% as.data.frame() %>% knitr::kable(digits = 3)
cat(report(b_i, se_i, chisq_i, df_i, p_i, "repetition-by-interval interaction"), "\n")
if (p_i < .05) { cat("\n", "### Planned comparisons", "\n", paste0("To follow up the significant interaction, ", "we tested the illusory truth effect at each of the four intervals, ", "using the Holm-Bonferroni procedure to maintain ", "the familywise error rate at $\\alpha = .05$."), sep = "\n") } else { cat("\n", "### Equivalence test", "\n", "$p$-values less than $\\alpha$ = .05 imply equivalence to raw effect of .14 log odds (1/10 of a scale point on a 7 point scale)", "", sep = "\n") }
## perform planned comparisons using Holm-Bonferroni correction ## (uniformly more powerful than simple Bonferroni) ## sort p-values in ascending order mod_emm <- emmeans(mods[["ix2"]], pairwise ~ Rep | Int, data = moddata2) mod_cont <- mod_emm$contrasts %>% as.data.frame() %>% select(-df) p_sort <- order(mod_cont[["p.value"]]) ## calculate cutoff p-values, alpha / (m + 1 - k) p_k <- .05 / (4 + 1 - 1:4) ## get minimal index such that p_k > cutoff nonsig_ix <- which(mod_cont[["p.value"]][p_sort] > p_k) if (length(nonsig_ix) > 0L) { min_ix <- min(nonsig_ix) reject_null <- rep(FALSE, 4) reject_null[seq_len(min_ix - 1L)] <- TRUE } else { reject_null <- rep(TRUE, 4) } mod_cont[["reject_null"]][p_sort] <- reject_null mod_cont %>% select(-p.value) %>% knitr::kable(digits = 3)
mod_emm <- emmeans(mods[["ix2"]], allsimp ~ Rep * Int, data = moddata2) ## perform equivalence test using emmeans test(mod_emm, delta = .14, side = "equivalence")$contrasts %>% as.data.frame() %>% knitr::kable(digits = 3)
## restructure the output to reduce size and save mods2 <- lapply(mods, function(.x) { .x[["gfList"]] <- .x[["L"]] <- .x[["condVar"]] <- .x[["model"]] <- .x[["Zt"]] <- .x[["fitted.values"]] <- NULL attr(.x[["terms"]], ".Environment") <- NULL attr(.x[["formula"]], ".Environment") <- NULL .x }) saveRDS(mods2, "fitted_models.rds")
To validate the model, we simulate data based on the model fit and plot the distribution of the simulated data against the raw data and cell means.
## let's validate the interaction model (mods[["ix2"]]) ## by simulating data from the model simdat <- function(md, sdat, rdat, mdat) { subj_rfx <- VarCorr(md)$stim_id item_rfx <- VarCorr(md)$stim_id cf <- coef(md) srfx <- MASS::mvrnorm(nrow(sdat), mu = c(sub_int = 0, sub_ri1 = 0, sub_ri2 = 0, sub_ri3 = 0), Sigma = subj_rfx) %>% as_tibble() %>% mutate(ID = sdat[["ID"]], list_id = sdat[["list_id"]]) %>% select(ID, list_id, everything()) irfx <- MASS::mvrnorm(nrow(stimulus_materials), mu = c(stim_int = 0, stim_ri1 = 0, stim_ri2 = 0, stim_ri3 = 0), Sigma = item_rfx) %>% as_tibble() %>% mutate(stim_id = factor(1:nrow(stimulus_materials))) %>% select(stim_id, everything()) srfx %>% inner_join(stimulus_conditions, "list_id") %>% select(ID, stim_id, sub_int, sub_ri1, sub_ri2, sub_ri3) %>% semi_join(rdat, c("ID", "stim_id")) %>% inner_join(irfx, "stim_id") %>% inner_join(mdat %>% mutate(subj_id = as.character(subj_id)) %>% select(subj_id, stim_id, repetition, interval, R, I1, I2, I3), c("ID" = "subj_id", "stim_id")) %>% mutate(eta = sub_int + stim_int + cf["Rep1"] * R + cf["IntI1"] * I1 + cf["IntI2"] * I2 + cf["IntI3"] * I3 + (sub_ri1 + stim_ri1 + cf["Rep1:IntI1"]) * R * I1 + (sub_ri2 + stim_ri2 + cf["Rep1:IntI2"]) * R * I2 + (sub_ri3 + stim_ri3 + cf["Rep1:IntI3"]) * R * I3, trating = eta2resp(eta, cf[c(paste0(1:6, "|", 2:7))])) %>% select(ID, stim_id, repetition, interval, eta, trating) } dsim <- simdat(mods[["ix2"]], sessions %>% filter(keep), ratings_keep, moddata2) sim_subj_means <- dsim %>% group_by(ID, repetition, interval) %>% summarize(mean = mean(trating)) %>% ungroup() %>% mutate(unit = "participant") sim_stim_means <- dsim %>% group_by(stim_id, repetition, interval) %>% summarize(mean = mean(trating)) %>% ungroup() %>% mutate(unit = "stimulus") g <- ggplot(rcagg_both, aes(repetition, mean, color = repetition, fill = repetition)) + geom_violin(data = bind_rows(sim_subj_means, sim_stim_means), alpha = .2) + geom_jitter(alpha = .1) + geom_point(data = rcagg, color = "black", size = 3, alpha = .5) + geom_line(aes(group = interval), data = rcagg, alpha = .5, color = "black") + facet_grid(unit~interval) + theme(legend.position = "none") + labs(y = "mean rating") if (params$savefig) { ggsave("validation_plot.png", g, width=8, height=6) } g
Most of the literature on the illusory truth effect uses the analysis of variance as the primary inferential approach. For comparison with the existing literature, we perform an ANOVA on our data. The ANOVA analysis treats stimuli as fixed effects and violates the assumption of a continuous DV with variance proportional to the mean, so we provide this only for illustrative purposes, and only draw our conclusions from the results of the cumulative logit mixed model analysis above.
## convert DV to an integer ## calculate subject means ## fill in missing cells with NA adata <- moddata %>% mutate(trating = as.integer(as.character(T))) %>% group_by(subj_id, repetition, interval) %>% summarize(mean_rating = mean(trating)) %>% ungroup() ## listwise deletion adata2 <- adata %>% add_count(subj_id) %>% filter(n == 8L) %>% select(-n) %>% mutate(subj_id = fct_drop(subj_id)) ez::ezANOVA(adata2, dv = mean_rating, wid = subj_id, within = .(repetition, interval), type = 3)
## before exiting, print out session info sessionInfo()
cat(readLines(system.file("repr_instr.Rmd", package = "truthiness")), sep = "\n")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.