```r
knitr::opts_chunk$set(echo = TRUE)

## shortcut for pretty printing of integers
.pi <- function(x) prettyNum(x, big.mark = ",")

## write out the results of a statistical test
report <- function(betas, ser, chisq, df, p, eff, alpha = .05) {
  part1 <- if (p < alpha) {
    paste0("* There was a significant ", eff)
  } else {
    paste0("* The ", eff, " was not significant")
  }
  part2 <- paste(paste(paste0("$\\hat{\\beta}_{", names(betas), "}"),
                       sprintf("%0.3f$ $(SE=%0.3f)$", betas, ser),
                       sep = "="),
                 collapse = ", ")
  part3 <- sprintf("$\\chi^2(%d) = %0.3f$", df, chisq)
  part4 <- if (p < .001) {
    "$p < .001$."
  } else if (p > .999) {
    "$p > .999$."
  } else {
    sprintf("$p = %0.3f$.", p)
  }
  paste(part1, part2, part3, part4, sep = ", ")
}

needed_pkgs <- c("truthiness", "ez", "emmeans", "ordinal", "dplyr",
                 "tibble", "tidyr", "readr", "ggplot2", "forcats")

.junk <- lapply(needed_pkgs, function(.x) {
  if (!requireNamespace(.x)) {
    stop("You must install the '", .x, "' package to run this analysis.")
  }
})

parallel_installed <- if (requireNamespace("parallel")) {
  TRUE
} else {
  warning("You can speed up computation by installing the 'parallel' package.")
  FALSE
}

if (is.null(params$subdir)) {
  stop("You need to specify the subdirectory containing the anonymized data")
}

if (!dir.exists(params$subdir)) {
  stop("subdirectory '", params$subdir, "' does not exist")
}
```
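As a quick illustration of the `report()` helper, a hypothetical call (made-up numbers, not study results) produces a Markdown bullet with inline math:

```r
## hypothetical values, for illustration only (not study results)
report(betas = c(R = 0.5), ser = 0.1, chisq = 12.3, df = 1L,
       p = .001, eff = "main effect of repetition")
## cat() of the result:
## * There was a significant main effect of repetition,
##   $\hat{\beta}_{R}=0.500$ $(SE=0.100)$, $\chi^2(1) = 12.300$, $p = 0.001$.
```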
```r
## Load in the required add-on packages.
library("ez")
library("emmeans")
library("ordinal")
library("truthiness")
library("dplyr")
library("tibble")
library("tidyr")
library("readr")
library("ggplot2")
library("forcats")
```
Importing data from files in subdirectory `r params$subdir`.
For information about the structure of the anonymized data files and stimulus materials, either run `truthiness::codebook()` in the R console, or download and view the file `codebook.html` in the OSF repository for this project.
`r warn(params$subdir)`
```r
sess <- read_csv(file.path(params$subdir, "ANON_sessions.csv"),
                 col_types = "cfccccllllllc") %>%
  mutate(list_id = fct_relevel(list_id,
                               levels(stimulus_conditions[["list_id"]])),
         gender = fct_relevel(gender,
                              c("Female", "Male", "Gender Variant",
                                "Not Reported")))

phase <- read_csv(file.path(params$subdir, "ANON_phases.csv"),
                  col_types = "cfilllc") %>%
  mutate(phase_id = fct_relevel(phase_id, as.character(1:4)))

cjudgments <- read_csv(file.path(params$subdir, "ANON_categories.csv"),
                       col_types = "cfc") %>%
  mutate(stim_id = fct_relevel(stim_id,
                               levels(stimulus_conditions[["stim_id"]])))

ratings <- read_csv(file.path(params$subdir, "ANON_ratings.csv"),
                    col_types = "cffi") %>%
  mutate(phase_id = fct_relevel(phase_id, as.character(1:4)),
         stim_id = fct_relevel(stim_id,
                               levels(stimulus_conditions[["stim_id"]])))

n_item <- ratings %>% distinct(stim_id) %>% nrow()
```
The anonymized files contain `r .pi(nrow(ratings))` truth ratings of `r .pi(n_item)` statements from `r .pi(nrow(sess))` participants. Any participants who did not provide consent for the entire study will already have had their data removed during the pre-processing stage. Likewise, any phases of the study where a participant withheld consent (despite consenting overall) will already have been removed before this stage.
There were four main exclusion criteria for removing participants, applied in the following order:
```r
sess_native <- sess %>% filter(chk_native)
n_nonnative <- nrow(sess) - nrow(sess_native)

sess_nocheat <- sess_native %>% filter(chk_nocheat)
n_cheat <- nrow(sess_native) - nrow(sess_nocheat)

sess_noflat <- sess_nocheat %>% filter(chk_noflatline)
n_flatline <- nrow(sess_nocheat) - nrow(sess_noflat)

sess_dur_all <- sess_noflat %>% filter(chk_dur_all)
n_dur_out_range <- nrow(sess_noflat) - nrow(sess_dur_all)

## manual exclusions at the participant level should be stored in the file
## 'manually_exclusions_participants.csv' in the raw data directory
sess_keep0 <- sess_dur_all %>% filter(chk_notmanex)
n_other_participant <- nrow(sess_dur_all) - nrow(sess_keep0)

n_total <- n_nonnative + n_cheat + n_flatline + n_dur_out_range +
  n_other_participant
```
Applying these criteria resulted in the removal of: `r .pi(n_nonnative)` non-native English-speaking participants; `r .pi(n_cheat)` participants who reported looking up answers; `r .pi(n_flatline)` participants who flat-lined in at least one phase; and `r .pi(n_dur_out_range)` participants who had at least one phase with completion times outside the acceptable range. `r if (n_other_participant > 0L) paste0(.pi(n_other_participant), " participants were excluded manually, for the reasons listed below.") else "No participants were excluded for any other reasons."`
```r
## list the manually excluded participants and the reasons for exclusion;
## filter on sess_dur_all, since sess_keep0 has already had these rows removed
sess_dur_all %>%
  filter(!chk_notmanex) %>%
  select(ID, reason_for_manual_exclusion) %>%
  knitr::kable()
```
In total, the exclusion criteria resulted in the removal of `r .pi(n_total)` participants.
```r
## remove truth ratings from excluded participants
ratings2 <- ratings %>% semi_join(sess_keep0, "ID")

## remove category judgments while we're at it
cjudgments2 <- cjudgments %>% semi_join(sess_keep0, "ID")
```
Removing data from these `r .pi(n_total)` participants resulted in the removal of `r .pi(nrow(ratings) - nrow(ratings2))` truth ratings.
The only exclusion criterion applied at the phase level was failure to complete all of the ratings in the phase.
```r
phase_trim <- phase %>% semi_join(sess_keep0, "ID")

phase_fin <- phase_trim %>% filter(chk_finished)
n_incomplete <- nrow(phase_trim) - nrow(phase_fin)

phase_keep <- phase_fin %>% filter(chk_notmanex)
n_other_phase <- nrow(phase_fin) - nrow(phase_keep)

## now remove the truth ratings
ratings_keep <- ratings2 %>% semi_join(phase_keep, c("ID", "phase_id"))

## also remove the category judgments
cjudgments_keep <- cjudgments2 %>%
  semi_join(phase_keep %>% filter(phase_id == "1"), "ID")
```
This criterion resulted in the removal of `r .pi(n_incomplete)` phases (`r .pi(nrow(ratings2) - nrow(ratings_keep))` truth ratings).
`r if (n_other_phase > 0L) paste0("An additional ", .pi(n_other_phase), " phases were excluded manually for the following reasons:")`
```r
phase_fin %>%
  filter(!chk_notmanex) %>%
  select(ID, phase_id, reason_for_manual_exclusion) %>%
  knitr::kable()
```
`r warn(params$subdir)`
```r
## do the final stages of pre-processing and
## prepare the data for modeling

## make sure there are no entries in sess_keep0 for subjects
## who have lost all of their phase data
sess_keep <- semi_join(sess_keep0, ratings_keep, "ID")
n_orphaned <- nrow(sess_keep0) - nrow(sess_keep)

## look up which condition each retained rating belongs to
ratecond <- ratings_keep %>%
  inner_join(sess_keep %>% select(ID, list_id), "ID") %>%
  inner_join(stimulus_conditions, c("list_id", "stim_id"))

mod1 <- T ~ R + I1 + I2 + I3 + R:I1 + R:I2 + R:I3 +
  (R | subj_id) + (R | stim_id)

mod2 <- T ~ I1 + I2 + I3 + R:I1 + R:I2 + R:I3 +
  (R | subj_id) + (R | stim_id)

mod3 <- T ~ R + I1 + I2 + I3 + R:I1 + R:I2 + R:I3 +
  (R:I1 + R:I2 + R:I3 | subj_id) + (R:I1 + R:I2 + R:I3 | stim_id)

mod4 <- T ~ R + I1 + I2 + I3 +
  (R:I1 + R:I2 + R:I3 | subj_id) + (R:I1 + R:I2 + R:I3 | stim_id)

## prepare data for clmm modeling:
## make ID into a factor (subj_id);
## make T into a factor;
## add numerical (deviation-coded) predictors
moddata <- ratecond %>%
  mutate(subj_id = factor(ID),
         T = factor(trating, levels = 1:7, ordered = TRUE),
         R = if_else(repetition == "repeated", 1/2, -1/2),
         I1 = if_else(interval == "1 day", 3/4, -1/4),
         I2 = if_else(interval == "1 week", 3/4, -1/4),
         I3 = if_else(interval == "1 month", 3/4, -1/4)) %>%
  select(subj_id, stim_id, repetition, interval, R, I1, I2, I3, T)

## need to re-fit the model with predictors as factors
## so that we can use emmeans for the planned comparisons
## or for the equivalence test
mod5 <- T ~ Rep * Int +
  (R:I1 + R:I2 + R:I3 | subj_id) + (R:I1 + R:I2 + R:I3 | stim_id)

mod6 <- T ~ Rep * Int + (Rep | subj_id) + (Rep | stim_id)

moddata2 <- moddata %>%
  mutate(Rep = C(repetition,
                 matrix(c(.5, -.5), nrow = 2,
                        dimnames = list(c("repeated", "new")))),
         Int = C(interval,
                 matrix(c(-1/4, -1/4, -1/4,
                           3/4, -1/4, -1/4,
                          -1/4,  3/4, -1/4,
                          -1/4, -1/4,  3/4),
                        nrow = 4, byrow = TRUE,
                        dimnames = list(c("immediate", "1 day",
                                          "1 week", "1 month"),
                                        c("I1", "I2", "I3")))))
```
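As a quick sanity check (not part of the original pipeline), the coding that `C()` attaches to the factors can be inspected with `contrasts()`, which should reproduce the deviation-coding matrices defined above:

```r
## inspect the contrast matrices attached by C()
contrasts(moddata2$Rep)  ## repeated = .5, new = -.5
contrasts(moddata2$Int)  ## rows: immediate, 1 day, 1 week, 1 month
```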
```r
if (n_orphaned) {
  cat("The phase exclusions above combined with dropouts meant ",
      "that an additional ", n_orphaned,
      if (n_orphaned == 1L) " participant was " else " participants were ",
      "left with no ratings data.", sep = "")
}
```
Dropouts and the application of exclusion criteria left `r .pi(nrow(sess_keep))` participants remaining for analysis, who contributed a total of `r .pi(nrow(ratings_keep))` ratings.
```r
included <- phase_keep %>%
  inner_join(sess_keep %>% select(ID, gender), "ID") %>%
  count(phase_id, gender, .drop = FALSE) %>%
  rename(n_analysed = n)

excluded <- anti_join(phase, phase_keep, c("ID", "phase_id")) %>%
  inner_join(sess %>% select(ID, gender), "ID") %>%
  count(phase_id, gender, .drop = FALSE) %>%
  rename(n_excluded = n)

inner_join(excluded, included, c("phase_id", "gender")) %>%
  mutate(n_recruited = n_excluded + n_analysed) %>%
  select(phase_id, gender, n_recruited, n_excluded, n_analysed) %>%
  knitr::kable(caption = "Participants Recruited, Excluded, and Analysed, Separated by Experimental Phase and Gender")
```
First, let's collapse over interval and look at the means and SDs for repetition.
```r
ratecond %>%
  group_by(repetition) %>%
  summarise(mean = round(mean(trating), 2),
            `standard deviation` = round(sd(trating), 2)) %>%
  ungroup()
```
Now let's look at the repetition effects for each interval.
```r
rcagg <- ratecond %>%
  group_by(repetition, interval) %>%
  summarise(mean = round(mean(trating), 2),
            sd = round(sd(trating), 2)) %>%
  ungroup()

rcagg_subj <- ratecond %>%
  group_by(ID, repetition, interval) %>%
  summarise(mean = mean(trating)) %>%
  ungroup()

rcagg_stim <- ratecond %>%
  group_by(stim_id, repetition, interval) %>%
  summarise(mean = mean(trating)) %>%
  ungroup()

rcagg %>%
  pivot_wider(names_from = c(repetition),
              values_from = c(mean, sd)) %>%
  mutate(`difference (repeated - new)` =
           round(mean_repeated - mean_new, 2)) %>%
  select(interval,
         `mean repeated` = mean_repeated,
         `mean new` = mean_new,
         `difference (repeated - new)`,
         `SD repeated` = sd_repeated,
         `SD new` = sd_new)
```
```r
rcagg_both <- bind_rows(
  rcagg_subj %>%
    mutate(unit = "participant") %>%
    select(ID, unit, everything()),
  rcagg_stim %>%
    mutate(unit = "stimulus",
           stim_id = as.character(stim_id)) %>%
    select(ID = stim_id, unit, everything()))

ggplot(rcagg_both,
       aes(repetition, mean, color = repetition, fill = repetition)) +
  geom_violin(alpha = .2) +
  geom_jitter(alpha = .1) +
  geom_point(data = rcagg, color = "black", size = 3, alpha = .5) +
  geom_line(aes(group = interval), data = rcagg,
            alpha = .5, color = "black") +
  facet_grid(unit ~ interval) +
  theme(legend.position = "none") +
  labs(y = "mean rating")
```
We analyzed the data using cumulative logit mixed models fit with the `clmm()` function from the `ordinal` package [@ordinal]. We tested the repetition main effect and the repetition-by-interval interaction using likelihood ratio tests (LRTs) via the `anova()` function (see the sketch following the model definitions below). Each test compared two models: a base model containing the fixed effect (or effects) of interest, and a comparison model identical to the base model except that the fixed effect (or effects) of interest had been removed. Each test was conducted with an $\alpha$ level of .05.
cat("* Model 1: ", "`", as.character(as.expression(mod1)), "`\n", sep = "") cat("* Model 2: ", "`", as.character(as.expression(mod2)), "`\n", sep = "") cat("* Model 3: ", "`", as.character(as.expression(mod3)), "`\n", sep = "") cat("* Model 4: ", "`", as.character(as.expression(mod4)), "`\n", sep = "")
where:

`T`
: individual truth rating;

`R`
: deviation-coded repetition effect (repeated = .5, new = -.5);

`I1`
: deviation-coded contrast of interval, 1 day (3/4) vs. immediate (-1/4);

`I2`
: deviation-coded contrast of interval, 1 week (3/4) vs. immediate (-1/4);

`I3`
: deviation-coded contrast of interval, 1 month (3/4) vs. immediate (-1/4).

The comparison of models 1 and 2 tests the significance of the main effect of repetition (`R`). The comparison of models 3 and 4 tests the significance of the repetition-by-interval interaction (coded by `R:I1`, `R:I2`, and `R:I3`).
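For illustration, once the models have been fit and stored in the `mods` list (see the next chunk), each LRT could equivalently be obtained directly with `anova()`; this is a sketch that assumes both members of each pair converged. The code further below instead computes the same statistics manually from the log-likelihoods.

```r
## sketch: the same likelihood ratio tests via anova()
anova(mods[["main_comp"]], mods[["main_base"]])  ## main effect of repetition
anova(mods[["ix_comp"]], mods[["ix_base"]])      ## repetition-by-interval interaction
```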
```r
## fit models using the serial version (no multi-core)
mods <- list()
doFit <- TRUE

## NOTE: moddata is the main dataset that we'll use for analysis
mt1 <- system.time(
  mods[["main_base"]] <- clmm(mod1, data = moddata, Hess = TRUE,
                              model = FALSE, doFit = doFit))

mt2 <- system.time(
  mods[["main_comp"]] <- clmm(mod2, data = moddata, Hess = FALSE,
                              model = FALSE, doFit = doFit))

mt3 <- system.time(
  mods[["ix_base"]] <- clmm(mod3, data = moddata, Hess = TRUE,
                            model = FALSE, doFit = doFit))

mt4 <- system.time(
  mods[["ix_comp"]] <- clmm(mod4, data = moddata, Hess = FALSE,
                            model = FALSE, doFit = doFit))

mt5 <- system.time(
  mods[["ix2"]] <- clmm(mod5, data = moddata2, Hess = TRUE,
                        model = FALSE, doFit = doFit))

mt6 <- system.time(
  mods[["main2"]] <- clmm(mod6, data = moddata2, Hess = TRUE,
                          model = FALSE, doFit = doFit))

dur <- mt1[["elapsed"]] + mt2[["elapsed"]] + mt3[["elapsed"]] +
  mt4[["elapsed"]] + mt5[["elapsed"]] + mt6[["elapsed"]]

cat("* Total computation time: ", sprintf("%0.2f minutes", dur / 60), "\n")
```
```r
## fit models using the parallelized version (multi-core)
ncores <- parallel::detectCores()
ncores <- if (ncores > 6L) 6L else ncores
cl <- parallel::makeCluster(ncores)

dur <- system.time(
  mods <- parallel::parLapply(
    cl,
    list(list(m = mod1, h = TRUE,  d = moddata),
         list(m = mod2, h = FALSE, d = moddata),
         list(m = mod3, h = TRUE,  d = moddata),
         list(m = mod4, h = FALSE, d = moddata),
         list(m = mod5, h = TRUE,  d = moddata2),
         list(m = mod6, h = TRUE,  d = moddata2)),
    function(.x) {
      ordinal::clmm(.x[["m"]], data = .x[["d"]],
                    Hess = .x[["h"]], doFit = TRUE)
    }))[["elapsed"]]

names(mods) <- c("main_base", "main_comp", "ix_base", "ix_comp",
                 "ix2", "main2")

parallel::stopCluster(cl)

cat("* Total computation time: ", sprintf("%0.2f minutes", dur / 60), "\n")
```
```r
## run likelihood ratio tests

## main effect
chisq_m <- as.numeric(-2 * logLik(mods[["main_comp"]]) -
                        -2 * logLik(mods[["main_base"]]))
df_m <- length(coefficients(mods[["main_base"]])) -
  length(coefficients(mods[["main_comp"]]))
p_m <- pchisq(chisq_m, df_m, lower.tail = FALSE)
b_m <- coefficients(mods[["main_base"]])["R"]
se_m <- sqrt(diag(vcov(mods[["main_base"]])))["R"]

## interaction
chisq_i <- as.numeric(-2 * logLik(mods[["ix_comp"]]) -
                        -2 * logLik(mods[["ix_base"]]))
df_i <- length(coefficients(mods[["ix_base"]])) -
  length(coefficients(mods[["ix_comp"]]))
p_i <- pchisq(chisq_i, df_i, lower.tail = FALSE)
b_i <- coefficients(mods[["ix_base"]])[c("R:I1", "R:I2", "R:I3")]
se_i <- sqrt(diag(vcov(mods[["ix_base"]])))[c("R:I1", "R:I2", "R:I3")]
```
`r warn(params$subdir)`
```r
cat(report(b_m, se_m, chisq_m, df_m, p_m, "main effect of repetition"), "\n")
```
```r
if ((p_m > .05) && (p_i > .05)) {
  cat("\n",
      "### Equivalence test",
      "\n",
      paste0("$p$-values less than $\\alpha$ = .05 imply equivalence to a ",
             "raw effect of .14 log odds ",
             "(1/10 of a scale point on a 7-point scale)"),
      "",
      sep = "\n")
}
```
```r
## only run in the case of a nonsignificant main effect and interaction;
## perform the equivalence test using emmeans
main_emm <- emmeans(mods[["main2"]], pairwise ~ Rep, data = moddata2)

test(main_emm, delta = .14, side = "equivalence")$contrasts %>%
  as.data.frame() %>%
  knitr::kable(digits = 3)
```
`r warn(params$subdir)`
```r
cat(report(b_i, se_i, chisq_i, df_i, p_i,
           "repetition-by-interval interaction"), "\n")
```
```r
if (p_i < .05) {
  cat("\n",
      "### Planned comparisons",
      "\n",
      paste0("To follow up the significant interaction, ",
             "we tested the illusory truth effect at each of the four ",
             "intervals, using the Holm-Bonferroni procedure to maintain ",
             "the familywise error rate at $\\alpha = .05$."),
      sep = "\n")
} else {
  cat("\n",
      "### Equivalence test",
      "\n",
      paste0("$p$-values less than $\\alpha$ = .05 imply equivalence to a ",
             "raw effect of .14 log odds ",
             "(1/10 of a scale point on a 7-point scale)"),
      "",
      sep = "\n")
}
```
```r
## perform planned comparisons using the Holm-Bonferroni correction
## (uniformly more powerful than the simple Bonferroni correction)
mod_emm <- emmeans(mods[["ix2"]], pairwise ~ Rep | Int, data = moddata2)

mod_cont <- mod_emm$contrasts %>%
  as.data.frame() %>%
  select(-df)

## sort p-values in ascending order
p_sort <- order(mod_cont[["p.value"]])

## calculate cutoff p-values, alpha / (m + 1 - k)
p_k <- .05 / (4 + 1 - 1:4)

## get the minimal index k such that the k-th smallest p-value exceeds
## its cutoff; if none exceeds its cutoff, all four nulls are rejected
exceed <- which(mod_cont[["p.value"]][p_sort] > p_k)
min_ix <- if (length(exceed)) min(exceed) else 5L

reject_null <- rep(FALSE, 4)
reject_null[seq_len(min_ix - 1L)] <- TRUE
mod_cont[["reject_null"]][p_sort] <- reject_null

mod_cont %>%
  select(-p.value) %>%
  knitr::kable(digits = 3)
```
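As a cross-check on the hand-rolled Holm-Bonferroni procedure (a sanity check added here, not part of the original pipeline), base R's `p.adjust()` implements the same step-down method; up to boundary cases where a $p$-value exactly equals its cutoff, the rejection decisions should agree:

```r
## Holm-adjusted p-values via base R should imply the same rejections
p_holm <- p.adjust(mod_cont[["p.value"]], method = "holm")
identical(p_holm < .05, mod_cont[["reject_null"]])
```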
```r
mod_emm <- emmeans(mods[["ix2"]], allsimp ~ Rep * Int, data = moddata2)

## perform the equivalence test using emmeans
test(mod_emm, delta = .14, side = "equivalence")$contrasts %>%
  as.data.frame() %>%
  knitr::kable(digits = 3)
```
To validate the model, we simulate data based on the model fit and plot the distribution of the simulated data against the raw data and cell means.
```r
## let's validate the interaction model (mods[["ix2"]])
## by simulating data from the model
simdat <- function(md, sdat, rdat, mdat) {
  subj_rfx <- VarCorr(md)$subj_id
  item_rfx <- VarCorr(md)$stim_id
  cf <- coef(md)

  ## simulate by-subject random effects
  srfx <- MASS::mvrnorm(nrow(sdat),
                        mu = c(sub_int = 0, sub_ri1 = 0,
                               sub_ri2 = 0, sub_ri3 = 0),
                        Sigma = subj_rfx) %>%
    as_tibble() %>%
    mutate(ID = sdat[["ID"]], list_id = sdat[["list_id"]]) %>%
    select(ID, list_id, everything())

  ## simulate by-stimulus random effects
  irfx <- MASS::mvrnorm(nrow(stimulus_materials),
                        mu = c(stim_int = 0, stim_ri1 = 0,
                               stim_ri2 = 0, stim_ri3 = 0),
                        Sigma = item_rfx) %>%
    as_tibble() %>%
    mutate(stim_id = factor(1:nrow(stimulus_materials))) %>%
    select(stim_id, everything())

  ## combine the random effects with the fixed effects to get the
  ## linear predictor (eta), then convert to a simulated rating
  srfx %>%
    inner_join(stimulus_conditions, "list_id") %>%
    select(ID, stim_id, sub_int, sub_ri1, sub_ri2, sub_ri3) %>%
    semi_join(rdat, c("ID", "stim_id")) %>%
    inner_join(irfx, "stim_id") %>%
    inner_join(mdat %>%
                 mutate(subj_id = as.character(subj_id)) %>%
                 select(subj_id, stim_id, repetition, interval,
                        R, I1, I2, I3),
               c("ID" = "subj_id", "stim_id")) %>%
    mutate(eta = sub_int + stim_int +
             cf["Rep1"] * R +
             cf["IntI1"] * I1 + cf["IntI2"] * I2 + cf["IntI3"] * I3 +
             (sub_ri1 + stim_ri1 + cf["Rep1:IntI1"]) * R * I1 +
             (sub_ri2 + stim_ri2 + cf["Rep1:IntI2"]) * R * I2 +
             (sub_ri3 + stim_ri3 + cf["Rep1:IntI3"]) * R * I3,
           trating = eta2resp(eta, cf[c(paste0(1:6, "|", 2:7))])) %>%
    select(ID, stim_id, repetition, interval, eta, trating)
}

dsim <- simdat(mods[["ix2"]], sess_keep, ratings_keep, moddata2)

sim_subj_means <- dsim %>%
  group_by(ID, repetition, interval) %>%
  summarize(mean = mean(trating)) %>%
  ungroup() %>%
  mutate(unit = "participant")

sim_stim_means <- dsim %>%
  group_by(stim_id, repetition, interval) %>%
  summarize(mean = mean(trating)) %>%
  ungroup() %>%
  mutate(unit = "stimulus")

ggplot(rcagg_both,
       aes(repetition, mean, color = repetition, fill = repetition)) +
  geom_violin(data = bind_rows(sim_subj_means, sim_stim_means),
              alpha = .2) +
  geom_jitter(alpha = .1) +
  geom_point(data = rcagg, color = "black", size = 3, alpha = .5) +
  geom_line(aes(group = interval), data = rcagg,
            alpha = .5, color = "black") +
  facet_grid(unit ~ interval) +
  theme(legend.position = "none") +
  labs(y = "mean rating")
```
Most of the literature on the illusory truth effect uses analysis of variance as the primary inferential approach. For comparison with the existing literature, we also perform an ANOVA on our data. However, the ANOVA treats stimuli as fixed effects and treats the ordinal ratings as a continuous DV, even though their variance depends on the mean; we therefore provide this analysis for illustrative purposes only, and draw our conclusions solely from the cumulative logit mixed model analysis above.
`r warn(params$subdir)`
```r
## convert the DV to an integer,
## calculate subject means,
## fill in missing cells with NA
adata <- moddata %>%
  mutate(trating = as.integer(as.character(T))) %>%
  group_by(subj_id, repetition, interval) %>%
  summarize(mean_rating = mean(trating)) %>%
  ungroup()

## listwise deletion: keep only subjects with all 8 cells
adata2 <- adata %>%
  add_count(subj_id) %>%
  filter(n == 8L) %>%
  select(-n) %>%
  mutate(subj_id = fct_drop(subj_id))

ez::ezANOVA(adata2, dv = mean_rating, wid = subj_id,
            within = .(repetition, interval), type = 3)
```
```r
## before exiting, print out the session info
sessionInfo()
```
```r
cat(readLines(system.file("repr_instr.Rmd", package = "truthiness")),
    sep = "\n")
```