knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
knitr::opts_chunk$set(echo = TRUE) # Libraries necessary for this vignette library(rio) library(flextable) library(dplyr) library(tidyr) library(semanticprimeR) set.seed(583902) # Function for simulation item_power <- function(data, # name of data frame dv_col, # name of DV column as a character item_col, # number of items column as a character nsim = 10, # small for cran sample_start = 20, sample_stop = 200, sample_increase = 5, decile = .5){ DF <- cbind.data.frame( "dv" = data[ , dv_col], "items" = data[ , item_col] ) # just in case colnames(DF) <- c("dv", "items") # figure out the "sufficiently narrow" ci value SE <- tapply(DF$dv, DF$items, function (x) { sd(x)/sqrt(length(x)) }) cutoff <- quantile(SE, probs = decile) # sequence of sample sizes to try samplesize_values <- seq(sample_start, sample_stop, sample_increase) # create a blank table for us to save the values in sim_table <- matrix(NA, nrow = length(samplesize_values)*nsim, ncol = length(unique(DF$items))) # make it a data frame sim_table <- as.data.frame(sim_table) # add a place for sample size values sim_table$sample_size <- NA iterate <- 1 for (p in 1:nsim){ # loop over sample sizes for (i in 1:length(samplesize_values)){ # temp that samples and summarizes temp <- DF %>% group_by(items) %>% sample_n(samplesize_values[i], replace = T) %>% summarize(se = sd(dv)/sqrt(length(dv))) # dv on items colnames(sim_table)[1:length(unique(DF$items))] <- temp$items sim_table[iterate, 1:length(unique(DF$items))] <- temp$se sim_table[iterate, "sample_size"] <- samplesize_values[i] sim_table[iterate, "nsim"] <- p iterate <- iterate + 1 } } # figure out cut off final_sample <- sim_table %>% pivot_longer(cols = -c(sample_size, nsim)) %>% dplyr::rename(item = name, se = value) %>% group_by(sample_size, nsim) %>% summarize(percent_below = sum(se <= cutoff)/length(unique(DF$items))) %>% ungroup() %>% # then summarize all down averaging percents dplyr::group_by(sample_size) %>% summarize(percent_below = mean(percent_below)) %>% dplyr::arrange(percent_below) %>% ungroup() return(list( SE = SE, cutoff = cutoff, DF = DF, sim_table = sim_table, final_sample = final_sample )) }
Cue Word Triggered Memory’s Phenomenological
Data provided by: Krystian Barzykowski
Participants participated in a voluntary memory task, where they were provided with a word cue in response to which they were about to recall an autobiographical memory. The item set consists of 30 word cues that were rated/classified by 142 separate participants.
They briefly described the content of their thoughts recalled in response to the word-cue and rated it on a 7-point scale: (a) to what extent the content was accompanied by unexpected physiological sensations (henceforth, called physiological sensation), (b) to what extent they had deliberately tried to bring the thought to mind (henceforth, called effort), (c) clarity (i.e. how clearly and well an individual remembered a given memory/mental content), (d) how detailed the content was, (e) how specific and concrete the content was, (f) intensity of emotions experienced in response to the content, (g) how surprising the content was, (h) how personal it was, and (i) the relevance to current life situation (not included).
Data included within this vignette.
DF <- import("data/barzykowski_data.xlsx") %>% bind_rows(import("data/barzykowski_data.xlsx", sheet = 2)) %>% bind_rows(import("data/barzykowski_data.xlsx", sheet = 3)) %>% bind_rows(import("data/barzykowski_data.xlsx", sheet = 4)) %>% bind_rows(import("data/barzykowski_data.xlsx", sheet = 5)) %>% bind_rows(import("data/barzykowski_data.xlsx", sheet = 6)) %>% bind_rows(import("data/barzykowski_data.xlsx", sheet = 7)) %>% bind_rows(import("data/barzykowski_data.xlsx", sheet = 8)) %>% bind_rows(import("data/barzykowski_data.xlsx", sheet = 9)) %>% bind_rows(import("data/barzykowski_data.xlsx", sheet = 10)) %>% bind_rows(import("data/barzykowski_data.xlsx", sheet = 11)) %>% bind_rows(import("data/barzykowski_data.xlsx", sheet = 12)) %>% bind_rows(import("data/barzykowski_data.xlsx", sheet = 13)) %>% bind_rows(import("data/barzykowski_data.xlsx", sheet = 14)) %>% bind_rows(import("data/barzykowski_data.xlsx", sheet = 15)) %>% bind_rows(import("data/barzykowski_data.xlsx", sheet = 16)) %>% bind_rows(import("data/barzykowski_data.xlsx", sheet = 17)) %>% bind_rows(import("data/barzykowski_data.xlsx", sheet = 18)) %>% bind_rows(import("data/barzykowski_data.xlsx", sheet = 19)) %>% bind_rows(import("data/barzykowski_data.xlsx", sheet = 20)) %>% bind_rows(import("data/barzykowski_data.xlsx", sheet = 21)) %>% bind_rows(import("data/barzykowski_data.xlsx", sheet = 22)) %>% bind_rows(import("data/barzykowski_data.xlsx", sheet = 23)) %>% bind_rows(import("data/barzykowski_data.xlsx", sheet = 24)) %>% bind_rows(import("data/barzykowski_data.xlsx", sheet = 25)) %>% bind_rows(import("data/barzykowski_data.xlsx", sheet = 26)) %>% bind_rows(import("data/barzykowski_data.xlsx", sheet = 27)) %>% bind_rows(import("data/barzykowski_data.xlsx", sheet = 28)) %>% bind_rows(import("data/barzykowski_data.xlsx", sheet = 29)) %>% bind_rows(import("data/barzykowski_data.xlsx", sheet = 30)) str(DF)
No official publication, see citation below.
The cues were used in study published here: Barzykowski, K., Niedźwieńska, A., & Mazzoni, G. (2019). How intention to retrieve a memory and expectation that it will happen influence retrieval of autobiographical memories. Consciousness and Cognition, 72, 31-48. DOI: https://doi.org/10.1016/j.concog.2019.03.011
cue-word, valence, memory retrieval
Open access with reference to original paper (Attribution-NonCommercial-ShareAlike CC BY-NC-SA)
Poland, Kraków
metadata <- import("data/barzykowski_metadata.xlsx") flextable(metadata) %>% autofit()
Note that the data is already in long format (each item has one row), and therefore, we do not need to restructure the data.
In this example, we have multiple variables to choose from for our analysis. We could include several to find the sample size rules for further study. In this example, we'll use the variables with the least and most variability and take the average of the 40% decile as suggested in our manuscript. This choice is somewhat arbitrary - in a real study, you could choose to use only the variables you were interested in and pick the most conservative values or simply average together estimates from all variables.
apply(DF[ , -c(1,2)], 2, sd)
These are Likert type items. The variance within them appears roughly equal. The lowest variance appears to be r names(which.min(apply(DF[ , -c(1,2)], 2, sd)))
, and the maximum appears to be r names(which.max(apply(DF[ , -c(1,2)], 2, sd)))
.
Run the function proposed in the manuscript:
# set seed set.seed(8548) # Function for simulation var1 <- item_power(data = DF, # name of data frame dv_col = "How surprising", # name of DV column as a character item_col = "Cue no", # number of items column as a character nsim = 10, sample_start = 20, sample_stop = 100, sample_increase = 5, decile = .4) var2 <- item_power(DF, # name of data frame "Personal nature", # name of DV column as a character item_col = "Cue no", # number of items column as a character nsim = 10, sample_start = 20, sample_stop = 100, sample_increase = 5, decile = .4)
What the usual standard error for the data that could be considered for our stopping rule?
# individual SEs for how surprising var1$SE # var 1 cut off var1$cutoff # individual SEs for personal nature var2$SE # var 2 cut off var2$cutoff # overall cutoff cutoff <- mean(var1$cutoff, var2$cutoff) cutoff
The average SE cutoff across both variables is r round(cutoff, digits = 3)
.
How large does the sample have to be for 80% to 95% of the items to be below our stopping SE rule?
cutoff_personal <- calculate_cutoff(population = DF, grouping_items = "Cue no", score = "Personal nature", minimum = as.numeric(min(DF$`Personal nature`)), maximum = as.numeric(max(DF$`Personal nature`))) # showing how this is the same as the person calculated version versus semanticprimeR's function cutoff_personal$cutoff final_table_personal <- calculate_correction( proportion_summary = var1$final_sample, pilot_sample_size = length(unique(DF$`Participant's ID`)), proportion_variability = cutoff_personal$prop_var ) flextable(final_table_personal) %>% autofit()
cutoff_surprising <- calculate_cutoff(population = DF, grouping_items = "Cue no", score = "How surprising", minimum = as.numeric(min(DF$`How surprising`)), maximum = as.numeric(max(DF$`How surprising`))) # showing how this is the same as the person calculated version versus semanticprimeR's function cutoff_surprising$cutoff final_table_surprising <- calculate_correction( proportion_summary = var2$final_sample, pilot_sample_size = length(unique(DF$`Participant's ID`)), proportion_variability = cutoff_surprising$prop_var ) flextable(final_table_surprising) %>% autofit()
In this scenario, we could go with the point wherein they both meet the 80% criterion, which is $n_{personal}$ = r final_table_personal %>% filter(percent_below >= 80) %>% arrange(percent_below, corrected_sample_size) %>% slice_head() %>% pull(corrected_sample_size) %>% round()
to $n_{surprising}$ = r final_table_surprising %>% filter(percent_below >= 80) %>% arrange(percent_below, corrected_sample_size) %>% slice_head() %>% pull(corrected_sample_size) %>% round()
. In these scenarios, it is probably better to estimate a larger sample.
If you decide to use 95% power as your criterion, you would see that items need somewhere between $n_{personal}$ = r final_table_personal %>% filter(percent_below >= 95) %>% arrange(percent_below, corrected_sample_size) %>% slice_head() %>% pull(corrected_sample_size) %>% round()
to $n_{surprising}$ = r final_table_surprising %>% filter(percent_below >= 95) %>% arrange(percent_below, corrected_sample_size) %>% slice_head() %>% pull(corrected_sample_size) %>% round()
participants for both variables. In this case, you could choose to make the larger value for participants your maximum sample size to ensure both variables reach the criterion.
You should also consider any potential for missing data and/or unusable data given the requirements for your study. Given that participants are likely to see all items in this study, we could use the minimum, stopping rule, and maximum defined above. However, one should consider that not all participants will be able to respond to all items within a memory.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.