scripts/readAndScoreIAT.R

# This script reads and processes raw IAT data from the iat.txt datafile
# generated by Project Implicit.

# Tables generated:
# tbl_iat : raw IAT experimental data + calculated pairing information
# tbl_D_calc_statistics_long : basic blockwise statistics to be used in D calculations
# tbl_D_calc_statistics : blockwise statistics - one line per subject

library(bit64)
library(tidyr)
library(ggplot2)
library(dplyr)
library(data.table)
library(dtplyr)
source("R/IATfunctions.R")
# detach("package:plyr", unload=TRUE)

# Read in experimental data
tbl_iat <- fread("extdata/August2015/iat.txt")
task_names <- c("demo_racea", "demo_raceb", "demo_racec","demo_raced")
tbl_iat <- filter(tbl_iat, task_name %in% task_names)

# # For each subject, map blocks to pairing conditions as follows (number of trials)
# 1 practice good vs bad (20)
# 2 practice white vs black (20)
# 3 practice white/good vs black/bad (20)
# 4 test white/good vs black/bad (40)
# 5 switch practice (40)
# 6 practice white/bad vs black/good (20)
# 7 test white/bad vs black/good (40)

tbl_iat$pairing <- NA

# Pairing 1 (attribute)
tbl_iat$pairing <- ifelse(tbl_iat$block_pairing_definition=="Good,Bad"
                      & (tbl_iat$block_number==0 | tbl_iat$block_number==1), 1, tbl_iat$pairing)
tbl_iat$pairing <- ifelse(tbl_iat$block_pairing_definition=="Bad,Good"
                      & (tbl_iat$block_number==0 | tbl_iat$block_number==1), 1, tbl_iat$pairing)
# Pairing 2 (concept)
tbl_iat$pairing <- ifelse(tbl_iat$block_pairing_definition=="European American,African American"
                      & (tbl_iat$block_number==0 | tbl_iat$block_number==1), 2, tbl_iat$pairing)
tbl_iat$pairing <- ifelse(tbl_iat$block_pairing_definition=="African American,European American"
                      & (tbl_iat$block_number==0 | tbl_iat$block_number==1), 2, tbl_iat$pairing)
# Pairing 3 (practice: white/good, african american/bad)
tbl_iat$pairing <- ifelse(tbl_iat$block_pairing_definition=="European American/Good,African American/Bad"
                      & (tbl_iat$block_number==2 | tbl_iat$block_number==5), 3, tbl_iat$pairing)
tbl_iat$pairing <- ifelse(tbl_iat$block_pairing_definition=="African American/Bad,European American/Good"
                      & (tbl_iat$block_number==2 | tbl_iat$block_number==5), 3, tbl_iat$pairing)
# Pairing 4 (test: white/good, african american/bad)
tbl_iat$pairing <- ifelse(tbl_iat$block_pairing_definition=="European American/Good,African American/Bad"
                      & (tbl_iat$block_number==3 | tbl_iat$block_number==6), 4, tbl_iat$pairing)
tbl_iat$pairing <- ifelse(tbl_iat$block_pairing_definition=="African American/Bad,European American/Good"
                      & (tbl_iat$block_number==3 | tbl_iat$block_number==6), 4, tbl_iat$pairing)
# Pairing 5 (switch practice)
tbl_iat$pairing <- ifelse(tbl_iat$block_pairing_definition=="Good,Bad"
                      & tbl_iat$block_number==4, 5, tbl_iat$pairing)
tbl_iat$pairing <- ifelse(tbl_iat$block_pairing_definition=="Bad,Good"
                      & tbl_iat$block_number==4, 5, tbl_iat$pairing)
tbl_iat$pairing <- ifelse(tbl_iat$block_pairing_definition=="European American,African American"
                      & tbl_iat$block_number==4, 5, tbl_iat$pairing)
tbl_iat$pairing <- ifelse(tbl_iat$block_pairing_definition=="African American,European American"
                      & tbl_iat$block_number==4, 5, tbl_iat$pairing)
# Pairing 6 (practice: white/bad, african american/good)
tbl_iat$pairing <- ifelse(tbl_iat$block_pairing_definition=="African American/Good,European American/Bad"
                      & (tbl_iat$block_number==2 | tbl_iat$block_number==5), 6, tbl_iat$pairing)
tbl_iat$pairing <- ifelse(tbl_iat$block_pairing_definition=="European American/Bad,African American/Good"
                      & (tbl_iat$block_number==2 | tbl_iat$block_number==5), 6, tbl_iat$pairing)
# Pairing 7 (test: white/bad, african american/good)
tbl_iat$pairing <- ifelse(tbl_iat$block_pairing_definition=="African American/Good,European American/Bad"
                      & (tbl_iat$block_number==3 | tbl_iat$block_number==6), 7, tbl_iat$pairing)
tbl_iat$pairing <- ifelse(tbl_iat$block_pairing_definition=="European American/Bad,African American/Good"
                      & (tbl_iat$block_number==3 | tbl_iat$block_number==6), 7, tbl_iat$pairing)

# Convert milliseconds to seconds
tbl_iat$trial_latency <- tbl_iat$trial_latency/1000

tbl_D_calc_statistics_long <- tbl_iat %>%
  select(session_id, pairing, trial_latency) %>%
  group_by(session_id, pairing) %>%
  summarize(mean_latency = mean(trial_latency),
            sd_latency = sd(trial_latency),
            n_trials = n()) %>%
  filter(!is.na(pairing) & !is.na(mean_latency) & !is.na(sd_latency) & !is.na(n_trials))

# Separately spread statistics into wide form and rename the variables appropriately

temp1 <- tbl_D_calc_statistics_long %>% select(session_id, pairing, mean_latency) %>% spread(pairing, mean_latency)
temp2 <- tbl_D_calc_statistics_long %>% select(session_id, pairing, sd_latency) %>% spread(pairing, sd_latency)
temp3 <- tbl_D_calc_statistics_long %>% select(session_id, pairing, n_trials) %>% spread(pairing, n_trials)
names(temp1) <- c("session_id", paste("lat", 1:7, sep=""))
names(temp2) <- c("session_id", paste("sd", 1:7, sep=""))
names(temp3) <- c("session_id", paste("n", 1:7, sep=""))

# Join temporary tables into one wide-form table
tbl_D_calc_statistics <- temp1 %>% left_join(temp2) %>% left_join(temp3)

# Remove temporary tables
rm(list=c(paste("temp", 1:3, sep="")))

# Create a variable that indicates whether or not a dataset is complete
tbl_D_calc_statistics$complete <- !is.na(rowSums(tbl_D_calc_statistics))

# Check if someone is complete
tbl_D_calc_statistics[tbl_D_calc_statistics$session_id==2616738347,]$complete

tbl_D_calc_statistics$meanLatencyAll <- with(tbl_D_calc_statistics, (lat3+lat4+lat6+lat7)/4)

# # # Perform data reduction based on inclusion/drop criteria

# # Set criteria here

criterion_under300 <- .1  # Allowed overall proportion of responses under 300ms

criterion_blockErrorRate <- .4  # Allowed proportional error rate in ANY BLOCK

# Create flags for ANY paired block with > specified error rate
# Flag variables are TRUE if a subject should be EXCLUDED based on the criterion

flagError40 <- tbl_iat %>%
  select(session_id, pairing, trial_error) %>%
  group_by(session_id, pairing) %>%
  summarize(error_rate = mean(trial_error)) %>%
  group_by(session_id) %>%
  summarize(flag = max(error_rate) >= criterion_blockErrorRate)

message("Subjects exceeding block error rate of ",
        criterion_blockErrorRate, ": ",
        sum(flagError40$flag), " of ",
        length(flagError40$session_id), ", approximately ",
        round(mean(flagError40$flag),2)*100, "%")

# Create flags for total % trials under 300ms in all paired blocks

flagUnder300 <- tbl_iat %>%
  select(session_id, pairing, trial_latency) %>%
  filter(pairing %in% c("3","4","6","7")) %>% # only examine in paired blocks
  group_by(session_id) %>%
  summarize(meanUnder300 = mean(trial_latency <= .3), # returns proportion under 300ms
            flag = mean(trial_latency <= .3) > criterion_under300)

subjectsFlaggedUnder300 <- flagUnder300 %>% filter(flag) %>% select(session_id)

message("Subjects with more than ",
        criterion_under300*100, "% of trials under 300ms: ",
        sum(flagUnder300$flag), " of ",
        length(flagUnder300$session_id), ", approximately ",
        round(mean(flagUnder300$flag),2)*100, "%")

# Perform filtration

tbl_D_calc_statistics <- tbl_D_calc_statistics %>%
  filter(complete) %>%
  filter(session_id %in% unique(flagUnder300[flagUnder300$flag==FALSE,]$session_id)) %>%
  filter(session_id %in% unique(flagError40[flagError40$flag==FALSE,]$session_id))

message("Proceeding with D score calculation. \nTotal subjects: ",
        length(tbl_D_calc_statistics$session_id))

# # IAT calculations # #

tbl_D_calc_statistics$dPractice <- with(tbl_D_calc_statistics, dScore(lat6, lat3, sd6, sd3, n6, n3))
tbl_D_calc_statistics$dTest <- with(tbl_D_calc_statistics, dScore(lat7, lat4, sd7, sd4, n7, n4))
tbl_D_calc_statistics$dAll <- (.5*tbl_D_calc_statistics$dPractice+.5*tbl_D_calc_statistics$dTest)

# # For each subject, determine an order
# # white/good first = 1, black/good first = 2

tbl_iat$order <- NA
tbl_iat$order2 <- NA

# If block 6 was pairing 7 (AA Good, EA Bad), order is incompatible first
# If block 3 was pairing 7 (AA Good, EA Bad), order is compatible first

tbl_iat$order <- ifelse(tbl_iat$block_number==6 & tbl_iat$pairing == 7, 2, tbl_iat$order)
tbl_iat$order <- ifelse(tbl_iat$block_number==3 & tbl_iat$pairing == 7, 1, tbl_iat$order)

# Subjects with order White/Good first/second, method 1
subsWG1M1 <- unique(tbl_iat[tbl_iat$order==1,]$session_id)
subsWG2M1 <- unique(tbl_iat[tbl_iat$order==2,]$session_id)

# Reverse it: If block 3 was pairing 4 (AA Bad, EA Good), incompatible first
# If block 3 was pairing 4 (AA Bad, EA Good), compatible first

tbl_iat$order2 <- ifelse(tbl_iat$block_number==3 & tbl_iat$pairing == 4, 2, tbl_iat$order2)
tbl_iat$order2 <- ifelse(tbl_iat$block_number==6 & tbl_iat$pairing == 4, 1, tbl_iat$order2)

# Subjects with order White/Good first/second, method 2
subsWG1M2 <- unique(tbl_iat[tbl_iat$order2==1,]$session_id)
subsWG2M2 <- unique(tbl_iat[tbl_iat$order2==2,]$session_id)

# List of complete subjects post filtration
tbl_completeSubjects <- tbl_D_calc_statistics %>% select(session_id)

# Check if order calculations worked - sums should be equal
# sum(tbl_completeSubjects$session_id %in% subsWG1M1) == sum(tbl_completeSubjects$session_id %in% subsWG1M2)
# sum(tbl_completeSubjects$session_id %in% subsWG2M1) == sum(tbl_completeSubjects$session_id %in% subsWG2M2)

# Add order variable to IAT wide table - 1 = compatible first, 2 = incompatible first

tbl_D_calc_statistics$order <- ifelse(tbl_D_calc_statistics$session_id %in% subsWG1M1, 1, 2)

# Create a relative order variable - that is, order relative to subject's D score sign

tbl_D_calc_statistics$relativeOrder <- ifelse(tbl_D_calc_statistics$dAll >= 0,
                                                   tbl_D_calc_statistics$order,
                                                   3-tbl_D_calc_statistics$order)

# Summarize D scores
summary(tbl_D_calc_statistics$dAll)
hist(tbl_D_calc_statistics$dAll)

compRelFirst <- tbl_D_calc_statistics %>%
  filter(relativeOrder == 1) %>% select(dPractice:dAll)
incompRelFirst <- tbl_D_calc_statistics %>%
  filter(relativeOrder == 2) %>% select(dPractice:dAll)

summary(compRelFirst)
summary(incompRelFirst)

hist(compRelFirst$dAll)
hist(incompRelFirst$dAll)

compFirst <- tbl_D_calc_statistics %>%
  filter(order == 1) %>% select(dPractice:dAll)
incompFirst <- tbl_D_calc_statistics %>%
  filter(order == 2) %>% select(dPractice:dAll)

summary(compFirst)
summary(incompFirst)

hist(compFirst$dAll)
hist(incompFirst$dAll)

# Test for effects of relative order
# lm(I(abs(dAll))~relativeOrder, data=tbl_D_calc_statistics) %>% summary
# lm(I(abs(dAll))~order, data=tbl_D_calc_statistics) %>% summary

# Save data file containing IAT results along with all blockwise statistics
write.csv(tbl_D_calc_statistics, "data/iatVerboseOLPSAug2015.csv")

# Save list of all subjects with complete IAT data
write.csv(tbl_completeSubjects, "data/completeIATsessionIDsAug2015.csv", row.names=FALSE)

# Create stats for DDM analysis from IAT data

tbl_DDM_statistics_long <- tbl_iat %>%
  select(session_id, pairing, trial_latency, trial_error) %>%
  filter(session_id %in% tbl_completeSubjects$session_id) %>%
  group_by(session_id, pairing) %>%
  summarize(mean_latency = mean(trial_latency),
            var_latency = var(trial_latency),
            n_trials = n(),
            accuracy_rate = 1-mean(trial_error))

temp1 <- tbl_DDM_statistics_long %>% select(session_id, pairing, mean_latency) %>% spread(pairing, mean_latency)
temp2 <- tbl_DDM_statistics_long %>% select(session_id, pairing, var_latency) %>% spread(pairing, var_latency)
temp3 <- tbl_DDM_statistics_long %>% select(session_id, pairing, accuracy_rate) %>% spread(pairing, accuracy_rate)
temp4 <- tbl_DDM_statistics_long %>% select(session_id, pairing, n_trials) %>% spread(pairing, n_trials)

names(temp1) <- c("session_id", paste("lat", 1:7, sep=""))
names(temp2) <- c("session_id", paste("var", 1:7, sep=""))
names(temp3) <- c("session_id", paste("acc", 1:7, sep=""))
names(temp4) <- c("session_id", paste("n", 1:7, sep=""))

# Join temporary tables into one wide-form table
tbl_DDM_statistics <- temp1 %>% left_join(temp2) %>% left_join(temp3) %>% left_join(temp4)

# Remove temporary tables
rm(list=c(paste("temp", 1:4, sep="")))

# Save tbl_iat

write.csv(tbl_iat, "data/tbl_iat.csv", row.names=FALSE, sep = ",")

# Save the file to be used by DDM analyses
write.csv(tbl_DDM_statistics, "data/DDMstatisticsAug2015.csv", row.names = FALSE)
michaelpmcdonald/IATanalyses documentation built on May 22, 2019, 9:52 p.m.