Based on process.words.R
available from the French lexicon project results folder:
https://sites.google.com/site/frenchlexicon/results
download >> "French Lexicon Project trial-level results with R scripts.zip"
# Clean the workspace, move into the ChronoLex folder, and load the raw
# trial-level results.  (In the collapsed one-line form, everything after
# the first `#` was dead code; the original multi-line structure is
# restored here.)
rm(list = ls())

# Change directory to the chronolex folder.
setwd("./chronolex")

a <- read.table("results.utf8.txt",
                as.is = TRUE,
                encoding = "utf-8",
                sep = "\t",
                col.names = c("sujet", "num", "mot", "rt", "acc",
                              "typemot", "ordre", "ordresuite"))

# Sort by participant and turn the participant id into a factor.
a <- a[order(a$sujet), ]
a$sujet <- as.factor(a$sujet)
# a$sujet <- paste("subj", a$sujet, sep = "")

# Number of subjects.
length(unique(a$sujet))
# Suppress the pseudowords: keep only real-word trials.
# (The whole line was commented out by the line collapse; restored here.)
b <- subset(a, typemot == "mot")
# Number of word trials (parentheses print the value).
(n <- nrow(b))
# Plot accuracy ~ median RT, one point per subject, and draw the
# rejection cutoffs.  (The whole line was dead behind the leading `#`
# after the line collapse; restored here.)
rtsuj   <- tapply(a$rt, a$sujet, median)
accsuj  <- tapply(a$acc, a$sujet, mean)
ntrials <- tapply(a$rt, a$sujet, length)  # one P with 500 trials per condition
plot(rtsuj, accsuj, main = "RT & accuracy (one point per subject)")
rtcutoff  <- 1500  # 1100
acccutoff <- .4    # .75
abline(h = acccutoff)
abline(v = rtcutoff)
Check the RT distributions of participants whose median RT exceeds 1500 ms.
# Inspect the RT distribution of participants whose median RT exceeds
# 1500 ms.  The original repeated the same three statements four times
# (for pp[1]..pp[4]); consolidated into one loop, bounded so it does not
# index past the end of `pp` when fewer than four participants are flagged.
list.p <- unique(a$sujet)
pp <- list.p[rtsuj > 1500]
for (i in seq_len(min(4L, length(pp)))) {
  hist(a$rt[a$sujet == pp[i]], breaks = 50, xlim = c(0, 2500))
}
Clearly some participants with low accuracy and high RT did not follow the task instructions.
# Plot accuracy ~ 3rd-quartile RT, one point per subject, with cutoffs.
# (Restored from the collapsed, fully commented-out line.)
qrtsuj  <- tapply(a$rt, a$sujet, quantile, probs = 0.75)
accsuj  <- tapply(a$acc, a$sujet, mean)
ntrials <- tapply(a$rt, a$sujet, length)  # one P with 500 trials per condition
plot(qrtsuj, accsuj, main = "RT & accuracy (one point per subject)")
qrtcutoff <- 1500  # 1100
acccutoff <- .4    # .75
abline(h = acccutoff)
# Bug fix: the vertical line must show the 3rd-quartile cutoff defined
# just above, not the median cutoff (`rtcutoff`) from the earlier plot.
abline(v = qrtcutoff)
Also, several participants have very unusual RT distributions, with a second bump of late RTs.
# Histograms for the first two participants whose 3rd-quartile RT
# exceeds 1500 ms.  (Restored statement-per-line structure; the one-line
# form was a parse error.)
list.p <- unique(a$sujet)
pp <- list.p[qrtsuj > 1500]
hist(a$rt[a$sujet == pp[1]], breaks = 50, xlim = c(0, 2500))
# hist(a$rt[a$sujet==631],breaks=50,xlim=c(0,2500))
hist(a$rt[a$sujet == pp[2]], breaks = 50, xlim = c(0, 2500))
# hist(a$rt[a$sujet==876],breaks=50,xlim=c(0,2500))
# Select subjects according to the cutoffs: median RT, accuracy,
# number of trials, and 3rd-quartile RT must all be on the good side.
# (Restored from the collapsed, fully commented-out line.)
goodsubjects <- (rtsuj < rtcutoff) & (accsuj > acccutoff) &
  (ntrials > 1500) & (qrtsuj < qrtcutoff)
# Index the per-subject logical vector by each trial's subject factor;
# subset() drops FALSE and NA rows alike.
a <- subset(a, goodsubjects[a$sujet] == TRUE)
# How many subjects and trials survive the selection.
length(unique(a$sujet))
nrow(a)
# Number of trials per item (including errors).
# (Restored from the collapsed, fully commented-out line.)
ntrials <- tapply(a$rt, a$mot, length)
# Number of trials per participant (including errors), separately for
# words and pseudowords, then crossed participant x trial type.
# (Restored from the collapsed, fully commented-out line.)
aw <- subset(a, typemot == "mot")
ntrials <- tapply(aw$rt, aw$sujet, length)
summary(ntrials)

anw <- subset(a, typemot == "nonmot")
ntrials <- tapply(anw$rt, anw$sujet, length)
summary(ntrials)

ntrials <- tapply(a$rt, list(a$sujet, a$typemot), length)
summary(ntrials)
# Summaries and side-by-side histograms of per-participant median RT,
# words vs. non-words.  (Restored statement-per-line structure.)
summary(tapply(aw$rt, aw$sujet, median))
summary(tapply(anw$rt, anw$sujet, median))
par(mfcol = c(1, 2))
hist(tapply(aw$rt, aw$sujet, median), breaks = 50, xlim = c(0, 2000),
     main = "Median RT by part.: words")
hist(tapply(anw$rt, anw$sujet, median), breaks = 50, xlim = c(0, 2000),
     main = "Median RT by part.: non-words")
# Suppress trials with wrong responses and report the rejection rate.
# (Restored from the collapsed, fully commented-out line.)
n <- nrow(b)
b <- subset(b, b$acc == 1)
1 - (nrow(b) / n)  # proportion of rejected data points
# Number of correct responses per item.
nhits <- tapply(b$rt, b$mot, length)
# Build a tidy tibble (participant, rt, acc, condition) from the cleaned
# trials and save it to disk.  (Restored from the collapsed,
# partially commented-out line.)
library(tibble)
b <- subset(a, a$typemot == "mot" | a$typemot == "nonmot")
b$typemot <- factor(b$typemot)
library(plyr)
# Relabel the condition levels in English.
b$typemot <- revalue(b$typemot, c("mot" = "word", "nonmot" = "non-word"))
flp <- tibble(participant = as.factor(b$sujet),
              rt = b$rt,
              acc = b$acc,
              condition = b$typemot)
# Get rid of missing participants: refactoring drops the levels of
# subjects removed by the cutoffs.
flp$participant <- factor(flp$participant)
save(flp, file = "french_lexicon_project_rt_data.RData")
# Close all open graphics devices and report the elapsed CPU time.
# (Restored statement-per-line structure; the one-line form was a
# parse error.)
graphics.off()
proc.time()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.