# Martin R. Vasilev, 2016
# V0.1
## The script may seem a bit chaotic because of the large number of additional functions.
## The reason for this approach is that processing is faster compared to using for loops.
#########
# SETUP #
#########
data<- readLines("data/raw/files.txt")
#data<- readLines("//bournemouth.ac.uk/data/staff/home/mvasilev/Profile/Desktop/Adam/data.txt")
ResX<- 1024
ResY<- 768
#ResX<- 1920
#ResY<- 1080
maxtrial<- 24
#maxtrial<- 3
#filename<- "TEST.asc"
#filename<- "//bournemouth.ac.uk/data/staff/home/mvasilev/Profile/Desktop/Cdiff_data/CD1.asc"
#########################################################################################
# LOAD UP REQUIRED FUNCTIONS #
#########################################################################################
get_text<- function(file){ ## extracts the loaded text material #
start<- which(grepl("DISPLAY TEXT", file))+1 # start point
end<- which(grepl("START", file))
text<- file[start:end] # subset file
a= which(gregexpr(pattern ='BUTTON', text)==1)
if(length(a)>0){
text<- text[-a]
text<- text[-a] # because one line is removed
}
input<- which(grepl("INPUT", text)); input<- input[1]-1 # end point
text<- text[1:input]
#end<- which(grepl("INPUT", file)); end<- end[1]-1 # end point
#text<- file[start:end] # subset file
return(text)
}
get_coord<- function(string){ # extracts text coordinates from trial info
# check to make sure there are no button press flags here..
a= which(gregexpr(pattern ='BUTTON', string)==1)
if(length(a)>0){
string<- string[-a]
}
#if(which(grepl(' ', string))==1){
# loc<- as.numeric(gregexpr("CHAR", string))
# part1<- substr(string, 1, loc+7)
# part2<- substr(string, loc+9, nchar(string))
# string2<- paste(part1, part2, sep="")
#}
#else{
#out <- data.frame( do.call( rbind, strsplit( string, ' ' ) ) )
out <- do.call( rbind, strsplit( string, '\t' ) )
out<- out[,2]
out <- data.frame(do.call( rbind, strsplit( out, ' ' ) ) )
#}
out<- subset(out, X2!="DELAY") # Remove DELAY 1ms lines
out$X7<- as.numeric(as.character(out$X7))
out$X8<- as.numeric(as.character(out$X8))
out$X9<- as.numeric(as.character(out$X9))
out$X10<- as.numeric(as.character(out$X10))
out$X11<- as.numeric(as.character(out$X11))
fix_spaces<- function(out){
out$space<- NULL
a<- which(out$X6=="") # find position of empty spaces
out$space<- NA
out$space[a]<- 1
# Re-arrange misplaced coords
out$X7[a]<- out$X8[a]
out$X8[a]<- out$X9[a]
out$X9[a]<- out$X10[a]
out$X10[a]<- out$X11[a]
out<- out[,-11]
}
out<- fix_spaces(out)
# clean_MSG<- function(msg){as.numeric(unlist(gsub("[^0-9]", "", unlist(msg)), ""))}
# out$X1<- clean_MSG(out$X1) # get rid of MSG text
out<- out[,-c(1,2,3,5)] # remove useless columns
# map sentences:
map_sent<- function(out){
sent_bnd<- which(out$X6=="."| out$X6=="?");
sent<- NULL
for(i in 1:length(sent_bnd)){
if(i==1){
sent[1:sent_bnd[i]]<- 1
}
else{
sent<- c(sent, c(rep(i, length(sent_bnd[i-1]+1:sent_bnd[i])- length(sent))))
# sent[sent_bnd[i-1]+1:sent_bnd[i]]<- i
}
if(i==length(sent_bnd)){
sent<- c(sent, c(rep(i+1, length(sent_bnd[i]+1:nrow(out))- length(sent))))
}
}
out$sent<- sent
return(out)
}
out<- map_sent(out)
# map lines:
map_line<- function(out){
line<- NULL
lines<- sort(unique(out$X8));
# as.numeric.factor <- function(x) {as.numeric(levels(x))[x]}
#lines<- as.numeric.factor(lines)
for(i in 1:length(lines)){
loc_lines<- which(out$X8==lines[i])
line<- c(line, rep(i, length(loc_lines)))
out$space[length(line)]<- 2
}
out$line<- line
return(out)
}
out<- map_line(out)
# map words:
map_words<- function(out){
out$word<- NULL
curr_sent=1
curr_word=1
for(i in 1:nrow(out)){
newSent<- curr_sent!= out$sent[i]
if(newSent){
curr_sent<- curr_sent+1
curr_word<- 1
}
out$word[i]<- curr_word
if(out$X6[i]== ""& !newSent){
curr_word<- curr_word+1
out$word[i]<- curr_word
}
}
return(out)
}
out<- map_words(out)
# change column names:
colnames(out)<- c("char", "letter", "x1", "y1", "x2", "y2", "space", "sent",
"line", "word")
return(out)
}
# Uses stimuli information to create a coordinate map for each pixel on the screen
coord_map<- function(coords, x=ResX, y= ResY){
#as.numeric.factor <- function(x) {as.numeric(levels(x))[x]}
#coords$x1<- as.numeric.factor(coords$x1); coords$x2<- as.numeric.factor(coords$x2)
#coords$y1<- as.numeric.factor(coords$y1); coords$y2<- as.numeric.factor(coords$y2)
coords$id<- 1:nrow(coords)
map<- data.frame(matrix(NA, nrow = y, ncol = x))
for(i in 1:nrow(coords)){
map[coords$y1[i]:coords$y2[i],coords$x1[i]:coords$x2[i]]<- coords$id[i]
}
return(map)
}
####################################################
trial_info<- function(file, maxtrial){ # extracts information for processing trials
### get trial names:
ID<- which(grepl('TRIALID', file));
trial_text<- file[ID]
trials<- substr(trial_text, unlist(gregexpr(pattern =' ',trial_text[1]))[2]+1, nchar(trial_text))
#trials<- trials[which(letter!="P" & letter!="F")] # remove practice items and questions
trials<- gsub(" ", "", trials)
# sometimes there is an extra empty space that can mess up detection of duplicates
### get condition:
I<- unlist(gregexpr(pattern ='I',trials)) # start of item info
cond<- as.numeric(substr(trials, 2, I-1)) # extract condition number
### get item:
D<- unlist(gregexpr(pattern ='D',trials)) # start of dependent info
item<- as.numeric(substr(trials, I+1, D-1)) # extract condition number
depend<- as.numeric(substr(trials, nchar(trials), nchar(trials)))
### get sequence:
#seq<- 1:length(trials)
### get start & end times
start<- which(grepl('DISPLAY ON', file))
end <- which(grepl('DISPLAY OFF', file))
duplicated<- trials[duplicated(trials)]
if(length(duplicated)>0){ # if there were aborted trials..
message(paste(" Diplicated trial", duplicated, "for file:", data[i], "\n"))
message("Analysing only last attempt at the trial!")
toBeRemoved<- NULL
for(i in 1:length(duplicated)){
dup_rem<- which(trials==duplicated[i])
for(j in 1:length(dup_rem)){
if(j!=length(dup_rem)){
toBeRemoved[length(toBeRemoved)+1]= dup_rem[j]
}
} # end of j
} # end of i
#start<- start[-toBeRemoved]
# end<- end[-toBeRemoved]
cond<- cond[-toBeRemoved]
item<- item[-toBeRemoved]
# seq<- seq[-toBeRemoved]
depend<- depend[-toBeRemoved]
ID<- ID[-toBeRemoved]
} # end of aborted conditional
trial_db<- data.frame(cond, item, depend, start, end, ID)
trial_db<- subset(trial_db, depend==0 & item< maxtrial+1)
trial_db$seq<- 1:nrow(trial_db)
###
# trials<- trials[which(!is.element(trials, duplicated))]
return(trial_db)
}
parse_fix<- function(file, map, coords, trial_db, i){
get_FIX_stamp<- function(string){as.numeric(substr(string, 1, unlist(gregexpr(pattern ='\t', string))[1]-1))}
loc_blinks<- function(blink_time, s_time){findInterval(blink_time, s_time)+1 }
# +1 because it affects the next fixation
get_x_pixel<- function(string){
tab_loc<- unlist(gregexpr(pattern ='\t', string))[3:4]
sub_str<- substr(string, tab_loc[1], tab_loc[2])
x<- as.numeric(unlist(gsub("[^0-9.]", "", unlist(sub_str)), ""))
x<- floor(x)
return(x)
}
get_y_pixel<- function(string){
tab_loc<- unlist(gregexpr(pattern ='\t', string))[4:5]
sub_str<- substr(string, tab_loc[1], tab_loc[2])
y<- as.numeric(unlist(gsub("[^0-9.]", "", unlist(sub_str)), ""))
y<- floor(y)
return(y)
}
# trial_db<- trial_info(file)
# for(i in 1:nrow(trial_db)){
# get position of fixation stamps:
SFIX_stamps<- which(grepl('SFIX', file[trial_db$start:trial_db$end]))
EFIX_stamps<- which(grepl('EFIX', file[trial_db$start:trial_db$end]))
if(length(SFIX_stamps)==0 | length(EFIX_stamps)==0){
# means that there was no complete fixation on this trial (i.e,
# participant likely pressed end button by mistake)
raw_fix<- NULL
return(raw_fix)
message(sprintf("No fixations in Trial %d: excluded", j))
}
if(EFIX_stamps[1]<SFIX_stamps[1]){ # removes fixation that triggered gaze box
EFIX_stamps<- EFIX_stamps[-1]
}
if(EFIX_stamps[length(EFIX_stamps)]< SFIX_stamps[length(SFIX_stamps)]){
SFIX_stamps<- SFIX_stamps[-length(SFIX_stamps)]
} # fixation was not terminated before the end of trial
# get start and end time of fixations:
s_time<- get_FIX_stamp(file[SFIX_stamps+ trial_db$start]) # start time of fixation
e_time<- get_FIX_stamp(file[EFIX_stamps+ trial_db$start-2]) # end time of fixation
# calculate fixation durations:
fixDur<- e_time- s_time
# get x pixel position:
x<- get_x_pixel(file[EFIX_stamps+ trial_db$start-1])
# get y pixel position:
y<- get_y_pixel(file[EFIX_stamps+ trial_db$start-1])
# find blinks:
blink_stamp<- which(grepl('EBLINK', file[trial_db$start:trial_db$end]))
blink_time<- get_FIX_stamp(file[blink_stamp+ trial_db$start])-1
blink_out<- which(blink_time<s_time[1]| blink_time>e_time[length(e_time)])
if(length(blink_out>0)){ # blinks outside time window that is analysed
blink_time<- blink_time[-blink_out] # remove them
}
blink_pos<- loc_blinks(blink_time, s_time)
blink<- rep(0, length(s_time))
blink[blink_pos]<-1
#if(length(blink)>length(e_time)){
# blink<- blink[-length(blink)]
#}
# merge into a dataframe:
fix<- data.frame(s_time, e_time, fixDur, x, y, blink)
#-----------------------------------------------#
# map fixations to stimuli on the screen: #
#-----------------------------------------------#
loc<- NULL; raw_fix<- NULL; temp<- NULL; sub<- NULL
s_time<- NULL; e_time<- NULL; xPos<- NULL; yPos<- NULL
item<- NULL; cond<- NULL; seq<- NULL; fix_num<- NULL; fix_dur<- NULL
sent<- NULL; line<- NULL; word<- NULL; char_sent<- NULL
max_sent<- NULL; max_word<- NULL; intersent_regr<- NULL
intrasent_regr<- NULL; blink<- NULL; outOfBnds<- NULL; outsideText<- NULL
# max word for each sentence:
curr_sent<- matrix(0, max(coords$sent),2)
curr_sent[,1]<- c(1:max(coords$sent))
for(j in 1:nrow(fix)){
loc<- map[fix$y[j], fix$x[j]] # locate fixation
if(length(loc)==0){
outsideText[j]<- 1
} else{
outsideText[j]<- 0
}
# general info:
sub[j]<- i
item[j]<- trial_db$item
cond[j]<- trial_db$cond
seq[j]<- trial_db$seq
fix_num[j]<- j
fix_dur[j]<- fix$fixDur[j]
# info from asc file:
s_time[j]<- fix$s_time[j];
e_time[j]<- fix$e_time[j]
xPos[j]<- fix$x[j];
yPos[j]<- fix$y[j]
if(xPos[j]<1 | xPos[j]> ResX | yPos[j]< 1 | yPos[j]>ResY){ # fixation is out of bounds
outOfBnds[j]<- 1
} else{
outOfBnds[j]<- 0
}
# stimuli info:
if(!is.null(loc)){
sent[j]<- coords$sent[loc]
line[j]<- coords$line[loc]
word[j]<- coords$word[loc]
char_sent[j]<- as.numeric(as.character(levels(coords$char[loc])[coords$char[loc]]))+1
# +1 bc Eyetrack counts from 0
} else{
sent[j]<- NA; line[j]<- NA; word[j]<- NA; char_sent[j]<- NA
}
# saccade stuff:
if(j==1){
max_sent[j]<- sent[j]
} else{
max_sent[j]<- max_sent[j-1]
if(!is.na(max_sent[j])& !is.na(sent[j]) & sent[j]> max_sent[j]){
max_sent[j]<- sent[j]
}
}
# maximum word:
if(j==1){
max_word[j]<- abs(word[j])
curr_sent[sent[j],2]<- abs(word[j])
} else{
max_word[j]<- curr_sent[sent[j],2]
if(!is.na(word[j])& abs(word[j])>curr_sent[sent[j],2]){
max_word[j]<- abs(word[j])
curr_sent[sent[j],2]<- abs(word[j]) # replace new max word
}
}
# Regression stuff:
if(!is.na(max_sent[j])& !is.na(sent[j]) & sent[j]< max_sent[j]){
intersent_regr[j]<- 1
} else{
intersent_regr[j]<- 0
}
# intra-sentence regressions:
if(!is.na(word[j])& abs(word[j])<max_word[j]){
intrasent_regr[j]<- 1
}else{
intrasent_regr[j]<- 0
}
blink[j]<- fix$blink[j]
} # end of j loop
raw_fix<- data.frame(sub,item, cond, seq, s_time, e_time,xPos, yPos, fix_num, fix_dur,
sent, max_sent, line, word, max_word, char_sent, intrasent_regr, intersent_regr, blink,
outOfBnds, outsideText)
# remove last fixation on trial (due to participants' making a decision to press the button)
raw_fix<- raw_fix[-nrow(raw_fix),]
#raw_fix<- rbind(raw_fix, temp)
#rm(item, cond, seq, s_time, e_time,xPos, yPos, fix_num, fix_dur,
# sent, max_sent, line, word, max_word, char_sent, intrasent_regr, intersent_regr, blink)
# } # end of i loop
return(raw_fix)
}
###################################################################################################
# Process data here #
###################################################################################################
#
source("plot_fix.R")
raw_fix<- NULL
for (i in 1:length(data)){ # for each subject..
# i=1; # temporary
cat(sprintf("Processing subject %i", i)); cat("\n")
cat(sprintf("Loading data %s ...", data[i]));
file<- readLines(data[i]) # load file
cat(" Done"); cat("\n")
trial_db<- trial_info(file, maxtrial) # extract info about trials to be processed
cat("Trial... ")
for(j in 1:nrow(trial_db)){ # for each item
text<- get_text(file[trial_db$ID[j]:trial_db$start[j]])
coords<- get_coord(text)
map<- coord_map(coords, x=ResX, y= ResY)
raw_fix_temp<- parse_fix(file, map, coords, trial_db[j,], i)
raw_fix<- rbind(raw_fix, raw_fix_temp) # plot it
if(length(raw_fix_temp)>1){ # if data was extracted from trial
# create picture of fixations:
plot_fix(coords, raw_fix_temp, i, j)
}
cat(toString(j)); cat(" ")
} # end of item loop
cat("\n DONE \n \n");
} # end of subject loop
#text<- get_text(file)
#coords<- get_coord(text)
#map<- coord_map(coords, x=ResX, y= ResY)
#raw_fix<- parse_fix(file, map, coords)
cat("Saving data.");
save(raw_fix, file= "data/raw_fix.Rda"); cat(".")
write.csv(raw_fix, file= "data/raw_fix.csv"); cat(".")
cat("\n \n All Done!");
#fileConn<-file("Fixation_log.txt")
#writeLines(c("Hello","World"), fileConn)
#close(fileConn)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.