popEye: Analysis of eye-tracking data from reading experiments

Documented in popEye

#' Main popEye encoding function 
#' 
#' This function is used when you want to analyze eye tracking data using popEye.
#' 
#' @param datpath Path to eye tracking files
#' @param tracker.model Eye tracker used for data collection (only "eyelink"
#' available at present)
#' @param tracker.software Software used for data collection ("EB" for 
#' "Experiment Builder" or "ET" for "EyeTrack")
#' @param tracker.results Name of the results file that is generated by the 
#' eye tracking software (if any)
#' @param type Type of experiment used in the study ("text", "sentence", "target", 
#' "boundary", "fast")
#' @param message.start Message in eye tracking file indicating the start of
#' the trial
#' @param message.stop Message in eye tracking file indicating the end of the trial
#' @param message.boundary Message in eye tracking file indicating when the boundary
#' is triggered (only relevant for boundary and fast priming experiments)
#' @param message.target Message in eye tracking file indicating the onset of
#' the target display (only relevant for boundary experiments)
#' @param message.prime Message in eye tracking file indicating the onset of the
#' prime display (only relevant for fast priming experiments)
#' @param variable.id Variable in eye tracking experiment indicating which item 
#' is displayed in a trial ("id" by default)
#' @param variable.cond Variable in eye tracking experiment indicating in which
#' (within-item) condition the item is displayed in a trial (if any; no
#' conditions by default)
#' @param item.practice Indicator for a practice item that is displayed at the 
#' beginning of an experiment (only relevant for EyeTrack experiments)
#' @param item.trigger Indicator for an gaze trigger element used in the 
#' experiment (only relevant for EyeTrack experiments) 
#' @param item.question Indicator for a comprehension question trial used  
#' during the experiment (only relevant for EyeTrack experiments)
#' @param item.pracnum Number of practice items shown at the beginning of an
#' experiment (which are discarded during the analysis)
#' @param stimulus.file Path and name of stimulus file
#' @param stimulus.id Name of the column providing the item number 
#' in stimulus file
#' @param stimulus.cond Name of the column providing the condition name in the
#' stimulus file
#' @param stimulus.preview Name of the column providing the preview display
#' in the stimulus file (only relevant for boundary and fast priming experiments)
#' @param stimulus.prime Name of the column providing the prime display in the 
#' stimulus file (only relevant for fast priming experiments)
#' @param stimulus.text Name of the column providing the target (or only) display
#' in the stimulus file 
#' @param stimulus.hyphenwrap Indicator whether words are split at hyphens at 
#' line breaks (default is TRUE)
#' @param indicator.word Indicator used to separate words from each other
#' (empty by default)
#' @param indicator.ia Indicator used to separate interest areas from each other
#' (words are used as interest areas by default)
#' @param indicator.target Indicator used to denote the target interest area 
#' ("*" by default)
#' @param indicator.line Indicator used for manual line breaks ("\\n" by default)
#' @param separator.word Characters used to separate words from each other
#' (white space by default)
#' @param separator.sentence Characters used to separate sentences
#' (.!? by default)
#' @param display.marginLeft Size of the margin at the left of the screen
#' (in pixels)
#' @param display.marginTop Size of the margin at the top of the screen (in pixels)
#' @param display.marginRight Size of the margin at the right of the screen 
#' (in pixels)
#' @param display.marginBottom Size of the margin at the bottom of the screen
#' (in pixels)
#' @param font.name Name of the font used in the experiment (currently, "Arial", 
#' "CourierNew","Consolas", "Times New Roman", and "Symbol" are supported for most
#' font sizes between 12 and 24 pt in steps of 2)
#' @param font.size Size of the font (in pixels, 16 by default)
#' @param font.spacing Spacing between lines (numeric, 2 by default)
#' @param analysis.eyelink Should the real-time parsing from the eyelink system 
#' be used? (TRUE or FALSE, default is TRUE)
#' @param analysis.smooth Amount of smoothing applied to raw x and y data. 
#' Number of samples used to compute a (two-sided) moving average 
#' (numeric, default is 5)
#' @param analysis.vfac Velocity threshold used for saccade detection (see Engbert & Kliegl, 
#' 2003; default is 5)
#' @param analysis.mindur Minimum duration of a saccade (see Engbert & Kliegl, 2003; 
#' default is 10 ms)
#' @param analysis.postdur Minimum duration of a fixation (see Engbert & Kliegl, 
#' 2003; default is 30 ms)
#' @param analysis.drift Threshold for the decision whether a fixation is treated 
#' as a blink; amount of drift on the x or y dimension in terms of the font height
#' (numeric; default is 1. See Engbert & Kliegl, 2003)
#' @param analysis.sparse If TRUE, the msg, sample, and event slots are cleaned
#' during the analysis (TRUE or FALSE, default is TRUE)
#' @param assign.driftX If TRUE fixation is corrected for drift on the x
#' axis (TRUE or FALSE, default is FALSE; only relevant for EB experiments in
#' which the drift correct element is used) 
#' @param assign.driftY If TRUE fixation is corrected for drift on the y
#' axis (TRUE or FALSE, default is FALSE; only relevant for EB experiments in
#' which the drift correct element is used)
#' @param assign.outlier Indicates whether outlier detection should be carried out
#' (fixations deviating from the text box in the x and y dimension by a certain amount
#' are flagged as outliers, default is TRUE)
#' @param assign.outlierDist Parameter that controls the definition of the outlier 
#' (i.e., the necessary distance to the text area in % of the text area; 
#' input values 0-1, default is 0.2)
#' @param assign.moveMethod If TRUE fixations are moved on the x axis to fit 
#' into text area (TRUE or FALSE, default is FALSE)
#' @param assign.moveX If TRUE fixations are moved on the x axis to fit 
#' into text area (TRUE or FALSE, default is FALSE)
#' @param assign.moveY If TRUE fixations are moved on the y axis to fit 
#' into text area (TRUE or FALSE, default is FALSE)
#' @param assign.lineMethod Method used to assign fixations to lines ("attach",
#' "chain" or "merge", "chain" is default; see Vignette)
#' @param assign.lineX Parameter used to detect runs on the x axis (default is 35)
#' @param assign.lineY Parameter used to detect runs on the y axis (default is 0.5)
#' @param assign.lineS Parameter used to decide that two lines are the same 
#' (only relevant for the slice assignment method, default is 0.45)
#' @param assign.lineN Parameter used to decide that two lines are different 
#' (only relevant for the slice assignment method, default is 0.5)
#' @param clean.stage1Dur Minimum duration for fixation during stage 1 
#' cleaning (default: 80 ms)
#' @param clean.stage1Dist Minimum distance between fixations (in number
#' of letters) during stage 1 cleaning (default: 1)
#' @param clean.stage2Dur Minimum duration of fixation during stage 2
#' cleaning (default: 40 ms)
#' @param clean.stage2Dist Minimum distance between fixations (in number
#' of letters) during stage 2 cleaning
#' @param clean.stage3 If TRUE, stage 3 cleaning is conducted (default is FALSE)
#' @param clean.stage3Dur Minimum duration of fixation during stage 3 cleaning 
#' (default is 140 ms)
#' @param clean.stage4 If TRUE stage 4 cleaning is conducted (default is FALSE)
#' @param clean.stage4Min Minimum duration of fixation during stage 4 cleaning
#' (default is 80 ms)
#' @param clean.stage4Max Maximum duration of fixation during stage 4 cleaning
#' (default is 800 ms)
#' @param clean.outlier If TRUE outlying fixations at the begining and the end
#' of a trial are deleted (default is FALSE)
#' @param exclude.blink If TRUE a trial is flagged as critical if at least one
#' blink has occured on it (default is FALSE)
#' @param exclude.nfix Minimum number of fixations that a trial has to have 
#' received (deleted otherwise; default is 3)
#' @param exclude.sac Duration for screen for unplausibel long saccades (in ms;
#' 200 ms as default).
#' @param outpath Path were output file should be saved
#' @param outname Name of output file
#' @param select.version Restrict analysis to a specific version of the experiment
#' (numeric; internal for debugging; only relevant for EB experiments)
#' @param select.subjects Restrict analysis to a subset of subjects (within a version).
#' Select a single subject by providing the corresponding subject ID, e.g., select.subjects = "t01". 
#' Select a set of subjects by providing a vector, e.g., select.subjects = c("t01", t02").
#' @param skip.subjects Remove a subset of subjects (within a version) from the analysis.
#' Select a single subject by providing the corresponding subject ID, e.g., skip.subjects = "t01". 
#' Select a set of subjects by providing a vector, e.g., skip.subjects = c("t01", t02").
#' @param select.items Restrict analysis to a subset of trials (usually within a single subject).
#' Select a single item by providing the corresponding item ID, e.g., select.items = "item01". 
#' Select a set of itemss by providing a vector, e.g., select.items = c("item01", item02").
#' @param skip.items Remove a subset of items (within a version) from the analysis.
#' Select a single item by providing the corresponding item ID, e.g., skip.item = "item01". 
#' Select a set of items by providing a vector, e.g., skip.items = c("item01", "item02").
#' @param select.trials Restrict analysis to a subset of trials (usually within a single subject).
#' The difference between "items" and "trials" is just that the trials are selected
#' by their item ID and "trials" by the position within the analysis. Both arguments
#' can also be combined.
#' Select a single trial by providing the corresponding trial ID, e.g., select.trials = 1. 
#' Select a set of trials by providing a vector, e.g., select.trials = c(1, 2).
#' @param skip.trials Remove a subset of trials (within a version) from the analysis.
#' Select a single trial by providing the corresponding trial ID, e.g., skip.trials = 10. 
#' Select a set of trials by providing a vector, e.g., skip.trials = c(10, 11).
#' @param debug Perform analysis only for specific steps of the analysis 
#' ("setup", "subjects", "read", "remove", "create", "add", "extract", "line", 
#' "assign", "combine", "aggregate")


popEye <- function(datpath, 
                   stimulus.file,
                   # NOTE: maybe change name to stimfile
                   tracker.model = "eyelink", 
                   tracker.software = "EB", 
                   tracker.results = NA,
                   type = "sentence", 
                   message.start = "SYNCTIME", 
                   message.stop = "stop",
                   message.boundary = "boundary", 
                   message.target = "target",
                   message.prime = "prime", 
                   variable.id = "id", 
                   variable.cond = NA,
                   # NOTE: maybe change to missing in functions
                   item.practice = "^P", 
                   item.trigger = "999",
                   item.question = 1000, 
                   item.pracnum = 0,
                   stimulus.id = "id",
                   stimulus.cond = NA, 
                   # NOTE: maybe change to missing in functions
                   stimulus.preview = "preview",
                   stimulus.prime = "prime", 
                   stimulus.text = "text", 
                   stimulus.hyphenwrap = T,
                   indicator.word = "", 
                   indicator.ia = "", 
                   indicator.target = "\\*", 
                   indicator.line = "\\\\n",
                   separator.word = " ",
                   separator.sentence = c(".", "!", "?", ";"),
                   separator.sentence2 = c(" ", "\""),
                   display.marginLeft = 150, 
                   display.marginTop = 300, 
                   display.marginRight = 50, 
                   display.marginBottom = 100, 
                   font.name = "CourierNew", 
                   font.size = 16, 
                   font.spacing = 2,
                   font.wrap = TRUE,
                   analysis.eyelink = TRUE, 
                   analysis.smooth = 5, 
                   analysis.vfac = 5, 
                   analysis.mindur = 10, 
                   analysis.postdur = 30,
                   analysis.drift = 1, 
                   analysis.sparse = TRUE,
                   assign.driftX = FALSE, 
                   assign.driftY = FALSE,
                   assign.outlier = TRUE,
                   assign.outlierDist = 0.2,
                   assign.moveMethod = "hit",
                   assign.moveX = FALSE,
                   assign.moveY = FALSE,
                   assign.lineMethod = "chain",
                   
                   assign.lineX = 35,
                   assign.lineY = 0.5,
                   assign.lineS = 0.45,
                   assign.lineN = 1.5,
                   
                   clean.stage1Dur = 80, 
                   clean.stage1Dist = 1,
                   clean.stage2Dur = 40, 
                   clean.stage2Dist = 3,
                   clean.stage3 = FALSE, 
                   clean.stage3Dur = 140, 
                   clean.stage4 = FALSE, 
                   clean.stage4Min = 80,
                   clean.stage4Max = 800, 
                   clean.delete = FALSE,
                   clean.outlier = FALSE,
                   exclude.nfix = 3, 
                   exclude.sac = 150,
                   outpath = getwd(), 
                   outname = "",
                   # NOTE: Maybe combine outpath and outname to one parameter?
                   select.version = NULL,
                   select.subjects = NULL,
                   skip.subjects = NULL,
                   select.items = NULL,
                   skip.items = NULL,
                   select.trials = NULL,
                   skip.trials = NULL,
                   debug = "none"
                   
) {
  
  
  # ----------------------------------
  # setup experiment 
  # ----------------------------------
  
  message("Initialize experiment")
  
  # create output file
  exp <- list(setup = NA, subjects = list())
  
  # retrieve setup infomation
  exp$setup <- SetupExperiment()
  
  # create output files
  CreateOutput()
  
  if (debug == "setup") {
    return (exp)
  }
  
  
  # create version list
  # --------------------
  
  # check for version
  if (tracker.software == "EB") {
    if (length(grep("results", list.files(datpath))) == 0) {
      version.list <- list.files(datpath)
      version.list <- paste("/", version.list, "/", sep = "") # here ?
    } else {
      version.list <- ""
    }  
  } else if (tracker.software == "ET") {
    version.list <- ""
  } else if (tracker.software == "psychopy") {
    version.list <- ""
  } 
  
  # initialize number of subjects
  nsub <- 0
  
  
  # ----------------------------------
  # version loop
  # ----------------------------------
  
  if (missing(select.version) == T) {
    version.arg1 <- 1
    version.arg2 <- length(version.list)
  } else {
    version.arg1 <- select.version
    version.arg2 <- select.version
  }
  
  for (v in version.arg1:version.arg2) {
    
    # v <- 2
    
    if ((version.arg2 - version.arg1) > 0) {
      message(paste("Version ", v, sep = ""))
    }
    
    # list of subjects
    if (tracker.software == "EB") {
      filepath <- paste(datpath, version.list[v], "results/", sep = "")  
      sub.list <- list.files(filepath)
      sub.list <- gsub(".asc", "", sub.list)
    } else if (tracker.software == "ET") {
      filepath <- paste(datpath, version.list[v], sep = "")  
      sub.list <- list.files(filepath)
      sub.list <- sub.list[grep("asc", sub.list)]
      sub.list <- gsub(".asc", "", sub.list)
    } else if (tracker.software == "psychopy") {
      filepath <- paste(datpath, version.list[v], sep = "")  
      sub.list <- list.files(filepath)
      sub.list <- sub.list[grep("hdf5", sub.list)]
      sub.list <- gsub(".hdf5", "", sub.list)
    }
    
    # select subjects
    if (missing(select.subjects) == F) {
      sub.list <- sub.list[is.element(sub.list, select.subjects)]
    }
    
    # skip subjects
    if (missing(skip.subjects) == F) {
      sub.list <- sub.list[is.element(sub.list, skip.subjects) == F]
    }
    
    if (debug == "subjects") {
      return (sub.list)
    }
    
    
    # ----------------------------------
    # subject loop
    # ----------------------------------
    
    subject.arg1 <- 1
    subject.arg2 <- length(sub.list)
    
    for (s in subject.arg1:subject.arg2) {
      
      # increment number of subjects
      nsub <- nsub + 1

      # generate header slot
      header <- list()
      
      subid <- sub.list[s]
      header$subid <- subid
      header$version <- v
      
      # message subject
      message(paste(". Subject ", s, ": ", subid, sep = ""))
      
      # TODO: store other information about subject (e.g., version)
      
      t1 <- Sys.time()
      
      
      # --------------------------------------------
      # Modul 1: Preprocessing
      # --------------------------------------------
      
      # read data
      # -----------
      
      message(".. Read data")
      dat <-  ReadData(filepath, subid)
      # TODO: read in edf directly (-> external packages)
      # TODO: asc data processing rather slow
      
      if (debug == "read") {
        return (dat)
      }
      
      
      # remove data
      # --------------
      
      message(".. Remove data")
      
      dat <- RemoveData(dat) 
      
      if (debug == "remove") {
        return (dat)
      }
      
      
      # create trials
      # ---------------
      
      message(".. Create trials")
     
      dat <- CreateTrials(dat)
      
      if (debug == "create") {
        return (dat)
      }
      
      
      # -----------------------
      # Modul 2: Cleaning
      # -----------------------
      
      # add stimulus information
      # -------------------------
      
      message(".. Add stimulus")
      
      dat <- ReadStimulus(dat)
      
      if (debug == "add") {
        return (dat)
      }
      
      
      # extract fixations
      # --------------------
      
      message(".. Extract fixations")
      
      dat <- ExtractFixations(dat)
      
      if (debug == "extract") {
        return (dat)
      }
      
      
      # assign letters/words
      # ---------------------
      
      message(".. Assign stimulus")
      
      dat <- MatchStim(dat)
      
      if (debug == "assign" | debug == "line") {
        return (dat)
      }
      
      
      # clean IAs
      # -----------
      
      message(".. Cleaning interest area")
      
      dat <- CleanIA(dat)
      
      # NOTE: not sure whether this should be implemented here (or at all)
      # NOTE: stage3 cleaning is completely useless !
      # NOTE: stage4 cleaning is dangerous !
      # TODO: report deleted fixations
     
       
      # compute measures
      # -----------------
      
      message(".. Compute measures")
      
      dat <- ComputeFixationMeasures(dat)
      dat <- ProcessSaccades(dat)
      
      
      # combine events
      # ----------------
      
      message(".. Combine events")
      
      dat <- CombineEvents(dat)
      
      if (debug == "combine") {
        return (dat)
      }
      
      
      # cleaning
      # ---------
      
      message(".. Cleaning trial")
      
      dat <- CleanAll(dat)
      # NOTE: think about relationship between cleaning here and in main analysis
      
      
      # clean trials
      # -------------
      
      if (exp$setup$analysis$sparse == TRUE) {
        dat <- Sparse(dat)
      }
      
      
      # finalize
      # ---------
      
      # names for trial slots
      for (i in 1:length(dat$item)) {
        
        names(dat$item)[i] <- paste("item", dat$item[[i]]$meta$itemid, sep = ".")
        
        dat$item[[i]]$fix$trialid <- i
        dat$item[[i]]$sac$trialid <- i
        dat$item[[i]]$meta$stimmat$trialid <- i
        
        fix <- rbind(fix, dat$item[[i]]$fix)
        row.names(fix) <- NULL
        
        sac <- rbind(sac, dat$item[[i]]$sac)
        
      }
      
      # save in experiment slot
      exp$subjects[[nsub]] <- list(header = header, items = dat$item)
      
      # names for participant slot
      names(exp$subjects)[nsub] <- paste("subject", subid, sep = ".")
      
      
      # -----------------------
      # Modul 3: Aggregation
      # -----------------------
      
      # item file
      # -----------
      
      message(".. Load item file")
      
      word.item <- ItemFileWord(dat)
      row.names(word.item) <- NULL
      
      ia.item <- ItemFileIA(dat)
      row.names(ia.item) <- NULL
      
      sent.item <- ItemFileSent(dat)
      row.names(sent.item) <- NULL
      
      
      # results file
      # -------------
      
      # TODO: Further processing of results file?
      
      if (exp$setup$tracker$software == "EB" & is.na(exp$setup$tracker$results) == F) {
        
        message(".. Load results file")
       
        resultstmp <- read.table(paste(datpath, "results/", subid, "/", exp$setup$tracker$results, sep = ""), 
                              header = T, stringsAsFactor = F)
        resultstmp$subid <- subid
        
        results <- rbind(results, resultstmp)
        
      }
      
      
      # clean file
      # -------------
      
      message(".. Create clean file")
      
      cleantmp <- CreateClean(dat)
      cleantmp$subid <- subid
      clean <- rbind(clean, cleantmp)
      
      
      # aggregate data
      # ---------------
      
      message(".. Aggregate")
      
      wordfirst <- AggregateWordsFirstrun(fix)
      wordtmp <- AggregateWords(fix, word.item)
      wordcomb <- CombineWords(fix, wordfirst, wordtmp)
      exp$reports$words <- rbind(exp$reports$words, wordcomb)
      
      iafirst <- AggregateIAsFirstrun(fix)
      iatmp <- AggregateIAs(fix, ia.item, exp)
      combia <- CombineIAs(fix, iafirst, iatmp, exp)
      exp$reports$ias <- rbind(exp$reports$ias, combia)
      
      sent <- ComputeSentenceMeasures(fix, sent.item)
      # NOTE: rename to AggregateSentences
      exp$reports$sentences <- rbind(exp$reports$sent, sent)
      
      trials <- AggregateTrials(fix, wordcomb)
      exp$reports$trials <- rbind(exp$reports$trials, trials)
      
      row.names(fix) <- NULL
      exp$reports$fix <- rbind(exp$reports$fix, fix)
      fix <- NULL
       
      row.names(sac) <- NULL
      exp$reports$sac <- rbind(exp$reports$sac, sac)
      sac <- NULL
      
      t2 <- Sys.time()
      print(round(t2 - t1, 1))
      
    } # end subject loop
    
  } # end version loop
  
  # NOTE: save number of subjects in setup slot?
  
  
  # collect results file
  # ---------------------
  
  if (exp$setup$tracker$software == "EB" & is.na(exp$setup$tracker$results) == F) {
    
    exp$reports$results <- results
    
  }
  
  exp$reports$clean <- clean[-1, ]
  row.names(exp$reports$clean) <- NULL
  
  
  if (debug == "aggregate") {
    return (exp)
  }
  
  
  # aggregate participants
  # -----------------------
  
  exp <- AggregateSubjects(exp)
  
  
  # save
  # -----
  
  message("Save")
  
  # set outpath
  if (outpath == "") {
    outpath <- getwd()
  }

  # set outname
  if (outname == "") {
    tmp <- unlist(strsplit(datpath, "/"))
    outname <- tmp[length(tmp)]
  }

  # save
  saveRDS(exp, file = paste(outpath, "/", outname, ".RDS", sep = ""))
  
}