R/pipeline-loadasc.R

Defines functions normalize_time_orig any_block_entries add_unique_identifiers_to_df add_unique_event_identifiers load_asc

Documented in add_unique_event_identifiers add_unique_identifiers_to_df load_asc

#' Load and parse SR Research EyeLink `.asc` files
#'
#' This function builds upon the [eyelinker::read.asc()] function to parse the
#' messages and metadata within the EyeLink `.asc` file. After loading and
#' additional processing, this function returns an S3 `eyeris` class for use in
#' all subsequent `eyeris` pipeline steps and functions.
#'
#' @note
#' This function is part of the `glassbox()` preprocessing pipeline and is not
#' intended for direct use in most cases. Provide parameters via
#' `load_asc = list(...)`.
#'
#' Advanced users may call it directly if needed.
#'
#' @details
#' This function is automatically called by `glassbox()` by default. If needed,
#' customize the parameters for `load_asc` by providing a parameter list.
#'
#' Users should prefer using `glassbox()` rather than invoking this function
#' directly unless they have a specific reason to customize the pipeline
#' manually.
#'
#' @param file An SR Research EyeLink `.asc` file generated by the official
#' EyeLink `edf2asc` command
#' @param block Optional block number specification. The following are valid
#' options:
#'   * "auto" (default): Automatically handles multiple recording segments
#'   embedded within the same `.asc` file. We recommend using this default as
#'   this is likely the safer choice then assuming a single-block recording
#'   (unless you know what you're doing).
#'   * `NULL`: Omits block column. Suitable for single-block recordings.
#'   * Numeric value: Manually sets block number based on the value provided
#'   here.
#'
#' @return An object of S3 class `eyeris` with the following attributes:
#' \enumerate{
#'   \item `file`: Path to the original `.asc` file.
#'   \item `timeseries`: Dataframe of all raw timeseries data from the tracker.
#'   \item `events`: Dataframe of all event messages and their timestamps.
#'   \item `blinks`: Dataframe of all blink events.
#'   \item `info`: Dataframe of various metadata parsed from the file header.
#'   \item `latest`: `eyeris` variable for tracking pipeline run history.
#' }
#'
#' @seealso [eyelinker::read.asc()] which this function wraps.
#'
#' @seealso [eyeris::glassbox()] for the recommended way to run this step as
#' part of the full eyeris glassbox preprocessing pipeline.
#'
#' @examples
#' demo_data <- eyelink_asc_demo_dataset()
#'
#' demo_data |>
#'   eyeris::glassbox(load_asc = list(block = 1))
#'
#' # Other useful parameter configurations
#' ## (1) Basic usage (no block column specified)
#' demo_data |>
#'   eyeris::load_asc()
#'
#' ## (2) Manual specification of block number
#' demo_data |>
#'   eyeris::load_asc(block = 3)
#'
#' ## (3) Auto-detect multiple recording segments embedded within the same file
#' ##  (i.e., the default behavior)
#' demo_data |>
#'   eyeris::load_asc(block = "auto")
#'
#' ## (4) Omit block column
#' demo_data |>
#'   eyeris::load_asc(block = NULL)
#'
#' @export
load_asc <- function(file, block = "auto") {
  if (!tools::file_ext(file) %in% c("asc", "gz")) {
    stop(sprintf("Error: The file '%s' is not a .asc file.", file))
  }

  x <- eyelinker::read.asc(
    fname = file,
    samples = TRUE,
    events = TRUE,
    parse_all = FALSE
  )

  # parse metadata
  is_mono <- x$info$mono
  is_left <- x$info$left
  is_right <- x$info$right
  if (is_mono) {
    if (is_left) eye <- "L"
    if (is_right) eye <- "R"
  } else {
    if (is_left && is_right) eye <- "LR"
  }
  hz <- x$info$sample.rate
  pupil_type <- tolower(x$info$pupil.dtype)

  # raw data processing
  raw_df <- x$raw |>
    dplyr::select(
      block,
      time_orig = time,
      pupil_raw = ps,
      eye_x = xp,
      eye_y = yp
    ) |>
    dplyr::mutate(
      eye = eye,
      hz = hz,
      type = pupil_type
    ) |>
    dplyr::relocate(pupil_raw, .after = type)

  # return list object
  list_out <- vector("list", length = 6)
  names.out <- c(
    "file",
    "timeseries",
    "events",
    "blinks",
    "info",
    "latest"
  )
  names(list_out) <- names.out

  # block handler
  if (!is.null(block)) {
    if (block == "auto") {
      # check existing blocks parsed by eyelinker
      existing_blocks <- unique(x$raw$block)
      if (length(existing_blocks) > 1) {
        # split raw data by eyelinker-detected blocks
        list_out$timeseries <- split(
          raw_df,
          paste0("block_", x$raw$block)
        )
        list_out$events <- split(
          x$msg,
          paste0("block_", x$msg$block)
        )
        list_out$blinks <- split(
          x$blinks,
          paste0("block_", x$blinks$block)
        )
      } else {
        # if eyelinker parses only 1 block, then use that single block
        list_out$timeseries <- list("block_1" = raw_df)
        list_out$events <- list("block_1" = x$msg)
        list_out$blinks <- list("block_1" = x$blinks)
      }
    } else if (is.numeric(block)) {
      # manually set block number inside the data
      list_out$timeseries <- setNames(
        list(raw_df |> dplyr::mutate(block = !!as.numeric(block))),
        paste0("block_", as.character(block))
      )
      list_out$events <- setNames(
        list(x$msg |> dplyr::mutate(block = !!as.numeric(block))),
        paste0("block_", as.character(block))
      )
      list_out$blinks <- setNames(
        list(x$blinks |> dplyr::mutate(block = !!as.numeric(block))),
        paste0("block_", as.character(block))
      )
    } else {
      stop("`block` must be either: NULL, numeric, or 'auto'.")
    }
  } else {
    # fallback to direct assignment if all block cases fail
    list_out$timeseries <- list("block_1" = raw_df)

    # omit the block column from the timeseries, events, and blinks
    list_out$timeseries$block_1 <- list_out$timeseries$block_1 |>
      dplyr::select(-block)
    list_out$events <- x$msg |> dplyr::select(-block)
    list_out$blinks <- x$blinks |> dplyr::select(-block)
  }

  # add unique event identifiers to handle duplicate event messages
  list_out$events <- add_unique_event_identifiers(list_out$events)

  # fix metadata (info) for newer versions of eyelink
  fixed_info <- parse_eyelink_info(x$info$version, x$info$model)
  x$info$version <- fixed_info$version
  x$info$model <- fixed_info$model

  list_out$file <- file
  list_out$info <- x$info

  # set latest pointer based on block structure
  if (is.list(list_out$timeseries) && !is.data.frame(list_out$timeseries)) {
    # multiblock: set a named list of pointers
    list_out$latest <- setNames(
      as.list(rep("pupil_raw", length(list_out$timeseries))),
      names(list_out$timeseries)
    )
  } else {
    # single block: set a single pointer
    list_out$latest <- "pupil_raw"
  }

  list_out$decimated.sample.rate <- NA_integer_
  list_out <- normalize_time_orig(list_out)
  class(list_out) <- "eyeris"

  return(list_out)
}

#' Add unique event identifiers to handle duplicate event messages
#'
#' This function adds a new column `text_unique` to each events table that
#' creates unique identifiers for each occurrence of the same event message
#' by appending a count number. This prevents events like "GOAL" from being
#' merged across all separate goals.
#'
#' This function is called by the exposed wrapper [eyeris::load_asc()]
#'
#' @param events_list A list of event data frames (one per block)
#'
#' @return Updated events list with `text_unique` column added to each dataframe
#'
#' @keywords internal
add_unique_event_identifiers <- function(events_list) {
  if (is.data.frame(events_list)) {
    # single data frame case
    events_list <- add_unique_identifiers_to_df(events_list)
  } else if (is.list(events_list)) {
    # list of data frames case (multiple blocks)
    events_list <- lapply(events_list, add_unique_identifiers_to_df)
  }

  events_list
}

#' Add unique identifiers to a single events data frame
#'
#' This function is called by the exposed wrapper [eyeris::load_asc()]
#'
#' @param events_df A single events data frame
#'
#' @return Updated events data frame with `text_unique` column
#'
#' @keywords internal
add_unique_identifiers_to_df <- function(events_df) {
  if (!"text" %in% colnames(events_df)) {
    return(events_df)
  }

  # create a counter for each unique text message
  events_df <- events_df |>
    dplyr::group_by(text) |>
    dplyr::mutate(
      text_unique = if (dplyr::n() > 1) {
        paste0(text, "_", dplyr::row_number())
      } else {
        text
      }
    ) |>
    dplyr::ungroup()

  events_df
}

# normalize "time_orig" to seconds and to start at 0
any_block_entries <- function(eyeris_obj) {
  is.list(eyeris_obj$timeseries) &&
    any(grepl("^block_", names(eyeris_obj$timeseries)), na.rm = TRUE)
}

normalize_time_orig <- function(eyeris_obj) {
  if (any_block_entries(eyeris_obj)) {
    # case: one or more multiple "blocks"
    eyeris_obj$timeseries <- lapply(eyeris_obj$timeseries, function(block_df) {
      block_df |>
        dplyr::mutate(
          time_secs = (time_orig - dplyr::first(time_orig)) / 1000,
          time_scaled = (time_orig - dplyr::first(time_orig)) / 1000,
          .after = "time_orig"
        )
    })
  } else { # safety mechanism: shouldn't ever get to this condition b/c of 167
    # case: no tibble "block_{}" in list timeseries; timeseries is the tibble
    eyeris_obj$timeseries <- eyeris_obj$timeseries |>
      dplyr::mutate(
        time_secs = (time_orig - dplyr::first(time_orig)) / 1000,
        time_scaled = (time_orig - dplyr::first(time_orig)) / 1000,
        .after = "time_orig"
      )
  }

  eyeris_obj
}

Try the eyeris package in your browser

Any scripts or data that you put into this service are public.

eyeris documentation built on July 3, 2025, 9:08 a.m.