R/datawiz.R

Defines functions as_word convert_datawiz_code_to_aoi as_time melt_datawiz str_reject backfill_column_names fill_column_names read_datawiz

Documented in convert_datawiz_code_to_aoi melt_datawiz read_datawiz

#' Read eyetracking data from a datawiz file
#'
#' @param filename a txt file generated by datawiz
#' @param sampling_rate the rate of the video recording in ms. By default, the
#'   value is 33.3 for 1 frame every 33.3 ms.
#' @return a dataframe containing the cleaned-up eyetracking data
#' @export
#' @details
#' The files exported by DataWiz are a series of tab-separated data files all
#' combined into a single file. This means that the header row (with the column
#' names separated by tabs) will be repeated throughout the file. These repeated
#' header rows are removed.
#'
#' The header rows indicate the time of eyetracking samples by columns named
#' "F0", "F33", "F67", etc. There are also columns with blank names before
#' column "F0". These are also looking samples before "F0". This function
#' back-fills the column names so that the first column before "F0" changes from
#' " " to "X33", where the X indicates a negative time sample.
read_datawiz <- function(filename, sampling_rate = 33.3333) {
  # Datawiz files are tab-separated files containing several tables concatenated
  # together, so there are multiple rows with the column names:

  # Header1	Header2	Header3	 	 	 	 	 	F0	F33	F67
  # KidA	Day1	Data3	-	-	-	-	-	-	-	-
  # KidA	Day1	Data3	.	.	.	.	.	.	.	.
  # KidA	Day1	Data3	0	0	0	0	0	0	0	0
  # Header1	Header2	Header3	 	 	 	 	 	F0	F33	F67
  # KidA	Day2	Data3	1	1	1	1	1	1	1	1
  # KidA	Day2	Data3	.	-	0	1	.	-	0	1
  # KidA	Day2	Data3	1	1	1	0	0	.	.	.
  # Header1	Header2	Header3	 	 	 	 	 	F0	F33	F67
  # KidB	Day1	Data3	.	.	.	.	.	.	.	.
  # KidB	Day1	Data3	.	.	.	.	.	.	.	.
  # KidB	Day1	Data3	.	.	.	.	.	.	.	.

  # Some of the column names are empty, in the mock-data above there are five
  # tabs between "Header3" and "F0". Our first job is to create the header row
  # we wish we had (with no blank column names).

  # Break up tab-delimited tokens in first line to get column names
  raw_first_line_tokens <- readr::read_lines(filename, n_max = 1) %>%
    stringr::str_split("\t") %>%
    unlist()

  # F0, F33, etc are time samples at 0ms, 33 ms, etc. The blank column names
  # are time samples that occur before F0. We convert the blanks to times
  # counting back from 0, so the headers around time 0 are "X67  X33 F0  F33".
  blank_cols <- which(raw_first_line_tokens == " ")
  new_times <- backfill_column_names(blank_cols, "X", sampling_rate)
  new_first_line_tokens <- raw_first_line_tokens
  new_first_line_tokens[blank_cols] <- new_times

  # Create new columns for the blanks at the end of the file
  first_col <- which(raw_first_line_tokens == "F33")
  positive_cols <- seq(first_col, length(raw_first_line_tokens))
  positive_times <- fill_column_names(positive_cols, "F", sampling_rate, offset = 0)
  new_first_line_tokens[positive_cols] <- positive_times

  # Assemble desired header line
  first_line <- paste0(new_first_line_tokens, collapse = "\t")

  # Re-read the data. Keep the data rows by removing header-like lines and blank
  # lines.
  lines <- readr::read_lines(filename)
  first_few_col_names <- paste0(head(raw_first_line_tokens), collapse = "\t")
  header_pattern <- paste0("^", first_few_col_names)
  blank_pattern <- "^\\s+$"

  lines_clean <- lines %>%
    str_reject(header_pattern) %>%
    str_reject(blank_pattern)

  # Combine the header row we made with the data rows.
  lines <- c(first_line, lines_clean)

  # Combine the lines into a single blob of text and parse that literal data
  # like a file
  flat_lines <- paste0(lines, collapse = "\n")

  # Make sure that readr treats every column as a character (c). Otherwise it
  # will convert our gaze codes (1, 0, -, .) into numbers (1, 0, NA, NA)
  col_types <- rep_len("c", length(raw_first_line_tokens)) %>%
    paste0(collapse = "")

  readr::read_tsv(
    file = flat_lines,
    trim_ws = TRUE,
    na = c("", "NA"),
    col_types = col_types)
}

fill_column_names <- function(cols, col_prefix, increment, offset = 0, rounding = 0) {
  times <- (seq_along(cols) * increment) + offset
  time_values <- round(times, rounding)
  col_names <- sprintf("%s%s", col_prefix, time_values)
  col_names
}


backfill_column_names <- function(cols, col_prefix, increment, rounding = 0) {
  rev(fill_column_names(cols, col_prefix, increment, rounding, offset = 0))
}


str_reject <- function(string, pattern) {
  string[!stringr::str_detect(string, pattern)]
}


#' Convert DataWiz data into long format
#'
#' DataWiz files have several columns F0, F33, F67, etc. for each time sample.
#' This function converts a dataframe from such a file into a long format, where
#' there is a single time column and single column of gaze responses.
#'
#' @param df a dataframe created by reading a datawiz file
#' @param key_col the name of the new column that holds the time values
#' @param value_col the name of the new column that holds the looking data at
#'   each time sample
#' @return a long data-frame
#' @export
melt_datawiz <- function(df, key_col = "Time", value_col = "Look") {
  # Assuming X[Numbers] and F[Numbers] are the time columns
  time_cols <- stringr::str_subset(colnames(df), "^[XF]\\d+$")

  df %>%
    tidyr::gather_(key_col = key_col, value_col = value_col,
                   gather_cols = time_cols, na.rm = FALSE, convert = FALSE,
                   factor_key = FALSE) %>%
    mutate_(Time = ~ as_time(Time))
}

as_time <- function(xs) {
  xs %>%
    stringr::str_replace("^X", "-") %>%
    stringr::str_replace("^F", "+") %>%
    as.numeric
}


#' Convert to DataWiz codes to AOI names
#'
#' @param xs a vector of DataWiz codes (-, ., 0, 1)
#' @return the vector with NA for "-", "Target" for "1", "Distractor" for "0", and
#'   "tracked" for ".".
#' @export
convert_datawiz_code_to_aoi <- function(xs) {
  xs <- xs %>%
    stringr::str_replace("[-]", "NA") %>%
    stringr::str_replace("1", "Target") %>%
    stringr::str_replace("0", "Distractor") %>%
    stringr::str_replace("[.]", "tracked")
  xs[xs == "NA"] <- NA
  xs
}

as_word <- function(xs) {
  xs <- xs %>%
    stringr::str_replace("[-]", "dash") %>%
    stringr::str_replace("1", "Target") %>%
    stringr::str_replace("0", "Distractor") %>%
    stringr::str_replace("[.]", "dot")
  xs[xs == "NA"] <- NA
  xs
}
tjmahr/littlelisteners documentation built on June 3, 2021, 2:10 p.m.