#' Read eyetracking data from a datawiz file
#'
#' @param filename a txt file generated by datawiz
#' @param sampling_rate the rate of the video recording in ms. By default, the
#' value is 33.3 for 1 frame every 33.3 ms.
#' @return a dataframe containing the cleaned-up eyetracking data
#' @export
#' @details
#' The files exported by DataWiz are a series of tab-separated data files all
#' combined into a single file. This means that the header row (with the column
#' names separated by tabs) will be repeated throughout the file. These repeated
#' header rows are removed.
#'
#' The header rows indicate the time of eyetracking samples by columns named
#' "F0", "F33", "F67", etc. There are also columns with blank names before
#' column "F0". These are also looking samples before "F0". This function
#' back-fills the column names so that the first column before "F0" changes from
#' " " to "X33", where the X indicates a negative time sample.
read_datawiz <- function(filename, sampling_rate = 33.3333) {
# Datawiz files are tab-separated files containing several tables concatenated
# together, so there are multiple rows with the column names:
# Header1 Header2 Header3 F0 F33 F67
# KidA Day1 Data3 - - - - - - - -
# KidA Day1 Data3 . . . . . . . .
# KidA Day1 Data3 0 0 0 0 0 0 0 0
# Header1 Header2 Header3 F0 F33 F67
# KidA Day2 Data3 1 1 1 1 1 1 1 1
# KidA Day2 Data3 . - 0 1 . - 0 1
# KidA Day2 Data3 1 1 1 0 0 . . .
# Header1 Header2 Header3 F0 F33 F67
# KidB Day1 Data3 . . . . . . . .
# KidB Day1 Data3 . . . . . . . .
# KidB Day1 Data3 . . . . . . . .
# Some of the column names are empty, in the mock-data above there are five
# tabs between "Header3" and "F0". Our first job is to create the header row
# we wish we had (with no blank column names).
# Break up tab-delimited tokens in first line to get column names
raw_first_line_tokens <- readr::read_lines(filename, n_max = 1) %>%
stringr::str_split("\t") %>%
unlist()
# F0, F33, etc are time samples at 0ms, 33 ms, etc. The blank column names
# are time samples that occur before F0. We convert the blanks to times
# counting back from 0, so the headers around time 0 are "X67 X33 F0 F33".
blank_cols <- which(raw_first_line_tokens == " ")
new_times <- backfill_column_names(blank_cols, "X", sampling_rate)
new_first_line_tokens <- raw_first_line_tokens
new_first_line_tokens[blank_cols] <- new_times
# Create new columns for the blanks at the end of the file
first_col <- which(raw_first_line_tokens == "F33")
positive_cols <- seq(first_col, length(raw_first_line_tokens))
positive_times <- fill_column_names(positive_cols, "F", sampling_rate, offset = 0)
new_first_line_tokens[positive_cols] <- positive_times
# Assemble desired header line
first_line <- paste0(new_first_line_tokens, collapse = "\t")
# Re-read the data. Keep the data rows by removing header-like lines and blank
# lines.
lines <- readr::read_lines(filename)
first_few_col_names <- paste0(head(raw_first_line_tokens), collapse = "\t")
header_pattern <- paste0("^", first_few_col_names)
blank_pattern <- "^\\s+$"
lines_clean <- lines %>%
str_reject(header_pattern) %>%
str_reject(blank_pattern)
# Combine the header row we made with the data rows.
lines <- c(first_line, lines_clean)
# Combine the lines into a single blob of text and parse that literal data
# like a file
flat_lines <- paste0(lines, collapse = "\n")
# Make sure that readr treats every column as a character (c). Otherwise it
# will convert our gaze codes (1, 0, -, .) into numbers (1, 0, NA, NA)
col_types <- rep_len("c", length(raw_first_line_tokens)) %>%
paste0(collapse = "")
readr::read_tsv(
file = flat_lines,
trim_ws = TRUE,
na = c("", "NA"),
col_types = col_types)
}
fill_column_names <- function(cols, col_prefix, increment, offset = 0, rounding = 0) {
times <- (seq_along(cols) * increment) + offset
time_values <- round(times, rounding)
col_names <- sprintf("%s%s", col_prefix, time_values)
col_names
}
backfill_column_names <- function(cols, col_prefix, increment, rounding = 0) {
rev(fill_column_names(cols, col_prefix, increment, rounding, offset = 0))
}
str_reject <- function(string, pattern) {
string[!stringr::str_detect(string, pattern)]
}
#' Convert DataWiz data into long format
#'
#' DataWiz files have several columns F0, F33, F67, etc. for each time sample.
#' This function converts a dataframe from such a file into a long format, where
#' there is a single time column and single column of gaze responses.
#'
#' @param df a dataframe created by reading a datawiz file
#' @param key_col the name of the new column that holds the time values
#' @param value_col the name of the new column that holds the looking data at
#' each time sample
#' @return a long data-frame
#' @export
melt_datawiz <- function(df, key_col = "Time", value_col = "Look") {
# Assuming X[Numbers] and F[Numbers] are the time columns
time_cols <- stringr::str_subset(colnames(df), "^[XF]\\d+$")
df %>%
tidyr::gather_(key_col = key_col, value_col = value_col,
gather_cols = time_cols, na.rm = FALSE, convert = FALSE,
factor_key = FALSE) %>%
mutate_(Time = ~ as_time(Time))
}
as_time <- function(xs) {
xs %>%
stringr::str_replace("^X", "-") %>%
stringr::str_replace("^F", "+") %>%
as.numeric
}
#' Convert to DataWiz codes to AOI names
#'
#' @param xs a vector of DataWiz codes (-, ., 0, 1)
#' @return the vector with NA for "-", "Target" for "1", "Distractor" for "0", and
#' "tracked" for ".".
#' @export
convert_datawiz_code_to_aoi <- function(xs) {
xs <- xs %>%
stringr::str_replace("[-]", "NA") %>%
stringr::str_replace("1", "Target") %>%
stringr::str_replace("0", "Distractor") %>%
stringr::str_replace("[.]", "tracked")
xs[xs == "NA"] <- NA
xs
}
as_word <- function(xs) {
xs <- xs %>%
stringr::str_replace("[-]", "dash") %>%
stringr::str_replace("1", "Target") %>%
stringr::str_replace("0", "Distractor") %>%
stringr::str_replace("[.]", "dot")
xs[xs == "NA"] <- NA
xs
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.