R/read_vcx.R

#' Read And Parse .vcx Files
#'
#' This function allows you to read and parse raw vcx files generated by iCoder
#' @description \code{read_vcx()} reads parses raw .vcx coding files to get prescreening information
#'   to filter unusable trials from eyetracking data. The bulk of the function is
#'   doing extraction of strings from deeply nested XML data structure.
#' @param file Either a path to a file, a connection, or literal data.
#' @export
#' @examples
#'   \dontrun{d <- read_vcx(file = "187_Habla2_25_Clips.vcx")}

read_vcx <- function(file) {
  xml_data <- xml_to_list(file)
  # extract unused trials
  unused_trials <- xml_data[["dict"]][["dict"]][["array"]] %>% unlist(use.names = F) %>% as.integer()
  # extract all integer-type entries (list of integers), meta info (age in months, subject id)
  integer_list <- xml_data[["dict"]][["dict"]][names(xml_data[["dict"]][["dict"]]) == "integer"]
  sub_info <- get_sub_info(integer_list)
  # extract pre-screened trials
  ps_trials <- xml_data[["dict"]][["dict"]][["dict"]][["dict"]][names(xml_data[["dict"]][["dict"]][["dict"]][["dict"]]) == "dict"]
  d_ps <- purrr::map_df(ps_trials, get_ps_info)
  # return the vcx data frame
  build_ps_df(d_ps, sub_info, unused_trials)
}

xml_to_list <- function(file) {
  XML::xmlParse(file = file) %>% XML::xmlToList()
}

get_sub_info <- function(integer_list) {
  if (length(integer_list) == 3) {
    id <- integer_list[[3]]
    age <- integer_list[[2]]
  } else {
    id <- integer_list[[2]]
    age <- "not_in_iCoder_file"
  }
  list(id = id, age = age)
}

get_ps_info <- function(d) {
  d <- data.frame(trial_number = d$integer, prescreen_notes = d$string,
                  stringsAsFactors = FALSE)
}

build_ps_df <- function(df_ps, sub_info, unused_trials) {
  n_trials <- unused_trials %>% max()
  df_final <- data.frame(matrix(ncol = 0, nrow = n_trials)) %>%
    dplyr::mutate(SubjectNum = sub_info$id,
                  age = sub_info$age,
                  trial_number = 1:nrow(.) %>% as.character())

  # check if there are prescreened out trials and add to data frame if they exist
  # this handles the case where there were no trials prescreened out in the vcx file
  if(nrow(df_ps) != 0) {
    df_final <- dplyr::left_join(df_final, df_ps, by = "trial_number")
  } else {
    df_final <- df_final %>% dplyr::mutate(prescreen_notes = NA)
  }

  df_final %>%
    dplyr::mutate(prescreen_notes = ifelse(trial_number %in% unused_trials,
                                    "unused_trial",
                                    prescreen_notes)) %>%
    dplyr::mutate(prescreen_notes = ifelse(is.na(prescreen_notes),
                                    "good_trial",
                                    prescreen_notes)) %>%
    dplyr::rename(ParticipantName = SubjectNum) %>%
    dplyr::mutate(trial_number = as.integer(trial_number))
}
kemacdonald/Rtobii documentation built on May 4, 2019, 1:21 p.m.