R/expfactory.R

Defines functions plot_by_cue_type process_ant process_expfactory_experiment process_new_expfactory_survey process_expfactory_survey process_surveys3

Documented in process_ant process_expfactory_experiment process_expfactory_survey process_new_expfactory_survey process_surveys3

library(jsonlite)
library(tidyverse)
library(stringr)

#' @export
plot_by_cue_type <- function(df, c, y) {
  df %>%
    filter(cue == c) %>%
    ggplot(aes(factor(time), rt, group=condition, color=condition, linetype=condition)) +
    stat_summary(geom="line", fun.data=mean_cl_boot, position=position_dodge(width=.1)) +
    geom_hline(yintercept = 0) +
    stat_summary(geom="pointrange") +
    ylab(y) + xlab("Time")
}

# FIXME
# I think this got developed in study 3 and should be factored in here at some point
# process_meditation_log <- function(log_file, p) {
#   if(!file.exists(log_file)){
#     return(data_frame(p=p,file=log_file))
#   }
#   log <- read.csv(log_file, header = TRUE) %>%
#     select(-Date,-Time,-Place,-Guided,-Your.notes.on.the.meditation..if.any.) %>%
#     rename(recommended = Recommended.minimum.minutes, actual = Actual.minutes)
#     mutate(recommended = as.numeric(recommended), actual = as.numeric(actual)) %>%
# }

#' Process expfactory Attention Network Test data file
#'
#' \code{process_ant()} processes data files generated by the The Experiment Factory
#' implementation of the Attention Network Task
#' (\url{https://expfactory.github.io/attention_network_task.html}).
#'
#' @param ant_file Path to ANT file
#' @param p Participant number
#' @param time Time-point at which participant completed ANT
#' @param json=TRUE Boolean indicating whether ANT data format is JSON (TRUE) or CSV (FALSE)
#' @keywords expfactory ANT
#' @export
#' @return Data frame
process_ant <- function(ant_file, p, time, json=TRUE) {
  if(!file.exists(ant_file)){
    return(data_frame(p=p,t=time, file=ant_file))
  }
  if (json) {
    ant <- process_expfactory_experiment(ant_file)
  } else {
    ant <- read.csv(ant_file, header = TRUE)
  }
  ant %>% filter(exp_stage == 'test') %>%
    filter(trial_type == 'poldrack-single-stim') %>%
    filter(! is.na(correct_response)) %>%
    select(-trial_id,-trial_index,-internal_node_id,-text,-timing_post_trial,-view_history,-stimulus,-trial_type,-time_elapsed) %>%
    mutate(p = p) %>%
    # no subject column in expfactory 3.X ant data
    mutate(subject = as.factor(p)) %>%
    mutate(file=ant_file, t=time)
}

#' Process expfactory experiment JSON data.
#'
#' Process expfactory experiment JSON data.
#' @param path Path to data file
#' @keywords expfactory
#' @export
#' @return Data frame (long format)
process_expfactory_experiment <- function(path) {
  if ( file.exists(path) ) {
    # POSTed data gets JSON encoded at the remote, so if it's JSON to start,
    # with it's DOUBLE JSON when written and needs decoding twice
    l <- jsonlite::read_json(path, simplifyVector = TRUE)
    jsonlite::fromJSON(unlist(l[1]))
  } else {
    message(path, ': file not found')
    return(data.frame)
  }
}

## To compare speed of different solutions
# mb <- microbenchmark::microbenchmark(
#   ldply(surveys, process_surveys),
#   ldply(surveys, process_surveys2, paths),
#   process_surveys3(".")
# )

#' Process expfactory 3.X survey data
#'
#' Process expfactory 3.X survey data.
#' @param token token number
#' @param survey path to JSON survey file
#' @param unflatten=FALSE Unflatten "flat" JSON
#' @keywords expfactory
#' @export
#' @return Data frame (long format)
process_new_expfactory_survey <- function(token, survey, flat = FALSE) {
  if ( file.exists(survey) ) {
    json <- jsonlite::read_json(survey, simplifyVector = TRUE)
    if (! flat) {
      df <- json
    } else {
      # "flattened" JSON to data frame
      # https://github.com/expfactory/expfactory/issues/76
      df <- stack(unlist(json)) %>%
        rename(value = values) %>%
        mutate(q = gsub("data\\[(\\d+)\\]\\[.*?\\]$", '\\1', ind),
               key = gsub("data\\[\\d+\\]\\[(.*?)\\]$", '\\1', ind)) %>%
        select(-ind) %>%
        filter(key %in% c('name','text','value')) %>%
        spread(key = key, value = value) %>%
        select(-q)
    }

    df %>%
      mutate(
        Token = token,
        qnum = as.numeric(gsub(".*(\\d+).*$", '\\1', name)) - 1,
        question = text,
        survey = survey) %>%
      select(survey, value, Token, question)
  } else {   # FIXME: handle missing data files
    message(survey, ': file not found')
  }
}

#' Process expfactory survey data
#'
#' Process expfactory survey data
#' @param p Participant Number
#' @keywords expfactory
#' @export
#' @param survey Survey name
#' @return Data frame (long format)
#' # http://expfactory.readthedocs.io/en/latest/development.html#contributing-to-surveys
process_expfactory_survey <- function(path, survey) {
  p <- as.integer(gsub(".*/(\\d+)$", '\\1', path))
  f <- paste(path, "/", p, "_", survey, ".json", sep = "")
  if ( file.exists(f) ) {
    df <- jsonlite::read_json(f, simplifyVector = TRUE)
    df %>%
      mutate(
        p = p,
        qnum = as.numeric(gsub(".*(\\d+).*$", '\\1', name)) - 1,
        question = text,
        #question = parse_number(name) - 1,
        survey = survey) %>%
      select(survey, value, p, question)
  } else {   # FIXME: handle missing data files
    message(f, ': file not found')
  }
}

#' Process expfactory survey data
#'
#' @param base_path data path
#'
#' @return data frame with [survey, value, participant, question]
process_surveys3 <- function(base_path){
  all_json_files = list.files(base_path, pattern = ".*\\.json", recursive = TRUE)
  df <- NULL
  for (f in all_json_files ) {
    df <- bind_rows(
      df,
      jsonlite::read_json(f, simplifyVector = TRUE) %>%
        mutate(file = f)
    )
  }

  df %>%
    mutate(
      #      survey = str_extract(name, ".*(?=_survey?)"),    # use survey name from JSON, different result and as an alternative to next line
      survey = str_extract(file, "(?<=_).*?(?=.json)"), # use survey name from filename
      p = str_extract(file, "^[:digit:]*(?=/?)"),       # extract the participant number from the directory name
      question = parse_number(name) - 1) %>%
    select(survey, value, p, question)
}
earcanal/manjushri documentation built on May 23, 2019, 7:34 a.m.