#' JATOS text file parsing function (multiple results)
#'
#' \code{import_multiple()} parses a JATOS .txt file containing multiple
#' participant's results to a (list of) R object(s).
#'
#' @param jatos_txt_file JATOS .txt file path.
#' @param separate Returns separate lists for non-distanced and distanced
#' groups?
#' @param exclusion Exclude those meeting exclusion criteria from questionnaire
#' / hBayesDM outputs?
#' @param indiv Return individual-level data? This can then be passed to
#' [plot_import()].
#' and plotted for one individual at a time.
#' @param ret_incomplete Return incomplete datasets? They may require manual
#' parsing.
#' @param ... Other arguments passed to [import_single()].
#'
#' @returns \code{list} of \code{tibbles} or a single \code{tibble} if
#' \code{combine == TRUE}.
#'
#' @importFrom utils setTxtProgressBar txtProgressBar
#'
#' @examples \dontrun{
#' # Data not included in package (as ~900MB)
#' all_res_split <- import_multiple(
#' "data-raw/all_995_complete.txt", add_sex = TRUE
#' )
#' }
#'
#' @export
import_multiple <- function(jatos_txt_file,
separate = TRUE,
exclusion = TRUE,
indiv = FALSE,
ret_incomplete = FALSE,
...) {
l <- list(...)
if (is.null(l$hbayesDM)) l$hbayesDM <- FALSE
if (is.null(l$qstns_gillan)) l$qstns_gillan <- TRUE
if (is.null(l$prolific)) l$prolific <- TRUE
if (is.null(l$accuracy)) l$accuracy <- FALSE
if (is.null(l$task_excl)) l$task_excl <- TRUE
if (is.null(l$combine)) l$combine <- FALSE
if (is.null(l$issues)) l$issues <- FALSE
if (is.null(l$add_sex)) l$add_sex <- FALSE
message("Reading text file...")
## to appease R CMD check
subjID <- NULL
all_comp <-
jsonlite::fromJSON(
sprintf("[%s]", paste(
readLines(
jatos_txt_file, encoding = "UTF-8", warn = FALSE
), collapse = ","
))
)
res_splits <-
which(
lapply(
seq_along(all_comp),
function(i) {
any(grepl("Welcome to the experiment.", all_comp[[i]], fixed = TRUE))
}
) %in% TRUE
)
# look along the list of individual component results, note down the list
# numbers where "Welcome to the experiment." occurs. If all results
# complete, should be every 5th list (i.e. 1, 6, 11 etc)
indiv_res_complete <- list()
indiv_res_incomplete <- list()
message("Identifying individual results...")
for (i in seq_along(res_splits)) {
end <- ifelse(
i == length(res_splits), length(all_comp), res_splits[i + 1] - 1
)
result <- all_comp[res_splits[i]:end]
if (length(result) == 5) {
curr_length <- length(indiv_res_complete)
indiv_res_complete[[curr_length + 1]] <- result
} else {
curr_length <- length(indiv_res_incomplete)
indiv_res_incomplete[[curr_length + 1]] <- result
}
}
if (length(indiv_res_incomplete) > 0 && !ret_incomplete) {
message(
strwrap(
"One or more incomplete datasets have been omitted. Set
incomplete=T to retrieve these data (will require manual parsing).",
prefix = " ", initial = ""
)
)
}
if (separate) {
parsed_results_nd <- list()
parsed_results_d <- list()
if (exclusion) {
excluded_nd <- list()
included_nd <- list()
excluded_d <- list()
included_d <- list()
}
} else {
parsed_results <- list()
if (exclusion) {
excluded <- list()
included <- list()
}
}
message("Parsing results...")
pb <- txtProgressBar(
min = 0, max = length(indiv_res_complete), initial = 0, style = 3
)
for (i in seq_along(indiv_res_complete)) {
setTxtProgressBar(pb, i)
if (separate) {
res <- import_single(
indiv_res_complete[[i]],
multiple = TRUE,
accuracy = l$accuracy,
task_excl = l$task_excl,
hbayesDM = l$hbayesDM,
prolific = l$prolific,
qstns_gillan = l$qstns_gillan,
combine = l$combine,
issues = l$issues,
add_sex = l$add_sex
)
if (res$ppt_info$distanced) {
parsed_results_d[[i]] <- res
names(parsed_results_d)[i] <- paste0(
"ID", parsed_results_d[[i]]$ppt_info$subjID
)
if (exclusion) {
if (parsed_results_d[[i]]$ppt_info$exclusion) {
excluded_d[[i]] <- parsed_results_d[[i]]
names(excluded_d)[i] <- paste0(
"ID", parsed_results_d[[i]]$ppt_info$subjID
)
} else {
included_d[[i]] <- parsed_results_d[[i]]
names(included_d)[i] <- paste0(
"ID", parsed_results_d[[i]]$ppt_info$subjID
)
}
}
} else {
parsed_results_nd[[i]] <- res
names(parsed_results_nd)[i] <- paste0(
"ID", parsed_results_nd[[i]]$ppt_info$subjID
)
if (exclusion) {
if (parsed_results_nd[[i]]$ppt_info$exclusion) {
excluded_nd[[i]] <- parsed_results_nd[[i]]
names(excluded_nd)[i] <- paste0(
"ID", parsed_results_nd[[i]]$ppt_info$subjID
)
} else {
included_nd[[i]] <- parsed_results_nd[[i]]
names(included_nd)[i] <- paste0(
"ID", parsed_results_nd[[i]]$ppt_info$subjID
)
}
}
}
} else {
parsed_results[[i]] <- import_single(
indiv_res_complete[[i]],
multiple = TRUE,
accuracy = l$accuracy,
task_excl = l$task_excl,
hbayesDM = l$hbayesDM,
prolific = l$prolific,
qstns_gillan = l$qstns_gillan,
combine = l$combine,
issues = l$issues,
add_sex = l$add_sex
)
names(parsed_results)[i] <- paste0(
"ID", parsed_results[[i]]$ppt_info$subjID
)
if (exclusion) {
if (parsed_results[[i]]$ppt_info$exclusion) {
excluded[[i]] <- parsed_results[[i]]
names(excluded)[i] <- paste0(
"ID", parsed_results[[i]]$ppt_info$subjID
)
} else {
included[[i]] <- parsed_results[[i]]
names(included)[i] <- paste0(
"ID", parsed_results[[i]]$ppt_info$subjID
)
}
}
}
}
if (separate) {
demographics_nd <- list()
questionnaires_nd <- list()
training_nd <- list()
test_nd <- list()
demographics_d <- list()
questionnaires_d <- list()
training_d <- list()
test_d <- list()
ret <- list()
demographics_nd <- lapply(
seq_along(parsed_results_nd),
function(i) demographics_nd[[i]] <- parsed_results_nd[[i]]$ppt_info
)
questionnaires_nd <- lapply(
seq_along(parsed_results_nd),
function(i) {
questionnaires_nd[[i]] <- parsed_results_nd[[i]]$questionnaires
}
)
demographics_d <- lapply(
seq_along(parsed_results_d),
function(i) demographics_d[[i]] <- parsed_results_d[[i]]$ppt_info
)
questionnaires_d <- lapply(
seq_along(parsed_results_d),
function(i) questionnaires_d[[i]] <- parsed_results_d[[i]]$questionnaires
)
ret$non_distanced$ppt_info <- dplyr::left_join(
tibble::as_tibble(
dplyr::bind_rows(demographics_nd)
),
dplyr::bind_rows(questionnaires_nd),
by = c("subjID", "sessionID", "studyID")
)
ret$distanced$ppt_info <- dplyr::left_join(
tibble::as_tibble(dplyr::bind_rows(demographics_d)),
dplyr::bind_rows(questionnaires_d),
by = c("subjID", "sessionID", "studyID")
)
if (exclusion) {
exclusion_list_nd <- ret$non_distanced$ppt_info |>
dplyr::select(subjID, exclusion)
exclusion_list_d <- ret$distanced$ppt_info |>
dplyr::select(subjID, exclusion)
}
training_nd <- lapply(
seq_along(parsed_results_nd),
function(i) training_nd[[i]] <- parsed_results_nd[[i]]$training
)
training_d <- lapply(
seq_along(parsed_results_d),
function(i) training_d[[i]] <- parsed_results_d[[i]]$training
)
ret$non_distanced$training <- dplyr::bind_rows(training_nd)
ret$distanced$training <- dplyr::bind_rows(training_d)
test_nd <- lapply(seq_along(parsed_results_nd),
function(i) test_nd[[i]] <- parsed_results_nd[[i]]$test)
test_d <- lapply(seq_along(parsed_results_d),
function(i) test_d[[i]] <- parsed_results_d[[i]]$test)
ret$non_distanced$test <- dplyr::bind_rows(test_nd)
ret$distanced$test <- dplyr::bind_rows(test_d)
if (l$hbayesDM) {
qlearning_data_nd <- list()
qlearning_data_d <- list()
qlearning_data_nd <-
lapply(
seq_along(parsed_results_nd),
function(i) {
qlearning_data_nd[[i]] <- parsed_results_nd[[i]]$qlearning_data
}
)
qlearning_data_d <-
lapply(
seq_along(parsed_results_d),
function(i) {
qlearning_data_d[[i]] <- parsed_results_d[[i]]$qlearning_data
}
)
if (exclusion) {
qlearning_data_nd <- dplyr::left_join(
dplyr::bind_rows(qlearning_data_nd), exclusion_list_nd, by = "subjID"
)
qlearning_data_d <- dplyr::left_join(
dplyr::bind_rows(qlearning_data_d), exclusion_list_d, by = "subjID"
)
ret$non_distanced$qlearning_data <- qlearning_data_nd |>
dplyr::filter(exclusion == 0) |>
dplyr::select(-exclusion)
ret$distanced$qlearning_data <- qlearning_data_d |>
dplyr::filter(exclusion == 0) |>
dplyr::select(-exclusion)
} else {
ret$non_distanced$qlearning_data <- dplyr::bind_rows(qlearning_data_nd)
ret$distanced$qlearning_data <- dplyr::bind_rows(qlearning_data_d)
}
}
if (l$qstns_gillan) {
gillan_questions_nd <- list()
gillan_questions_d <- list()
gillan_questions_nd <-
lapply(
seq_along(parsed_results_nd),
function(i) {
gillan_questions_nd[[i]] <- parsed_results_nd[[i]]$gillan_questions
}
)
gillan_questions_d <-
lapply(
seq_along(parsed_results_d),
function(i) {
gillan_questions_d[[i]] <- parsed_results_d[[i]]$gillan_questions
}
)
if (exclusion) {
gillan_questions_nd <-
dplyr::left_join(
dplyr::bind_rows(
gillan_questions_nd
), exclusion_list_nd, by = "subjID"
)
gillan_questions_d <-
dplyr::left_join(
dplyr::bind_rows(gillan_questions_d),
exclusion_list_d,
by = "subjID"
)
gillan_questions_nd <- gillan_questions_nd |>
dplyr::filter(exclusion == 0) |>
dplyr::select(-exclusion)
gillan_questions_nd[is.na(gillan_questions_nd)] <- 0
ret$non_distanced$gillan_questions <- gillan_questions_nd
gillan_questions_d <- gillan_questions_d |>
dplyr::filter(exclusion == 0) |>
dplyr::select(-exclusion)
gillan_questions_d[is.na(gillan_questions_d)] <- 0
ret$distanced$gillan_questions <- gillan_questions_d
} else {
gillan_questions_nd <- dplyr::bind_rows(gillan_questions_nd)
gillan_questions_nd[is.na(gillan_questions_nd)] <- 0
ret$non_distanced$gillan_questions <- gillan_questions_nd
gillan_questions_d <- dplyr::bind_rows(gillan_questions_d)
gillan_questions_d[is.na(gillan_questions_d)] <- 0
ret$distanced$gillan_questions <- gillan_questions_d
}
}
if (l$combine) {
combined_nd <- list()
combined_d <- list()
combined_nd <- lapply(
seq_along(parsed_results_nd),
function(i) combined_nd[[i]] <- parsed_results_nd[[i]]$full_data
)
combined_d <- lapply(
seq_along(parsed_results_d),
function(i) combined_d[[i]] <- parsed_results_d[[i]]$full_data
)
ret$non_distanced$full_data <- dplyr::bind_rows(combined_nd)
ret$distanced$full_data <- dplyr::bind_rows(combined_d)
}
if (l$issues) {
issues_nd <- list()
issues_d <- list()
issues_nd <- lapply(
seq_along(parsed_results_nd),
function(i) issues_nd[[i]] <- parsed_results_nd[[i]]$issues_comments
)
issues_d <- lapply(
seq_along(parsed_results_d),
function(i) issues_d[[i]] <- parsed_results_d[[i]]$issues_comments
)
ret$non_distanced$issues_comments <-
tibble::as_tibble(dplyr::bind_rows(issues_nd))
ret$distanced$issues_comments <-
tibble::as_tibble(dplyr::bind_rows(issues_d))
}
if (indiv) {
ret$non_distanced$individual_results <- parsed_results_nd
ret$distanced$individual_results <- parsed_results_d
if (exclusion) {
ret$non_distanced$individual_results$included <-
included_nd[-which(sapply(included_nd, is.null))]
ret$non_distanced$individual_results$excluded <-
tryCatch(
excluded_nd[-which(sapply(excluded_nd, is.null))],
error = function(e) {
message(
"\nNo excluded non-distanced participants, set exclusion=F."
)
}
)
ret$distanced$individual_results$included <-
included_d[-which(sapply(included_d, is.null))]
ret$distanced$individual_results$excluded <-
tryCatch(
excluded_d[-which(sapply(excluded_d, is.null))],
error = function(e) {
message("\nNo excluded distanced participants, set exclusion=F.")
}
)
}
}
if (ret_incomplete) {
ret$incomplete_results <- indiv_res_incomplete
}
} else {
demographics <- list()
questionnaires <- list()
training <- list()
test <- list()
ret <- list()
demographics <- lapply(
seq_along(parsed_results),
function(i) demographics[[i]] <- parsed_results[[i]]$ppt_info
)
questionnaires <- lapply(
seq_along(parsed_results),
function(i) questionnaires[[i]] <- parsed_results[[i]]$questionnaires
)
ret$ppt_info <- dplyr::left_join(
tibble::as_tibble(dplyr::bind_rows(demographics)),
dplyr::bind_rows(questionnaires),
by = c("subjID", "sessionID", "studyID")
)
if (exclusion) {
exclusion_list <- ret$ppt_info |> dplyr::select(subjID, exclusion)
}
training <- lapply(
seq_along(parsed_results),
function(i) training[[i]] <- parsed_results[[i]]$training
)
ret$training <- dplyr::bind_rows(training)
test <- lapply(
seq_along(parsed_results),
function(i) test[[i]] <- parsed_results[[i]]$test
)
ret$test <- dplyr::bind_rows(test)
if (l$hbayesDM) {
qlearning_data <- list()
qlearning_data <- lapply(
seq_along(parsed_results),
function(i) qlearning_data[[i]] <- parsed_results[[i]]$qlearning_data
)
if (exclusion) {
qlearning_data <- dplyr::left_join(
dplyr::bind_rows(qlearning_data), exclusion_list, by = "subjID"
)
ret$qlearning_data <- qlearning_data |>
dplyr::filter(exclusion == 0) |>
dplyr::select(-exclusion)
} else {
ret$qlearning_data <- dplyr::bind_rows(qlearning_data)
}
}
if (l$qstns_gillan) {
gillan_questions <- list()
gillan_questions <- lapply(
seq_along(parsed_results),
function(i) {
gillan_questions[[i]] <- parsed_results[[i]]$gillan_questions
}
)
if (exclusion) {
gillan_questions <- dplyr::left_join(
dplyr::bind_rows(gillan_questions), exclusion_list, by = "subjID"
)
gillan_questions <- gillan_questions |>
dplyr::filter(exclusion == 0) |>
dplyr::select(-exclusion)
gillan_questions[is.na(gillan_questions)] <- 0
ret$gillan_questions <- gillan_questions
} else {
gillan_questions <- dplyr::bind_rows(gillan_questions)
gillan_questions[is.na(gillan_questions)] <- 0
ret$gillan_questions <- gillan_questions
}
}
if (l$combine) {
combined <- list()
combined <- lapply(
seq_along(parsed_results),
function(i) combined[[i]] <- parsed_results[[i]]$full_data
)
ret$full_data <- dplyr::bind_rows(combined)
}
if (l$issues) {
issues <- list()
issues <- lapply(
seq_along(parsed_results),
function(i) issues[[i]] <- parsed_results[[i]]$issues_comments
)
ret$issues_comments <- tibble::as_tibble(dplyr::bind_rows(issues))
}
if (indiv) {
ret$individual_results <- parsed_results
if (exclusion) {
ret$individual_results$included <-
included[-which(sapply(included, is.null))]
ret$individual_results$excluded <-
tryCatch(
excluded[-which(sapply(excluded, is.null))],
error = function(e) {
message("\nNo excluded participants, set exclusion=F.")
}
)
}
}
if (ret_incomplete) {
ret$incomplete_results <- indiv_res_incomplete
}
}
return(ret)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.