#' Subset columns from processed ACE/SEA modules
#'
#' User-friendly wrapper to choose demographics and selected
#' outcome variables from ACE/SEA data processed with \code{\link{proc_by_module}}.
#'
#' @export
#' @importFrom dplyr matches mutate select
#' @importFrom magrittr %>%
#' @importFrom purrr map
#' @importFrom rlang enexprs syms !!!
#'
#' @param df a df, output by \code{\link{proc_by_module}}, containing processed
#' ACE or SEA data.
#' @param demo_names a character vector containing the names of demographic columns
#' to include in the output. Column names will be matched exactly.
#' @param metric_names a character vector containing partial names of metric columns
#' to \emph{include} in the output. All column names containing any of the inputs
#' will be included in output.
#' @param metric_names_exclude a character vector containing partial names of metric
#' columns to \emph{exclude} from the output. All column names containing any of the
#' inputs will be excluded from output. This argument supersedes the prior, so any
#' columns matching both a partial name from \code{metric_names} and a partial name
#' from \code{metric_names_exclude} will be excluded.
#' @return a df, similar in structure to \code{proc}, but containing only the
#' subsetted columns.
post_reduce_cols <- function (df,
demo_names = c("pid", "age", "grade", "gender"),
metric_names = c(),
metric_names_exclude = c()) {
call_metric_names <- paste0(metric_names, collapse = "|")
call_metric_names_exclude <- paste0(metric_names_exclude, collapse = "|")
if (all(c("module", "proc") %in% names(df))) {
# if was processed with output = "long"
df_reduced <- df %>%
mutate(proc = map(proc, ~.x %>%
select(!!!syms(demo_names), matches(call_metric_names))))
if (length(metric_names_exclude) > 0) {
df_reduced %<>%
mutate(proc = map(proc, ~.x %>%
select(-matches(call_metric_names_exclude))))
}
} else {
# if was processed with output = "wide"
df_reduced <- df %>%
select(!!!syms(demo_names), matches(call_metric_names))
if (length(metric_names_exclude) > 0) {
df_reduced %<>% select(-matches(call_metric_names_exclude))
}
}
return(df_reduced)
}
#' Scrub processed data with below-chance accuracy
#'
#' User-friendly wrapper to replace below-chance records with \code{NA}
#' in ACE data processed with \code{\link{proc_by_module}}. Currently only
#' compatible with ACE (SEA not yet implemented),
#'
#' @export
#' @importFrom dplyr case_when everything filter left_join mutate select tibble
#' @importFrom magrittr %>% %<>%
#' @importFrom purrr map map2 reduce
#' @importFrom rlang sym !!
#'
#' @param df a df, output by \code{\link{proc_by_module}}, containing processed
#' ACE data.
#' @param app_type character. What app type produced this data? One of
#' \code{c("classroom", "explorer")}. Must be specified.
#' @param overall Also scrub ".overall" data? Defaults to \code{TRUE}.
#' @param cutoff_dprime Maximum value of d' to replace with \code{NA}, for
#' relevant tasks (ACE Tap and Trace, SAAT). Defaults to 0.
#' @param cutoff_2choice Maximum value of accuracy to replace with \code{NA},
#' for 2-response tasks (ACE Flanker, Boxed). Defaults to 0.5.
#' @param cutoff_4choice Maximum value of accuracy to replace with \code{NA},
#' for 4-response tasks (ACE Stroop, Task Switch). Defaults to 0.25.
#' @param cutoff_5choice Maximum value of accuracy to replace with \code{NA},
#' for 5-response tasks (ACE Color Selection). Defaults to 0.2.
#' @param cutoff_k Maximum \emph{relative} value of Filter k to replace with
#' \code{NA}. Defaults to 1, which corresponds to 1 target item in both
#' 2-target conditions and 4-target conditions.
#' @param extra_demos Character vector specifying any custom-added demographics
#' columns (beyond app defaults) to pass through the function. Defaults to \{code{NULL}.
#' @return a df, similar in structure to \code{proc}, but with below-cutoff values in
#' certain columns converted to \code{NA}.
post_clean_chance <- function (df,
app_type = c("classroom", "explorer"),
overall = TRUE,
cutoff_dprime = 0,
cutoff_2choice = 0.5,
cutoff_4choice = 0.25,
cutoff_5choice = 0.2,
cutoff_k = 1,
extra_demos = NULL) {
stopifnot(length(app_type) == 1)
stopifnot(app_type != "sea")
metric_cols <- tibble(module = c(TNT,
STROOP,
FLANKER,
TASK_SWITCH,
BOXED,
SAAT_SUS,
SAAT_IMP,
FILTER,
ADP,
COLOR_SELECT))
# If SEA modules were getting included they would get concatenated right above here
# and in the analogous metric_cols bits below
stopifnot(length(app_type) == 1)
is_ace = app_type != "sea"
# Number of responses went from 4 in Classroom to 2 in Explorer
if (app_type == "classroom") {
cutoff_taskswitch <- cutoff_4choice
} else if (app_type == "explorer") {
cutoff_taskswitch <- cutoff_2choice
} else {
cutoff_taskswitch <- NA_real_
}
if (overall) {
metric_cols %<>%
mutate(metric = list(c("sdt_dprime.overall"),
c("acc_mean.overall"),
c("acc_mean.overall"),
c("acc_mean.overall"),
c("acc_mean.overall"),
c("sdt_dprime.overall"),
c("sdt_dprime.overall"),
# all of these won't be used together! to be used separately later
c("k.r2b0", "k.r2b2", "k.r2b4", "k.r4b0", "k.r4b2"),
c("acc_mean.overall"),
c("acc_mean.overall.strict")))
} else {
metric_cols %<>%
mutate(metric = list(c("sdt_dprime.tap_only"),
c("acc_mean.congruent"),
c("acc_mean.congruent"),
c("acc_mean.stay"),
c("acc_mean.feature_4"),
c("sdt_dprime.overall"),
c("sdt_dprime.overall"),
c("k.r2b0", "k.r2b2", "k.r2b4", "k.r4b0", "k.r4b2"),
c("acc_mean.overall"),
c("acc_mean.overall.strict")))
}
metric_cols %<>%
mutate(full = map2(module, metric, ~paste(.x, .y, sep = ".")),
cutoff = case_when(module %in% c(TNT, SAAT_SUS, SAAT_IMP) ~ cutoff_dprime,
module == STROOP ~ cutoff_4choice,
module %in% c(FLANKER,
BOXED,
ADP) ~ cutoff_2choice,
module == FILTER ~ cutoff_k,
module == TASK_SWITCH ~ cutoff_taskswitch,
module == COLOR_SELECT ~ cutoff_5choice,
TRUE ~ NA_real_)) %>%
# this step should implicitly take of making metric_cols ace only or sea only
# depending on the input data
filter(module %in% get_valid_modules(df, is_ace = is_ace))
if (!(all(c("module", "proc") %in% names(df)))) {
# if "wide" form, coerce to "long-ish" first
wide <- TRUE
check_extra_demos(df, wide, extra_demos, is_ace = is_ace)
valid_demos <- get_valid_demos(df, is_ace = is_ace)
# because for ACE Classroom, the "safe" demos extend beyond the shared bindable demos
valid_demos_rejoin <- valid_demos
filter_col <- sym("full")
df <- proc_wide_to_long(df, extra_demos, is_ace = is_ace)
} else {
wide <- FALSE
filter_col <- sym("metric")
check_extra_demos(df, wide, extra_demos, is_ace = is_ace)
valid_demos <- get_valid_demos(df$proc[[1]], is_ace = is_ace)
}
valid_demos <- c(valid_demos, extra_demos)
# so the unbindable non-shared ACE Classroom demos don't get coerced to logical in the "bad" data
if (app_type == "classroom") valid_demos <- c(valid_demos, get_ace_classroom_wide_bids(df))
df %<>%
left_join(metric_cols %>%
select(module,
filter_cols = !!filter_col,
cutoff),
by = "module") %>%
mutate(non_demo_cols = map(proc, ~names(.x)[!(names(.x) %in% valid_demos)]),
call_filter_cols_bad = map2(filter_cols, cutoff, ~map_call2_rel("<=", .x, .y)),
call_filter_cols_good = map2(filter_cols, cutoff, ~map_call2_rel(">", .x, .y)))
df_scrubbed <- df %>%
filter(module != FILTER) %>%
proc_na_all_by_some_cols()
if (FILTER %in% df$module) {
# divide this up to do it separately for r2b0, r2b2, r2b4, r4b0, r4b2
filter_conditions <- c("r2b0", "r2b2", "r2b4", "r4b0", "r4b2")
filter_toscrub <- df %>%
filter(module == FILTER)
# isolate the demo cols first because they need to get bound on to the individual ks later
# because the ks have to be scrubbed separately colwise
filter_scrubbed <- filter_toscrub$proc[[1]] %>%
select(-filter_toscrub$non_demo_cols[[1]][grepl("r.b.$", filter_toscrub$non_demo_cols[[1]])])
for (i in 1:length(filter_conditions)) {
this_filter_scrub_cols <- filter_toscrub$non_demo_cols[[1]][endsWith(filter_toscrub$non_demo_cols[[1]], filter_conditions[i])]
this_filter_proc <- filter_toscrub$proc[[1]] %>%
select(!!Q_COL_PID, !!Q_COL_BID, this_filter_scrub_cols)
this_filter_scrubbed <- na_all_by_some_cols(this_filter_proc,
filter_toscrub$call_filter_cols_bad[[1]][i],
filter_toscrub$call_filter_cols_good[[1]][i],
this_filter_scrub_cols)
filter_scrubbed <- left_join(filter_scrubbed, this_filter_scrubbed,
by = c(COL_PID, COL_BID))
}
filter_scrubbed <- tibble(module = FILTER,
proc = list(FILTER = filter_scrubbed))
df_scrubbed %<>%
bind_rows(filter_scrubbed)
}
# If "wide" form:
if (wide) {
df_scrubbed$proc %>%
reduce(dplyr::full_join, by = valid_demos_rejoin) %>%
select(valid_demos, everything()) %>%
return()
} else {
return (df_scrubbed)
}
}
#' Scrub processed data with too few trials
#'
#' User-friendly wrapper to replace records with too many no-responses with \code{NA}
#' in ACE data processed with \code{\link{proc_by_module}}.
#'
#' @export
#' @importFrom dplyr bind_rows everything filter mutate mutate_at select
#' @importFrom magrittr %>% %<>%
#' @importFrom purrr map map2 map_if reduce
#'
#' @param df a df, output by \code{\link{proc_by_module}}, containing processed
#' ACE or SEA data.
#' @param app_type character. What app type produced this data? One of
#' \code{c("classroom", "explorer")}. Must be specified.
#' @param min_trials Minimum number of trials to require in most restrictive condition.
#' Defaults to 5. This condition is checked against the \code{*_count} summary columns,
#' that count all trials with a valid response time (and all no-go trials, if a response
#' was not expected.)
#' @param extra_demos Character vector specifying any custom-added demographics
#' columns (beyond app defaults) to pass through the function. Defaults to \{code{NULL}.
#' @return a df, similar in structure to \code{proc}, but with records with too few trials
#' converted to \code{NA}.
post_clean_low_trials <- function (df,
app_type = c("classroom", "explorer"),
min_trials = 5,
extra_demos = NULL) {
stopifnot(length(app_type) == 1)
stopifnot(app_type != "sea")
is_ace = app_type != "sea"
if (!(all(c("module", "proc") %in% names(df)))) {
# if "wide" form, coerce to "long-ish" first
wide <- TRUE
check_extra_demos(df, wide, extra_demos, is_ace = is_ace)
valid_demos <- get_valid_demos(df, is_ace = is_ace)
valid_demos_rejoin <- valid_demos
df <- proc_wide_to_long(df, extra_demos, is_ace = is_ace)
} else {
wide <- FALSE
# as long as the demos are present somewhere this should be ok
# since these aren't join cols later it doesn't have to be a perfect match
check_extra_demos(df, wide, extra_demos, is_ace = is_ace)
valid_demos <- get_valid_demos(df$proc[[1]], is_ace = is_ace)
}
valid_demos <- c(valid_demos, extra_demos)
if (app_type == "classroom") valid_demos <- c(valid_demos, get_ace_classroom_wide_bids(df))
# The easiest way to do it will be to work inside a df that only contains one module's data
# aka "long" form
# If "long" form:
# filter just the bad ones, select only their demo cols, bind_rows back on to the good ones
# will then populate all the non-demo cols with NA
df %<>%
mutate(filter_cols = map(proc, ~names(.x)[grepl("count", names(.x)) &
!grepl("half", names(.x)) &
!grepl("correct", names(.x)) &
!grepl("cost", names(.x)) &
!grepl("start", names(.x)) &
!grepl("early", names(.x)) &
!grepl("hit", names(.x)) &
!grepl("miss", names(.x)) &
!grepl("cr", names(.x)) &
!grepl("fa", names(.x)) &
!grepl("practice", names(.x))]),
filter_cols = map_if(filter_cols, module == "SAAT", ~.x[!grepl("overall", .x)], .else = ~.x),
non_demo_cols = map(proc, ~names(.x)[!(names(.x) %in% valid_demos)]),
call_filter_cols_bad = map(filter_cols, ~map_call2_rel("<", .x, min_trials)),
call_filter_cols_good = map(filter_cols, ~map_call2_rel(">=", .x, min_trials, include_nas = TRUE)))
df_scrubbed <- proc_na_all_by_some_cols(df)
# If "wide" form:
if (wide) {
df_scrubbed$proc %>%
reduce(dplyr::full_join, by = valid_demos_rejoin) %>%
select(valid_demos, everything()) %>%
return()
} else {
return (df_scrubbed)
}
}
#' Helper function that takes long or long-ish ACE proc data
#' and renders all non-demographic columns as NA for those rows
#' that satisfy a condition
#' expects filter_cols, call_filter_cols_bad, call_filter_cols_good, non_demo_cols
#' @importFrom dplyr bind_rows filter mutate mutate_at select
#' @importFrom magrittr %>% %<>%
#' @importFrom purrr map map2
#' @importFrom rlang !!!
#' @keywords internal
proc_na_all_by_some_cols <- function (df) {
df_no_scrub <- df %>%
filter(lengths(call_filter_cols_bad) == 0) %>%
select(module, proc)
df_scrubbed <- df %>%
filter(lengths(call_filter_cols_bad) > 0) %>%
mutate(proc_scrubbed = pmap(list(proc, call_filter_cols_bad, call_filter_cols_good, non_demo_cols),
na_all_by_some_cols)) %>%
select(module, proc = proc_scrubbed) %>%
bind_rows(df_no_scrub)
return (df_scrubbed)
}
#' @importFrom dplyr across bind_rows filter mutate
#' @importFrom magrittr %>%
#' @importFrom rlang !!!
#' @keywords internal
na_all_by_some_cols <- function (proc, filter_cols_bad, filter_cols_good, scrub_cols) {
proc_bad <- filter(proc, !!!filter_cols_bad) %>%
mutate(across(scrub_cols, to_na))
proc_good <- filter(proc, !!!filter_cols_good)
proc_scrubbed <- bind_rows(proc_good, proc_bad)
return (proc_scrubbed)
}
#' @keywords internal
check_extra_demos <- function (df, is_wide, extra_demos, is_ace) {
if (is_ace) {
all_possible_demos <- ALL_POSSIBLE_DEMOS
all_modules <- ALL_MODULES
} else {
all_possible_demos <- ALL_POSSIBLE_SEA_DEMOS
all_modules <- ALL_SEA_MODULES
}
if (is_wide) {
# if "wide" form, coerce to "long-ish" first
if (!is.null(extra_demos)) {
if (!all(extra_demos %in% (names(df)))) {
stop(crayon::red("Extra demo cols not found! Check spelling?"))
}
} else {
if (!all(names(df)[!grepl(paste0("^", all_modules[all_modules != DEMOS], collapse = "|"), names(df))] %in% all_possible_demos)) {
warning(crayon::yellow("Possible extra demo cols detected but argument not specified, ignoring."))
}
}
} else {
if (!is.null(extra_demos)) {
if (!any(map_lgl(df$proc, ~all(extra_demos %in% names(.x))))) {
stop(crayon::red("Extra demo cols not found! Check spelling?"))
}
}
}
return (NULL)
}
#' Most module-post functions work much better with long proc data.
#' This will be called under the hood to coerce wide data to long-ish
#' Because it seems that users prefer the wide data
#' @importFrom dplyr distinct mutate select
#' @importFrom magrittr %>%
#' @importFrom purrr map
#' @importFrom tibble tibble
#' @keywords internal
proc_wide_to_long <- function (df, extra_demos, is_ace) {
valid_modules <- get_valid_modules(df, is_ace = is_ace)
valid_demos <- get_valid_demos(df, is_ace = is_ace)
valid_demos <- c(valid_demos, extra_demos)
out <- tibble(module = valid_modules,
proc = vector("list", length(valid_modules))) %>%
mutate(proc = map(module, ~ df %>%
select(valid_demos, dplyr::starts_with(.x)) %>%
distinct()))
# Note that the "long-ish" output will still have module names preprended to colnames (for now)
return (out)
}
#' @importFrom purrr map_lgl
#' @keywords internal
get_valid_modules <- function (df, is_ace) {
if (is_ace) {
all_modules <- ALL_MODULES
} else {
all_modules <- ALL_SEA_MODULES
}
if (all(c("module", "proc") %in% names(df))) {
# if was processed with output = "long"
modules <- all_modules[map_lgl(all_modules, ~.x %in% df[[COL_MODULE]])]
} else {
# else if was processed with output = "wide"
modules <- all_modules[map_lgl(all_modules, ~any(grepl(.x, names(df))))]
}
if (SAAT_SUS %in% modules | SAAT_IMP %in% modules) modules <- modules[modules != SAAT]
return (modules)
}
#' @keywords internal
get_ace_classroom_wide_bids <- function (df) {
valid_modules <- get_valid_modules(df, is_ace = TRUE)
return (c(paste0(valid_modules, ".bid"), paste0(valid_modules, ".time")))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.