Outline

Set-up

rm(list = ls())

# dependencies

library(aceR)
library(stringr)
library(ggplot2)
library(lme4)

# paths

rm(list = ls())

library(aceR)
library(stringr)
library(plyr)

# paths

BASE_DIRECTORY = "~/Desktop/seacrest"
RAW_SEACREST_DIRECTORY = paste(BASE_DIRECTORY, "post_data", sep = "/")
MISSING_FILES_DIRECTORY = paste(BASE_DIRECTORY, "remaining_files", sep = "/")
PARTICIPANT_INFO_DIRECTORY = paste(BASE_DIRECTORY, "participant_info_files", sep = "/")

# files

DEMOGRAPHICS_FILE = paste(PARTICIPANT_INFO_DIRECTORY, "Sea Crest Demographics.xlsx", sep = "/")
GRADE_FILE = paste(PARTICIPANT_INFO_DIRECTORY, "School Pilot WJ Data GRADE.xlsx", sep = "/")

# constants

PID = "pid"
GENDER = "gender"
AGE = "grade"
SS_ID = "SS"
BAND_ID = "Band"

ACE_INCLUDE = c(
  "rt_")

ACE_IGNORE = c(
  "cohort",
  "median",
  "length",
  "se", 
  "sd", 
  "incorrect",
  "rt_count", 
  "rw_count", 
  "acc_", 
  "rw_",
  "correct", 
  "object_count")

# helper functions 

grab_ss = function(df) {
  cols = names(df)
  return (aceR:::multi_filter_vec(cols, c(SS_ID)))
}

grab_ace = function(df, include = c(), skip = c()) {
  cols = names(df)
  vars = aceR:::multi_filter_vec(cols, include)
  vars = aceR:::multi_filter_out_vec(vars, skip)
  return (vars)
}

is_empty = function(col) {
  return (sum(is.na(col)) == length(col))
}

valid_cols = function(df, cols) {
  valid = sapply(cols, function(y) {
    invalid = is_empty(df[, y])
    return (!invalid)
  })
  return (names(which(valid)))
}

load_missing_files = function(path) {
  files = list.files(path, pattern = ".csv")
  out = data.frame()
  for (file in files) {
    file_path = paste(path, file, sep = "/")
    dat = aceR:::load_ace_filtered_file(file_path)
    out = plyr::rbind.fill(out, dat)
  }
  return (out)
}

load & prepare woodcock data

# load demographics
demographics = load_ace_demographics(DEMOGRAPHICS_FILE)
demographics$pid = standardize_seacrest_pid(demographics$pid)

load & woodcock metris

woodcock = aceR:::transform_woodcock(GRADE_FILE) woodcock = merge(demographics, woodcock, by = "pid")

cols = names(woodcock)

prepare woodcock data

woodcock_ss = woodcock[str_detect(cols, SS_ID)] woodcock_ss_score = apply(woodcock_ss, 2, function(x) { score = aceR:::first_number(x) score = aceR:::remove_whitespace(score) score = ifelse(is.na(score), NA, as.numeric(as.character(score))) return (score) }) woodcock_ss_score = as.data.frame(woodcock_ss_score) names(woodcock_ss_score) = sapply(names(woodcock_ss_score), function(x) { new_name = aceR:::remove_special_characters(x, "") new_name = gsub(SS_ID, "SS", new_name) new_name = gsub(BAND_ID, "_Band", new_name) new_name = gsub(" ", "", new_name) return (new_name) })

woodcock_ss_score = cbind(woodcock[, c(PID, GENDER)], woodcock_ss_score)

Age effects on woodcock scores

{r, warning = FALSE, message = FALSE} ss_ys = grab_ss(woodcock_ss_score) print(ss_ys) for (ss_y in ss_ys) {

ss_formula_str = paste0(ss_y, " ~ ", GENDER) ss_formula = formula(ss_formula_str) ss_mod = lm(formula = ss_formula, data = woodcock_ss_score) ss_mod_summary = summary(ss_mod)

# make plot and print ss_plot = make_box_plot(woodcock_ss_score, GENDER, ss_y)

ss_mod_summary$call = ss_formula_str print(ss_plot) print(ss_mod_summary) }

# GENDER effects on ACE scores

```r

ace = load_ace_bulk(path = RAW_SEACREST_DIRECTORY, pattern = ".csv", recursive = FALSE)
ace_missing = load_missing_files(path = MISSING_FILES_DIRECTORY)
ace = plyr::rbind.fill(ace, ace_missing)
ace_by_task = proc_by_module(ace, verbose = TRUE)

modules = names(ace_by_task)
print(modules)
for (module in ace_by_task) {

  module$pid = standardize_seacrest_pid(module$pid)
  ace_ys = grab_ace(module, ACE_INCLUDE, ACE_IGNORE)
  module_demographs = merge(module, demographics, by = "pid")
  valid_ace_ys = valid_cols(module_demographs, ace_ys)

  # get rid of infinites
  is.na(module_demographs) = sapply(module_demographs, is.infinite)

  for (ace_y in valid_ace_ys) {
      AceScripts::generate_plots_and_stats(module_demographs, AGE, ace_y)
      AceScripts::generate_plots_and_stats(module_demographs, GENDER, ace_y)
      AceScripts::generate_plots_and_stats(module_demographs, AGE, ace_y, cohort = GENDER)
  }

}

Regression between ACE and Woodcock metrics

``{r, warning = FALSE, message = FALSE}

for (module in ace_by_task) {

module$pid = standardize_seacrest_pid(module$pid) reg_ys = grab_ace(module, ACE_INCLUDE, ACE_IGNORE) module_demographs = merge(module, woodcock_ss_score, by = "pid") valid_reg_ys = valid_cols(module_demographs, reg_ys)

# get rid of infinites is.na(module_demographs) = sapply(module_demographs, is.infinite)

for (reg_y in valid_reg_ys) { reg_xs = ss_ys for (reg_x in reg_xs) { reg_formula_str = paste0(reg_y, " ~ ", reg_x) reg_formula = formula(reg_formula_str) reg_mod = lm(formula = reg_formula, data = module_demographs) reg_mod_summary = summary(reg_mod) reg_mod_summary_coef = reg_mod_summary$coefficients

  slope = reg_mod_summary_coef[2]
  intercept = reg_mod_summary_coef[1]
  plot_desc = paste(module$module[1], ":", reg_y, "by", reg_x, sep = " ")

  lplot = ggplot() +
    geom_point(aes(module_demographs[, reg_x], module_demographs[, reg_y]), module_demographs, na.rm = TRUE) +
    ggtitle(plot_desc) +
    xlab(reg_x) +
    ylab(reg_y) +
    geom_abline(slope = slope, intercept = intercept, colour='#E41A1C')

  reg_mod_summary$call = paste(module$module[1], ":", reg_formula_str, sep = " ")
  print(lplot)
  print(reg_mod_summary)
}

}

}

```



gazzlab/AceScripts documentation built on May 16, 2019, 10:11 p.m.