rm(list = ls()) # dependencies library(aceR) library(stringr) library(ggplot2) library(lme4) # paths rm(list = ls()) library(aceR) library(stringr) library(plyr) # paths BASE_DIRECTORY = "~/Desktop/seacrest" RAW_SEACREST_DIRECTORY = paste(BASE_DIRECTORY, "raw_email_files", sep = "/") MISSING_FILES_DIRECTORY = paste(BASE_DIRECTORY, "remaining_files", sep = "/") PARTICIPANT_INFO_DIRECTORY = paste(BASE_DIRECTORY, "participant_info_files", sep = "/") # files DEMOGRAPHICS_FILE = paste(PARTICIPANT_INFO_DIRECTORY, "Sea Crest Demographics.xlsx", sep = "/") GRADE_FILE = paste(PARTICIPANT_INFO_DIRECTORY, "School Pilot WJ Data GRADE.xlsx", sep = "/") # constants PID = "pid" AGE = "grade" GENDER = "gender" SS_ID = "SS" BAND_ID = "Band" ACE_INCLUDE = c( "rt_", "acc_", "rw_", "correct", "object_count") ACE_IGNORE = c( "cohort", "median", "length", "se", "sd", "incorrect", "rt_count", "rw_count") # helper functions grab_ss = function(df) { cols = names(df) return (aceR:::multi_filter_vec(cols, c(SS_ID))) } grab_ace = function(df, include = c(), skip = c()) { cols = names(df) vars = aceR:::multi_filter_vec(cols, include) vars = aceR:::multi_filter_out_vec(vars, skip) return (vars) } is_empty = function(col) { return (sum(is.na(col)) == length(col)) } valid_cols = function(df, cols) { valid = sapply(cols, function(y) { invalid = is_empty(df[, y]) return (!invalid) }) return (names(which(valid))) } load_missing_files = function(path) { files = list.files(path, pattern = ".csv") out = data.frame() for (file in files) { file_path = paste(path, file, sep = "/") dat = aceR:::load_ace_filtered_file(file_path) out = plyr::rbind.fill(out, dat) } return (out) }
# load demographics demographics = load_ace_demographics(DEMOGRAPHICS_FILE) demographics$pid = standardize_seacrest_pid(demographics$pid) # load & woodcock metris woodcock = aceR:::transform_woodcock(GRADE_FILE) woodcock = merge(demographics, woodcock, by = "pid") cols = names(woodcock) # prepare woodcock data woodcock_ss = woodcock[str_detect(cols, SS_ID)] woodcock_ss_score = apply(woodcock_ss, 2, function(x) { score = aceR:::first_number(x) score = aceR:::remove_whitespace(score) score = ifelse(is.na(score), NA, as.numeric(as.character(score))) return (score) }) woodcock_ss_score = as.data.frame(woodcock_ss_score) names(woodcock_ss_score) = sapply(names(woodcock_ss_score), function(x) { new_name = aceR:::remove_special_characters(x, "") new_name = gsub(SS_ID, "_SS_", new_name) new_name = gsub(BAND_ID, "_Band", new_name) new_name = gsub(" ", "", new_name) return (new_name) }) woodcock_ss_score = cbind(woodcock[, c(PID, AGE)], woodcock_ss_score)
ss_ys = grab_ss(woodcock_ss_score) print(ss_ys) for (ss_y in ss_ys) { ss_formula_str = paste0(ss_y, " ~ ", AGE) ss_formula = formula(ss_formula_str) ss_mod = lm(formula = ss_formula, data = woodcock_ss_score) ss_mod_summary = summary(ss_mod) # make plot and print ss_plot = make_box_plot(woodcock_ss_score, AGE, ss_y, cohort = GENDER) ss_mod_summary$call = ss_formula_str print(ss_plot) print(ss_mod_summary) }
ace = load_ace_bulk(path = RAW_SEACREST_DIRECTORY, pattern = ".csv", recursive = FALSE) ace_missing = load_missing_files(path = MISSING_FILES_DIRECTORY) ace = plyr::rbind.fill(ace, ace_missing) ace_by_task = proc_by_module(ace, verbose = TRUE) modules = names(ace_by_task) print(modules)
for (module in ace_by_task) { module$pid = standardize_seacrest_pid(module$pid) ace_ys = grab_ace(module, ACE_INCLUDE, ACE_IGNORE) module_demographs = merge(module, woodcock_ss_score, by = "pid") valid_ace_ys = valid_cols(module_demographs, ace_ys) # get rid of infinites is.na(module_demographs) = sapply(module_demographs, is.infinite) for (ace_y in valid_ace_ys) { ace_formula_str = paste0(ace_y, " ~ ", AGE) ace_formula = formula(ace_formula_str) ace_mod = lm(formula = ace_formula, data = module_demographs) ace_mod_summary = summary(ace_mod) # make plot and print ace_title = paste(module_demographs$module[1], ":", ace_y, "by", AGE) ace_plot = make_box_plot(module_demographs, AGE, ace_y, ace_title, cohort = GENDER) ace_mod_summary$call = paste(module_demographs$module[1], ":", ace_formula_str, sep = " ") print(ace_plot) print(ace_mod_summary) } }
for (module in ace_by_task) { module$pid = standardize_seacrest_pid(module$pid) reg_ys = grab_ace(module, ACE_INCLUDE, ACE_IGNORE) module_demographs = merge(module, woodcock_ss_score, by = "pid") valid_reg_ys = valid_cols(module_demographs, reg_ys) # get rid of infinites is.na(module_demographs) = sapply(module_demographs, is.infinite) for (reg_y in valid_reg_ys) { reg_xs = ss_ys for (reg_x in reg_xs) { reg_formula_str = paste0(reg_y, " ~ ", reg_x) reg_formula = formula(reg_formula_str) reg_mod = lm(formula = reg_formula, data = module_demographs) reg_mod_summary = summary(reg_mod) reg_mod_summary_coef = reg_mod_summary$coefficients slope = reg_mod_summary_coef[2] intercept = reg_mod_summary_coef[1] plot_desc = paste(module$module[1], ":", reg_y, "by", reg_x, sep = " ") lplot = ggplot() + geom_point(aes(module_demographs[, reg_x], module_demographs[, reg_y]), module_demographs, na.rm = TRUE) + ggtitle(plot_desc) + xlab(reg_x) + ylab(reg_y) + geom_abline(slope = slope, intercept = intercept, colour='#E41A1C') reg_mod_summary$call = paste(module$module[1], ":", reg_formula_str, sep = " ") print(lplot) print(reg_mod_summary) } } }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.