###################################################################################### # all qc code goes in this first chunck, all html output will be in the chunks below # ###################################################################################### # load packages library(data.table) library(DT) library(easimple) library(gridExtra) library(ggplot2) # create copy of test data in_test <- copy(params$file_name) ################### # save parameters # ################### # interval parameter outlier_parm <- params$outlier_parm # duplicate parameter dup_parm <- params$duplicate_parm # threshold for % of missing... missing_parm <- params$missing_parm # set colors c_good <- "white" c_warning <- "#FFEB9C" c_problem <- "#FFC7CE" ###################### # general formatting # ###################### # make test_scale_score and test_sem numeric in_test[, test_scale_score := as.numeric(test_scale_score)] in_test[, test_sem := as.numeric(test_sem)] # convert NA grades to 99 and NA school years to missing in_test[is.na(test_grade), test_grade:=99] in_test[is.na(test_school_year), test_school_year := "missing"] # make test grade two characters in_test[nchar(as.character(test_grade)) == 1 & !grepl("k", test_grade), test_grade := paste0(0, test_grade)] ################ # create lists # ################ # list of years year_list <- unique(in_test$test_school_year) # list of grades grade_list <- unique(in_test$test_grade) # sort grade list low to high grade_list <- sort(grade_list, decreasing=FALSE) # put k and pk on front end of list if they both exist if ( ("k" %chin% grade_list) & ("pk" %chin% grade_list)) { other_grades <- setdiff(grade_list, c("k", "pk")) other_grades <- sort(other_grades, decreasing=FALSE) grade_list <- c("pk", "k", other_grades)} # put k on front end of list if it exists if ( ("k" %chin% grade_list) & !("pk" %chin% grade_list)) { other_grades <- setdiff(grade_list, c("k")) other_grades <- sort(other_grades, decreasing=FALSE) grade_list <- c("k", other_grades)} # put pk on front end of list if it exists if ( !("k" %chin% grade_list) & ("pk" %chin% grade_list)) { other_grades <- setdiff(grade_list, c("pk")) other_grades <- sort(other_grades, decreasing=FALSE) grade_list <- c("pk", other_grades)} ########################## # TABLE 1: GENERAL TABLE # ########################## # create general table general_table <- in_test[, list(total_n_obs = .N, n_unique_tests = length(unique(ea_test_name)), n_unique_students = length(unique(ea_student_id))), by="test_school_year"] # output n_exact duplicate rows exact_dups <- ea_out_dups(in_test, opt_key_all = 1) # calc n_exact duplicate rows per year exact_dups_table <- exact_dups[, list(exact_dup_row=.N), by="test_school_year"] # merge duplicate row info onto general table general_table <- ea_merge(general_table, exact_dups_table, "test_school_year", "x") # create % of duplicate rows general_table[, perc_exact_dup_row := round((exact_dup_row/total_n_obs)*100, 2)] # set NA values to 0 general_table[is.na(general_table)] <- 0 # sort years from high to low general_table <- general_table[order(-(test_school_year))] # convert to datatable general_table_dt <- datatable(general_table, rownames = FALSE, extensions = "KeyTable") ####################################### # flag large disparities across years # ####################################### # set general_table_flag to zero general_table_flag <- 0 # set list of vars to check in loop gen_vars <- c("total_n_obs", "n_unique_tests", "n_unique_students") ################################################################## # check 1.1 -> 1.3 loop through variables and highlight outliers # ################################################################## # flag large disparities for (m_var in gen_vars){ # set intervals lower_int <- general_table[test_school_year != "missing", mean(get(m_var)) - mean(get(m_var))*(outlier_parm)] upper_int <- general_table[test_school_year != "missing", mean(get(m_var)) + mean(get(m_var))*(outlier_parm)] # apply color scheme if (is.na(lower_int)==0) {general_table_dt <- formatStyle(general_table_dt, columns = paste0(m_var), background = styleInterval(c(lower_int, upper_int), c(c_problem, c_good, c_problem)))} # trigger internal flag if outliers exists if ( (is.na(lower_int)==0) & ( (general_table[, min(get(m_var))] < lower_int) | (general_table[, max(get(m_var))] > upper_int))) {general_table_flag <- 1} } ######################################################################### # check 1.4 -> % of duplicate rows is higher than the missing threshold # ######################################################################### # apply color scheme to % exact duplicate row general_table_dt <- formatStyle(general_table_dt, columns = "perc_exact_dup_row", background = styleInterval(c(0, dup_parm), c(c_good, c_warning, c_problem))) # internal flag for general table if (sum(general_table$perc_exact_dup_row, na.rm = TRUE)!=0) { if ( (min(general_table$perc_exact_dup_row, na.rm = TRUE) > dup_parm)) {general_table_flag <- 1} } #################################### # check 1.5 -> missing school year # #################################### # highlight missing school year general_table_dt <- formatStyle(general_table_dt, columns = "test_school_year", backgroundColor = styleEqual(c("missing"), c(c_problem))) # internal flag for missing school year if ("missing" %chin% unique(general_table$test_school_year)) {general_table_flag <- 1} #################################### # TABLE 2: MISSING VARIABLES TABLE # #################################### # create a copy of in_test as a data.frame missing_vars <- as.data.frame(copy(in_test)) # subset test dataset to missing vars missing_vars <- missing_vars[sapply(missing_vars, function(x) all(is.na(x)))] # create list of all missing vars miss_var_list <- colnames(missing_vars) # create data.table of remaining columns miss_var_table <- as.data.table(colnames(in_test)) # setnames setnames(miss_var_table, "V1", "variable") # create a missing flag miss_var_table[, missing := ifelse(variable %chin% miss_var_list, 1,0)] # datatable miss_var_dt <- datatable(miss_var_table, rownames = FALSE, extensions = "Scroller", options=list(deferRender=TRUE, scrollY=400, scrollCollapse=TRUE)) # apply color scheme: highlight missing vars miss_var_dt <- formatStyle(miss_var_dt, columns = "missing", background = styleEqual(c(0, 1), c(c_good, c_problem))) # check 2.1 -> internal flag for list of missing vars if (length(miss_var_list)>0) { missing_vars_flag <- 1 } else { missing_vars_flag <- 0} ################################# # TABLE 3: MISSING DATA % TABLE # ################################# # create table missing_table <- in_test[, list(total_n_obs = .N, missing_student_id = sum(is.na(ea_student_id)), missing_scale_score = sum(is.na(test_scale_score)), missing_test_sem = sum(is.na(test_sem)), missing_test_date = sum(is.na(test_date)), missing_test_term = sum(is.na(test_term))), by="test_school_year"] # add %'s to table missing_table[, perc_miss_stud_id := round((missing_student_id/total_n_obs)*100, 2)] missing_table[, perc_miss_scale_score := round((missing_scale_score/total_n_obs)*100, 2)] missing_table[, perc_miss_test_sem := round((missing_test_sem/total_n_obs)*100, 2)] missing_table[, perc_miss_test_date := round((missing_test_date/total_n_obs)*100, 2)] missing_table[, perc_miss_test_term := round((missing_test_term/total_n_obs)*100, 2)] # sort column order ea_colorder(missing_table, c("test_school_year", "total_n_obs", "missing_student_id", "perc_miss_stud_id", "missing_scale_score", "perc_miss_scale_score", "missing_test_sem", "perc_miss_test_sem", "missing_test_date", "perc_miss_test_date", "missing_test_term", "perc_miss_test_term")) # sort years from high to low missing_table <- missing_table[order(-(test_school_year))] # the following code pre-hides the "extra" columns and allows you to open more if you want more info missing_dt <- datatable(missing_table, rownames=FALSE, extensions = 'ColVis', options = list(columnDefs = list(list(targets = c(1,2,4,6,8,10), visible = FALSE)), dom = 'C<"clear">lfrtip')) ################################## # flag large %'s of missing data # ################################## # .i. check 3.1 -> 3.5 apply color scheme to all percent missings from "missing_table" missing_dt <- formatStyle(missing_dt, columns = c("perc_miss_stud_id", "perc_miss_scale_score", "perc_miss_test_sem", "perc_miss_test_date", "perc_miss_test_term"), background = styleInterval(c(0, missing_parm), c(c_good, c_warning, c_problem))) # internal flag for missing table if ( (max(missing_table$perc_miss_stud_id) > missing_parm) | (max(missing_table$perc_miss_scale_score) > missing_parm) | (max(missing_table$perc_miss_test_sem) > missing_parm) | (max(missing_table$perc_miss_test_date) > missing_parm) | (max(missing_table$perc_miss_test_term) > missing_parm)) { missing_table_flag <- 1 } else { missing_table_flag <- 0} ################################ # TABLE 4: ID ISSUES (BY TEST) # ################################ #################################### # find total_n_obs per unique test # #################################### # create table of total count per unique test students_per_test <- ea_table(in_test, c("ea_test_name", "site_test_abbrev", "ea_test_subject", "test_grade", "test_term", "test_school_year")) # count -> total_n_obs setnames(students_per_test, "count", "total_n_obs") ########################################## # find n_unique_students per unique test # ########################################## # find n_unique_students sub_test <- ea_no_dups(in_test, c("ea_student_id", "ea_test_name","site_test_abbrev", "ea_test_subject", "test_grade", "test_term", "test_school_year")) # make table unique_studs <- ea_table(sub_test, c("ea_test_name","site_test_abbrev", "ea_test_subject", "test_grade", "test_term", "test_school_year")) # setnames setnames(unique_studs, "count", "n_unique_students") # merge onto original table students_per_test <- ea_merge(students_per_test, unique_studs, c("ea_test_name","site_test_abbrev", "ea_test_subject", "test_grade", "test_term", "test_school_year"), "x") ####################################################### # find n_duplicate_students (overall) per unique test # ####################################################### # find number of duplicate students per test dup_studs <- ea_out_dups(in_test, c("ea_student_id", "ea_test_name","site_test_abbrev", "ea_test_subject", "test_grade", "test_term", "test_school_year")) # count dups dup_studs_table <- dup_studs[, list(n_dup_studs=.N), by=c("ea_test_name","site_test_abbrev", "ea_test_subject", "test_grade", "test_term", "test_school_year")] # merge onto original table students_per_test <- ea_merge(students_per_test, dup_studs_table, c("ea_test_name","site_test_abbrev", "ea_test_subject", "test_grade", "test_term", "test_school_year"), "x") # convert NA values to zeroes students_per_test[is.na(n_dup_studs), n_dup_studs:=0] ############################################################ # find n_duplicate_students that have the same scale score # ############################################################ # find number of duplicate students with the same scale scores dup_studs_same_ss <- ea_out_dups(dup_studs, c("ea_student_id", "ea_test_name","site_test_abbrev", "ea_test_subject", "test_grade", "test_term", "test_school_year", "test_scale_score")) # count dups same_ss_table <- dup_studs_same_ss[, list(n_dup_studs_same_ss=.N), by=c("ea_test_name","site_test_abbrev", "ea_test_subject", "test_grade", "test_term", "test_school_year")] # merge onto original table students_per_test <- ea_merge(students_per_test, same_ss_table, c("ea_test_name","site_test_abbrev", "ea_test_subject", "test_grade", "test_term", "test_school_year"), "x") # convert NA values to zeroes students_per_test[is.na(n_dup_studs_same_ss), n_dup_studs_same_ss:=0] ############################################################## # find n_duplicate_students that have different scale scores # ############################################################## # subset dup studs to just test, student and score dup_studs <- subset(dup_studs, select = c("ea_test_name","site_test_abbrev", "ea_test_subject", "test_grade", "test_term", "test_school_year","ea_student_id", "test_scale_score")) # create column with # of times the scale score appears diff_ss <- dup_studs[,list(n_scale_score=.N),by=c("ea_test_name" ,"ea_student_id", "test_scale_score")] # count the number of times a certain test appears {{with more than one score}} diff_ss <- diff_ss[,list(n_dup_studs_diff_ss=.N),by=c("ea_test_name")] # merge the # of duplicate studs with non-matching scale scores onto table students_per_test <- ea_merge(students_per_test, diff_ss, "ea_test_name", "x") # convert NA values to zeroes students_per_test[is.na(n_dup_studs_diff_ss), n_dup_studs_diff_ss:=0] ############## # create %'s # ############## # % of unique ids students_per_test[, perc_unique_studs := round((n_unique_students/total_n_obs)*100, 2)] # % of dup ids students_per_test[, perc_dup_studs := round((n_dup_studs/total_n_obs)*100, 2)] # % of dup ids diff ss students_per_test[, perc_dup_studs_diff_ss := round((n_dup_studs_diff_ss/total_n_obs)*100, 2)] ########## # format # ########## # the following code pre-hides the "extra" columns and allows you to open more if you want more info students_per_test_dt <- datatable(students_per_test, rownames=FALSE, extensions = c('ColVis', 'FixedHeader'), options = list(columnDefs = list(list(targets = c(1,2,3,4,5,6,7,8,9,10), visible = FALSE)), dom = 'C<"clear">lfrtip')) ######################################### # flag large # of duplicate student ids # ######################################### # check 4.1 -> perc_unique_studs less than 99% students_per_test_dt <- formatStyle(students_per_test_dt, columns = c("perc_unique_studs"), background = styleInterval(c(95,99), c(c_problem, c_warning, c_good))) # checks 4.2 - 4.3 -> perc_dup_studs & perc_dup_studs_diff_ss greater than dup_parm students_per_test_dt <- formatStyle(students_per_test_dt, columns = c("perc_dup_studs", "perc_dup_studs_diff_ss"), background = styleInterval(c(1, dup_parm), c(c_good, c_warning, c_problem))) # internal flag for 4.1 if ( (min(students_per_test$perc_unique_studs) < 99 )) { students_per_test_flag <- 1 } else { students_per_test_flag <- 0} # internal flag for 4.2 - 4.3 if ( (max(students_per_test$perc_dup_studs) > dup_parm) | (max(students_per_test$perc_dup_studs_diff_ss) > dup_parm)) {students_per_test_flag <- 1} ######################################### # TABLE 5: STUDENTS PER GRADE (BY TEST) # ######################################### # remove duplicate students sub_test <- ea_no_dups(in_test, c("ea_student_id", "ea_test_name")) # create table with grade students_per_grade <- ea_table(sub_test, c("site_test_abbrev", "ea_test_subject", "test_grade", "test_school_year", "test_term")) # put wide students_per_grade <- data.table(dcast(students_per_grade, site_test_abbrev + ea_test_subject + test_school_year + test_term ~ test_grade, value.var = "count")) # create table without grade (count=# of students in that model, not accounting for grade) sum_grades <- ea_table(sub_test, c("site_test_abbrev", "ea_test_subject", "test_school_year", "test_term")) # setnames setnames(sum_grades, "count", "n_total") # merge together students_per_grade <- ea_merge(students_per_grade, sum_grades, c("site_test_abbrev", "ea_test_subject", "test_school_year", "test_term")) # make column name grade "missing" if it exists if ("99" %chin% grade_list) {setnames(students_per_grade, "99", "missing")} # create new grade_list grade_list_t5 <- copy(grade_list) # replace "99" with "missing" for new grade list if ("99" %chin% grade_list_t5) {grade_list_t5 <- gsub("99", "missing", grade_list)} ############################################## # create column with number of missing years # ############################################## # set n_no_students to zero students_per_grade[, n_no_students:=0] # loop over years and create zero_"grade" columns for (grade in grade_list_t5) { # if NA convert to zero students_per_grade[is.na(get(grade)), paste0(grade) := 0] # create column that indicates if this grade has zero students students_per_grade[, zero_students:=ifelse(get(grade)==0, 1, 0)] # add zero_students onto n_no_students students_per_grade[, n_no_students:= n_no_students + zero_students]} # drop zero students flag students_per_grade[, zero_students:=NULL] ############################################ # create max/min/mean for each grade level # ############################################ # set max/min/flag_missing_grade to zero students_per_grade[, flag_missing_grade:=0] students_per_grade[, max:=0] students_per_grade[, min:=0] # save "first_grade" in list first_grade <- grade_list_t5[1] # loop over grade_list for (grade in grade_list_t5) { # flag missing grade of data if grade==0 students_per_grade[flag_missing_grade==0, flag_missing_grade:=ifelse(get(grade)==0, 1, 0)] # set first grade to be equal to the max and the min if(grade==first_grade) {students_per_grade[,max:=get(grade)] students_per_grade[,min:=get(grade)]} # for all other grades... if(grade!=first_grade) { # create a comparison size column students_per_grade[, comp_size:=get(grade)] # if comp_size is bigger than the current max, overwrite students_per_grade[comp_size > max, max:=get(grade)] # if comp_size is smaller than the current min, overwrite { OR if min is still zero, reset the minimum to this new value} students_per_grade[( ((comp_size < min) & comp_size!=0) | min==0), min:=get(grade)] } } # save the number of grades n_grades <- length(grade_list_t5) # if a missing grade exists, subtract it if("missing" %chin% grade_list_t5) {n_grades <- n_grades -1 } # create mean students_per_grade[,mean_with_students:= (n_total / (n_grades-n_no_students))] ####################### # implement flag rule # ####################### # set flag to zero students_per_grade[ , large_disparity:=0] # brule students_per_grade[ (max >= (mean_with_students + outlier_parm*(mean_with_students))) | (min <= (mean_with_students - outlier_parm*(mean_with_students))), large_disparity:=1] ################ # format table # ################ # get rid of comp_size/mean students_per_grade[, comp_size:=NULL] students_per_grade[, mean_with_students:=NULL] students_per_grade[, n_no_students:=NULL] # column order ea_colorder(students_per_grade, c("site_test_abbrev", "ea_test_subject", "test_school_year", "test_term", grade_list_t5, "large_disparity", "flag_missing_grade", "n_total")) ############################################ # convert to datatable and highlight flags # ############################################ # the following code hides n_total, max, min students_per_grade_dt <- datatable(students_per_grade, rownames=FALSE, extensions = c('ColVis', 'FixedHeader'), options = list(columnDefs = list(list(targets = c(-1,-2,-3), visible = FALSE)), dom = 'C<"clear">lfrtip')) # check 5.1 -> highlight cells where large_disparity flag is 1 students_per_grade_dt <- formatStyle(students_per_grade_dt, columns = c("large_disparity", "flag_missing_grade"), background = styleEqual(c(0, 1), c(c_good, c_problem))) # internal flag 5.1 if ( sum(students_per_grade$large_disparity)>0) { students_per_grade_flag <- 1 } else { students_per_grade_flag <- 0} # internal flag 5.2 if (sum(students_per_grade$flag_missing_grade, na.rm = TRUE)!=0) { if(sum(students_per_grade$flag_missing_grade, na.rm = TRUE)>0) {students_per_grade_flag <- 1} } ######################################### # TABLE 6: STUDENTS PER YEAR (BY TEST) # ######################################### # remove duplicate students sub_test <- ea_no_dups(in_test, c("ea_student_id", "ea_test_name")) # create table with grade within_grade_comp <- sub_test[, list(site_test_abbrev=site_test_abbrev, ea_test_subject=ea_test_subject, test_grade=test_grade, test_term=test_term),by="test_school_year"] # put wide within_grade_comp <- data.table(dcast(within_grade_comp, site_test_abbrev + ea_test_subject + test_grade + test_term ~ test_school_year)) # calculate n_total within_grade_total <- ea_table(sub_test, c("site_test_abbrev", "ea_test_subject", "test_grade", "test_term")) # merge on total within_grade_comp <- ea_merge(within_grade_comp, within_grade_total, c("site_test_abbrev", "ea_test_subject", "test_grade", "test_term")) # setkey setkey(within_grade_comp, site_test_abbrev, ea_test_subject, test_grade, test_term) ############################################## # create column with number of missing years # ############################################## # set n_no_students to zero within_grade_comp[, n_no_students:=0] # loop over years and create zero_"year" columns for (year in year_list) { # create column that indicates if this year has zero students within_grade_comp[, zero_students:=ifelse(get(year)==0, 1, 0)] # add zero_students onto n_no_students within_grade_comp[, n_no_students:= n_no_students + zero_students]} # drop zero students flag within_grade_comp[, zero_students:=NULL] ############################################ # create max/min/mean for each grade level # ############################################ # set max/min/flag_missing_year to zero within_grade_comp[, flag_missing_year:=0] within_grade_comp[, max:=0] within_grade_comp[, min:=0] # save "first_year" in list first_year <- year_list[1] # loop over grade_list for (year in year_list) { # flag missing year of data if year==0 within_grade_comp[flag_missing_year==0, flag_missing_year:=ifelse(get(year)==0, 1, 0)] # set first year to be equal to the max and the min if(year==first_year) {within_grade_comp[,max:=get(year)] within_grade_comp[,min:=get(year)]} # for all other grades... if(year!=first_year) { # create a comparison size column within_grade_comp[, comp_size:=get(year)] # if comp_size is bigger than the current max, overwrite within_grade_comp[comp_size > max, max:=get(year)] # if comp_size is smaller than the current min, overwrite { OR if min is still zero, reset the minimum to this new value} within_grade_comp[( ((comp_size < min) & comp_size!=0) | min==0), min:=get(year)] } } # save the number of years n_years <- length(year_list) # create mean within_grade_comp[,mean_with_students:= (count / (n_years-n_no_students))] ####################### # implement flag rule # ####################### # set flag to zero within_grade_comp[ , large_disparity:=0] # brule within_grade_comp[ (max >= (mean_with_students + outlier_parm*(mean_with_students))) | (min <= (mean_with_students - outlier_parm*(mean_with_students))), large_disparity:=1] ################ # format table # ################ # get rid of vars within_grade_comp[, comp_size:=NULL] within_grade_comp[, mean_with_students:=NULL] within_grade_comp[, n_no_students:=NULL] within_grade_comp[, count:=NULL] # column order ea_colorder(within_grade_comp, c("site_test_abbrev", "ea_test_subject", "test_grade", "test_term", year_list, "large_disparity", "flag_missing_year")) ############################################ # convert to datatable and highlight flags # ############################################ # the following code hides n_total, max, min within_grade_dt <- datatable(within_grade_comp, rownames=FALSE, extensions = c('ColVis', 'FixedHeader'), options = list(columnDefs = list(list(targets = c(-1,-2), visible = FALSE)),dom = 'C<"clear">lfrtip')) # check 6.1 -> highlight cells where large_disparity flag is 1 within_grade_dt <- formatStyle(within_grade_dt, columns = c("large_disparity", "flag_missing_year"), background = styleEqual(c(0, 1), c(c_good, c_problem))) # internal flag 6.1 if ( sum(within_grade_dt$large_disparity)>0) { within_grade_flag <- 1 } else { within_grade_flag <- 0} # internal flag 6.2 if(sum(within_grade_comp$flag_missing_year)>0) {within_grade_flag <- 1} ####################################### # TABLE 7: SCALE SCORE INFO (BY TEST) # ####################################### # add flag for missing ss in_test[, flag_ss_missing:= ifelse(is.na(test_scale_score), 1, 0)] # find min, max, mean, sd by unique test ss_model_stats <- in_test[, list(n_obs = .N, ss_min = min(test_scale_score, na.rm = TRUE), ss_max = max(test_scale_score, na.rm = TRUE), ss_mean = round(mean(test_scale_score, na.rm = TRUE)), ss_sd = round(sd(test_scale_score, na.rm = TRUE)), n_missing_ss = sum(flag_ss_missing), perc_missing_ss = round(mean(flag_ss_missing)*100,2)), by = c("ea_test_name", "site_test_abbrev", "ea_test_subject", "test_grade", "test_school_year", "test_term")] # set column order setkey(ss_model_stats, ea_test_subject, test_grade, test_school_year, test_term) # sort years from high to low ss_model_stats <- ss_model_stats[order(-(test_school_year))] # convert to DT ss_model_stats_dt <- datatable(ss_model_stats, rownames=FALSE, extensions = c('ColVis', 'FixedHeader'), options = list(columnDefs = list(list(targets = c(1,2,3,4,5,6,11), visible = FALSE)),dom = 'C<"clear">lfrtip')) ################# # flag problems # ################# # flag 7.1 -> perc_missing_ss more than 2.5% ss_model_stats_dt <- formatStyle(ss_model_stats_dt, columns = c("perc_missing_ss"), background = styleInterval(c(.001,missing_parm), c(c_good, c_warning, c_problem))) # internal flag for 7.1 if ( (max(ss_model_stats_dt$perc_missing_ss) > 2.5 )) { ss_model_stats_flag <- 1 } else { ss_model_stats_flag <- 0} ######################### # HISTOGRAMS SS and SEM # ######################### # create empty list test_hist_list <- list() # create list of unique tests model_list <- unique(in_test$ea_test_name) # loop over models to histogram and sum stat for (m in 1 : length(model_list)){ # define model m_model_list <- model_list[m] # subset in_test m_test <- subset(in_test, ea_test_name == m_model_list, select = c("ea_student_id", "test_scale_score", "test_sem")) # put long m_test_long <- melt(m_test, "ea_student_id", c("test_scale_score", "test_sem")) # histogram h <- ggplot(m_test_long, aes(value)) + geom_histogram() + facet_wrap(~ variable, ncol = 1, scale="free") h <- h + ggtitle(paste0("scale score and sem ", m_model_list)) # save to list test_hist_list[[m_model_list]] <- h } ################## # to be added....# ################## # across grades/years/terms Cohort info for students, flag potential issues # Missing data # need to differentiate between mandatory variables and optional vars # ID issues # notify when more than one student ID exists in raw data, which to use # if both state/local ids exist, analyze their uniqueness & overlap # Check for bad id's # non alpha-numeric numbers # leading zeroes # scientific notation # By school info # grade spans per school & year # check for missing grades, weird spans, large changes # number of students per school & year # by school # by school and grade # flag any large changes in n-sizes at a school across years # an overall flag for whether or nor the headings "flagged/unflagged should appear aka...is anything flagged?" -->
Below are all the the quality control checks that have been flagged for possible problems
r if (general_table_flag==1) '### general table'
########################## # table 1: general table # ########################## if (general_table_flag==1){ general_table_dt}
r if (missing_vars_flag==1) '### missing variables'
############################## # table 2: missing variables # ############################## if (missing_vars_flag==1) {miss_var_dt}
r if (missing_table_flag==1) '### missing data percentages'
############################### # table 3: missing data table # ############################### if (missing_table_flag==1) {missing_dt}
r if (students_per_test_flag==1) '### ID issues (by test)'
################################ # table 4: ID issues (by test) # ################################ if (students_per_test_flag==1) {students_per_test_dt}
r if (students_per_grade_flag==1) '### students per grade (by test)'
######################################### # table 5: students per grade (by test) # ######################################### if (students_per_grade_flag==1) {students_per_grade_dt}
r if (within_grade_flag==1) '### students per year (by test) '
######################################## # table 6: students per year (by test) # ######################################## if (within_grade_flag==1) {within_grade_dt}
r if (ss_model_stats_flag==1) '### scale score table'
############################## # table 7: scale score table # ############################## if (ss_model_stats_flag==1) {ss_model_stats_dt}
Below are all the the qc_checks that were not flagged for possible problems
r if (general_table_flag==0) '### general table'
########################## # table 1: general table # ########################## if (general_table_flag==0){ general_table_dt}
r if (missing_vars_flag==0) '### missing variables'
############################## # table 2: missing variables # ############################## if (missing_vars_flag==0) {miss_var_dt}
r if (missing_table_flag==0) '### missing data percentages'
############################### # table 3: missing data table # ############################### if (missing_table_flag==0) {missing_dt}
r if (students_per_test_flag==0) '### ID issues (by test)'
################################ # table 4: ID issues (by test) # ################################ if (students_per_test_flag==0) {students_per_test_dt}
r if (students_per_grade_flag==0) '### students per grade (by test) '
######################################### # table 5: students per grade (by test) # ######################################### if (students_per_grade_flag==0) {students_per_grade_dt}
r if (within_grade_flag==0) '### students per year (by test) '
######################################## # table 6: students per year (by test) # ######################################## if (within_grade_flag==0) {within_grade_dt}
r if (ss_model_stats_flag==0) '### scale score table'
############################## # table 7: scale score table # ############################## if (ss_model_stats_flag==0) {ss_model_stats_dt}
# only print graphs if option is turned on if (params$opt_graphs==1) {print(test_hist_list)} if (params$opt_graphs==0) {message("The option to print histograms with scale score and test sem distributions has been turned off")}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.