## Packages that must be installed in R, to run this:
library(aplpack)
library(ggplot2)
library(knitr)

# Chunk defaults for the whole report: do not echo the code, print results
# without a comment prefix, and suppress messages.
knitr::opts_chunk$set(echo = FALSE, comment = NA, message = FALSE)

# Width (in characters) used when printing results.
options(width = 100)
The full path to the marks folder used in this report is:
# The marks are read from the "marks" sub-folder of the working directory.
marks_folder <- paste(working_directory, "marks", sep = "/")
norman::print_path(marks_folder)
The data files contained in that folder are:
# Keep the file names returned for the marks folder; they are used below to
# derive the module codes and to read each file.
# NOTE(review): presumably print_file_listing() also prints the listing into
# the report -- confirm against the norman package.
filenames <- norman::print_file_listing(marks_folder)
# Module code = the first five characters of each marks file name.
module_codes <- unlist(substr(filenames, 1, 5))

# One list slot per module, named up front so that filling by name re-uses
# the slot instead of appending after a run of empty placeholders.
module_marks <- vector(mode = "list", length = length(module_codes))
names(module_marks) <- module_codes
for (i in seq_along(filenames)) {
  module_marks[[module_codes[i]]] <- read.csv(
    paste0(marks_folder, "/", filenames[i]),
    stringsAsFactors = TRUE
  )[, c("sprCode", "overallMark")]
}

# lapply() always returns a list. sapply() would collapse the result to a
# matrix whenever every module has the same number of students, which would
# break the [[m]] look-ups in the loop below.
student_IDs <- lapply(module_marks, function(m) as.character(m[[1]]))
module_marks <- lapply(module_marks, function(m) m[[2]])

# Students x modules matrix of marks; NA where a student has no mark for a
# module.
unique_student_IDs <- sort(unique(unlist(student_IDs)))
marks_matrix <- matrix(NA, length(unique_student_IDs), length(module_codes))
rownames(marks_matrix) <- unique_student_IDs
colnames(marks_matrix) <- module_codes
for (m in module_codes) {
  marks_matrix[student_IDs[[m]], m] <- module_marks[[m]]
}
# Check the module codes found in the marks folder against those expected
# for this working directory. The $missing and $extras components of the
# result are reported in the text of the report.
checklist <- norman::check_modules_expected(working_directory, module_codes)
# Load the module names, if the file that holds them is present, and work out
# which of the module codes have no name listed.
names_file <- paste0(working_directory, "/module_names.csv")
if (!file.exists(names_file)) {
  module_names <- NULL
  missing_module_names <- "**ERROR**, the file \\small `module_names.csv` \\normalsize does not exist"
} else {
  module_names <- read.csv(names_file, stringsAsFactors = TRUE)
  # The first column supplies the row names; the ModuleCode column is then
  # dropped from the data frame.
  row.names(module_names) <- module_names[, 1]
  module_names$ModuleCode <- NULL
  has_name <- module_codes %in% row.names(module_names)
  missing_module_names <- module_codes[!has_name]
  if (length(missing_module_names) == 0) {
    missing_module_names <- "none"
  }
}
Modules expected that were missing from the marks folder:
`r paste(checklist$missing)`
Modules that were found in the marks folder unexpectedly:
`r paste(checklist$extras)`
Module names missing from \small `module_names.csv`
\normalsize: `r paste(missing_module_names)`
Overall median mark: `r median(marks_matrix, na.rm = TRUE)`
# Summaries of the raw marks, as computed by norman::raw_mark_summaries(),
# shown as a table.
summaries <- norman::raw_mark_summaries(marks_matrix)
knitr::kable(summaries)
# Table of raw mark classes, as computed by norman::raw_mark_classes().
classes <- norman::raw_mark_classes(marks_matrix)
# Mark the rows as percentages. This has to happen before the table is
# printed, otherwise the "(%)" suffix never reaches the report.
rownames(classes) <- paste(rownames(classes), "(%)")
kable(classes)
\newpage
Module effects are measured via a two-stage analysis:
This analysis is similar to Tukey's median polish analysis, but is both more transparent and more robust for the present purpose. (Blog post at https://davidfirth.github.io/blog/2019/04/26/robust-measurement-from-a-2-way-table/ amplifies this a little.)
For completeness, in case needed, the median differences from stage 1 are reported in full, in the next sub-section.
The result of this median-of-differences analysis is the following table of module effects. The effects are centred such that their median is zero. Also reported in the table is the total number of within-student differences that were available to use in this analysis, for each module.
## Median differences, in the form used as input to meddiff_fit()
md <- norman::meddiff(marks_matrix)
## Fit to the median differences; its R-squared is quoted in the text
mdfit <- norman::meddiff_fit(md)
rsq <- summary(mdfit)$r.squared
## Display form of the differences, for the full listing of differences below
mdd <- norman::meddiff_for_display(marks_matrix)
Easiest to hardest. All are relative to the median module.
Also shown here, for each of the modules, is the total number (Count) of mark-pair comparisons that were available for that module.
(The median Effect is set to zero, arbitrarily: in this analysis it is only the differences between modules that are interpretable.)
# Module effects table: the plot further down reads its Effect column and
# uses its row names as point labels.
# NOTE(review): nothing in this chunk prints mdf -- confirm that the table
# announced in the text is displayed by get_module_effects() or elsewhere.
mdf <- norman::get_module_effects(module_codes, mdd)
These module effects explain `r paste0(round(100 * rsq, 0), "%")`
of the variation among the median differences listed below.
If the variation seen between modules is just natural (roughly normally distributed) variation, then the points in this graph should not deviate far from a straight line.
Points near the ends of the graph that are far above or far below the line represent modules that deserve scrutiny.
# Normal Q-Q plot of the module effects, with the extreme modules labelled.
effects <- mdf$Effect

# Modules with an effect of -10 or lower ("hard") or +10 or higher ("easy")
# get a text label; all other points get an empty label.
hard <- effects <= -10
labels1 <- ifelse(hard, row.names(mdf), "")
easy <- effects >= 10
labels2 <- ifelse(easy, row.names(mdf), "")

# Sort through an explicit permutation so that every label stays attached to
# its own effect whatever the row order of mdf. Reversing the labels, as was
# done before, is only right when mdf is already sorted from the largest to
# the smallest effect. na.last = NA drops missing effects, as sort() does.
ord <- order(effects, na.last = NA)
sample <- effects[ord]
n <- length(sample)
probs <- seq_len(n) / (n + 1)
theoretical <- qnorm(probs)
dat <- data.frame(sample = sample, theoretical = theoretical)

# Reference line fitted to the middle half of the sorted points only.
greenline <- lm(sample ~ theoretical, data = dat,
                subset = floor(n / 4):ceiling(3 * n / 4))

thegraph <- ggplot(dat, aes(x = theoretical, y = sample)) +
  geom_point() +
  # "Hard" labels are drawn to the right of their point, "easy" labels to
  # the left.
  geom_text(size = 3, hjust = 0, nudge_x = 0.05, label = labels1[ord]) +
  geom_text(size = 3, hjust = 1, nudge_x = -0.05, label = labels2[ord]) +
  labs(x = "Normal quantile", y = "Module effect") +
  geom_abline(intercept = coef(greenline)[1], slope = coef(greenline)[2],
              color = "green")
thegraph
These are listed here for completeness, just in case any of them needs to be looked at (in the scaling meeting, for example).
For each considered module, tabulated here are all other modules that were taken (by at least 5 students) together with the module under consideration. For each such 'comparator' module, two numbers are shown here:
\footnotesize
# Full listing of the median differences, produced from the display-form
# object mdd (the result of norman::meddiff_for_display()).
norman::list_all_median_differences(mdd)
\normalsize
# Each student's median over their non-missing marks (one value per row of
# the marks matrix; NA when a student has no marks at all).
student_overall_median <- apply(marks_matrix, 1, median, na.rm = TRUE)
\bigskip
(first module is on next page)
\newpage
\newgeometry{top=1.5cm, bottom = 1.5cm}
\fancypagestyle{mylandscape}{
\fancyhf{} %Clears the header/footer
\fancyfoot{% Footer
\makebox[\textwidth][r]{% Right
  \rlap{\hspace{0.75cm}% Push out of margin by \footskip
    \smash{% Remove vertical height
      \raisebox{4.87in}{% Raise vertically
        \rotatebox{90}{\qquad\qquad\thepage}}}}}}% Rotate counter-clockwise
\renewcommand{\headrulewidth}{0pt}% No header rule
\renewcommand{\footrulewidth}{0pt}% No footer rule
}
\subsectionfont{\fontsize{14}{17}\selectfont}
# Build the per-module pages of the report. module_names is NULL when the
# file module_names.csv was not found.
# NOTE(review): presumably this returns a character vector of report text,
# one element per module -- confirm against the norman package.
module_pages <- norman::make_module_pages(working_directory, module_codes, module_names)
# Write out the module pages, each one starting on a new line.
# cat() has no `collapse` argument: collapse = "\n" was treated as one more
# item to print, so the pages were separated by single spaces and followed by
# a stray newline. `sep` is the argument that sets the separator.
cat(module_pages, sep = "\n")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.