item.analysis: Item analysis for test scores.

#' @title Item Analysis
#'
#' @description Evaluates the standard deviation, correlation, difficulty,
#' discrimination, and reliability of a set of test scores.
#' @param data_mat A numeric matrix of test scores with rows representing
#' each examinee and columns representing each question. Elements should
#' equal to 1 (correct) or 0 (incorrect).
#' @return Returns a list of the following statistics.
#' \item{question}{Question names.}
#' \item{sd}{Standard deviation of each question.}
#' \item{correlation}{Pearson correlation coefficeint of each question with
#' the average test score for each examinee. (Analogous to the point
#' biserial correlation coefficient.)}
#' \item{difficulty}{Average response for each question. High values
#' indicate low difficulty.}
#' \item{discrimination}{The ability of a question to differentiate between
#' a high scoring student and a low
#' scoring student. The statistic is bound between -1 and 1. The statistic
#' for each question is caluclated using the top third scorers and the bottom
#' third scorers. Questions with a large positive discrimination score
#' indicates that examinees who performed well on this question also had high
#' final test scores, and examinees who performed poorly on this question had
#' low final test scores.}
#' \item{reliability}{Calculated using the Kuder-Richardson Formula 20
#' (KR-20). Typically test reliability scores above 0.70 for classroom exams
#' are preferred. (This is an overall statistic - not item-specific.)}

item_analysis <- function(data_mat) {
  out <- list()
  rows <- dim(data_mat)[1]
  cols <- dim(data_mat)[2]

  # question
  out$question <- colnames(data_mat)

  # standard deviation
  for(i in 1:cols) {
    out$sd[i] <- sd(data_mat[,i])
  }

  # pearson correlation (same as point bi-serial)
  for(i in 1:cols) {
    out$correlation[i] <- cor(data_mat[,i], rowMeans(data_mat[,1:cols]))
  }

  # difficulty
  out$difficulty <- as.numeric(colMeans(data_mat[,1:cols]))

  # discrimination
  quantile(rowMeans(data_mat[,1:cols]), probs = seq(0, 1, 1/3))
  # interpret as: 0% of the data lies below... ~33% data lies below... etc.
  thirds <- as.vector(quantile(rowMeans(data_mat[,1:cols]), probs = seq(0, 1, 1/3)))
  lower <- which(rowMeans(data_mat[,1:cols])<=thirds[2])
  upper <- which(rowMeans(data_mat[,1:cols])>=thirds[3])
  for(i in 1:cols) {
    quant <- list()
    quant$lower <- length(which(data_mat[lower,i]==1))
    quant$upper <- length(which(data_mat[upper,i]==1))
    if(quant$lower>=quant$upper) {
      quant$denom <- quant$lower
    }
    else {
      quant$denom <- quant$upper
    }
    out$discrimination[i] <- (quant$upper-quant$lower)/quant$denom
  }

  # reliability
  # (high values, e.g. above 0.70, indicate that the exam correlates well with the overall test scores)
  numerator <- sum(colMeans(data_mat)*(1-colMeans(data_mat)))
  out$reliability <- (cols/(cols-1))*(1-(numerator/var(rowSums(data_mat))))

  return(out)
}