R/functions.R

Defines functions extract_rasch_difficulties_ordered plot_rasch_curves rasch_logit plot_item_curves compute_Modified_probabilities fit_binary_irt prepare_data

Documented in compute_Modified_probabilities extract_rasch_difficulties_ordered fit_binary_irt plot_item_curves plot_rasch_curves prepare_data rasch_logit

#' Prepare Data for IRT Analysis
#'
#' @param data A data frame or matrix of binary responses (0 and 1).
#'
#' @return A list containing the following two components:
#' \itemize{
#'   \item \code{matrix}: A numeric matrix of the original data, converted to numeric format and potentially reordered by total score.
#'   \item \code{total_score}: A numeric vector representing the sum of correct responses (row sums) for each person, sorted in descending order if the original data was unsorted.
#' }
#'
#' @examples
#' set.seed(123)
#' raw_data <- data.frame(
#'   item1 = c(1, 0, 1, 1),
#'   item2 = c(0, 0, 1, 1),
#'   item3 = c(1, 1, 1, 0)
#' )
#'
#' prepared <- prepare_data(raw_data)
#'
#' print(prepared$matrix)
#' print(prepared$total_score)
#'
#' @export
prepare_data <- function(data) {
  data_mat <- as.matrix(as.data.frame(data))
  data_mat <- apply(data_mat, 2, as.numeric)
  total_score <- rowSums(data_mat)

  if (is.unsorted(rev(total_score))) {
    order_index <- order(total_score, decreasing = TRUE)
    data_mat <- data_mat[order_index, ]
    total_score <- rowSums(data_mat)
  }
  return(list(matrix = data_mat, total_score = total_score))
}

#' Fit Binary IRT Model using GLM
#'
#' @param data_mat A numeric matrix of responses (persons in rows, items in columns).
#' @param total_score A numeric vector of total scores for each person.
#'
#' @return A data frame with one row per item and the following columns:
#' \itemize{
#'   \item \code{Item}: The name of the item.
#'   \item \code{Intercept}: The estimated intercept parameter from the GLM logit model.
#'   \item \code{Slope}: The estimated slope parameter (discrimination) from the GLM logit model.
#'   \item \code{threshold}: The calculated item difficulty (also known as the beta parameter),
#'   computed as -Intercept / Slope. This represents the point on the ability scale where
#'   the probability of a correct response is 0.5.
#' }
#'
#' @examples
#' set.seed(42)
#' sample_data <- matrix(sample(c(0, 1), 50, replace = TRUE), ncol = 5)
#' colnames(sample_data) <- paste0("Item", 1:5)
#'
#' prepared <- prepare_data(sample_data)
#'
#' irt_results <- fit_binary_irt(prepared$matrix, prepared$total_score)
#'
#' print(irt_results)
#'
#' @export
fit_binary_irt <- function(data_mat, total_score) {
  results <- data.frame(Item = colnames(data_mat), Intercept = NA, Slope = NA)
  for(i in 1:ncol(data_mat)) {
    model <- stats::glm(data_mat[,i] ~ total_score, family = stats::binomial(link = "logit"))
    results$Intercept[i] <- stats::coef(model)[1]
    results$Slope[i]     <- stats::coef(model)[2]
  }
  results$threshold <- -results$Intercept / results$Slope
  return(results)
}

#' Compute Modified Item Response Probabilities
#'
#' @param results The data frame returned by fit_binary_irt.
#' @param theta_all A numeric vector representing ability levels (total scores).
#'
#' @return A numeric matrix of predicted probabilities with dimensions
#' \code{length(theta_all)} rows by \code{nrow(results)} columns.
#' Each cell contains the probability of a correct response for a given
#' ability level and item, calculated using the logistic function:
#' \eqn{exp(\theta - \beta) / (1 + exp(\theta - \beta))}, where \eqn{\beta}
#' is the item threshold. Columns are named after the items.
#'
#' @examples
#' set.seed(123)
#' sample_data <- matrix(sample(c(0, 1), 100, replace = TRUE), ncol = 10)
#' prepared <- prepare_data(sample_data)
#' irt_results <- fit_binary_irt(prepared$matrix, prepared$total_score)
#'
#' abilities <- seq(0, 10, length.out = 5)
#'
#' probs <- compute_Modified_probabilities(irt_results, abilities)
#'
#' print(head(probs))
#'
#' @export
compute_Modified_probabilities <- function(results, theta_all) {
  Modified_prob_matrix <- matrix(NA, nrow = length(theta_all), ncol = nrow(results))
  for(i in 1:nrow(results)) {
    Modified_logit <- theta_all - results$threshold[i]
    Modified_prob_matrix[, i] <- exp(Modified_logit) / (1 + exp(Modified_logit))
  }
  colnames(Modified_prob_matrix) <- results$Item
  return(Modified_prob_matrix)
}

#' Plot Item Characteristic Curves (ICC)
#'
#' @param theta_all A numeric vector of ability levels.
#' @param Modified_prob_matrix The matrix returned by compute_Modified_probabilities.
#' @param results The data frame returned by fit_binary_irt.
#'
#' @return No return value, called for side effects. It generates a series of plots
#' showing the Item Characteristic Curves (ICC) for each item in the model.
#'
#' @examples
#' set.seed(123)
#' sample_data <- matrix(sample(c(0, 1), 100, replace = TRUE), ncol = 10)
#' prepared <- prepare_data(sample_data)
#' irt_results <- fit_binary_irt(prepared$matrix, prepared$total_score)
#'
#' theta_range <- seq(0, 10, length.out = 100)
#' probs <- compute_Modified_probabilities(irt_results, theta_range)
#'
#' # Plot the curves
#' plot_item_curves(theta_range, probs, irt_results)
#'
#' @export
plot_item_curves <- function(theta_all, Modified_prob_matrix, results) {

  oldpar <- graphics::par(no.readonly = TRUE)

  on.exit(graphics::par(oldpar))

  graphics::par(mfrow = c(1, 1))

  for (i in 1:ncol(Modified_prob_matrix)) {
    ord <- order(theta_all)
    graphics::plot(theta_all[ord], Modified_prob_matrix[ord, i], type = "l", lwd = 2,
                   col = "blue", ylim = c(0, 1),
                   xlab = "Ability (Total Score)", ylab = "Probability",
                   main = paste("ICC for Item:", colnames(Modified_prob_matrix)[i]))
    graphics::grid()
    graphics::abline(h = 0.5, v = results$threshold[i], col = "red", lty = 2)

    if(i < ncol(Modified_prob_matrix)) readline(prompt="Press [Enter] for next plot...")
  }
}

#' Compute Logit and Row Means
#'
#' @param prob_matrix The matrix returned by compute_Modified_probabilities.
#'
#' @return A numeric matrix of logit transformations (log-odds) with dimensions
#' \code{nrow(prob_matrix)} rows by \code{ncol(prob_matrix) + 1} columns.
#' The first columns contain the logit values for each item, and the final
#' column, named \code{Row_Mean_Logit}, contains the mean logit across all
#' items for each person, which serves as an estimate of Student Ability.
#'
#' @examples
#' set.seed(123)
#' sample_data <- matrix(sample(c(0, 1), 100, replace = TRUE), ncol = 10)
#' prepared <- prepare_data(sample_data)
#' irt_results <- fit_binary_irt(prepared$matrix, prepared$total_score)
#' probs <- compute_Modified_probabilities(irt_results, prepared$total_score)
#'
#' logit_results <- rasch_logit(probs)
#'
#' print(head(logit_results))
#'
#' @export
rasch_logit <- function(prob_matrix) {
  logit_matrix <- log(prob_matrix / (1 - prob_matrix))
  row_means <- rowMeans(logit_matrix, na.rm = TRUE)
  final_matrix <- cbind(logit_matrix, Row_Mean_Logit = row_means)
  colnames(final_matrix)[1:ncol(logit_matrix)] <- colnames(prob_matrix)
  return(final_matrix)
}

#' Plot Rasch Item Curves
#'
#' @param prob_matrix The matrix returned by compute_Modified_probabilities.
#' @param final_logit_matrix The matrix returned by rasch_logit.
#'
#' @return No return value, called for side effects. It generates a series of plots
#' showing the Rasch Item Characteristic Curves (ICC) with difficulty indicators.
#'
#' @examples
#' set.seed(123)
#' sample_data <- matrix(sample(c(0, 1), 100, replace = TRUE), ncol = 10)
#' prepared <- prepare_data(sample_data)
#' irt_results <- fit_binary_irt(prepared$matrix, prepared$total_score)
#' probs <- compute_Modified_probabilities(irt_results, prepared$total_score)
#' logits <- rasch_logit(probs)
#'
#' # Plot the Rasch curves
#' plot_rasch_curves(probs, logits)
#'
#' @export
plot_rasch_curves <- function(prob_matrix, final_logit_matrix) {

  oldpar <- graphics::par(no.readonly = TRUE)

  on.exit(graphics::par(oldpar))

  last_col_idx <- ncol(final_logit_matrix)
  row_mean_logit <- final_logit_matrix[, last_col_idx]
  num_items <- ncol(prob_matrix)

  graphics::par(mfrow = c(1, 1))

  for (i in 1:num_items) {
    item_probs <- prob_matrix[, i]
    item_name <- colnames(prob_matrix)[i]
    difficulty_value <- (final_logit_matrix[, last_col_idx] - final_logit_matrix[, i])[1]

    ord <- order(row_mean_logit)
    graphics::plot(row_mean_logit[ord], item_probs[ord],
                   type = "l", lwd = 2, col = "blue",
                   ylim = c(0, 1),
                   xlab = "Student Ability (Row Mean Logit)",
                   ylab = "Probability of Correct Response",
                   main = paste("Rasch Curve for Item:", item_name))
    graphics::grid()
    graphics::abline(h = 0.5, col = "red", lty = 2)
    graphics::abline(v = difficulty_value, col = "darkgreen", lty = 3, lwd = 2)
    graphics::text(x = difficulty_value, y = 0.2,
                   labels = paste("Difficulty =", round(difficulty_value, 2)),
                   col = "darkgreen", pos = 4, cex = 0.8)

    if (i < num_items) {
      readline(prompt = "Press [Enter] to see the next item curve...")
    }
  }
}

#' Extract Rasch Item Difficulties in Original Order
#'
#' @param final_logit_matrix The matrix returned by rasch_logit.
#'
#' @return A data frame containing the calculated difficulty parameters for
#' each item, with the following columns:
#' \itemize{
#'   \item \code{Item_Number}: The sequential index of the item.
#'   \item \code{Item_Name}: The name or label of the item.
#'   \item \code{Difficulty_Logit}: The item difficulty parameter (beta) expressed
#'   on the logit scale, rounded to four decimal places. Higher values indicate
#'   more difficult items.
#' }
#'
#' @examples
#' set.seed(123)
#' sample_data <- matrix(sample(c(0, 1), 100, replace = TRUE), ncol = 10)
#' colnames(sample_data) <- paste0("Q", 1:10)
#' prepared <- prepare_data(sample_data)
#' irt_results <- fit_binary_irt(prepared$matrix, prepared$total_score)
#' probs <- compute_Modified_probabilities(irt_results, prepared$total_score)
#' logits <- rasch_logit(probs)
#'
#' difficulty_table <- extract_rasch_difficulties_ordered(logits)
#'
#' print(difficulty_table)
#'
#' @export
extract_rasch_difficulties_ordered <- function(final_logit_matrix) {
  last_col_idx <- ncol(final_logit_matrix)
  num_items <- last_col_idx - 1
  diff_values <- numeric(num_items)
  item_names <- colnames(final_logit_matrix)[1:num_items]

  for (i in 1:num_items) {
    diff_values[i] <- (final_logit_matrix[, last_col_idx] - final_logit_matrix[, i])[1]
  }

  difficulty_table <- data.frame(
    Item_Number = 1:num_items,
    Item_Name = item_names,
    Difficulty_Logit = round(diff_values, 4)
  )
  return(difficulty_table)
}

Try the GLMBasedRaschEstimation package in your browser

Any scripts or data that you put into this service are public.

GLMBasedRaschEstimation documentation built on April 22, 2026, 9:08 a.m.