R/dif.R

Defines functions area_between_icc dif

Documented in area_between_icc dif

###############################################################################@
############################# dif ##############################################
###############################################################################@
#' Evaluate Differential Item Functioning (DIF) of a test
#'
#' @description
#' \code{dif} evaluates Differential Item Functioning (DIF) of a test.
#'
#'
#' @param resp A vector of item responses.
#' @param group Group membership
#' @param focal_name In the group variable, the value that represents the focal
#'   group.
#' @param ip An \code{\link{Itempool-class}} object.
#' @param type The type of the DIF method.
#'
#' @return A data.frame of DIF values.
#'
#' @include item-class.R
#' @include itempool-class.R
#' @include item-class-methods.R
#' @include itempool-class-methods.R
#'
#' @author Emre Gonulates
#'
#'
#'
dif <- function(resp, group, focal_name, ip = NULL, type = "mh") {
  type <- match.arg(type)
  if (type == "mh") {
    result <- data.frame(item_id = paste0("Item", 1:ncol(resp)), mh_statistic = NA,
                         chisq = NA, p_value = NA, ETS = NA, ETS_class = NA)
    if (!is.null(colnames(resp))) result$item_id <- colnames(resp)
    total_score <- rowSums(resp, na.rm = TRUE)
    K <- sort(unique(total_score))
    group[group == focal_name] <- "focal"
    group[group != "focal"] <- "reference"
    group <- as.factor(group)

    for (i in seq_len(ncol(resp))) { # i = item
      n_r1 <- rep(NA, length(K))
      n_r0 <- rep(NA, length(K))
      n_f1 <- rep(NA, length(K))
      n_f0 <- rep(NA, length(K))
      n_r <- rep(NA, length(K))
      n_f <- rep(NA, length(K))
      n_0 <- rep(NA, length(K))
      n_1 <- rep(NA, length(K))
      n <- rep(NA, length(K))
      for (k in seq_len(length(K))) {
        temp <- which(total_score == K[k])
        temp_resp <- factor(resp[temp, i], levels = unique(resp[, i]))
        temp <- stats::addmargins(table(group[temp], temp_resp))
        n_r1[k] <- temp["reference", "1"]
        n_r0[k] <- temp["reference", "0"]
        n_f1[k] <- temp["focal", "1"]
        n_f0[k] <- temp["focal", "0"]
        n_r[k] <- temp["reference", "Sum"]
        n_f[k] <- temp["focal", "Sum"]
        n_0[k] <- temp["Sum", "0"]
        n_1[k] <- temp["Sum", "1"]
        n[k] <- temp["Sum", "Sum"]
      }
      temp <- n > 1
      result$mh_statistic[i] <- sum((n_r1 * n_f0 / n)[temp]) / sum((n_r0 * n_f1 / n)[temp])
      mu <- (n_r * n_1 / n)[temp]
      v <- (n_r * n_f * n_1 * n_0 / (n^2*(n - 1)))[temp]
      result$chisq[i] <- (abs(sum(n_r1[temp]) - sum(mu)) - 0.5)^2 / sum(v)
    }
    result$ETS <- -2.35 * log(result$mh_statistic)
    result$p_value <- 1 - stats::pchisq(q = result$chisq, df = 1)
    result$ETS_class <- ifelse(abs(result$ETS) < 1, "A",
                               ifelse(abs(result$ETS) < 1.5, "B", "C"))
  }
  return(result)
}



###############################################################################@
############################# area_between_icc #################################
###############################################################################@
#' Calculate the area between two ICC curves
#'
#' @description This function calculates the area between two item
#'   characteristic curves (ICC) for unidimensional dichotomous IRT models.
#'
#'   There are two types of area calculation methods. The first one is
#'   \code{type = "exact"} where the exact area from negative infinity to
#'   positive infinity between the two ICC curves will be calculated.
#'   This method implements the approach in Raju's 1988 paper. This
#'   method works for 'Rasch', '1PL', '2PL', '3PL' models but when the
#'   pseudo-guessing parameters of the items differ for '3PL' model, the
#'   area will be infinity. In such cases it is advisable to use
#'   \code{type = "closed"}.
#'
#'   The area can only be calculated for 'Rasch', '1PL', '2PL', '3PL' or
#'   '4PL' models.
#'
#' @param ... An \code{\link{Itempool-class}} object; or a combination of
#'   \code{\link{Item-class}} and \code{\link{Testlet-class}} objects.
#' @param type A string representing the method that will be used to calculate
#'   the area between two ICC curves. Available values are:
#'   \describe{
#'     \item{\code{"exact"}}{The exact area between the whole theta scale
#'       \code{-Inf} and \code{Inf}. This method implements Raju's (1988)
#'       approach. When the pseudo-guessing parameters of the items differ
#'       for '3PL' model, the area will be infinity. See Raju (1988) for
#'       details.}
#'     \item{\code{"closed"}}{The area within a closed interval defined by
#'       \code{theta_range} argument will be calculated. This method always
#'       returns a finite value. See Kim and Cohen (1991) for details.}
#'   }
#'   The default method is \code{"closed"}.
#' @param theta_range A numeric vector of length two with the first element
#'   smaller than the second element. The values define the boundaries in which
#'   the area between two ICC's will be calculated. The default value is
#'   \code{c(-5, 5)}.
#' @param signed_area A logical value for whether the signed or unsigned area
#'   between two curves will be calculated. When \code{signed = TRUE}, the
#'   area under the second item is subtracted from the area under the first
#'   item. The result can be negative if the first item is mostly under
#'   the second item. When \code{signed = FALSE}, the distance between two ICC
#'   curves will be calculated. The default value is \code{signed = TRUE}.
#'
#' @return A matrix where the values in cells are the areas between items.
#'   The rows represent the first item and the columns represents the second
#'   item and the area of second item is subtracted from the first item when
#'   "signed" area is desired. For example, the value corresponding to the cell
#'   where row is for "Item_4" and column is for "Item_2", the value in the cell
#'   is the area of "Item_4 - Item_2".
#' @references
#' Kim, S.-H., & Cohen, A. S. (1991). A comparison of two area measures for
#' detecting differential item functioning. Applied Psychological Measurement,
#' 15(3), 269–278.
#'
#' Raju, N. S. (1988). The area between two item characteristic curves.
#' Psychometrika, 53(4), 495–502.
#'
#' @export
#'
#' @author Emre Gonulates
#'
#' @examples
#' # Closed area example:
#' ip <- generate_ip(model = c("3PL", "3PL", "3PL"))
#' # plot(ip) # See the ICCs
#' area_between_icc(ip, type = "closed")
#' area_between_icc(ip, type = "closed", signed_area = TRUE)
#'
#' # The result is infinite because 'c' parameters are not equal
#' area_between_icc(ip, type = "exact")
#'
#' # Exact area example:
#' ip <- generate_ip(model = c("2PL", "2PL", "2PL"))
#' area_between_icc(ip, type = "exact")
#' area_between_icc(ip, type = "exact", signed_area = TRUE)
#' # The 'closed' area is very close to the 'exact' area with a wide theta range
#' area_between_icc(ip, type = "closed", theta_range = c(-10, 10))
#'
area_between_icc <- function(..., type = c("closed", "exact"),
                             theta_range = c(-5, 5), signed_area = FALSE) {
  args <- list(...)
  type <- match.arg(type)
  error_invalid_ip <- paste0("Invalid arguments. Please provide an Itempool ",
                             "or at least two Item objects.")
  if (length(args) == 0) stop(error_invalid_ip)
  x <- args[[1]]
  item_list <- NULL
  if (is(x, "Itempool")) {
    item_list <- flatten_itempool_cpp(x)
  } else if (inherits(x, c("Item", "Testlet"))) {
    item_list_indicator <- sapply(args, function(m) is(m, "Item") |
                                    is(m, "Testlet"))
    item_list <- flatten_itempool_cpp(itempool(args[item_list_indicator]))
  } else stop(error_invalid_ip)
  # if (length(item_list) < 2) stop(error_invalid_ip)
  if (type == "closed") {
    return(outer(item_list, item_list, function(x,y) vapply(
      seq_along(x), function(i) area_between_icc_closed_cpp(
        x[[i]],y[[i]], signed_area = signed_area, theta_range = theta_range),
      numeric(1))))
  } else if (type == "exact") {
    return(outer(item_list, item_list, function(x,y) vapply(
      seq_along(x), function(i) area_between_icc_exact_cpp(
        x[[i]],y[[i]], signed_area = signed_area), numeric(1))))
  }
}

Try the irt package in your browser

Any scripts or data that you put into this service are public.

irt documentation built on Nov. 10, 2022, 5:50 p.m.