# R/log_reg.R In mstDIF: A Collection of DIF Tests for Multistage Tests

#### Documented in log_reg

#' A logistic regression DIF test for MSTs
#'
#' This function allows the detection of itemwise DIF for Multistage Tests. It is based on the comparison
#' of three logistic regression models for each item. The first logistic regression model (Model 1)
#' predicts the positiveness of each response solely on the estimated ability parameters. The second
#' logistic regression model (Model 2) predicts the positiveness based on the ability parameters
#' and the membership to the focal and reference group as additive predictor variables.
#' The third model (Model 3) uses the same predictors as Model 2 to predict the positiveness of the responses, but
#' also includes an interaction effect. Three model comparisons are carried out (Models 1/2, Models 1/3, Models 2/3)
#' based on two criteria: The comparison of the Nagelkerke R squared values, and the p-values of a likelihood ratio test.
#'
#' Author: Sebastian Appelbaum, with minor changes by Rudolf Debelak and Dries Debeer
#'
#' @param resp A data frame containing the response matrix. Rows correspond to respondents, columns to items.
#' @param DIF_covariate A factor indicating the membership to the reference and focal groups.
#' @param theta A vector of ability estimates for each respondent.
#'
#' @return A list with four elements. The first element is the response matrix, the second element is the name of
#' the DIF covariate, and the third element is the name of the test. The fourth element is a data frame where
#' each row corresponds to an item. The columns of this data frame correspond to the following entries:
#' \describe{
#'   \item{\code{N}}{The number of responses observed for this item.}
#'   \item{\code{overall_chi_sq}}{The chi squared statistic of the likelihood ratio test comparing Model 2 and Model 0.}
#'   \item{\code{overall_p_value}}{The p-values of the likelihood ratio test comparing Model 2 and Model 0 as
#'   an indicator for the overall DIF effect.}
#'   \item{\code{Delta_NagelkerkeR2}}{The difference of the Nagelkerke R squared values for Model 2 and Model 0.}
#'   \item{\code{UDIF_chi_sq}}{The chi squared statistic of the likelihood ratio test comparing Model 1 and Model 0.}
#'   \item{\code{UDIF_p_value}}{The p-values of the likelihood ratio test comparing Model 1 and Model 0.}
#'   \item{\code{UDIF_Delta_NagelkerkeR2}}{The difference of the Nagelkerke R squared values for Model 1 and Model 0.}
#'   \item{\code{CDIF_chi_sq}}{The chi squared statistic of the likelihood ratio test comparing Model 2 and Model 1.}
#'   \item{\code{CDIF_p_value}}{The p-values of the likelihood ratio test comparing Model 2 and Model 1.}
#'   \item{\code{CDIF_Delta_NagelkerkeR2}}{The difference of the Nagelkerke R squared values for Model 2 and Model 1.}
#' }
#'
#' @examples
#' data("toydata")
#' resp <- toydata$resp #' group_categ <- toydata$group_categ
#' theta_est <- toydata$theta_est #' log_reg(resp, DIF_covariate = factor(group_categ), theta = theta_est) #' #' #' @export log_reg <- function(resp, DIF_covariate, theta = NULL){ # get call call <- match.call() # a theta-argument is required if(is.null(theta)) stop("'theta'-argument is missing. Include a vector with the estimated 'theta'-values.", call. = FALSE) # get the DIF_covariate name DIF_covariate_name <- as.character(deparse(call$DIF_covariate))

if(!is.factor(DIF_covariate)){
DIF_covariate <- as.factor(DIF_covariate)
message(paste0("For the DIF analysis, '", DIF_covariate_name,
"' was transformed into a factor."))
}

# Helper functions
R2 <- function(m, n) 1 - (exp(-m$null.deviance/2 + m$deviance/2))^(2/n)
R2max <- function(m, n) 1 - (exp(-m$null.deviance/2))^(2/n) R2DIF <- function(m, n) R2(m, n)/R2max(m, n) # NagelkerkeR2 R2DIF2 <- function(m1, m2, n) R2DIF(m2, n) - R2DIF(m1, n) # Delta_NagelkerkeR2 # function to compute the log-reg-DIF-test for one item log_reg_DIF_1_item <- function(itemname, data){ glm_2 <- stats::glm( stats::as.formula(paste0(itemname," ~ theta * DIF_covariate")), family = stats::binomial, data = d) # number of observations in test N <- summary(glm_2)$df.null + 1

# does it make sense to test for DIF?
if (dim(summary(glm_2)$coef)[1] == 4){ glm_1 <- stats::glm( stats::as.formula(paste0(itemname," ~ theta + DIF_covariate")), family = stats::binomial, data = d) glm_0 <- stats::glm( stats::as.formula(paste0(itemname," ~ theta")), family = stats::binomial, data = d) test_udif <- stats::anova(glm_0, glm_1, test = "LRT") test_cdif <- stats::anova(glm_1, glm_2, test = "LRT") test_dif <- stats::anova(glm_0, glm_2, test = "LRT") return(data.frame(item = itemname, N = N, stat = test_dif$Deviance[2],
p_value = test_dif$Pr(>Chi)[2], eff_size = R2DIF2(glm_0, glm_2, N), stat_u = test_udif$Deviance[2],
p_value_u = test_udif$Pr(>Chi)[2], eff_size_u = R2DIF2(glm_0, glm_1, N), stat_nu = test_cdif$Deviance[2],
p_value_nu = test_cdif\$Pr(>Chi)[2],
eff_size_nu = R2DIF2(glm_1, glm_2, N),
stringsAsFactors = FALSE))

} else {
return(data.frame(item = itemname,
N = N,
stat = NA,
p_value = NA,
eff_size = NA,
stat_u = NA,
p_value_u = NA,
eff_size_u = NA,
stat_nu = NA,
p_value_nu = NA,
eff_size_nu = NA,
stringsAsFactors = FALSE))
}
}

# get number of items
nItem <- ncol(resp)

# get/set item names
colnames(resp) <- itemnames <- 'if'(is.null(colnames(resp)),
sprintf(paste("it%0", nchar(nItem),
"d", sep=''),
seq_len(nItem)),
colnames(resp))

# combine estimated theta's with grouping variable and responses
d <- as.data.frame(cbind(theta, DIF_covariate, resp))

# apply the test for one item to all the items
res <- do.call(rbind, lapply(itemnames, log_reg_DIF_1_item, data = d))
return(list(resp = resp,
DIF_covariate = DIF_covariate_name,
test = "Likelihood Ratio Test",
results = res))
}


## Try the mstDIF package in your browser

Any scripts or data that you put into this service are public.

mstDIF documentation built on July 24, 2020, 5:08 p.m.