#' Creates a formula for non-linear regression DIF models.
#'
#' @description
#' The function returns the formula of the non-linear regression DIF model based
#' on model specification and DIF type to be tested.
#'
#' @usage
#' formulaNLR(model, constraints = NULL, type = "all", parameterization = "irt",
#' outcome)
#'
#' @param model character: generalized logistic regression model for which
#' starting values should be estimated. See \strong{Details}.
#' @param constraints character: which parameters should be the same for both
#' groups. Possible values are any combinations of parameters \code{"a"},
#' \code{"b"}, \code{"c"}, and \code{"d"}. Default value is \code{NULL}.
#' @param type character: type of DIF to be tested. Possible values are
#' \code{"all"} for detecting difference in any parameter (default),
#' \code{"udif"} for uniform DIF only (i.e., difference in difficulty
#' parameter \code{"b"}),
#' \code{"nudif"} for non-uniform DIF only (i.e., difference in discrimination
#' parameter \code{"a"}),
#' \code{"both"} for uniform and non-uniform DIF (i.e., difference in
#' parameters \code{"a"} and \code{"b"}),
#' or any combination of parameters \code{"a"}, \code{"b"}, \code{"c"}, and
#' \code{"d"}. Can be specified as a single value (for all items) or as an
#' item-specific vector.
#' @param parameterization character: parameterization of regression
#' coefficients. Possible options are \code{"irt"} (IRT parameterization,
#' default), \code{"is"} (intercept-slope), and \code{"logistic"} (logistic
#' regression as in the \code{\link[stats]{glm}} function, available for
#' the \code{"2PL"} model only). See \strong{Details}.
#' @param outcome character: name of outcome to be printed in formula. If not
#' specified \code{"y"} is used.
#'
#' @details
#' The unconstrained form of the 4PL generalized logistic regression model for
#' probability of correct answer (i.e., \eqn{Y_{pi} = 1}) using IRT parameterization
#' is
#' \deqn{P(Y_{pi} = 1|X_p, G_p) = (c_{iR} \cdot G_p + c_{iF} \cdot (1 - G_p)) +
#' (d_{iR} \cdot G_p + d_{iF} \cdot (1 - G_p) - c_{iR} \cdot G_p - c_{iF} \cdot
#' (1 - G_p)) / (1 + \exp(-(a_i + a_{i\text{DIF}} \cdot G_p) \cdot
#' (X_p - b_p - b_{i\text{DIF}} \cdot G_p))), }
#' where \eqn{X_p} is the matching criterion (e.g., standardized total score) and
#' \eqn{G_p} is a group membership variable for respondent \eqn{p}.
#' Parameters \eqn{a_i}, \eqn{b_i}, \eqn{c_{iR}}, and \eqn{d_{iR}}
#' are discrimination, difficulty, guessing, and inattention for the reference
#' group for item \eqn{i}. Terms \eqn{a_{i\text{DIF}}} and \eqn{b_{i\text{DIF}}}
#' then represent differences between the focal and reference groups in
#' discrimination and difficulty for item \eqn{i}. Terms \eqn{c_{iF}}, and
#' \eqn{d_{iF}} are guessing and inattention parameters for the focal group for
#' item \eqn{i}. In the case that there is no assumed difference between the
#' reference and focal group in the guessing or inattention parameters, the terms
#' \eqn{c_i} and \eqn{d_i} are used.
#'
#' Alternatively, intercept-slope parameterization may be applied:
#' \deqn{P(Y_{pi} = 1|X_p, G_p) = (c_{iR} \cdot G_p + c_{iF} \cdot (1 - G_p)) +
#' (d_{iR} \cdot G_p + d_{iF} \cdot (1 - G_p) - c_{iR} \cdot G_p - c_{iF} \cdot
#' (1 - G_p)) / (1 + \exp(-(\beta_{i0} + \beta_{i1} \cdot X_p +
#' \beta_{i2} \cdot G_p + \beta_{i3} \cdot X_p \cdot G_p))), }
#' where parameters \eqn{\beta_{i0}, \beta_{i1}, \beta_{i2}, \beta_{i3}} are
#' intercept, effect of the matching criterion, effect of the group membership,
#' and their mutual interaction, respectively.
#'
#' The \code{model} argument offers several predefined models. The options are as follows:
#' \code{Rasch} for 1PL model with discrimination parameter fixed on value 1 for both groups,
#' \code{1PL} for 1PL model with discrimination parameter set the same for both groups,
#' \code{2PL} for logistic regression model,
#' \code{3PLcg} for 3PL model with fixed guessing for both groups,
#' \code{3PLdg} for 3PL model with fixed inattention for both groups,
#' \code{3PLc} (alternatively also \code{3PL}) for 3PL regression model with guessing parameter,
#' \code{3PLd} for 3PL model with inattention parameter,
#' \code{4PLcgdg} for 4PL model with fixed guessing and inattention parameter for both groups,
#' \code{4PLcgd} (alternatively also \code{4PLd}) for 4PL model with fixed guessing for both groups,
#' \code{4PLcdg} (alternatively also \code{4PLc}) for 4PL model with fixed inattention for both groups,
#' or \code{4PL} for 4PL model.
#'
#' Three possible parameterizations can be specified in the
#' \code{"parameterization"} argument: \code{"irt"} returns the IRT parameters
#' of the reference group and differences in these parameters between the
#' reference and focal group. Parameters of asymptotes are printed separately
#' for the reference and focal groups. \code{"is"} returns intercept-slope
#' parameterization. Parameters of asymptotes are again printed separately for
#' the reference and focal groups. \code{"logistic"} returns parameters in
#' logistic regression parameterization as in the \code{\link[stats]{glm}}
#' function, and it is available only for the 2PL model.
#'
#' @return
#' A list of two models. Each includes a formula, parameters to be estimated,
#' and their lower and upper constraints.
#'
#' @author
#' Adela Hladka (nee Drabinova) \cr
#' Institute of Computer Science of the Czech Academy of Sciences \cr
#' \email{hladka@@cs.cas.cz} \cr
#'
#' Patricia Martinkova \cr
#' Institute of Computer Science of the Czech Academy of Sciences \cr
#' \email{martinkova@@cs.cas.cz} \cr
#'
#' @seealso \code{\link[difNLR]{difNLR}}
#'
#' @examples
#' # 3PL model with the same guessing parameter for both groups
#' # to test both types of DIF
#' formulaNLR(model = "3PLcg", type = "both")
#' formulaNLR(model = "3PLcg", type = "both", parameterization = "is")
#'
#' # 4PL model with the same guessing and inattention parameters
#' # to test uniform DIF
#' formulaNLR(model = "4PLcgdg", type = "udif")
#' formulaNLR(model = "4PLcgdg", type = "udif", parameterization = "is")
#'
#' # 2PL model to test non-uniform DIF
#' formulaNLR(model = "2PL", type = "nudif")
#' formulaNLR(model = "2PL", type = "nudif", parameterization = "is")
#' formulaNLR(model = "2PL", type = "nudif", parameterization = "logistic")
#'
#' # 4PL model to test all possible DIF
#' formulaNLR(model = "4PL", type = "all", parameterization = "irt")
#' formulaNLR(model = "4PL", type = "all", parameterization = "is")
#'
#' # 4PL model with fixed a and c parameters
#' # to test difference in b
#' formulaNLR(model = "4PL", constraints = "ac", type = "b")
#' formulaNLR(model = "4PL", constraints = "ac", type = "b", parameterization = "is")
#' @export
#' @importFrom stats as.formula
formulaNLR <- function(model, constraints = NULL, type = "all", parameterization = "irt", outcome) {
if (missing(model)) {
stop("Argument 'model' is missing.", call. = FALSE)
} else {
if (!(model %in% c(
"Rasch", "1PL", "2PL",
"3PLcg", "3PLdg", "3PLc", "3PL", "3PLd",
"4PLcgdg", "4PLcgd", "4PLd", "4PLcdg", "4PLc", "4PL"
))) {
stop("Invalid value for the 'model' argument.", call. = FALSE)
}
}
if (parameterization == "logistic" & model != "2PL") {
stop("Logistic parameterization is available only for the 2PL model", call. = FALSE)
}
# constraints for model
cons <- rep(TRUE, 8)
names(cons) <- c("a", "b", "c", "d", "aDif", "bDif", "cF", "dF")
if (!is.null(constraints)) {
if (any(!is.na(constraints))) {
constr <- unlist(strsplit(constraints, split = ""))
if (!all(constr %in% letters[1:4])) {
warning("Constraints can be only 'a', 'b', 'c', or 'd'!", call. = FALSE)
}
cons[paste0(constr[!(constr %in% c("c", "d"))], "Dif")] <- FALSE
cons[paste0(constr[(constr %in% c("c", "d"))], "F")] <- FALSE
}
}
# model
mod <- logical(8)
mod <- switch(model,
# 1PL models
"Rasch" = c(FALSE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE),
"1PL" = c(TRUE, TRUE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE),
# 2PL models
"2PL" = c(TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, FALSE, FALSE),
# 3PL models
"3PLcg" = c(TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, FALSE, FALSE),
"3PLdg" = c(TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, FALSE, FALSE),
"3PLc" = c(TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, FALSE),
"3PL" = c(TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, FALSE),
"3PLd" = c(TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, FALSE, TRUE),
# 4PL models
"4PLcgdg" = c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE),
"4PLcgd" = c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE),
"4PLd" = c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE),
"4PLcdg" = c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE),
"4PLc" = c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE),
"4PL" = c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE)
)
names(mod) <- c("a", "b", "c", "d", "aDif", "bDif", "cF", "dF")
mod <- apply(cbind(mod, cons), 1, all)
# type of DIF to be tested
typ <- typcons <- mod
if (!(type %in% c("udif", "nudif", "both", "all"))) {
types <- unlist(strsplit(type, split = ""))
if (!all(types %in% letters[1:4])) {
stop("Type of DIF to be tested not recognized. Only parameters 'a', 'b', 'c', or 'd' can be tested
or 'type' must be one of predefined options: either 'udif', 'nudif', 'both', or 'all'.", call. = FALSE)
}
if (any(types %in% c("c", "d"))) {
typcons[.MYpaste(types[(types %in% c("c", "d"))], "F")] <- FALSE
}
if (length(types[!(types %in% c("c", "d"))]) > 0) {
typcons[paste0(types[!(types %in% c("c", "d"))], "Dif")] <- FALSE
}
type <- "other"
}
if (type == "other") {
if (any(!is.na(constraints))) {
if (length(intersect(types, constr)) > 0) {
stop("The difference in constrained parameters cannot be tested.", call. = FALSE)
}
}
}
switch(type,
"udif" = {
typ["bDif"] <- FALSE
typ["aDif"] <- FALSE
mod["aDif"] <- FALSE
},
"nudif" = {
typ["aDif"] <- FALSE
},
"both" = {
typ["bDif"] <- FALSE
typ["aDif"] <- FALSE
},
"all" = {
typ[c("aDif", "bDif", "cF", "dF")] <- FALSE
},
"other" = {
typ <- typcons
}
)
if (model %in% c("Rasch", "1PL")) {
typ["aDif"] <- FALSE
if (type == "both") {
warning("Only uniform DIF can be tested with specified model.", call. = FALSE)
}
if (type == "nudif") {
stop("It is not possible to test non-uniform DIF in specified model.", call. = FALSE)
}
}
# model 0 and model 1
mod0 <- typ
mod1 <- mod
if (parameterization == "logistic") {
mod0 <- c(
"(Intercept)" = as.logical(mod0[["b"]]), # note, intercept always estimated, even if discrimination is not
"x" = as.logical(mod0[["a"]]),
"g" = as.logical(-mod0[["a"]] * mod0[["bDif"]] -
mod0[["aDif"]] * mod0[["b"]] -
mod0[["aDif"]] * mod0[["bDif"]]),
"x:g" = as.logical(mod0[["aDif"]])
)
mod1 <- c(
"(Intercept)" = as.logical(mod1[["b"]]), # note, intercept always estimated, even if discrimination is not
"x" = as.logical(mod1[["a"]]),
"g" = as.logical(-mod1[["a"]] * mod1[["bDif"]] -
mod1[["aDif"]] * mod1[["b"]] -
mod1[["aDif"]] * mod1[["bDif"]]),
"x:g" = as.logical(mod1[["aDif"]])
)
part_expit0 <- names(mod0[-1])[mod0[-1]]
part_expit0 <- .MYpaste(part_expit0, collapse = " + ")
part_asym0 <- ""
part_expit1 <- names(mod1[-1])[mod1[-1]]
part_expit1 <- .MYpaste(part_expit1, collapse = " + ")
part_asym1 <- ""
} else if (parameterization == "is") {
mod0 <- c(
"b0" = as.logical(mod0[["b"]]), # note, intercept always estimated, even if discrimination is not
"b1" = as.logical(mod0[["a"]]),
"b2" = any(mod0[["bDif"]], as.logical(-mod0[["a"]] * mod0[["bDif"]] - # note, intercept always estimated, even if discrimination is not
mod0[["aDif"]] * mod0[["b"]] -
mod0[["aDif"]] * mod0[["bDif"]])),
"b3" = as.logical(mod0[["aDif"]]),
"c" = as.logical(mod0[["c"]]),
"cF" = as.logical(mod0[["cF"]]),
"d" = as.logical(mod0[["d"]]),
"dF" = as.logical(mod0[["dF"]])
)
mod1 <- c(
"b0" = as.logical(mod1[["b"]]), # note, intercept always estimated, even if discrimination is not
"b1" = as.logical(mod1[["a"]]),
"b2" = any(mod1[["bDif"]], as.logical(-mod1[["a"]] * mod1[["bDif"]] - # note, intercept always estimated, even if discrimination is not
mod1[["aDif"]] * mod1[["b"]] -
mod1[["aDif"]] * mod1[["bDif"]])),
"b3" = as.logical(mod1[["aDif"]]),
"c" = as.logical(mod1[["c"]]),
"cF" = as.logical(mod1[["cF"]]),
"d" = as.logical(mod1[["d"]]),
"dF" = as.logical(mod1[["dF"]])
)
# expit part
mod0_expit <- mod0[c("b0", "b1", "b2", "b3")]
part_expit0 <- paste0(paste0(names(mod0_expit), c("", " * x", " * g", " * x * g"))[mod0_expit], collapse = " + ")
part_expit0 <- ifelse(model == "Rasch", paste(part_expit0, "+ x"), part_expit0)
part_expit0 <- paste0(" / (1 + exp(-(", part_expit0, ")))")
# expit part
mod1_expit <- mod1[c("b0", "b1", "b2", "b3")]
part_expit1 <- paste0(paste0(names(mod1_expit), c("", " * x", " * g", " * x * g"))[mod1_expit], collapse = " + ")
part_expit1 <- ifelse(model == "Rasch", paste(part_expit1, "+ x"), part_expit1)
part_expit1 <- paste0(" / (1 + exp(-(", part_expit1, ")))")
} else {
# expit part - a
mod0_expit_a0 <- mod0[c("a", "aDif")]
part_a0 <- paste0(paste0(names(mod0_expit_a0), c("", " * g"))[mod0_expit_a0], collapse = " + ")
part_a0 <- ifelse(nchar(part_a0) <= 1, part_a0, paste0("(", part_a0, ")"))
# expit part - b
mod0_expit_b0 <- mod0[c("b", "bDif")]
part_b0 <- paste0(paste0(names(mod0_expit_b0), c("", " * g"))[mod0_expit_b0], collapse = " + ")
part_b0 <- ifelse(nchar(part_b0) <= 1, part_b0, paste0("(", part_b0, ")"))
# expit part
part_expit0 <- paste0("/ (1 + exp(-", .MYpaste(c(part_a0, paste0("(x - ", part_b0, ")")), collapse = " * "), "))")
# expit part - a
mod1_expit_a1 <- mod1[c("a", "aDif")]
part_a1 <- paste0(paste0(names(mod1_expit_a1), c("", " * g"))[mod1_expit_a1], collapse = " + ")
part_a1 <- ifelse(nchar(part_a1) <= 1, part_a1, paste0("(", part_a1, ")"))
# expit part - b
mod1_expit_b1 <- mod1[c("b", "bDif")]
part_b1 <- paste0(paste0(names(mod1_expit_b1), c("", " * g"))[mod1_expit_b1], collapse = " + ")
part_b1 <- ifelse(nchar(part_b1) <= 1, part_b1, paste0("(", part_b1, ")"))
# expit part
part_expit1 <- paste0("/ (1 + exp(-", .MYpaste(c(part_a1, paste0("(x - ", part_b1, ")")), collapse = " * "), "))")
}
if (parameterization != "logistic") {
# c asymptote
if (mod0["cF"]) {
names(mod0)[names(mod0) %in% c("c", "cF")] <- c("cR", "cF")
mod0_c0 <- mod0[c("cR", "cF")]
part_c0 <- paste0(paste(names(mod0_c0), c("* (1 - g)", "* g"))[mod0_c0], collapse = " + ")
} else {
mod0_c0 <- mod0[c("c", "cF")]
part_c0 <- paste0(paste0(names(mod0_c0), c("", " * g"))[mod0_c0], collapse = " + ")
}
part_c0 <- ifelse(nchar(part_c0) > 1, paste0("(", part_c0, ")"), part_c0)
# d asymptote
if (mod0["dF"]) {
names(mod0)[names(mod0) %in% c("d", "dF")] <- c("dR", "dF")
mod0_d0 <- mod0[c("dR", "dF")]
part_d0 <- paste0(paste(names(mod0_d0), c("* (1 - g)", "* g"))[mod0_d0], collapse = " + ")
} else {
mod0_d0 <- mod0[c("d", "dF")]
part_d0 <- paste0(paste0(names(mod0_d0), c("", " * g"))[mod0_d0], collapse = " + ")
}
part_d0 <- ifelse(part_d0 == "" & (grepl(3, model) | grepl(4, model)), 1, part_d0)
# d - c
part_dc0 <- .MYpaste(c(part_d0, part_c0), collapse = " - ")
part_dc0 <- ifelse(part_dc0 == "" & (grepl(3, model) | grepl(4, model)), 1, part_dc0)
part_dc0 <- ifelse(nchar(part_dc0) > 1, paste0("(", part_dc0, ")"), part_dc0)
# c + (d - c) asymptotes
part_asym0 <- ifelse(grepl(3, model) | grepl(4, model), .MYpaste(c(part_c0, part_dc0), collapse = " + "), 1)
# c asymptote
if (mod1["cF"]) {
names(mod1)[names(mod1) %in% c("c", "cF")] <- c("cR", "cF")
mod1_c1 <- mod1[c("cR", "cF")]
part_c1 <- paste0(paste(names(mod1_c1), c("* (1 - g)", "* g"))[mod1_c1], collapse = " + ")
} else {
mod1_c1 <- mod1[c("c", "cF")]
part_c1 <- paste0(paste0(names(mod1_c1), c("", " * g"))[mod1_c1], collapse = " + ")
}
part_c1 <- ifelse(nchar(part_c1) > 1, paste0("(", part_c1, ")"), part_c1)
# d asymptote
if (mod1["dF"]) {
names(mod1)[names(mod1) %in% c("d", "dF")] <- c("dR", "dF")
mod1_d1 <- mod1[c("dR", "dF")]
part_d1 <- paste0(paste(names(mod1_d1), c("* (1 - g)", "* g"))[mod1_d1], collapse = " + ")
} else {
mod1_d1 <- mod1[c("d", "dF")]
part_d1 <- paste0(paste0(names(mod1_d1), c("", " * g"))[mod1_d1], collapse = " + ")
}
part_d1 <- ifelse(part_d1 == "" & (grepl(3, model) | grepl(4, model)), 1, part_d1)
# d - c
part_dc1 <- .MYpaste(c(part_d1, part_c1), collapse = " - ")
part_dc1 <- ifelse(part_dc1 == "" & (grepl(3, model) | grepl(4, model)), 1, part_dc1)
part_dc1 <- ifelse(nchar(part_dc1) > 1, paste0("(", part_dc1, ")"), part_dc1)
# c + (d - c) asymptotes
part_asym1 <- ifelse(grepl(3, model) | grepl(4, model), .MYpaste(c(part_c1, part_dc1), collapse = " + "), 1)
} else {
part_asym0 <- part_asym1 <- ""
}
# formula
form0 <- paste0(part_asym0, part_expit0)
form1 <- paste0(part_asym1, part_expit1)
# formula - adding outcome
if (missing(outcome)) {
outcome <- "y"
}
form0 <- paste(outcome, "~", form0)
form1 <- paste(outcome, "~", form1)
lower <- switch(parameterization,
"irt" = c(-Inf, -Inf, 0, 0, -Inf, -Inf, 0, 0),
"is" = c(-Inf, -Inf, -Inf, -Inf, 0, 0, 0, 0),
"logistic" = NULL
)
upper <- switch(parameterization,
"irt" = c(Inf, Inf, 1, 1, Inf, Inf, 1, 1),
"is" = c(Inf, Inf, Inf, Inf, 1, 1, 1, 1),
"logistic" = NULL
)
if (parameterization != "logistic") {
lower0 <- setNames(lower[mod0], names(mod0[mod0]))
upper0 <- setNames(upper[mod0], names(mod0[mod0]))
lower1 <- setNames(lower[mod1], names(mod1[mod1]))
upper1 <- setNames(upper[mod1], names(mod1[mod1]))
} else {
lower0 <- upper0 <- lower1 <- upper1 <- NULL
}
return(list(
M0 = list(
formula = as.formula(form0),
parameters = names(mod0[mod0]),
lower = lower0,
upper = upper0
),
M1 = list(
formula = as.formula(form1),
parameters = names(mod1[mod1]),
lower = lower1,
upper = upper1
)
))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.