# R/blim.R In pks: Probabilistic Knowledge Structures

```## Fitting the basic local independence model (BLIM) by MDML
blim <- function(K, N.R, method = c("MD", "ML", "MDML"), R = as.binmat(N.R),
P.K = rep(1/nstates, nstates),
beta = rep(0.1, nitems),
eta = rep(0.1, nitems),
betafix = rep(NA, nitems), etafix = rep(NA, nitems),
betaequal = NULL, etaequal = NULL,
errtype = c("both", "error", "guessing"),
errequal = FALSE, randinit = FALSE, incradius = 0,
tol = 1e-07, maxiter = 10000, zeropad = 12) {

K       <- as.matrix(K)
N.R     <- setNames(as.integer(N.R), names(N.R))  # convert to named int
N       <- sum(N.R)
nitems  <- ncol(K)
npat    <- nrow(R)
nstates <- nrow(K)

## Uniformly random initial values
if (randinit) {
beta <- runif(nitems)                       # constraint: beta + eta < 1
eta <- runif(nitems)
beta <- ifelse(beta + eta < 1, beta, 1 - beta)
eta <- ifelse(beta + eta < 1,  eta, 1 -  eta)
x <- c(0, sort(runif(nstates - 1)), 1)
P.K <- x[-1] - x[-length(x)]               # constraint: sum(P.K) == 1
}

## Equality restrictions
betaeq <- etaeq <- diag(nitems)
if (!is.null(betaequal)) for (i in betaequal) betaeq[i, i] <- 1
if (!is.null( etaequal)) for (i in  etaequal)  etaeq[i, i] <- 1

errtype <- match.arg(errtype)                        # overrides arguments
if (errtype == "error") {
etafix <- rep(0, nitems)
warning("errtype is deprecated, use etafix = rep(0, nitems) instead")
}
if (errtype == "guessing") {
betafix <- rep(0, nitems)
warning("errtype is deprecated, use betafix = rep(0, nitems) instead")
}
if (errequal) {
betaeq <- etaeq <- matrix(1, nitems, nitems)
warning("errequal is deprecated, use betaequal or etaequal instead")
}
beta[!is.na(betafix)] <- betafix[!is.na(betafix)]    # overrides arguments
eta[!is.na( etafix)] <-  etafix[!is.na( etafix)]

names(P.K) <- if(is.null(rownames(K))) as.pattern(K) else rownames(K)
names(beta) <- names(eta) <-
if (is.null(colnames(K))) {
make.unique(c("a", letters[(seq_len(nitems) %% 26) + 1])[-(nitems + 1)],
sep = "")
} else colnames(K)
dimnames(betaeq) <- dimnames(etaeq) <- list(names(eta), names(eta))

## Assigning state K given response R
d.RK <- if (length(which(c(betafix, etafix) == 0)) > 0) {
apply(K, 1, function(k) {
RwoK <- t(R) & !k
idx <- which(RwoK, arr.ind=TRUE)
RwoK[idx[idx[, "row"] %in% which(etafix == 0), ]] <- NA

KwoR <- k & !t(R)
idx <- which(KwoR, arr.ind=TRUE)
KwoR[idx[idx[, "row"] %in% which(betafix == 0), ]] <- NA
colSums(RwoK) + colSums(KwoR)
})
} else
apply(K, 1, function(k) colSums(xor(t(R), k)))
d.min <- apply(d.RK, 1, min, na.rm = TRUE)             # minimum discrepancy
i.RK  <- (d.RK <= (d.min + incradius)) & !is.na(d.RK)

## Minimum discrepancy distribution
disc.tab <- xtabs(N.R ~ d.min)
disc     <- as.numeric(names(disc.tab)) %*% disc.tab / N

eps     <- 1e-06
iter    <- 0
maxdiff <- 2 * tol
em      <- switch(method <- match.arg(method), MD = 0, ML = 1, MDML = 1)
md      <- switch(method, MD = 1, ML = 0, MDML = 1)
beta.num <- beta.denom <- eta.num <-  eta.denom <- beta
while ((maxdiff > tol) && (iter < maxiter) &&
((md*(1 - em) != 1) || (iter == 0))) {
pi.old   <- P.K
beta.old <- beta
eta.old  <- eta

P.R.K <- apply(K, 1, function(k) apply(
beta^((1 - t(R))*k) * (1 - beta)^(t(R)*k) *
eta^(t(R)*(1 - k)) * (1 - eta)^((1 - t(R))*(1 - k)),
2, prod))
P.R    <- as.numeric(P.R.K %*% P.K)
P.K.R  <- P.R.K * outer(1/P.R, P.K)         # prediction of P(K|R)
mat.RK <- i.RK^md * P.K.R^em
m.RK   <- (mat.RK / rowSums(mat.RK)) * N.R  # m.RK = E(M.RK) = P(K|R)*N(R)

## Distribution of knowledge states
P.K <- colSums(m.RK) / N

## Careless error and guessing parameters
for (j in seq_len(nitems)) {
beta.num[j]   <- sum(m.RK[which(R[, j] == 0), which(K[, j] == 1)])
beta.denom[j] <- sum(m.RK[, which(K[, j] == 1)])
eta.num[j]   <- sum(m.RK[which(R[, j] == 1), which(K[, j] == 0)])
eta.denom[j] <- sum(m.RK[, which(K[, j] == 0)])
}
beta <- drop(betaeq %*% beta.num / betaeq %*% beta.denom)
eta <- drop( etaeq %*%  eta.num /  etaeq %*%  eta.denom)
beta[is.na(beta) | beta < eps] <- eps
eta[is.na( eta) |  eta < eps] <- eps
beta[!is.na(betafix)] <- betafix[!is.na(betafix)]  # reset fixed parameters
eta[!is.na( etafix)] <-  etafix[!is.na( etafix)]

maxdiff <- max(abs(c(P.K, beta, eta) - c(pi.old, beta.old, eta.old)))
iter <- iter + 1
}
if(iter >= maxiter) warning("iteration maximum has been exceeded")

## Mean number of errors
P.Kq <- numeric(nitems)
for(j in seq_len(nitems))
P.Kq[j] <- sum(P.K[which(K[,j] == 1)])
nerror <- c("careless error" = sum(beta * P.Kq),
"lucky guess" = sum( eta * (1 - P.Kq)))

## If there are missing response patterns, create complete R and N.R
if(npat < 2^nitems && nitems <= zeropad) {
N.Rincomp <- N.R
R   <- expand.grid(rep(list(0:1), nitems), KEEP.OUT.ATTRS=FALSE)
N.R <- setNames(integer(nrow(R)), as.pattern(R)) # named int filled w/zeros
R   <- as.binmat(N.R)                            # named int again
N.R[names(N.Rincomp)] <- N.Rincomp
}

## Recompute predictions and likelihood
P.R.K <- apply(K, 1, function(k) apply(
beta^((1 - t(R))*k) * (1 - beta)^(t(R)*k) *
eta^(t(R)*(1 - k)) * (1 - eta)^((1 - t(R))*(1 - k)),
2, prod))
P.R <- as.numeric(P.R.K %*% P.K)
if (sum(P.R) < 1) P.R <- P.R/sum(P.R)      # if no zero padding: normalize
loglik <- sum(log(P.R) * N.R, na.rm=TRUE)

## Number of parameters
npar <- nstates - 1 + qr(betaeq)\$rank - sum(!is.na(betafix)) +
qr( etaeq)\$rank - sum(!is.na( etafix))

## Goodness of fit, df = number of patterns or persons
fitted <- setNames(N*P.R, names(N.R))
G2     <- 2*sum(N.R*log(N.R/fitted), na.rm=TRUE)
# df     <- min(2^nitems - 1, N) - npar        # number of patterns or persons
df     <- min(if(nitems <= zeropad) 2^nitems - 1 else npat, N) - npar
gof    <- c(G2=G2, df=df, pval = 1 - pchisq(G2, df))

z <- list(discrepancy=c(disc), P.K=P.K, beta=beta, eta=eta,
disc.tab=disc.tab, K=K, N.R=N.R, nitems=nitems, nstates=nstates,
npatterns=npat, ntotal=N, nerror=nerror, npar=npar,
method=method, iter=iter, loglik=loglik, fitted.values=fitted,
goodness.of.fit=gof)
class(z) <- "blim"
z
}

print.blim <- function(x, P.Kshow = FALSE, errshow = TRUE,
digits=max(3, getOption("digits") - 2), ...){
cat("\nBasic local independence models (BLIMs)\n")
cat("\nNumber of knowledge states:", x\$nstates)
cat("\nNumber of response patterns:", x\$npatterns)
cat("\nNumber of respondents:", x\$ntotal)

method <- switch(x\$method,
MD = "Minimum discrepancy",
ML = "Maximum likelihood",
MDML = "Minimum discrepancy maximum likelihood")
cat("\n\nMethod:", method)
cat("\nNumber of iterations:", x\$iter)
G2   <- x\$goodness.of.fit[1]
df   <- x\$goodness.of.fit[2]
pval <- x\$goodness.of.fit[3]
cat("\nGoodness of fit (2 log likelihood ratio):\n")
cat("\tG2(", df, ") = ", format(G2, digits=digits), ", p = ",
format(pval, digits=digits), "\n", sep="")

cat("\nMinimum discrepancy distribution (mean = ",
round(x\$discrepancy, digits=digits), ")\n", sep="")
disc.tab <- x\$disc.tab
names(dimnames(disc.tab)) <- NULL
print(disc.tab)
cat("\nMean number of errors (total = ",
round(sum(x\$nerror), digits=digits), ")\n", sep="")
print(x\$nerror)
if(P.Kshow){
cat("\nDistribution of knowledge states\n")
printCoefmat(cbind("P(K)"=x\$P.K), digits=digits, cs.ind=1, tst.ind=NULL,
zap.ind=1)
}
if(errshow){
cat("\nError and guessing parameters\n")
printCoefmat(cbind(beta=x\$beta, eta=x\$eta), digits=digits, cs.ind=1:2,
tst.ind=NULL, zap.ind=1:2)
}
cat("\n")
invisible(x)
}

## Log-likelihood for blim objects
logLik.blim <- function(object, ...){
if(length(list(...)))
p <- object\$npar
val <- object\$loglik
attr(val, "df") <- p
attr(val, "nobs") <- object\$npatterns
class(val) <- "logLik"
val
}

## Number of observations
nobs.blim <- function(object, ...) object\$npatterns

## Residuals for BLIMs
residuals.blim <- function(object, type=c("deviance", "pearson"), ...){

dev.resids <- function(y, mu, wt)
2 * wt * (y * log(ifelse(y == 0, 1, y/mu)) - (y - mu))

type <- match.arg(type)
wts <- object\$ntotal
y <- object\$N.R / wts
mu <- object\$fitted.values/wts
res <- switch(type,
deviance = if(object\$goodness['df'] > 0){
d.res <- sqrt(pmax(dev.resids(y, mu, wts), 0))
ifelse(y > mu, d.res, -d.res)  # sign
}
else rep.int(0, length(mu)),
pearson = (y - mu) * sqrt(wts)/sqrt(mu)
)
if(!is.null(object\$na.action)) res <- naresid(object\$na.action, res)
res
}

## Diagnostic plot for BLIMs
plot.blim <- function(x,
xlab="Predicted response probabilities", ylab="Deviance residuals", ...){

xres <- resid(x)
mu   <- x\$fitted.values/x\$ntotal
plot(mu, xres, xlab = xlab, ylab = ylab, type="n", ...)
abline(h = 0, lty = 2)
panel.smooth(mu, xres)
}

## Simulate responses from BLIM
simulate.blim <- function(object, nsim = 1, seed = NULL, ...){
P.K <- object\$P.K
beta <- object\$beta
eta <- object\$eta
tK <- t(as.matrix(object\$K))
N <- object\$ntotal
nitems <- nrow(tK)

state.id <- sample(seq_along(P.K), N, replace=TRUE, prob=P.K)  # draw states

P.1.K <- tK*(1 - beta) + (1 - tK)*eta               # P(resp = 1 | K)
R     <- matrix(0, N, nitems)                       # response matrix
for(i in seq_len(N))
R[i,] <- rbinom(nitems, 1, P.1.K[, state.id[i]])  # draw a response

as.pattern(R, freq = TRUE)
}

## Convert binary matrix to vector of response patterns
as.pattern <- function(R, freq = FALSE, as.letters = FALSE, as.set = FALSE){
if(freq){
N.R <- table(apply(R, 1, paste, collapse=""))
setNames(as.integer(N.R), names(N.R))          # convert to named int
}else
if(as.letters | as.set){
nitems <- ncol(R)
item.names <-
make.unique(c("a", letters[(seq_len(nitems) %% 26) + 1])[-(nitems + 1)],
sep="")
lett <- apply(R, 1, function(r) paste(item.names[which(r == 1)],
collapse=""))
lett[lett == ""] <- "0"

if(as.set){
# Separate elements in lett by "_", remove leading "_",
# then strsplit along "_" (trailing "_" are ignored by strsplit)
setfam <- as.set(lapply(strsplit(
gsub("^_(.+)", "\\1", gsub("([0-9]*)", "\\1_", unname(lett))),
"_"), as.set))
if (set_contains_element(setfam, set("0")))
setfam[[set("0")]] <- set()  # proper empty set
setfam  # return family of sets, class set
}else
lett    # return letters, class character
}else
unname(apply(R, 1, paste, collapse=""))
}

## Convert vector of response patterns to named binary matrix
as.binmat <- function(N.R, uniq = TRUE, col.names = NULL){
if (is.set(N.R)) {
states <- lapply(N.R, as.character)
items <- sort(unique(unlist(states)))
R <- matrix(0, length(N.R), length(items),
dimnames=list(NULL,
if(is.null(col.names)) items else col.names))
for (i in seq_len(nrow(R))) R[i, states[[i]]] <- 1
} else {
pat <- if(is.null(names(N.R))) N.R else names(N.R)
R   <- if(uniq) strsplit(pat, "") else strsplit(rep(pat, N.R), "")
R   <- do.call(rbind, R)

colnames(R) <-
if(is.null(col.names)){
nitems <- ncol(R)
make.unique(c("a", letters[(seq_len(nitems) %% 26) + 1])[-(nitems + 1)],
sep="")
}else
col.names
}
storage.mode(R) <- "integer"
R
}

anova.blim <- function(object, ..., test = c("Chisq", "none")){
## Adapted form MASS::anova.polr and stats::anova.glmlist

test <- match.arg(test)
dots <- list(...)
if (length(dots) == 0)
stop('anova is not implemented for a single "blim" object')
mlist <- list(object, ...)
nmodels <- length(mlist)
names(mlist) <- sapply(match.call()[-1],
function(s) paste(deparse(s), collapse="\n"))[seq_len(nmodels)]

if (any(!sapply(mlist, inherits, "blim")))
stop('not all objects are of class "blim"')

ns <- sapply(mlist, function(x) length(x\$fitted))
if (any(ns != ns[1]))
stop("models were not all fitted to the same size of dataset")

dfs <- sapply(mlist, function(x) x\$goodness.of.fit["df"])
lls <- sapply(mlist, function(x) x\$goodness.of.fit["G2"])
df <- c(NA, -diff(dfs))
x2 <- c(NA, -diff(lls))
pr <- c(NA, pchisq(x2[-1], df[-1], lower.tail = FALSE))

out <- data.frame(Resid.df = dfs, Deviance = lls, Df = df, Chisq = x2,
Prob = pr)
dimnames(out) <- list(1:nmodels, c("Resid. Df", "Resid. Dev", "Df",
"Deviance", "Pr(>Chi)"))
if (test == "none") out <- out[, -ncol(out)]

structure(out,
heading = c("Analysis of Deviance Table\n",
paste0("Model ", format(1L:nmodels), ": ",
names(mlist), collapse = "\n")),
class = c("anova", "data.frame"))
}

deviance.blim <- function(object, ...) object\$goodness.of.fit["G2"]

coef.blim <- function(object, ...){
c(setNames(object\$beta, paste("beta", names(object\$beta), sep=".")),
setNames(object\$eta,  paste( "eta", names(object\$eta),  sep=".")),
object\$P.K)
}
```

## Try the pks package in your browser

Any scripts or data that you put into this service are public.

pks documentation built on May 2, 2019, 4:47 p.m.