data.read | R Documentation |
This dataset contains N=328
students and I=12
items measuring reading
competence. All 12 items are arranged into 3 testlets (items with common
text stimulus) labeled as
A, B and C. The allocation of items to testlets is indicated by their
variable names.
data(data.read)
A data frame with 328 persons on the following 12 variables.
Rows correspond to persons and columns to items. The following items are
included in data.read
:
Testlet A: A1
, A2
, A3
, A4
Testlet B: B1
, B2
, B3
, B4
Testlet C: C1
, C2
, C3
, C4
## Not run:
data(data.read)
dat <- data.read
I <- ncol(dat)
# list of needed packages for the following examples
packages <- scan(what="character")
eRm ltm TAM mRm CDM mirt psychotools IsingFit igraph qgraph pcalg
poLCA randomLCA psychomix MplusAutomation lavaan
# load packages. make an installation if necessary
miceadds::library_install(packages)
#*****************************************************
# Model 1: Rasch model
#*****************************************************
#---- M1a: rasch.mml2 (in sirt)
mod1a <- sirt::rasch.mml2(dat)
summary(mod1a)
#---- M1b: smirt (in sirt)
Qmatrix <- matrix(1,nrow=I, ncol=1)
mod1b <- sirt::smirt(dat,Qmatrix=Qmatrix)
summary(mod1b)
#---- M1c: gdm (in CDM)
theta.k <- seq(-6,6,len=21)
mod1c <- CDM::gdm(dat,theta.k=theta.k,irtmodel="1PL", skillspace="normal")
summary(mod1c)
#---- M1d: tam.mml (in TAM)
mod1d <- TAM::tam.mml( resp=dat )
summary(mod1d)
#---- M1e: RM (in eRm)
mod1e <- eRm::RM( dat )
# eRm uses Conditional Maximum Likelihood (CML) as the estimation method.
summary(mod1e)
eRm::plotPImap(mod1e)
#---- M1f: mrm (in mRm)
mod1f <- mRm::mrm( dat, cl=1) # CML estimation
mod1f$beta # item parameters
#---- M1g: mirt (in mirt)
mod1g <- mirt::mirt( dat, model=1, itemtype="Rasch", verbose=TRUE )
print(mod1g)
summary(mod1g)
coef(mod1g)
# arrange coefficients in nicer layout
sirt::mirt.wrapper.coef(mod1g)$coef
#---- M1h: rasch (in ltm)
mod1h <- ltm::rasch( dat, control=list(verbose=TRUE ) )
summary(mod1h)
coef(mod1h)
#---- M1i: RaschModel.fit (in psychotools)
mod1i <- psychotools::RaschModel.fit(dat) # CML estimation
summary(mod1i)
plot(mod1i)
#---- M1j: noharm.sirt (in sirt)
Fpatt <- matrix( 0, I, 1 )
Fval <- 1 + 0*Fpatt
Ppatt <- Pval <- matrix(1,1,1)
mod1j <- sirt::noharm.sirt( dat=dat, Ppatt=Ppatt, Fpatt=Fpatt, Fval=Fval, Pval=Pval)
summary(mod1j)
# Normal-ogive model, multiply item discriminations with constant D=1.7.
# The same holds for other examples with noharm.sirt and R2noharm.
plot(mod1j)
#---- M1k: rasch.pml3 (in sirt)
mod1k <- sirt::rasch.pml3( dat=dat)
# pairwise marginal maximum likelihood estimation
summary(mod1k)
#---- M1l: running Mplus (using MplusAutomation package)
mplus_path <- "c:/Mplus7/Mplus.exe" # locate Mplus executable
#****************
# specify Mplus object
mplusmod <- MplusAutomation::mplusObject(
TITLE="1PL in Mplus ;",
VARIABLE=paste0( "CATEGORICAL ARE ", paste0(colnames(dat),collapse=" ") ),
MODEL="
! fix all item loadings to 1
F1 BY A1@1 A2@1 A3@1 A4@1 ;
F1 BY B1@1 B2@1 B3@1 B4@1 ;
F1 BY C1@1 C2@1 C3@1 C4@1 ;
! estimate variance
F1 ;
",
ANALYSIS="ESTIMATOR=MLR;",
OUTPUT="stand;",
usevariables=colnames(dat), rdata=dat )
#****************
# write Mplus syntax
filename <- "mod1u" # specify file name
# create Mplus syntaxes
res2 <- MplusAutomation::mplusModeler(object=mplusmod, dataout=paste0(filename,".dat"),
modelout=paste0(filename,".inp"), run=0 )
# run Mplus model
MplusAutomation::runModels( filefilter=paste0(filename,".inp"), Mplus_command=mplus_path)
# alternatively, the system() command can also be used
# get results
mod1l <- MplusAutomation::readModels(target=getwd(), filefilter=filename )
mod1l$summaries # summaries
mod1l$parameters$unstandardized # parameter estimates
#*****************************************************
# Model 2: 2PL model
#*****************************************************
#---- M2a: rasch.mml2 (in sirt)
mod2a <- sirt::rasch.mml2(dat, est.a=1:I)
summary(mod2a)
#---- M2b: smirt (in sirt)
mod2b <- sirt::smirt(dat,Qmatrix=Qmatrix,est.a="2PL")
summary(mod2b)
#---- M2c: gdm (in CDM)
mod2c <- CDM::gdm(dat,theta.k=theta.k,irtmodel="2PL", skillspace="normal")
summary(mod2c)
#---- M2d: tam.mml (in TAM)
mod2d <- TAM::tam.mml.2pl( resp=dat )
summary(mod2d)
#---- M2e: mirt (in mirt)
mod2e <- mirt::mirt( dat, model=1, itemtype="2PL" )
print(mod2e)
summary(mod2e)
sirt::mirt.wrapper.coef(mod1g)$coef
#---- M2f: ltm (in ltm)
mod2f <- ltm::ltm( dat ~ z1, control=list(verbose=TRUE ) )
summary(mod2f)
coef(mod2f)
plot(mod2f)
#---- M2g: R2noharm (in NOHARM, running from within R using sirt package)
# define noharm.path where 'NoharmCL.exe' is located
noharm.path <- "c:/NOHARM"
# covariance matrix
P.pattern <- matrix( 1, ncol=1, nrow=1 )
P.init <- P.pattern
P.init[1,1] <- 1
# loading matrix
F.pattern <- matrix(1,I,1)
F.init <- F.pattern
# estimate model
mod2g <- sirt::R2noharm( dat=dat, model.type="CFA", F.pattern=F.pattern,
F.init=F.init, P.pattern=P.pattern, P.init=P.init,
writename="ex2g", noharm.path=noharm.path, dec="," )
summary(mod2g)
#---- M2h: noharm.sirt (in sirt)
mod2h <- sirt::noharm.sirt( dat=dat, Ppatt=P.pattern,Fpatt=F.pattern,
Fval=F.init, Pval=P.init )
summary(mod2h)
plot(mod2h)
#---- M2i: rasch.pml2 (in sirt)
mod2i <- sirt::rasch.pml2(dat, est.a=1:I)
summary(mod2i)
#---- M2j: WLSMV estimation with cfa (in lavaan)
lavmodel <- "F=~ A1+A2+A3+A4+B1+B2+B3+B4+
C1+C2+C3+C4"
mod2j <- lavaan::cfa( data=dat, model=lavmodel, std.lv=TRUE, ordered=colnames(dat))
summary(mod2j, standardized=TRUE, fit.measures=TRUE, rsquare=TRUE)
#*****************************************************
# Model 3: 3PL model (note that results can be quite unstable!)
#*****************************************************
#---- M3a: rasch.mml2 (in sirt)
mod3a <- sirt::rasch.mml2(dat, est.a=1:I, est.c=1:I)
summary(mod3a)
#---- M3b: smirt (in sirt)
mod3b <- sirt::smirt(dat,Qmatrix=Qmatrix,est.a="2PL", est.c=1:I)
summary(mod3b)
#---- M3c: mirt (in mirt)
mod3c <- mirt::mirt( dat, model=1, itemtype="3PL", verbose=TRUE)
summary(mod3c)
coef(mod3c)
# stabilize parameter estimating using informative priors for guessing parameters
mirtmodel <- mirt::mirt.model("
F=1-12
PRIOR=(1-12, g, norm, -1.38, 0.25)
")
# a prior N(-1.38,.25) is specified for transformed guessing parameters: qlogis(g)
# simulate values from this prior for illustration
N <- 100000
logit.g <- stats::rnorm(N, mean=-1.38, sd=sqrt(.5) )
graphics::plot( stats::density(logit.g) ) # transformed qlogis(g)
graphics::plot( stats::density( stats::plogis(logit.g)) ) # g parameters
# estimate 3PL with priors
mod3c1 <- mirt::mirt(dat, mirtmodel, itemtype="3PL",verbose=TRUE)
coef(mod3c1)
# In addition, set upper bounds for g parameters of .35
mirt.pars <- mirt::mirt( dat, mirtmodel, itemtype="3PL", pars="values")
ind <- which( mirt.pars$name=="g" )
mirt.pars[ ind, "value" ] <- stats::plogis(-1.38)
mirt.pars[ ind, "ubound" ] <- .35
# prior distribution for slopes
ind <- which( mirt.pars$name=="a1" )
mirt.pars[ ind, "prior_1" ] <- 1.3
mirt.pars[ ind, "prior_2" ] <- 2
mod3c2 <- mirt::mirt(dat, mirtmodel, itemtype="3PL",
pars=mirt.pars,verbose=TRUE, technical=list(NCYCLES=100) )
coef(mod3c2)
sirt::mirt.wrapper.coef(mod3c2)
#---- M3d: ltm (in ltm)
mod3d <- ltm::tpm( dat, control=list(verbose=TRUE), max.guessing=.3)
summary(mod3d)
coef(mod3d) #=> numerical instabilities
#*****************************************************
# Model 4: 3-dimensional Rasch model
#*****************************************************
# define Q-matrix
Q <- matrix( 0, nrow=12, ncol=3 )
Q[ cbind(1:12, rep(1:3,each=4) ) ] <- 1
rownames(Q) <- colnames(dat)
colnames(Q) <- c("A","B","C")
# define nodes
theta.k <- seq(-6,6,len=13)
#---- M4a: smirt (in sirt)
mod4a <- sirt::smirt(dat,Qmatrix=Q,irtmodel="comp", theta.k=theta.k, maxiter=30)
summary(mod4a)
#---- M4b: rasch.mml2 (in sirt)
mod4b <- sirt::rasch.mml2(dat,Q=Q,theta.k=theta.k, mmliter=30)
summary(mod4b)
#---- M4c: gdm (in CDM)
mod4c <- CDM::gdm( dat, irtmodel="1PL", theta.k=theta.k, skillspace="normal",
Qmatrix=Q, maxiter=30, centered.latent=TRUE )
summary(mod4c)
#---- M4d: tam.mml (in TAM)
mod4d <- TAM::tam.mml( resp=dat, Q=Q, control=list(nodes=theta.k, maxiter=30) )
summary(mod4d)
#---- M4e: R2noharm (in NOHARM, running from within R using sirt package)
noharm.path <- "c:/NOHARM"
# covariance matrix
P.pattern <- matrix( 1, ncol=3, nrow=3 )
P.init <- 0.8+0*P.pattern
diag(P.init) <- 1
# loading matrix
F.pattern <- 0*Q
F.init <- Q
# estimate model
mod4e <- sirt::R2noharm( dat=dat, model.type="CFA", F.pattern=F.pattern,
F.init=F.init, P.pattern=P.pattern, P.init=P.init,
writename="ex4e", noharm.path=noharm.path, dec="," )
summary(mod4e)
#---- M4f: mirt (in mirt)
cmodel <- mirt::mirt.model("
F1=1-4
F2=5-8
F3=9-12
# equal item slopes correspond to the Rasch model
CONSTRAIN=(1-4, a1), (5-8, a2), (9-12,a3)
COV=F1*F2, F1*F3, F2*F3
" )
mod4f <- mirt::mirt(dat, cmodel, verbose=TRUE)
summary(mod4f)
#*****************************************************
# Model 5: 3-dimensional 2PL model
#*****************************************************
#---- M5a: smirt (in sirt)
mod5a <- sirt::smirt(dat,Qmatrix=Q,irtmodel="comp", est.a="2PL", theta.k=theta.k,
maxiter=30)
summary(mod5a)
#---- M5b: rasch.mml2 (in sirt)
mod5b <- sirt::rasch.mml2(dat,Q=Q,theta.k=theta.k,est.a=1:12, mmliter=30)
summary(mod5b)
#---- M5c: gdm (in CDM)
mod5c <- CDM::gdm( dat, irtmodel="2PL", theta.k=theta.k, skillspace="loglinear",
Qmatrix=Q, maxiter=30, centered.latent=TRUE,
standardized.latent=TRUE)
summary(mod5c)
#---- M5d: tam.mml (in TAM)
mod5d <- TAM::tam.mml.2pl( resp=dat, Q=Q, control=list(nodes=theta.k, maxiter=30) )
summary(mod5d)
#---- M5e: R2noharm (in NOHARM, running from within R using sirt package)
noharm.path <- "c:/NOHARM"
# covariance matrix
P.pattern <- matrix( 1, ncol=3, nrow=3 )
diag(P.pattern) <- 0
P.init <- 0.8+0*P.pattern
diag(P.init) <- 1
# loading matrix
F.pattern <- Q
F.init <- Q
# estimate model
mod5e <- sirt::R2noharm( dat=dat, model.type="CFA", F.pattern=F.pattern,
F.init=F.init, P.pattern=P.pattern, P.init=P.init,
writename="ex5e", noharm.path=noharm.path, dec="," )
summary(mod5e)
#---- M5f: mirt (in mirt)
cmodel <- mirt::mirt.model("
F1=1-4
F2=5-8
F3=9-12
COV=F1*F2, F1*F3, F2*F3
" )
mod5f <- mirt::mirt(dat, cmodel, verbose=TRUE)
summary(mod5f)
#*****************************************************
# Model 6: Network models (Graphical models)
#*****************************************************
#---- M6a: Ising model using the IsingFit package (undirected graph)
# - fit Ising model using the "OR rule" (AND=FALSE)
mod6a <- IsingFit::IsingFit(x=dat, family="binomial", AND=FALSE)
summary(mod6a)
## Network Density: 0.29
## Gamma: 0.25
## Rule used: Or-rule
# plot results
qgraph::qgraph(mod6a$weiadj,fade=FALSE)
#**-- graph estimation using pcalg package
# some packages from Bioconductor must be downloaded at first (if not yet done)
if (FALSE){ # set 'if (TRUE)' if packages should be downloaded
source("http://bioconductor.org/biocLite.R")
biocLite("RBGL")
biocLite("Rgraphviz")
}
#---- M6b: graph estimation based on Pearson correlations
V <- colnames(dat)
n <- nrow(dat)
mod6b <- pcalg::pc(suffStat=list(C=stats::cor(dat), n=n ),
indepTest=gaussCItest, ## indep.test: partial correlations
alpha=0.05, labels=V, verbose=TRUE)
plot(mod6b)
# plot in qgraph package
qgraph::qgraph(mod6b, label.color=rep( c( "red", "blue","darkgreen" ), each=4 ),
edge.color="black")
summary(mod6b)
#---- M6c: graph estimation based on tetrachoric correlations
mod6c <- pcalg::pc(suffStat=list(C=sirt::tetrachoric2(dat)$rho, n=n ),
indepTest=gaussCItest, alpha=0.05, labels=V, verbose=TRUE)
plot(mod6c)
summary(mod6c)
#---- M6d: Statistical implicative analysis (in sirt)
mod6d <- sirt::sia.sirt(dat, significance=.85 )
# plot results with igraph and qgraph package
plot( mod6d$igraph.obj, vertex.shape="rectangle", vertex.size=30 )
qgraph::qgraph( mod6d$adj.matrix )
#*****************************************************
# Model 7: Latent class analysis with 3 classes
#*****************************************************
#---- M7a: randomLCA (in randomLCA)
# - use two trials of starting values
mod7a <- randomLCA::randomLCA(dat,nclass=3, notrials=2, verbose=TRUE)
summary(mod7a)
plot(mod7a,type="l", xlab="Item")
#---- M7b: rasch.mirtlc (in sirt)
mod7b <- sirt::rasch.mirtlc( dat, Nclasses=3,seed=-30, nstarts=2 )
summary(mod7b)
matplot( t(mod7b$pjk), type="l", xlab="Item" )
#---- M7c: poLCA (in poLCA)
# define formula for outcomes
f7c <- paste0( "cbind(", paste0(colnames(dat),collapse=","), ") ~ 1 " )
dat1 <- as.data.frame( dat + 1 ) # poLCA needs integer values from 1,2,..
mod7c <- poLCA::poLCA( stats::as.formula(f7c),dat1,nclass=3, verbose=TRUE)
plot(mod7c)
#---- M7d: gom.em (in sirt)
# - the latent class model is a special grade of membership model
mod7d <- sirt::gom.em( dat, K=3, problevels=c(0,1), model="GOM" )
summary(mod7d)
#---- - M7e: mirt (in mirt)
# define three latent classes
Theta <- diag(3)
# define mirt model
I <- ncol(dat) # I=12
mirtmodel <- mirt::mirt.model("
C1=1-12
C2=1-12
C3=1-12
")
# get initial parameter values
mod.pars <- mirt::mirt(dat, model=mirtmodel, pars="values")
# modify parameters: only slopes refer to item-class probabilities
set.seed(9976)
# set starting values for class specific item probabilities
mod.pars[ mod.pars$name=="d","value" ] <- 0
mod.pars[ mod.pars$name=="d","est" ] <- FALSE
b1 <- stats::qnorm( colMeans( dat ) )
mod.pars[ mod.pars$name=="a1","value" ] <- b1
# random starting values for other classes
mod.pars[ mod.pars$name %in% c("a2","a3"),"value" ] <- b1 + stats::runif(12*2,-1,1)
mod.pars
#** define prior for latent class analysis
lca_prior <- function(Theta,Etable){
# number of latent Theta classes
TP <- nrow(Theta)
# prior in initial iteration
if ( is.null(Etable) ){
prior <- rep( 1/TP, TP )
}
# process Etable (this is correct for datasets without missing data)
if ( ! is.null(Etable) ){
# sum over correct and incorrect expected responses
prior <- ( rowSums(Etable[, seq(1,2*I,2)]) + rowSums(Etable[,seq(2,2*I,2)]) )/I
}
prior <- prior / sum(prior)
return(prior)
}
#** estimate model
mod7e <- mirt::mirt(dat, mirtmodel, pars=mod.pars, verbose=TRUE,
technical=list( customTheta=Theta, customPriorFun=lca_prior) )
# compare estimated results
print(mod7e)
summary(mod7b)
# The number of estimated parameters is incorrect because mirt does not correctly count
# estimated parameters from the user customized prior distribution.
mod7e@nest <- as.integer(sum(mod.pars$est) + 2) # two additional class probabilities
# extract log-likelihood
mod7e@logLik
# compute AIC and BIC
( AIC <- -2*mod7e@logLik+2*mod7e@nest )
( BIC <- -2*mod7e@logLik+log(mod7e@Data$N)*mod7e@nest )
# RMSEA and SRMSR fit statistic
mirt::M2(mod7e) # TLI and CFI does not make sense in this example
#** extract item parameters
sirt::mirt.wrapper.coef(mod7e)
#** extract class-specific item-probabilities
probs <- apply( coef1[, c("a1","a2","a3") ], 2, stats::plogis )
matplot( probs, type="l", xlab="Item", main="mirt::mirt")
#** inspect estimated distribution
mod7e@Theta
mod7e@Prior[[1]]
#*****************************************************
# Model 8: Mixed Rasch model with two classes
#*****************************************************
#---- M8a: raschmix (in psychomix)
mod8a <- psychomix::raschmix(data=as.matrix(dat), k=2, scores="saturated")
summary(mod8a)
#---- M8b: mrm (in mRm)
mod8b <- mRm::mrm(data.matrix=dat, cl=2)
mod8b$conv.to.bound
plot(mod8b)
print(mod8b)
#---- M8c: mirt (in mirt)
#* define theta grid
theta.k <- seq( -5, 5, len=9 )
TP <- length(theta.k)
Theta <- matrix( 0, nrow=2*TP, ncol=4)
Theta[1:TP,1:2] <- cbind(theta.k, 1 )
Theta[1:TP + TP,3:4] <- cbind(theta.k, 1 )
Theta
# define model
I <- ncol(dat) # I=12
mirtmodel <- mirt::mirt.model("
F1a=1-12 # slope Class 1
F1b=1-12 # difficulty Class 1
F2a=1-12 # slope Class 2
F2b=1-12 # difficulty Class 2
CONSTRAIN=(1-12,a1),(1-12,a3)
")
# get initial parameter values
mod.pars <- mirt::mirt(dat, model=mirtmodel, pars="values")
# set starting values for class specific item probabilities
mod.pars[ mod.pars$name=="d","value" ] <- 0
mod.pars[ mod.pars$name=="d","est" ] <- FALSE
mod.pars[ mod.pars$name=="a1","value" ] <- 1
mod.pars[ mod.pars$name=="a3","value" ] <- 1
# initial values difficulties
b1 <- stats::qlogis( colMeans(dat) )
mod.pars[ mod.pars$name=="a2","value" ] <- b1
mod.pars[ mod.pars$name=="a4","value" ] <- b1 + stats::runif(I, -1, 1)
#* define prior for mixed Rasch analysis
mixed_prior <- function(Theta,Etable){
NC <- 2 # number of theta classes
TP <- nrow(Theta) / NC
prior1 <- stats::dnorm( Theta[1:TP,1] )
prior1 <- prior1 / sum(prior1)
if ( is.null(Etable) ){ prior <- c( prior1, prior1 ) }
if ( ! is.null(Etable) ){
prior <- ( rowSums( Etable[, seq(1,2*I,2)] ) +
rowSums( Etable[,seq(2,2*I,2)]) )/I
a1 <- stats::aggregate( prior, list( rep(1:NC, each=TP) ), sum )
a1[,2] <- a1[,2] / sum( a1[,2])
# print some information during estimation
cat( paste0( " Class proportions: ",
paste0( round(a1[,2], 3 ), collapse=" " ) ), "\n")
a1 <- rep( a1[,2], each=TP )
# specify mixture of two normal distributions
prior <- a1*c(prior1,prior1)
}
prior <- prior / sum(prior)
return(prior)
}
#* estimate model
mod8c <- mirt::mirt(dat, mirtmodel, pars=mod.pars, verbose=TRUE,
technical=list( customTheta=Theta, customPriorFun=mixed_prior ) )
# Like in Model 7e, the number of estimated parameters must be included.
mod8c@nest <- as.integer(sum(mod.pars$est) + 1)
# two class proportions and therefore one probability is freely estimated.
#* extract item parameters
sirt::mirt.wrapper.coef(mod8c)
#* estimated distribution
mod8c@Theta
mod8c@Prior
#---- M8d: tamaan (in TAM)
tammodel <- "
ANALYSIS:
TYPE=MIXTURE ;
NCLASSES(2);
NSTARTS(7,20);
LAVAAN MODEL:
F=~ A1__C4
F ~~ F
ITEM TYPE:
ALL(Rasch);
"
mod8d <- TAM::tamaan( tammodel, resp=dat )
summary(mod8d)
# plot item parameters
I <- 12
ipars <- mod8d$itempartable_MIXTURE[ 1:I, ]
plot( 1:I, ipars[,3], type="o", ylim=range( ipars[,3:4] ), pch=16,
xlab="Item", ylab="Item difficulty")
lines( 1:I, ipars[,4], type="l", col=2, lty=2)
points( 1:I, ipars[,4], col=2, pch=2)
#*****************************************************
# Model 9: Mixed 2PL model with two classes
#*****************************************************
#---- M9a: tamaan (in TAM)
tammodel <- "
ANALYSIS:
TYPE=MIXTURE ;
NCLASSES(2);
NSTARTS(10,30);
LAVAAN MODEL:
F=~ A1__C4
F ~~ F
ITEM TYPE:
ALL(2PL);
"
mod9a <- TAM::tamaan( tammodel, resp=dat )
summary(mod9a)
#*****************************************************
# Model 10: Rasch testlet model
#*****************************************************
#---- M10a: tam.fa (in TAM)
dims <- substring( colnames(dat),1,1 ) # define dimensions
mod10a <- TAM::tam.fa( resp=dat, irtmodel="bifactor1", dims=dims,
control=list(maxiter=60) )
summary(mod10a)
#---- M10b: mirt (in mirt)
cmodel <- mirt::mirt.model("
G=1-12
A=1-4
B=5-8
C=9-12
CONSTRAIN=(1-12,a1), (1-4, a2), (5-8, a3), (9-12,a4)
")
mod10b <- mirt::mirt(dat, model=cmodel, verbose=TRUE)
summary(mod10b)
coef(mod10b)
mod10b@logLik # equivalent is slot( mod10b, "logLik")
#alternatively, using a dimensional reduction approach (faster and better accuracy)
cmodel <- mirt::mirt.model("
G=1-12
CONSTRAIN=(1-12,a1), (1-4, a2), (5-8, a3), (9-12,a4)
")
item_bundles <- rep(c(1,2,3), each=4)
mod10b1 <- mirt::bfactor(dat, model=item_bundles, model2=cmodel, verbose=TRUE)
coef(mod10b1)
#---- M10c: smirt (in sirt)
# define Q-matrix
Qmatrix <- matrix(0,12,4)
Qmatrix[,1] <- 1
Qmatrix[ cbind( 1:12, match( dims, unique(dims)) +1 ) ] <- 1
# uncorrelated factors
variance.fixed <- cbind( c(1,1,1,2,2,3), c(2,3,4,3,4,4), 0 )
# estimate model
mod10c <- sirt::smirt( dat, Qmatrix=Qmatrix, irtmodel="comp",
variance.fixed=variance.fixed, qmcnodes=1000, maxiter=60)
summary(mod10c)
#*****************************************************
# Model 11: Bifactor model
#*****************************************************
#---- M11a: tam.fa (in TAM)
dims <- substring( colnames(dat),1,1 ) # define dimensions
mod11a <- TAM::tam.fa( resp=dat, irtmodel="bifactor2", dims=dims,
control=list(maxiter=60) )
summary(mod11a)
#---- M11b: bfactor (in mirt)
dims1 <- match( dims, unique(dims) )
mod11b <- mirt::bfactor(dat, model=dims1, verbose=TRUE)
summary(mod11b)
coef(mod11b)
mod11b@logLik
#---- M11c: smirt (in sirt)
# define Q-matrix
Qmatrix <- matrix(0,12,4)
Qmatrix[,1] <- 1
Qmatrix[ cbind( 1:12, match( dims, unique(dims)) +1 ) ] <- 1
# uncorrelated factors
variance.fixed <- cbind( c(1,1,1,2,2,3), c(2,3,4,3,4,4), 0 )
# estimate model
mod11c <- sirt::smirt( dat, Qmatrix=Qmatrix, irtmodel="comp", est.a="2PL",
variance.fixed=variance.fixed, qmcnodes=1000, maxiter=60)
summary(mod11c)
#*****************************************************
# Model 12: Located latent class model: Rasch model with three theta classes
#*****************************************************
# use 10th item as the reference item
ref.item <- 10
# ability grid
theta.k <- seq(-4,4,len=9)
#---- M12a: rasch.mirtlc (in sirt)
mod12a <- sirt::rasch.mirtlc(dat, Nclasses=3, modeltype="MLC1", ref.item=ref.item)
summary(mod12a)
#---- M12b: gdm (in CDM)
theta.k <- seq(-1, 1, len=3) # initial matrix
b.constraint <- matrix( c(10,1,0), nrow=1,ncol=3)
# estimate model
mod12b <- CDM::gdm( dat, theta.k=theta.k, skillspace="est", irtmodel="1PL",
b.constraint=b.constraint, maxiter=200)
summary(mod12b)
#---- M12c: mirt (in mirt)
items <- colnames(dat)
# define three latent classes
Theta <- diag(3)
# define mirt model
I <- ncol(dat) # I=12
mirtmodel <- mirt::mirt.model("
C1=1-12
C2=1-12
C3=1-12
CONSTRAIN=(1-12,a1),(1-12,a2),(1-12,a3)
")
# get parameters
mod.pars <- mirt(dat, model=mirtmodel, pars="values")
# set starting values for class specific item probabilities
mod.pars[ mod.pars$name=="d","value" ] <- stats::qlogis( colMeans(dat,na.rm=TRUE) )
# set item difficulty of reference item to zero
ind <- which( ( paste(mod.pars$item)==items[ref.item] ) &
( ( paste(mod.pars$name)=="d" ) ) )
mod.pars[ ind,"value" ] <- 0
mod.pars[ ind,"est" ] <- FALSE
# initial values for a1, a2 and a3
mod.pars[ mod.pars$name %in% c("a1","a2","a3"),"value" ] <- c(-1,0,1)
mod.pars
#* define prior for latent class analysis
lca_prior <- function(Theta,Etable){
# number of latent Theta classes
TP <- nrow(Theta)
# prior in initial iteration
if ( is.null(Etable) ){
prior <- rep( 1/TP, TP )
}
# process Etable (this is correct for datasets without missing data)
if ( ! is.null(Etable) ){
# sum over correct and incorrect expected responses
prior <- ( rowSums( Etable[, seq(1,2*I,2)] ) + rowSums( Etable[, seq(2,2*I,2)] ) )/I
}
prior <- prior / sum(prior)
return(prior)
}
#* estimate model
mod12c <- mirt(dat, mirtmodel, technical=list(
customTheta=Theta, customPriorFun=lca_prior),
pars=mod.pars, verbose=TRUE )
# estimated parameters from the user customized prior distribution.
mod12c@nest <- as.integer(sum(mod.pars$est) + 2)
#* extract item parameters
coef1 <- sirt::mirt.wrapper.coef(mod12c)
#* inspect estimated distribution
mod12c@Theta
coef1$coef[1,c("a1","a2","a3")]
mod12c@Prior[[1]]
#*****************************************************
# Model 13: Multidimensional model with discrete traits
#*****************************************************
# define Q-Matrix
Q <- matrix( 0, nrow=12,ncol=3)
Q[1:4,1] <- 1
Q[5:8,2] <- 1
Q[9:12,3] <- 1
# define discrete theta distribution with 3 dimensions
Theta <- scan(what="character",nlines=1)
000 100 010 001 110 101 011 111
Theta <- as.numeric( unlist( lapply( Theta, strsplit, split="") ) )
Theta <- matrix(Theta, 8, 3, byrow=TRUE )
Theta
#---- Model 13a: din (in CDM)
mod13a <- CDM::din( dat, q.matrix=Q, rule="DINA")
summary(mod13a)
# compare used Theta distributions
cbind( Theta, mod13a$attribute.patt.splitted)
#---- Model 13b: gdm (in CDM)
mod13b <- CDM::gdm( dat, Qmatrix=Q, theta.k=Theta, skillspace="full")
summary(mod13b)
#---- Model 13c: mirt (in mirt)
# define mirt model
I <- ncol(dat) # I=12
mirtmodel <- mirt::mirt.model("
F1=1-4
F2=5-8
F3=9-12
")
# get parameters
mod.pars <- mirt(dat, model=mirtmodel, pars="values")
# starting values d parameters (transformed guessing parameters)
ind <- which( mod.pars$name=="d" )
mod.pars[ind,"value"] <- stats::qlogis(.2)
# starting values transformed slipping parameters
ind <- which( ( mod.pars$name %in% paste0("a",1:3) ) & ( mod.pars$est ) )
mod.pars[ind,"value"] <- stats::qlogis(.8) - stats::qlogis(.2)
mod.pars
#* define prior for latent class analysis
lca_prior <- function(Theta,Etable){
TP <- nrow(Theta)
if ( is.null(Etable) ){
prior <- rep( 1/TP, TP )
}
if ( ! is.null(Etable) ){
prior <- ( rowSums( Etable[, seq(1,2*I,2)] ) + rowSums( Etable[, seq(2,2*I,2)] ) )/I
}
prior <- prior / sum(prior)
return(prior)
}
#* estimate model
mod13c <- mirt(dat, mirtmodel, technical=list(
customTheta=Theta, customPriorFun=lca_prior),
pars=mod.pars, verbose=TRUE )
# estimated parameters from the user customized prior distribution.
mod13c@nest <- as.integer(sum(mod.pars$est) + 2)
#* extract item parameters
coef13c <- sirt::mirt.wrapper.coef(mod13c)$coef
#* inspect estimated distribution
mod13c@Theta
mod13c@Prior[[1]]
#-* comparisons of estimated parameters
# extract guessing and slipping parameters from din
dfr <- coef(mod13a)[, c("guess","slip") ]
colnames(dfr) <- paste0("din.",c("guess","slip") )
# estimated parameters from gdm
dfr$gdm.guess <- stats::plogis(mod13b$item$b)
dfr$gdm.slip <- 1 - stats::plogis( rowSums(mod13b$item[,c("b.Cat1","a.F1","a.F2","a.F3")] ) )
# estimated parameters from mirt
dfr$mirt.guess <- stats::plogis( coef13c$d )
dfr$mirt.slip <- 1 - stats::plogis( rowSums(coef13c[,c("d","a1","a2","a3")]) )
# comparison
round(dfr[, c(1,3,5,2,4,6)],3)
## din.guess gdm.guess mirt.guess din.slip gdm.slip mirt.slip
## A1 0.691 0.684 0.686 0.000 0.000 0.000
## A2 0.491 0.489 0.489 0.031 0.038 0.036
## A3 0.302 0.300 0.300 0.184 0.193 0.190
## A4 0.244 0.239 0.240 0.337 0.340 0.339
## B1 0.568 0.579 0.577 0.163 0.148 0.151
## B2 0.329 0.344 0.340 0.344 0.326 0.329
## B3 0.817 0.827 0.825 0.014 0.007 0.009
## B4 0.431 0.463 0.456 0.104 0.089 0.092
## C1 0.188 0.191 0.189 0.013 0.013 0.013
## C2 0.050 0.050 0.050 0.239 0.238 0.239
## C3 0.000 0.002 0.001 0.065 0.065 0.065
## C4 0.000 0.004 0.000 0.212 0.212 0.212
# estimated class sizes
dfr <- data.frame( "Theta"=Theta, "din"=mod13a$attribute.patt$class.prob,
"gdm"=mod13b$pi.k, "mirt"=mod13c@Prior[[1]])
# comparison
round(dfr,3)
## Theta.1 Theta.2 Theta.3 din gdm mirt
## 1 0 0 0 0.039 0.041 0.040
## 2 1 0 0 0.008 0.009 0.009
## 3 0 1 0 0.009 0.007 0.008
## 4 0 0 1 0.394 0.417 0.412
## 5 1 1 0 0.011 0.011 0.011
## 6 1 0 1 0.017 0.042 0.037
## 7 0 1 1 0.042 0.008 0.016
## 8 1 1 1 0.480 0.465 0.467
#*****************************************************
# Model 14: DINA model with two skills
#*****************************************************
# define some simple Q-Matrix (does not really make in this application)
Q <- matrix( 0, nrow=12,ncol=2)
Q[1:4,1] <- 1
Q[5:8,2] <- 1
Q[9:12,1:2] <- 1
# define discrete theta distribution with 3 dimensions
Theta <- scan(what="character",nlines=1)
00 10 01 11
Theta <- as.numeric( unlist( lapply( Theta, strsplit, split="") ) )
Theta <- matrix(Theta, 4, 2, byrow=TRUE )
Theta
#---- Model 14a: din (in CDM)
mod14a <- CDM::din( dat, q.matrix=Q, rule="DINA")
summary(mod14a)
# compare used Theta distributions
cbind( Theta, mod14a$attribute.patt.splitted)
#---- Model 14b: mirt (in mirt)
# define mirt model
I <- ncol(dat) # I=12
mirtmodel <- mirt::mirt.model("
F1=1-4
F2=5-8
(F1*F2)=9-12
")
#-> constructions like (F1*F2*F3) are also allowed in mirt.model
# get parameters
mod.pars <- mirt(dat, model=mirtmodel, pars="values")
# starting values d parameters (transformed guessing parameters)
ind <- which( mod.pars$name=="d" )
mod.pars[ind,"value"] <- stats::qlogis(.2)
# starting values transformed slipping parameters
ind <- which( ( mod.pars$name %in% paste0("a",1:3) ) & ( mod.pars$est ) )
mod.pars[ind,"value"] <- stats::qlogis(.8) - stats::qlogis(.2)
mod.pars
#* use above defined prior lca_prior
# lca_prior <- function(prior,Etable) ...
#* estimate model
mod14b <- mirt(dat, mirtmodel, technical=list(
customTheta=Theta, customPriorFun=lca_prior),
pars=mod.pars, verbose=TRUE )
# estimated parameters from the user customized prior distribution.
mod14b@nest <- as.integer(sum(mod.pars$est) + 2)
#* extract item parameters
coef14b <- sirt::mirt.wrapper.coef(mod14b)$coef
#-* comparisons of estimated parameters
# extract guessing and slipping parameters from din
dfr <- coef(mod14a)[, c("guess","slip") ]
colnames(dfr) <- paste0("din.",c("guess","slip") )
# estimated parameters from mirt
dfr$mirt.guess <- stats::plogis( coef14b$d )
dfr$mirt.slip <- 1 - stats::plogis( rowSums(coef14b[,c("d","a1","a2","a3")]) )
# comparison
round(dfr[, c(1,3,2,4)],3)
## din.guess mirt.guess din.slip mirt.slip
## A1 0.674 0.671 0.030 0.030
## A2 0.423 0.420 0.049 0.050
## A3 0.258 0.255 0.224 0.225
## A4 0.245 0.243 0.394 0.395
## B1 0.534 0.543 0.166 0.164
## B2 0.338 0.347 0.382 0.380
## B3 0.796 0.802 0.016 0.015
## B4 0.421 0.436 0.142 0.140
## C1 0.850 0.851 0.000 0.000
## C2 0.480 0.480 0.097 0.097
## C3 0.746 0.746 0.026 0.026
## C4 0.575 0.577 0.136 0.137
# estimated class sizes
dfr <- data.frame( "Theta"=Theta, "din"=mod13a$attribute.patt$class.prob,
"mirt"=mod14b@Prior[[1]])
# comparison
round(dfr,3)
## Theta.1 Theta.2 din mirt
## 1 0 0 0.357 0.369
## 2 1 0 0.044 0.049
## 3 0 1 0.047 0.031
## 4 1 1 0.553 0.551
#*****************************************************
# Model 15: Rasch model with non-normal distribution
#*****************************************************
# A non-normal theta distributed is specified by log-linear smoothing
# the distribution as described in
# Xu, X., & von Davier, M. (2008). Fitting the structured general diagnostic model
# to NAEP data. ETS Research Report ETS RR-08-27. Princeton, ETS.
# define theta grid
theta.k <- matrix( seq(-4,4,len=15), ncol=1 )
# define design matrix for smoothing (up to cubic moments)
delta.designmatrix <- cbind( 1, theta.k, theta.k^2, theta.k^3 )
# constrain item difficulty of fifth item (item B1) to zero
b.constraint <- matrix( c(5,1,0), ncol=3 )
#---- Model 15a: gdm (in CDM)
mod15a <- CDM::gdm( dat, irtmodel="1PL", theta.k=theta.k,
b.constraint=b.constraint )
summary(mod15a)
# plot estimated distribution
graphics::barplot( mod15a$pi.k[,1], space=0, names.arg=round(theta.k[,1],2),
main="Estimated Skewed Distribution (gdm function)")
#---- Model 15b: mirt (in mirt)
# define mirt model
mirtmodel <- mirt::mirt.model("
F=1-12
")
# get parameters
mod.pars <- mirt::mirt(dat, model=mirtmodel, pars="values", itemtype="Rasch")
# fix variance (just for correct counting of parameters)
mod.pars[ mod.pars$name=="COV_11", "est"] <- FALSE
# fix item difficulty
ind <- which( ( mod.pars$item=="B1" ) & ( mod.pars$name=="d" ) )
mod.pars[ ind, "value"] <- 0
mod.pars[ ind, "est"] <- FALSE
# define prior
loglinear_prior <- function(Theta,Etable){
TP <- nrow(Theta)
if ( is.null(Etable) ){
prior <- rep( 1/TP, TP )
}
# process Etable (this is correct for datasets without missing data)
if ( ! is.null(Etable) ){
# sum over correct and incorrect expected responses
prior <- ( rowSums( Etable[, seq(1,2*I,2)] ) + rowSums( Etable[, seq(2,2*I,2)] ) )/I
# smooth prior using the above design matrix and a log-linear model
# see Xu & von Davier (2008).
y <- log( prior + 1E-15 )
lm1 <- lm( y ~ 0 + delta.designmatrix, weights=prior )
prior <- exp(fitted(lm1)) # smoothed prior
}
prior <- prior / sum(prior)
return(prior)
}
#* estimate model
mod15b <- mirt::mirt(dat, mirtmodel, technical=list(
customTheta=theta.k, customPriorFun=loglinear_prior ),
pars=mod.pars, verbose=TRUE )
# estimated parameters from the user customized prior distribution.
mod15b@nest <- as.integer(sum(mod.pars$est) + 3)
#* extract item parameters
coef1 <- sirt::mirt.wrapper.coef(mod15b)$coef
#** compare estimated item parameters
dfr <- data.frame( "gdm"=mod15a$item$b.Cat1, "mirt"=coef1$d )
rownames(dfr) <- colnames(dat)
round(t(dfr),4)
## A1 A2 A3 A4 B1 B2 B3 B4 C1 C2 C3 C4
## gdm 0.9818 0.1538 -0.7837 -1.3197 0 -1.0902 1.6088 -0.170 1.9778 0.006 1.1859 0.135
## mirt 0.9829 0.1548 -0.7826 -1.3186 0 -1.0892 1.6099 -0.169 1.9790 0.007 1.1870 0.136
# compare estimated theta distribution
dfr <- data.frame( "gdm"=mod15a$pi.k, "mirt"=mod15b@Prior[[1]] )
round(t(dfr),4)
## 1 2 3 4 5 6 7 8 9 10 11 12 13
## gdm 0 0 1e-04 9e-04 0.0056 0.0231 0.0652 0.1299 0.1881 0.2038 0.1702 0.1129 0.0612
## mirt 0 0 1e-04 9e-04 0.0056 0.0232 0.0653 0.1300 0.1881 0.2038 0.1702 0.1128 0.0611
## 14 15
## gdm 0.0279 0.011
## mirt 0.0278 0.011
## End(Not run)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.