data.dcm | R Documentation |
Dataset from Chapter 9 of the book 'Diagnostic Measurement' (Rupp, Templin & Henson, 2010).
data(data.dcm)
The format of the data is a list containing the dichotomous item
response data data
(10000 persons at 7 items)
and the Q-matrix q.matrix
(7 items and 3 skills):
List of 2
$ data :'data.frame':
..$ id: int [1:10000] 1 2 3 4 5 6 7 8 9 10 ...
..$ D1: num [1:10000] 0 0 0 0 1 0 1 0 0 1 ...
..$ D2: num [1:10000] 0 0 0 0 0 1 1 1 0 1 ...
..$ D3: num [1:10000] 1 0 1 0 1 1 0 0 0 1 ...
..$ D4: num [1:10000] 0 0 1 0 0 1 1 1 0 0 ...
..$ D5: num [1:10000] 1 0 0 0 1 1 1 0 1 0 ...
..$ D6: num [1:10000] 0 0 0 0 1 1 1 0 0 1 ...
..$ D7: num [1:10000] 0 0 0 0 0 1 1 0 1 1 ...
$ q.matrix: num [1:7, 1:3] 1 0 0 1 1 0 1 0 1 0 ...
..- attr(*, "dimnames")=List of 2
.. ..$ : chr [1:7] "D1" "D2" "D3" "D4" ...
.. ..$ : chr [1:3] "skill1" "skill2" "skill3"
For supplementary material of the Rupp, Templin and Henson book (2010) see http://dcm.coe.uga.edu/.
The dataset was downloaded from http://dcm.coe.uga.edu/supplemental/chapter9.html.
Rupp, A. A., Templin, J., & Henson, R. A. (2010). Diagnostic Measurement: Theory, Methods, and Applications. New York: The Guilford Press.
## Not run: data(data.dcm, package="CDM") dat <- data.dcm$data[,-1] Q <- data.dcm$q.matrix #***************************************************** # Model 1: DINA model #***************************************************** mod1 <- CDM::din( dat, q.matrix=Q ) summary(mod1) #-------- # Model 1m: estimate model in mirt package library(mirt) library(sirt) #** define theta grid of skills # use the function skillspace.hierarchy just for convenience hier <- "skill1 > skill2" skillspace <- CDM::skillspace.hierarchy( hier, skill.names=colnames(Q) ) Theta <- as.matrix(skillspace$skillspace.complete) #** create mirt model mirtmodel <- mirt::mirt.model(" skill1=1 skill2=2 skill3=3 (skill1*skill2)=4 (skill1*skill3)=5 (skill2*skill3)=6 (skill1*skill2*skill3)=7 " ) #** mirt parameter table mod.pars <- mirt::mirt( dat, mirtmodel, pars="values") # use starting values of .20 for guessing parameter ind <- which( mod.pars$name=="d" ) mod.pars[ind,"value"] <- stats::qlogis(.20) # guessing parameter on the logit metric # use starting values of .80 for anti-slipping parameter ind <- which( ( mod.pars$name %in% paste0("a",1:20 ) ) & (mod.pars$est) ) mod.pars[ind,"value"] <- stats::qlogis(.80) - stats::qlogis(.20) mod.pars #** prior for the skill space distribution I <- ncol(dat) lca_prior <- function(Theta,Etable){ TP <- nrow(Theta) if ( is.null(Etable) ){ prior <- rep( 1/TP, TP ) } if ( ! is.null(Etable) ){ prior <- ( rowSums(Etable[, seq(1,2*I,2)]) + rowSums(Etable[,seq(2,2*I,2)]) )/I } prior <- prior / sum(prior) return(prior) } #** estimate model in mirt mod1m <- mirt::mirt(dat, mirtmodel, pars=mod.pars, verbose=TRUE, technical=list( customTheta=Theta, customPriorFun=lca_prior) ) # The number of estimated parameters is incorrect because mirt does not correctly count # estimated parameters from the user customized prior distribution. mod1m@nest <- as.integer(sum(mod.pars$est) + nrow(Theta) - 1) # extract log-likelihood mod1m@logLik # compute AIC and BIC ( AIC <- -2*mod1m@logLik+2*mod1m@nest ) ( BIC <- -2*mod1m@logLik+log(mod1m@Data$N)*mod1m@nest ) #** extract item parameters cmod1m <- sirt::mirt.wrapper.coef(mod1m)$coef # compare estimated guessing and slipping parameters dfr <- data.frame( "din.guess"=mod1$guess$est, "mirt.guess"=plogis(cmod1m$d), "din.slip"=mod1$slip$est, "mirt.slip"=1-plogis( rowSums( cmod1m[, c("d", paste0("a",1:7) ) ] ) ) ) round(t(dfr),3) ## [,1] [,2] [,3] [,4] [,5] [,6] [,7] ## din.guess 0.217 0.193 0.189 0.135 0.143 0.135 0.162 ## mirt.guess 0.226 0.189 0.184 0.132 0.142 0.132 0.158 ## din.slip 0.338 0.331 0.334 0.220 0.222 0.211 0.042 ## mirt.slip 0.339 0.333 0.336 0.223 0.225 0.214 0.044 # compare estimated skill class distribution dfr <- data.frame("din"=mod1$attribute.patt$class.prob, "mirt"=mod1m@Prior[[1]] ) round(t(dfr),3) ## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] ## din 0.113 0.083 0.094 0.092 0.064 0.059 0.065 0.429 ## mirt 0.116 0.074 0.095 0.064 0.095 0.058 0.066 0.433 #** extract estimated classifications fsc1m <- sirt::mirt.wrapper.fscores( mod1m ) #- estimated reliabilities fsc1m$EAP.rel ## skill1 skill2 skill3 ## 0.5479942 0.5362595 0.5357961 #- estimated classfications: EAPs, MLEs and MAPs head( round(fsc1m$person,3) ) ## case M EAP.skill1 SE.EAP.skill1 EAP.skill2 SE.EAP.skill2 EAP.skill3 SE.EAP.skill3 ## 1 1 0.286 0.508 0.500 0.067 0.251 0.820 0.384 ## 2 2 0.000 0.162 0.369 0.191 0.393 0.190 0.392 ## 3 3 0.286 0.200 0.400 0.211 0.408 0.607 0.489 ## 4 4 0.000 0.162 0.369 0.191 0.393 0.190 0.392 ## 5 5 0.571 0.802 0.398 0.267 0.443 0.928 0.258 ## 6 6 0.857 0.998 0.045 1.000 0.019 1.000 0.020 ## MLE.skill1 MLE.skill2 MLE.skill3 MAP.skill1 MAP.skill2 MAP.skill3 ## 1 1 0 1 1 0 1 ## 2 0 0 0 0 0 0 ## 3 0 0 1 0 0 1 ## 4 0 0 0 0 0 0 ## 5 1 0 1 1 0 1 ## 6 1 1 1 1 1 1 #** estimate model fit in mirt ( fit1m <- mirt::M2( mod1m ) ) #***************************************************** # Model 2: DINO model #***************************************************** mod2 <- CDM::din( dat, q.matrix=Q, rule="DINO") summary(mod2) #***************************************************** # Model 3: log-linear model (LCDM): this model is the GDINA model with the # logit link function #***************************************************** mod3 <- CDM::gdina( dat, q.matrix=Q, link="logit") summary(mod3) #***************************************************** # Model 4: GDINA model with identity link function #***************************************************** mod4 <- CDM::gdina( dat, q.matrix=Q ) summary(mod4) #***************************************************** # Model 5: GDINA additive model identity link function #***************************************************** mod5 <- CDM::gdina( dat, q.matrix=Q, rule="ACDM") summary(mod5) #***************************************************** # Model 6: GDINA additive model logit link function #***************************************************** mod6 <- CDM::gdina( dat, q.matrix=Q, link="logit", rule="ACDM") summary(mod6) #-------- # Model 6m: GDINA additive model in mirt package # use data specifications from Model 1m) #** create mirt model mirtmodel <- mirt::mirt.model(" skill1=1,4,5,7 skill2=2,4,6,7 skill3=3,5,6,7 " ) #** mirt parameter table mod.pars <- mirt::mirt( dat, mirtmodel, pars="values") #** estimate model in mirt # Theta and lca_prior as defined as in Model 1m mod6m <- mirt::mirt(dat, mirtmodel, pars=mod.pars, verbose=TRUE, technical=list( customTheta=Theta, customPriorFun=lca_prior) ) mod6m@nest <- as.integer(sum(mod.pars$est) + nrow(Theta) - 1) # extract log-likelihood mod6m@logLik # compute AIC and BIC ( AIC <- -2*mod6m@logLik+2*mod6m@nest ) ( BIC <- -2*mod6m@logLik+log(mod6m@Data$N)*mod6m@nest ) #** skill distribution mod6m@Prior[[1]] #** extract item parameters cmod6m <- mirt.wrapper.coef(mod6m)$coef print(cmod6m,digits=4) ## item a1 a2 a3 d g u ## 1 D1 1.882 0.000 0.000 -0.9330 0 1 ## 2 D2 0.000 2.049 0.000 -1.0430 0 1 ## 3 D3 0.000 0.000 2.028 -0.9915 0 1 ## 4 D4 2.697 2.525 0.000 -2.9925 0 1 ## 5 D5 2.524 0.000 2.478 -2.7863 0 1 ## 6 D6 0.000 2.818 2.791 -3.1324 0 1 ## 7 D7 3.113 2.918 2.785 -4.2794 0 1 #***************************************************** # Model 7: Reduced RUM model #***************************************************** mod7 <- CDM::gdina( dat, q.matrix=Q, rule="RRUM") summary(mod7) #***************************************************** # Model 8: latent class model with 3 classes and 4 sets of starting values #***************************************************** #-- Model 8a: randomLCA package library(randomLCA) mod8a <- randomLCA::randomLCA( dat, nclass=3, verbose=TRUE, notrials=4) #-- Model8b: rasch.mirtlc function in sirt package library(sirt) mod8b <- sirt::rasch.mirtlc( dat, Nclasses=3, nstarts=4 ) summary(mod8a) summary(mod8b) ## End(Not run)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.