Nothing
skip_on_cran()
skip_if_not_installed("EdSurvey")
require(EdSurvey)
# paramTab$a/slope <-
# paramTab$g/asymptote/guessing <-
# paramTab$d/difficulty <-
# param$D <- 1.7
dichotParamTab <- structure(list(ItemID = structure(1:11,
.Label = c("m109801", "m020001", "m111001", "m046301", "m046501", "m051501", "m111601", "m111301", "m111201", "m110801", "m110101"), class = "factor"),
test = rep("composite",11),
subtest = c(rep("num",6),rep("alg",5)),
slope = c(0.96, 0.69, 0.83, 0.99, 1.03, 0.97, 1.45, 0.59, 0.34, 0.18, 1.20),
difficulty = c(-0.37, -0.55, 0.85 , -0.97, -0.14, 1.21, 0.53, -1.84, -0.46, 2.43, 0.70),
guessing = c(0.16,0.00,0.17,0.31,0.37,0.18, 0.28, 0.15, 0.09, 0.05, 0.18),
D = rep(1.7, 11),
MODEL = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "3pl", class = "factor")), class = "data.frame", row.names = c(NA, -11L))
## GPCM
# a / slope
# d1,2,...,(param-1)
# b/itemLocation, then map to d1, d2
# D / D
polyParamTab <- structure(list(ItemID = structure(c(1:2), .Label = c("m0757cl", "m066501"), class = "factor"),
test = rep("composite",2),
subtest = c(rep("alg",2)),
slope = c( 0.43, 0.52 ), # could also be called "a"
itemLocation = c( -1.21, -0.96), # could also not exist, added to d1 ... dn
d1 = c(2.38, -0.56),
d2 = c(-0.57, 0.56),
d3 = c(-1.18, NA),
D = c(1.7, 1.7),
scorePoints = c(4L, 3L) # number of score points, read d1 to d(n-1)
), class = "data.frame", row.names=c(1L,2L))
# read-in NAEP Primer data
sdf <- readNAEP(system.file("extdata/data", "M36NT2PM.dat", package = "NAEPprimer"))
items <- c(as.character(dichotParamTab$ItemID), as.character(polyParamTab$ItemID))
edf <- getData(data=sdf, varnames=c(items, "dsex", "origwt", "repgrp1", "jkunit", "sdracem"), omittedLevels = FALSE)
edf$sid <- paste0("S",1:nrow(edf))
# apply super basic scoring
for(i in 1:length(items)) {
coli <- items[i]
# save the original
edf[,paste0(coli,"raw")] <- edf[,coli]
if( coli %in% dichotParamTab$ItemID) {
edf[,coli] <- ifelse(grepl("[ABCDE]", edf[,paste0(coli,"raw")]), 0, NA)
edf[,coli] <- ifelse(grepl("*", edf[,paste0(coli,"raw")], fixed=TRUE), 1, edf[,coli])
} else {
edf[,coli] <- ifelse(grepl("Incorrect", edf[,paste0(coli,"raw")], fixed=TRUE), 0, NA)
edf[,coli] <- ifelse(grepl("None correct", edf[,paste0(coli,"raw")], fixed=TRUE), 0, edf[,coli])
edf[,coli] <- ifelse(grepl("Incorrect", edf[,paste0(coli,"raw")], fixed=TRUE), 0, edf[,coli])
edf[,coli] <- ifelse(grepl("One correct", edf[,paste0(coli,"raw")], fixed=TRUE), 1, edf[,coli])
edf[,coli] <- ifelse(grepl("Partial", edf[,paste0(coli,"raw")], fixed=TRUE), 1, edf[,coli])
edf[,coli] <- ifelse(grepl("Two correct", edf[,paste0(coli,"raw")], fixed=TRUE), 2, edf[,coli])
edf[,coli] <- ifelse(grepl("Correct", edf[,paste0(coli,"raw")], fixed=TRUE), 2, edf[,coli])
edf[,coli] <- ifelse(grepl("Three correct", edf[,paste0(coli,"raw")], fixed=TRUE), 3, edf[,coli])
}
# print results
edf[,paste0(coli,"raw")] <- NULL # delete original
}
# dummify
indepVars <- c("dsex","sdracem")
dummyVars <- c()
for (indep in indepVars) {
dummyMatrix <- model.matrix(~get(indep) - 1, data = edf)
edf <- cbind(edf, dummyMatrix)
levs <- levels(edf[[indep]])
colnames(edf) <- sub('^get\\(indep\\)', indep, colnames(edf)) # change to variableLEVEL
edf <- edf[,!colnames(edf)%in% c(indep)] # drop original column and first dummy
# record dummy names for subset
newNames <- paste0(indep,levs)
dummyVars <- c(dummyVars, newNames)
}
# AM doesn't like certain overlap of variable names so make a correction here
colnames(edf)[colnames(edf) == "sdracemAmer Ind/Alaska Natv"] <- "sdracemInd"
dummyVars[dummyVars == "sdracemAmer Ind/Alaska Natv"] <- "sdracemInd"
# make stuItems (long) and stuDat
stuItems <- stuItems1 <- edf[,c("sid", items)]
stuItems <- reshape(data=stuItems, varying=c(items), idvar=c("sid"), direction="long", v.names="score", times=items, timevar="key")
stuDat <- edf[,c("sid", "origwt", "repgrp1", "jkunit", dummyVars)]
# make testDat
testDat <- data.frame(test=c("composite", "composite", "composite") ,
subtest=c("num", "alg", NA),
location=c(277.1563, 280.2948, sum(c(0.3,0.7) * c(277.1563, 280.2948))),
scale=c(37.7297, 36.3887, sum(c(0.3,0.7) * c(37.7297, 36.3887))),
subtestWeight=c(0.3,0.7, NA))
context("NAEPPrimer Regression Non-composite Algebra")
# algebra
mmlA <- mml(alg ~ dsexFemale + sdracemBlack + sdracemHispanic + `sdracemAsian/Pacific Island` +
sdracemInd + sdracemOther, stuItems=stuItems, stuDat=stuDat, dichotParamTab=dichotParamTab, polyParamTab=polyParamTab,
Q=34, idVar="sid", testScale=testDat, composite=FALSE, weightVar="origwt",
minNode = -4, maxNode = 4,
strataVar="repgrp1", PSUVar="jkunit")
mmlATaylor <- summary(mmlA, varType="Taylor", gradientHessian=TRUE, strataVar="repgrp1", PSUVar="jkunit")
# similarly, the PV results should be approximately equal
set.seed(2)
pvA <- drawPVs(mmlA, construct="alg")
stuDatA <- merge(stuDat, pvA[["data"]], by.x="sid", by.y="id", all.x=TRUE, all.y=FALSE)
lmA <- summary(lm(alg_dire1 ~ dsexFemale + sdracemBlack + sdracemHispanic + `sdracemAsian/Pacific Island` +
sdracemInd + sdracemOther, data=stuDatA, weights=stuDatA$origwt))
expect_equal(coef(mmlATaylor)[1:6, 1], lmA$coef[1:6, 1], 0.1)
# check stochastic beta works
pvAs <- drawPVs(mmlATaylor, construct="alg", stochasticBeta=TRUE)
stuDatAs <- merge(stuDat, pvAs[["data"]], by.x="sid", by.y="id", all.x=TRUE, all.y=FALSE)
lmAs <- summary(lm(alg_dire1 ~ dsexFemale + sdracemBlack + sdracemHispanic + `sdracemAsian/Pacific Island` +
sdracemInd + sdracemOther, data=stuDatAs, weights=stuDatA$origwt))
expect_equal(coef(mmlATaylor)[1:6, 1], lmAs$coef[1:6, 1], 0.1)
# check the posterior means agree with the MML estimate
pvApos <- drawPVs(mmlA, construct="alg", returnPosterior =TRUE)
pvApos <- pvApos[["posterior"]]
pvApos$mus <- pvApos$mu * mmlA$scale + mmlA$location
stuDatApos <- merge(stuDat, pvApos, by.x="sid", by.y="id", all.x=TRUE, all.y=FALSE)
lmApos <- summary(lm(mus ~ dsexFemale + sdracemBlack + sdracemHispanic + `sdracemAsian/Pacific Island` +
sdracemInd + sdracemOther, data=stuDatApos, weights=stuDatApos$origwt))
# this should be exact
expect_equal(coef(mmlATaylor)[1:6, 1], lmApos$coef[1:6, 1])
# MML Regression
# Run completed on Wednesday, December 09, 2020. 09:54:09 AM
#
# Selection: ALL
# Observations: 8826
# Strata Variable: REPGRP1
# Cluster Variable: JKUNIT
# Weight Variable: ORIGWT
#
# Iterations: 37
# Log Likelihood: -14495.6
#
# Adjusted Wald Test
# F(6,57) = 22.4731
# p(F > f) = 2.17826e-013
# Dependent Variable: composite, alg
# Parameter Name Estimate Standard Error t Statistic p > |t|
# Constant 284.0244475898 1.5620600420 181.8268440084 0.0000000000
# DSEXFEMA 4.6425240064 1.6651582440 2.7880377274 0.0070480262
# SDRACEMB -26.4952368029 3.1195434525 -8.4933058976 0.0000000000
# SDRACEMH -20.3300619109 2.9454209419 -6.9022602582 0.0000000033
# SDRACEMA 13.4723776156 4.7778038300 2.8197845904 0.0064602479
# SDRACEMI -24.0877598983 7.1463851501 -3.3706215649 0.0013014462
# SDRACEMO -2.9570604020 10.8693889849 -0.2720539679 0.7864836263
# Root MSE 29.9108170108 1.1684294466 --- ---
# AM Statistical Software Beta Version 0.06.04. (c) The American Institutes for Research and Jon Cohen
# compare estimate and weight
expect_equal(unname(mmlATaylor$coefficients[,1]),
c(284.0244475898, 4.6425240064, -26.4952368029,-20.3300619109,
13.4723776156, -24.0877598983, -2.9570604020, 29.9108170108),
tolerance=50*sqrt(.Machine$double.eps))
expect_equal(unname(mmlATaylor$coef[,2]),
c(1.5620600420, 1.6651582440,3.1195434525, 2.9454209419,
4.7778038300, 7.1463851501, 10.8693889849, 1.1684294466),
tolerance=5*(.Machine$double.eps)^0.25)
context("NAEPPrimer Regression Non-composite Numeric")
# Numeric
mmlN <- mml(num ~ dsexFemale + sdracemBlack + sdracemHispanic + `sdracemAsian/Pacific Island` + sdracemInd + sdracemOther,
stuItems=stuItems,
stuDat=stuDat,
dichotParamTab=dichotParamTab,
polyParamTab=polyParamTab,
Q=34,
idVar="sid",
testScale=testDat,
composite=FALSE,
weightVar="origwt",
minNode = -4,
maxNode = 4,
strataVar="repgrp1",
PSUVar="jkunit")
mmlNTaylor <- summary(mmlN, varType="Taylor", gradientHessian=TRUE, strataVar="repgrp1", PSUVar="jkunit")
# MML Regression
# Run completed on Wednesday, December 09, 2020. 10:00:59 AM
#
# Selection: ALL
# Observations: 6378
# Strata Variable: REPGRP1
# Cluster Variable: JKUNIT
# Weight Variable: ORIGWT
#
# Iterations: 12
# Log Likelihood: -8647.33
#
# Adjusted Wald Test
# F(6,57) = 25.0037
# p(F > f) = 2.63123e-014
# Dependent Variable: composite, num
# Parameter Name Estimate Standard Error t Statistic p > |t|
# Constant 295.4743460911 1.5484819999 190.8154864667 0.0000000000
# DSEXFEMA -2.6901100945 1.7664688656 -1.5228743324 0.1328644564
# SDRACEMB -26.8599727157 2.5390553373 -10.5787267893 0.0000000000
# SDRACEMH -22.3144730714 2.8487217696 -7.8331528582 0.0000000001
# SDRACEMA 4.9481736285 5.9500552409 0.8316180990 0.4088183655
# SDRACEMI -17.7347803395 7.2513021373 -2.4457373316 0.0173294853
# SDRACEMO -4.9037488612 13.9236962237 -0.3521872915 0.7259035127
# Root MSE 29.6919958757 1.3943320344 --- ---
# AM Statistical Software Beta Version 0.06.04. (c) The American Institutes for Research and Jon Cohen
# compare estimate and SE
expect_equal(unname(mmlNTaylor$coefficients[,1]),
c(295.4743460911, -2.6901100945, -26.8599727157,-22.3144730714,
4.9481736285, -17.7347803395, -4.9037488612, 29.6919958757),
tolerance=200*sqrt(.Machine$double.eps))
expect_equal(unname(mmlNTaylor$coef[,2]),
c(1.5484819999, 1.7664688656,2.5390553373, 2.8487217696,
5.9500552409, 7.2513021373, 13.9236962237, 1.3943320344),
tolerance=3*(.Machine$double.eps)^0.25)
context("NAEPPrimer Regression Composite")
# Composite
mmlC <- mml(composite ~ dsexFemale + sdracemBlack + sdracemHispanic + `sdracemAsian/Pacific Island` +
`sdracemInd` + sdracemOther, stuItems=stuItems, stuDat=stuDat, dichotParamTab=dichotParamTab, polyParamTab=polyParamTab,
Q=34, idVar="sid", testScale=testDat, composite=TRUE, weightVar="origwt",
minNode = -4, maxNode = 4,
strataVar="repgrp1", PSUVar="jkunit")
mmlCTaylor <- summary(mmlC, varType="Taylor", gradientHessian=TRUE)
### similarly, the PV results should be approximately equal
# draw the PVs
pvC <- drawPVs(mmlC)
stuDatC <- merge(stuDat, pvC[["data"]], by.x="sid", by.y="id", all.x=TRUE, all.y=FALSE)
# first for a subscale
lmA2 <- summary(lm(alg_dire1 ~ dsexFemale + sdracemBlack + sdracemHispanic + `sdracemAsian/Pacific Island` +
sdracemInd + sdracemOther, data=stuDatC, weights=stuDatC$origwt))
# this is the above, just Algebra mml estimate
expect_equal(coef(mmlATaylor)[1:6, 1], lmA2$coef[1:6, 1], 0.05)
# now use full composite
lmC2 <- summary(lm(composite_dire1 ~ dsexFemale + sdracemBlack + sdracemHispanic + `sdracemAsian/Pacific Island` +
sdracemInd + sdracemOther, data=stuDatC, weights=stuDatC$origwt))
# this is the above, just Algebra mml estimate
expect_equal(coef(mmlCTaylor)[1:6, 1], lmC2$coef[1:6, 1], 0.05)
# now draw with stochastic beta
pvCs <- drawPVs(mmlCTaylor, stochasticBeta=TRUE)
stuDatCs <- merge(stuDat, pvCs[["data"]], by.x="sid", by.y="id", all.x=TRUE, all.y=FALSE)
# first for a subscale
lmA2s <- summary(lm(alg_dire1 ~ dsexFemale + sdracemBlack + sdracemHispanic + `sdracemAsian/Pacific Island` +
sdracemInd + sdracemOther, data=stuDatCs, weights=stuDatC$origwt))
expect_equal(coef(mmlATaylor)[1:6, 1], lmA2s$coef[1:6, 1], 0.05)
# now composite
lmC2s <- summary(lm(composite_dire1 ~ dsexFemale + sdracemBlack + sdracemHispanic + `sdracemAsian/Pacific Island` +
sdracemInd + sdracemOther, data=stuDatCs, weights=stuDatC$origwt))
expect_equal(coef(mmlCTaylor)[1:6, 1], lmC2s$coef[1:6, 1], 0.05)
# MML Composite Regression
# Run completed on Wednesday, December 09, 2020. 12:54:46 PM
#
# Selection: ALL
# Observations: 16915
# Strata Variable: REPGRP1
# Cluster Variable: JKUNIT
# Weight Variable: ORIGWT
#
# Adjusted Wald Test
# F(6,57) = 22.1717
# p(F > f) = 2.83107e-013
# Composite Results
# Parameter Name Estimate Standard Error t Statistic p > |t|
# Constant 287.4594172767 1.5217659456 5.3268139977 0.0000014991
# DSEXFEMA 2.4427336887 1.6550653899 1.4759137033 0.1450151434
# SDRACEMB -26.6046575811 2.8827206928 -9.2290098195 0.0000000000
# SDRACEMH -20.9253852827 2.8492971588 -7.3440515737 0.0000000006
# SDRACEMA 10.9151163178 5.0000511649 2.1830009250 0.0328517195
# SDRACEMI -22.1818659549 7.0094097999 -3.1645839790 0.0024164736
# SDRACEMO -3.5410669629 11.4857615243 -0.3083005820 0.7588921216
# Root MSE 29.1509253186 (--)
# AM Statistical Software Beta Version 0.06.04. (c) The American Institutes for Research and Jon Cohen
# Dependent Variable: composite, alg
# Parameter Name Estimate Standard Error
# Constant 0.102 0.043
# DSEXFEMA 0.128 0.046
# SDRACEMB -0.728 0.086
# SDRACEMH -0.559 0.081
# SDRACEMA 0.370 0.131
# SDRACEMI -0.662 0.196
# SDRACEMO -0.081 0.299
# Root MSE 0.822 0.032
# Dependent Variable: composite, num
# Parameter Name Estimate Standard Error
# Constant 0.486 0.041
# DSEXFEMA -0.071 0.047
# SDRACEMB -0.712 0.067
# SDRACEMH -0.591 0.076
# SDRACEMA 0.131 0.158
# SDRACEMI -0.470 0.192
# SDRACEMO -0.130 0.369
# Root MSE 0.787 0.037
# DIRE to AM
expect_equal(unname(mmlCTaylor$coefficients[-8,1]),
c(287.4594172767, 2.4427336887, -26.6046575811, -20.9253852827,
10.9151163178, -22.1818659549, -3.5410669629),
tolerance=50*sqrt(.Machine$double.eps))
# AM gets wrong correlations, reported for posterity
# expect_equal(unname(mmlCTaylor$coefficients[1:7,2]),
# c(1.5217659456, 1.6550653899, 2.8827206928,
# 2.8492971588, 5.0000511649, 7.0094097999,11.4857615243),
# tolerance=5*(.Machine$double.eps)^0.25)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.