Nothing
## ---- include = FALSE---------------------------------------------------------
library(knitr)
set.seed(1)
is_CRAN <- !identical(Sys.getenv("NOT_CRAN"), "true")
if (!is_CRAN) {
options(mc.cores = parallel::detectCores())
} else {
knitr::opts_chunk$set(eval = FALSE)
knitr::knit_hooks$set(evaluate.inline = function(x, envir) x)
}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
cache.lazy = FALSE # https://github.com/yihui/knitr/issues/572
)
options(digits=4)
options(scipen=2)
## ---- message=FALSE-----------------------------------------------------------
library(rstan)
library(pcFactorStan)
library(loo)
library(qgraph)
library(ggplot2)
library(Matrix)
## ---- results='hide'----------------------------------------------------------
head(phyActFlowPropensity)
## ---- results='asis', echo=FALSE----------------------------------------------
kable(head(phyActFlowPropensity))
## -----------------------------------------------------------------------------
dl <- prepData(phyActFlowPropensity[,c(paste0('pa',1:2), 'skill')])
dl$varCorrection <- 5.0
## ----pcStan, message=FALSE, results='hide', cache=TRUE------------------------
fit1 <- pcStan("unidim_adapt", data=dl)
## ----pcStanDiag1, cache=TRUE--------------------------------------------------
check_hmc_diagnostics(fit1)
## ----pcStanDiag2, cache=TRUE--------------------------------------------------
allPars <- summary(fit1, probs=c())$summary
print(min(allPars[,'n_eff']))
print(max(allPars[,'Rhat']))
## ----skill, cache=TRUE--------------------------------------------------------
theta <- summary(fit1, pars=c("theta"), probs=c())$summary[,'mean']
ggplot(data.frame(x=theta, activity=dl$nameInfo$pa, y=0.47)) +
geom_point(aes(x=x),y=0) +
geom_text(aes(label=activity, x=x, y=y),
angle=85, hjust=0, size=2,
position = position_jitter(width = 0, height = 0.4)) + ylim(0,1) +
theme(legend.position="none",
axis.title.x=element_blank(),
axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank())
## -----------------------------------------------------------------------------
s50 <- summary(fit1, pars=c("scale"), probs=c(.5))$summary[,'50%']
print(s50)
## ---- results='hide', include = FALSE-----------------------------------------
rm(fit1) # free up some memory
## ----calibrateItems, message=FALSE, results='hide', cache=TRUE----------------
result <- calibrateItems(phyActFlowPropensity, iter=1000L)
## ---- results='hide'----------------------------------------------------------
print(result)
## ---- results='asis', echo=FALSE----------------------------------------------
kable(result)
## ----covarianceData, cache=TRUE-----------------------------------------------
pafp <- phyActFlowPropensity
excl <- match(c('goal1','feedback1'), colnames(pafp))
pafp <- pafp[,-excl]
dl <- prepData(pafp)
dl$scale <- result[match(dl$nameInfo$item, result$item), 'scale']
## ----covariance, message=FALSE, results='hide', cache=TRUE--------------------
fit2 <- pcStan("correlation", data=dl, include=FALSE, pars=c('rawTheta', 'rawThetaCorChol'))
## ----covarianceDiag1, cache=TRUE----------------------------------------------
check_hmc_diagnostics(fit2)
allPars <- summary(fit2, probs=0.5)$summary
print(min(allPars[,'n_eff']))
print(max(allPars[,'Rhat']))
## ----covarianceDiag2, cache=TRUE----------------------------------------------
head(allPars[order(allPars[,'sd']),])
## ----covarianceDiag3, cache=TRUE----------------------------------------------
allPars <- allPars[allPars[,'sd'] > 1e-6,]
print(min(allPars[,'n_eff']))
print(max(allPars[,'Rhat']))
## ----corPlot, cache=TRUE------------------------------------------------------
corItemNames <- dl$nameInfo$item
tc <- summary(fit2, pars=c("thetaCor"), probs=c(.1,.5,.9))$summary
tcor <- matrix(tc, length(corItemNames), length(corItemNames))
tcor[sign(tc[,'10%']) != sign(tc[,'90%'])] <- 0 # delete faint edges
dimnames(tcor) <- list(corItemNames, corItemNames)
tcor <- nearPD(tcor, corr=TRUE)$mat
qgraph(tcor, layout = "spring", graph = "cor", labels=colnames(tcor),
legend.cex = 0.3,
cut = 0.3, maximum = 1, minimum = 0, esize = 20,
vsize = 7, repulsion = 0.8, negDashed=TRUE, theme="colorblind")
## ----responseCurves, cache=TRUE-----------------------------------------------
df <- responseCurve(dl, fit2,
item=setdiff(dl$nameInfo$item, c('spont','control','evaluated','waiting')),
responseNames=c("much more","somewhat more", 'equal',
"somewhat less", "much less"))
ggplot(df) +
geom_line(aes(x=worthDiff, y=prob, color=response,linetype=response,
group=responseSample), size=.2, alpha=.2) +
xlab("difference in latent worths") + ylab("probability") +
ylim(0,1) + facet_wrap(~item, scales="free_x") +
guides(color=guide_legend(override.aes=list(alpha = 1, size=1)))
## ----factorData1, cache=TRUE--------------------------------------------------
pafp <- pafp[,c(paste0('pa',1:2),
setdiff(corItemNames, c('spont','control','evaluated','waiting')))]
pafp <- normalizeData(filterGraph(pafp))
dl <- prepCleanData(pafp)
dl <- prepSingleFactorModel(dl)
dl$scale <- result[match(dl$nameInfo$item, result$item), 'scale']
## ---- results='hide', include = FALSE-----------------------------------------
rm(fit2) # free up some memory
## ----factor, message=FALSE, results='hide', cache=TRUE------------------------
fit3 <- pcStan("factor1_ll", data=dl, include=FALSE,
pars=c('rawUnique', 'rawUniqueTheta', 'rawPerComponentVar',
'rawFactor', 'rawLoadings', 'rawFactorProp', 'rawThreshold',
'rawPathProp', 'rawCumTh'))
## ----factorDiag1, cache=TRUE--------------------------------------------------
check_hmc_diagnostics(fit3)
interest <- c("threshold", "alpha", "pathProp", "factor", "residualItemCor", "lp__")
allPars <- summary(fit3, pars=interest)$summary
allPars <- allPars[allPars[,'sd'] > 1e-6,]
print(min(allPars[,'n_eff']))
print(max(allPars[,'Rhat']))
## ----factorLoo, cache=TRUE----------------------------------------------------
options(mc.cores=1) # otherwise loo consumes too much RAM
kThreshold <- 0.1
l1 <- toLoo(fit3)
print(l1)
## -----------------------------------------------------------------------------
pa11 <- levels(filterGraph(pafp, minDifferent=11L)$pa1)
ot <- outlierTable(dl, l1, kThreshold)
ot <- subset(ot, pa1 %in% pa11 & pa2 %in% pa11)
## ---- results='hide'----------------------------------------------------------
print(ot[1:6,])
## ---- results='asis', echo=FALSE----------------------------------------------
kable(ot[1:6,], row.names=TRUE)
## ---- results='hide'----------------------------------------------------------
xx <- which(ot[,'pa1'] == 'mountain biking' & ot[,'pa2'] == 'climbing' & ot[,'item'] == 'predict' & ot[,'pick'] == -2)
## ---- results='asis', echo=FALSE----------------------------------------------
if (length(xx) == 0) {
xx <- 1
warning("Can't find outlier")
}
kable(ot[xx,,drop=FALSE], row.names=TRUE)
## -----------------------------------------------------------------------------
pafp[pafp$pa1 == ot[xx,'pa1'] & pafp$pa2 == ot[xx,'pa2'],
c('pa1','pa2', as.character(ot[xx,'item']))]
## ----outlier, cache=TRUE------------------------------------------------------
loc <- sapply(ot[xx,c('pa1','pa2','item')], unfactor)
exam <- summary(fit3, pars=paste0("theta[",loc[paste0('pa',1:2)],
",", loc['item'],"]"))$summary
rownames(exam) <- c(as.character(ot[xx,'pa1']), as.character(ot[xx,'pa2']))
## ---- results='asis', echo=FALSE----------------------------------------------
#exam <- data.frame(mean=c(0,0), '2.5%'=c(0,0), '97.5%'=c(0,0))
kable(exam)
## -----------------------------------------------------------------------------
sum(c(pafp$pa1 == ot[xx,'pa1'], pafp$pa2 == ot[xx,'pa1']))
sum(c(pafp$pa1 == ot[xx,'pa2'], pafp$pa2 == ot[xx,'pa2']))
## ----pathProp, cache=TRUE-----------------------------------------------------
pi <- parInterval(fit3, 'pathProp', dl$nameInfo$item, label='item')
pi <- pi[order(abs(pi$M)),]
ggplot(pi) +
geom_vline(xintercept=0, color="green") +
geom_jitter(data=parDistributionFor(fit3, pi),
aes(value, item), height = 0.35, alpha=.05) +
geom_segment(aes(y=item, yend=item, x=L, xend=U),
color="yellow", alpha=.5) +
geom_point(aes(x=M, y=item), color="red", size=1) +
theme(axis.title.y=element_blank())
## ----activities, cache=TRUE---------------------------------------------------
pick <- paste0('factor[',match(pa11, dl$nameInfo$pa),',1]')
pi <- parInterval(fit3, pick, pa11, label='activity')
pi <- pi[order(pi$M),]
ggplot(pi) +
geom_vline(xintercept=0, color="green") +
geom_jitter(data=parDistributionFor(fit3, pi, samples=200),
aes(value, activity), height = 0.35, alpha=.05) +
geom_segment(aes(y=activity, yend=activity, x=L, xend=U),
color="yellow", alpha=.5) +
geom_point(aes(x=M, y=activity), color="red", size=1) +
theme(axis.title.y=element_blank())
## ----residualItemCor, cache=TRUE----------------------------------------------
m <- matrix(apply(expand.grid(r=1:dl$NITEMS, c=1:dl$NITEMS), 1,
function(x) paste0("residualItemCor[",x['r'],",",x['c'],"]")),
dl$NITEMS, dl$NITEMS)
n <- matrix(apply(expand.grid(r=dl$nameInfo$item, c=dl$nameInfo$item), 1,
function(x) paste0(x['r'],":",x['c'])),
dl$NITEMS, dl$NITEMS)
pi <- parInterval(fit3, m[lower.tri(m)], n[lower.tri(n)], label='cor')
pi <- pi[abs(pi$M) > .08,]
pi <- pi[order(-abs(pi$M)),]
ggplot(pi) +
geom_vline(xintercept=0, color="green") +
geom_jitter(data=parDistributionFor(fit3, pi, samples=800),
aes(value, cor), height = 0.35, alpha=.05) +
geom_segment(aes(y=cor, yend=cor, x=L, xend=U),
color="yellow", alpha=.5) +
geom_point(aes(x=M, y=cor), color="red", size=1) +
theme(axis.title.y=element_blank())
## -----------------------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.