Nothing
#' Latent correlations
#'
#' Estimates correlations between latent traits using plausible values as described in Marsman, et al. (2022).
#' An item_property is used to distinguish the different scales.
#'
#' @param dataSrc A connection to a dexter database or a data.frame with columns: person_id, item_id, item_score and
#' the item_property
#' @param item_property The name of the item property used to define the domains. If \code{dataSrc} is a dexter db then the
#' item_property must match a known item property. If datasrc is a data.frame, item_property must be equal to
#' one of its column names.
#' @param predicate An optional expression to subset data, if NULL all data is used
#' @param nDraws Number of draws for plausible values
#' @param hpd width of Bayesian highest posterior density interval around the correlations,
#' value must be between 0 and 1.
#' @param use Only complete.obs at this time. Respondents who don't have a score for one or more scales are removed.
#' @return List containing a estimated correlation matrix, the corresponding standard deviations,
#' and the lower and upper limits of the highest posterior density interval
#' @details
#' This function uses plausible values so results may differ slightly between calls.
#'
#' @references
#' Marsman, M., Bechger, T. M., & Maris, G. K. (2022). Composition algorithms for conditional distributions.
#' In Essays on Contemporary Psychometrics (pp. 219-250). Cham: Springer International Publishing.
#'
latent_cor = function(dataSrc, item_property, predicate=NULL, nDraws=500, hpd=0.95, use="complete.obs")
{
check_dataSrc(dataSrc)
check_num(nDraws, 'integer', .length=1, .min=1)
check_num(hpd, .length=1, .min=1/nDraws,.max=1)
qtpredicate = eval(substitute(quote(predicate)))
env = caller_env()
# use = pmatch(use, c("all.obs", "complete.obs",
# "pairwise.complete.obs", "everything", "na.or.complete"))
if(use != "complete.obs")
stop("only 'complete.obs' is currently implemented")
pb = get_prog_bar(nDraws, retrieve_data = is_db(dataSrc), lock=TRUE)
on.exit({pb$close()})
from = 5
by = 2
which.keep = seq(from,(from-by)*(from>by)+by*nDraws,by=by)
nIter=max(which.keep)
if(is.matrix(dataSrc))
{
stop("a matrix datasrc is not yet implemented for this function")
}
# to do: this is a bit tricky, we will often need to merge over persons, e.g. if booklets are administered
# per subject. But if the same person makes multiple tests for the same subject, this is not good.
# Still have to decide on a solution
# why not check which is the case?
respData = get_resp_data(dataSrc, qtpredicate, env=env, extra_columns=item_property,
merge_within_persons=TRUE)
respData$x[[item_property]] = ffactor(as.character(respData$x[[item_property]]))
lvl = levels(respData$x[[item_property]])
nd = length(lvl)
pb$set_nsteps(nIter + 4*nd)
# this assumes merge over booklets
persons = respData$x |>
distinct(.data$person_id, .data[[item_property]]) |>
count(.data$person_id)
persons = persons |>
filter(.data$n==nd) |>
mutate(new_person_id = dense_rank(.data$person_id)) |>
select('person_id','new_person_id')
np = nrow(persons)
if(np==0) stop_("There are no persons that have scores for every subscale.")
respData = lapply(split(respData$x, respData$x[[item_property]]), get_resp_data)
models = lapply(respData, function(x){try(fit_enorm(x), silent=TRUE)})
names(models) = names(respData)
if(any(sapply(models,inherits,what='try-error')))
{
message('\nThe model could not be estimated for one or more item properties, reasons:')
models[!sapply(models,inherits,what='try-error')] = 'OK'
print(data.frame(item_property=names(models),
result=gsub('^.+try\\(\\{ *:','',trimws(as.character(models)))))
stop('Some models could not be estimated')
}
for(i in seq_along(respData))
{
respData[[i]] = get_resp_data(respData[[i]], summarised=TRUE)
respData[[i]]$x = respData[[i]]$x |>
inner_join(persons,by='person_id') |>
select(-'person_id') |>
rename(person_id='new_person_id')
}
abl = mapply(ability, respData, models, SIMPLIFY=FALSE,
MoreArgs = list(method="EAP", prior="Jeffreys"))
pb$tick(2*nd)
# some matrices
# we cannot rely on order in ability so this is the way for now
abl_mat = matrix(NA_real_,np,nd)
for(d in 1:nd)
abl_mat[abl[[d]]$person_id,d] = abl[[d]]$theta
acor = cor(abl_mat,use=use)
pv = matrix(0,np,nd)
reliab = rep(0,nd)
sd_pv = rep(0,nd)
mean_pv = rep(0,nd)
for (i in 1:nd)
{
pvs = plausible_values(respData[[i]],models[[i]],nPV = 2)
reliab[i] = cor(pvs$PV1,pvs$PV2)
sd_pv[i] = sd(pvs$PV1)
mean_pv[i] = mean(pvs$PV1)
pv[pvs$person_id,i] = pvs$PV1
}
pb$tick(2*nd)
for (i in 1:(nd-1))
{
for (j in ((i+1):nd)){
acor[i,j] = acor[i,j]/sqrt(reliab[i]*reliab[j])
acor[j,i] = acor[i,j]
}
}
out_sd=matrix(0,nd,nd)
out_cor = acor
# make everything simple and zero indexed
# parms = cml
models = lapply(models, simplify_parms)
respData = lapply(respData, get_resp_data, summarised=TRUE)
for(d in 1:nd)
{
respData[[d]]$x$booklet_id = as.integer(respData[[d]]$x$booklet_id) - 1L
models[[d]]$bcni = c(0L,cumsum(table(as.integer(models[[d]]$design$booklet_id))))
}
prior = list(mu=mean_pv, Sigma = diag(1.7*sd_pv) %*% acor %*% diag(1.7*sd_pv))
store = matrix(0, length(which.keep), length(as.vector(prior$Sigma)))
tel = 1
max_cores = get_ncores(desired = 128L, maintain_free = 1L)
for (i in 1:nIter)
{
for (d in 1:nd)
{
cons = condMoments(prior$mu, prior$Sigma, d, x.value=pv[,-d])
PV_sve(models[[d]]$b, models[[d]]$a, models[[d]]$design$first0, models[[d]]$design$last0,
models[[d]]$bcni,
respData[[d]]$x$booklet_id, respData[[d]]$x$booklet_score, cons$mu, sqrt(cons$sigma),
max_cores,
pv,d-1L, 10L)
}
prior = update_MVNprior(pv,prior$Sigma)
if (i %in% which.keep){
store[tel,] = as.vector(cov2cor(prior$Sigma))
tel=tel+1
}
pb$tick()
}
out_sd = matrix(apply(store,2,sd),nd,nd)
diag(out_sd)=0
pd = t(apply(store,2,hpdens, conf=hpd))
res = list(cor = matrix(colMeans(store),nd,nd), sd = out_sd,
hpd_l = matrix(pd[,1], nd, nd), hpd_h = matrix(pd[,2], nd, nd))
res = lapply(res, function(x){colnames(x) = rownames(x) = names(models); x})
res$n_persons = np
res
}
# mean and variance of Y|x if x,y is multivariate normal
#
# with mean mu and variance-covariance matrix sigma
# @param m vector of means
# @param sigma covariance matrix
# @param y.ind indices dependent variable(s)
# @param x.ind indices conditioning variables. If null its just all others
# @param x.value value of conditioning variables
#
condMoments = function(mu, sigma, y.ind, x.ind=NULL, x.value )
{
if (is.null(x.ind)) x.ind = setdiff(1:length(mu), y.ind)
B = sigma[y.ind, y.ind]
C = sigma[y.ind, x.ind, drop = FALSE]
D = sigma[x.ind, x.ind]
CDinv = C %*% solve(D)
if (is.vector(x.value))
{
cMu = c(mu[y.ind] + CDinv %*% (x.value - mu[x.ind]))
}else
{
nP = nrow(x.value)
cMu = rep(0,nP)
for (i in 1:nP) cMu[i] = mu[y.ind] + CDinv %*% (x.value[i,] - mu[x.ind])
}
cVar = B - CDinv %*% t(C)
return(list(mu=cMu, sigma=cVar))
}
update_MVNprior = function(pvs,Sigma)
{
m_pv = colMeans(pvs)
nP = nrow(pvs)
mu = rmvnorm(1, mean=m_pv,sigma=Sigma/nP)
S=(t(pvs)-m_pv)%*%t(t(pvs)-m_pv)
Sigma = solve(rWishart(1,nP-1,solve(S))[,,1])
return(list(mu = mu, Sigma = Sigma))
}
# borrowed the following from mvtnorm source for the time being
# so we don't have to import a whole package
rmvnorm = function(n,mean,sigma)
{
ev = eigen(sigma, symmetric = TRUE)
R = t(ev$vectors %*% (t(ev$vectors) * sqrt(pmax(ev$values, 0))))
retval = matrix(rnorm(n * ncol(sigma)), nrow = n, byrow = TRUE) %*% R
retval = sweep(retval, 2, mean, "+")
colnames(retval) = names(mean)
retval
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.