# utility functions for pairwise maximum likelihood
# stub for fml_deriv1
fml_deriv1 <- function(Sigma.hat = NULL, # model-based var/cov/cor
TH = NULL, # model-based thresholds + means
th.idx = NULL, # threshold idx per variable
num.idx = NULL, # which variables are numeric
X = NULL, # data
eXo = NULL, # external covariates
lavcache = NULL, # housekeeping stuff
scores = FALSE, # return case-wise scores
negative = TRUE) {
lav_msg_stop(gettext("not implemented"))
}
# the first derivative of the pairwise logLik function with respect to the
# thresholds/slopes/var/correlations; together with DELTA, we can use the
# chain rule to get the gradient
# this is adapted from code written by Myrsini Katsikatsou
# first attempt - YR 5 Oct 2012
# HJ 18/10/23: modification for complex designs and completely observed data
# (no missing values) with only ordinal indicators, to obtain the correct
# gradient for the optimisation and the Hessian computation.
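# A minimal sketch (guarded, never executed when sourcing) of how this pairwise
# gradient is combined with DELTA via the chain rule. `DELTA`
# (= d(TH, slopes, var, cor)/d(theta)) and the model matrices are assumed to
# exist in the caller; they are hypothetical here, not defined in this file.
if (FALSE) {
  dx <- pml_deriv1(
    Sigma.hat = Sigma.hat, TH = TH, th.idx = th.idx, num.idx = num.idx,
    X = X, lavcache = lavcache, scores = FALSE, negative = TRUE
  )
  # chain rule: d(-logPL)/d(theta) = t(DELTA) %*% dx
  grad.theta <- as.numeric(t(DELTA) %*% dx)
}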
pml_deriv1 <- function(Sigma.hat = NULL, # model-based var/cov/cor
Mu.hat = NULL, # model-based means
TH = NULL, # model-based thresholds + means
th.idx = NULL, # threshold idx per variable
num.idx = NULL, # which variables are numeric
X = NULL, # data
eXo = NULL, # external covariates
wt = NULL, # case weights (not used yet)
lavcache = NULL, # housekeeping stuff
PI = NULL, # slopes
missing = "listwise", # how to deal with missings
scores = FALSE, # return case-wise scores
negative = TRUE) { # multiply by -1
# diagonal of Sigma.hat is not necessarily 1, even for categorical vars
Sigma.hat2 <- Sigma.hat
if (length(num.idx) > 0L) {
diag(Sigma.hat2)[-num.idx] <- 1
} else {
diag(Sigma.hat2) <- 1
}
Cor.hat <- cov2cor(Sigma.hat2) # to get correlations (rho!)
cors <- lav_matrix_vech(Cor.hat, diagonal = FALSE)
if (any(abs(cors) > 1)) {
# what should we do now... force cov2cor?
# cat("FFFFOOOORRRRRCEEE PD!\n")
# Sigma.hat <- Matrix::nearPD(Sigma.hat)
# Sigma.hat <- as.matrix(Sigma.hat$mat)
# Sigma.hat <- cov2cor(Sigma.hat)
# cors <- Sigma.hat[lower.tri(Sigma.hat)]
idx <- which(abs(cors) > 0.99)
cors[idx] <- sign(cors[idx]) * 0.99 # clip, preserving the sign
# cat("CLIPPING!\n")
}
nvar <- nrow(Sigma.hat)
pstar <- nvar * (nvar - 1) / 2
ov.types <- rep("ordered", nvar)
if (length(num.idx) > 0L) ov.types[num.idx] <- "numeric"
if (!is.null(eXo)) {
nexo <- ncol(eXo)
} else {
nexo <- 0
}
if (all(ov.types == "numeric")) {
N.TH <- nvar
} else {
N.TH <- length(th.idx)
}
N.SL <- nvar * nexo
N.VAR <- length(num.idx)
N.COR <- pstar
# add num.idx to th.idx
if (length(num.idx) > 0L) {
th.idx[th.idx == 0] <- num.idx
}
# print(Sigma.hat); print(TH); print(th.idx); print(num.idx); print(str(X))
# shortcut for ordinal-only/no-exo case
if (!scores && all(ov.types == "ordered") && nexo == 0L) {
# >>>>>>>> HJ/MK PML CODE >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
if (is.null(wt)) {
n.xixj.vec <- lavcache$bifreq
} else {
n.xixj.vec <- lavcache$sum_obs_weights_xixj_ab_vec
}
gradient <- grad_tau_rho(
no.x = nvar,
all.thres = TH,
index.var.of.thres = th.idx,
rho.xixj = cors,
n.xixj.vec = n.xixj.vec,
out.LongVecInd = lavcache$long
)
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
if (missing == "available.cases") {
uniPI <- univariateExpProbVec(TH = TH, th.idx = th.idx)
tmp <- lavcache$uniweights / uniPI
var.idx <- split(th.idx, th.idx)
var.idx <- unlist(lapply(var.idx, function(x) {
c(x, x[1])
}))
tmp.varwise <- split(tmp, var.idx)
tmp1 <- unlist(lapply(
tmp.varwise,
function(x) {
c(x[-length(x)])
}
))
tmp2 <- unlist(lapply(tmp.varwise, function(x) {
c(x[-1])
}))
uni.der.tau <- dnorm(TH) * (tmp1 - tmp2)
nTH <- length(TH)
gradient[1:nTH] <- gradient[1:nTH] + uni.der.tau
}
if (negative) {
gradient <- -1 * gradient
}
return(gradient)
}
# in this order: TH/MEANS + SLOPES + VAR + COR
GRAD.size <- N.TH + N.SL + N.VAR + N.COR
# scores or gradient?
if (scores) {
SCORES <- matrix(0, nrow(X), GRAD.size) # we will sum up over all pairs
} else {
GRAD <- matrix(0, pstar, GRAD.size) # each pair is a row
}
PSTAR <- matrix(0, nvar, nvar) # utility matrix, to get indices
PSTAR[lav_matrix_vech_idx(nvar, diagonal = FALSE)] <- 1:pstar
N <- length(X[, 1])
for (j in seq_len(nvar - 1L)) {
for (i in (j + 1L):nvar) {
# cat(" i = ", i, " j = ", j, "\n") # debug only
pstar.idx <- PSTAR[i, j]
cor.idx <- N.TH + N.SL + N.VAR + PSTAR[i, j]
th.idx_i <- which(th.idx == i)
th.idx_j <- which(th.idx == j)
if (nexo > 0L) {
sl.idx_i <- N.TH + seq(i, by = nvar, length.out = nexo)
sl.idx_j <- N.TH + seq(j, by = nvar, length.out = nexo)
if (length(num.idx) > 0L) {
var.idx_i <- N.TH + N.SL + match(i, num.idx)
var.idx_j <- N.TH + N.SL + match(j, num.idx)
}
} else {
if (length(num.idx) > 0L) {
var.idx_i <- N.TH + match(i, num.idx)
var.idx_j <- N.TH + match(j, num.idx)
}
}
if (ov.types[i] == "numeric" && ov.types[j] == "numeric") {
if (nexo > 1L) {
lav_msg_stop(gettext(
"mixed + exo in PML not implemented;
try optim.gradient = \"numerical\""))
}
SC <- lav_mvnorm_scores_mu_vech_sigma(
Y = X[, c(i, j)],
Mu = Mu.hat[c(i, j)], Sigma = Sigma.hat[c(i, j), c(i, j)]
)
if (scores) {
if (all(ov.types == "numeric") && nexo == 0L) {
# MU1 + MU2
SCORES[, c(i, j)] <- SCORES[, c(i, j)] + SC[, c(1, 2)]
# VAR1 + COV_12 + VAR2
var.idx <- (nvar +
lav_matrix_vech_match_idx(nvar, idx = c(i, j)))
SCORES[, var.idx] <- SCORES[, var.idx] + SC[, c(3, 4, 5)]
} else { # mixed ordered/continuous
# MU
mu.idx <- c(th.idx_i, th.idx_j)
SCORES[, mu.idx] <- SCORES[, mu.idx] + (-1) * SC[, c(1, 2)]
# VAR+COV
var.idx <- c(var.idx_i, cor.idx, var.idx_j)
SCORES[, var.idx] <- SCORES[, var.idx] + SC[, c(3, 4, 5)]
}
} else {
if (all(ov.types == "numeric") && nexo == 0L) {
mu.idx <- c(i, j)
sigma.idx <- (nvar +
lav_matrix_vech_match_idx(nvar, idx = c(i, j)))
# MU1 + MU2
GRAD[pstar.idx, mu.idx] <-
colSums(SC[, c(1, 2)], na.rm = TRUE)
} else {
mu.idx <- c(th.idx_i, th.idx_j)
sigma.idx <- c(var.idx_i, cor.idx, var.idx_j)
# MU (reverse sign!)
GRAD[pstar.idx, mu.idx] <-
-1 * colSums(SC[, c(1, 2)], na.rm = TRUE)
}
# SIGMA
GRAD[pstar.idx, sigma.idx] <-
colSums(SC[, c(3, 4, 5)], na.rm = TRUE)
} # gradient only
} else if (ov.types[i] == "numeric" && ov.types[j] == "ordered") {
# polyserial correlation
if (nexo > 1L) {
lav_msg_stop(gettext(
"mixed + exo in PML not implemented;
try optim.gradient = \"numerical\""))
}
SC.COR.UNI <- lav_bvmix_cor_scores(
Y1 = X[, i], Y2 = X[, j],
eXo = NULL, wt = wt,
evar.y1 = Sigma.hat[i, i],
beta.y1 = Mu.hat[i],
th.y2 = TH[th.idx == j],
sl.y2 = NULL,
rho = Cor.hat[i, j],
sigma.correction = TRUE
)
if (scores) {
# MU
SCORES[, th.idx_i] <- (SCORES[, th.idx_i] +
-1 * SC.COR.UNI$dx.mu.y1)
# TH
SCORES[, th.idx_j] <- (SCORES[, th.idx_j] +
SC.COR.UNI$dx.th.y2)
# VAR
SCORES[, var.idx_i] <- (SCORES[, var.idx_i] +
SC.COR.UNI$dx.var.y1)
# COR
SCORES[, cor.idx] <- (SCORES[, cor.idx] +
SC.COR.UNI$dx.rho)
} else {
# MU
GRAD[pstar.idx, th.idx_i] <-
-1 * sum(SC.COR.UNI$dx.mu.y1, na.rm = TRUE)
# TH
GRAD[pstar.idx, th.idx_j] <-
colSums(SC.COR.UNI$dx.th.y2, na.rm = TRUE)
# VAR
GRAD[pstar.idx, var.idx_i] <-
sum(SC.COR.UNI$dx.var.y1, na.rm = TRUE)
# COR
GRAD[pstar.idx, cor.idx] <-
sum(SC.COR.UNI$dx.rho, na.rm = TRUE)
} # grad only
} else if (ov.types[j] == "numeric" && ov.types[i] == "ordered") {
# polyserial correlation
if (nexo > 1L) {
lav_msg_stop(gettext(
"mixed + exo in PML not implemented;
try optim.gradient = \"numerical\""))
}
SC.COR.UNI <- lav_bvmix_cor_scores(
Y1 = X[, j], Y2 = X[, i],
eXo = NULL, wt = wt,
evar.y1 = Sigma.hat[j, j],
beta.y1 = Mu.hat[j],
th.y2 = TH[th.idx == i],
rho = Cor.hat[i, j],
sigma.correction = TRUE
)
if (scores) {
# MU
SCORES[, th.idx_j] <- (SCORES[, th.idx_j] +
-1 * SC.COR.UNI$dx.mu.y1)
# TH
SCORES[, th.idx_i] <- (SCORES[, th.idx_i] +
SC.COR.UNI$dx.th.y2)
# VAR
SCORES[, var.idx_j] <- (SCORES[, var.idx_j] +
SC.COR.UNI$dx.var.y1)
# COR
SCORES[, cor.idx] <- (SCORES[, cor.idx] +
SC.COR.UNI$dx.rho)
} else {
# MU
GRAD[pstar.idx, th.idx_j] <-
-1 * sum(SC.COR.UNI$dx.mu.y1, na.rm = TRUE)
# TH
GRAD[pstar.idx, th.idx_i] <-
colSums(SC.COR.UNI$dx.th.y2, na.rm = TRUE)
# VAR
GRAD[pstar.idx, var.idx_j] <-
sum(SC.COR.UNI$dx.var.y1, na.rm = TRUE)
# COR
GRAD[pstar.idx, cor.idx] <-
sum(SC.COR.UNI$dx.rho, na.rm = TRUE)
} # grad only
} else if (ov.types[i] == "ordered" && ov.types[j] == "ordered") {
# polychoric correlation
if (nexo == 0L) {
SC.COR.UNI <-
lav_bvord_cor_scores(
Y1 = X[, i], Y2 = X[, j],
eXo = NULL, wt = wt,
rho = Sigma.hat[i, j],
fit.y1 = NULL, # fixme
fit.y2 = NULL, # fixme
th.y1 = TH[th.idx == i],
th.y2 = TH[th.idx == j],
sl.y1 = NULL,
sl.y2 = NULL,
na.zero = TRUE
)
} else {
SC.COR.UNI <-
pc_cor_scores_PL_with_cov(
Y1 = X[, i],
Y2 = X[, j],
eXo = eXo,
Rho = Sigma.hat[i, j],
th.y1 = TH[th.idx == i],
th.y2 = TH[th.idx == j],
sl.y1 = PI[i, ],
sl.y2 = PI[j, ],
missing.ind = missing
)
}
if (scores) {
# TH
SCORES[, th.idx_i] <- SCORES[, th.idx_i] + SC.COR.UNI$dx.th.y1
SCORES[, th.idx_j] <- SCORES[, th.idx_j] + SC.COR.UNI$dx.th.y2
# SL
if (nexo > 0L) {
SCORES[, sl.idx_i] <- SCORES[, sl.idx_i] + SC.COR.UNI$dx.sl.y1
SCORES[, sl.idx_j] <- SCORES[, sl.idx_j] + SC.COR.UNI$dx.sl.y2
}
# NO VAR
# RHO
SCORES[, cor.idx] <- SCORES[, cor.idx] + SC.COR.UNI$dx.rho
} else {
# TH
if (length(th.idx_i) > 1L) {
GRAD[pstar.idx, th.idx_i] <-
colSums(SC.COR.UNI$dx.th.y1, na.rm = TRUE)
} else {
GRAD[pstar.idx, th.idx_i] <-
sum(SC.COR.UNI$dx.th.y1, na.rm = TRUE)
}
if (length(th.idx_j) > 1L) {
GRAD[pstar.idx, th.idx_j] <-
colSums(SC.COR.UNI$dx.th.y2, na.rm = TRUE)
} else {
GRAD[pstar.idx, th.idx_j] <-
sum(SC.COR.UNI$dx.th.y2, na.rm = TRUE)
}
# SL
if (nexo > 0L) {
if (length(sl.idx_i) > 1L) {
GRAD[pstar.idx, sl.idx_i] <-
colSums(SC.COR.UNI$dx.sl.y1, na.rm = TRUE)
} else {
GRAD[pstar.idx, sl.idx_i] <-
sum(SC.COR.UNI$dx.sl.y1, na.rm = TRUE)
}
if (length(sl.idx_j) > 1L) {
GRAD[pstar.idx, sl.idx_j] <-
colSums(SC.COR.UNI$dx.sl.y2, na.rm = TRUE)
} else {
GRAD[pstar.idx, sl.idx_j] <-
sum(SC.COR.UNI$dx.sl.y2, na.rm = TRUE)
}
}
# NO VAR
# RHO
GRAD[pstar.idx, cor.idx] <-
sum(SC.COR.UNI$dx.rho, na.rm = TRUE)
}
# GRAD2 <- numDeriv::grad(func = pc_logl_x,
# x = c(Sigma.hat[i,j],
# TH[ th.idx == i ],
# TH[ th.idx == j]),
# Y1 = X[,i],
# Y2 = X[,j],
# eXo = eXo,
# nth.y1 = sum( th.idx == i ),
# nth.y2 = sum( th.idx == j ))
}
}
}
if (missing == "available.cases" && all(ov.types == "ordered")) {
if (nexo == 0L) {
UNI_SCORES <- matrix(0, nrow(X), N.TH)
for (i in seq_len(nvar)) {
th.idx_i <- which(th.idx == i)
derY1 <- uni_scores(
Y1 = X[, i], th.y1 = TH[th.idx == i],
eXo = NULL, sl.y1 = NULL,
weights.casewise = lavcache$uniweights.casewise
)
UNI_SCORES[, th.idx_i] <- derY1$dx.th.y1
}
} else {
UNI_SCORES <- matrix(0, nrow(X), ncol = (N.TH + N.SL))
for (i in seq_len(nvar)) {
th.idx_i <- which(th.idx == i)
sl.idx_i <- N.TH + seq(i, by = nvar, length.out = nexo)
derY1 <- uni_scores(
Y1 = X[, i], th.y1 = TH[th.idx == i],
eXo = eXo, sl.y1 = PI[i, ],
weights.casewise = lavcache$uniweights.casewise
)
UNI_SCORES[, th.idx_i] <- derY1$dx.th.y1
UNI_SCORES[, sl.idx_i] <- derY1$dx.sl.y1
}
if (scores) {
SCORES <- SCORES[, 1:(N.TH + N.SL)] + UNI_SCORES
} else {
uni_gradient <- colSums(UNI_SCORES)
}
}
}
# do we need scores?
if (scores) {
return(SCORES)
}
# DEBUG
# :print(GRAD)
###########
# gradient is sum over all pairs
gradient <- colSums(GRAD, na.rm = TRUE)
if (missing == "available.cases" && all(ov.types == "ordered")) {
if (nexo == 0L) {
gradient[1:N.TH] <- gradient[1:N.TH] + uni_gradient
} else {
gradient[1:(N.TH + N.SL)] <- gradient[1:(N.TH + N.SL)] + uni_gradient
}
}
# we multiply by -1 because we minimize
if (negative) {
gradient <- -1 * gradient
}
gradient
}
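# Sketch (guarded, hypothetical inputs) of the block layout of the returned
# gradient, matching the "TH/MEANS + SLOPES + VAR + COR" ordering used above;
# N.TH, N.SL, N.VAR and N.COR follow the same definitions as inside pml_deriv1().
if (FALSE) {
  g <- pml_deriv1(Sigma.hat = Sigma.hat, Mu.hat = Mu.hat, TH = TH,
                  th.idx = th.idx, num.idx = num.idx, X = X, eXo = eXo,
                  lavcache = lavcache, PI = PI, scores = FALSE)
  g.th  <- g[seq_len(N.TH)]                          # thresholds / means
  g.sl  <- g[N.TH + seq_len(N.SL)]                   # slopes (only if eXo)
  g.var <- g[N.TH + N.SL + seq_len(N.VAR)]           # variances (numeric vars)
  g.cor <- g[N.TH + N.SL + N.VAR + seq_len(N.COR)]   # correlations/covariances
}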
### all code below written by Myrsini Katsikatsou
# The function grad_tau_rho
# input:
# no.x - a scalar, the number of ordinal variables
# all.thres - a vector containing the thresholds of all variables in the
# following order: thres_var1, thres_var2, ..., thres_var_p;
# within each variable the thresholds are in ascending order.
# Note that all.thres does NOT contain tau_0 = -Inf and tau_last = Inf
# for any variable.
# index.var.of.thres - a vector keeping track of which variable the thresholds
# in all.thres belong to; it is of the form
# (1,1,1,..., 2,2,2,..., p,p,p,...)
# rho.xixj - is the vector of all correlations where j runs faster than i
# i.e. the order is rho_12, rho_13, ..., rho_1p, rho_23, ..., rho_2p,
# etc.
# n.xixj.vec - a vector with the observed frequency for every combination
# of categories and every pair. The frequencies are given in
# the same order as the expected probabilities in the output of
# pairwiseExpProbVec output
# out.LongVecInd - it is the output of function LongVecInd
# the output: it gives the elements of der.L.to.tau and der.L.to.rho, in this
# order. The elements of der.L.to.tau are ordered as follows: the
# thresholds of each variable in ascending order of the variable
# index (i.e. thres_var1, thres_var2, etc.), and within each
# variable the thresholds in ascending order.
# The elements of vector der.L.to.rho are der.Lxixj.to.rho.xixj
# where j runs faster than i.
# The function depends on four other functions: LongVecTH.Rho,
# pairwiseExpProbVec, derLtoRho, and derLtoTau, all given below.
# if n.xixj.ab is either an array or a list the following should be done
# n.xixj.vec <- if(is.array(n.xixj.ab)) {
# c(n.xixj.ab)
# } else if(is.list(n.xixj.ab)){
# unlist(n.xixj.ab)
# }
grad_tau_rho <- function(no.x, all.thres, index.var.of.thres, rho.xixj,
n.xixj.vec, out.LongVecInd) {
out.LongVecTH.Rho <- LongVecTH.Rho(
no.x = no.x, all.thres = all.thres,
index.var.of.thres = index.var.of.thres,
rho.xixj = rho.xixj
)
pi.xixj <- pairwiseExpProbVec(
ind.vec = out.LongVecInd,
th.rho.vec = out.LongVecTH.Rho
)
out.derLtoRho <- derLtoRho(
ind.vec = out.LongVecInd,
th.rho.vec = out.LongVecTH.Rho,
n.xixj = n.xixj.vec, pi.xixj = pi.xixj, no.x = no.x
)
out.derLtoTau <- derLtoTau(
ind.vec = out.LongVecInd,
th.rho.vec = out.LongVecTH.Rho,
n.xixj = n.xixj.vec, pi.xixj = pi.xixj,
no.x = no.x
)
grad <- c(out.derLtoTau, out.derLtoRho)
attr(grad, "pi.xixj") <- pi.xixj
grad
}
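# Guarded toy call illustrating the expected input layout of grad_tau_rho();
# the data, thresholds and correlations below are made up for illustration and
# are not part of the package.
if (FALSE) {
  set.seed(1)
  X <- cbind(sample(1:3, 200, replace = TRUE),   # 3 categories -> 2 thresholds
             sample(1:2, 200, replace = TRUE),   # 2 categories -> 1 threshold
             sample(1:3, 200, replace = TRUE))   # 3 categories -> 2 thresholds
  no.x <- 3
  all.thres <- c(-0.5, 0.5, 0, -0.5, 0.5)        # var1, var2, var3
  index.var.of.thres <- c(1, 1, 2, 3, 3)
  rho.xixj <- c(0.3, 0.2, 0.4)                   # rho_12, rho_13, rho_23
  # pairwise frequencies: pairs in combn() order, cells column-wise per table
  idx.pairs <- utils::combn(no.x, 2)
  n.xixj.vec <- unlist(lapply(seq_len(ncol(idx.pairs)), function(k) {
    c(table(X[, idx.pairs[1, k]], X[, idx.pairs[2, k]]))
  }))
  long <- LongVecInd(no.x = no.x, all.thres = all.thres,
                     index.var.of.thres = index.var.of.thres)
  g <- grad_tau_rho(no.x = no.x, all.thres = all.thres,
                    index.var.of.thres = index.var.of.thres,
                    rho.xixj = rho.xixj, n.xixj.vec = n.xixj.vec,
                    out.LongVecInd = long)
  g[1:5]   # d logPL / d tau (5 thresholds)
  g[6:8]   # d logPL / d rho (3 correlations)
}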
################################################################################
# The input of the function LongVecInd:
# no.x is scalar, the number of ordinal variables
# all.thres is vector containing the thresholds of all variables in the
# following order: thres_var1, thres_var2,..., thres_var_p
# within each variable the thresholds are in ascending order
# Note that all.thres does NOT contain the first and the last threshold of the
# variables, i.e. tau_0=-Inf and tau_last=Inf
# index.var.of.thres is a vector keeping track of which variable the thresholds
# in all.thres belong to; it is of the form (1,1,1,..., 2,2,2,..., p,p,p,...)
# The output of the function:
# it is a list of vectors keeping track of the indices
# of thresholds, of variables, and of pairs, and two T/F vectors indicating
# if the threshold index corresponds to the last threshold of a variable; all
# these for all pairs of variables. All are needed for the
# computation of expected probabilities, der.L.to.rho, and der.L.to.tau
# all duplications of indices are done as follows: within each pair of
# variables, xi-xj, if for example we want to duplicate the indices of the
# thresholds, tau^xi_a and tau^xj_b, then index a runs faster than b, i.e. for
# each b we take all different tau^xi's before proceeding to the next b. In
# other words, if the values were tabulated, the table would be filled
# column-wise.
# All pairs xi-xj are taken with index j running faster than i.
# Note that each variable may have a different number of categories, that's why
# for example we take lists below.
LongVecInd <- function(no.x, all.thres, index.var.of.thres) {
no.thres.of.each.var <- tapply(all.thres, index.var.of.thres, length)
index.pairs <- utils::combn(no.x, 2)
no.pairs <- ncol(index.pairs)
# index.thres.var1.of.pair and index.thres.var2.of.pair contain the indices
# of all thresholds (from tau_0, which is -Inf, to tau_last, which is Inf)
# for any pair of variables, appropriately duplicated so that the two vectors
# together give all possible combinations of threshold indices.
# Since the threshold indices 0 and "last" are included here, these vectors
# are longer than thres.var1.of.pair and thres.var2.of.pair in LongVecTH.Rho
# below.
index.thres.var1.of.pair <- vector("list", no.pairs)
index.thres.var2.of.pair <- vector("list", no.pairs)
# index.var1.of.pair and index.var2.of.pair keep track of the index of the
# variable that the thresholds in index.thres.var1.of.pair and
# index.thres.var2.of.pair belong to, respectively. These two vectors have
# the same length as index.thres.var1.of.pair and
# index.thres.var2.of.pair
index.var1.of.pair <- vector("list", no.pairs)
index.var2.of.pair <- vector("list", no.pairs)
# index.pairs.extended gives the index of the pair for each pair of variables
# e.g. pair of variables 1-2 has index 1, variables 1-3 has index 2, etc.
# The vector is of the same length as index.thres.var1.of.pair,
# index.thres.var2.of.pair, index.var1.of.pair, and index.var2.of.pair
index.pairs.extended <- vector("list", no.pairs)
for (i in 1:no.pairs) {
no.thres.var1.of.pair <- no.thres.of.each.var[index.pairs[1, i]]
no.thres.var2.of.pair <- no.thres.of.each.var[index.pairs[2, i]]
index.thres.var1.of.pair[[i]] <- rep(0:(no.thres.var1.of.pair + 1),
times = (no.thres.var2.of.pair + 2)
)
index.thres.var2.of.pair[[i]] <- rep(0:(no.thres.var2.of.pair + 1),
each = (no.thres.var1.of.pair + 2)
)
length.vec <- length(index.thres.var1.of.pair[[i]])
index.var1.of.pair[[i]] <- rep(index.pairs[1, i], length.vec)
index.var2.of.pair[[i]] <- rep(index.pairs[2, i], length.vec)
index.pairs.extended[[i]] <- rep(i, length.vec)
}
index.thres.var1.of.pair <- unlist(index.thres.var1.of.pair)
index.thres.var2.of.pair <- unlist(index.thres.var2.of.pair)
index.var1.of.pair <- unlist(index.var1.of.pair)
index.var2.of.pair <- unlist(index.var2.of.pair)
index.pairs.extended <- unlist(index.pairs.extended)
# indicator vector (T/F) showing which elements of index.thres.var1.of.pair
# correspond to the last thresholds of variables. The length is the same as
# that of index.thres.var1.of.pair.
last.thres.var1.of.pair <- index.var1.of.pair == 1 &
index.thres.var1.of.pair == (no.thres.of.each.var[1] + 1)
# we consider up to variable (no.x-1) because in pairs xi-xj where j runs
# faster than i, the last variable is not included in the column of xi's
for (i in 2:(no.x - 1)) {
new.condition <- index.var1.of.pair == i &
index.thres.var1.of.pair == (no.thres.of.each.var[i] + 1)
last.thres.var1.of.pair <- last.thres.var1.of.pair | new.condition
}
# indicator vector (T/F) showing which elements of index.thres.var2.of.pair
# correspond to the last thresholds of variables. Note that in pairs xi-xj
# where j runs faster than i, the first variable is not included in the column
# of xj's; that's why we start with variable 2. The length is the same as
# that of index.thres.var1.of.pair.
last.thres.var2.of.pair <- index.var2.of.pair == 2 &
index.thres.var2.of.pair == (no.thres.of.each.var[2] + 1)
for (i in 3:no.x) {
new.condition <- index.var2.of.pair == i &
index.thres.var2.of.pair == (no.thres.of.each.var[i] + 1)
last.thres.var2.of.pair <- last.thres.var2.of.pair | new.condition
}
list(
index.thres.var1.of.pair = index.thres.var1.of.pair,
index.thres.var2.of.pair = index.thres.var2.of.pair,
index.var1.of.pair = index.var1.of.pair,
index.var2.of.pair = index.var2.of.pair,
index.pairs.extended = index.pairs.extended,
last.thres.var1.of.pair = last.thres.var1.of.pair,
last.thres.var2.of.pair = last.thres.var2.of.pair
)
}
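# Guarded illustration of the index bookkeeping: with 3 variables having
# 2, 1 and 2 thresholds, each pair expands to a (K1 + 2) x (K2 + 2) grid of
# threshold-index combinations (K = number of non-infinite thresholds,
# including the implicit tau_0 = -Inf with index 0 and tau_last = Inf),
# laid out column-wise with the first variable's index running fastest.
# The inputs below are made up for illustration.
if (FALSE) {
  out <- LongVecInd(no.x = 3, all.thres = c(-1, 1, 0, -0.5, 0.5),
                    index.var.of.thres = c(1, 1, 2, 3, 3))
  str(out)
  # pair (1,2): indices 0:3 for var1 crossed with 0:2 for var2 -> 12 entries
  head(cbind(out$index.thres.var1.of.pair, out$index.thres.var2.of.pair), 12)
}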
################################################################################
# The input of the function LongVecTH.Rho:
# no.x is scalar, the number of ordinal variables
# all.thres is vector containing the thresholds of all variables in the
# following order: thres_var1, thres_var2,..., thres_var_p
# within each variable the thresholds are in ascending order
# Note that all.thres does NOT contain the first and the last threshold of the
# variables, i.e. tau_0=-Inf and tau_last=Inf
# index.var.of.thres is a vector keeping track of which variable the thresholds
# in all.thres belong to; it is of the form (1,1,1,..., 2,2,2,..., p,p,p,...)
# rho.xixj is the vector of all correlations where j runs faster than i,
# i.e. the order is rho_12, rho_13, ..., rho_1p, rho_23, ..., rho_2p, etc.
# The output of the function:
# it is a list of vectors with thresholds and rho's duplicated appropriately,
# all needed for the computation of expected probabilities,
# der.L.to.rho, and der.L.to.tau
# all duplications below are done as follows: within each pair of variables,
# xi-xj, if for example we want to duplicate their thresholds, tau^xi_a and
# tau^xj_b, then index a runs faster than b, i.e. for each b we take all
# different tau^xi's before proceeding to the next b. In other words, if the
# values were tabulated, the table would be filled column-wise.
# All pairs xi-xj are taken with index j running faster than i.
# Note that each variable may have a different number of categories, that's why
# for example we take lists below.
LongVecTH.Rho <- function(no.x, all.thres, index.var.of.thres, rho.xixj) {
no.thres.of.each.var <- tapply(all.thres, index.var.of.thres, length)
index.pairs <- utils::combn(no.x, 2)
no.pairs <- ncol(index.pairs)
# create the long vectors needed for the computation of expected probabilities
# for each cell and each pair of variables. The vectors thres.var1.of.pair and
# thres.var2.of.pair together give all the possible combinations of the
# thresholds of any two variables. Note the combinations (-Inf, -Inf),
# (-Inf, Inf), (Inf, -Inf), (Inf, Inf) are NOT included. Only the combinations
# of the middle thresholds (tau_1 to tau_(last-1)).
# thres.var1.of.pair and thres.var2.of.pair give the first and the second
# argument, respectively, in functions pbivnorm and dbinorm
thres.var1.of.pair <- vector("list", no.pairs)
thres.var2.of.pair <- vector("list", no.pairs)
# Extend the rho vector accordingly so that it can serve as the third
# argument of the pbivnorm and dbinorm functions. It has the same length as
# thres.var1.of.pair and thres.var2.of.pair.
rho.vector <- vector("list", no.pairs)
# thres.var1.for.dnorm.in.der.pi.to.tau.xi and
# thres.var2.for.dnorm.in.der.pi.to.tau.xj give the thresholds of almost
# all variables appropriately duplicated so that the vectors can be used
# as input in dnorm() to compute der.pi.xixj.to.tau.xi and
# der.pi.xixj.to.tau.xj.
# thres.var1.for.dnorm.in.der.pi.to.tau.xi does not contain the thresholds of
# the last variable and thres.var2.for.dnorm.in.der.pi.to.tau.xj those of
# the first variable
thres.var1.for.dnorm.in.der.pi.to.tau.xi <- vector("list", no.pairs)
thres.var2.for.dnorm.in.der.pi.to.tau.xj <- vector("list", no.pairs)
for (i in 1:no.pairs) {
single.thres.var1.of.pair <- all.thres[index.var.of.thres == index.pairs[1, i]]
single.thres.var2.of.pair <- all.thres[index.var.of.thres == index.pairs[2, i]]
# remember that the first (-Inf) and last (Inf) thresholds are not included,
# so no.thres.var1.of.pair is equal to the number of categories of var1 minus 1;
# similarly for no.thres.var2.of.pair
no.thres.var1.of.pair <- no.thres.of.each.var[index.pairs[1, i]]
no.thres.var2.of.pair <- no.thres.of.each.var[index.pairs[2, i]]
thres.var1.of.pair[[i]] <- rep(single.thres.var1.of.pair,
times = no.thres.var2.of.pair
)
thres.var2.of.pair[[i]] <- rep(single.thres.var2.of.pair,
each = no.thres.var1.of.pair
)
rho.vector[[i]] <- rep(rho.xixj[i], length(thres.var1.of.pair[[i]]))
thres.var1.for.dnorm.in.der.pi.to.tau.xi[[i]] <-
rep(single.thres.var1.of.pair, times = (no.thres.var2.of.pair + 1))
thres.var2.for.dnorm.in.der.pi.to.tau.xj[[i]] <-
rep(single.thres.var2.of.pair, each = (no.thres.var1.of.pair + 1))
}
thres.var1.of.pair <- unlist(thres.var1.of.pair)
thres.var2.of.pair <- unlist(thres.var2.of.pair)
rho.vector <- unlist(rho.vector)
thres.var1.for.dnorm.in.der.pi.to.tau.xi <-
unlist(thres.var1.for.dnorm.in.der.pi.to.tau.xi)
thres.var2.for.dnorm.in.der.pi.to.tau.xj <-
unlist(thres.var2.for.dnorm.in.der.pi.to.tau.xj)
# thres.var2.for.last.cat.var1 and thres.var1.for.last.cat.var2 are needed
# for the computation of expected probabilities. In the computation of
# \Phi_2(tau1, tau2; rho), when either tau1 or tau2 is Inf it is enough
# to compute pnorm() with the non-infinite tau as its argument.
# In particular, when the first variable of the pair has tau_last = Inf
# and the second a non-infinite threshold, we compute
# pnorm(thres.var2.for.last.cat.var1). Similarly, when the second variable of
# the pair has tau_last = Inf and the first a non-infinite threshold, we
# compute pnorm(thres.var1.for.last.cat.var2).
thres.var2.for.last.cat.var1 <- vector("list", (no.x - 1))
thres.var1.for.last.cat.var2 <- vector("list", (no.x - 1))
for (i in 1:(no.x - 1)) {
thres.var2.for.last.cat.var1[[i]] <-
c(all.thres[index.var.of.thres %in% (i + 1):no.x])
thres.var1.for.last.cat.var2[[i]] <- rep(all.thres[index.var.of.thres == i],
times = (no.x - i)
)
}
thres.var2.for.last.cat.var1 <- unlist(thres.var2.for.last.cat.var1)
thres.var1.for.last.cat.var2 <- unlist(thres.var1.for.last.cat.var2)
list(
thres.var1.of.pair = thres.var1.of.pair, # these 3 have the same length
thres.var2.of.pair = thres.var2.of.pair,
rho.vector = rho.vector,
# the following have lengths depending on the number of categories
thres.var1.for.dnorm.in.der.pi.to.tau.xi =
thres.var1.for.dnorm.in.der.pi.to.tau.xi,
thres.var2.for.dnorm.in.der.pi.to.tau.xj =
thres.var2.for.dnorm.in.der.pi.to.tau.xj,
thres.var2.for.last.cat.var1 = thres.var2.for.last.cat.var1,
thres.var1.for.last.cat.var2 = thres.var1.for.last.cat.var2
)
}
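# Guarded illustration: the first three (equal-length) vectors returned above
# are exactly the (x, y, rho) arguments fed to pbivnorm()/dbinorm() when the
# pairwise probabilities are computed below. The inputs are made up.
if (FALSE) {
  thr <- LongVecTH.Rho(no.x = 3, all.thres = c(-1, 1, 0, -0.5, 0.5),
                       index.var.of.thres = c(1, 1, 2, 3, 3),
                       rho.xixj = c(0.3, 0.2, 0.4))
  head(cbind(thr$thres.var1.of.pair, thr$thres.var2.of.pair, thr$rho.vector))
}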
#########################################################
#########################################################
# The function pairwiseExpProbVec
# input: ind.vec - the output of function LongVecInd
# th.rho.vec - the output of function LongVecTH.Rho
# output: it gives the elements of pairwiseTablesExpected()$pi.tables
# table-wise and column-wise within each table. In other words if
# pi^xixj_ab is the expected probability for the pair of variables xi-xj
# and categories a and b, then index a runs the fastest of all, followed by b,
# then by j, and lastly by i.
pairwiseExpProbVec <- function(ind.vec, th.rho.vec) {
prob.vec <- rep(NA, length(ind.vec$index.thres.var1.of.pair))
prob.vec[ind.vec$index.thres.var1.of.pair == 0 |
ind.vec$index.thres.var2.of.pair == 0] <- 0
prob.vec[ind.vec$last.thres.var1.of.pair &
ind.vec$last.thres.var2.of.pair] <- 1
prob.vec[ind.vec$last.thres.var1.of.pair &
ind.vec$index.thres.var2.of.pair != 0 &
!ind.vec$last.thres.var2.of.pair] <-
pnorm(th.rho.vec$thres.var2.for.last.cat.var1)
prob.vec[ind.vec$last.thres.var2.of.pair &
ind.vec$index.thres.var1.of.pair != 0 &
!ind.vec$last.thres.var1.of.pair] <-
pnorm(th.rho.vec$thres.var1.for.last.cat.var2)
prob.vec[is.na(prob.vec)] <- pbivnorm(
th.rho.vec$thres.var1.of.pair,
th.rho.vec$thres.var2.of.pair,
th.rho.vec$rho.vector
)
cum.term1 <- prob.vec[ind.vec$index.thres.var1.of.pair != 0 &
ind.vec$index.thres.var2.of.pair != 0]
cum.term2 <- prob.vec[ind.vec$index.thres.var1.of.pair != 0 &
!ind.vec$last.thres.var2.of.pair]
cum.term3 <- prob.vec[ind.vec$index.thres.var2.of.pair != 0 &
!ind.vec$last.thres.var1.of.pair]
cum.term4 <- prob.vec[!ind.vec$last.thres.var1.of.pair &
!ind.vec$last.thres.var2.of.pair]
PI <- cum.term1 - cum.term2 - cum.term3 + cum.term4
# added by YR 11 Nov 2012 to avoid NaN/-Inf
# log(.Machine$double.eps) = -36.04365
# all elements should be strictly positive
PI[PI < .Machine$double.eps] <- .Machine$double.eps
PI
}
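# The four cumulative terms above implement the usual rectangle identity for a
# single cell,
#   pi^xixj_ab = Phi2(tau_a, tau_b; rho) - Phi2(tau_{a-1}, tau_b; rho)
#                - Phi2(tau_a, tau_{b-1}; rho) + Phi2(tau_{a-1}, tau_{b-1}; rho).
# Guarded check for one interior cell, using the pbivnorm package directly
# (the same function called unqualified above); the numbers are arbitrary.
if (FALSE) {
  tau.lo <- c(-1, -0.5); tau.hi <- c(1, 0.5); rho <- 0.3
  pbivnorm::pbivnorm(tau.hi[1], tau.hi[2], rho) -
    pbivnorm::pbivnorm(tau.lo[1], tau.hi[2], rho) -
    pbivnorm::pbivnorm(tau.hi[1], tau.lo[2], rho) +
    pbivnorm::pbivnorm(tau.lo[1], tau.lo[2], rho)
}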
# derLtoRho
# input: ind.vec - the output of function LongVecInd
# th.rho.vec - the output of function LongVecTH.Rho
# n.xixj - a vector with the observed frequency for every combination
# of categories and every pair. The frequencies are given in
# the same order as the expected probabilities in the output of
# pairwiseExpProbVec output
# pi.xixj - the output of pairwiseExpProbVec function
# no.x - the number of ordinal variables
# output: the vector of der.L.to.rho, each element corresponds to
# der.Lxixj.to.rho.xixj where j runs faster than i
derLtoRho <- function(ind.vec, th.rho.vec, n.xixj, pi.xixj, no.x) {
prob.vec <- rep(NA, length(ind.vec$index.thres.var1.of.pair))
prob.vec[ind.vec$index.thres.var1.of.pair == 0 |
ind.vec$index.thres.var2.of.pair == 0 |
ind.vec$last.thres.var1.of.pair |
ind.vec$last.thres.var2.of.pair] <- 0
prob.vec[is.na(prob.vec)] <- dbinorm(th.rho.vec$thres.var1.of.pair,
th.rho.vec$thres.var2.of.pair,
rho = th.rho.vec$rho.vector
)
den.term1 <- prob.vec[ind.vec$index.thres.var1.of.pair != 0 &
ind.vec$index.thres.var2.of.pair != 0]
den.term2 <- prob.vec[ind.vec$index.thres.var1.of.pair != 0 &
!ind.vec$last.thres.var2.of.pair]
den.term3 <- prob.vec[ind.vec$index.thres.var2.of.pair != 0 &
!ind.vec$last.thres.var1.of.pair]
den.term4 <- prob.vec[!ind.vec$last.thres.var1.of.pair &
!ind.vec$last.thres.var2.of.pair]
der.pi.xixj.to.rho.xixj <- den.term1 - den.term2 - den.term3 + den.term4
prod.terms <- (n.xixj / pi.xixj) * der.pi.xixj.to.rho.xixj
# to get der.Lxixj.to.rho.xixj we should add up all the elements of prod.terms
# (i.e. (n.xixj / pi.xixj) * der.pi.xixj.to.rho.xixj) that correspond to the
# pair xi-xj; to do so:
xnew <- lapply(
ind.vec[c("index.pairs.extended")],
function(y) {
y[ind.vec$index.thres.var1.of.pair != 0 &
ind.vec$index.thres.var2.of.pair != 0]
}
)
# der.L.to.rho is:
tapply(prod.terms, xnew$index.pairs.extended, sum)
}
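# The density terms above rely on the standard identity
#   d Phi2(tau_a, tau_b; rho) / d rho = phi2(tau_a, tau_b; rho),
# so der.pi.xixj.to.rho.xixj is the same four-term difference as in
# pairwiseExpProbVec(), with the bivariate density in place of the CDF.
# Guarded finite-difference check of the identity (dbinorm() is the internal
# bivariate normal density used above); the numbers are arbitrary.
if (FALSE) {
  a <- 0.7; b <- -0.2; rho <- 0.3; h <- 1e-6
  (pbivnorm::pbivnorm(a, b, rho + h) -
     pbivnorm::pbivnorm(a, b, rho - h)) / (2 * h)
  dbinorm(a, b, rho)   # should closely agree
}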
###########################################################################
# derLtoTau
# input: ind.vec - the output of function LongVecInd
# th.rho.vec - the output of function LongVecTH.Rho
# n.xixj - a vector with the observed frequency for every combination
# of categories and every pair. The frequencies are given in
# the same order as the expected probabilities in the output of
# pairwiseExpProbVec output
# pi.xixj - the output of pairwiseExpProbVec function
# output: the vector of der.L.to.tau where the elements are ordered as follows:
# the thresholds of each variable with respect to ascending order of
# the variable index (i.e. thres_var1, thres_var2, etc.) and within
# each variable the thresholds in ascending order.
derLtoTau <- function(ind.vec, th.rho.vec, n.xixj, pi.xixj, no.x = 0L) {
# to compute der.pi.xixj.to.tau.xi
xi <- lapply(
ind.vec[c(
"index.thres.var2.of.pair",
"last.thres.var2.of.pair"
)],
function(y) {
y[!(ind.vec$index.thres.var1.of.pair == 0 |
ind.vec$last.thres.var1.of.pair)]
}
)
cum.prob.vec <- rep(NA, length(xi$index.thres.var2.of.pair))
cum.prob.vec[xi$index.thres.var2.of.pair == 0] <- 0
cum.prob.vec[xi$last.thres.var2.of.pair] <- 1
denom <- sqrt(1 - (th.rho.vec$rho.vector * th.rho.vec$rho.vector))
cum.prob.vec[is.na(cum.prob.vec)] <-
pnorm((th.rho.vec$thres.var2.of.pair -
th.rho.vec$rho.vector * th.rho.vec$thres.var1.of.pair) /
denom)
den.prob.vec <- dnorm(th.rho.vec$thres.var1.for.dnorm.in.der.pi.to.tau.xi)
der.pi.xixj.to.tau.xi <- den.prob.vec *
(cum.prob.vec[xi$index.thres.var2.of.pair != 0] -
cum.prob.vec[!xi$last.thres.var2.of.pair])
# to compute der.pi.xixj.to.tau.xj
xj <- lapply(
ind.vec[c(
"index.thres.var1.of.pair",
"last.thres.var1.of.pair"
)],
function(y) {
y[!(ind.vec$index.thres.var2.of.pair == 0 |
ind.vec$last.thres.var2.of.pair)]
}
)
cum.prob.vec <- rep(NA, length(xj$index.thres.var1.of.pair))
cum.prob.vec[xj$index.thres.var1.of.pair == 0] <- 0
cum.prob.vec[xj$last.thres.var1.of.pair] <- 1
denom <- sqrt(1 - (th.rho.vec$rho.vector * th.rho.vec$rho.vector))
cum.prob.vec[is.na(cum.prob.vec)] <-
pnorm((th.rho.vec$thres.var1.of.pair -
th.rho.vec$rho.vector * th.rho.vec$thres.var2.of.pair) /
denom)
den.prob.vec <- dnorm(th.rho.vec$thres.var2.for.dnorm.in.der.pi.to.tau.xj)
der.pi.xixj.to.tau.xj <- den.prob.vec *
(cum.prob.vec[xj$index.thres.var1.of.pair != 0] -
cum.prob.vec[!xj$last.thres.var1.of.pair])
# to compute der.Lxixj.to.tau.xi and der.Lxixj.to.tau.xj
n.over.pi <- n.xixj / pi.xixj
# get the appropriate differences of n.over.pi for der.Lxixj.to.tau.xi and
# der.Lxixj.to.tau.xj
x3a <- lapply(ind.vec, function(y) {
y[!(ind.vec$index.thres.var1.of.pair == 0 |
ind.vec$index.thres.var2.of.pair == 0)]
})
diff.n.over.pi.to.xi <- n.over.pi[!x3a$last.thres.var1.of.pair] -
n.over.pi[x3a$index.thres.var1.of.pair != 1]
diff.n.over.pi.to.xj <- n.over.pi[!x3a$last.thres.var2.of.pair] -
n.over.pi[x3a$index.thres.var2.of.pair != 1]
# terms.der.Lxixj.to.tau.xi and terms.der.Lxixj.to.tau.xj
terms.der.Lxixj.to.tau.xi <- diff.n.over.pi.to.xi * der.pi.xixj.to.tau.xi
terms.der.Lxixj.to.tau.xj <- diff.n.over.pi.to.xj * der.pi.xixj.to.tau.xj
# to add appropriately elements of terms.der.Lxixj.to.tau.xi
x3b <- lapply(
ind.vec[c(
"index.pairs.extended",
"index.thres.var1.of.pair"
)],
function(y) {
y[!(ind.vec$index.thres.var1.of.pair == 0 |
ind.vec$last.thres.var1.of.pair |
ind.vec$index.thres.var2.of.pair == 0)]
}
)
# to add appropriately elements of terms.der.Lxixj.to.tau.xj
x4b <- lapply(
ind.vec[c(
"index.pairs.extended",
"index.thres.var2.of.pair"
)],
function(y) {
y[!(ind.vec$index.thres.var2.of.pair == 0 |
ind.vec$last.thres.var2.of.pair |
ind.vec$index.thres.var1.of.pair == 0)]
}
)
ind.pairs <- utils::combn(no.x, 2)
# der.Lxixj.to.tau.xi is a matrix with nrow = no.pairs and
# ncol = max(no.of.free.thres); thus it may contain NA's. Similarly for
# der.Lxixj.to.tau.xj.
der.Lxixj.to.tau.xi <- tapply(
terms.der.Lxixj.to.tau.xi,
list(
x3b$index.pairs.extended,
x3b$index.thres.var1.of.pair
),
sum
)
der.Lxixj.to.tau.xj <- tapply(
terms.der.Lxixj.to.tau.xj,
list(
x4b$index.pairs.extended,
x4b$index.thres.var2.of.pair
),
sum
)
# to add appropriately the terms of der.Lxixj.to.tau.xi and
# der.Lxixj.to.tau.xj
split.der.Lxixj.to.tau.xi <- split(
as.data.frame(der.Lxixj.to.tau.xi),
ind.pairs[1, ]
)
sums.der.Lxixj.to.tau.xi <- lapply(
split.der.Lxixj.to.tau.xi,
function(x) {
y <- apply(x, 2, sum)
y[!is.na(y)]
}
)
# Note: NA's occur when the ordinal variables have different numbers of
# response categories
split.der.Lxixj.to.tau.xj <- split(
as.data.frame(der.Lxixj.to.tau.xj),
ind.pairs[2, ]
)
sums.der.Lxixj.to.tau.xj <- lapply(
split.der.Lxixj.to.tau.xj,
function(x) {
y <- apply(x, 2, sum)
y[!is.na(y)]
}
)
# to get der.L.to.tau
c(
sums.der.Lxixj.to.tau.xi[[1]],
c(unlist(sums.der.Lxixj.to.tau.xi[2:(no.x - 1)]) +
unlist(sums.der.Lxixj.to.tau.xj[1:(no.x - 2)])),
sums.der.Lxixj.to.tau.xj[[no.x - 1]]
)
}
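# The cum.prob.vec terms above use the standard identity
#   d Phi2(tau_a, tau_b; rho) / d tau_a
#     = phi(tau_a) * Phi((tau_b - rho * tau_a) / sqrt(1 - rho^2)),
# so each derivative of a cell probability with respect to a threshold is a
# difference of two such terms. Guarded finite-difference check of the
# identity (uses the pbivnorm package, as above); the numbers are arbitrary.
if (FALSE) {
  a <- 0.7; b <- -0.2; rho <- 0.3; h <- 1e-6
  (pbivnorm::pbivnorm(a + h, b, rho) -
     pbivnorm::pbivnorm(a - h, b, rho)) / (2 * h)
  dnorm(a) * pnorm((b - rho * a) / sqrt(1 - rho^2))   # should closely agree
}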