# utility functions for pairwise maximum likelihood
# stub for fml_deriv1
fml_deriv1 <- function(Sigma.hat = NULL, # model-based var/cov/cor
TH = NULL, # model-based thresholds + means
th.idx = NULL, # threshold idx per variable
num.idx = NULL, # which variables are numeric
X = NULL, # data
eXo = NULL, # external covariates
lavcache = NULL, # housekeeping stuff
scores = FALSE, # return case-wise scores
negative = TRUE) {
stop("not implemented")
}
# the first derivative of the pairwise logLik function with respect to the
# thresholds/slopes/var/correlations; together with DELTA, we can use the
# chain rule to get the gradient
# this is adapted from code written by Myrsini Katsikatsou
# first attempt - YR 5 okt 2012
pml_deriv1 <- function(Sigma.hat = NULL, # model-based var/cov/cor
Mu.hat = NULL, # model-based means
TH = NULL, # model-based thresholds + means
th.idx = NULL, # threshold idx per variable
num.idx = NULL, # which variables are numeric
X = NULL, # data
eXo = NULL, # external covariates
lavcache = NULL, # housekeeping stuff
PI = NULL, # slopes
missing = "listwise", # how to deal with missings
scores = FALSE, # return case-wise scores
negative = TRUE) { # multiply by -1
# diagonal of Sigma.hat is not necessarily 1, even for categorical vars
Sigma.hat2 <- Sigma.hat
if(length(num.idx) > 0L) {
diag(Sigma.hat2)[-num.idx] <- 1
} else {
diag(Sigma.hat2) <- 1
}
Cor.hat <- cov2cor(Sigma.hat2) # to get correlations (rho!)
cors <- lav_matrix_vech(Cor.hat, diagonal = FALSE)
if(any(abs(cors) > 1)) {
# what should we do now... force cov2cor?
#cat("FFFFOOOORRRRRCEEE PD!\n")
#Sigma.hat <- Matrix::nearPD(Sigma.hat)
#Sigma.hat <- as.matrix(Sigma.hat$mat)
#Sigma.hat <- cov2cor(Sigma.hat)
#cors <- Sigma.hat[lower.tri(Sigma.hat)]
idx <- which( abs(cors) > 0.99 )
cors[idx] <- 0.99 # clip
#cat("CLIPPING!\n")
}
nvar <- nrow(Sigma.hat)
pstar <- nvar*(nvar-1)/2
ov.types <- rep("ordered", nvar)
if(length(num.idx) > 0L) ov.types[num.idx] <- "numeric"
if(!is.null(eXo)) {
nexo <- ncol(eXo)
} else {
nexo <- 0
}
if(all(ov.types == "numeric")) {
N.TH <- nvar
} else {
N.TH <- length(th.idx)
}
N.SL <- nvar * nexo
N.VAR <- length(num.idx)
N.COR <- pstar
# add num.idx to th.idx
if(length(num.idx) > 0L) {
th.idx[ th.idx == 0 ] <- num.idx
}
#print(Sigma.hat); print(TH); print(th.idx); print(num.idx); print(str(X))
# shortcut for ordinal-only/no-exo case
if(!scores && all(ov.types == "ordered") && nexo == 0L) {
gradient <- grad_tau_rho(no.x = nvar,
all.thres = TH,
index.var.of.thres = th.idx,
rho.xixj = cors,
n.xixj.vec = lavcache$bifreq,
out.LongVecInd = lavcache$LONG)
if(missing == "available.cases") {
uniPI <- univariateExpProbVec(TH = TH, th.idx = th.idx)
tmp <- lavcache$uniweights / uniPI
var.idx <- split(th.idx, th.idx)
var.idx <- unlist( lapply(var.idx, function(x){c(x,x[1])}) )
tmp.varwise <- split(tmp, var.idx)
tmp1 <- unlist( lapply(tmp.varwise,
function(x){ c(x[-length(x)]) } ) )
tmp2 <- unlist( lapply(tmp.varwise, function(x){ c(x[-1]) } ) )
uni.der.tau <- dnorm(TH) * (tmp1 - tmp2)
nTH <- length(TH)
gradient[1:nTH] <- gradient[1:nTH] + uni.der.tau
}
if(negative) {
gradient <- -1 * gradient
}
return(gradient)
}
# in this order: TH/MEANS + SLOPES + VAR + COR
GRAD.size <- N.TH + N.SL + N.VAR + N.COR
# scores or gradient?
if(scores) {
SCORES <- matrix(0, nrow(X), GRAD.size) # we will sum up over all pairs
} else {
GRAD <- matrix(0, pstar, GRAD.size) # each pair is a row
}
PSTAR <- matrix(0, nvar, nvar) # utility matrix, to get indices
PSTAR[lav_matrix_vech_idx(nvar, diagonal = FALSE)] <- 1:pstar
N <- length(X[,1])
for(j in seq_len(nvar-1L)) {
for(i in (j+1L):nvar) {
# cat(" i = ", i, " j = ", j, "\n") # debug only
pstar.idx <- PSTAR[i,j]
cor.idx <- N.TH + N.SL + N.VAR + PSTAR[i,j]
th.idx_i <- which(th.idx == i)
th.idx_j <- which(th.idx == j)
if(nexo > 0L) {
sl.idx_i <- N.TH + seq(i, by=nvar, length.out=nexo)
sl.idx_j <- N.TH + seq(j, by=nvar, length.out=nexo)
if(length(num.idx) > 0L) {
var.idx_i <- N.TH + N.SL + match(i, num.idx)
var.idx_j <- N.TH + N.SL + match(j, num.idx)
}
} else {
if(length(num.idx) > 0L) {
var.idx_i <- N.TH + match(i, num.idx)
var.idx_j <- N.TH + match(j, num.idx)
}
}
if(ov.types[i] == "numeric" && ov.types[j] == "numeric") {
if(nexo > 1L) {
stop("psindex ERROR: mixed + exo in PML not implemented; try optim.gradient = \"numerical\"")
}
SC <- lav_mvnorm_scores_mu_vech_sigma(Y = X[,c(i,j)],
Mu = Mu.hat[c(i,j)], Sigma = Sigma.hat[c(i,j), c(i,j)])
if(scores) {
if(all(ov.types == "numeric") && nexo == 0L) {
# MU1 + MU2
SCORES[, c(i,j)] <- SCORES[, c(i,j)] + SC[, c(1,2)]
# VAR1 + COV_12 + VAR2
var.idx <- ( nvar +
lav_matrix_vech_match_idx(nvar, idx = c(i,j)) )
SCORES[, var.idx] <- SCORES[, var.idx] + SC[, c(3,4,5)]
} else { # mixed ordered/continuous
# MU
mu.idx <- c(th.idx_i, th.idx_j)
SCORES[, mu.idx] <- SCORES[, mu.idx] + (-1)*SC[, c(1,2)]
# VAR+COV
var.idx <- c(var.idx_i, cor.idx, var.idx_j)
SCORES[, var.idx] <- SCORES[, var.idx] + SC[, c(3,4,5)]
}
} else {
if(all(ov.types == "numeric") && nexo == 0L) {
mu.idx <- c(i,j)
sigma.idx <- ( nvar +
lav_matrix_vech_match_idx(nvar, idx = c(i,j)) )
# MU1 + MU2
GRAD[pstar.idx, mu.idx] <-
colSums(SC[,c(1,2)], na.rm = TRUE)
} else {
mu.idx <- c(th.idx_i, th.idx_j)
sigma.idx <- c(var.idx_i, cor.idx, var.idx_j)
# MU (reverse sign!)
GRAD[pstar.idx, mu.idx] <-
-1 * colSums(SC[,c(1,2)], na.rm = TRUE)
}
# SIGMA
GRAD[pstar.idx, sigma.idx] <-
colSums(SC[,c(3,4,5)], na.rm = TRUE)
} # gradient only
} else if(ov.types[i] == "numeric" && ov.types[j] == "ordered") {
# polyserial correlation
if(nexo > 1L) {
stop("psindex ERROR: mixed + exo in PML not implemented; try optim.gradient = \"numerical\"")
}
SC.COR.UNI <- ps_cor_scores_no_exo(Y1 = X[,i], Y2 = X[,j],
var.y1 = Sigma.hat[i,i],
eta.y1 = rep(Mu.hat[i], N),
th.y2 = TH[ th.idx == j ],
rho = Cor.hat[i,j],
sigma.correction = TRUE)
if(scores) {
# MU
SCORES[, th.idx_i] <- ( SCORES[, th.idx_i] +
-1 * SC.COR.UNI$dx.mu.y1 )
# TH
SCORES[, th.idx_j] <- ( SCORES[, th.idx_j] +
SC.COR.UNI$dx.th.y2 )
# VAR
SCORES[, var.idx_i] <- ( SCORES[, var.idx_i] +
SC.COR.UNI$dx.var.y1 )
# COR
SCORES[, cor.idx] <- ( SCORES[, cor.idx] +
SC.COR.UNI$dx.rho )
} else {
# MU
GRAD[pstar.idx, th.idx_i] <-
-1 * sum(SC.COR.UNI$dx.mu.y1, na.rm = TRUE)
# TH
GRAD[pstar.idx, th.idx_j] <-
colSums(SC.COR.UNI$dx.th.y2, na.rm = TRUE)
# VAR
GRAD[pstar.idx, var.idx_i] <-
sum(SC.COR.UNI$dx.var.y1, na.rm = TRUE)
# COR
GRAD[pstar.idx, cor.idx] <-
sum(SC.COR.UNI$dx.rho, na.rm = TRUE)
} # grad only
} else if(ov.types[j] == "numeric" && ov.types[i] == "ordered") {
# polyserial correlation
if(nexo > 1L) {
stop("psindex ERROR: mixed + exo in PML not implemented; try optim.gradient = \"numerical\"")
}
SC.COR.UNI <- ps_cor_scores_no_exo(Y1 = X[,j], Y2 = X[,i],
var.y1 = Sigma.hat[j,j],
eta.y1 = rep(Mu.hat[j], N),
th.y2 = TH[ th.idx == i ],
rho = Cor.hat[i,j],
sigma.correction = TRUE)
if(scores) {
# MU
SCORES[, th.idx_j] <- ( SCORES[, th.idx_j] +
-1 * SC.COR.UNI$dx.mu.y1 )
# TH
SCORES[, th.idx_i] <- ( SCORES[, th.idx_i] +
SC.COR.UNI$dx.th.y2 )
# VAR
SCORES[, var.idx_j] <- ( SCORES[, var.idx_j] +
SC.COR.UNI$dx.var.y1 )
# COR
SCORES[, cor.idx] <- ( SCORES[, cor.idx] +
SC.COR.UNI$dx.rho )
} else {
# MU
GRAD[pstar.idx, th.idx_j] <-
-1 * sum(SC.COR.UNI$dx.mu.y1, na.rm = TRUE)
# TH
GRAD[pstar.idx, th.idx_i] <-
colSums(SC.COR.UNI$dx.th.y2, na.rm = TRUE)
# VAR
GRAD[pstar.idx, var.idx_j] <-
sum(SC.COR.UNI$dx.var.y1, na.rm = TRUE)
# COR
GRAD[pstar.idx, cor.idx] <-
sum(SC.COR.UNI$dx.rho, na.rm = TRUE)
} # grad only
} else if(ov.types[i] == "ordered" && ov.types[j] == "ordered") {
# polychoric correlation
if(nexo == 0L) {
SC.COR.UNI <- pc_cor_scores(Y1 = X[,i],
Y2 = X[,j],
eXo = NULL,
rho = Sigma.hat[i,j],
fit.y1 = NULL, # fixme
fit.y2 = NULL, # fixme
th.y1 = TH[ th.idx == i ],
th.y2 = TH[ th.idx == j ],
sl.y1 = NULL,
sl.y2 = NULL,
na.zero = TRUE)
} else {
SC.COR.UNI <-
pc_cor_scores_PL_with_cov(Y1 = X[,i],
Y2 = X[,j],
eXo = eXo,
Rho = Sigma.hat[i,j],
th.y1 = TH[ th.idx == i ],
th.y2 = TH[ th.idx == j ],
sl.y1 = PI[i,],
sl.y2 = PI[j,],
missing.ind = missing)
}
if(scores) {
# TH
SCORES[,th.idx_i] <- SCORES[,th.idx_i] + SC.COR.UNI$dx.th.y1
SCORES[,th.idx_j] <- SCORES[,th.idx_j] + SC.COR.UNI$dx.th.y2
# SL
if(nexo > 0L) {
SCORES[,sl.idx_i] <- SCORES[,sl.idx_i] + SC.COR.UNI$dx.sl.y1
SCORES[,sl.idx_j] <- SCORES[,sl.idx_j] + SC.COR.UNI$dx.sl.y2
}
# NO VAR
# RHO
SCORES[,cor.idx] <- SCORES[,cor.idx] + SC.COR.UNI$dx.rho
} else {
# TH
if(length(th.idx_i) > 1L) {
GRAD[pstar.idx, th.idx_i] <-
colSums(SC.COR.UNI$dx.th.y1, na.rm = TRUE)
} else {
GRAD[pstar.idx, th.idx_i] <-
sum(SC.COR.UNI$dx.th.y1, na.rm = TRUE)
}
if(length(th.idx_j) > 1L) {
GRAD[pstar.idx, th.idx_j] <-
colSums(SC.COR.UNI$dx.th.y2, na.rm = TRUE)
} else {
GRAD[pstar.idx, th.idx_j] <-
sum(SC.COR.UNI$dx.th.y2, na.rm = TRUE)
}
# SL
if(nexo > 0L) {
if(length(sl.idx_i) > 1L) {
GRAD[pstar.idx, sl.idx_i] <-
colSums(SC.COR.UNI$dx.sl.y1, na.rm = TRUE)
} else {
GRAD[pstar.idx, sl.idx_i] <-
sum(SC.COR.UNI$dx.sl.y1, na.rm = TRUE)
}
if(length(sl.idx_j) > 1L) {
GRAD[pstar.idx, sl.idx_j] <-
colSums(SC.COR.UNI$dx.sl.y2, na.rm = TRUE)
} else {
GRAD[pstar.idx, sl.idx_j] <-
sum(SC.COR.UNI$dx.sl.y2, na.rm = TRUE)
}
}
# NO VAR
# RHO
GRAD[pstar.idx, cor.idx] <-
sum(SC.COR.UNI$dx.rho, na.rm = TRUE)
}
#GRAD2 <- numDeriv::grad(func = pc_logl_x,
# x = c(Sigma.hat[i,j],
# TH[ th.idx == i ],
# TH[ th.idx == j]),
# Y1 = X[,i],
# Y2 = X[,j],
# eXo = eXo,
# nth.y1 = sum( th.idx == i ),
# nth.y2 = sum( th.idx == j ))
}
}
}
if(missing == "available.cases" && all(ov.types == "ordered")) {
if(nexo == 0L) {
UNI_SCORES <- matrix(0, nrow(X), N.TH)
for(i in seq_len(nvar)) {
th.idx_i <- which(th.idx == i)
derY1 <- uni_scores(Y1 = X[,i], th.y1 = TH[ th.idx == i ],
eXo = NULL, sl.y1 = NULL,
weights.casewise = lavcache$uniweights.casewise)
UNI_SCORES[,th.idx_i] <- derY1$dx.th.y1
}
} else {
UNI_SCORES <- matrix(0, nrow(X), ncol=(N.TH+N.SL) )
for(i in seq_len(nvar)) {
th.idx_i <- which(th.idx == i)
sl.idx_i <- N.TH + seq(i, by=nvar, length.out=nexo)
derY1 <- uni_scores(Y1 = X[,i], th.y1 = TH[ th.idx == i ],
eXo = eXo, sl.y1 = PI[i,],
weights.casewise = lavcache$uniweights.casewise)
UNI_SCORES[,th.idx_i] <- derY1$dx.th.y1
UNI_SCORES[,sl.idx_i] <- derY1$dx.sl.y1
}
if(scores) {
SCORES <- SCORES[, 1:(N.TH+N.SL)] + UNI_SCORES
} else {
uni_gradient <- colSums(UNI_SCORES)
}
}
}
# do we need scores?
if(scores) return(SCORES)
# DEBUG
#:print(GRAD)
###########
# gradient is sum over all pairs
gradient <- colSums(GRAD, na.rm = TRUE)
if(missing == "available.cases" && all(ov.types == "ordered")) {
if(nexo == 0L) {
gradient[1:N.TH] <- gradient + uni_gradient
} else {
gradient[1:(N.TH+N.SL)] <- gradient + uni_gradient
}
}
# we multiply by -1 because we minimize
if(negative) {
gradient <- -1 * gradient
}
gradient
}
### all code below written by Myrsini Katsikatsou
# The function grad_tau_rho
# input:
# no.x - is scalar, the number of ordinal variables
# all.thres - is vector containing the thresholds of all variables in the
# following order: thres_var1, thres_var2,..., thres_var_p
# within each variable the thresholds are in ascending order
# Note that all.thres do NOT contain tau_0=-Inf and tau_last=Inf
# for all variables.
# index.var.of.thres - a vector keeping track to which variable the thresholds
# in all.thres belongs to, it is of the form
# (1,1,1..., 2,2,2,..., p,p,p,...)
# rho.xixj - is the vector of all correlations where j runs faster than i
# i.e. the order is rho_12, rho_13, ..., rho_1p, rho_23, ..., rho_2p,
# etc.
# n.xixj.vec - a vector with the observed frequency for every combination
# of categories and every pair. The frequencies are given in
# the same order as the expected probabilities in the output of
# pairwiseExpProbVec output
# out.LongVecInd - it is the output of function LongVecInd
# the output: it gives the elements of der.L.to.tau and der.L.to.rho in this
# order. The elements of der.L.to.tau where the elements are
# ordered as follows: the thresholds of each variable with respect
# to ascending order of the variable index (i.e. thres_var1,
# thres_var2, etc.) and within each variable the thresholds in
# ascending order.
# The elements of vector der.L.to.rho are der.Lxixj.to.rho.xixj
# where j runs faster than i.
# The function depends on four other functions: LongVecTH.Rho,
# pairwiseExpProbVec, derLtoRho, and derLtoTau, all given below.
# if n.xixj.ab is either an array or a list the following should be done
#n.xixj.vec <- if(is.array(n.xixj.ab)) {
# c(n.xixj.ab)
# } else if(is.list(n.xixj.ab)){
# unlist(n.xixj.ab)
# }
grad_tau_rho <- function(no.x, all.thres, index.var.of.thres, rho.xixj,
n.xixj.vec, out.LongVecInd) {
out.LongVecTH.Rho <- LongVecTH.Rho(no.x=no.x, all.thres=all.thres,
index.var.of.thres=index.var.of.thres,
rho.xixj=rho.xixj)
pi.xixj <- pairwiseExpProbVec(ind.vec= out.LongVecInd,
th.rho.vec= out.LongVecTH.Rho)
out.derLtoRho <- derLtoRho(ind.vec= out.LongVecInd,
th.rho.vec= out.LongVecTH.Rho,
n.xixj=n.xixj.vec, pi.xixj=pi.xixj, no.x=no.x)
out.derLtoTau <- derLtoTau(ind.vec= out.LongVecInd,
th.rho.vec= out.LongVecTH.Rho,
n.xixj=n.xixj.vec, pi.xixj=pi.xixj,
no.x=no.x)
grad <- c(out.derLtoTau, out.derLtoRho)
attr(grad, "pi.xixj") <- pi.xixj
grad
}
################################################################################
# The input of the function LongVecInd:
# no.x is scalar, the number of ordinal variables
# all.thres is vector containing the thresholds of all variables in the
# following order: thres_var1, thres_var2,..., thres_var_p
# within each variable the thresholds are in ascending order
# Note that all.thres does NOT contain the first and the last threshold of the
# variables, i.e. tau_0=-Inf and tau_last=Inf
# index.var.of.thres is a vector keeping track to which variable the thresholds
# in all.thres belongs to, it is of the form (1,1,1..., 2,2,2,..., p,p,p,...)
# The output of the function:
# it is a list of vectors keeping track of the indices
# of thresholds, of variables, and of pairs, and two T/F vectors indicating
# if the threshold index corresponds to the last threshold of a variable; all
# these for all pairs of variables. All are needed for the
# computation of expected probabilities, der.L.to.rho, and der.L.to.tau
# all duplications of indices are done as follows: within each pair of variables,
# xi-xj, if for example we want to duplicate the indices of the thresholds,
# tau^xi_a and tau^xj_b, then index a runs faster than b, i.e. for each b we
# take all different tau^xi's, and then we proceed to the next b and do the
# same. In other words if it was tabulated we fill the table columnwise.
# All pairs xi-xj are taken with index j running faster than i.
# Note that each variable may have a different number of categories, that's why
# for example we take lists below.
LongVecInd <- function(no.x, all.thres, index.var.of.thres) {
no.thres.of.each.var <- tapply(all.thres, index.var.of.thres, length)
index.pairs <- utils::combn(no.x,2)
no.pairs <- ncol(index.pairs)
# index.thres.var1.of.pair and index.thres.var2.of.pair contain the indices of
# of all thresholds (from tau_0 which is -Inf to tau_last which is Inf)
# for any pair of variables appropriately duplicated so that the two vectors
# together give all possible combinations of thresholds indices
# Since here the threshold indices 0 and "last" are included, the vectors are
# longer than the vectors thres.var1.of.pair and thres.var2.of.pair above.
index.thres.var1.of.pair <- vector("list", no.pairs)
index.thres.var2.of.pair <- vector("list", no.pairs)
# index.var1.of.pair and index.var2.of.pair keep track the index of the
# variable that the thresholds in index.thres.var1.of.pair and
# index.thres.var2.of.pair belong to, respectively. So, these two variables
# are of same length as that of index.thres.var1.of.pair and
# index.thres.var2.of.pair
index.var1.of.pair <- vector("list", no.pairs)
index.var2.of.pair <- vector("list", no.pairs)
# index.pairs.extended gives the index of the pair for each pair of variables
# e.g. pair of variables 1-2 has index 1, variables 1-3 has index 2, etc.
# The vector is of the same length as index.thres.var1.of.pair,
# index.thres.var2.of.pair, index.var1.of.pair, and index.var2.of.pair
index.pairs.extended <- vector("list", no.pairs)
for (i in 1:no.pairs) {
no.thres.var1.of.pair <- no.thres.of.each.var[index.pairs[1,i]]
no.thres.var2.of.pair <- no.thres.of.each.var[index.pairs[2,i]]
index.thres.var1.of.pair[[i]] <- rep(0:(no.thres.var1.of.pair+1),
times= (no.thres.var2.of.pair+2) )
index.thres.var2.of.pair[[i]] <- rep(0:(no.thres.var2.of.pair+1),
each= (no.thres.var1.of.pair+2) )
length.vec <- length(index.thres.var1.of.pair[[i]] )
index.var1.of.pair[[i]] <- rep(index.pairs[1,i], length.vec)
index.var2.of.pair[[i]] <- rep(index.pairs[2,i], length.vec)
index.pairs.extended[[i]] <- rep(i, length.vec)
}
index.thres.var1.of.pair <- unlist(index.thres.var1.of.pair)
index.thres.var2.of.pair <- unlist(index.thres.var2.of.pair)
index.var1.of.pair <- unlist(index.var1.of.pair)
index.var2.of.pair <- unlist(index.var2.of.pair)
index.pairs.extended <- unlist(index.pairs.extended)
# indicator vector (T/F) showing which elements of index.thres.var1.of.pair
# correspond to the last thresholds of variables. The length is the same as
# that of index.thres.var1.of.pair.
last.thres.var1.of.pair <- index.var1.of.pair==1 &
index.thres.var1.of.pair==(no.thres.of.each.var[1]+1)
# we consider up to variable (no.x-1) because in pairs xi-xj where j runs
# faster than i, the last variable is not included in the column of xi's
for(i in 2:(no.x-1)) {
new.condition <- index.var1.of.pair==i &
index.thres.var1.of.pair==(no.thres.of.each.var[i]+1)
last.thres.var1.of.pair <- last.thres.var1.of.pair | new.condition
}
# indicator vector (T/F) showing which elements of index.thres.var2.of.pair
# correspond to the last thresholds of variables. Notet that in pairs xi-xj
# where j runs faster than i, the first variable is not included in the column
# of xj's. That's why we start with variable 2. The length is the same as
# that of index.thres.var1.of.pair.
last.thres.var2.of.pair <- index.var2.of.pair==2 &
index.thres.var2.of.pair==(no.thres.of.each.var[2]+1)
for(i in 3:no.x) {
new.condition <- index.var2.of.pair==i &
index.thres.var2.of.pair==(no.thres.of.each.var[i]+1)
last.thres.var2.of.pair <- last.thres.var2.of.pair | new.condition
}
list(index.thres.var1.of.pair = index.thres.var1.of.pair,
index.thres.var2.of.pair = index.thres.var2.of.pair,
index.var1.of.pair = index.var1.of.pair,
index.var2.of.pair = index.var2.of.pair,
index.pairs.extended = index.pairs.extended,
last.thres.var1.of.pair = last.thres.var1.of.pair,
last.thres.var2.of.pair = last.thres.var2.of.pair )
}
################################################################################
# The input of the function LongVecTH.Rho:
# no.x is scalar, the number of ordinal variables
# all.thres is vector containing the thresholds of all variables in the
# following order: thres_var1, thres_var2,..., thres_var_p
# within each variable the thresholds are in ascending order
# Note that all.thres does NOT contain the first and the last threshold of the
# variables, i.e. tau_0=-Inf and tau_last=Inf
# index.var.of.thres is a vector keeping track to which variable the thresholds
# in all.thres belongs to, it is of the form (1,1,1..., 2,2,2,..., p,p,p,...)
# rho.xixj is the vector of all corrlations where j runs faster than i
# i.e. the order is rho_12, rho_13, ..., rho_1p, rho_23, ..., rho_2p, etc.
# The output of the function:
# it is a list of vectors with thresholds and rho's duplicated appropriately,
# all needed for the computation of expected probabilities,
# der.L.to.rho, and der.L.to.tau
# all duplications below are done as follows: within each pair of variables,
# xi-xj, if for example we want to duplicate their thresholds, tau^xi_a and
# tau^xj_b, then index a runs faster than b, i.e. for each b we take all
# different tau^xi's, and then we proceed to the next b and do the same.
# In other words if it was tabulated we fill the table columnwise.
# All pairs xi-xj are taken with index j running faster than i.
# Note that each variable may have a different number of categories, that's why
# for example we take lists below.
LongVecTH.Rho <- function(no.x, all.thres, index.var.of.thres, rho.xixj) {
no.thres.of.each.var <- tapply(all.thres, index.var.of.thres, length)
index.pairs <- utils::combn(no.x,2)
no.pairs <- ncol(index.pairs)
# create the long vectors needed for the computation of expected probabilities
# for each cell and each pair of variables. The vectors thres.var1.of.pair and
# thres.var2.of.pair together give all the possible combinations of the
# thresholds of any two variables. Note the combinations (-Inf, -Inf),
# (-Inf, Inf), (Inf, -Inf), (Inf, Inf) are NOT included. Only the combinations
# of the middle thresholds (tau_1 to tau_(last-1)).
# thres.var1.of.pair and thres.var2.of.pair give the first and the second
# argument, respectively, in functions pbivnorm and dbinorm
thres.var1.of.pair <- vector("list", no.pairs)
thres.var2.of.pair <- vector("list", no.pairs)
# Extending the rho.vector accordingly so that it will be the the third
# argument in pbivnorm and dbinorm functions. It is of same length as
# thres.var1.of.pair and thres.var2.of.pair.
rho.vector <- vector("list", no.pairs)
# thres.var1.for.dnorm.in.der.pi.to.tau.xi and
# thres.var2.for.dnorm.in.der.pi.to.tau.xj give the thresholds of almost
# all variables appropriately duplicated so that the vectors can be used
# as input in dnorm() to compute der.pi.xixj.to.tau.xi and
# der.pi.xixj.to.tau.xj.
# thres.var1.for.dnorm.in.der.pi.to.tau.xi does not contain the thresholds of
# the last variable and thres.var2.for.dnorm.in.der.pi.to.tau.xj those of
# the first variable
thres.var1.for.dnorm.in.der.pi.to.tau.xi <- vector("list", no.pairs)
thres.var2.for.dnorm.in.der.pi.to.tau.xj <- vector("list", no.pairs)
for (i in 1:no.pairs) {
single.thres.var1.of.pair <- all.thres[index.var.of.thres==index.pairs[1,i]]
single.thres.var2.of.pair <- all.thres[index.var.of.thres==index.pairs[2,i]]
# remember that the first (-Inf) and last (Inf) thresholds are not included
# so no.thres.var1.of.pair is equal to number of categories of var1 minus 1
# similarly for no.thres.var2.of.pair
no.thres.var1.of.pair <- no.thres.of.each.var[index.pairs[1,i]]
no.thres.var2.of.pair <- no.thres.of.each.var[index.pairs[2,i]]
thres.var1.of.pair[[i]] <- rep(single.thres.var1.of.pair,
times=no.thres.var2.of.pair )
thres.var2.of.pair[[i]] <- rep(single.thres.var2.of.pair,
each=no.thres.var1.of.pair)
rho.vector[[i]] <- rep(rho.xixj[i], length(thres.var1.of.pair[[i]]))
thres.var1.for.dnorm.in.der.pi.to.tau.xi[[i]] <-
rep(single.thres.var1.of.pair, times=(no.thres.var2.of.pair+1))
thres.var2.for.dnorm.in.der.pi.to.tau.xj[[i]] <-
rep(single.thres.var2.of.pair, each=(no.thres.var1.of.pair+1))
}
thres.var1.of.pair <- unlist(thres.var1.of.pair)
thres.var2.of.pair <- unlist(thres.var2.of.pair)
rho.vector <- unlist(rho.vector)
thres.var1.for.dnorm.in.der.pi.to.tau.xi <-
unlist(thres.var1.for.dnorm.in.der.pi.to.tau.xi)
thres.var2.for.dnorm.in.der.pi.to.tau.xj <-
unlist(thres.var2.for.dnorm.in.der.pi.to.tau.xj)
# thres.var2.for.last.cat.var1 and thres.var1.for.last.cat.var2 are needed
# for the computation of expected probabilities. In the computation of
# \Phi_2(tau1, tau2; rho) when either tau1 or tau2 are Inf then it is enought
# to compute pnorm() with the non-infinite tau as an argument
# In particular when the first variable of the pair has tau_last= Inf
# and the second a non-infite threshold we compute
# pnorm(thres.var2.for.last.cat.var1). Similarly, when the second variable of
# the pair has tau_last=Inf and the first a non-infite threshold we compute
# pnorm(thres.var1.for.last.cat.var2).
thres.var2.for.last.cat.var1 <- vector("list", (no.x-1))
thres.var1.for.last.cat.var2 <- vector("list", (no.x-1))
for (i in 1:(no.x-1)) {
thres.var2.for.last.cat.var1[[i]] <-
c(all.thres[index.var.of.thres %in% (i+1):no.x])
thres.var1.for.last.cat.var2[[i]] <- rep(all.thres[index.var.of.thres==i],
times=(no.x-i))
}
thres.var2.for.last.cat.var1 <- unlist(thres.var2.for.last.cat.var1)
thres.var1.for.last.cat.var2 <- unlist(thres.var1.for.last.cat.var2)
list(thres.var1.of.pair = thres.var1.of.pair, #these 3 of same length
thres.var2.of.pair = thres.var2.of.pair,
rho.vector = rho.vector,
#the following of length dependning on the number of categories
thres.var1.for.dnorm.in.der.pi.to.tau.xi =
thres.var1.for.dnorm.in.der.pi.to.tau.xi ,
thres.var2.for.dnorm.in.der.pi.to.tau.xj =
thres.var2.for.dnorm.in.der.pi.to.tau.xj ,
thres.var2.for.last.cat.var1=thres.var2.for.last.cat.var1,
thres.var1.for.last.cat.var2=thres.var1.for.last.cat.var2 )
}
#########################################################
#########################################################
# The function pairwiseExpProbVec
# input: ind.vec - the output of function LongVecInd
# th.rho.vec - the output of function LongVecTH.Rho
# output: it gives the elements of pairwiseTablesExpected()$pi.tables
# table-wise and column-wise within each table. In other words if
# pi^xixj_ab is the expected probability for the pair of variables xi-xj
# and categories a and b, then index a runs the fastest of all, followed by b,
# then by j, and lastly by i.
pairwiseExpProbVec <- function(ind.vec, th.rho.vec) {
prob.vec <- rep(NA, length(ind.vec$index.thres.var1.of.pair))
prob.vec[ind.vec$index.thres.var1.of.pair==0 |
ind.vec$index.thres.var2.of.pair==0] <- 0
prob.vec[ind.vec$last.thres.var1.of.pair &
ind.vec$last.thres.var2.of.pair] <- 1
prob.vec[ind.vec$last.thres.var1.of.pair &
ind.vec$index.thres.var2.of.pair!=0 &
!ind.vec$last.thres.var2.of.pair] <-
pnorm(th.rho.vec$thres.var2.for.last.cat.var1)
prob.vec[ind.vec$last.thres.var2.of.pair &
ind.vec$index.thres.var1.of.pair!=0 &
!ind.vec$last.thres.var1.of.pair] <-
pnorm(th.rho.vec$thres.var1.for.last.cat.var2)
prob.vec[is.na(prob.vec)] <- pbivnorm(th.rho.vec$thres.var1.of.pair,
th.rho.vec$thres.var2.of.pair,
th.rho.vec$rho.vector)
cum.term1 <- prob.vec[ind.vec$index.thres.var1.of.pair!=0 &
ind.vec$index.thres.var2.of.pair!=0]
cum.term2 <- prob.vec[ ind.vec$index.thres.var1.of.pair!=0 &
!ind.vec$last.thres.var2.of.pair]
cum.term3 <- prob.vec[ ind.vec$index.thres.var2.of.pair!=0 &
!ind.vec$last.thres.var1.of.pair]
cum.term4 <- prob.vec[!ind.vec$last.thres.var1.of.pair &
!ind.vec$last.thres.var2.of.pair]
PI <- cum.term1 - cum.term2 - cum.term3 + cum.term4
# added by YR 11 nov 2012 to avoid Nan/-Inf
# log(.Machine$double.eps) = -36.04365
# all elements should be strictly positive
PI[PI < .Machine$double.eps] <- .Machine$double.eps
PI
}
# derLtoRho
# input: ind.vec - the output of function LongVecInd
# th.rho.vec - the output of function LongVecTH.Rho
# n.xixj - a vector with the observed frequency for every combination
# of categories and every pair. The frequencies are given in
# the same order as the expected probabilities in the output of
# pairwiseExpProbVec output
# pi.xixj - the output of pairwiseExpProbVec function
# no.x - the number of ordinal variables
# output: the vector of der.L.to.rho, each element corresponds to
# der.Lxixj.to.rho.xixj where j runs faster than i
derLtoRho <- function(ind.vec, th.rho.vec, n.xixj, pi.xixj, no.x) {
prob.vec <- rep(NA, length(ind.vec$index.thres.var1.of.pair))
prob.vec[ind.vec$index.thres.var1.of.pair==0 |
ind.vec$index.thres.var2.of.pair==0 |
ind.vec$last.thres.var1.of.pair |
ind.vec$last.thres.var2.of.pair] <- 0
prob.vec[is.na(prob.vec)] <- dbinorm(th.rho.vec$thres.var1.of.pair,
th.rho.vec$thres.var2.of.pair,
rho=th.rho.vec$rho.vector)
den.term1 <- prob.vec[ind.vec$index.thres.var1.of.pair!=0 &
ind.vec$index.thres.var2.of.pair!=0]
den.term2 <- prob.vec[ ind.vec$index.thres.var1.of.pair!=0 &
!ind.vec$last.thres.var2.of.pair]
den.term3 <- prob.vec[ ind.vec$index.thres.var2.of.pair!=0 &
!ind.vec$last.thres.var1.of.pair]
den.term4 <- prob.vec[!ind.vec$last.thres.var1.of.pair &
!ind.vec$last.thres.var2.of.pair]
der.pi.xixj.to.rho.xixj <- den.term1 - den.term2 - den.term3 + den.term4
prod.terms <- (n.xixj/pi.xixj)*der.pi.xixj.to.rho.xixj
# to get der.Lxixj.to.rho.xixj we should all the elements of
# der.pi.xixj.to.rho.xixj which correspond to the pair xi-xj, to do so:
xnew <- lapply( ind.vec[c("index.pairs.extended")],
function(y){y[ind.vec$index.thres.var1.of.pair!=0 &
ind.vec$index.thres.var2.of.pair!=0]})
#der.L.to.rho is:
tapply(prod.terms, xnew$index.pairs.extended, sum)
}
###########################################################################
# derLtoTau
# input: ind.vec - the output of function LongVecInd
# th.rho.vec - the output of function LongVecTH.Rho
# n.xixj - a vector with the observed frequency for every combination
# of categories and every pair. The frequencies are given in
# the same order as the expected probabilities in the output of
# pairwiseExpProbVec output
# pi.xixj - the output of pairwiseExpProbVec function
# output: the vector of der.L.to.tau where the elements are ordered as follows:
# the thresholds of each variable with respect to ascending order of
# the variable index (i.e. thres_var1, thres_var2, etc.) and within
# each variable the thresholds in ascending order.
derLtoTau <- function(ind.vec, th.rho.vec, n.xixj, pi.xixj, no.x=0L) {
# to compute der.pi.xixj.to.tau.xi
xi <- lapply( ind.vec[c("index.thres.var2.of.pair",
"last.thres.var2.of.pair")],
function(y){ y[!(ind.vec$index.thres.var1.of.pair==0 |
ind.vec$last.thres.var1.of.pair)] } )
cum.prob.vec <- rep(NA, length(xi$index.thres.var2.of.pair) )
cum.prob.vec[xi$index.thres.var2.of.pair==0] <- 0
cum.prob.vec[xi$last.thres.var2.of.pair] <- 1
denom <- sqrt(1-(th.rho.vec$rho.vector*th.rho.vec$rho.vector))
cum.prob.vec[is.na(cum.prob.vec)] <-
pnorm( (th.rho.vec$thres.var2.of.pair -
th.rho.vec$rho.vector* th.rho.vec$thres.var1.of.pair) /
denom)
den.prob.vec <- dnorm(th.rho.vec$thres.var1.for.dnorm.in.der.pi.to.tau.xi)
der.pi.xixj.to.tau.xi <- den.prob.vec *
(cum.prob.vec[ xi$index.thres.var2.of.pair!=0] -
cum.prob.vec[!xi$last.thres.var2.of.pair] )
# to compute der.pi.xixj.to.tau.xj
xj <- lapply( ind.vec[c("index.thres.var1.of.pair",
"last.thres.var1.of.pair")],
function(y){ y[!(ind.vec$index.thres.var2.of.pair==0 |
ind.vec$last.thres.var2.of.pair)] } )
cum.prob.vec <- rep(NA, length(xj$index.thres.var1.of.pair) )
cum.prob.vec[xj$index.thres.var1.of.pair==0] <- 0
cum.prob.vec[xj$last.thres.var1.of.pair] <- 1
denom <- sqrt(1-(th.rho.vec$rho.vector*th.rho.vec$rho.vector))
cum.prob.vec[is.na(cum.prob.vec)] <-
pnorm( (th.rho.vec$thres.var1.of.pair -
th.rho.vec$rho.vector* th.rho.vec$thres.var2.of.pair) /
denom)
den.prob.vec <- dnorm(th.rho.vec$thres.var2.for.dnorm.in.der.pi.to.tau.xj)
der.pi.xixj.to.tau.xj <- den.prob.vec *
(cum.prob.vec[ xj$index.thres.var1.of.pair!=0] -
cum.prob.vec[!xj$last.thres.var1.of.pair] )
#to compute der.Lxixj.tau.xi and der.Lxixj.tau.xi
n.over.pi <- n.xixj/ pi.xixj
# get the appropriate differences of n.over.pi for der.Lxixj.to.tau.xi and
# der.Lxixj.to.tau.xj
x3a <- lapply(ind.vec, function(y){
y[!(ind.vec$index.thres.var1.of.pair==0 |
ind.vec$index.thres.var2.of.pair==0) ] } )
diff.n.over.pi.to.xi <- n.over.pi[!x3a$last.thres.var1.of.pair] -
n.over.pi[ x3a$index.thres.var1.of.pair!=1]
diff.n.over.pi.to.xj <- n.over.pi[!x3a$last.thres.var2.of.pair] -
n.over.pi[ x3a$index.thres.var2.of.pair!=1]
# terms.der.Lxixj.to.tau.xi and terms.der.Lxixj.to.tau.xj
terms.der.Lxixj.to.tau.xi <- diff.n.over.pi.to.xi * der.pi.xixj.to.tau.xi
terms.der.Lxixj.to.tau.xj <- diff.n.over.pi.to.xj * der.pi.xixj.to.tau.xj
# to add appropriately elements of terms.der.Lxixj.to.tau.xi
x3b <- lapply( ind.vec[c("index.pairs.extended",
"index.thres.var1.of.pair")],
function(y){ y[!(ind.vec$index.thres.var1.of.pair==0 |
ind.vec$last.thres.var1.of.pair |
ind.vec$index.thres.var2.of.pair==0) ] } )
# to add appropriately elements of terms.der.Lxixj.to.tau.xj
x4b <- lapply( ind.vec[c("index.pairs.extended",
"index.thres.var2.of.pair")],
function(y){ y[!(ind.vec$index.thres.var2.of.pair==0 |
ind.vec$last.thres.var2.of.pair |
ind.vec$index.thres.var1.of.pair==0) ] } )
ind.pairs <- utils::combn(no.x,2)
# der.Lxixj.to.tau.xi is a matrix, nrow=no.pairs, ncol=max(no.of.free.thres)
# thus, there are NA's, similarly for der.Lxixj.to.tau.xj
der.Lxixj.to.tau.xi <- tapply(terms.der.Lxixj.to.tau.xi,
list(x3b$index.pairs.extended,
x3b$index.thres.var1.of.pair),
sum)
der.Lxixj.to.tau.xj <- tapply(terms.der.Lxixj.to.tau.xj,
list(x4b$index.pairs.extended,
x4b$index.thres.var2.of.pair),
sum)
# to add appropriately the terms of der.Lxixj.to.tau.xi and
# der.Lxixj.to.tau.xj
split.der.Lxixj.to.tau.xi <- split(as.data.frame(der.Lxixj.to.tau.xi),
ind.pairs[1,] )
sums.der.Lxixj.to.tau.xi <- lapply(split.der.Lxixj.to.tau.xi,
function(x){ y <- apply(x,2,sum)
y[!is.na(y)] } )
# Note: NA exist in the case where the ordinal variables have different
# number of response categories
split.der.Lxixj.to.tau.xj <- split(as.data.frame(der.Lxixj.to.tau.xj),
ind.pairs[2,] )
sums.der.Lxixj.to.tau.xj <- lapply(split.der.Lxixj.to.tau.xj,
function(x){ y <- apply(x,2,sum)
y[!is.na(y)] } )
# to get der.L.to.tau
c( sums.der.Lxixj.to.tau.xi[[1]],
c( unlist(sums.der.Lxixj.to.tau.xi[2:(no.x-1)]) +
unlist(sums.der.Lxixj.to.tau.xj[1:(no.x-2)]) ),
sums.der.Lxixj.to.tau.xj[[no.x-1]] )
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.