psindex: Parameter Stability Index

# utility functions for pairwise maximum likelihood

# stub for fml_deriv1
fml_deriv1 <- function(Sigma.hat = NULL,    # model-based var/cov/cor
                       TH        = NULL,    # model-based thresholds + means
                       th.idx    = NULL,    # threshold idx per variable
                       num.idx   = NULL,    # which variables are numeric
                       X         = NULL,    # data
                       eXo       = NULL,    # external covariates
                       lavcache  = NULL,    # housekeeping stuff
                       scores    = FALSE,   # return case-wise scores
                       negative  = TRUE) { 
    stop("not implemented") 
}

# the first derivative of the pairwise logLik function with respect to the
# thresholds/slopes/var/correlations; together with DELTA, we can use the
# chain rule to get the gradient
# this is adapted from code written by Myrsini Katsikatsou
# first attempt - YR 5 okt 2012
pml_deriv1 <- function(Sigma.hat  = NULL,       # model-based var/cov/cor
                       Mu.hat     = NULL,       # model-based means
                       TH         = NULL,       # model-based thresholds + means
                       th.idx     = NULL,       # threshold idx per variable
                       num.idx    = NULL,       # which variables are numeric
                       X          = NULL,       # data
                       eXo        = NULL,       # external covariates
                       lavcache   = NULL,       # housekeeping stuff
                       PI         = NULL,       # slopes
                       missing    = "listwise", # how to deal with missings
                       scores     = FALSE,      # return case-wise scores
                       negative   = TRUE) {     # multiply by -1

    # diagonal of Sigma.hat is not necessarily 1, even for categorical vars
    Sigma.hat2 <- Sigma.hat
    if(length(num.idx) > 0L) {
        diag(Sigma.hat2)[-num.idx] <- 1
    } else {
        diag(Sigma.hat2) <- 1
    }
    Cor.hat <- cov2cor(Sigma.hat2) # to get correlations (rho!)
    cors <- lav_matrix_vech(Cor.hat, diagonal = FALSE)

    if(any(abs(cors) > 1)) {
        # what should we do now... force cov2cor?
        #cat("FFFFOOOORRRRRCEEE PD!\n")
        #Sigma.hat <- Matrix::nearPD(Sigma.hat)
        #Sigma.hat <- as.matrix(Sigma.hat$mat)
        #Sigma.hat <- cov2cor(Sigma.hat)
        #cors <- Sigma.hat[lower.tri(Sigma.hat)]
        idx <- which( abs(cors) > 0.99 )
        cors[idx] <- 0.99 # clip
        #cat("CLIPPING!\n")
    }

    nvar <- nrow(Sigma.hat)
    pstar <- nvar*(nvar-1)/2
    ov.types <- rep("ordered", nvar)
    if(length(num.idx) > 0L) ov.types[num.idx] <- "numeric"
    if(!is.null(eXo)) {
        nexo <- ncol(eXo)
    } else {
        nexo <- 0
    }


    if(all(ov.types == "numeric")) {
        N.TH <- nvar
    } else {
        N.TH <- length(th.idx)
    }
    N.SL  <- nvar * nexo
    N.VAR <- length(num.idx)
    N.COR <- pstar

    # add num.idx to th.idx
    if(length(num.idx) > 0L) {
        th.idx[ th.idx == 0 ] <- num.idx
    }

    #print(Sigma.hat); print(TH); print(th.idx); print(num.idx); print(str(X))

    # shortcut for ordinal-only/no-exo case
    if(!scores && all(ov.types == "ordered") && nexo == 0L) {
        gradient <- grad_tau_rho(no.x               = nvar,
                                 all.thres          = TH,
                                 index.var.of.thres = th.idx,
                                 rho.xixj           = cors,
                                 n.xixj.vec         = lavcache$bifreq,
                                 out.LongVecInd     = lavcache$LONG)

        if(missing == "available.cases") {
            uniPI <- univariateExpProbVec(TH = TH, th.idx = th.idx)
            tmp <- lavcache$uniweights / uniPI

            var.idx <- split(th.idx, th.idx)
            var.idx <- unlist( lapply(var.idx, function(x){c(x,x[1])}) )

            tmp.varwise <- split(tmp, var.idx)
            tmp1 <- unlist( lapply(tmp.varwise, 
                                   function(x){ c(x[-length(x)]) } ) )
            tmp2 <- unlist( lapply(tmp.varwise, function(x){ c(x[-1]) } ) )

            uni.der.tau <- dnorm(TH) * (tmp1 - tmp2)
            nTH <- length(TH)
            gradient[1:nTH] <- gradient[1:nTH] + uni.der.tau
        }

        if(negative) {
            gradient <- -1 * gradient
        }
        return(gradient)
    }

    # in this order: TH/MEANS + SLOPES + VAR + COR
    GRAD.size <- N.TH + N.SL + N.VAR + N.COR

    # scores or gradient?
    if(scores) {
        SCORES <- matrix(0, nrow(X), GRAD.size) # we will sum up over all pairs 
    } else {
        GRAD <- matrix(0, pstar, GRAD.size) # each pair is a row
    }
    PSTAR <- matrix(0, nvar, nvar)   # utility matrix, to get indices
    PSTAR[lav_matrix_vech_idx(nvar, diagonal = FALSE)] <- 1:pstar
    N <- length(X[,1])

    for(j in seq_len(nvar-1L)) {
        for(i in (j+1L):nvar) {
            # cat(" i = ", i, " j = ", j, "\n") # debug only
            pstar.idx <- PSTAR[i,j]
            cor.idx <- N.TH + N.SL + N.VAR + PSTAR[i,j]
            th.idx_i <- which(th.idx == i)
            th.idx_j <- which(th.idx == j)
            if(nexo > 0L) {
                sl.idx_i <- N.TH + seq(i, by=nvar, length.out=nexo)
                sl.idx_j <- N.TH + seq(j, by=nvar, length.out=nexo)

                if(length(num.idx) > 0L) {
                    var.idx_i <- N.TH + N.SL + match(i, num.idx)
                    var.idx_j <- N.TH + N.SL + match(j, num.idx)
                }
            } else {
                if(length(num.idx) > 0L) {
                    var.idx_i <- N.TH + match(i, num.idx)
                    var.idx_j <- N.TH + match(j, num.idx)
                }
            }
            if(ov.types[i] == "numeric" && ov.types[j] == "numeric") {
                if(nexo > 1L) {
                    stop("psindex ERROR: mixed + exo in PML not implemented; try optim.gradient = \"numerical\"")
                }

                SC <- lav_mvnorm_scores_mu_vech_sigma(Y = X[,c(i,j)],
                        Mu = Mu.hat[c(i,j)], Sigma = Sigma.hat[c(i,j), c(i,j)])

                if(scores) {

                    if(all(ov.types == "numeric") && nexo == 0L) {
                        # MU1 + MU2
                        SCORES[, c(i,j)] <- SCORES[, c(i,j)] + SC[, c(1,2)]
                        # VAR1 + COV_12 + VAR2
                        var.idx <- ( nvar + 
                            lav_matrix_vech_match_idx(nvar, idx = c(i,j)) )
                        SCORES[, var.idx] <- SCORES[, var.idx] + SC[, c(3,4,5)]
                    } else { # mixed ordered/continuous
                        # MU
                        mu.idx <- c(th.idx_i, th.idx_j) 
                        SCORES[, mu.idx] <- SCORES[, mu.idx] + (-1)*SC[, c(1,2)]
                        # VAR+COV
                        var.idx <- c(var.idx_i, cor.idx, var.idx_j)
                        SCORES[, var.idx] <- SCORES[, var.idx] + SC[, c(3,4,5)]
                    }

                } else {

                    if(all(ov.types == "numeric") && nexo == 0L) {
                        mu.idx <- c(i,j)
                        sigma.idx <- ( nvar +
                             lav_matrix_vech_match_idx(nvar, idx = c(i,j)) )
                        # MU1 + MU2
                        GRAD[pstar.idx, mu.idx] <-
                            colSums(SC[,c(1,2)], na.rm = TRUE)
                    } else {
                        mu.idx <- c(th.idx_i, th.idx_j)
                        sigma.idx <- c(var.idx_i, cor.idx, var.idx_j)
                        # MU (reverse sign!)
                        GRAD[pstar.idx, mu.idx] <-
                            -1 * colSums(SC[,c(1,2)], na.rm = TRUE)
                    }
                    # SIGMA
                    GRAD[pstar.idx, sigma.idx] <-
                        colSums(SC[,c(3,4,5)], na.rm = TRUE)
                } # gradient only


            } else if(ov.types[i] == "numeric" && ov.types[j] == "ordered") {
                # polyserial correlation
                if(nexo > 1L) {
                    stop("psindex ERROR: mixed + exo in PML not implemented; try optim.gradient = \"numerical\"")
                }

                SC.COR.UNI <- ps_cor_scores_no_exo(Y1 = X[,i], Y2 = X[,j],
                                                   var.y1 = Sigma.hat[i,i],
                                                   eta.y1 = rep(Mu.hat[i], N),
                                                   th.y2 = TH[ th.idx == j ],
                                                   rho = Cor.hat[i,j],
                                                   sigma.correction = TRUE)

                if(scores) {

                    # MU  
                    SCORES[, th.idx_i] <- ( SCORES[, th.idx_i] +
                                            -1 * SC.COR.UNI$dx.mu.y1 )
                    # TH
                    SCORES[, th.idx_j] <- ( SCORES[, th.idx_j] +
                                            SC.COR.UNI$dx.th.y2 )
                    # VAR
                    SCORES[, var.idx_i] <- ( SCORES[, var.idx_i] +
                                             SC.COR.UNI$dx.var.y1 )
                    # COR
                    SCORES[, cor.idx] <- ( SCORES[, cor.idx] +
                                           SC.COR.UNI$dx.rho )

                } else {

                    # MU  
                    GRAD[pstar.idx, th.idx_i] <- 
                        -1 * sum(SC.COR.UNI$dx.mu.y1, na.rm = TRUE)
                    # TH
                    GRAD[pstar.idx, th.idx_j] <-
                            colSums(SC.COR.UNI$dx.th.y2, na.rm = TRUE)
                    # VAR
                    GRAD[pstar.idx, var.idx_i] <- 
                        sum(SC.COR.UNI$dx.var.y1, na.rm = TRUE)
                    # COR
                    GRAD[pstar.idx, cor.idx] <- 
                        sum(SC.COR.UNI$dx.rho, na.rm = TRUE)

                } # grad only

            } else if(ov.types[j] == "numeric" && ov.types[i] == "ordered") {
                # polyserial correlation
                if(nexo > 1L) {
                    stop("psindex ERROR: mixed + exo in PML not implemented; try optim.gradient = \"numerical\"")
                }

                SC.COR.UNI <- ps_cor_scores_no_exo(Y1 = X[,j], Y2 = X[,i],
                                                   var.y1 = Sigma.hat[j,j],
                                                   eta.y1 = rep(Mu.hat[j], N),
                                                   th.y2 = TH[ th.idx == i ],
                                                   rho = Cor.hat[i,j],
                                                   sigma.correction = TRUE)

                if(scores) {

                   # MU  
                    SCORES[, th.idx_j] <- ( SCORES[, th.idx_j] +
                                            -1 * SC.COR.UNI$dx.mu.y1 )
                    # TH
                    SCORES[, th.idx_i] <- ( SCORES[, th.idx_i] +
                                            SC.COR.UNI$dx.th.y2 )
                    # VAR
                    SCORES[, var.idx_j] <- ( SCORES[, var.idx_j] +
                                             SC.COR.UNI$dx.var.y1 )
                    # COR
                    SCORES[, cor.idx] <- ( SCORES[, cor.idx] +
                                           SC.COR.UNI$dx.rho )

                } else {

                    # MU  
                    GRAD[pstar.idx, th.idx_j] <-
                        -1 * sum(SC.COR.UNI$dx.mu.y1, na.rm = TRUE)
                    # TH
                    GRAD[pstar.idx, th.idx_i] <-
                            colSums(SC.COR.UNI$dx.th.y2, na.rm = TRUE)
                    # VAR
                    GRAD[pstar.idx, var.idx_j] <-
                        sum(SC.COR.UNI$dx.var.y1, na.rm = TRUE)
                    # COR
                    GRAD[pstar.idx, cor.idx] <-
                        sum(SC.COR.UNI$dx.rho, na.rm = TRUE)

                } # grad only

            } else if(ov.types[i] == "ordered" && ov.types[j] == "ordered") {
                # polychoric correlation
                if(nexo == 0L) {
                    SC.COR.UNI <- pc_cor_scores(Y1  = X[,i],
                                                Y2  = X[,j],
                                                eXo = NULL,
                                                rho = Sigma.hat[i,j],
                                                fit.y1 = NULL, # fixme
                                                fit.y2 = NULL, # fixme
                                                th.y1 = TH[ th.idx == i ],
                                                th.y2 = TH[ th.idx == j ],
                                                sl.y1 = NULL,
                                                sl.y2 = NULL,
                                                na.zero = TRUE)
                } else {
                    SC.COR.UNI <-
                        pc_cor_scores_PL_with_cov(Y1 = X[,i],
                                                  Y2 = X[,j],
                                                  eXo = eXo,
                                                  Rho = Sigma.hat[i,j],
                                                  th.y1 = TH[ th.idx == i ],
                                                  th.y2 = TH[ th.idx == j ],
                                                  sl.y1 = PI[i,],
                                                  sl.y2 = PI[j,],
                                                  missing.ind = missing)
                }

                if(scores) {

                    # TH
                    SCORES[,th.idx_i] <- SCORES[,th.idx_i] + SC.COR.UNI$dx.th.y1
                    SCORES[,th.idx_j] <- SCORES[,th.idx_j] + SC.COR.UNI$dx.th.y2

                    # SL
                    if(nexo > 0L) {
                        SCORES[,sl.idx_i] <- SCORES[,sl.idx_i] + SC.COR.UNI$dx.sl.y1
                        SCORES[,sl.idx_j] <- SCORES[,sl.idx_j] + SC.COR.UNI$dx.sl.y2
                    }
                    # NO VAR
                    # RHO
                    SCORES[,cor.idx] <- SCORES[,cor.idx] + SC.COR.UNI$dx.rho
                
                } else {
                    # TH
                    if(length(th.idx_i) > 1L) {
                        GRAD[pstar.idx, th.idx_i] <- 
                            colSums(SC.COR.UNI$dx.th.y1, na.rm = TRUE)
                    } else {
                        GRAD[pstar.idx, th.idx_i] <- 
                            sum(SC.COR.UNI$dx.th.y1, na.rm = TRUE)
                    }
                    if(length(th.idx_j) > 1L) {
                         GRAD[pstar.idx, th.idx_j] <- 
                             colSums(SC.COR.UNI$dx.th.y2, na.rm = TRUE)
                    } else {
                         GRAD[pstar.idx, th.idx_j] <- 
                             sum(SC.COR.UNI$dx.th.y2, na.rm = TRUE)
                    }
    
                    # SL
                    if(nexo > 0L) {
                        if(length(sl.idx_i) > 1L) {
                            GRAD[pstar.idx, sl.idx_i] <- 
                                colSums(SC.COR.UNI$dx.sl.y1, na.rm = TRUE)
                        } else {
                            GRAD[pstar.idx, sl.idx_i] <- 
                                sum(SC.COR.UNI$dx.sl.y1, na.rm = TRUE)
                        }
                        if(length(sl.idx_j) > 1L) {
                            GRAD[pstar.idx, sl.idx_j] <- 
                                colSums(SC.COR.UNI$dx.sl.y2, na.rm = TRUE)
                        } else {
                            GRAD[pstar.idx, sl.idx_j] <- 
                                sum(SC.COR.UNI$dx.sl.y2, na.rm = TRUE)
                        }
                    }
                    # NO VAR

                    # RHO
                    GRAD[pstar.idx, cor.idx] <- 
                        sum(SC.COR.UNI$dx.rho, na.rm = TRUE)
                }

                #GRAD2 <- numDeriv::grad(func = pc_logl_x, 
                #                        x = c(Sigma.hat[i,j],
                #                              TH[ th.idx == i ],
                #                              TH[ th.idx == j]),
                #                        Y1  = X[,i],
                #                        Y2  = X[,j],
                #                        eXo = eXo,
                #                        nth.y1 = sum( th.idx == i ),
                #                        nth.y2 = sum( th.idx == j ))
            }
        }
    }

    if(missing == "available.cases" && all(ov.types == "ordered")) {
        if(nexo == 0L) {
            UNI_SCORES <- matrix(0, nrow(X), N.TH)
            for(i in seq_len(nvar)) {
                 th.idx_i <- which(th.idx == i)
                 derY1 <- uni_scores(Y1 = X[,i], th.y1 = TH[ th.idx == i ],
                              eXo = NULL, sl.y1 = NULL,
                              weights.casewise = lavcache$uniweights.casewise)
                 UNI_SCORES[,th.idx_i] <- derY1$dx.th.y1
            }
        } else {
            UNI_SCORES <- matrix(0, nrow(X), ncol=(N.TH+N.SL) )
            for(i in seq_len(nvar)) {
                th.idx_i <- which(th.idx == i)
                sl.idx_i <- N.TH + seq(i, by=nvar, length.out=nexo)
                derY1 <- uni_scores(Y1 = X[,i], th.y1 = TH[ th.idx == i ],
                             eXo = eXo, sl.y1 = PI[i,],
                             weights.casewise = lavcache$uniweights.casewise)
               UNI_SCORES[,th.idx_i] <- derY1$dx.th.y1
               UNI_SCORES[,sl.idx_i] <- derY1$dx.sl.y1
            }
            if(scores) {
                SCORES <- SCORES[, 1:(N.TH+N.SL)] + UNI_SCORES
            } else {
                uni_gradient <- colSums(UNI_SCORES)
            }
        }
    }

    # do we need scores?
    if(scores) return(SCORES)

    # DEBUG
    #:print(GRAD)
    ###########



    # gradient is sum over all pairs
    gradient <- colSums(GRAD, na.rm = TRUE)

    if(missing == "available.cases" && all(ov.types == "ordered")) {
        if(nexo == 0L) {
            gradient[1:N.TH] <- gradient + uni_gradient
        } else {
            gradient[1:(N.TH+N.SL)] <- gradient + uni_gradient
        }
    }

    # we multiply by -1 because we minimize
    if(negative) {
        gradient <- -1 * gradient
    }

    gradient
}


### all code below written by Myrsini Katsikatsou


# The function grad_tau_rho
# input:
# no.x -  is scalar, the number of ordinal variables
# all.thres  - is vector containing the thresholds of all variables in the
#              following order: thres_var1, thres_var2,..., thres_var_p
#              within each variable the thresholds are in ascending order
#              Note that all.thres do NOT contain tau_0=-Inf and tau_last=Inf
#              for all variables.
# index.var.of.thres - a vector keeping track to which variable the thresholds
#                      in all.thres belongs to, it is of the form
#                      (1,1,1..., 2,2,2,...,  p,p,p,...)
# rho.xixj - is the vector of all correlations where j runs faster than i
#            i.e. the order is rho_12, rho_13, ..., rho_1p, rho_23, ..., rho_2p,
#            etc.
# n.xixj.vec -  a vector with the observed frequency for every combination
#               of categories and every pair. The frequencies are given in
#               the same order as the expected probabilities in the output of
#               pairwiseExpProbVec output
# out.LongVecInd - it is the output of function LongVecInd
# the output: it gives the elements of der.L.to.tau and der.L.to.rho in this
#             order. The elements of der.L.to.tau where the elements are
#             ordered as follows: the thresholds of each variable with respect
#             to ascending order of the variable index (i.e. thres_var1,
#             thres_var2, etc.) and within each variable the thresholds in
#             ascending order.
#             The elements of vector der.L.to.rho are der.Lxixj.to.rho.xixj
#             where j runs faster than i.

# The function depends on four other functions: LongVecTH.Rho,
# pairwiseExpProbVec, derLtoRho, and derLtoTau, all given below.

# if n.xixj.ab is either an array or a list the following should be done
#n.xixj.vec <- if(is.array(n.xixj.ab)) {
#                 c(n.xixj.ab)
#              } else if(is.list(n.xixj.ab)){
#                 unlist(n.xixj.ab)
#              }


grad_tau_rho <- function(no.x, all.thres, index.var.of.thres, rho.xixj,
                         n.xixj.vec, out.LongVecInd) {

  out.LongVecTH.Rho <- LongVecTH.Rho(no.x=no.x, all.thres=all.thres,
                                     index.var.of.thres=index.var.of.thres,
                                     rho.xixj=rho.xixj)
  pi.xixj <- pairwiseExpProbVec(ind.vec= out.LongVecInd,
                                th.rho.vec= out.LongVecTH.Rho)

  out.derLtoRho <- derLtoRho(ind.vec= out.LongVecInd,
                             th.rho.vec= out.LongVecTH.Rho,
                             n.xixj=n.xixj.vec, pi.xixj=pi.xixj, no.x=no.x)

  out.derLtoTau <- derLtoTau(ind.vec= out.LongVecInd,
                             th.rho.vec= out.LongVecTH.Rho,
                             n.xixj=n.xixj.vec, pi.xixj=pi.xixj,
                             no.x=no.x)

  grad <- c(out.derLtoTau, out.derLtoRho)
  attr(grad, "pi.xixj") <- pi.xixj

  grad
}
################################################################################




# The input of the function  LongVecInd:

# no.x is scalar, the number of ordinal variables

# all.thres is vector containing the thresholds of all variables in the
# following order: thres_var1, thres_var2,..., thres_var_p
# within each variable the thresholds are in ascending order
# Note that all.thres does NOT contain the first and the last threshold of the
# variables, i.e. tau_0=-Inf and tau_last=Inf

# index.var.of.thres is a vector keeping track to which variable the thresholds
# in all.thres belongs to, it is of the form (1,1,1..., 2,2,2,...,  p,p,p,...)

# The output of the function:
# it is a list of vectors keeping track of the indices 
# of thresholds, of variables, and of pairs, and two T/F vectors indicating
# if the threshold index corresponds to the last threshold of a variable; all
# these for all pairs of variables. All are needed for the
# computation of expected probabilities, der.L.to.rho, and der.L.to.tau

# all duplications of indices are done as follows: within each pair of variables,
# xi-xj, if for example we want to duplicate the indices of the thresholds, 
# tau^xi_a and tau^xj_b, then index a runs faster than b, i.e. for each b we 
# take all different tau^xi's, and then we proceed to the next b and do the 
# same. In other words if it was tabulated we fill the table columnwise.

# All pairs xi-xj are taken with index j running faster than i.

# Note that each variable may have a different number of categories, that's why
# for example we take lists below.

LongVecInd <- function(no.x, all.thres, index.var.of.thres) {
 no.thres.of.each.var <- tapply(all.thres, index.var.of.thres, length)
 index.pairs <- utils::combn(no.x,2)
 no.pairs <- ncol(index.pairs)

 # index.thres.var1.of.pair and index.thres.var2.of.pair contain the indices of
 # of all thresholds (from tau_0 which is -Inf to tau_last which is Inf)
 # for any pair of variables appropriately duplicated so that the two vectors
 # together give all possible combinations of thresholds indices
 # Since here the threshold indices 0 and "last" are included, the vectors are
 # longer than the vectors thres.var1.of.pair and thres.var2.of.pair above.
 index.thres.var1.of.pair <- vector("list", no.pairs)
 index.thres.var2.of.pair <- vector("list", no.pairs)

 # index.var1.of.pair and index.var2.of.pair keep track the index of the
 # variable that the thresholds in index.thres.var1.of.pair and
 # index.thres.var2.of.pair belong to, respectively. So, these two variables
 # are of same length as that of index.thres.var1.of.pair and
 # index.thres.var2.of.pair
 index.var1.of.pair <- vector("list", no.pairs)
 index.var2.of.pair <- vector("list", no.pairs)

 # index.pairs.extended gives the index of the pair for each pair of variables
 # e.g. pair of variables 1-2 has index 1, variables 1-3 has index 2, etc.
 # The vector is of the same length as index.thres.var1.of.pair,
 # index.thres.var2.of.pair, index.var1.of.pair, and index.var2.of.pair
 index.pairs.extended <- vector("list", no.pairs)

 for (i in 1:no.pairs) {
   no.thres.var1.of.pair <- no.thres.of.each.var[index.pairs[1,i]]
   no.thres.var2.of.pair <- no.thres.of.each.var[index.pairs[2,i]]

   index.thres.var1.of.pair[[i]] <- rep(0:(no.thres.var1.of.pair+1),
                                        times= (no.thres.var2.of.pair+2) )
   index.thres.var2.of.pair[[i]] <- rep(0:(no.thres.var2.of.pair+1),
                                        each= (no.thres.var1.of.pair+2) )
   length.vec <- length(index.thres.var1.of.pair[[i]] )
   index.var1.of.pair[[i]] <- rep(index.pairs[1,i], length.vec)
   index.var2.of.pair[[i]] <- rep(index.pairs[2,i], length.vec)
   index.pairs.extended[[i]] <- rep(i, length.vec)
 }

 index.thres.var1.of.pair <- unlist(index.thres.var1.of.pair)
 index.thres.var2.of.pair <- unlist(index.thres.var2.of.pair)
 index.var1.of.pair <- unlist(index.var1.of.pair)
 index.var2.of.pair <- unlist(index.var2.of.pair)
 index.pairs.extended <- unlist(index.pairs.extended)

 # indicator vector (T/F) showing which elements of index.thres.var1.of.pair
 # correspond to the last thresholds of variables. The length is the same as
 # that of index.thres.var1.of.pair.
 last.thres.var1.of.pair <- index.var1.of.pair==1 &
                           index.thres.var1.of.pair==(no.thres.of.each.var[1]+1)
 # we consider up to variable (no.x-1) because in pairs xi-xj where j runs
 # faster than i, the last variable is not included in the column of xi's
 for(i in 2:(no.x-1)) {
   new.condition <- index.var1.of.pair==i &
                    index.thres.var1.of.pair==(no.thres.of.each.var[i]+1)
   last.thres.var1.of.pair <- last.thres.var1.of.pair | new.condition
 }

 # indicator vector (T/F) showing which elements of index.thres.var2.of.pair
 # correspond to the last thresholds of variables. Notet that in pairs xi-xj
 # where j runs faster than i, the first variable is not included in the column
 # of xj's. That's why we start with variable 2. The length is the same as
 # that of index.thres.var1.of.pair.
 last.thres.var2.of.pair <- index.var2.of.pair==2 &
                           index.thres.var2.of.pair==(no.thres.of.each.var[2]+1)
 for(i in 3:no.x) {
   new.condition <- index.var2.of.pair==i &
                    index.thres.var2.of.pair==(no.thres.of.each.var[i]+1)
   last.thres.var2.of.pair <- last.thres.var2.of.pair | new.condition
 }

 list(index.thres.var1.of.pair = index.thres.var1.of.pair,
      index.thres.var2.of.pair = index.thres.var2.of.pair,
      index.var1.of.pair = index.var1.of.pair,
      index.var2.of.pair = index.var2.of.pair,
      index.pairs.extended = index.pairs.extended,
      last.thres.var1.of.pair = last.thres.var1.of.pair,
      last.thres.var2.of.pair = last.thres.var2.of.pair )
}
################################################################################


# The input of the function  LongVecTH.Rho:

# no.x is scalar, the number of ordinal variables

# all.thres is vector containing the thresholds of all variables in the
# following order: thres_var1, thres_var2,..., thres_var_p
# within each variable the thresholds are in ascending order
# Note that all.thres does NOT contain the first and the last threshold of the
# variables, i.e. tau_0=-Inf and tau_last=Inf

# index.var.of.thres is a vector keeping track to which variable the thresholds
# in all.thres belongs to, it is of the form (1,1,1..., 2,2,2,...,  p,p,p,...)

# rho.xixj is the vector of all corrlations where j runs faster than i
# i.e. the order is rho_12, rho_13, ..., rho_1p, rho_23, ..., rho_2p, etc.

# The output of the function:
# it is a list of vectors with thresholds and rho's duplicated appropriately,
# all needed for the computation of expected probabilities,
# der.L.to.rho, and der.L.to.tau

# all duplications below are done as follows: within each pair of variables,
# xi-xj, if for example we want to duplicate their thresholds, tau^xi_a and
# tau^xj_b, then index a runs faster than b, i.e. for each b we take all
# different tau^xi's, and then we proceed to the next b and do the same.
# In other words if it was tabulated we fill the table columnwise.

# All pairs xi-xj are taken with index j running faster than i.

# Note that each variable may have a different number of categories, that's why
# for example we take lists below.

LongVecTH.Rho <- function(no.x, all.thres, index.var.of.thres, rho.xixj) {
 no.thres.of.each.var <- tapply(all.thres, index.var.of.thres, length)
 index.pairs <- utils::combn(no.x,2)
 no.pairs <- ncol(index.pairs)

 # create the long vectors needed for the computation of expected probabilities
 # for each cell and each pair of variables. The vectors thres.var1.of.pair and
 # thres.var2.of.pair together give all the possible combinations of the
 # thresholds of any two variables. Note the combinations (-Inf, -Inf),
 # (-Inf, Inf), (Inf, -Inf), (Inf, Inf) are NOT included. Only the combinations
 # of the middle thresholds (tau_1 to tau_(last-1)).
 # thres.var1.of.pair and thres.var2.of.pair give the first and the second
 # argument, respectively, in functions pbivnorm and dbinorm
 thres.var1.of.pair <- vector("list", no.pairs)
 thres.var2.of.pair <- vector("list", no.pairs)

 # Extending the rho.vector accordingly so that it will be the the third
 # argument in pbivnorm and dbinorm functions. It is of same length as
 # thres.var1.of.pair and thres.var2.of.pair.
 rho.vector <- vector("list", no.pairs)

 # thres.var1.for.dnorm.in.der.pi.to.tau.xi and
 # thres.var2.for.dnorm.in.der.pi.to.tau.xj give the thresholds of almost
 # all variables appropriately duplicated so that the vectors can be used
 # as input in dnorm() to compute der.pi.xixj.to.tau.xi and
 # der.pi.xixj.to.tau.xj.
 # thres.var1.for.dnorm.in.der.pi.to.tau.xi does not contain the thresholds of
 # the last variable and thres.var2.for.dnorm.in.der.pi.to.tau.xj those of
 # the first variable
 thres.var1.for.dnorm.in.der.pi.to.tau.xi <- vector("list", no.pairs)
 thres.var2.for.dnorm.in.der.pi.to.tau.xj <- vector("list", no.pairs)

 for (i in 1:no.pairs) {
   single.thres.var1.of.pair <- all.thres[index.var.of.thres==index.pairs[1,i]]
   single.thres.var2.of.pair <- all.thres[index.var.of.thres==index.pairs[2,i]]
   # remember that the first (-Inf) and last (Inf) thresholds are not included
   # so no.thres.var1.of.pair is equal to number of categories of var1 minus 1
   # similarly for no.thres.var2.of.pair
   no.thres.var1.of.pair <- no.thres.of.each.var[index.pairs[1,i]]
   no.thres.var2.of.pair <- no.thres.of.each.var[index.pairs[2,i]]

   thres.var1.of.pair[[i]] <- rep(single.thres.var1.of.pair,
                                  times=no.thres.var2.of.pair )
   thres.var2.of.pair[[i]] <- rep(single.thres.var2.of.pair,
                                  each=no.thres.var1.of.pair)
   rho.vector[[i]] <- rep(rho.xixj[i], length(thres.var1.of.pair[[i]]))

   thres.var1.for.dnorm.in.der.pi.to.tau.xi[[i]] <-
            rep(single.thres.var1.of.pair, times=(no.thres.var2.of.pair+1))
   thres.var2.for.dnorm.in.der.pi.to.tau.xj[[i]] <-
            rep(single.thres.var2.of.pair, each=(no.thres.var1.of.pair+1))
 }

 thres.var1.of.pair <- unlist(thres.var1.of.pair)
 thres.var2.of.pair <- unlist(thres.var2.of.pair)
 rho.vector <- unlist(rho.vector)
 thres.var1.for.dnorm.in.der.pi.to.tau.xi <-
                               unlist(thres.var1.for.dnorm.in.der.pi.to.tau.xi)
 thres.var2.for.dnorm.in.der.pi.to.tau.xj <-
                               unlist(thres.var2.for.dnorm.in.der.pi.to.tau.xj)

 # thres.var2.for.last.cat.var1 and thres.var1.for.last.cat.var2 are needed
 # for the computation of expected probabilities. In the computation of
 # \Phi_2(tau1, tau2; rho) when either tau1 or tau2 are Inf then it is enought
 # to compute pnorm() with the non-infinite tau as an argument
 # In particular when the first variable of the pair has tau_last= Inf
 # and the second a non-infite threshold we compute
 # pnorm(thres.var2.for.last.cat.var1). Similarly, when the second variable of
 # the pair has tau_last=Inf and the first a non-infite threshold we compute
 # pnorm(thres.var1.for.last.cat.var2).
 thres.var2.for.last.cat.var1 <- vector("list", (no.x-1))
 thres.var1.for.last.cat.var2 <- vector("list", (no.x-1))
 for (i in 1:(no.x-1)) {
   thres.var2.for.last.cat.var1[[i]] <-
                                c(all.thres[index.var.of.thres %in% (i+1):no.x])
   thres.var1.for.last.cat.var2[[i]] <- rep(all.thres[index.var.of.thres==i],
                                            times=(no.x-i))
 }
 thres.var2.for.last.cat.var1 <- unlist(thres.var2.for.last.cat.var1)
 thres.var1.for.last.cat.var2 <- unlist(thres.var1.for.last.cat.var2)


 list(thres.var1.of.pair = thres.var1.of.pair,  #these 3 of same length
      thres.var2.of.pair = thres.var2.of.pair,
      rho.vector = rho.vector,

      #the following of length dependning on the number of categories
      thres.var1.for.dnorm.in.der.pi.to.tau.xi =
        thres.var1.for.dnorm.in.der.pi.to.tau.xi ,
      thres.var2.for.dnorm.in.der.pi.to.tau.xj =
        thres.var2.for.dnorm.in.der.pi.to.tau.xj ,
      thres.var2.for.last.cat.var1=thres.var2.for.last.cat.var1,
      thres.var1.for.last.cat.var2=thres.var1.for.last.cat.var2 )
}
#########################################################







#########################################################

# The function  pairwiseExpProbVec
# input: ind.vec - the output of function LongVecInd
#        th.rho.vec - the output of function LongVecTH.Rho 
# output: it gives the elements of pairwiseTablesExpected()$pi.tables
# table-wise and column-wise within each table. In other words if
# pi^xixj_ab is the expected probability for the pair of variables xi-xj
# and categories a and b, then index a runs the fastest of all, followed by b,
# then by j, and lastly by i.

pairwiseExpProbVec <- function(ind.vec, th.rho.vec) {

  prob.vec <- rep(NA, length(ind.vec$index.thres.var1.of.pair))

  prob.vec[ind.vec$index.thres.var1.of.pair==0 |
           ind.vec$index.thres.var2.of.pair==0] <- 0

  prob.vec[ind.vec$last.thres.var1.of.pair &
           ind.vec$last.thres.var2.of.pair] <- 1

  prob.vec[ind.vec$last.thres.var1.of.pair &
           ind.vec$index.thres.var2.of.pair!=0 &
           !ind.vec$last.thres.var2.of.pair] <-
                                 pnorm(th.rho.vec$thres.var2.for.last.cat.var1)

  prob.vec[ind.vec$last.thres.var2.of.pair &
           ind.vec$index.thres.var1.of.pair!=0 &
           !ind.vec$last.thres.var1.of.pair] <-
                                 pnorm(th.rho.vec$thres.var1.for.last.cat.var2)

  prob.vec[is.na(prob.vec)] <- pbivnorm(th.rho.vec$thres.var1.of.pair,
                                        th.rho.vec$thres.var2.of.pair,
                                        th.rho.vec$rho.vector)

  cum.term1 <- prob.vec[ind.vec$index.thres.var1.of.pair!=0 &
                        ind.vec$index.thres.var2.of.pair!=0]

  cum.term2 <- prob.vec[ ind.vec$index.thres.var1.of.pair!=0 &
                        !ind.vec$last.thres.var2.of.pair]

  cum.term3 <- prob.vec[ ind.vec$index.thres.var2.of.pair!=0 &
                        !ind.vec$last.thres.var1.of.pair]

  cum.term4 <- prob.vec[!ind.vec$last.thres.var1.of.pair &
                        !ind.vec$last.thres.var2.of.pair]

  PI <- cum.term1 - cum.term2 - cum.term3 + cum.term4

  # added by YR 11 nov 2012 to avoid Nan/-Inf
  # log(.Machine$double.eps) = -36.04365
  # all elements should be strictly positive
  PI[PI < .Machine$double.eps] <- .Machine$double.eps

  PI
}


# derLtoRho
# input: ind.vec - the output of function LongVecInd
#        th.rho.vec - the output of function LongVecTH.Rho 
#        n.xixj - a vector with the observed frequency for every combination
#                 of categories and every pair. The frequencies are given in
#                 the same order as the expected probabilities in the output of
#                 pairwiseExpProbVec output
#        pi.xixj - the output of pairwiseExpProbVec function
#        no.x    - the number of ordinal variables
# output: the vector of der.L.to.rho, each element corresponds to
#         der.Lxixj.to.rho.xixj where j runs faster than i

derLtoRho <- function(ind.vec, th.rho.vec, n.xixj, pi.xixj, no.x) {
  prob.vec <- rep(NA, length(ind.vec$index.thres.var1.of.pair))

  prob.vec[ind.vec$index.thres.var1.of.pair==0 |
           ind.vec$index.thres.var2.of.pair==0 |
           ind.vec$last.thres.var1.of.pair |
           ind.vec$last.thres.var2.of.pair] <- 0

  prob.vec[is.na(prob.vec)] <- dbinorm(th.rho.vec$thres.var1.of.pair,
                                                th.rho.vec$thres.var2.of.pair,
                                                rho=th.rho.vec$rho.vector)

  den.term1 <- prob.vec[ind.vec$index.thres.var1.of.pair!=0 &
                        ind.vec$index.thres.var2.of.pair!=0]

  den.term2 <- prob.vec[ ind.vec$index.thres.var1.of.pair!=0 &
                        !ind.vec$last.thres.var2.of.pair]

  den.term3 <- prob.vec[ ind.vec$index.thres.var2.of.pair!=0 &
                        !ind.vec$last.thres.var1.of.pair]

  den.term4 <- prob.vec[!ind.vec$last.thres.var1.of.pair &
                        !ind.vec$last.thres.var2.of.pair]

  der.pi.xixj.to.rho.xixj <- den.term1 - den.term2 - den.term3 + den.term4
  prod.terms <- (n.xixj/pi.xixj)*der.pi.xixj.to.rho.xixj

  # to get der.Lxixj.to.rho.xixj we should all the elements of
  # der.pi.xixj.to.rho.xixj which correspond to the pair xi-xj, to do so:
  xnew <- lapply( ind.vec[c("index.pairs.extended")],
                  function(y){y[ind.vec$index.thres.var1.of.pair!=0 &
                                ind.vec$index.thres.var2.of.pair!=0]})
  #der.L.to.rho is:
  tapply(prod.terms, xnew$index.pairs.extended, sum)
}
###########################################################################


# derLtoTau
# input: ind.vec - the output of function LongVecInd
#        th.rho.vec - the output of function LongVecTH.Rho 
#        n.xixj - a vector with the observed frequency for every combination
#                 of categories and every pair. The frequencies are given in
#                 the same order as the expected probabilities in the output of
#                 pairwiseExpProbVec output
#        pi.xixj - the output of pairwiseExpProbVec function
# output: the vector of der.L.to.tau where the elements are ordered as follows:
#         the thresholds of each variable with respect to ascending order of
#         the variable index (i.e. thres_var1, thres_var2, etc.) and within
#         each variable the thresholds in ascending order.


derLtoTau <- function(ind.vec, th.rho.vec, n.xixj, pi.xixj, no.x=0L) {
  # to compute der.pi.xixj.to.tau.xi
  xi <- lapply( ind.vec[c("index.thres.var2.of.pair",
                          "last.thres.var2.of.pair")],
                function(y){ y[!(ind.vec$index.thres.var1.of.pair==0 |
                                 ind.vec$last.thres.var1.of.pair)] } )

  cum.prob.vec <- rep(NA, length(xi$index.thres.var2.of.pair) )
  cum.prob.vec[xi$index.thres.var2.of.pair==0] <- 0
  cum.prob.vec[xi$last.thres.var2.of.pair] <- 1
  denom <- sqrt(1-(th.rho.vec$rho.vector*th.rho.vec$rho.vector))
  cum.prob.vec[is.na(cum.prob.vec)] <-
      pnorm( (th.rho.vec$thres.var2.of.pair -
              th.rho.vec$rho.vector* th.rho.vec$thres.var1.of.pair) /
             denom)
  den.prob.vec <- dnorm(th.rho.vec$thres.var1.for.dnorm.in.der.pi.to.tau.xi)
  der.pi.xixj.to.tau.xi <-  den.prob.vec *
                            (cum.prob.vec[ xi$index.thres.var2.of.pair!=0] -
                             cum.prob.vec[!xi$last.thres.var2.of.pair] )

  # to compute der.pi.xixj.to.tau.xj
  xj <- lapply( ind.vec[c("index.thres.var1.of.pair",
                          "last.thres.var1.of.pair")],
                function(y){ y[!(ind.vec$index.thres.var2.of.pair==0 |
                                 ind.vec$last.thres.var2.of.pair)] } )

  cum.prob.vec <- rep(NA, length(xj$index.thres.var1.of.pair) )
  cum.prob.vec[xj$index.thres.var1.of.pair==0] <- 0
  cum.prob.vec[xj$last.thres.var1.of.pair] <- 1
  denom <- sqrt(1-(th.rho.vec$rho.vector*th.rho.vec$rho.vector))
  cum.prob.vec[is.na(cum.prob.vec)] <-
      pnorm( (th.rho.vec$thres.var1.of.pair -
              th.rho.vec$rho.vector* th.rho.vec$thres.var2.of.pair) /
              denom)
  den.prob.vec <- dnorm(th.rho.vec$thres.var2.for.dnorm.in.der.pi.to.tau.xj)
  der.pi.xixj.to.tau.xj <-  den.prob.vec *
                            (cum.prob.vec[ xj$index.thres.var1.of.pair!=0] -
                             cum.prob.vec[!xj$last.thres.var1.of.pair] )

  #to compute der.Lxixj.tau.xi and der.Lxixj.tau.xi
  n.over.pi <- n.xixj/ pi.xixj
  # get the appropriate differences of n.over.pi for der.Lxixj.to.tau.xi  and
  # der.Lxixj.to.tau.xj
  x3a <- lapply(ind.vec, function(y){
                          y[!(ind.vec$index.thres.var1.of.pair==0 |
                              ind.vec$index.thres.var2.of.pair==0) ] } )
  diff.n.over.pi.to.xi <- n.over.pi[!x3a$last.thres.var1.of.pair] -
                          n.over.pi[ x3a$index.thres.var1.of.pair!=1]
  diff.n.over.pi.to.xj <- n.over.pi[!x3a$last.thres.var2.of.pair] -
                          n.over.pi[ x3a$index.thres.var2.of.pair!=1]
  # terms.der.Lxixj.to.tau.xi  and terms.der.Lxixj.to.tau.xj
  terms.der.Lxixj.to.tau.xi <- diff.n.over.pi.to.xi * der.pi.xixj.to.tau.xi
  terms.der.Lxixj.to.tau.xj <- diff.n.over.pi.to.xj * der.pi.xixj.to.tau.xj
  # to add appropriately elements of terms.der.Lxixj.to.tau.xi
  x3b <- lapply( ind.vec[c("index.pairs.extended",
                           "index.thres.var1.of.pair")],
                 function(y){ y[!(ind.vec$index.thres.var1.of.pair==0 |
                                  ind.vec$last.thres.var1.of.pair |
                                  ind.vec$index.thres.var2.of.pair==0) ] } )
  # to add appropriately elements of terms.der.Lxixj.to.tau.xj
  x4b <- lapply( ind.vec[c("index.pairs.extended",
                           "index.thres.var2.of.pair")],
                 function(y){ y[!(ind.vec$index.thres.var2.of.pair==0 |
                                  ind.vec$last.thres.var2.of.pair |
                                  ind.vec$index.thres.var1.of.pair==0) ] } )
  ind.pairs <- utils::combn(no.x,2)
  # der.Lxixj.to.tau.xi is a matrix, nrow=no.pairs, ncol=max(no.of.free.thres)
  # thus, there are NA's, similarly for der.Lxixj.to.tau.xj
  der.Lxixj.to.tau.xi <- tapply(terms.der.Lxixj.to.tau.xi,
                                list(x3b$index.pairs.extended,
                                     x3b$index.thres.var1.of.pair),
                                sum)
  der.Lxixj.to.tau.xj <- tapply(terms.der.Lxixj.to.tau.xj,
                                list(x4b$index.pairs.extended,
                                     x4b$index.thres.var2.of.pair),
                                sum)

  # to add appropriately the terms of der.Lxixj.to.tau.xi and
  # der.Lxixj.to.tau.xj
  split.der.Lxixj.to.tau.xi <- split(as.data.frame(der.Lxixj.to.tau.xi),
                                     ind.pairs[1,] )
  sums.der.Lxixj.to.tau.xi <- lapply(split.der.Lxixj.to.tau.xi,
                                     function(x){ y <- apply(x,2,sum)
                                                  y[!is.na(y)] } )
  # Note: NA exist in the case where the ordinal variables have different
  # number of response categories
  split.der.Lxixj.to.tau.xj <- split(as.data.frame(der.Lxixj.to.tau.xj),
                                     ind.pairs[2,] )
  sums.der.Lxixj.to.tau.xj <- lapply(split.der.Lxixj.to.tau.xj,
                                     function(x){ y <- apply(x,2,sum)
                                                  y[!is.na(y)] } )
  # to get der.L.to.tau
  c( sums.der.Lxixj.to.tau.xi[[1]],
     c( unlist(sums.der.Lxixj.to.tau.xi[2:(no.x-1)]) +
        unlist(sums.der.Lxixj.to.tau.xj[1:(no.x-2)]) ),
     sums.der.Lxixj.to.tau.xj[[no.x-1]] )
}