R/ctr_pml_doubly_robust_utils.R

Defines functions pairwiseExpProbVec_GivenObs_UncMod LongVecTH.Rho.Generalised pairwiseExpProbVec_GivenObs compute_uniCondProb_based_on_bivProb

# This code was contributed by Myrsini Katsikatsou (LSE) -- September 2016
#
# compute_uniCondProb_based_on_bivProb()
# pairwiseExpProbVec_GivenObs() 
# LongVecTH.Rho.Generalised()
# pairwiseExpProbVec_GivenObs_UncMod()

compute_uniCondProb_based_on_bivProb <- function(bivProb, nvar,
                                                 idx.pairs,
                                                 idx.Y1,
                                                 idx.Gy2,
                                                 idx.cat.y1.split,
                                                 idx.cat.y2.split) {
  bivProb.split <- split(bivProb, idx.pairs)
  lngth <- 2*length(bivProb)
  idx.vec.el <- 1:lngth
  ProbY1Gy2 <- rep(NA, lngth)
  no.pairs <- nvar*(nvar-1)/2
  idx2.pairs <- combn(nvar,2)
  
  for(k in 1:no.pairs){
      y2Sums <- tapply(bivProb.split[[k]], idx.cat.y2.split[[k]], sum)
      y2Sums.mult <- y2Sums[idx.cat.y2.split[[k]] ]
      Y1Gy2 <- bivProb.split[[k]]/ y2Sums.mult
      tmp.idx.vec.el <- idx.vec.el[(idx.Y1  == idx2.pairs[1,k]) &
                                   (idx.Gy2 == idx2.pairs[2,k])]
      ProbY1Gy2[tmp.idx.vec.el] <- Y1Gy2
  }
  
  for(k in 1:no.pairs){
      y1Sums <- tapply(bivProb.split[[k]], idx.cat.y1.split[[k]], sum)
      y1Sums.mult <- y1Sums[idx.cat.y1.split[[k]] ]
      Y2Gy1 <- bivProb.split[[k]]/ y1Sums.mult
      reordered_Y2Gy1 <- Y2Gy1[order(idx.cat.y1.split[[k]])]
      tmp.idx.vec.el <- idx.vec.el[(idx.Y1  == idx2.pairs[2,k]) &
                                   (idx.Gy2 == idx2.pairs[1,k])]
      ProbY1Gy2[tmp.idx.vec.el] <- reordered_Y2Gy1
  }
  
  ProbY1Gy2
}

# The input of the function is a lavobject, which, in turn, is the output of the
# sem function having specified estimator="PML", missing="available.cases" 

#The output of the function is a list of two lists: the pairwiseProbGivObs list and
# the univariateProbGivObs list. Each of the two lists consists of G matrices where G
# is the number of groups in a multigroup analysis. If G=1 each of the lists 
# contains only one matrix that can be called as pairwiseProbGivObs[[1]], and
# univariateProbGivObs[[1]].

# Each of the matrices in the pairwiseProbGivObs list is of dimension: nrow=sample size, 
#ncol=sum of the number of response categories for all pairs of variables 
#(i.e. the length of the vector pxixj.ab where i<j=1,...,p, a=1,...,Ci, b=1,...,Cj; 
#a which is the index for the response category for yi variable runs the fastest, 
#then b which is the index for the response category for yj variable, 
#then j, and last i.) 
#The cells in a matrix of the pairwiseProbGivObs list have the value 0 except for
#those cells that correspond to the pairs of variables where both variables 
#are missing. Those cells have the value of the bivariate conditional probability
#for the given pair for all their response categories. The bivariate 
#probabilities are computed as follows: 
#the information in the observed variables is summarised in a factor score 
#for each individual and the bivariate probability given the estimated factor 
#scores is computed.


# Each of the matrices in the univariateProbGivObs list is of dimension: 
# nrow=sample size, ncol=sum of the number of response categories for all 
# variables. The columns are indexed with i and a, where i=1,...,p, and
# a=1,...,Ci, the response categories for yi variable; a runs faster than i. 
#The cells in a matrix of the univariateProbGivObs list have the value 0 except for
#those cells that correspond to variables with missing values. 
#Those cells have the value of the univariate conditional probability for the
#given variable for all its response categories. The univariate conditional 
#probabilities are computed as follows: 
#given that the bivariate conditional probabilities have been computed we sum over
#the response categories of each variable at a time (i.e. we compute the marginals).

#Version 3 - first compute univariate and then bivariate probabilities

pairwiseExpProbVec_GivenObs <- function(lavobject) {

 #compute yhat where yaht=nu + Lamda*eta + K*x where the parameter estimates are 
 #used and the factor scores for eta 
 #Below yhat is a list if lavobject@Data@ngroups >1, it is a list of G matrices
 #where G the number of groups and the matrices are fo dimension 
 #nrow=sample size and ncol=number of items.
 #If lavobject@Data@ngroups=1 then yhat is a matrix.
 yhat <- lavPredict(object=lavobject, type = "yhat" )

 #compute bivariate probabilities
 ngroups <- lavobject@Data@ngroups
 univariateProb <- vector("list", length=ngroups)
 pairwiseProb <- vector("list", length=ngroups)
 #save the indices of the Theta matrices for the groups stored in GLIST
 idx.ThetaMat <- which(names(lavobject@Model@GLIST)=="theta") 

 for(g in seq_len(ngroups)) { # g<-1
 
  if(ngroups>1L){
    yhat_group <- yhat[[g]]
  } else {
    yhat_group <- yhat
  }

  nsize  <- lavobject@Data@nobs[[g]]
  nvar   <- lavobject@Model@nvar[[g]]
  Data   <- lavobject@Data@X[[g]]
  TH     <- lavobject@Fit@TH[[g]]
  th.idx <- lavobject@Model@th.idx[[g]]
  Theta  <- lavobject@Model@GLIST[ idx.ThetaMat[g] ]$theta 
  error.stddev <- diag(Theta)^0.5 

  #for the computation of the univariate probabilities
  nlev <- lavobject@Data@ov$nlev
  idx.uniy <- rep(1:nvar, times=nlev)
  
  #indices vectors for the computation of bivariate probabilities
  idx.pairs.yiyj <- combn(1:nvar,2)
  no_biv_resp_cat_yiyj <- sapply(1:ncol(idx.pairs.yiyj), function(x){
                                 prod( nlev[idx.pairs.yiyj[,x]] )    })
  idx.y1 <- unlist( 
       mapply(rep, idx.pairs.yiyj[1,], each= no_biv_resp_cat_yiyj) )
  idx.y2 <- unlist( 
       mapply(rep, idx.pairs.yiyj[2,], each= no_biv_resp_cat_yiyj) )


  univariateProb[[g]] <- matrix(0, nrow = nsize, ncol = sum(nlev) ) 
  pairwiseProb[[g]]   <- matrix(0, nrow = nsize, 
                        ncol = length(lavobject@Cache[[g]]$bifreq))
  
  idx.MissVar.casewise <- apply(Data, 1, function(x) { which(is.na(x)) } )
  
  for(i in 1:nsize){
    idx.MissVar <- idx.MissVar.casewise[[i]]
    noMissVar   <- length(idx.MissVar)
    
    if( noMissVar>0L ) {
    #compute the univariate probabilities
    TH.list <- split(TH,th.idx)
    tmp.TH  <- TH.list[idx.MissVar]
    tmp.lowerTH <- unlist(lapply(tmp.TH, function(x){c(-Inf,x)}))
    tmp.upperTH <- unlist(lapply(tmp.TH, function(x){c(x,Inf) }))
    
    idx.items <- rep(c(1:noMissVar), times=nlev[idx.MissVar])
    tmp.mean    <- yhat_group[i,idx.MissVar]
    tmp.mean.extended <- tmp.mean[idx.items]
    tmp.stddev  <- error.stddev[idx.MissVar]
    tmp.stddev.extended <- tmp.stddev[idx.items]
    
    tmp.uniProb <- pnorm( (tmp.upperTH - tmp.mean.extended )/
                           tmp.stddev.extended ) -
                   pnorm( (tmp.lowerTH - tmp.mean.extended )/
                           tmp.stddev.extended )
    idx.columnsUni <- which(idx.uniy %in% idx.MissVar)
    univariateProb[[g]][i, idx.columnsUni] <- tmp.uniProb
   
  #compute the bivariate probabilities 
  if( noMissVar>1L ) {
   idx.pairsMiss <- combn(idx.MissVar ,2)
   no.pairs <- ncol(idx.pairsMiss) 
   idx.pairsV2 <- combn(noMissVar, 2)
   idx.columns <- unlist(lapply(1:no.pairs, function(x){
                           which(  (idx.y1 == idx.pairsMiss[1,x]) & 
                                  (idx.y2 == idx.pairsMiss[2,x]) )   } ) ) 
   
   if( all( Theta[t(idx.pairsMiss)]==0 ) ){ #items independence given eta 
     tmp.uniProb.list <- split(tmp.uniProb, idx.items)
     pairwiseProb[[g]][i, idx.columns] <- 
          unlist( lapply(1:no.pairs, function(x){ 
                     c( outer(tmp.uniProb.list[[ idx.pairsV2[1,x] ]] ,
                              tmp.uniProb.list[[ idx.pairsV2[2,x] ]]  ) ) }) )
   }  else { #when correlation between measurement errors
     
     tmp.th.idx <- th.idx[th.idx %in% idx.MissVar]
     #recode so that it is always 1,1,..,1, 2,...,2, etc.
     tmp.th.idx.recoded <- rep(c(1:noMissVar), times=table(tmp.th.idx))
     tmp.TH <- TH[th.idx %in% idx.MissVar]
    
     tmp.ind.vec <- LongVecInd(no.x = noMissVar, 
                               all.thres = tmp.TH, 
                               index.var.of.thres = tmp.th.idx.recoded)
                 
     tmp.th.rho.vec <-  LongVecTH.Rho.Generalised(
                           no.x = noMissVar, 
                             TH = tmp.TH, 
                         th.idx = tmp.th.idx.recoded,
                       cov.xixj = Theta[t(idx.pairsMiss)] ,
                         mean.x = yhat_group[i,idx.MissVar],
                       stddev.x = error.stddev[idx.MissVar] ) 
 
     tmp.bivProb <- pairwiseExpProbVec(ind.vec = tmp.ind.vec ,
                                       th.rho.vec = tmp.th.rho.vec)
    
     pairwiseProb[[g]][i, idx.columns] <- tmp.bivProb 
   } #end of else of if( all( Theta[t(idx.pairsMiss)]==0 ) ) 
     # which checks item local independence 
  } #end of if( noMissVar>1L )
   
     #cat(i,  "\n")
  } #end of if(noMissVar>0L)   
  } #end of for(i in 1:nsize)
  
 } #end of for(g in seq_len(lavobject@Data@ngroups))                           
 
 list(univariateProbGivObs = univariateProb,
        pairwiseProbGivObs = pairwiseProb)
} # end of the function pairwiseExpProbVec_GivenObs

##################################################################



# LongVecTH.Rho.Generalised function is defined as follows
LongVecTH.Rho.Generalised <- function(no.x, TH, th.idx,
                                      cov.xixj, mean.x, stddev.x ) {
 all.std.thres <- (TH - mean.x[th.idx]) / stddev.x[th.idx]
 id.pairs <- utils::combn(no.x,2)
 cor.xixj <- cov.xixj /( stddev.x[id.pairs[1,]] * stddev.x[id.pairs[2,]])
 
 LongVecTH.Rho(no.x = no.x, 
               all.thres = all.std.thres, 
               index.var.of.thres = th.idx,
               rho.xixj = cor.xixj)
}

# LongVecTH.Rho.Generalised is a generalisation  of the function
#  lavaan:::LongVecTH.Rho . The latter assumes that all y* follow standard
#  normal so the thresholds are automatically the standardised ones.
# LongVecTH.Rho.Generalised does not assume that, each of y*'s can follow
# a normal distribution with mean mu and standard deviation sigma.
# LongVecTH.Rho.Generalised has the following input arguments:
# no.x (same as in lavaan:::LongVecTH.Rho), 
# TH (similar to the TH in lavaan:::LongVecTH.Rho but here they are the unstandardised thresholds, i.e. of the normal distribution with mean mu and standard deviation sigma)
# th.idx (same as index.var.of.thres in lavaan:::LongVecTH.Rho)
# cov.xixj which are the polychoric covariances of the pairs of underlying variables provided in a similar fashion as rho.xixj in lavaan:::LongVecTH.Rho)
# mean.x  is a vector including the means of y*'s provided in the order mean.x1, mean.x2, ...., mean.xp
# stddev.x  is a vector including the standard deviations of y*'s provided in the order stddev.x1, stddev.x2, ...., stddev.xp

# The output of the new function is similar to that of lavaan:::LongVecTH.Rho#############################################



#lavobject is the output of lavaan function where either the unconstrained
#or a hypothesized model has been fitted   
pairwiseExpProbVec_GivenObs_UncMod <- function(lavobject) {
 ngroups <- lavobject@Data@ngroups
 TH <- lavobject@implied$th #these are the standardized thresholds
 #mean and variance of y* have been taken into account
 TH.IDX <- lavobject@SampleStats@th.idx
 Sigma.hat <- lavobject@implied$cov
 
 univariateProb <- vector("list", length=ngroups)
 pairwiseProb <- vector("list", length=ngroups)

 for(g in 1:ngroups) {
   Sigma.hat.g <- Sigma.hat[[g]]
   # is Sigma.hat always a correlation matrix?
   Cor.hat.g <- cov2cor(Sigma.hat.g)
   cors <- Cor.hat.g[lower.tri(Cor.hat.g)]
    if(any(abs(cors) > 1)) {
         warning("lavaan WARNING: some model-implied correlations
                  are larger than 1.0")
    }
    nvar <- nrow(Sigma.hat.g)
    MEAN <- rep(0, nvar)
    TH.g <- TH[[g]]
    th.idx.g <- TH.IDX[[g]]       

    nlev <- lavobject@Data@ov$nlev
    
    #create index vector to keep track which variable each column of
    #univariateProb matrix refers to
    idx.uniy <- rep(1:nvar, times=nlev)

    #create index vector to keep track which variables each column of
    #pairwiseProb matrix refers to
    idx.pairs.yiyj <- combn(1:nvar,2)
    no_biv_resp_cat_yiyj <- sapply(1:ncol(idx.pairs.yiyj), function(x){
                                   prod( nlev[idx.pairs.yiyj[,x]] )    })
    idx.y1 <- unlist( 
               mapply(rep, idx.pairs.yiyj[1,], each= no_biv_resp_cat_yiyj) )
    idx.y2 <- unlist( 
               mapply(rep, idx.pairs.yiyj[2,], each= no_biv_resp_cat_yiyj) )

    Data <- lavobject@Data@X[[g]]
    nsize  <- nrow(Data)

    #create the lists of matrices
    univariateProb[[g]] <- matrix(0, nrow = nsize, ncol = sum(nlev) ) 
    pairwiseProb[[g]]   <- matrix(0, nrow = nsize, 
                        ncol = length(lavobject@Cache[[g]]$bifreq))
  
    idx.MissVar.casewise <- apply(Data, 1, function(x) { 
                                           which(is.na(x)) } )
  
    for(i in 1:nsize){
      idx.MissVar <- idx.MissVar.casewise[[i]]
      noMissVar   <- length(idx.MissVar)
      
      if( noMissVar>0L ) {
       #compute the denominator of the conditional probability
       TH.VAR <- lapply(1:nvar, function(x) c(-Inf, TH.g[th.idx.g==x], +Inf))
       lower <- sapply(1:nvar, function(x) TH.VAR[[x]][ Data[i,x]      ])
       upper <- sapply(1:nvar, function(x) TH.VAR[[x]][ Data[i,x] + 1L ])
       lower.denom <- lower[-idx.MissVar]
       upper.denom <- upper[-idx.MissVar]
       MEAN.i <- MEAN[-idx.MissVar]
       Corhat.i <- Cor.hat.g[-idx.MissVar, -idx.MissVar, drop=FALSE] 
       denom <- sadmvn(lower.denom, upper.denom, mean=MEAN.i, varcov=Corhat.i)[1]
      } #end of if( noMissVar>0L )
      
      if( noMissVar==1L ) { #only univariate probabilities for one item
       #compute the numerator
       TH.MissVar <- c(-Inf, TH.g[th.idx.g==idx.MissVar], +Inf)
       #for all response categories of the missing item
       no.cat <- nlev[idx.MissVar]
       numer <- sapply(1:no.cat, function(x){
                     lower[idx.MissVar] <- TH.MissVar[x]
                     upper[idx.MissVar] <- TH.MissVar[x+ 1L]
                     sadmvn(lower, upper, mean=MEAN, varcov=Cor.hat.g)[1]  })
       idx.columnsUni <- which(idx.uniy %in% idx.MissVar)
       univariateProb[[g]][i, idx.columnsUni] <- numer / denom
      } #end of if( noMissVar==1L )

      if( noMissVar>1L ) {
       #compute the bivariate probabilities and based on them
       #calculate the univariate ones
      
       #form all possible pairs of items with missing values
       idx.pairsMiss <- combn(idx.MissVar ,2)
       no.pairs <- ncol(idx.pairsMiss) 
       for(j in 1:no.pairs ) {
          idx.Missy1y2 <- idx.pairsMiss[,j]
          idx.Missy1 <- idx.Missy1y2[1]
          idx.Missy2 <- idx.Missy1y2[2] 
          idx.MissRestItems <- idx.MissVar[ !(idx.MissVar %in% idx.Missy1y2)]
          TH.Missy1 <- c(-Inf, TH.g[th.idx.g==idx.Missy1], +Inf)
          TH.Missy2 <- c(-Inf, TH.g[th.idx.g==idx.Missy2], +Inf)
          no.cat.Missy1 <- nlev[ idx.Missy1 ]
          no.cat.Missy2 <- nlev[ idx.Missy2 ]
          no.bivRespCat <- no.cat.Missy1 * no.cat.Missy2
          mat_bivRespCat <- matrix(1:no.bivRespCat, nrow= no.cat.Missy1,
                                   ncol=no.cat.Missy2)
          
          numer <- sapply(1:no.bivRespCat, function(x){
            idx_y1_cat <- which(mat_bivRespCat==x, arr.ind=TRUE)[1]
            idx_y2_cat <- which(mat_bivRespCat==x, arr.ind=TRUE)[2]
            lower[idx.Missy1y2] <- 
                 c( TH.Missy1[idx_y1_cat],    TH.Missy2[idx_y2_cat] )
            upper[idx.Missy1y2] <- 
                 c( TH.Missy1[idx_y1_cat+1L], TH.Missy2[idx_y2_cat+1L] )
            lower.tmp <- lower
            upper.tmp <- upper
            MEAN.tmp  <- MEAN
            Cor.hat.g.tmp <- Cor.hat.g
            if( length(idx.MissRestItems)>0 ){
               lower.tmp <-     lower[-idx.MissRestItems]
               upper.tmp <-     upper[-idx.MissRestItems]
               MEAN.tmp  <-      MEAN[-idx.MissRestItems]
           Cor.hat.g.tmp <- Cor.hat.g[-idx.MissRestItems, -idx.MissRestItems]    
            }
            sadmvn(lower.tmp, upper.tmp, 
                   mean=MEAN.tmp, varcov=Cor.hat.g.tmp)[1]  
            })
         
         idx.columns <- which( (idx.y1 == idx.Missy1) & 
                               (idx.y2 == idx.Missy2) )
         tmp_biv <- numer/denom 
         pairwiseProb[[g]][i, idx.columns] <- tmp_biv 
         
         #compute the univariateProb based on the above bivariate
         # probabilities
         if(j==1L){
            univariateProb[[g]][i, which(idx.uniy %in% idx.Missy1) ] <-
               apply(mat_bivRespCat, 1, function(x){ sum( tmp_biv[x])} )
            
            univariateProb[[g]][i, which(idx.uniy %in% idx.Missy2) ] <-
               apply(mat_bivRespCat, 2, function(x){ sum( tmp_biv[x])} )
         }
         
         if(j>1L & j<noMissVar){
            univariateProb[[g]][i, which(idx.uniy %in% idx.Missy2) ] <-
               apply(mat_bivRespCat, 2, function(x){ sum( tmp_biv[x])} )
         }

       } #end of for(j in 1:no.pairs ) #no.pairs is that of missing items  

      } #end of if( noMissVar>1L )
    } #end of for(i in 1:nsize)
 } # end of for(g in 1:ngroups)
 
 list(univariateProbGivObs = univariateProb,
        pairwiseProbGivObs = pairwiseProb)
} #end of function

Try the lavaan package in your browser

Any scripts or data that you put into this service are public.

lavaan documentation built on July 26, 2023, 5:08 p.m.