In CJangelo/CLCM: Estimate Confirmatory Latent Class Models

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

Timepoints: 2
Latent Classes: K=2, 2^K = 4 latent classes
Multinomial latent regression, Groups separate at timepoint 2

The stratified transition matrix is a transition matrix estimated separately for different groups. This is essentially for type of latent regression. Here we regress the latent classes at timepoint 2 onto the observed Group, which allows us to estimate a stratified transition matrix - this is the output of interest when, for example, we look at the differences in treatment arms (observed groups) in the proportion of subjects moving from the "Sick" latent class to the "Not Sick" latent class.

library(CLCM)
library(nnet)

Specify details of the data. Here we have 2,000 subjects belonging to 4 different latent classes responding to 16 items at 2 timepoints.

    N <- 2000
     number.timepoints <- 2
     item.type <- rep( c('Ordinal', 'Nominal', 'Poisson', 'Neg_Binom', 'ZINB', 'ZIP', 'Normal', 'Beta'), 2)
     sim.categories.j <- rep( c(4, 4, 30, 30, 30, 30, NA, NA) , 2)
     J <- length(item.type)
     item.names <- paste0('Item_', 1:J)
     K= 2
     #alpha <- pattern(K)
     Q <- matrix(c(rep(c(1,0), J/2), rep(c(0, 1), J/2)), nrow = J, ncol = K, byrow = T)
     #eta=alpha %*% t(Q)

Generate the subject posteriors via (multinomial) latent regression. The latent class assignments will differ across the two observed groups (Group).

     number.groups <- 2
     dat <- data.frame(
                  'USUBJID' = rep(paste0('Subject_', formatC(1:N, width = 4, flag = '0')), length.out= N*number.timepoints),
                  'Group' = rep(paste0('Group_', 1:number.groups), length.out = N*number.timepoints),
                  'Time' = rep(paste0('Time_', 1:number.timepoints), each = N),
                  stringsAsFactors=F)

      # Design Matrix
      XX <- model.matrix( ~ Group*Time, data = dat) 
      # Beta
      Beta <- matrix(0, nrow = ncol(XX), ncol = 2^K - 1, dimnames=list(colnames(XX), rep('LC', 2^K -1)))
      Beta['(Intercept)', ] <- c(0.2, 0.8, 0.4)
      Beta['GroupGroup_2', ] <- 0
      Beta['TimeTime_2', ] <- 0
      Beta['GroupGroup_2:TimeTime_2', ] <- c(0.2, -1.0, -1.6)
      Beta

      # Matrix multiply:
      XB <- XX %*% Beta
      p <- exp(XB)/(1 + apply(exp(XB), 1, sum))
      p <-  cbind(1 - rowSums(p), p)
      lca <-  vector()
      for(i in 1:nrow(p)){
          lca <- c(lca, 
           sample(x = c(1:(2^K)), size = 1, prob = p[i, ])  
                  )
        } #end loop to sample lca

At this point we have used the observed group membership (Group) to generate the (true) posterior distributions/latent class assignments for both timepoints. Let's check the latent class assignments vs Observed group assignment. Again, we haven't yet moved into latent class models yet.

      # Check the LCA 
      prop.table(table(lca))
      # Check LCA across observed Groups:
      prop.table(xtabs( ~ lca + dat$Group), margin = 1)

Before we move on with the simulation, let's just do a quick check on whether a simple multinomial logistic regression can recover this. Regress latent class assignment onto observed groups:

      mod <- nnet:::multinom(as.factor(lca) ~ dat$Group*dat$Time)
      coef(mod)
      t(Beta)

Recovery seems fine. Let's create the true posterior distributions and pass to the simulation function.

      # Create true posterior distributions, pass 
      post.true <- matrix(0, nrow = nrow(dat), ncol = 2^K)
      post.true[ cbind(1:nrow(dat), lca) ] <- 1 

      # Simulate Data
        set.seed(03102021)
      sim.dat <- simulate_clcm(N = N, number.timepoints = number.timepoints, 
                        Q = Q, 
                        item.names = item.names, 
                        item.type = item.type, 
                        categories.j = sim.categories.j, 
                        post = post.true)

Merge the dataframe with the Observed Group membership with the simulated dataframe. You need the Group variable with the item responses to pass to the model estimation function. If you don't, the esimtation routine won't have a Group variable to compute the multinomial latent regression with.

      dat.cov <- merge(x = dat, y = sim.dat$dat, by = c('USUBJID', 'Time'))

Specify the latent regression formula - regress latent classes onto Group variable in model estimation by passing lat.reg = list('Time_1' = NULL, 'Time_2' = 'Group'). Note: if we had additional covariates, we would pass the corresponding regression formula, e.g. 'Group + Sex + Age'. This will be passed to as.formula and then passed to a function that fits a regression.

      mod <- clcm(dat = dat.cov, 
                 item.type = item.type, 
                 item.names = item.names, 
                 Q = Q, max.diff = 0.001, 
                 lat.reg = list('Time_1' = NULL, 'Time_2' = 'Group') )

Evaluate the Latent Regression Parameters:

      Beta # Generating parameter
      mod$lat.reg.param  # estimate

Evaluate the transition matrix, first not stratified across the Group variable, and compare that estimated transition matrix to the true transition matrix.

      transition_matrix_clcm(mod = mod, stratification = F)

      # Compare to Generating parameters
      post.true1 <- post.true[dat$Time == 'Time_1', ]
      post.true2 <- post.true[dat$Time == 'Time_2', ]
      t(post.true1) %*% post.true2/matrix(colSums(post.true1), nrow = 2^K, ncol = 2^K, byrow = F)

Appear to be very similar, very good recovery of the true generating parameters. Now let's check the estimated transition matrix, stratified on the Group variable:

  # Estimated:
  transition_matrix_clcm(mod = mod, stratification = T, covariate = 'Group')

  # Compare to Generating parameters:
  post.true11 <- post.true[dat$Time == 'Time_1' & dat$Group == 'Group_1', ]
  post.true12 <- post.true[dat$Time == 'Time_1' & dat$Group == 'Group_2', ]
  post.true21 <- post.true[dat$Time == 'Time_2' & dat$Group == 'Group_1', ]
  post.true22 <- post.true[dat$Time == 'Time_2' & dat$Group == 'Group_2', ]
  t(post.true11) %*% post.true21/matrix(colSums(post.true11), nrow = 2^K, ncol = 2^K, byrow = F)
  t(post.true12) %*% post.true22/matrix(colSums(post.true12), nrow = 2^K, ncol = 2^K, byrow = F)

Again, looks very close.

Next, compare true classification (latent class assignment, lca) with estimates:

      lca.hat <- mod$dat$lca 
      lca.true <- mod$dat$true_lca      
      table(lca.true == lca.hat)
      prop.table(table(lca.true == lca.hat)) 
      xtabs( ~ lca.true + lca.hat)