R/Gibbs_lcm.R
In ddtlcm: Latent Class Analysis with Dirichlet Diffusion Tree Process Prior

Documented in sample_class_assignment sample_leaf_locations_pg sample_sigmasq

###############################################################
# Gibbs sampler with polya-gamma and logistic data augmentation
# to sample LCM parameters.
###############################################################

#'Sample item group-specific variances through Gibbs sampler
#'@param shape0 a vector of G elements, each being the shape of the
#'    Inverse-Gamma prior of group g
#'@param rate0 a vector of G elements, each being the rate of the
#'    Inverse-Gamma prior of group g
#'@param dist_mat a list, containing the KxK tree-structured matrix of leaf nodes,
#'    where K is the number of leaves / latent classes, and SVD components
#'@param item_membership_list a vector of G elements, each indicating the number of
#'    items in this group
#'@param locations a KxJ matrix of leaf parameters
#'@return a numeric vector of G elements, each being the newly sampled variance
#'    of the latent location of this group
sample_sigmasq <- function(shape0, rate0, dist_mat, item_membership_list, locations){
  # number of item groups
  G <- length(item_membership_list)
  # number of leaves
  K <- nrow(dist_mat$dist_mat_g)
  Sigma_by_group <- rep(0, G)
  for (g in 1:G) {
    shape <- shape0[g] + K * length(item_membership_list[[g]]) * 0.5
    # get the location of the root node
    locations_g_diff <- locations[, item_membership_list[[g]], drop=F]
    # trace of A^t B A = vec(B^t A)^t vec(A)
    rate <- rate0[g] + 0.5 * sum( (dist_mat$Dhalf_Ut %*% locations_g_diff) **2)
    Sigma_by_group[g] <- 1.0 / rgamma(1, shape = shape, rate = rate)
  }
  return(Sigma_by_group)
}



#'Sample the leaf locations and Polya-Gamma auxilliary variables
#'@param item_membership_list a vector of G elements, each indicating the number of
#'    items in this group
#'@param dist_mat_old a list of leaf covariance matrix from the previous iteration. The list
#'    has length G, the number of item groups
#'@param Sigma_by_group a vector of length G, each denoting the variance of the
#'    brownian motion
#'@param pg_mat a K by J matrix of PG variables from the previous iteration
#'@param a_pg a N by J matrix of hyperparameters of the generalized logistic distribution
#'@param auxiliary_mat a N by J matrix of truncated normal variables from previous iteration
#'@param auxiliary_mat_range a list of two named elements: lb and ub. Each is an N by J 
#'  matrix of the lower/upper bounds of the truncated normal variables.
#'@param class_assignments an integer vector of length N for the individual class assignments.
#'  Each element takes value in 1, ..., K.
#'@return a named list of three matrices: the newly sampled leaf parameters, the Polya-gamma random variables,
#'  and the auxiliary truncated normal variables
sample_leaf_locations_pg <- function(item_membership_list, dist_mat_old,
                                     Sigma_by_group, pg_mat, a_pg, auxiliary_mat, 
                                     auxiliary_mat_range, class_assignments){
  N <- nrow(pg_mat)
  # number of classes
  K <- ncol(dist_mat_old$Dhalf_Ut)
  # total number of items
  J <- length(unlist(item_membership_list))
  # cumulative index of item groups
  new_leaf_data <- matrix(NA, nrow = K, ncol = J)
  for (g in seq_along(item_membership_list)) {
    J_g <- length(item_membership_list[[g]])
    precision_mat <- crossprod(dist_mat_old$Dhalf_Ut) / Sigma_by_group[g]
    # extract indices
    indices <- item_membership_list[[g]]
    pg_mat_g <- pg_mat[, indices, drop = FALSE]
    auxiliary_mat_g <- auxiliary_mat[, indices, drop = FALSE]
    xi_1_raw <- pg_mat_g * auxiliary_mat_g
    xi_0 <- matrix(0, nrow = K, ncol = J_g)
    xi_1 <- matrix(0, nrow = K, ncol = J_g)
    for (k in 1:K) {
      xi_0[k,] <- colSums(pg_mat_g[class_assignments == k, , drop = FALSE])
      xi_1[k,] <- colSums(xi_1_raw[class_assignments == k, , drop = FALSE])
    }
    # precision_mat <- Matrix::kronecker(precision_mat, Diagonal(item_membership_list[g]))
    precision_mat <- Matrix::kronecker(Matrix::Diagonal(J_g), precision_mat)
    if (any(abs(xi_0) < 1e-5)){
      # cat("xi =", xi_0, "\n")
      xi_0[xi_0 < 1e-5] <- 0.01
    }
    # cat("dim(precision_mat) =", dim(precision_mat), "\n")
    # cat("diag(precision_mat) =", diag(precision_mat))
    Matrix::diag(precision_mat) <- Matrix::diag(precision_mat) + xi_0
    new_leaf_data[,indices] <- draw_mnorm(precision_mat, c(xi_1))
  }
  
  ## sample Polya-Gamma
  pg_mat <- matrix(rpg(N*J, 2.0*a_pg, auxiliary_mat - new_leaf_data[class_assignments,]), ncol = J)
  
  ## sample auxiliary variables from truncated normals
  auxiliary_mat[,] <- rtruncnorm(N*J, a = auxiliary_mat_range[['lb']], b = auxiliary_mat_range[['ub']],
                                 mean = new_leaf_data[class_assignments,], sd = 1/sqrt(pg_mat))
  
  return(list(leaf_data = new_leaf_data, pg_data = pg_mat, auxiliary_mat = auxiliary_mat))
}



#' Sample individual class assignments Z_i, i = 1, ..., N
#'@param data a N by J binary matrix, where the i,j-th element is the response
#'    of item j for individual i
#'@param leaf_data a K by J matrix of \eqn{logit(theta_{kj})}
#'@param a_pg a N by J matrix of hyperparameters of the generalized logistic distribution
#'@param auxiliary_mat a N by J matrix of truncated normal variables from previous iteration
#'@param class_probability a length K vector, where the k-th element is the
#'    probability of assigning an individual to class k. It does not have to sum up to 1
#'@return a vector of length N, where the i-th element is the class assignment of
#'    individual i
sample_class_assignment <- function(data, leaf_data, a_pg, auxiliary_mat, class_probability){
  # number of classes
  K <- length(class_probability)
  # number of individuals
  N <- nrow(data)
  J <- ncol(leaf_data)
  # KxN log probabilities: the i-th column is the unnormalized log probabilities of K classes
  # for indiviudal i
  part1 <- log(class_probability) - a_pg * rowSums(leaf_data)
  leaf_data_t <- t(leaf_data)
  class_assignments <- rep(0, N)
  # for each individual
  for (i in 1:N) {
    logexp_sum <- colSums(log1p(exp((auxiliary_mat[i,] - leaf_data_t))))
    log_probs <- part1 - 2*a_pg * logexp_sum
    log_probs <- log_probs - max(log_probs)
    class_assignments[i] <- sample(1:K, 1, prob = exp_normalize(log_probs))
  }
  
  return(class_assignments)
}

Any scripts or data that you put into this service are public.

ddtlcm documentation built on May 29, 2024, 5:41 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

ddtlcm
Latent Class Analysis with Dirichlet Diffusion Tree Process Prior

R/Gibbs_lcm.R
In ddtlcm: Latent Class Analysis with Dirichlet Diffusion Tree Process Prior

Defines functions sample_class_assignment sample_leaf_locations_pg sample_sigmasq

Documented in sample_class_assignment sample_leaf_locations_pg sample_sigmasq

Try the ddtlcm package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ddtlcm Latent Class Analysis with Dirichlet Diffusion Tree Process Prior

R/Gibbs_lcm.R In ddtlcm: Latent Class Analysis with Dirichlet Diffusion Tree Process Prior

Defines functions sample_class_assignment sample_leaf_locations_pg sample_sigmasq

Documented in sample_class_assignment sample_leaf_locations_pg sample_sigmasq

Try the ddtlcm package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ddtlcm
Latent Class Analysis with Dirichlet Diffusion Tree Process Prior

R/Gibbs_lcm.R
In ddtlcm: Latent Class Analysis with Dirichlet Diffusion Tree Process Prior