simulate_nlcm_doubletree | R Documentation |
The observations belong to leaves in tree2 that may further form a few groups, each with its own K-class probabilities. In particular,
The K
by J
response profile
probabilities are at the leaves of tree1 - they may form groups of distinct
numbers of leaves in tree1 (e.g., cause groups with distinct response probability
profiles).
For each leaf in tree1, the K-vector class probability vectors may vary across the leaves in tree2; they may further form a small number of leaf groups in tree2 with distinct probability vector values.
simulate_nlcm_doubletree(
n,
mytrees,
itemprob_list,
lambda_mat_list,
pi_mat,
balanced = TRUE,
ratio = 4
)
n |
total sample size |
mytrees |
a list of two trees; see |
itemprob_list |
a list of length |
lambda_mat_list |
a list of length |
pi_mat |
a |
balanced |
by default is |
ratio |
only used if |
a list. In particular, the present function does not make the true_leaf_ids
missing for a subset of subjects in tree1. So to mimic a situation with missing
tree1 leaf labels, additional creation of missing tree1 label indicators are needed!
In addition, this function simulates fully observed responses; so to mimic
situations where missing responses would happen, additional missing response
indicators need to be created.
observations
a list that contains the simulation truth:
true_leaf_ids
all the leaf memberships to both trees
Z
true class indicators for all observations
pL1 by pL2 count matrix
BayesLCA::blca()
# simulate an example data set.
rm(list=ls())
library(igraph)
library(doubletree)
library(MASS)
library(poLCA)
library(BayesLCA)
set.seed(20210926)
# second tree - over domains:
data("example_domain_edges")
domain_tree <- graph_from_edgelist(example_domain_edges, directed = TRUE)
# set the levels l*_u for nodes in the cause tree; nodes
# in the same level will share a slab variance multiplier tau_l* (times the edge length
# emanating from the parent).
igraph::V(domain_tree)$levels <- c(1,rep(2,length(igraph::V(domain_tree))-1))
igraph::E(domain_tree)$weight <- rep(1,length(E(domain_tree)))
nodes2 <- names(igraph::V(domain_tree))
leaves2 <- names(igraph::V(domain_tree)[igraph::degree(domain_tree, mode = "out") == 0])
rootnode2 <- names(igraph::V(domain_tree)[igraph::degree(domain_tree, mode = "in") == 0])
pL2 <- length(leaves2)
p2 <- length(nodes2)
# # first tree - over causes:
# data("example_cause_edges")
# cause_tree <- graph_from_edgelist(example_cause_edges, directed = TRUE)
cause_tree <- domain_tree # for simplicity, make then the same for illustration.
igraph::V(cause_tree)$levels <- c(1,rep(2,length(igraph::V(cause_tree))-1))
igraph::E(cause_tree)$weight <- rep(1,length(E(cause_tree)))
nodes1 <- names(igraph::V(cause_tree))
leaves1 <- names(igraph::V(cause_tree)[igraph::degree(cause_tree, mode = "out") == 0])
rootnode1 <- names(igraph::V(cause_tree)[igraph::degree(cause_tree, mode = "in") == 0])
pL1 <- length(leaves1)
p1 <- length(nodes1)
mytrees <- list(tree1 = cause_tree, tree2 = domain_tree)
###############################################################################
## Begin simulating data using the two trees above.
###############################################################################
n <- 1000
K <- 2
J <- 20
fracs_leaves1 <- c(1,rep(1,pL1-1))
fracs_leaves1 <- fracs_leaves1/sum(fracs_leaves1)
pi_mat <- matrix(fracs_leaves1,nrow=pL1,ncol=pL2)
# create tree1 leaf level class-specific response probabilities:
itemprob0 <- rbind(rep(rep(c(0.95, 0.95), each = 1),10),
rep(rep(c(0.1, 0.1), each = 1),10))
# gamma_mat_list <- list(logit(itemprob0))
#
# for (u in 1:(p1-1)){ # random increments on random columns
# increment_mat <- matrix(rnorm(J*K,0,1),nrow=K,ncol=J)
# ind_j <- sample(1:J,floor(J/2))
# increment_mat[,ind_j] <- 0
# gamma_mat_list <- append(gamma_mat_list,list(increment_mat))
# }
#########################################################################
## diffusion along tree1 for itemprob_list
#########################################################################
# get lists of ancestors for each leaf_ids:
d1 <- igraph::diameter(mytrees[[1]],weights=NA)
# need to set weight=NA to prevent the use of edge lengths in determining the diameter.
ancestors1 <- igraph::ego(mytrees[[1]], order = d1 + 1, nodes = leaves1, mode = "in")
ancestors1 <- sapply(ancestors1, names, simplify = FALSE)
ancestors1 <- sapply(ancestors1, function(a, nodes) which(nodes %in% a),
nodes = nodes1, simplify = FALSE)
names(ancestors1) <- leaves1
# the list of item response probability
itemprob_list <-list()
for (v1 in 1:pL1){
curr_mat <- itemprob0
ind_j <- sample(1:J,floor(J/2))
for (j in ind_j){
curr_mat[,j] <- itemprob0[rev(1:nrow(curr_mat)),j]
}
# itemprob_list[[v1]] <- expit(Reduce("+",gamma_mat_list[ancestors1[[v1]]]))
itemprob_list[[v1]] <- curr_mat
}
par(mfrow=c(ceiling(sqrt(pL1+1)),ceiling(sqrt(pL1+1))),
mar=c(2,2,2,2))
for (v1 in 1:pL1){
image(1:J,1:K,t(itemprob_list[[v1]]),main=paste0("leaf_tree1: ",v1))
}
############################################################################
## diffusion along tree2 for alpha_mat_list
############################################################################
# specify the nodes that have non-trivial alpha^cu_k, this was called
# xi^cu_k, because xi^cu_k = s_cu*alpha^cu_k and s_cu = 1 if we set it in the simulation.
alpha0_mat <- myrdirich(pL1,v=rep(1,K)) # root
alpha_mat_list <- vector("list",pL1)
for (l in 1:pL1){
# alpha0 <- c(0.6,0.3,0.1) # root
alpha0 <- alpha0_mat[l,]
# for each leaf node in tree1:
alpha_mat = rbind(logit(prob2stick(alpha0)[-K]),
0,
0,
matrix(0,nrow=p2-3,ncol=K-1)
)
alpha_mat_list[[l]] <- alpha_mat
}
# get lists of ancestors for each leaf_ids:
d2 <- igraph::diameter(mytrees[[2]],weights=NA)
# need to set weight=NA to prevent the use of edge lengths in determining the diameter.
ancestors2 <- igraph::ego(mytrees[[2]], order = d2 + 1, nodes = leaves2, mode = "in")
ancestors2 <- sapply(ancestors2, names, simplify = FALSE)
ancestors2 <- sapply(ancestors2, function(a, nodes) which(nodes %in% a),
nodes = nodes2, simplify = FALSE)
names(ancestors2) <- leaves2
# calculate the class probabilities for all leaf nodes in tree2; each leaf node
# should have a K-dim vector that sums to one; Some nodes may share
# the same set of K-dim probability vector, others may differ. There are
# one or more groups of leaf nodes with distinct K-dim probability vectors.
# Note the branch lengths may also be used here.
lambda_mat_list <- list() # will be a list of length pL1, each being K by pL2.
for (v1 in 1:pL1){
lambda_mat_list[[v1]] <- matrix(NA,nrow=K,ncol=pL2)
for (v2 in 1:pL2){
tmp <- colSums(alpha_mat_list[[v1]][ancestors2[[v2]],,drop=FALSE])
lambda_mat_list[[v1]][,v2] <- tsb(c(expit(tmp),1))
}
}
# simulate data
example_data_doubletree <- simulate_nlcm_doubletree(n,mytrees,itemprob_list,lambda_mat_list,pi_mat)
# print("counts in simulation:")
example_data_doubletree$N_sim_mat
## save the simulated data to the R package for illustration:
# save(example_data_doubletree, file = "data/example_data_doubletree.rda", compress = "xz")
#
data("example_data_doubletree")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.