inst/example/example_simulate_doubletree.R

# simulate an example data set.
rm(list=ls())

library(igraph)
library(doubletree)
library(MASS)
library(poLCA)
library(BayesLCA)

set.seed(20210926)

# second tree - over domains:
data("example_domain_edges")
domain_tree <- graph_from_edgelist(example_domain_edges, directed = TRUE)

# set the levels l*_u for nodes in the cause tree; nodes
# in the same level will share a slab variance multiplier tau_l* (times the edge length
# emanating from the parent).
igraph::V(domain_tree)$levels <- c(1,rep(2,length(igraph::V(domain_tree))-1))
igraph::E(domain_tree)$weight <- rep(1,length(E(domain_tree)))

nodes2  <- names(igraph::V(domain_tree))
leaves2 <- names(igraph::V(domain_tree)[igraph::degree(domain_tree, mode = "out") == 0])
rootnode2 <- names(igraph::V(domain_tree)[igraph::degree(domain_tree, mode = "in") == 0])
pL2 <- length(leaves2)
p2  <- length(nodes2)

# # first tree - over causes:
# data("example_cause_edges")
# cause_tree <- graph_from_edgelist(example_cause_edges, directed = TRUE)
cause_tree <- domain_tree # for simplicity, make then the same for illustration.

igraph::V(cause_tree)$levels <- c(1,rep(2,length(igraph::V(cause_tree))-1))
igraph::E(cause_tree)$weight <- rep(1,length(E(cause_tree)))

nodes1  <- names(igraph::V(cause_tree))
leaves1 <- names(igraph::V(cause_tree)[igraph::degree(cause_tree, mode = "out") == 0])
rootnode1 <- names(igraph::V(cause_tree)[igraph::degree(cause_tree, mode = "in") == 0])
pL1 <- length(leaves1)
p1  <- length(nodes1)

mytrees <- list(tree1 = cause_tree, tree2 = domain_tree)


###############################################################################
## Begin simulating data using the two trees above.
###############################################################################
n     <- 1000
K     <- 2
J     <- 20

fracs_leaves1  <- c(1,rep(1,pL1-1))
fracs_leaves1  <- fracs_leaves1/sum(fracs_leaves1)
pi_mat         <- matrix(fracs_leaves1,nrow=pL1,ncol=pL2)

# create tree1 leaf level class-specific response probabilities:
itemprob0 <- rbind(rep(rep(c(0.95, 0.95), each = 1),10),
                   rep(rep(c(0.1, 0.1), each = 1),10))
# gamma_mat_list <- list(logit(itemprob0))
#
# for (u in 1:(p1-1)){ # random increments on random columns
#   increment_mat <- matrix(rnorm(J*K,0,1),nrow=K,ncol=J)
#   ind_j         <- sample(1:J,floor(J/2))
#   increment_mat[,ind_j] <- 0
#   gamma_mat_list <- append(gamma_mat_list,list(increment_mat))
# }

#########################################################################
## diffusion along tree1 for itemprob_list
#########################################################################

# get lists of ancestors for each leaf_ids:
d1 <- igraph::diameter(mytrees[[1]],weights=NA)
# need to set weight=NA to prevent the use of edge lengths in determining the diameter.
ancestors1 <- igraph::ego(mytrees[[1]], order = d1 + 1, nodes = leaves1, mode = "in")
ancestors1 <- sapply(ancestors1, names, simplify = FALSE)
ancestors1 <- sapply(ancestors1, function(a, nodes) which(nodes %in% a),
                     nodes = nodes1, simplify = FALSE)
names(ancestors1) <- leaves1

# the list of item response probability
itemprob_list <-list()
for (v1 in 1:pL1){
  curr_mat <- itemprob0
  ind_j <- sample(1:J,floor(J/2))
  for (j in ind_j){
    curr_mat[,j] <- itemprob0[rev(1:nrow(curr_mat)),j]
  }
  # itemprob_list[[v1]] <- expit(Reduce("+",gamma_mat_list[ancestors1[[v1]]]))
  itemprob_list[[v1]] <- curr_mat
}

par(mfrow=c(ceiling(sqrt(pL1+1)),ceiling(sqrt(pL1+1))),
    mar=c(2,2,2,2))
for (v1 in 1:pL1){
  image(1:J,1:K,t(itemprob_list[[v1]]),main=paste0("leaf_tree1: ",v1))
}

############################################################################
## diffusion along tree2 for alpha_mat_list
############################################################################
# specify the nodes that have non-trivial alpha^cu_k, this was called
# xi^cu_k, because xi^cu_k = s_cu*alpha^cu_k and s_cu = 1 if we set it in the simulation.

alpha0_mat <- myrdirich(pL1,v=rep(1,K)) # root
alpha_mat_list <- vector("list",pL1)
for (l in 1:pL1){
  # alpha0 <- c(0.6,0.3,0.1) # root
  alpha0 <- alpha0_mat[l,]
  # for each leaf node in tree1:
  alpha_mat = rbind(logit(prob2stick(alpha0)[-K]),
                    0,
                    0,
                    matrix(0,nrow=p2-3,ncol=K-1)
  )
  alpha_mat_list[[l]] <- alpha_mat
}

# get lists of ancestors for each leaf_ids:
d2 <- igraph::diameter(mytrees[[2]],weights=NA)
# need to set weight=NA to prevent the use of edge lengths in determining the diameter.
ancestors2 <- igraph::ego(mytrees[[2]], order = d2 + 1, nodes = leaves2, mode = "in")
ancestors2 <- sapply(ancestors2, names, simplify = FALSE)
ancestors2 <- sapply(ancestors2, function(a, nodes) which(nodes %in% a),
                     nodes = nodes2, simplify = FALSE)
names(ancestors2) <- leaves2

# calculate the class probabilities for all leaf nodes in tree2; each leaf node
# should have a K-dim vector that sums to one; Some nodes may share
# the same set of K-dim probability vector, others may differ. There are
# one or more groups of leaf nodes with distinct K-dim probability vectors.
# Note the branch lengths may also be used here.
lambda_mat_list <- list() # will be a list of length pL1, each being K by pL2.
for (v1 in 1:pL1){
  lambda_mat_list[[v1]] <- matrix(NA,nrow=K,ncol=pL2)
  for (v2 in 1:pL2){
    tmp <- colSums(alpha_mat_list[[v1]][ancestors2[[v2]],,drop=FALSE])
    lambda_mat_list[[v1]][,v2] <- tsb(c(expit(tmp),1))
  }
}

# simulate data
example_data_doubletree <- simulate_nlcm_doubletree(n,mytrees,itemprob_list,lambda_mat_list,pi_mat)

# print("counts in simulation:")
example_data_doubletree$N_sim_mat

## save the simulated data to the R package for illustration:
# save(example_data_doubletree, file = "data/example_data_doubletree.rda", compress = "xz")
#

data("example_data_doubletree")
zhenkewu/doubletree documentation built on Oct. 21, 2023, 7:04 a.m.