lcm_tree | R Documentation |
wrapper function for fitting and summaries
lcm_tree( Y, leaf_ids, mytree, weighted_edge = TRUE, Z_obs = NULL, ci_level = 0.95, get_lcm_by_group = FALSE, update_hyper_freq = 50, print_freq = 10, quiet = FALSE, plot_fig = FALSE, shared_tau = FALSE, hyper_fixed = list(K = 3), tol = 1e-08, tol_hyper = 1e-04, max_iter = 5000, nrestarts = 3, keep_restarts = TRUE, parallel = TRUE, log_restarts = FALSE, log_dir = ".", vi_params_init = list(), hyperparams_init = list(), random_init = FALSE, random_init_vals = list(tau_lims = c(0.5, 1.5), psi_sd_frac = 0.2, phi_sd_frac = 0.2, mu_gamma_sd_frac = 0.2, mu_alpha_sd_frac = 0.2, u_sd_frac = 0.2), allow_continue = FALSE )
Y |
An |
leaf_ids |
Character vector of length |
mytree |
A directed |
weighted_edge |
default to |
Z_obs |
a matrix of two columns, first column is subject ids (of all samples),
the second column is a mix of |
ci_level |
A number between 0 and 1 giving the desired credible interval.
For example, |
get_lcm_by_group |
If |
update_hyper_freq |
How frequently to update hyperparameters. Default = every 50 iterations. |
print_freq |
How often to print out iteration number and current value of epsilon (the difference in objective function value for the two most recent iterations). |
quiet |
default to |
plot_fig |
plot figure about |
shared_tau |
logical: |
hyper_fixed |
Fixed values of hyperprior parameters for |
tol |
Convergence tolerance for the objective function.
Default is |
tol_hyper |
The convergence tolerance for the objective function
between subsequent hyperparameter updates. Typically it is a more generous
tolerance than |
max_iter |
Maximum number of iterations of the VI algorithm.
Default is |
nrestarts |
Number of random re-starts of the VI algorithm.
The restart that gives the highest value of the objective function will
be returned. It is recommended to choose |
keep_restarts |
If |
parallel |
If |
log_restarts |
If |
log_dir |
Directory for logging progress of random restarts. Default is the working directory. |
vi_params_init, hyperparams_init |
Named lists containing initial values for the
variational parameters and hyperparameters. Supplying good initial values can be challenging,
and |
random_init |
If |
random_init_vals |
If
.
|
allow_continue |
logical, |
a list also of class "lcm_tree"
res <- make_list(mod,mod_restarts,mytree,dsgn,prob_est,est_ad_hoc) class(res) <- c("lcm_tree","list")
Other lcm_tree functions:
continue_lcm_tree()
### Example workflow of `lotR` ### Zhenke Wu | zhenkewu@gmail.com ### August 04, 2020 ### This example has true parameters following the tree structured priors. rm(list=ls()) library(lotR) data("lotR_example_edges") library(igraph) tr <- graph_from_edgelist(lotR_example_edges, directed = TRUE) # Plot tree # Here in this example, we use equal weighted edges. # If needed, first install ggtree # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # BiocManager::install("ggtree") library(ggtree) library(ggplot2) ggtree(tr, ladderize = FALSE, layout = "slanted") + geom_tiplab(geom = "label") + geom_nodelab(geom = "label") + theme(plot.margin = unit(c(0, 1.5, 0, 0.2), "cm")) + coord_cartesian(clip = "off") + scale_y_reverse() leaves <- names(igraph::V(tr)[degree(tr, mode = "out") == 0]) # set levels: leaves 2, non-leaves 1 igraph::V(tr)$levels <- rep(1,length(igraph::V(tr))) igraph::V(tr)$levels[match(names(igraph::V(tr)[igraph::degree(tr, mode = "out") == 0]), names(igraph::V(tr)))] <- 2 igraph::E(tr)$weight <- rep(1,length(E(tr))) ############################################################################### ## illustration of lotR on simulated data: ############################################################################### data("lotR_example_data_tree") # assumes a single pi for all observations. Y <- lotR_example_data_tree$Y Z_obs <- lotR_example_data_tree$Z_obs curr_leaves <- lotR_example_data_tree$curr_leaves theta <- lotR_example_data_tree$truth$itemprob pi_mat <- lotR_example_data_tree$truth$pi_mat # in the simulation, the 1st class has the highest level of probabilities, # so we will reorder obtained estimates to match the meaning of classes. Z <- lotR_example_data_tree$truth$Z ## check if all leave names are present in the data. setequal(leaves, unique(curr_leaves)) #> [1] TRUE knitr::kable(table(curr_leaves)) K <- nrow(theta) p <- length(V(tr)) J <- ncol(Y) dsgn0 <- design_tree(Y,curr_leaves,tr,weighted_edge = FALSE,Z_obs) nrestarts <- 1 # doParallel::registerDoParallel(cores = nrestarts) # log_dir <- tempdir() # dir.create(log_dir) set.seed(345083) par(mfrow=c(3,3));plot(0,0) mod0 <- lcm_tree(Y,curr_leaves,tr, weighted_edge = !TRUE, Z_obs = Z_obs, parallel = TRUE, hyper_fixed = list(K=K,a=c(1,1),b=c(1,1), tau_update_levels = c(1,2), # should this be set as default? s_u_zeroset = NULL, s_u_oneset = c(1)), # hyperparams_init = list(tau_1=matrix(9/4,nrow=2,ncol=K-1), # tau_2=array(9/4,c(2,J,K))), vi_params_init = list(prob=rep(0.95,p)), random_init = !TRUE, nrestarts = nrestarts, quiet = !TRUE, plot_fig = TRUE, shared_tau = FALSE, get_lcm_by_group = TRUE, #<--- check what are the prereq. print_freq = 10,update_hyper_freq = 50, max_iter = 200, tol = 1e-7, tol_hyper = 1e-4, allow_continue = FALSE)#, #log_restarts =!TRUE, #log_dir = log_dir) ############################################################################### # print out summaries of group specific estimates: ############################################################################### # plot(mod0,layout = "slanted", horizontal = FALSE) plot(mod0) print(mod0) which(mod0$mod$vi_params$prob>0.5) unique(lotR_example_data_tree$truth$pi_mat) ############################################################################### # print out performance metrics (only possible when doing simulation) ############################################################################### res <- mod0 #------------------------------------------------# # estimates based on data-driven leaf groups. #------------------------------------------------# # 1. RMSE ## truth vs grouped: # prob_est is with node_select (grouped estimates). NB: need to do individual # leaf estimates. # permute the estimated classes to best match the truth in terms of the response # probability profiles. itemprob_truth <- t(lotR_example_data_tree$truth$itemprob) itemprob_est <- res$prob_est$theta_collapsed # we set prob_gamma to be all zero except for the root node; # so theta_collapsed is just the shared response probability profiles. permute_est <- opt_colpermB(itemprob_truth,itemprob_est) # permute estimated classes to best match truth. A <- lotR_example_data_tree$truth$pi_mat # leaf level; in truth, # we have group structure. B <- res$prob_est$pi[,permute_est,drop=FALSE] #leaf level rmse_bias_grp <- rmse_bias_AB(A,B) print(rmse_bias_grp) ## Do the above for separate ad hoc estimates. # 2. adjusted Rand index (aRI): true_grouping <- factor(apply(A,1,paste,collapse=""), levels = unique(apply(A,1,paste,collapse=""))) # the true grouping labels are following the order with # which the leaf nodes were coded in the tree. # mclust::adjustedRandIndex(true_grouping,res$prob_est$group) # 3. misclassification Az <- lotR_example_data_tree$truth$Z Bz <- apply(res$mod$vi_params$rmat[,permute_est,drop=FALSE],1,which.max) table(Az,Bz) sum((Az!=Bz)/length(Az)) # misclassification rate # 4. coverage (conditional: need to decide if a true group is in the estimated groups) gtab <- table(true_grouping,res$prob_est$group) # get the total number of estimated groups: G <- nrow(gtab) for (g in 1:G){# for each true group if (sum(gtab[g,]>0)==1){ # # did true group get split into multiple estimated groups? # if No... h <- as.integer(which(gtab[g,]>0)) # which estimated group contains # all leaf nodes in true group g: if (sum(gtab[,h]>0)==1){ # does this estimated group contain other # leaf nodes, not in true group g: # if no, then true group g and estimated group h are identical. # Proceed with estimating coverage. # Did the lotR estimate cover the truth? interval_est <- summary(res)[[h]]$est[permute_est,,drop=FALSE] v <- which(as.integer(true_grouping)==g)[1] # pick a leaf node in the group. true_curr <- lotR_example_data_tree$truth$pi_mat[v,] covered <- (true_curr >= interval_est[,"cil_pi"]) & (true_curr <= interval_est[,"ciu_pi"]) # Did the ad hoc group-specific LCM cover the truth? # NB: this is not done yet; we have point estimates, # but not yet extracted the posterior sd yet. cat("True Group: ", g, "\n") print(data.frame(truth = round(lotR_example_data_tree$truth$pi_mat[v,],3), covered=covered)) cat("----------\n") } } } #--------------------------------------# # Individual leaf nodes estimates. #--------------------------------------# # 1. RMSE ## truth vs individual nodes: ## no node_select - the leaf nodes have distinct estimates of the pie A <- lotR_example_data_tree$truth$pi_mat # leaf level; in truth, # we have group structure. B <- res$prob_est_indiv$pi[,permute_est,drop=FALSE] #leaf level rmse_bias_indiv <- rmse_bias_AB(A,B) print(rmse_bias_indiv) # # K = 2; simulate data # # # This code is not designed as an example for a particular function, # # but it simulates the example data set: lotR_example_data_tree_K2 # ## This is the code for fixing K=2 issues. # # rm(list=ls()) # library(igraph) # set.seed(2020) # n = 3000 # tau <- c(0.7,0.3) # itemprob <- rbind(rep(rep(c(0.9, 0.9), each = 1),9), # rep(rep(c(0.1, 0.1), each = 1),9)) # # data("lotR_example_edges") # mytree <- igraph::graph_from_edgelist(lotR_example_edges, directed = TRUE) # # Plot tree # # nodes <- names(igraph::V(mytree)) # leaves <- names(igraph::V(mytree)[degree(mytree, mode = "out") == 0]) # pL = length(leaves) # p = length(igraph::V(mytree)) # # ###### # K = nrow(itemprob) # # specify the nodes that have non-trivial alpha_u, this was called # # xi_u, because xi_u = s_u*alpha_u and s_u = 1 if we set it in the simulation. # alpha_mat = rbind(logit(prob2stick(tau)[-K]), # c(-1), # c(1), # matrix(0,nrow=p-3,ncol=K-1) # ) # # # get lists of ancestors for each leaf_ids: # d <- igraph::diameter(mytree,weights=NA) # # need to set weight=NA to prevent the use of edge lengths in determining the diameter. # ancestors <- igraph::ego(mytree, order = d + 1, nodes = leaves, mode = "in") # ancestors <- sapply(ancestors, names, simplify = FALSE) # ancestors <- sapply(ancestors, function(a, nodes) which(nodes %in% a), # nodes = nodes, simplify = FALSE) # names(ancestors) <- leaves # # # calculate the class probabilities for all leaf nodes; each leaf node # # should have a K-dim vector that sums to one; Some nodes may share # # the same set of K-dim probability vector, others may differ. There are # # one or more groups of leaf nodes with distinct K-dim probability vectors. # # Note the branch lengths may also be used here. # pi_mat <- matrix(NA,nrow=pL,ncol=K) # for (v in seq_along(leaves)){ # pi_mat[v,-K] <- colSums(alpha_mat[ancestors[[v]],,drop=FALSE]) # pi_mat[v,] <- tsb(c(expit(pi_mat[v,-K]),1)) # } # # # s = c(1, 1,1,0,0, rep(0,pL)) # effective nodes # h_pau = rep(1,p) # # lotR_example_data_tree <- simulate_lcm_tree(n,itemprob,mytree,pi_mat,h_pau) # table(lotR_example_data_tree$truth$Z) # # #save the simulated data to the R package for illustration: # save(lotR_example_data_tree, file = "data/lotR_example_data_tree_K2.rda", compress = "xz")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.