In this script, we perform test runs to validate the ordtpx package. We take out chunks from the MRA analysis of the ordtpx code and then try to dry run or test run these code chunks.
library(ordtpx) theta_tree <- mra_tree_prior_theta(4, c(1,20,200)) print(theta_tree) plot(theta_tree[[4]], type="l")
Note that the $\Theta$ matrix is pretty smooth because the lower level $\beta$ values are generated from a very concentrated Beta distribution $Beta(200,200)$. If we want less smooth theta, one may take a less concentrated prior at the bottom levels.
theta_tree <- mra_tree_prior_theta(4, c(1,10,20)) print(theta_tree) plot(theta_tree[[4]], type="l")
We now build the mu tree over the theta tree we generate above.
mu_tree <- mra_tree_prior_mu(4, c(1,20,200),2,2); print(mu_tree) plot(mu_tree[[4]], type="l")
We extract the theta tree from the mu tree we generated. The shape of the extracted theta would be similar to that of the mu (lower level)
theta_tree_extract <- extract_theta_tree_from_mu(mu_tree) plot(theta_tree_extract[[4]], type="l")
If we know the leaf node theta/ mu values, we can construct the tree by adding the adjacent values together.
theta_tree <- mra_tree_prior_theta(4, c(1,20,200)) print(theta_tree) plot(theta_tree[[4]], type="l")
theta_tree_construct <- mra_bottom_up(theta_tree[[4]]) print(theta_tree) print(theta_tree_construct) ## should be same as theta_tree
Now we generate counts data using the Multinomial model.
nclus <- 2; del_beta <- c(2,10,50, 100, 500, 1000, 2000); levels <- 8 a_mu <- 30; b_mu <- 80; mu_tree_set <- lapply(1:nclus, function(s) return(mra_tree_prior_mu(levels,del_beta, a_mu, b_mu))); param_set <- param_extract_mu_tree(mu_tree_set) prior_calc <- prior_calc_fn(param_set, del_beta, a_mu, b_mu) theta_sim <- do.call(rbind, lapply(1:nclus, function(l) mu_tree_set[[l]][[levels]]/mu_tree_set[[l]][[1]])); n.out <- 200 omega_sim <- rbind( cbind( rep(1, n.out), rep(0, n.out)), cbind( rep(0, n.out), rep(1, n.out)), cbind( seq(0.6, 0.4, length.out = n.out), 1- seq(0.6, 0.4,length.out=n.out)) ) dim(omega_sim) K <- dim(omega_sim)[2] barplot(t(omega_sim), col = 2:(K+1), axisnames = F, space = 0, border = NA, main=paste("No. of clusters=", K), las=1, ylim=c(0,1), cex.axis=1.5, cex.main=1.4) counts <- t(do.call(cbind,lapply(1:dim(omega_sim)[1], function(x) rmultinom(1,1000,prob=omega_sim[x,]%*%theta_sim))));
We also plot the theta matrices
plot(theta_sim[1,], type="l") plot(theta_sim[2,], type="l")
We first apply maptpx package.
topic_clus <- maptpx::topics(counts, K=2, tol=0.1) K <- 2 barplot(t(topic_clus$omega), col = 2:(K+1), axisnames = F, space = 0, border = NA, main=paste("No. of clusters=", K), las=1, ylim=c(0,1), cex.axis=1.5, cex.main=1.4)
We also plot the theta matrices
plot(topic_clus$theta[,1], type="l") plot(topic_clus$theta[,2], type="l")
We now perform ordtpx.
library(slam) initopics=NULL tol=0.1 ord=TRUE ztree_options=1 verb=1 reflect=TRUE tmax=10000 wtol=10^(-4) qn=100 grp=NULL admix=TRUE nonzero=FALSE dcut=-10 init_method="taddy" adapt.method="smash" shape=NULL use_squarem=FALSE nbundles=1 change_start_points=FALSE light=1 method_admix=1 top_genes=100 burn_in=5 source("../R/ord_count.R") source("../R/ord_mra.R") source("../R/ord_tpx.R") source("../R/tpx.R") source("../R/ord_topics.R") source("../R/count.R") source("../R/binshrink.R")
K <- 2 del_beta_1 <- c(50, 500, 5000, 50000, 500000, 5000000, 50000000) library(inline) #library(Rcpp) library(ordtpx) library(maptpx) library(slam) library(smashr) system.time(ord_topics <- ord_topics(counts, K=2, ztree_options=1, tol=0.1, adapt.method="bash", init_method = "taddy", acc=TRUE)); barplot(t(ord_topics$omega), col = 2:(K+1), axisnames = F, space = 0, border = NA, main=paste("No. of clusters=", K), las=1, ylim=c(0,1), cex.axis=1.5, cex.main=1.4)
plot(ord_topics$theta[,1], type="l") plot(ord_topics$theta[,2], type="l")
K <- 2 del_beta_1 <- c(50, 500, 5000, 50000, 500000, 5000000, 50000000) system.time(ord_topics <- ord_topics(counts, K=2, del_beta = del_beta_1, a_mu=10, b_mu=30, ztree_options=2, tol=100)); barplot(t(ord_topics$omega), col = 2:(K+1), axisnames = F, space = 0, border = NA, main=paste("No. of clusters=", K), las=1, ylim=c(0,1), cex.axis=1.5, cex.main=1.4)
plot(ord_topics$theta[,1], type="l") plot(ord_topics$theta[,2], type="l")
K <- 2 levels <- 7 del_beta_1 <- 10*rep(10^{1:levels}); system.time(ord_topics <- ord_topics(counts, K=2, ztree_options=2, tol=1, init_method = "taddy", adapt.method="smash")); barplot(t(omega), col = 2:(K+1), axisnames = F, space = 0, border = NA, main=paste("No. of clusters=", K), las=1, ylim=c(0,1), cex.axis=1.5, cex.main=1.4)
plot(ord_topics$theta[,1], type="l") plot(ord_topics$theta[,2], type="l")
K <- 2 levels <- 7 del_beta_1 <- rep(2,levels) system.time(ord_topics <- ord_topics(counts, K=2, del_beta = del_beta_1, a_mu=10, b_mu=30, ztree_options=2, tol=0.001)); barplot(t(ord_topics$omega), col = 2:(K+1), axisnames = F, space = 0, border = NA, main=paste("No. of clusters=", K), las=1, ylim=c(0,1), cex.axis=1.5, cex.main=1.4)
plot(ord_topics$theta[,1], type="l") plot(ord_topics$theta[,2], type="l")
K <- 2 levels <- 7 del_beta_1 <- rep(50000,levels) system.time(ord_topics <- ord_topics(counts, K=2, del_beta = del_beta_1, a_mu=10, b_mu=30, ztree_options=2, tol=0.001)); barplot(t(ord_topics$omega), col = 2:(K+1), axisnames = F, space = 0, border = NA, main=paste("No. of clusters=", K), las=1, ylim=c(0,1), cex.axis=1.5, cex.main=1.4)
plot(ord_topics$theta[,1], type="l") plot(ord_topics$theta[,2], type="l")
K <- 2 levels <- 7 del_beta_1 <- del_beta system.time(ord_topics <- ord_topics(counts, K=2, del_beta = del_beta_1, a_mu=10, b_mu=30, ztree_options=2, tol=0.001)); barplot(t(ord_topics$omega), col = 2:(K+1), axisnames = F, space = 0, border = NA, main=paste("No. of clusters=", K), las=1, ylim=c(0,1), cex.axis=1.5, cex.main=1.4)
plot(ord_topics$theta[,1], type="l") plot(ord_topics$theta[,2], type="l")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.