# wch's setting knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
To illustrate the TSSB prior, we load the bitphyloR
package and initiate a Node
object.
library(bitphyloR) n0 <- Node$new()
The Node
object is the basic class representing components in the mixture.
n0
Now, let's initiate a TSSB
object. In this case,
fixedSeed <- 9 set.seed(fixedSeed) tssb <- TSSB$new(n0, data = matrix(rnorm(50),50,1), dpAlpha = 1, dpGamma = 1, dpLambda = 1) tssb
The tssb object generates an initial configuration of weights and tree.
res <- tssb$GetMixture() barplot(res$weight)
res2 <- tssb$ConvertTssbToIgraph() g <- res2$g plot.igraph(g, layout = layout.reingold.tilford(g), vertex.label = get.vertex.attribute(g, name = "size"), vertex.size = 15, edge.arrow.size = 0.5)
There are several empty leaf nodes. They can be removed by the CullTree
function
tssb$CullTree() res3 <- tssb$ConvertTssbToIgraph() g <- res3$g plot.igraph(g, layout = layout.reingold.tilford(g), vertex.label = get.vertex.attribute(g, name = "size"), vertex.size = 15, edge.arrow.size = 0.5)
Let's test a simple dataset.
set.seed(fixedSeed) m <- 200 dims <- 2 testData <- rbind(matrix(0.051*rnorm(m)+0.4,m/2,dims), matrix( 0.01*rnorm(m)+0.6,m/2,dims)) plot(testData[,1], testData[,2])
Get an estimate of hyperparameters
empCov <- cov(testData) priorSigmaScale = empCov + diag(rep(1e-6,dims), dims) priorDriftScale = priorSigmaScale
We are going fit the data with a tree-structured normal mixture model. To do that, initiate a Normal node object
n0 <- Normal$new(priorSigmaScale = priorSigmaScale, priorDriftScale = priorDriftScale) n0
Next, we initiate a TssbMCMC node object for MCMC functions. The initialiser also generates a guess of the tree structure and mass distribution based on tssb parameters.
tssbMCMC <- TssbMCMC$new(n0, data = testData, dpAlpha = 1, dpGamma = 1, dpLambda = 1) g0 <- tssbMCMC$ConvertTssbToIgraph()$g plot.igraph(g0, layout = layout.reingold.tilford(g0), vertex.label = get.vertex.attribute(g0, name = "size"), vertex.size = 15, edge.arrow.size = 0.5)
Sample the assignment for each point to clusters
par(mfrow = c(1, 2)) plot.igraph(g0, layout = layout.reingold.tilford(g0), vertex.label = get.vertex.attribute(g0, name = "size"), vertex.size = 15, edge.arrow.size = 0.5) tssbMCMC$ResampleAssignments() g1 <- tssbMCMC$ConvertTssbToIgraph()$g plot.igraph(g1, layout = layout.reingold.tilford(g1), vertex.label = get.vertex.attribute(g1, name = "size"), vertex.size = 15, edge.arrow.size = 0.5)
Remove empty path
par(mfrow = c(1, 2)) plot.igraph(g1, layout = layout.reingold.tilford(g1), vertex.label = get.vertex.attribute(g1, name = "size"), vertex.size = 15, edge.arrow.size = 0.5) tssbMCMC$CullTree() g2 <- tssbMCMC$ConvertTssbToIgraph()$g plot.igraph(g2, layout = layout.reingold.tilford(g2), vertex.label = get.vertex.attribute(g2, name = "size"), vertex.size = 15, edge.arrow.size = 0.5)
Sample cluster parameters
tssbMCMC$ResampleNodeParameters()
Sample cluster hyperparameters, normally parameters of the transition. Note that this function can only be called from the root node object
n0$ResampleHyperParams()
Sample sticks according to the new configuration
par(mfrow = c(1, 2)) tssbMCMC$ResampleSticks() res <- tssbMCMC$GetMixture() barplot(res$weight) sum(res$weight)
tssbMCMC$ResampleStickOrders() res <- tssbMCMC$GetMixture() barplot(res$weight) sum(res$weight)
Compare the trees after 10 iterations
par(mfrow = c(1, 2)) g <- tssbMCMC$ConvertTssbToIgraph()$g plot.igraph(g, layout = layout.reingold.tilford(g), vertex.label = get.vertex.attribute(g, name = "size"), vertex.size = 15, edge.arrow.size = 0.5) trace <- RunNormal(data = testData, numOfMCMC = 10, tssb = tssbMCMC) g <- tssbMCMC$ConvertTssbToIgraph()$g plot.igraph(g, layout = layout.reingold.tilford(g), vertex.label = get.vertex.attribute(g, name = "size"), vertex.size = 15, edge.arrow.size = 0.5)
Now, let's check a longer run,
traces <- RunNormal(data = testData, numOfMCMC = 1000, tssb = tssbMCMC)
Plot the marginal likelihood trace and its autocorrelation function
par(mfrow = c(1, 2)) plot(traces$likelihood, type = "l") acf(traces$likelihood, lag.max = 200)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.