# wch's setting 
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

The TSSB prior

To illustrate the TSSB prior, we load the bitphyloR package and initiate a Node object.

library(bitphyloR)
n0 <- Node$new()

The Node object is the basic class representing components in the mixture.

n0

Now, let's initiate a TSSB object. In this case,

fixedSeed <- 9
set.seed(fixedSeed)
tssb <- TSSB$new(n0, data = matrix(rnorm(50),50,1), dpAlpha = 1, dpGamma = 1,
                 dpLambda = 1)
tssb

The tssb object generates an initial configuration of weights and tree.

res <- tssb$GetMixture()
barplot(res$weight)
res2 <- tssb$ConvertTssbToIgraph()
g <- res2$g
plot.igraph(g, layout = layout.reingold.tilford(g), 
     vertex.label = get.vertex.attribute(g, name = "size"),
     vertex.size = 15, edge.arrow.size = 0.5)

There are several empty leaf nodes. They can be removed by the CullTree function

tssb$CullTree()
res3 <- tssb$ConvertTssbToIgraph()
g <- res3$g
plot.igraph(g, layout = layout.reingold.tilford(g), 
     vertex.label = get.vertex.attribute(g, name = "size"),
     vertex.size = 15, edge.arrow.size = 0.5)

Inference via MCMC

Let's test a simple dataset.

set.seed(fixedSeed)
m <- 200
dims <- 2
testData <- rbind(matrix(0.051*rnorm(m)+0.4,m/2,dims),
                  matrix( 0.01*rnorm(m)+0.6,m/2,dims))
plot(testData[,1], testData[,2])

Get an estimate of hyperparameters

empCov <- cov(testData)
priorSigmaScale = empCov + diag(rep(1e-6,dims), dims)
priorDriftScale = priorSigmaScale

We are going fit the data with a tree-structured normal mixture model. To do that, initiate a Normal node object

n0 <- Normal$new(priorSigmaScale = priorSigmaScale, priorDriftScale = priorDriftScale)
n0

Next, we initiate a TssbMCMC node object for MCMC functions. The initialiser also generates a guess of the tree structure and mass distribution based on tssb parameters.

tssbMCMC <- TssbMCMC$new(n0, data = testData, dpAlpha = 1, dpGamma = 1, dpLambda = 1)
g0 <- tssbMCMC$ConvertTssbToIgraph()$g
plot.igraph(g0, layout = layout.reingold.tilford(g0), 
     vertex.label = get.vertex.attribute(g0, name = "size"),
     vertex.size = 15, edge.arrow.size = 0.5)

Sample the assignment for each point to clusters

par(mfrow = c(1, 2))
plot.igraph(g0, layout = layout.reingold.tilford(g0), 
     vertex.label = get.vertex.attribute(g0, name = "size"),
     vertex.size = 15, edge.arrow.size = 0.5)
tssbMCMC$ResampleAssignments()
g1 <- tssbMCMC$ConvertTssbToIgraph()$g
plot.igraph(g1, layout = layout.reingold.tilford(g1), 
     vertex.label = get.vertex.attribute(g1, name = "size"),
     vertex.size = 15, edge.arrow.size = 0.5)

Remove empty path

par(mfrow = c(1, 2))
plot.igraph(g1, layout = layout.reingold.tilford(g1), 
     vertex.label = get.vertex.attribute(g1, name = "size"),
     vertex.size = 15, edge.arrow.size = 0.5)
tssbMCMC$CullTree()
g2 <- tssbMCMC$ConvertTssbToIgraph()$g
plot.igraph(g2, layout = layout.reingold.tilford(g2), 
     vertex.label = get.vertex.attribute(g2, name = "size"),
     vertex.size = 15, edge.arrow.size = 0.5)

Sample cluster parameters

tssbMCMC$ResampleNodeParameters()

Sample cluster hyperparameters, normally parameters of the transition. Note that this function can only be called from the root node object

n0$ResampleHyperParams()

Sample sticks according to the new configuration

par(mfrow = c(1, 2))
tssbMCMC$ResampleSticks()
res <- tssbMCMC$GetMixture()
barplot(res$weight)
sum(res$weight)
tssbMCMC$ResampleStickOrders()
res <- tssbMCMC$GetMixture()
barplot(res$weight)
sum(res$weight)

Compare the trees after 10 iterations

par(mfrow = c(1, 2))
g <- tssbMCMC$ConvertTssbToIgraph()$g
plot.igraph(g, layout = layout.reingold.tilford(g), 
     vertex.label = get.vertex.attribute(g, name = "size"),
     vertex.size = 15, edge.arrow.size = 0.5)

trace <- RunNormal(data = testData, numOfMCMC = 10, tssb = tssbMCMC)
g <- tssbMCMC$ConvertTssbToIgraph()$g
plot.igraph(g, layout = layout.reingold.tilford(g), 
     vertex.label = get.vertex.attribute(g, name = "size"),
     vertex.size = 15, edge.arrow.size = 0.5)

Now, let's check a longer run,

traces <- RunNormal(data = testData, numOfMCMC = 1000, tssb = tssbMCMC)

Plot the marginal likelihood trace and its autocorrelation function

par(mfrow = c(1, 2))
plot(traces$likelihood, type = "l")
acf(traces$likelihood, lag.max = 200)


keyuan/bitphyloR documentation built on May 20, 2019, 9:20 a.m.