Nothing
### R code from vignette source 'movMF.Rnw'
###################################################
### code chunk number 1: movMF.Rnw:79-94
###################################################
set.seed(1234)
options(width=65, prompt = "R> ", continue = "+ ", useFancyQuotes = FALSE)
cache <- TRUE
library("slam")
library("lattice")
library("movMF")
palette(colorspace::heat_hcl(4))
ltheme <- canonical.theme("pdf", FALSE)
for (i in grep("padding", names(ltheme$layout.heights))) {
ltheme$layout.heights[[i]] <- 0.2
}
for (i in grep("padding", names(ltheme$layout.widths))) {
ltheme$layout.widths[[i]] <- 0
}
lattice.options(default.theme = ltheme)
###################################################
### code chunk number 2: movMF.Rnw:97-156
###################################################
rotationMatrix <- function(mu) {
beta <- asin(mu[1])
alpha <- atan( - mu[2] / mu[3]) + sign(mu[2] * mu[3]) * (mu[3] < 0) * pi
cosa <- cos(alpha); cosb <- cos(beta)
sina <- sin(alpha); sinb <- sin(beta)
matrix(c(cosb, sina * sinb, - cosa * sinb,
0, cosa, sina,
sinb, - sina * cosb, cosa * cosb),
nrow = 3)
}
getIsolines <- function(d, mu, length = 100) {
theta <- seq(0, 2*pi, length = length)
sinphi <- sin(acos(d))
tcrossprod(cbind(cos(theta) * sinphi, sin(theta) * sinphi, d),
rotationMatrix(mu))
}
plotGlobe <- function(x, class, pars = NULL, main = "", theta = 0, phi = 0,
Q = c(0.5, 0.95), n = 100000,
xlim = c(-1, 1), ylim = c(-1, 1), zlim = c(-1, 1), ...) {
pmat <- persp(0:1, 0:1, matrix(, 2, 2), theta = theta, phi = phi,
xlim = xlim, ylim = ylim, zlim = zlim, scale = FALSE,
xlab="x", ylab="y", zlab="z", main = main, ...)
trans3d <- function(x, y, z) {
tr <- cbind(x, y, z, 1) %*% pmat
list(x = tr[, 1]/tr[, 4],
y = tr[, 2]/tr[, 4])
}
x <- x / row_norms(x)
x3D <- trans3d(x[,1], x[,2], x[,3])
theta <- seq(0, 2*pi, length = 41)
phi <- seq(0, pi/2, length = 21)
x <- cos(theta) %o% sin(phi)
y <- sin(theta) %o% sin(phi)
z <- rep(1, length(theta)) %o% cos(phi)
for (j in seq(phi)[-1]) for (i in seq(theta)[-1]) {
idx <- rbind(c(i-1,j-1), c(i,j-1), c(i,j), c(i-1,j))
polygon(trans3d(x[idx], y[idx], z[idx]), border = "grey")
}
points(x3D$x, x3D$y, col = class, pch = 20)
if (!is.null(pars)) {
kappa <- row_norms(pars)
d <- sapply(kappa, function(K) rmovMF(n, K * c(1, 0, 0))[,1])
mu <- pars / kappa
for (i in seq_along(Q)) {
M <- apply(d, 2, quantile, 1-Q[i])
isos <- lapply(seq_len(nrow(mu)), function(i) getIsolines(M[i], mu[i,]))
isostrans <- lapply(isos, function(x) trans3d(x[,1], x[,2], x[,3]))
lapply(seq_along(isostrans), function(j)
polygon(isostrans[[j]]$x, isostrans[[j]]$y, border = j, lwd = 2, lty = i))
}
mu3D <- trans3d(mu[,1], mu[,2], mu[,3])
points(mu3D$x, mu3D$y, pch = 4, lwd = 2, col = seq_len(nrow(mu)))
}
invisible(pmat)
}
###################################################
### code chunk number 3: corpus
###################################################
data("useR_2008_abstracts", package = "movMF")
###################################################
### code chunk number 4: illustration1
###################################################
data("household", package = "HSAUR3")
x <- as.matrix(household[, c(1:2, 4)])
gender <- household$gender
theta <- rbind(female = movMF(x[gender == "female", ], k = 1)$theta,
male = movMF(x[gender == "male", ], k = 1)$theta)
set.seed(2008)
vMFs <- lapply(1:5, function(K)
movMF(x, k = K, control= list(nruns = 20)))
###################################################
### code chunk number 5: movMF.Rnw:484-488
###################################################
kappa <- row_norms(theta)
tab2 <- table(max.col(vMFs[[2]]$P), household$gender)
mc2 <- min(c(sum(diag(tab2)), sum(tab2) - sum(diag(tab2))))
tab3 <- table(max.col(vMFs[[3]]$P), household$gender)
###################################################
### code chunk number 6: movMF.Rnw:510-533
###################################################
par(mar = 0.1 + c(0, 0.5, 2, 0), mfrow = c(2, 2))
plotGlobe(x, household$gender, main = "Data",
theta = -30, phi = 10, zlim = c(0, 1))
plotGlobe(x, household$gender, theta, main = "Known group membership",
theta = -30, phi = 10, zlim = c(0, 1))
mu <- lapply(vMFs, function(x) x$theta / row_norms(x$theta))
ordering <- lapply(mu, function(x) order(x[,1], decreasing = TRUE))
class <- alpha <- kappa <- theta <- vector("list", length(ordering))
for (i in seq_along(ordering)) {
alpha[[i]] <- vMFs[[i]]$alpha[ordering[[i]]]
mu[[i]] <- mu[[i]][ordering[[i]],,drop=FALSE]
theta[[i]] <- vMFs[[i]]$theta[ordering[[i]],,drop=FALSE]
kappa[[i]] <- row_norms(vMFs[[i]]$theta[ordering[[i]],,drop=FALSE])
class[[i]] <- max.col(vMFs[[i]]$P[,ordering[[i]]])
}
for (k in 2:3) {
plotGlobe(x, class[[k]], theta[[k]],
main = paste("Mixtures of vMFs with K =", k),
theta = -30, phi = 10, zlim = c(0, 1))
}
###################################################
### code chunk number 7: movMF.Rnw:740-751
###################################################
X2 <- cbind(alpha[[2]], mu[[2]], kappa[[2]], c(BIC(vMFs[[2]]), NA))
X2 <- format(round(X2, digits = 2), nsmall = 2)
X2[1,6] <- paste("$", X2[1,6], "$", sep = "")
X2 <- gsub("NA", " ", X2)
X2 <- apply(cbind(c("$K = 2$", ""), X2), 1, paste, collapse = "&")
X3 <- cbind(alpha[[3]], mu[[3]], kappa[[3]], c(BIC(vMFs[[3]]), rep(NA, 2)))
X3 <- format(round(X3, digits = 2), nsmall = 2)
X3[1,6] <- paste("$", X3[1,6], "$", sep = "")
X3 <- gsub("NA", " ", X3)
X3 <- apply(cbind(c("$K = 3$", rep("", 2)), X3), 1, paste, collapse = "&")
###################################################
### code chunk number 8: movMF.Rnw:782-783
###################################################
cat(paste("R> ", prompt(movMF, filename = NA)$usage[[2]]))
###################################################
### code chunk number 9: movMF.Rnw:897-898 (eval = FALSE)
###################################################
## data("household", package = "HSAUR3")
## x <- as.matrix(household[, c(1:2, 4)])
## gender <- household$gender
##
## theta <- rbind(female = movMF(x[gender == "female", ], k = 1)$theta,
## male = movMF(x[gender == "male", ], k = 1)$theta)
##
## set.seed(2008)
## vMFs <- lapply(1:5, function(K)
## movMF(x, k = K, control= list(nruns = 20)))
###################################################
### code chunk number 10: movMF.Rnw:903-904
###################################################
sapply(vMFs, BIC)
###################################################
### code chunk number 11: movMF.Rnw:1405-1406 (eval = FALSE)
###################################################
## data("useR_2008_abstracts", package = "movMF")
###################################################
### code chunk number 12: movMF.Rnw:1430-1442
###################################################
library("tm")
abstracts_titles <-
apply(useR_2008_abstracts[,c("Title", "Abstract")],
1, paste, collapse = " ")
useR_2008_abstracts_corpus <- VCorpus(VectorSource(abstracts_titles))
useR_2008_abstracts_DTM <-
DocumentTermMatrix(useR_2008_abstracts_corpus,
control = list(
tokenize = "MC",
stopwords = TRUE,
stemming = TRUE,
wordLengths = c(3, Inf)))
###################################################
### code chunk number 13: movMF.Rnw:1457-1460
###################################################
library("slam")
ColSums <- col_sums(useR_2008_abstracts_DTM > 0)
sort(ColSums, decreasing = TRUE)[1:10]
###################################################
### code chunk number 14: movMF.Rnw:1468-1471
###################################################
useR_2008_abstracts_DTM <-
useR_2008_abstracts_DTM[, ColSums >= 5 & ColSums <= 90]
useR_2008_abstracts_DTM
###################################################
### code chunk number 15: movMF.Rnw:1476-1477
###################################################
useR_2008_abstracts_DTM <- weightTfIdf(useR_2008_abstracts_DTM)
###################################################
### code chunk number 16: fit-movMF (eval = FALSE)
###################################################
## set.seed(2008)
## library("movMF")
## Ks <- c(1:5, 10, 20)
## splits <- sample(rep(1:10, length.out = nrow(useR_2008_abstracts_DTM)))
## useR_2008_movMF <-
## lapply(Ks, function(k)
## sapply(1:10, function(s) {
## m <- movMF(useR_2008_abstracts_DTM[splits != s,],
## k = k, nruns = 20)
## logLik(m, useR_2008_abstracts_DTM[splits == s,])
## }))
## useR_2008_movMF_common <-
## lapply(Ks, function(k)
## sapply(1:10, function(s) {
## m <- movMF(useR_2008_abstracts_DTM[splits != s,],
## k = k, nruns = 20,
## kappa = list(common = TRUE))
## logLik(m, useR_2008_abstracts_DTM[splits == s,])
## }))
###################################################
### code chunk number 17: movMF.Rnw:1512-1525
###################################################
if(cache & file.exists("movMF.rda")) {
load("movMF.rda")
library("movMF")
Ks <- c(1:5, 10, 20)
} else {
set.seed(2008)
library("movMF")
Ks <- c(1:5, 10, 20)
splits <- sample(rep(1:10, length.out = nrow(useR_2008_abstracts_DTM)))
useR_2008_movMF <-
lapply(Ks, function(k)
sapply(1:10, function(s) {
m <- movMF(useR_2008_abstracts_DTM[splits != s,],
k = k, nruns = 20)
logLik(m, useR_2008_abstracts_DTM[splits == s,])
}))
useR_2008_movMF_common <-
lapply(Ks, function(k)
sapply(1:10, function(s) {
m <- movMF(useR_2008_abstracts_DTM[splits != s,],
k = k, nruns = 20,
kappa = list(common = TRUE))
logLik(m, useR_2008_abstracts_DTM[splits == s,])
}))
if(cache) {
save(useR_2008_movMF, useR_2008_movMF_common,
file = "movMF.rda")
} else {
if(file.exists("movMF.rda")) file.remove("movMF.rda")
}
}
###################################################
### code chunk number 18: movMF.Rnw:1538-1550
###################################################
logLiks <- data.frame(logLik = c(unlist(useR_2008_movMF),
unlist(useR_2008_movMF_common)),
K = c(rep(Ks, sapply(useR_2008_movMF, length)),
rep(Ks, sapply(useR_2008_movMF_common, length))),
Dataset = seq_len(length(useR_2008_movMF[[1]])),
Method = factor(rep(1:2, each = length(unlist(useR_2008_movMF))),
1:2, c("free", "common")))
logLiks$logLik <- logLiks$logLik - rep(rep(with(logLiks, tapply(logLik, Dataset, mean)), length(Ks)), 2)
print(xyplot(logLik ~ K | Method, data = logLiks, groups = Dataset, type = "l", lty = 1,
xlab = "Number of components", ylab = "Predictive log-likelihood",
strip = strip.custom(factor.levels =
expression(paste("Free ", kappa), paste("Common ", kappa)))))
###################################################
### code chunk number 19: movMF.Rnw:1563-1566
###################################################
set.seed(2008)
best_model <- movMF(useR_2008_abstracts_DTM, k = 2, nruns = 20,
kappa = list(common = TRUE))
###################################################
### code chunk number 20: movMF.Rnw:1572-1574
###################################################
apply(coef(best_model)$theta, 1, function(x)
colnames(coef(best_model)$theta)[order(x, decreasing = TRUE)[1:10]])
###################################################
### code chunk number 21: movMF.Rnw:1587-1594
###################################################
clustering <- predict(best_model)
keywords <- useR_2008_abstracts[, "Keywords"]
keywords <- sapply(keywords,
function(x) sapply(strsplit(x, ", ")[[1]], function(y)
strsplit(y, "-")[[1]][1]))
tab <- table(Keyword = unlist(keywords),
Component = rep(clustering, sapply(keywords, length)))
###################################################
### code chunk number 22: movMF.Rnw:1600-1601
###################################################
(tab <- tab[rowSums(tab) > 8, ])
###################################################
### code chunk number 23: movMF.Rnw:1614-1618
###################################################
library("vcd")
mosaic(tab, shade = TRUE,
labeling_args = list(rot_labels = 0, just_labels = c("center", "right"),
pos_varnames = c("left", "right"), rot_varnames = 0))
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.