# Compute and show the reconstructed matrix E[V] for each model
# (using the best K when the algorithm has to choose it)
devtools::load_all()
library(dplyr)
library(tidyr)
library(ggplot2)
library(Matrix)
library(gtools)
library(logisticPCA)
library(unvotes)
text_components <- 7
text_features <- 7
plots_file <- "fig6_E_V_reconstruction"
data("unvotes100coldwar_absna")
data("paleo")
data("catalanparliament")
# Datasets used in this experiment in the paper
# (uncomment to reproduce the full experiment):
# dataset_names <- c("paleo", "unvotes100coldwar_absna", "parlamentcat")
# datasets <- list(paleo, unvotes100coldwar_absna, catalanparliament)
# Datasets for a fast check:
dataset_names <- c("parlamentcat")
datasets <- list(catalanparliament)
# Training parameters
# Use small parameters for fast checks
# Use larger parameters for accurate results
gibbs.samples <- 100
burnin <- 0.9
vb.iters <- 100
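# Illustrative larger settings (hypothetical values, not taken from the paper;
# tune them to your compute budget and check convergence before trusting them):
# gibbs.samples <- 2000
# vb.iters <- 1000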
# Hyper-parameters
K <- 100      # upper bound on the number of components
alpha <- 1
beta <- 1
gamma <- 1
df.results <- data.frame()
for (i in seq_along(dataset_names)) {
dataset <- dataset_names[i]
V <- datasets[[i]]
n_features <- nrow(V)  # avoid `F`, which masks the shorthand for FALSE
n_samples <- ncol(V)
print(plot_V(V, rowlabels=FALSE))  # explicit print so the plot renders inside the loop
# Gibbs DirBer ---------------------------------------------------------------
res <- BernoulliNMF(V,
K=K,
model="DirBer",
alpha=1,
beta=1,
gamma=1/K,
iter=gibbs.samples,
burnin = burnin)
# Expectations
E_Z <- res$E_Z
E_W <- res$E_W
E_H <- res$E_H
E_V <- expectation(res)
# Estimated log-likelihood from samples of the posterior of V.
like <- loglikelihood(res, V)$loglikelihood
# Save results
p <- plot_V(E_V)
filename <- paste0(plots_file, "_GibbsDirBer_", dataset, ".eps")
ggsave(p,
filename = filename,
height=20,
width=20,
units='cm')
df.results <- bind_rows(df.results, list(dataset = dataset,
model = "Gibbs Beta-Dir",
likelihood = like))
# Optional:
# * A single sample of V, W, H free from label switching
#   (Monte-Carlo estimates of E[W], E[H] may contain label switches).
# * Trace of the component sizes in the Monte-Carlo samples.
# * Expectation of the dictionary.
sample <- sample_VWH.GibbsDirBer(res)
print(plot_trace_size_components(res))
print(plot_dictionary(E_W, Kmax=10, rowlabels=TRUE, aspect.ratio = 7))
# Gibbs DirDir ---------------------------------------------------------------
res <- BernoulliNMF(V,
K = K,
model = "DirDir",
alpha = 1,
gamma = 1/K,
iter = gibbs.samples,
burnin = burnin)
# Expectations
E_Z <- res$E_Z
E_C <- res$E_C
E_W <- res$E_W
E_H <- res$E_H
E_V <- expectation(res)
# Estimated log-likelihood from samples of the posterior of V.
like <- loglikelihood(res, V)$loglikelihood
# Save results
df.results <- bind_rows(df.results, list(dataset = dataset,
model = "Gibbs Dir-Dir",
likelihood = like))
filename <- paste0(plots_file, "_GibbsDirDir_", dataset, ".eps")
p <- plot_V(E_V)
ggsave(p,
filename = filename,
height = 20,
width = 20,
units = 'cm')
# Optional:
# * A single sample of V, W, H free from label switching
#   (Monte-Carlo estimates of E[W], E[H] may contain label switches).
# * Trace of the component sizes in the Monte-Carlo samples.
# * Expectation of the dictionary.
sample <- sample_VWH.GibbsDirDir(res)
print(plot_trace_size_components(res))
print(plot_dictionary(E_W, Kmax=15, rowlabels=FALSE))
# VB DirBer ------------------------------------------------------------------
res <- BernoulliNMF(V,
K=K,
model="DirBer",
method="VB",
alpha=1,
beta=1,
gamma=1/K,
iter=vb.iters)
# Expectations
E_Z <- res$E_Z
E_W <- res$E_W
E_H <- res$E_H
E_V <- expectation(res)
# Estimated log-likelihood from the variational posterior of V.
like <- loglikelihood(res, V)$loglikelihood
# Save results
p <- plot_V(E_V)
filename <- paste0(plots_file, "_VBDirBer_", dataset, ".eps")
ggsave(p,
filename = filename,
height=20,
width=20,
units='cm')
lb <- max(res$lb)
df.results <- bind_rows(df.results, list(dataset = dataset,
model = "VB Beta-Dir",
likelihood = like,
lowerbound = lb))
# Optional:
# * Expectation of the dictionary.
print(plot_dictionary(E_W, Kmax=10, rowlabels=FALSE))
# VB Aspect ------------------------------------------------------------------
Kmax <- 9
likes <- rep(-Inf, Kmax)
lowerbounds <- rep(-Inf, Kmax)
models <- list()
for(k in 1:Kmax){
res <- BernoulliNMF(V, K=k, model="aspectRcpp", method="VB",
alpha=1, beta=1, gamma=1,
iter=vb.iters)
# Expectations
E_Z <- res$E_Z
E_W <- res$E_W
E_H <- res$E_H
# Estimated log-likelihood from the variational posterior of V.
like <- loglikelihood(res, V)$loglikelihood
# Save results for this number of components k.
lb <- max(res$lowerbounds)
likes[k] <- like
lowerbounds[k] <- lb
models[[k]] <- res
df.results <- bind_rows(df.results, list(dataset = dataset,
model = "VB Aspect",
likelihood = like,
lowerbound = lb,
K=k))
}
# Choose the best model
res <- models[[which.max(likes)]]
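# Hypothetical reporting helper (not in the original script): show which K the
# likelihood criterion selected for the VB Aspect model.
best_K <- which.max(likes)
cat("VB Aspect:", dataset, "- selected K =", best_K, "with log-likelihood", likes[best_K], "\n")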
# Expectations
E_Z <- res$E_Z
E_W <- res$E_W
E_H <- res$E_H
E_V <- expectation(res)
# Save results
p <- plot_V(E_V)
filename <- paste0(plots_file, "_VBAspect_", dataset, ".eps")
ggsave(p,
filename = filename,
height=20, width=20, units='cm')
# Optional:
# * Expectation of the dictionary.
print(plot_dictionary(E_W, Kmax=100, rowlabels=TRUE))
# logPCA ---------------------------------------------------------------------
Kmax <- 20
likes <- rep(-Inf, Kmax)
models <- list()
for(k in 1:Kmax){
logpca_model <- logisticPCA::logisticSVD(V, k, max_iters = 2000)
W <- logpca_model$A
H <- logpca_model$B
E_V <- fitted(logpca_model, type = "response")
# Per-entry Bernoulli probability of the observed value under the fit:
# P(V_fn = 1) = E_V_fn, so each entry contributes V*E_V + (1-V)*(1-E_V).
V.probs <- V*E_V + (1-V)*(1-E_V)
like <- sum(log(V.probs), na.rm = TRUE)
likes[k] <- like
models[[k]] <- logpca_model
df.results <- bind_rows(df.results, list(dataset = dataset,
model = "logPCA",
likelihood = like,
K=k))
}
# Choose the best model
res <- models[[which.max(likes)]]
# Expectation of the posterior of V
E_V <- fitted(res, type = "response")
# Save results
p <- plot_V(E_V)
filename <- paste0(plots_file, "_logPCA_", dataset, ".eps")
ggsave(p, filename = filename, height=20, width=20, units='cm')
}
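# A minimal sketch (not part of the original experiment) for comparing the
# collected log-likelihoods across datasets and models; it assumes df.results
# keeps the column names used above, and takes the best K per model where
# several values of K were tried.
results.summary <- df.results %>%
  group_by(dataset, model) %>%
  summarise(likelihood = max(likelihood, na.rm = TRUE), .groups = "drop") %>%
  arrange(dataset, desc(likelihood))
print(results.summary)
# Optionally persist the full table, e.g.:
# write.csv(df.results, paste0(plots_file, "_results.csv"), row.names = FALSE)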