Nothing
## ----setup, include = FALSE----------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ---- eval=T, include=T--------------------------------------------------
library(rare)
library(Matrix)
# Design matrix = document-term matrix
dim(data.dtm)
# Ratings for the reviews in data.dtm
length(data.rating)
## ---- fig.height=4, fig.width=7, eval=T, include=T-----------------------
hist(colMeans(sign(data.dtm)) * 100, breaks = 50, main = "Histogram of Adjective Rarity in the TripAdvisor Sample",
xlab = "% of Reviews Using Adjective")
## ---- results='hide', fig.height=3, fig.width=10, eval=T, include=T------
par(cex=0.35)
plot(as.dendrogram(data.hc))
## ---- eval=T, include=T--------------------------------------------------
set.seed(100)
ts <- sample(1:length(data.rating), 400) # Train set indices
## ---- eval=T, include=F--------------------------------------------------
load("vignette_results.RData")
## ---- eval=F, include=T--------------------------------------------------
# ourfit <- rarefit(y = data.rating[ts], X = data.dtm[ts, ], hc = data.hc, lam.min.ratio = 1e-6,
# nlam = 20, nalpha = 10, rho = 0.01, eps1 = 1e-5, eps2 = 1e-5, maxite = 1e4)
## ---- eval=F, include=T--------------------------------------------------
# # Cross validation
# ourfit.cv <- rarefit.cv(ourfit, y = data.rating[ts], X = data.dtm[ts, ],
# rho = 0.01, eps1 = 1e-5, eps2 = 1e-5, maxite = 1e4)
## ---- eval=F, include=T--------------------------------------------------
# # Prediction on test set
# pred <- rarefit.predict(ourfit, ourfit.cv, data.dtm[-ts, ])
# pred.error <- mean((pred - data.rating[-ts])^2)
# pred.error
## ---- eval=T, include=T--------------------------------------------------
# Find recovered groups at optimal beta and gamma
ibest.lambda <- ourfit.cv$ibest[1]
ibest.alpha <- ourfit.cv$ibest[2]
beta.opt <- ourfit$beta[[ibest.alpha]][, ibest.lambda]
gamma.opt <- ourfit$gamma[[ibest.alpha]][, ibest.lambda]
groups.opt <- group.recover(gamma.opt, ourfit$A)
length(groups.opt) # total number of aggregated groups
## ---- eval=T, include=T, results='hide', fig.height=3, fig.width=10------
# Visualize the groups at optimal beta and gamma
par(cex=0.35)
group.plot(beta.opt, gamma.opt, ourfit$A, data.hc)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.