Nothing
## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
options(rmarkdown.html_vignette.check_title = FALSE)
## ---- include = FALSE---------------------------------------------------------
### Don't overload CRAN servers
### https://stackoverflow.com/questions/28961431/computationally-heavy-r-vignettes
is_check <- ("CheckExEnv" %in% search()) || any(c("_R_CHECK_TIMINGS_",
"_R_CHECK_LICENSE_") %in% names(Sys.getenv()))
## ---- message=FALSE-----------------------------------------------------------
library(Matrix)
library(MatrixExtra)
library(data.table)
library(kableExtra)
library(recommenderlab)
library(cmfrec)
library(recometrics)
data(MovieLense)
X_raw <- MovieLense@data
### Converting it to implicit-feedback
X_implicit <- as.coo.matrix(filterSparse(X_raw, function(x) x >= 4))
str(X_implicit)
## -----------------------------------------------------------------------------
reco_split <- create.reco.train.test(
X_implicit,
users_test_fraction = NULL,
max_test_users = 100,
items_test_fraction = 0.3,
seed = 123
)
X_train <- reco_split$X_train ## Train data for test users
X_test <- reco_split$X_test ## Test data for test users
X_rem <- reco_split$X_rem ## Data to fit the model
users_test <- reco_split$users_test ## IDs of the test users
## -----------------------------------------------------------------------------
### Random recommendations (random latent factors)
set.seed(123)
UserFactors_random <- matrix(rnorm(nrow(X_test) * 5), nrow=5)
ItemFactors_random <- matrix(rnorm(ncol(X_test) * 5), nrow=5)
### Non-personalized recommendations
model_baseline <- cmfrec::MostPopular(as.coo.matrix(X_rem), implicit=TRUE)
item_biases <- model_baseline$matrices$item_bias
## ---- eval=FALSE--------------------------------------------------------------
# ### Typical implicit-feedback ALS model
# ### a.k.a. "WRMF" (weighted regularized matrix factorization)
# model_wrmf <- cmfrec::CMF_implicit(as.coo.matrix(X_rem), k=10, verbose=FALSE)
# UserFactors_wrmf <- t(cmfrec::factors(model_wrmf, X_train))
#
# ### As a comparison, this is the typical explicit-feedback model,
# ### implemented by software such as Spark,
# ### and called "Weighted-Lambda-Regularized Matrix Factorization".
# ### Note that it determines the user factors using the train+test data.
# model_wlr <- cmfrec::CMF(as.coo.matrix(X_raw[-users_test, ]),
# lambda=0.1, scale_lam=TRUE,
# user_bias=FALSE, item_bias=FALSE,
# k=10, verbose=FALSE)
# UserFactors_wlr <- t(cmfrec::factors(model_wlr, as.csr.matrix(X_raw)[users_test,]))
#
# ### This is a different explicit-feedback model which
# ### uses the same regularization for each user and item
# ### (as opposed to the "weighted-lambda" model) and which
# ### adds "implicit features", which are a binarized version
# ### of the input data, but without weights.
# ### Note that it determines the user factors using the train+test data.
# model_hybrid <- cmfrec::CMF(as.coo.matrix(X_raw[-users_test, ]),
# lambda=20, scale_lam=FALSE,
# user_bias=FALSE, item_bias=FALSE,
# add_implicit_features=TRUE,
# k=10, verbose=FALSE)
# UserFactors_hybrid <- t(cmfrec::factors(model_hybrid, as.csr.matrix(X_raw)[users_test,]))
## ---- echo=FALSE--------------------------------------------------------------
### Don't overload CRAN servers
if (!is_check) {
model_wrmf <- cmfrec::CMF_implicit(as.coo.matrix(X_rem), k=10, verbose=FALSE)
UserFactors_wrmf <- t(cmfrec::factors(model_wrmf, X_train))
model_wlr <- cmfrec::CMF(as.coo.matrix(X_raw[-users_test, ]),
lambda=0.1, scale_lam=TRUE,
user_bias=FALSE, item_bias=FALSE,
k=10, verbose=FALSE)
UserFactors_wlr <- t(cmfrec::factors(model_wlr, as.csr.matrix(X_raw)[users_test,]))
model_hybrid <- cmfrec::CMF(as.coo.matrix(X_raw[-users_test, ]),
lambda=20, scale_lam=FALSE,
user_bias=FALSE, item_bias=FALSE,
add_implicit_features=TRUE,
k=10, verbose=FALSE)
UserFactors_hybrid <- t(cmfrec::factors(model_hybrid, as.csr.matrix(X_raw)[users_test,]))
} else {
model_wrmf <- cmfrec::CMF_implicit(as.coo.matrix(X_rem), k=3,
verbose=FALSE, niter=2, nthreads=1)
UserFactors_wrmf <- t(cmfrec::factors(model_wrmf, X_train))
}
## ---- eval=!is_check----------------------------------------------------------
### Processing user side information
U <- as.data.table(MovieLenseUser)[-users_test, ]
mean_age <- mean(U$age)
sd_age <- sd(U$age)
levels_occ <- levels(U$occupation)
MatrixExtra::restore_old_matrix_behavior()
process.U <- function(U, mean_age,sd_age, levels_occ) {
U[, `:=`(
id = NULL,
age = (age - mean_age) / sd_age,
sex = as.numeric(sex == "M"),
occupation = factor(occupation, levels_occ),
zipcode = NULL
)]
U <- Matrix::sparse.model.matrix(~.-1, data=U)
U <- as.coo.matrix(U)
return(U)
}
U <- process.U(U, mean_age,sd_age, levels_occ)
U_train <- as.data.table(MovieLenseUser)[users_test, ]
U_train <- process.U(U_train, mean_age,sd_age, levels_occ)
### Processing item side information
I <- as.data.table(MovieLenseMeta)
mean_year <- mean(I$year, na.rm=TRUE)
sd_year <- sd(I$year, na.rm=TRUE)
I[
is.na(year), year := mean_year
][, `:=`(
title = NULL,
year = (year - mean_year) / sd_year,
url = NULL
)]
I <- as.coo.matrix(I)
### Manually re-creating a binarized matrix and weights
### that will mimic the WRMF model
X_rem_ones <- as.coo.matrix(mapSparse(X_rem, function(x) rep(1, length(x))))
W_rem <- as.coo.matrix(mapSparse(X_rem, function(x) x+1))
X_train_ones <- as.coo.matrix(mapSparse(X_train, function(x) rep(1, length(x))))
W_train <- as.coo.matrix(mapSparse(X_train, function(x) x+1))
## ---- eval=!is_check----------------------------------------------------------
### WRMF model, but with item biases/intercepts
model_bwrmf <- cmfrec::CMF(X_rem_ones, weight=W_rem, NA_as_zero=TRUE,
lambda=1, scale_lam=FALSE,
center=FALSE, user_bias=FALSE, item_bias=TRUE,
k=10, verbose=FALSE)
UserFactors_bwrmf <- t(cmfrec::factors(model_bwrmf, X_train_ones, weight=W_train))
### Collective WRMF model (taking user and item attributes)
model_cwrmf <- cmfrec::CMF_implicit(as.coo.matrix(X_rem), U=U, I=I,
NA_as_zero_user=TRUE, NA_as_zero_item=TRUE,
center_U=TRUE, center_I=TRUE,
lambda=0.1,
k=10, verbose=FALSE)
UserFactors_cwrmf <- t(cmfrec::factors(model_cwrmf, X_train, U=U_train))
### Collective WRMF plus item biases/intercepts
model_bcwrmf <- cmfrec::CMF(X_rem_ones, weight=W_rem, NA_as_zero=TRUE,
U=U, I=I, center_U=FALSE, center_I=FALSE,
NA_as_zero_user=TRUE, NA_as_zero_item=TRUE,
lambda=0.1, scale_lam=FALSE,
center=FALSE, user_bias=FALSE, item_bias=TRUE,
k=10, verbose=FALSE)
UserFactors_bcwrmf <- t(cmfrec::factors(model_bcwrmf, X_train_ones,
weight=W_train, U=U_train))
## ---- eval=FALSE--------------------------------------------------------------
# k <- 5 ## Top-K recommendations to evaluate
#
# ### Baselines
# metrics_random <- calc.reco.metrics(X_train, X_test,
# A=UserFactors_random,
# B=ItemFactors_random,
# k=k, all_metrics=TRUE)
# metrics_baseline <- calc.reco.metrics(X_train, X_test,
# A=NULL, B=NULL,
# item_biases=item_biases,
# k=k, all_metrics=TRUE)
#
# ### Simple models
# metrics_wrmf <- calc.reco.metrics(X_train, X_test,
# A=UserFactors_wrmf,
# B=model_wrmf$matrices$B,
# k=k, all_metrics=TRUE)
# metrics_wlr <- calc.reco.metrics(X_train, X_test,
# A=UserFactors_wlr,
# B=model_wlr$matrices$B,
# k=k, all_metrics=TRUE)
# metrics_hybrid <- calc.reco.metrics(X_train, X_test,
# A=UserFactors_hybrid,
# B=model_hybrid$matrices$B,
# k=k, all_metrics=TRUE)
#
# ### More complex models
# metrics_bwrmf <- calc.reco.metrics(X_train, X_test,
# A=UserFactors_bwrmf,
# B=model_bwrmf$matrices$B,
# item_biases=model_bwrmf$matrices$item_bias,
# k=k, all_metrics=TRUE)
# metrics_cwrmf <- calc.reco.metrics(X_train, X_test,
# A=UserFactors_cwrmf,
# B=model_cwrmf$matrices$B,
# k=k, all_metrics=TRUE)
# metrics_bcwrmf <- calc.reco.metrics(X_train, X_test,
# A=UserFactors_bcwrmf,
# B=model_bcwrmf$matrices$B,
# item_biases=model_bcwrmf$matrices$item_bias,
# k=k, all_metrics=TRUE)
## ---- echo=FALSE--------------------------------------------------------------
if (!is_check) {
k <- 5 ## Top-K recommendations to evaluate
### Baselines
metrics_random <- calc.reco.metrics(X_train, X_test,
A=UserFactors_random,
B=ItemFactors_random,
k=k, all_metrics=TRUE)
metrics_baseline <- calc.reco.metrics(X_train, X_test,
A=NULL, B=NULL,
item_biases=item_biases,
k=k, all_metrics=TRUE)
### Simple models
metrics_wrmf <- calc.reco.metrics(X_train, X_test,
A=UserFactors_wrmf,
B=model_wrmf$matrices$B,
k=k, all_metrics=TRUE)
metrics_wlr <- calc.reco.metrics(X_train, X_test,
A=UserFactors_wlr,
B=model_wlr$matrices$B,
k=k, all_metrics=TRUE)
metrics_hybrid <- calc.reco.metrics(X_train, X_test,
A=UserFactors_hybrid,
B=model_hybrid$matrices$B,
k=k, all_metrics=TRUE)
### More complex models
metrics_bwrmf <- calc.reco.metrics(X_train, X_test,
A=UserFactors_bwrmf,
B=model_bwrmf$matrices$B,
item_biases=model_bwrmf$matrices$item_bias,
k=k, all_metrics=TRUE)
metrics_cwrmf <- calc.reco.metrics(X_train, X_test,
A=UserFactors_cwrmf,
B=model_cwrmf$matrices$B,
k=k, all_metrics=TRUE)
metrics_bcwrmf <- calc.reco.metrics(X_train, X_test,
A=UserFactors_bcwrmf,
B=model_bcwrmf$matrices$B,
item_biases=model_bcwrmf$matrices$item_bias,
k=k, all_metrics=TRUE)
} else {
k <- 5 ## Top-K recommendations to evaluate
### Baselines
metrics_random <- calc.reco.metrics(X_train, X_test,
A=UserFactors_random,
B=ItemFactors_random,
k=k, all_metrics=TRUE,
nthreads=1)
metrics_baseline <- calc.reco.metrics(X_train, X_test,
A=NULL, B=NULL,
item_biases=item_biases,
k=k, all_metrics=TRUE,
nthreads=1)
### Simple models
metrics_wrmf <- calc.reco.metrics(X_train, X_test,
A=UserFactors_wrmf,
B=model_wrmf$matrices$B,
k=k, all_metrics=TRUE,
nthreads=1)
}
## -----------------------------------------------------------------------------
metrics_baseline %>%
head(5) %>%
kable() %>%
kable_styling()
## ---- eval=FALSE--------------------------------------------------------------
# all_metrics <- list(
# `Random` = metrics_random,
# `Non-personalized` = metrics_baseline,
# `Weighted-Lambda` = metrics_wlr,
# `Hybrid-Explicit` = metrics_hybrid,
# `WRMF (a.k.a. iALS)` = metrics_wrmf,
# `bWRMF` = metrics_bwrmf,
# `CWRMF` = metrics_cwrmf,
# `bCWRMF` = metrics_bcwrmf
# )
# results <- all_metrics %>%
# lapply(function(df) as.data.table(df)[, lapply(.SD, mean)]) %>%
# data.table::rbindlist() %>%
# as.data.frame()
# row.names(results) <- names(all_metrics)
#
# results %>%
# kable() %>%
# kable_styling()
## ---- echo=FALSE--------------------------------------------------------------
if (!is_check) {
all_metrics <- list(
`Random` = metrics_random,
`Non-personalized` = metrics_baseline,
`Weighted-Lambda` = metrics_wlr,
`Hybrid-Explicit` = metrics_hybrid,
`WRMF (a.k.a. iALS)` = metrics_wrmf,
`bWRMF` = metrics_bwrmf,
`CWRMF` = metrics_cwrmf,
`bCWRMF` = metrics_bcwrmf
)
} else {
all_metrics <- list(
`Random` = metrics_random,
`Non-personalized` = metrics_baseline,
`WRMF (a.k.a. iALS)` = metrics_wrmf
)
}
results <- all_metrics %>%
lapply(function(df) as.data.table(df)[, lapply(.SD, mean)]) %>%
data.table::rbindlist() %>%
as.data.frame()
row.names(results) <- names(all_metrics)
results %>%
kable() %>%
kable_styling()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.