Nothing
## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
options(rmarkdown.html_vignette.check_title = FALSE)
## ---- include = FALSE---------------------------------------------------------
### Don't overload CRAN servers
### https://stackoverflow.com/questions/28961431/computationally-heavy-r-vignettes
is_check <- ("CheckExEnv" %in% search()) || any(c("_R_CHECK_TIMINGS_",
"_R_CHECK_LICENSE_") %in% names(Sys.getenv()))
## ---- message=FALSE-----------------------------------------------------------
library(cmfrec)
library(Matrix)
library(MatrixExtra)
library(recommenderlab)
data("MovieLense")
X <- as.coo.matrix(MovieLense@data)
str(X)
## -----------------------------------------------------------------------------
subsample_coo_matrix <- function(X, indices) {
X@i <- X@i[indices]
X@j <- X@j[indices]
X@x <- X@x[indices]
return(X)
}
n_ratings <- length(X@x)
set.seed(123)
ix_train <- sample(n_ratings, floor(0.75 * n_ratings), replace=FALSE)
X_train <- subsample_coo_matrix(X, ix_train)
X_test <- subsample_coo_matrix(X, -ix_train)
## ---- eval=FALSE--------------------------------------------------------------
# model.classic <- CMF(X_train, k=25, lambda=0.1, scale_lam=TRUE, verbose=FALSE)
## ---- echo=FALSE--------------------------------------------------------------
### Don't overload CRAN servers
if (!is_check) {
model.classic <- CMF(X_train, k=25, lambda=0.1, scale_lam=TRUE, verbose=FALSE)
} else {
model.classic <- CMF(X_train, k=5, lambda=0.1, scale_lam=TRUE, verbose=FALSE,
niter=2, nthreads=1)
}
## -----------------------------------------------------------------------------
print_rmse <- function(X_test, X_hat, model_name) {
rmse <- sqrt(mean( (X_test@x - X_hat@x)^2 ))
cat(sprintf("RMSE for %s is: %.4f\n", model_name, rmse))
}
pred_classic <- predict(model.classic, X_test)
print_rmse(X_test, pred_classic, "classic model")
## -----------------------------------------------------------------------------
model.baseline <- MostPopular(X_train, lambda=10, scale_lam=FALSE)
pred_baseline <- predict(model.baseline, X_test)
print_rmse(X_test, pred_baseline, "non-personalized model")
## ---- eval=FALSE--------------------------------------------------------------
# model.improved <- CMF(X_train, k=25, lambda=0.1, scale_lam=TRUE,
# add_implicit_features=TRUE, w_main=0.75, w_implicit=0.25,
# use_cg=FALSE, niter=30, verbose=FALSE)
# pred_improved <- predict(model.improved, X_test)
# print_rmse(X_test, pred_improved, "improved classic model")
## ---- echo=FALSE--------------------------------------------------------------
### Don't overload CRAN servers
if (!is_check) {
model.improved <- CMF(X_train, k=25, lambda=0.1, scale_lam=TRUE,
add_implicit_features=TRUE, w_main=0.75, w_implicit=0.25,
use_cg=FALSE, niter=30, verbose=FALSE)
} else {
model.improved <- CMF(X_train, k=5, lambda=0.1, scale_lam=TRUE,
add_implicit_features=TRUE, w_main=0.75, w_implicit=0.25,
use_cg=FALSE, verbose=FALSE,
niter=2, nthreads=1)
}
pred_improved <- predict(model.improved, X_test)
print_rmse(X_test, pred_improved, "improved classic model")
## -----------------------------------------------------------------------------
U <- MovieLenseUser
U$id <- NULL
U$zipcode <- NULL
U$age2 <- U$age^2
### Note that `cmfrec` does not standardize features beyond mean centering
U$age <- (U$age - mean(U$age)) / sd(U$age)
U$age2 <- (U$age2 - mean(U$age2)) / sd(U$age2)
U <- model.matrix(~.-1, data=U)
I <- MovieLenseMeta
I$title <- NULL
I$url <- NULL
I$year <- ifelse(is.na(I$year), median(I$year, na.rm=TRUE), I$year)
I$year2 <- I$year^2
I$year <- (I$year - mean(I$year)) / sd(I$year)
I$year2 <- (I$year2 - mean(I$year2)) / sd(I$year2)
I <- as.coo.matrix(I)
cat(dim(U), "\n")
cat(dim(I), "\n")
## ---- eval=FALSE--------------------------------------------------------------
# model.w.sideinfo <- CMF(X_train, U=U, I=I, NA_as_zero_item=TRUE,
# k=25, lambda=0.1, scale_lam=TRUE,
# niter=30, use_cg=FALSE, include_all_X=FALSE,
# w_main=0.75, w_user=0.5, w_item=0.5, w_implicit=0.5,
# center_U=FALSE, center_I=FALSE,
# verbose=FALSE)
# pred_side_info <- predict(model.w.sideinfo, X_test)
# print_rmse(X_test, pred_side_info, "model with side info")
## ---- echo=FALSE--------------------------------------------------------------
### Don't overload CRAN servers
if (!is_check) {
model.w.sideinfo <- CMF(X_train, U=U, I=I, NA_as_zero_item=TRUE,
k=25, lambda=0.1, scale_lam=TRUE,
niter=30, use_cg=FALSE, include_all_X=FALSE,
w_main=0.75, w_user=0.5, w_item=0.5, w_implicit=0.5,
center_U=FALSE, center_I=FALSE,
verbose=FALSE)
} else {
model.w.sideinfo <- CMF(X_train, U=U, I=I, NA_as_zero_item=TRUE,
k=5, lambda=0.1, scale_lam=TRUE, scale_lam_sideinfo=TRUE,
use_cg=FALSE, include_all_X=FALSE,
w_main=0.75, w_user=0.5, w_item=0.5, w_implicit=0.5,
center_U=FALSE, center_I=FALSE,
verbose=FALSE, niter=2, nthreads=1)
}
pred_side_info <- predict(model.w.sideinfo, X_test)
print_rmse(X_test, pred_side_info, "model with side info")
## -----------------------------------------------------------------------------
library(kableExtra)
calc_rmse <- function(X_test, X_hat) {
return(sqrt(mean( (X_test@x - X_hat@x)^2 )))
}
results <- data.frame(
NonPersonalized = calc_rmse(X_test, pred_baseline),
ClassicalModel = calc_rmse(X_test, pred_classic),
ClassicPlusImplicit = calc_rmse(X_test, pred_improved),
CollectiveModel = calc_rmse(X_test, pred_side_info)
)
results <- as.data.frame(t(results))
names(results) <- "RMSE"
results %>%
kable() %>%
kable_styling()
## ---- eval=FALSE--------------------------------------------------------------
# ### Re-fitting the earlier model to all the data,
# ### this time *without* scaled regularization
# model.classic <- CMF(X, k=20, lambda=10, scale_lam=FALSE, verbose=FALSE)
# model.w.sideinfo <- CMF(X, U=U, I=I, k=20, lambda=10, scale_lam=FALSE,
# w_main=0.75, w_user=0.125, w_item=0.125,
# verbose=FALSE)
## ---- echo=FALSE--------------------------------------------------------------
### Don't overload CRAN servers
if (!is_check) {
model.classic <- CMF(X, k=20, lambda=10, scale_lam=FALSE, verbose=FALSE)
model.w.sideinfo <- CMF(X, U=U, I=I, k=20, lambda=10, scale_lam=FALSE,
w_main=0.75, w_user=0.125, w_item=0.125,
verbose=FALSE)
} else {
model.classic <- CMF(X, k=5, lambda=10, scale_lam=FALSE, verbose=FALSE,
niter=2, nthreads=1)
model.w.sideinfo <- CMF(X, U=U, I=I, k=5, lambda=10, scale_lam=FALSE,
w_main=0.75, w_user=0.125, w_item=0.125,
verbose=FALSE, niter=2, nthreads=1)
}
## -----------------------------------------------------------------------------
user_to_recommend <- 10
### Note: slicing of 'X' is provided by 'MatrixExtra',
### returning a 'sparseVector' object as required by cmfrec
topN(model.classic, user=user_to_recommend, n=10,
exclude=X[user_to_recommend, , drop=TRUE])
## -----------------------------------------------------------------------------
### A handy function for visualizing recommendations
movie_names <- colnames(X)
n_ratings <- colSums(as.csc.matrix(X, binary=TRUE))
avg_ratings <- colSums(as.csc.matrix(X)) / n_ratings
print_recommended <- function(rec, txt) {
cat(txt, ":\n",
paste(paste(1:length(rec), ". ", sep=""),
movie_names[rec],
" - Avg rating:", round(avg_ratings[rec], 2),
", #ratings: ", n_ratings[rec],
collapse="\n", sep=""),
"\n", sep="")
}
recommended <- topN(model.w.sideinfo, user=user_to_recommend, n=5,
exclude=X[user_to_recommend, , drop=TRUE])
print_recommended(recommended, "Recommended for user_id=10")
## -----------------------------------------------------------------------------
recommended_new <- topN_new(model.w.sideinfo, n=5,
exclude=X[user_to_recommend, , drop=TRUE],
X=X[user_to_recommend, , drop=TRUE],
U=U[user_to_recommend, , drop=TRUE])
print_recommended(recommended_new, "Recommended for user_id=10 as new user")
## -----------------------------------------------------------------------------
recommended_new <- topN_new(model.w.sideinfo, n=5,
exclude=X[user_to_recommend, , drop=TRUE],
X=X[user_to_recommend, , drop=TRUE])
print_recommended(recommended_new, "Recommended for user_id=10 as new user (NO sideinfo)")
## -----------------------------------------------------------------------------
recommended_cold <- topN_new(model.w.sideinfo, n=5,
exclude=X[user_to_recommend, , drop=TRUE],
U=U[user_to_recommend, , drop=TRUE])
print_recommended(recommended_cold, "Recommended for user_id=10 as new user (NO ratings)")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.