# R/implicitcf.R

#' Collaborative Filtering for Implicit Feedback Datasets
#'
#' @param R A sparse implicit feedback matrix, where the rows typically represent users
#'   and the columns typically represent items. The elements of the matrix represent the
#'   number of times that the users have interacted with the items
#' @param alpha Used to calculate cost matrix \code{C} = 1 + \code{alpha} * \code{R}
#'   if \code{C1} is not specified
#' @param C1 Equal to the cost matrix (\code{C}) minus 1, which should be sparse
#' @param P A binary matrix, indicating whether or not the users interacted with the items
#' @param f The rank of the matrix factorization
#' @param lambda The squared L2-norm penalty on the latent row and column features
#' @param init_stdv Standard deviation to initialize the latent row and column features
#' @param max_iters How many iterations to run the algorithm for
#' @param parallel Whether to use the \code{foreach} package to parallelize the computation.
#'    See the examples for how to register a parallel backend.
#' @param quiet Whether or not to print out progress
#'
#' @return An S3 object of class \code{implicitcf} which is a list with the following components:
#'   \item{X}{the rank-\code{f} latent features for the users}
#'   \item{Y}{the rank-\code{f} latent features for the items}
#'   \item{loss_trace}{the loss function after each iteration. It should be non-increasing}
#'   \item{f}{the rank used}
#'   \item{lambda}{the penalty parameter used}
#'
#' @details This function implements the algorithm of Hu et al. (2008) in R using sparse matrices.
#'   It solves for \code{X} and \code{Y} by minimizing the loss function:
#'   \deqn{\sum_{u, i} c_{ui} (p_{ui} - x_u^Ty_i)^2 + \lambda (||X||_F^2 + ||Y||_F^2)}
#'
#'   It does this by iteratively solving for \eqn{x_u, u = 1, ...,}\code{nrow(R)} and
#'   \eqn{y_i, i = 1, ...,}\code{ncol(R)}, holding everything else constant.
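#'
#'   Holding \eqn{Y} fixed, each row of \eqn{X} has the closed-form update
#'   \deqn{x_u = (Y^T C^u Y + \lambda I)^{-1} Y^T C^u p_u,}
#'   where \eqn{C^u} is the diagonal matrix holding row \eqn{u} of the cost matrix \eqn{C};
#'   the update for \eqn{y_i} is analogous with the roles of \eqn{X} and \eqn{Y} swapped.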
#'
#'   Since implicit feedback data is typically sparse, the algorithm and this code are optimized
#'   to take advantage of the sparsity. That being said, the algorithm involves looping over
#'   the rows and columns of the matrix, which is slow in R.
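#'
#'   Concretely, each row solve uses the identity
#'   \eqn{Y^T C^u Y = Y^T Y + Y^T (C^u - I) Y}: \eqn{Y^T Y} is computed once per sweep, and
#'   \eqn{C^u - I} (row \eqn{u} of \code{C1} placed on a diagonal) is mostly zero.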
#'
#'   To mitigate this, I have implemented
#'   a parallel option using the \code{foreach} package. It speeds up calculations when there
#'   is a decent number of rows or columns (e.g. > 100).
#'
#'   This algorithm also should not run into memory issues, because the only inversion is
#'   of an \code{f} by \code{f} matrix and sparse matrices are used throughout.
#'
#' @references
#' Hu, Y., Koren, Y., & Volinsky, C. (2008). Collaborative filtering for implicit feedback datasets.
#' In 2008 Eighth IEEE International Conference on Data Mining (ICDM '08), pp. 263-272. IEEE.
#'
#' @examples
#'  rows <- 20
#'  cols <- 10
#'  X <- matrix(rnorm(rows * 2, 0, 1), rows, 2)
#'  Y <- matrix(rnorm(cols * 2, 0, 2), cols, 2)
#'  noise <- matrix(rnorm(rows * cols, 0, 0.5), rows, cols)
#'  R <- round(pmax(tcrossprod(X, Y) + noise, 0))
#'
#'  icf <- implicitcf(R, f = 2, alpha = 1, lambda = 0.1, quiet = FALSE)
#'
#'  # should be decreasing
#'  plot(icf$loss_trace)
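#'
#'  # for illustration: predicted preference scores from the returned factors
#'  preds <- Matrix::tcrossprod(icf$X, icf$Y)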
#'
#'  \dontrun{
#'  # to use parallel on Mac/Linux
#'  library(doMC)
#'  registerDoMC(cores = parallel::detectCores())
#'  icf <- implicitcf(R, f = 2, alpha = 1, lambda = 0.1, quiet = FALSE, parallel = TRUE)
#'
#'  # to use parallel on Windows
#'  library(doParallel)
#'  cl <- makeCluster(parallel::detectCores())
#'  registerDoParallel(cl)
#'  icf <- implicitcf(R, f = 2, alpha = 1, lambda = 0.1, quiet = FALSE, parallel = TRUE)
#'  stopCluster(cl)
#'  }
#'
#' @export
implicitcf <- function(R, alpha = 1, C1 = alpha * R, P = (R > 0) * 1,
                       f = 10, lambda = 0,
                       init_stdv = ifelse(lambda == 0, 0.01, 1 / sqrt(2 * lambda)),
                       max_iters = 10, parallel = FALSE, quiet = TRUE) {
  # check C1 and P dimensions
  stopifnot(all(dim(C1) == dim(P)))

  # C1, and P are sparseMatrix class
  if (!inherits(C1, "sparseMatrix")) {
    C1 = Matrix::Matrix(C1, sparse = TRUE)
  }
  if (!inherits(P, "sparseMatrix")) {
    P = Matrix::Matrix(P, sparse = TRUE)
  }

  # check C1 and P 0's match up
  if (!identical(Matrix::which(C1 > 0), Matrix::which(P > 0))) {
    warning("non-zero elements of C1 and P do not match. This could cause issues in the algorithm")
  }

  # R doesn't need to be specified as long as C1 and P are
  if (!missing(R)) {
    rm(R)
  }

  if (parallel) {
    if (!requireNamespace("foreach", quietly = TRUE)) {
      warning("foreach package not installed. Setting parallel = FALSE.\n",
              "Install foreach package to use parallel.")
      parallel = FALSE
    }
  }

  nrows = nrow(P)
  ncols = ncol(P)

  # f is typically small, so I don't know if this helps
  Lambda = Matrix::Diagonal(f, x = lambda)
  # Lambda = diag(x = lambda, f, f)
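  # CP = (C1 + 1) * P elementwise, i.e. c_ui * p_ui, computed without densifying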
  CP = C1 * P + P

  # X is overwritten in the first half-iteration, so only its dimensions matter;
  # Y's random initialization is what seeds the algorithm
  X = Matrix::Matrix(rnorm(nrows * f, 0, init_stdv), nrows, f)
  Y = Matrix::Matrix(rnorm(ncols * f, 0, init_stdv), ncols, f)

  loss_trace = rep(NA, max_iters)
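  # alternating least squares: with Y held fixed, each row of X has a closed-form
  # solution (see @details); then the roles are swapped to update Y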
  for (iter in 1:max_iters) {
    if (!quiet) {
      cat("Iter", iter, "-- ")
    }

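    # precompute Y'Y once per sweep; each row update adds the correction
    # Y' diag(C1[u, ]) Y so that inv below equals Y' C^u Y + lambda * I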
    YtY = Matrix::crossprod(Y)
    if (parallel) {
      X = foreach::`%dopar%`(
        foreach::foreach(u = 1:nrows, .combine = rbind),
        {
          inv = YtY + Matrix::t(Y) %*% diag(C1[u, ]) %*% Y + Lambda
          rhs = Matrix::t(Y) %*% CP[u, ]
          Matrix::t(Matrix::solve(inv, rhs))
        }
      )
    } else {
      for (u in 1:nrows) {
        # TODO: compare diag(C1[u, ]) to Diagonal(x = C1[u, ]) since Diagonal keeps 0's
        inv = YtY + Matrix::t(Y) %*% diag(C1[u, ]) %*% Y + Lambda

        # TODO: compare to rhs = t(Y) %*% CP[u, ], to make sure it gives same results
        # rhs = t(Y) %*% diag(C1[u, ] + 1) %*% P[u, ]
        rhs = Matrix::t(Y) %*% CP[u, ]

        x = Matrix::solve(inv, rhs)
        X[u, ] = as.numeric(x)
      }
    }

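    # same trick for the column sweep: precompute X'X once, then add X' diag(C1[, i]) X per column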
    XtX = Matrix::crossprod(X)
    if (parallel) {
      Y = foreach::`%dopar%`(
        foreach::foreach(i = 1:ncols, .combine = rbind),
        {
          inv = XtX + Matrix::t(X) %*% diag(C1[, i]) %*% X + Lambda
          rhs = Matrix::t(X) %*% CP[, i]
          Matrix::t(Matrix::solve(inv, rhs))
        }
      )
    } else {
      for (i in 1:ncols) {
        # TODO: compare diag(C1[, i]) to Diagonal(x = C1[, i]) since Diagonal keeps 0's
        inv = XtX + Matrix::t(X) %*% diag(C1[, i]) %*% X + Lambda

        # TODO: compare to rhs = t(X) %*% CP[, i], to make sure it gives same results
        # rhs = t(X) %*% diag(C1[, i] + 1) %*% P[, i]
        rhs = Matrix::t(X) %*% CP[, i]

        y = Matrix::solve(inv, rhs)
        Y[i, ] = as.numeric(y)
      }
    }

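    # loss from @details with c_ui = 1 + C1_ui; note that C1 + 1 is dense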
    loss_trace[iter] = sum((C1 + 1) * (P - Matrix::tcrossprod(X, Y))^2) + lambda * (sum(X^2) + sum(Y^2))
    if (!quiet) {
      cat("Loss =", loss_trace[iter], "\n")
    }
  }
  structure(
    list(
      X = X,
      Y = Y,
      loss_trace = loss_trace,
      # alpha = alpha,
      f = f,
      lambda = lambda
    ),
    class = "implicitcf"
  )
}