#' Fit a topic model using Latent Semantic Analysis
#' @description A wrapper for \code{RSpectra::svds} that returns
#' a nicely-formatted latent semantic analysis topic model.
#' @param dtm A document term matrix of class \code{Matrix::dgCMatrix}
#' @param k Number of topics
#' @param return_all Should all objects returned from \code{RSpectra::svds} be
#' returned here? Defaults to \code{FALSE}
#' @param ... Other arguments to pass to \code{\link[RSpectra]{svds}} through
#' its \code{opts} parameter.
#' @return Returns a list with a minimum of three objects: \code{phi},
#' \code{theta}, and \code{sv}. The rows of \code{phi} index topics and the
#' columns index tokens. The rows of \code{theta} index documents and the
#' columns index topics. \code{sv} is a vector of singular values.
#' @details Latent semantic analysis, LSA, uses single value decomposition to
#' factor the document term matrix. In many LSA applications, TF-IDF weights are
#' applied to the DTM before model fitting. However, this is not strictly
#' necessary.
#' @examples
#' # Load a pre-formatted dtm
#' data(nih_sample_dtm)
#'
#' # Convert raw word counts to TF-IDF frequency weights
#' idf <- log(nrow(nih_sample_dtm) / Matrix::colSums(nih_sample_dtm > 0))
#'
#' dtm_tfidf <- Matrix::t(nih_sample_dtm) * idf
#'
#' dtm_tfidf <- Matrix::t(dtm_tfidf)
#'
#' # Fit an LSA model
#' model <- FitLsaModel(dtm = dtm_tfidf, k = 5)
#'
#' str(model)
#'
#' @export
FitLsaModel <- function(dtm, k, return_all = FALSE, ...){
opts <- list(...)
# Fit LSA using single value decomposition on sparse matrices
lsa <- RSpectra::svds(A = dtm, k = k, opts = opts)
# Rename/transform objects so they conform to the convention in textmineR
names(lsa)[ names(lsa) == "v" ] <- "phi"
lsa$phi <- t(lsa$phi)
colnames(lsa$phi) <- colnames(dtm)
rownames(lsa$phi) <- paste("t", 1:nrow(lsa$phi), sep="_")
names(lsa)[ names(lsa) == "u" ] <- "theta"
rownames(lsa$theta) <- rownames(dtm)
colnames(lsa$theta) <- rownames(lsa$phi)
names(lsa)[ names(lsa) == "d" ] <- "sv"
names(lsa$sv) <- names(lsa$theta)
if(! return_all ){
lsa$niter <- NULL
lsa$nops <- NULL
}
return(lsa)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.