Nothing
# // Copyright (C) 2015 - 2016 Dmitriy Selivanov
# // This file is part of text2vec
# //
# // text2vec is free software: you can redistribute it and/or modify it
# // under the terms of the GNU General Public License as published by
# // the Free Software Foundation, either version 2 of the License, or
# // (at your option) any later version.
# //
# // text2vec is distributed in the hope that it will be useful, but
# // WITHOUT ANY WARRANTY; without even the implied warranty of
# // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# // GNU General Public License for more details.
# //
# // You should have received a copy of the GNU General Public License
# // along with text2vec. If not, see <http://www.gnu.org/licenses/>.
#' @name LatentSemanticAnalysis
#'
#' @title Latent Semantic Analysis model
#' @description Creates an LSA (Latent Semantic Analysis) model.
#' See \url{https://en.wikipedia.org/wiki/Latent_semantic_analysis} for details.
#' @format \code{\link{R6Class}} object.
#' @section Usage:
#' For usage details see \bold{Methods, Arguments and Examples} sections.
#' \preformatted{
#' lsa = LatentSemanticAnalysis$new(n_topics)
#' lsa$fit_transform(x, ...)
#' lsa$transform(x, ...)
#' lsa$components
#' }
#' @section Methods:
#' \describe{
#' \item{\code{$new(n_topics)}}{create LSA model with \code{n_topics} latent topics}
#' \item{\code{$fit_transform(x, ...)}}{fit model to an input sparse matrix (preferably in \code{dgCMatrix}
#' format) and then transform \code{x} to latent space}
#' \item{\code{$transform(x, ...)}}{transform new data \code{x} to latent space}
#'}
#' @section Arguments:
#' \describe{
#' \item{lsa}{A \code{LSA} object.}
#' \item{x}{An input document-term matrix. Preferably in \code{dgCMatrix} format}
#' \item{n_topics}{\code{integer} desired number of latent topics.}
#' \item{...}{Arguments to internal functions. Notably useful for \code{fit_transform()} -
#' these arguments will be passed to \code{rsparse::soft_svd}}
#' }
#' @export
#' @examples
#' data("movie_review")
#' N = 100
#' tokens = word_tokenizer(tolower(movie_review$review[1:N]))
#' dtm = create_dtm(itoken(tokens), hash_vectorizer(2**10))
#' n_topics = 5
#' lsa_1 = LatentSemanticAnalysis$new(n_topics)
#' d1 = lsa_1$fit_transform(dtm)
#' # the same, but wrapped with S3 methods
#' d2 = fit_transform(dtm, lsa_1)
#'
LatentSemanticAnalysis = R6::R6Class(
  "LatentSemanticAnalysis",
  inherit = mlapiDecomposition,
  public = list(
    #--------------------------------------------------------------------------
    # methods
    #--------------------------------------------------------------------------
    # Constructor.
    #   n_topics: desired number of latent topics; stored privately and later
    #             passed to rsparse::soft_svd() as its second argument.
    # The model starts in the "not fitted" state.
    initialize = function(n_topics) {
      private$n_topics = n_topics
      private$fitted = FALSE
    },
    # Fit the model on `x` and return `x` mapped to the latent space.
    #   x:   a base matrix, a "sparseMatrix" or a "float32" object.
    #   ...: forwarded to rsparse::soft_svd().
    # Returns the product of the `u` factor and the diagonal matrix of `d`
    # values from the decomposition, with the row names of `x`.
    # Side effects: stores the components, the `v` factor and the
    # explained-variance statistics, and marks the model as fitted.
    fit_transform = function(x, ...) {
      stopifnot(
        inherits(x, "matrix") ||
          inherits(x, "sparseMatrix") ||
          inherits(x, "float32")
      )
      # The wrapper declares a `y` formal after `...`, so a `y` supplied by
      # name is captured here and NOT forwarded to soft_svd().
      FUN = function(..., y) rsparse::soft_svd(x, private$n_topics, ...)
      svd_fit = FUN(...)
      # `nrow` is given explicitly: with a single value, `diag(x = d)` would
      # interpret `d` as a matrix size and build an identity matrix instead
      # of a 1 x 1 matrix holding `d`.
      d_mat = diag(x = svd_fit$d, nrow = length(svd_fit$d))
      documents = svd_fit$u %*% d_mat
      private$components_ = t(svd_fit$v %*% d_mat)
      private$vt = svd_fit$v
      rm(svd_fit)
      rownames(documents) = rownames(x)
      colnames(private$components_) = colnames(x)
      # Per-column variance computed as E[x^2] - (E[x])^2.
      calculate_col_var = function(x) {
        colMeans(x * x) - colMeans(x) ^ 2
      }
      private$explained_variance = calculate_col_var(documents)
      private$explained_variance_ratio =
        private$explained_variance / sum(calculate_col_var(x))
      private$fitted = TRUE
      documents
    },
    # Map new data `x` to the latent space of an already fitted model.
    #   x: matrix with the same number of columns as the fitted components.
    # Returns a base matrix (`x %*% v`) carrying the row names of `x`.
    # Raises an error if the model has not been fitted.
    transform = function(x, ...) {
      if (!private$fitted) {
        stop("Fit the model first with model$fit_transform()!")
      }
      stopifnot(ncol(x) == ncol(private$components_))
      temp = x %*% private$vt
      rownames(temp) = rownames(x)
      as.matrix(temp)
    },
    # Deprecated accessor: only signals deprecation in favour of
    # `model$components`.
    get_word_vectors = function() {
      .Deprecated("model$components")
    },
    # Variance of each latent dimension of the fitted documents.
    # Raises an error if the model has not been fitted.
    get_explained_variance = function() {
      if (private$fitted) {
        private$explained_variance
      } else {
        stop("Fit the model first with model$fit_transform()")
      }
    },
    # Explained variance divided by the summed column variance of the
    # training matrix. Raises an error if the model has not been fitted.
    get_explained_variance_ratio = function() {
      if (private$fitted) {
        private$explained_variance_ratio
      } else {
        stop("Fit the model first with model$fit_transform()")
      }
    }
  ),
  # Active bindings have to be supplied through the `active` argument of
  # R6Class(); placed inside `public` they would only create a plain field.
  active = list(
    # Read-only access to the fitted components: assignment is rejected and
    # reading before fitting raises an error.
    components = function(value) {
      if (!missing(value)) stop("Sorry this is a read-only field")
      if (is.null(private$components_)) stop("model was not fitted yet!")
      private$components_
    }
  ),
  private = list(
    n_topics = NULL,                 # number of latent topics requested
    components_ = NULL,              # t(v %*% diag(d)), set by fit_transform()
    fitted = FALSE,                  # TRUE once fit_transform() has succeeded
    vt = NULL,                       # `v` factor used by transform()
    explained_variance = NULL,       # per-topic variance of fitted documents
    explained_variance_ratio = NULL  # explained_variance / total variance of x
  )
)
# Short alias: `LSA` is bound to the same object as `LatentSemanticAnalysis`
# and is documented on the same help page.
#' @rdname LatentSemanticAnalysis
#' @export
LSA = LatentSemanticAnalysis
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.