R/as.sparseMatrix.R

#' @include S4classes.R polmineR.R partition.R partition_bundle.R context.R cooccurrences.R TermDocumentMatrix.R 
NULL

# Register the S3 class "simple_triplet_matrix" with the S4 system so that it
# can be used in the signature of the as.sparseMatrix() method defined below.
setOldClass("simple_triplet_matrix")

#' Type conversion - get sparseMatrix.
#' 
#' Convert an object to a \code{sparseMatrix} as defined in the Matrix package.
#' 
#' @param x The object to convert.
#' @param col Name of the column to take the values from (only relevant if
#'   \code{x} is a bundle).
#' @param ... Further arguments that are handed over to the call to
#'   \code{sparseMatrix}. For instance, setting \code{giveCsparse} to
#'   \code{FALSE} yields a \code{dgTMatrix} rather than a \code{dgCMatrix}.
#' @exportMethod as.sparseMatrix
#' @rdname as.sparseMatrix
setGeneric(
  name = "as.sparseMatrix",
  def = function(x, ...) standardGeneric("as.sparseMatrix")
)


#' @docType methods
#' @importFrom Matrix sparseMatrix
#' @rdname as.sparseMatrix
setMethod("as.sparseMatrix", "simple_triplet_matrix", function(x, ...) {
  # The triplet representation maps one-to-one onto the arguments of
  # sparseMatrix(): row indices, column indices and the cell values.
  shape <- c(x$nrow, x$ncol)
  dim_labels <- dimnames(x)
  Matrix::sparseMatrix(
    i = x$i,
    j = x$j,
    x = x$v,
    dims = shape,
    dimnames = dim_labels,
    ...
  )
})


#' @details Returns a \code{sparseMatrix} based on the counts of term
#'   cooccurrences. At this stage, it is required that decoded tokens are
#'   present. The cell values are taken from the column of the \code{stat} slot
#'   named by \code{col}; an error is raised if that column is not available.
#' @exportMethod as.sparseMatrix
#' @rdname all-cooccurrences-class
#' @examples 
#' \donttest{
#' use(pkg = "RcppCWB", corpus = "REUTERS")
#' 
#' X <- Cooccurrences("REUTERS", p_attribute = "word", left = 5L, right = 5L)
#' decode(X)
#' sm <- as.sparseMatrix(X)
#' stm <- as.simple_triplet_matrix(X)
#' }
setMethod("as.sparseMatrix", "Cooccurrences", function(x, col = "ab_count", ...){
  
  # reindex() adds the columns "a_new_index" and "b_new_index" to x@stat by
  # reference and returns the tokens used as row and column names.
  decoded_tokens <- reindex(x)
  
  # Restore the original data.table on every exit path, so the temporary
  # index columns do not linger if anything below fails.
  on.exit(
    x@stat[, "a_new_index" := NULL][, "b_new_index" := NULL],
    add = TRUE
  )
  
  # Fail with a clear message rather than passing NULL on to sparseMatrix().
  values <- x@stat[[col]]
  if (is.null(values)) {
    stop(
      "column '", col, "' is not available in the stat slot of the ",
      "Cooccurrences object",
      call. = FALSE
    )
  }
  
  n_tokens <- length(decoded_tokens)
  Matrix::sparseMatrix(
    i = x@stat[["a_new_index"]],
    j = x@stat[["b_new_index"]],
    x = values,
    dims = c(n_tokens, n_tokens),
    dimnames = list(decoded_tokens, decoded_tokens),
    ...
  )
})



#' @importFrom Matrix sparseMatrix
#' @rdname as.sparseMatrix
setMethod("as.sparseMatrix", "TermDocumentMatrix", function(x, ...) {
  # Hand the triplet components (i, j, v) and the stored dimensions on to
  # sparseMatrix(); extra arguments in `...` are passed through unchanged.
  shape <- c(x$nrow, x$ncol)
  dim_labels <- dimnames(x)
  Matrix::sparseMatrix(
    i = x$i,
    j = x$j,
    x = x$v,
    dims = shape,
    dimnames = dim_labels,
    ...
  )
})

#' @rdname as.sparseMatrix
setMethod("as.sparseMatrix", "DocumentTermMatrix", function(x, ...) {
  # Hand the triplet components (i, j, v) and the stored dimensions on to
  # sparseMatrix(); extra arguments in `...` are passed through unchanged.
  shape <- c(x$nrow, x$ncol)
  dim_labels <- dimnames(x)
  Matrix::sparseMatrix(
    i = x$i,
    j = x$j,
    x = x$v,
    dims = shape,
    dimnames = dim_labels,
    ...
  )
})



#' @docType methods
#' @rdname as.sparseMatrix
setMethod("as.sparseMatrix", "bundle", function(x, col, ...) {
  # Step 1: turn the bundle into a TermDocumentMatrix, using `col` as the
  # column that supplies the values.
  cli_process_start("convert partition_bundle to `TermDocumentMatrix`")
  tdm <- as.TermDocumentMatrix(x = x, col = col)
  cli_process_done()
  
  # Step 2: dispatch again on the intermediate object; `...` is forwarded to
  # that method.
  cli_process_start("converting `TermDocumentMatrix` to `Matrix`")
  sparse <- as.sparseMatrix(tdm, ...)
  cli_process_done()
  
  sparse
})

Try the polmineR package in your browser

Any scripts or data that you put into this service are public.

polmineR documentation built on Nov. 2, 2023, 5:52 p.m.