# R/text_as_numbers.R

#' Convert the words of a corpus to numeric tokens
#'
#' Generic for re-expressing the words stored in a corpus as numbers. The
#' method for \code{Corpus} objects fills the \code{tokens_by_turn} slot with
#' zero-based numeric codes for the words held in \code{words_by_turn}.
#'
#' @param corpus The object to convert; a method is provided for class
#'   \code{Corpus}.
#'
#' @return An object of class \code{Corpus} (the declared \code{valueClass}).
#'
setGeneric(
  name = "text_as_numbers",
  def = function(corpus) {
    standardGeneric("text_as_numbers")
  },
  valueClass = "Corpus"
)


#' @importFrom utils as.relistable relist
#' @rdname text_as_numbers
#' @export
setMethod("text_as_numbers",
          valueClass = "Corpus",
          signature("Corpus"),
          function(corpus){
            # Replace every word in `words_by_turn` with a zero-based numeric
            # code and store the result, turn by turn, in `tokens_by_turn`.
            # Codes follow the sorted order of the distinct words (the level
            # order assigned by factor()), so equal words share a code.
            
            words_by_turn <- corpus@words_by_turn
            flat_words <- unlist(words_by_turn)
            
            # A missing word has no factor level and would yield an NA code;
            # fail with an explicit message instead of an opaque `if` error.
            if(anyNA(flat_words)){
              stop("Cannot convert words to numbers: words_by_turn contains NA.")
            }
            
            # factor() codes start at 1; shift so the smallest code is 0.
            word_codes <- as.numeric(factor(flat_words)) - 1
            
            # Sanity checks on the coding. They are skipped when there are no
            # words at all, because min()/max() of an empty vector are +/-Inf.
            if(length(word_codes) > 0 && min(word_codes) != 0){
              stop("Something went wrong with numeric representation.  Minimum
                   should be 0.")
            }
            if(length(word_codes) > 0 &&
               max(word_codes) != length(unique(flat_words)) - 1){
              stop("Something went wrong with numeric representation.  Maximum
                   should equal length of unique words.")
            }
            
            # Cut the flat code vector back into the per-turn shape of
            # `words_by_turn`; as.numeric() leaves bare numeric vectors.
            tokens_by_turn <- relist(word_codes, skeleton = words_by_turn)
            corpus@tokens_by_turn <- lapply(tokens_by_turn, as.numeric)
            
            return(corpus)
})
# erossiter/sitsr documentation built on Aug. 11, 2017, 9:23 p.m.