R/as.RollingLDA.R

Defines functions print.RollingLDA is.RollingLDA as.RollingLDA

Documented in as.RollingLDA is.RollingLDA

#' @title RollingLDA Object
#'
#' @description Constructor for RollingLDA objects used in this package.
#' The function may be useful to create a RollingLDA object out of a standard
#' \code{\link[ldaPrototype]{LDA}} object to use it as initial model and
#' update it using \code{\link{updateRollingLDA}}.
#'
#' @details
#' If you call \code{as.RollingLDA} on an object \code{x} which already is of
#' the structure of an \code{RollingLDA} object (in particular a \code{RollingLDA}
#' object itself), the additional arguments \code{id, param, ...}
#' may be used to override the specific elements.
#'
#' @family RollingLDA functions
#'
#' @param x [\code{named list}]\cr
#' \code{\link{RollingLDA}} object. Alternatively each element can be passed for
#' individual results. Individually set elements overwrite elements from \code{x}.
#' @param id [\code{character(1)}]\cr
#' Name for the computation/model.
#' @param lda [\code{named list}]\cr
#' \code{\link[ldaPrototype]{LDA}} object.
#' @param docs [\code{named list}]\cr
#' Texts in a preprocessed format. See \code{\link[tosca]{LDAprep}}.
#' @param dates [\code{(un)named Date}]\cr
#' Dates of the texts. If unnamed, it must match the order of docs.
#' @param vocab [\code{character}]\cr
#' Vocabularies.
#' @param chunks [\code{data.table}]\cr
#' with specifications for each model chunk
#' \describe{
#'   \item{\code{chunk.id}}{[\code{integer}] Index counting up starting with \code{0}.}
#'   \item{\code{start.date}}{[\code{Date}] Minimum of each chunk's dates.}
#'   \item{\code{end.date}}{[\code{Date}] Maximum of each chunk's dates.}
#'   \item{\code{memory}}{[\code{Date}] Date from which texts are considered as memory.}
#'   \item{\code{n}}{[\code{integer}] Number of fitted texts.}
#'   \item{\code{n.dicsarded}}{[\code{integer}] Number of lost texts through preprocessing.}
#'   \item{\code{n.memory}}{[\code{integer}] Number of texts considered as memory.}
#'   \item{\code{n.vocab}}{[\code{integer}] Number of vocabularies (monotonously increasing).}
#' }
#' If not passed, \code{lda} is interpreted as initialization chunk.
#' @param param [\code{named list(4)}]\cr
#' Parameters of the object, i.e. parameters for future updates fitted on the
#' to be created model. List always should contain names "vocab.abs", "vocab.rel",
#' "vocab.fallback" and "doc.abs".
#' @param obj [\code{R} object]\cr
#' Object to test.
#' @param verbose [\code{logical(1)}]\cr
#' Should test information be given in the console?
#' @return [\code{named list}] \code{\link{RollingLDA}} object.
#'
#' @examples
#' roll_lda = RollingLDA(texts = economy_texts,
#'                       dates = economy_dates,
#'                       chunks = "quarter",
#'                       memory = "3 quarter",
#'                       init = "2008-07-03",
#'                       K = 10,
#'                       type = "lda")
#'
#' is.RollingLDA(roll_lda, verbose = TRUE)
#' getID(roll_lda)
#' roll_lda = as.RollingLDA(roll_lda, id = "newID")
#' getID(roll_lda)
#'
#' @export as.RollingLDA
as.RollingLDA = function(x, id, lda, docs, dates, vocab, chunks, param){
  if (!missing(x)){
    if (!is.RollingLDA(x)){
      is.RollingLDA(x, verbose = TRUE)
      stop("\"x\" is not a RollingLDA object")
    }
    if (missing(id)) id = x$id
    if (missing(lda)) lda = x$lda
    if (missing(docs)) docs = x$docs
    if (missing(dates)) dates = x$dates
    if (missing(vocab)) vocab = x$vocab
    if (missing(chunks)) chunks = x$chunks
    if (missing(param)) param = x$param
  }
  if (missing(id)) id = "rolling - converted"
  if (!is.LDA(lda)){
    is.LDA(lda, verbose = TRUE)
    stop("\"lda\" not an LDA object")
  }
  if (is.null(names(dates))) names(dates) = names(docs)
  dates = as.Date(dates[match(names(dates), names(docs))])
  if (missing(vocab)) vocab = colnames(getTopics(lda))
  if (missing(chunks)){
    chunks = data.table(
      chunk.id = 0L,
      start.date = min(dates),
      end.date = max(dates),
      memory = NA_Date_,
      n = length(docs),
      n.discarded = NA_integer_,
      n.memory = NA_integer_,
      n.vocab = length(vocab)
    )
  }
  if (missing(param)) param = .defaultParam()
  res = list(
    id = id,
    lda = lda,
    docs = docs,
    dates = dates,
    vocab = vocab,
    chunks = chunks,
    param = param)
  class(res) = "RollingLDA"
  if (!is.RollingLDA(res)){
    is.RollingLDA(res, verbose = TRUE)
    stop("input arguments do not create a RollingLDA object")
  }
  res
}

#' @rdname as.RollingLDA
#' @export
is.RollingLDA = function(obj, verbose = FALSE){
  assert_flag(verbose)

  if (!inherits(obj, "RollingLDA")){
    if (verbose) message("object is not of class \"RollingLDA\"")
    return(FALSE)
  }

  if (!is.list(obj)){
    if (verbose) message("object is not a list")
    return(FALSE)
  }

  testNames = c("id", "lda", "docs", "dates", "vocab", "chunks", "param")

  if (!test_list(obj, types = c("character", "LDA", "list", "Date", "character", "data.table", "list"),
                 names = "named", any.missing = FALSE)){
    if (verbose) message(check_list(obj, types = c("character", "LDA", "list", "Date", "character", "data.table", "list"),
                                    names = "named", any.missing = FALSE))
    return(FALSE)
  }
  if (!test_set_equal(names(obj), testNames)){
    if (verbose) message(check_set_equal(names(obj), testNames))
    return(FALSE)
  }

  #id
  if (verbose) message("id: ", appendLF = FALSE)
  id = getID(obj)
  if (!is.character(id) || !(length(id) == 1)){
    if (verbose) message("not a character of length 1")
    return(FALSE)
  }
  if (verbose) message("checked")

  #lda
  if (verbose) message("lda: ", appendLF = FALSE)
  lda = try(getLDA(obj), silent = !verbose)
  if(inherits(lda, "try-error")){
    # should not happen
    return(FALSE)
  }
  if(!is.LDA(lda)){
    if (verbose) message("not an \"LDA\" object")
    return(FALSE)
  }
  if (verbose) message("checked")

  #docs
  if (verbose) message("docs: ", appendLF = FALSE)
  docs = getDocs(obj)
  if (!test_list(docs, min.len = 1, names = "unique", types = "matrix", any.missing = FALSE)){
    if (verbose) message(check_list(docs, min.len = 1, names = "unique", types = "matrix", any.missing = FALSE))
    return(FALSE)
  }
  if (!all(sapply(docs, nrow) == 2)){
    if (verbose) message("not all elements have two rows")
    return(FALSE)
  }
  if (!all(sapply(docs, function(x) all(x[2,] == 1)))){
    if (verbose) message("not all values in the second row equal 1")
    return(FALSE)
  }
  if (verbose) message("checked")

  #dates
  if (verbose) message("dates: ", appendLF = FALSE)
  dates = getDates(obj)
  if (!test_date(dates, any.missing = FALSE)){
    if (verbose) message(check_date(dates, any.missing = FALSE))
    return(FALSE)
  }
  if (!all(names(dates) %in% names(docs)) || !all(names(docs) %in% names(dates))){
    if (verbose) message("not same names as \"docs\"")
    return(FALSE)
  }
  if (length(dates) != length(docs)){
    # should not happen
    if (verbose) message("not same length as \"docs\"")
    return(FALSE)
  }
  if (verbose) message("checked")

  #vocab
  if (verbose) message("vocab: ", appendLF = FALSE)
  vocab = getVocab(obj)
  if (!test_character(vocab, any.missing = FALSE, unique = TRUE)){
    if (verbose) message(check_character(vocab, any.missing = FALSE, unique = TRUE))
    return(FALSE)
  }
  if (verbose) message("checked")

  #chunks
  if (verbose) message("chunks: ", appendLF = FALSE)
  chunks = getChunks(obj)
  if (!is.data.table(chunks) ||
      !all(c("chunk.id", "start.date", "end.date", "memory", "n", "n.discarded",
             "n.memory", "n.vocab") %in% colnames(chunks))){
    if (verbose) message("not a data.table with standard parameters")
    return(FALSE)
  }
  if (anyDuplicated(chunks$chunk.id)){
    if (verbose) message("duplicated \"chunk.id\"")
    return(FALSE)
  }
  if (!is.integer(chunks$chunk.id)){
    if (verbose) message("\"chunk.id\" is not an integer")
    return(FALSE)
  }
  if (!is.integer(chunks$n)){
    if (verbose) message("\"n\" is not an integer")
    return(FALSE)
  }
  if (!is.integer(chunks$n.discarded)){
    if (verbose) message("\"n.discarded\" is not an integer")
    return(FALSE)
  }
  if (!is.integer(chunks$n.memory)){
    if (verbose) message("\"n.memory\" is not an integer")
    return(FALSE)
  }
  if (!is.integer(chunks$n.vocab)){
    if (verbose) message("\"n.vocab\" is not an integer")
    return(FALSE)
  }
  if (!is.Date(chunks$start.date)){
    if (verbose) message("\"start.date\" is not a Date object")
    return(FALSE)
  }
  if (!is.Date(chunks$end.date)){
    if (verbose) message("\"end.date\" is not a Date object")
    return(FALSE)
  }
  if (!is.Date(chunks$memory)){
    if (verbose) message("\"memory\" is not a Date object")
    return(FALSE)
  }
  if (any(is.na(chunks$chunk.id))){
    if (verbose) message("NA(s) in \"chunk.id\"")
    return(FALSE)
  }
  if (any(is.na(chunks$n))){
    if (verbose) message("NA(s) in \"n\"")
    return(FALSE)
  }
  if (any(is.na(chunks$n.vocab))){
    if (verbose) message("NA(s) in \"n.vocab\"")
    return(FALSE)
  }
  if (any(is.na(chunks$start.date))){
    if (verbose) message("NA(s) in \"start.date\"")
    return(FALSE)
  }
  if (any(is.na(chunks$end.date))){
    if (verbose) message("NA(s) in \"end.date\"")
    return(FALSE)
  }
  if (length(dates) != sum(chunks$n)){
    if (verbose) message("sum of \"n\" does not match number of texts")
    return(FALSE)
  }
  if (length(vocab) != max(chunks$n.vocab)){
    if (verbose) message("max of \"n.vocab\" does not match number of vocabularies")
    return(FALSE)
  }
  if (is.unsorted(chunks$n.vocab)){
    if (verbose) message("\"n.vocab\" is not monotonously increasing")
    return(FALSE)
  }
  if (min(dates) < min(chunks$start.date)){
    if (verbose) message("minimum of \"start.date\" is larger than minimum of text's dates")
    return(FALSE)
  }
  if (max(dates) > max(chunks$end.date)){
    if (verbose) message("maximum of \"end.date\" is smaller than maximum of text's dates")
    return(FALSE)
  }
  if (verbose) message("checked")

  #param
  if (verbose) message("param: ", appendLF = FALSE)
  param = getParam(obj)
  testNames = c("vocab.abs", "vocab.rel", "vocab.fallback", "doc.abs")
  if (!test_list(param, types = c("numeric", "integer"), names = "named", any.missing = FALSE)){
    if (verbose) message(check_list(param, types = c("numeric", "integer"), names = "named", any.missing = FALSE))
    return(FALSE)
  }
  if (!test_set_equal(names(param), testNames)){
    if (verbose) message(check_set_equal(names(param), testNames))
    return(FALSE)
  }
  if (param$vocab.abs < 0){
    if (verbose) message("\"vocab.abs\" is smaller than 0")
    return(FALSE)
  }
  if (param$vocab.rel < 0){
    if (verbose) message("\"vocab.rel\" is smaller than 0")
    return(FALSE)
  }
  if (param$vocab.rel > 1){
    if (verbose) message("\"vocab.rel\" is greater than 0")
    return(FALSE)
  }
  if (param$vocab.fallback < 0){
    if (verbose) message("\"vocab.fallback\" is smaller than 0")
    return(FALSE)
  }
  if (param$doc.abs < 0){
    if (verbose) message("\"doc.abs\" is smaller than 0")
    return(FALSE)
  }
  if (verbose) message("checked")

  return(TRUE)
}

#' @export
print.RollingLDA = function(x, ...){
  elements = paste0("\"", names(which(!sapply(x, is.null))), "\"")
  cat(
    "RollingLDA Object named \"", getID(x),
    "\" with elements\n", paste0(elements, collapse = ", "), "\n ",
    nrow(getChunks(x)), " Chunks with Texts from ",
    as.character(min(getDates(x))), " to ", as.character(max(getDates(x))),
    "\n ", paste0(paste0(names(getParam(x)), ": ",
                         unlist(getParam(x))), collapse = ", "),
    "\n\n", sep = "")
  print(getLDA(x))
}

Try the rollinglda package in your browser

Any scripts or data that you put into this service are public.

rollinglda documentation built on May 29, 2024, 3:32 a.m.