Nothing
#' Plot a wordcloud
#'
#' Use the \pkg{wordcloud} package to plot a wordcloud for a particular topic
#'
#' Uses the \pkg{wordcloud} package to make a word cloud of a particular topic.
#' The option \code{"model"} uses the topic-word model parameters. Thus it
#' shows words weighted by their probability conditional that the word comes
#' from a particular topic. With content covariates it averages over the
#' values for all levels of the content covariate weighted by the empirical
#' frequency in the dataset. The option \code{"documents"} plots the words
#' which appear in documents that have a topic proportion higher than
#' \code{thresh}. Thus \code{"model"} gives a pure model based interpretation
#' of the topic while \code{"documents"} gives a picture of all the words in
#' documents which are highly associated with the topic.
#'
#' @param stmobj The STM model object to be used in making the word cloud.
#' @param topic NULL to plot the marginal distribution of words in the corpus,
#' or a single integer indicating the topic number.
#' @param type Specifies how the wordcloud is constructed. The type
#' \code{"model"} which is used by default is based on the probability of the
#' word given the topic. The type \code{"documents"} plots words within
#' documents that have a topic proportion of higher than \code{thresh}. This
#' requires that the \code{documents} argument also be specified.
#' @param documents The documents object of the same kind as passed to
#' \code{\link{stm}}. This is only necessary if \code{type="documents"}.
#' @param thresh The threshold for including a document in the
#' \code{type="documents"} setting.
#' @param max.words The maximum number of words to be plotted.
#' @param ... Additional parameters passed to \code{wordcloud}.
#' @seealso \code{\link{plot.STM}}
#' @references Ian Fellows (2014). wordcloud: Word Clouds. R package version
#' 2.5. \url{https://cran.r-project.org/package=wordcloud}
#' @examples
#' cloud(gadarianFit, 1)
#' @export
cloud <- function(stmobj, topic=NULL, type=c("model", "documents"), documents,
thresh=.9, max.words=100, ...) {
if(!requireNamespace("wordcloud", quietly=TRUE)) {
stop("wordcloud package required to use this function.")
} else {
if(!inherits(stmobj,"STM")) stop("cloud function only works for STM models. See wordcloud package for general tools.")
if(length(topic)>1) stop("Please only select 1 topic.")
mod <- stmobj
type <- match.arg(type)
vocab <- mod$vocab
#if they didn't specify a topic overwrite the choice to documents
if(is.null(topic)) type <- "documents"
if(type=="model") {
#Here we are interested in the model parameters
if(length(mod$beta$logbeta)==1) {
#in the case with no content covariates its simply the reweighted p(w|z)
vec <- exp(mod$beta$logbeta[[1]])[topic,]*sum(mod$settings$dim$wcounts$x)
} else {
#in the case with content covariates we need to reweight
levels <- table(mod$settings$covariates$betaindex)
weights <- levels/sum(levels)
#now average over the weights
vec <- weights[1]*exp(mod$beta$logbeta[[1]])[topic,]
for(i in 2:length(mod$beta$logbeta)) {
vec <- vec + weights[i]*exp(mod$beta$logbeta[[i]])[topic,]
}
#and finally reweight by the marginal word distribution
vec <- vec*sum(mod$settings$dim$wcounts$x)
}
} else {
#Here we care about documents with high theta loadings on topic
if(is.null(topic)) {
#if no topic is specified its just working with the marginals
vec <- mod$settings$dim$wcounts$x
} else {
if(is.null(documents)) stop("documents needed to give topic specific document values.")
#Subset to documents fulfulling threshold
docnums<- which(mod$theta[,topic]>thresh)
if(length(docnums)==0) stop(sprintf("No documents have a topic loading higher than %s", thresh))
subdoc <- documents[docnums]
#Aggregate over the indices to get the margins
indices <- unlist(lapply(subdoc, "[", 1, ))
counts <- unlist(lapply(subdoc, "[", 2, ))
out <- aggregate(counts, by=list(indices), FUN=sum)
#Fill in the vector of zeroes (to account for words that don't show up in the subpopulation)
vec <- rep(0, length(vocab))
vec[out$Group.1] <- out$x
}
}
wordcloud::wordcloud(words=vocab,freq=vec, max.words=max.words, ...)
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.