# R/data.R
# In validateIt: Validating Topic Coherence and Topic Labels

#' An Example Topic Model
#'
#' A structural topic model (STM) object generated with the \code{stm} package
#' from a random sample of US senators' Facebook posts.
#'
#' @format An STM object.
#'
#' @references
#' Roberts, Margaret E., Brandon M. Stewart, and Dustin Tingley. "Stm: An R
#' package for structural topic models." Journal of Statistical Software 91
#' (2019): 1-40.
#'
#' @source
#' See \url{https://CRAN.R-project.org/package=stm} for more details.
#'
#' @docType data
#' @keywords datasets
#' @name modtest
#' @usage data(modtest)
NULL


#' An Example of the Combined Mass for Words with the Same Roots
#'
#' A list of two elements: the words (each represented by its most frequent
#' form in a topic) and the corresponding word probabilities.
#'
#' \describe{
#' \item{\code{vocab}}{A matrix of words for each topic. Each row represents a
#' topic and each column represents a word. Words with the same root are
#' represented only by their most common form in that topic.}
#' \item{\code{beta}}{A matrix of combined word probabilities for each topic.
#' Each row represents a topic and each column represents a combined word.}
#' }
#'
#' @format A list of two.
#'
#' @docType data
#' @keywords datasets
#' @name masstest
#' @usage data(masstest)
NULL


#' An Example Object of Prepared Documents
#'
#' Output of the \code{prepDocuments} function from the \code{stm} package.
#'
#' @format A list containing a documents object and a vocab object.
#'
#' @references
#' Roberts, Margaret E., Brandon M. Stewart, and Dustin Tingley. "Stm: An R
#' package for structural topic models." Journal of Statistical Software 91
#' (2019): 1-40.
#'
#' @source
#' See \url{https://CRAN.R-project.org/package=stm} for more details.
#'
#' @docType data
#' @keywords datasets
#' @name stmPreptest
#' @usage data(stmPreptest)
NULL


#' An Example Heldout Test Set
#'
#' Output of the \code{make.heldout} function from the \code{stm} package.
#'
#' @format A list containing the heldout documents, vocab, and missing.
#'
#' @references
#' Roberts, Margaret E., Brandon M. Stewart, and Dustin Tingley. "Stm: An R
#' package for structural topic models." Journal of Statistical Software 91
#' (2019): 1-40.
#'
#' @source
#' See \url{https://CRAN.R-project.org/package=stm} for more details.
#'
#' @docType data
#' @keywords datasets
#' @name heldouttest
#' @usage data(heldouttest)
NULL


#' Example R4WSI0 Tasks
#'
#' Fifteen example R4WSI0 tasks, structured as a matrix.
#'
#' Note that the R4WSI0 examples used here differ from R4WSI tasks in that
#' R4WSI tasks do not present any documents.
#'
#' @format A matrix with 15 rows and 6 columns.
#' \describe{
#' \item{\code{topic}}{Index of topics}
#' \item{\code{doc}}{Example documents associated with each topic}
#' \item{\code{opt1}}{Word set option 1}
#' \item{\code{opt2}}{Word set option 2}
#' \item{\code{opt3}}{Word set option 3}
#' \item{\code{optcrt}}{Word set option 4, which is also the correct choice}
#' }
#'
#' @docType data
#' @keywords datasets
#' @name R4WSItasktest
#' @usage data(R4WSItasktest)
NULL


#' Example Gold-Standard R4WSI0 Tasks
#'
#' A data frame of 5 example gold-standard R4WSI0 tasks.
#'
#' @format A data frame with 5 rows and 6 columns.
#' \describe{
#' \item{\code{topic}}{Index of topics}
#' \item{\code{doc}}{Example documents associated with each topic}
#' \item{\code{opt1}}{Word set option 1}
#' \item{\code{opt2}}{Word set option 2}
#' \item{\code{opt3}}{Word set option 3}
#' \item{\code{optcrt}}{Word set option 4, which is also the correct choice}
#' }
#'
#' @docType data
#' @keywords datasets
#' @name goldR4WSItest
#' @usage data(goldR4WSItest)
NULL


# NOTE(review): @format states 6 columns but seven items are listed below, and
# the `id` entry repeats the description of `topic` -- confirm both against the
# actual data before changing either.
#' Example R4WSI Tasks with Regular and Gold-Standard Tasks
#'
#' A data frame of 20 example R4WSI0 tasks: 5 gold-standard tasks and 15
#' regular tasks.
#'
#' @format A data frame of 20 rows and 6 columns.
#' \describe{
#' \item{\code{topic}}{Index of topics}
#' \item{\code{id}}{Index of topics}
#' \item{\code{doc}}{Example documents associated with each topic}
#' \item{\code{opt1}}{Word set option 1}
#' \item{\code{opt2}}{Word set option 2}
#' \item{\code{opt3}}{Word set option 3}
#' \item{\code{optcrt}}{Word set option 4, which is also the correct choice}
#' }
#'
#' @docType data
#' @keywords datasets
#' @name allR4WSItasktest
#' @usage data(allR4WSItasktest)
NULL


#' Example Local Record of the R4WSI Tasks
#'
#' Local record generated by the \code{recordTasks} function.
#'
#' It is meant to be compared with the answers from the online workers in
#' order to evaluate the topic model performance.
#'
#' @format A list of two data frames.
#' \describe{
#' \item{\code{data.frame1}}{A data frame of tasks with the \code{optcrt}
#' indicating the machine predicted choice.}
#' \item{\code{data.frame2}}{A data frame of tasks with randomized choices.
#' Identical to what would be sent online.}
#' }
#'
#' @docType data
#' @keywords datasets
#' @name recordtest
#' @usage data(recordtest)
NULL


#' Example Results Retrieved from Mturk
#'
#' @format A data frame of ten example tasks retrieved from Mturk, with or
#' without online workers' answers.
#'
#' \describe{
#' \item{\code{assignment_id}}{Assignment id, assigned by Mturk. If 0, then the task hasn't been completed.}
#' \item{\code{batch_id}}{User specified batch id.}
#' \item{\code{completed_at}}{Timestamp when the task was completed. If 0, then the task hasn't been completed.}
#' \item{\code{local_task_id}}{Local task id.}
#' \item{\code{mturk_hit_id}}{Mturk HIT id, assigned by Mturk.}
#' \item{\code{result}}{Choice made by the worker (1-4). If 0, then the task hasn't been completed.}
#' \item{\code{worker_id}}{Mturk worker id. If 0, then the task hasn't been completed.}
#' }
#'
#' @docType data
#' @keywords datasets
#' @name resultstest
#' @usage data(resultstest)
NULL


#' Example Answer Keys
#'
#' @format A list of two data frames. Similar to \code{recordtest}.
#'
#' \describe{
#' \item{\code{data.frame1}}{A data frame of tasks with the \code{optcrt}
#' indicating the machine predicted choice.}
#' \item{\code{data.frame2}}{A data frame of tasks with randomized choices.
#' Identical to what would be sent online.}
#' }
#'
#' @docType data
#' @keywords datasets
#' @name keypostedtest
#' @usage data(keypostedtest)
NULL