R/run_stm.R

Defines functions run_stm

Documented in run_stm

#' Run STM analysis
#'
#' Run STM analysis and save output to aid interpretation.
#' @param input_file Path to file containing objects created by function \code{create_stm_input}.
#' @param output_dir Path to directory in which to save model output.
#' @param k Number of topics for STM analysis.
#' @param split_docs Logical indicating whether documents have been split into 3,000 word chunks, in which case a flag will be added as a prevalence covariate to indicate which chunks come from the same full document.
#' @param representative_docs If TRUE (the default), save full texts of most representative documents. If FALSE, save only a list of titles of the most representative documents.
#' @export

run_stm <- function(input_file, output_dir, k, split_docs=FALSE, representative_docs=TRUE) {

    load(file=input_file)

    prev <- "~year+geog"

    if(split_docs) {
        prev <- paste0(prev, "+report")
    }

    prev <- as.formula(prev)

    model_name <- output_dir

    assign(model_name, stm::stm(stm_input$documents, stm_input$vocab,
                       K=k, prevalence=prev,
                       data=stm_input$meta))
    save(list=output_dir, file=file.path(output_dir, paste0(model_name, ".Rdata")))
    texanaaid::create_topic_model_report(
        get(model_name),
        file.path(output_dir),
        stm_input$meta,
        raw_reports ,
        diff_cov="geog",
        diff_cov.value1="US",
        diff_cov.value2="EU",
        representative_docs=representative_docs
    )
}
dtburk/gensci.stm documentation built on Nov. 13, 2019, 12:33 a.m.