R/mod_tfidf.R

Defines functions mod_tfidf_server mod_tfidf_ui

#' tfidf_and_word_processing UI Function
#'
#' @description A shiny Module.
#'
#' @param id,input,output,session Internal parameters for {shiny}.
#'
#' @noRd 
#'
#' @importFrom shiny NS tagList 
mod_tfidf_ui <- function(id) {
  ns <- NS(id)
  tagList(
    
    fluidRow(
      column(
        width = 4,
        uiOutput(ns("classControl"))
      ),
      
      column(
        width = 4,
        uiOutput(ns("ngramsNumControl"))
      ),
      
      column(
        width = 4,
        uiOutput(ns("barsNumberControl"))
      )
    ),
    
    fluidRow(
      width = 12,
      
      column(
        width = 12,
        
        box(
          width = NULL,
          
          downloadButton(ns("downloadTfidfNgrams"), "Download plot"),
          
          plotOutput(ns("tfidfBars"), height = "1000px") %>%
            shinycssloaders::withSpinner(hide.ui = TRUE),
          
          box(
            htmlOutput(ns("tfidfExplanation")), 
            background = "red", 
            width = NULL
          )
        )
      )
    )
  )
}

#' tfidf_and_word_processing Server Functions
#'
#' @noRd 
mod_tfidf_server <- function(id, x, target, text_col) {
  moduleServer(id, function(input, output, session) {
    ns <- session$ns
    
    # Define reactive function arguments to pass to plotTfidfNgrams(). This is 
    # necessary for the download button, as the plot to download is for the 
    # chosen ngram type and class.
    ngramsType <- reactive({input$ngramsType})
    filterClass <- reactive({input$class})
    
    dataTfidf <- reactive({
      
      req(filterClass())
      
      x %>% 
        experienceAnalysis::calc_tfidf_ngrams(
          target_col_name = target, 
          text_col_name = text_col,
          filter_class = filterClass(), 
          ngrams_type = ngramsType(),
          number_of_ngrams = input$barsNum
        )
    })
    
    # We want to debounce the EXPRESSION dataTfidf (note no "()") before
    # passing the reactive element dataTfidf() that has the data. We therefore 
    # must use object dataTfidf_d instead of the original dataTfidf in what 
    # follows from now on.
    dataTfidf_d <- dataTfidf %>% # No "()" to indicate we are debouncing the EXPRESSION NOT the reactive object.
      debounce(1000)
      
    output$tfidfBars <- renderPlot({
      
      req(filterClass())
      
      withProgress(
        message = 'Calculation in progress',
        detail = 'This may take a few seconds...',
        value = 0,
        {
          dataTfidf_d() %>% 
            plotTfidfNgrams(
              title = paste0("Most frequent ", ngramsType(),
                             " in feedback text that is about\n",
                             "\"", filterClass(), "\"")
            )
        }
      )
    })
    
    output$tfidfExplanation <- renderText({
      
      HTML(paste0("*TF-IDF stands for
          <u><a target='_blank' rel='noopener noreferrer' href='https://en.wikipedia.org/wiki/Tf%E2%80%93idf'>
          Term Frequency–Inverse Document Frequency</a></u>.
          It is a standard way of calculating the frequency (i.e. importance)
          of a word or series of words 
          (i.e. <u><a target='_blank' rel='noopener noreferrer' href='https://en.wikipedia.org/wiki/N-gram'>n-grams</a></u>) 
          in the given text. It is a little more sophisticated than
          standard frequency as it adjusts for words that appear too frequently
          in the text. For example, stop words like ", "\"", "a", "\"", " and ",
          "\"", "the", "\"", " are very frequent but uniformative of
          the cotent of the text."))
    })
    
    output$classControl <- renderUI({
      
      # There are nonsense criticality values in the dataset that must be 
      # filtered out so they do not show on the class selection box. We have the
      # row indices of the valid criticality values (as we do for the valid 
      # label values) and thus we can use a simple join to keep pnly them.
      if (target == "label") {
        
        aux <- x %>%
          dplyr::right_join(row_index_label, by = 'row_index')
      } else {
        
        aux <- x %>%
          dplyr::right_join(row_index_criticality, by = 'row_index')
      }
      
      choices <- sort(unique(unlist(aux[[target]])))
      
      selectInput(
        session$ns("class"), 
        "Choose a class:",
        choices = choices,
        selected = choices[1]
      )
    })
    
    output$ngramsNumControl <- renderUI({
      
      selectInput(
        session$ns("ngramsType"),
        label = HTML("<b>Choose between unigrams or bigrams:</b>"),
        choices = c("Unigrams", "Bigrams"),
        selected = "Unigrams"
      )
    })
    
    output$barsNumberControl <- renderUI({
      
      sliderInput(
        session$ns("barsNum"),
        label = HTML("<b>Number of bars:</b>"),
        value = 15,
        min = 1,
        max = 100
      )
    })
    
    output$downloadTfidfNgrams <- downloadHandler(
      filename = function() {
        
        filterClass_clean <- clean_text(filterClass())
        
        paste0("tfidf_bars_", target, "_", filterClass_clean, "_", 
               tolower(ngramsType()), ".pdf")
      },
      content = function(file) {
        ggplot2::ggsave(file, 
                        plot = plotTfidfNgrams(x = dataTfidf_d(), 
                                               title = paste0("Most frequent ", 
                                                              ngramsType(),
                                                              " in feedback text that is about\n",
                                                              "\"", 
                                                              filterClass(), 
                                                              "\"")), 
                        device = pdf, height = 16, units = "in")
      }
    )
  })
}
CDU-data-science-team/pxtextminingdashboard documentation built on Sept. 29, 2023, 12:23 a.m.