R/mod_reddit.R

Defines functions mod_reddit_server mod_reddit_ui

#' reddit UI Function
#'
#' @description A shiny Module.
#'
#' @param id,input,output,session Internal parameters for {shiny}.
#'
#' @noRd 
#'
#' @importFrom shiny NS tagList 
mod_reddit_ui <- function(id){
  ns <- NS(id)
  tagList(
    fluidPage(
      
      # Application title
      titlePanel("Reddit Comment Analysis"),
      
      # Sidebar with a slider input for number of bins
      # sidebarLayout(
      
      
      
      # Show a plot of the generated distribution
      #mainPanel(
      bs4Dash::tabsetPanel(
        
        shiny::tabPanel("Search/Load",
                        shiny::textInput(ns("search_q"),
                                         "Search Text (double quotes for exact match)"),
                        shiny::selectInput(ns("search_size"),
                                           "Number of Comments:",
                                           choices = c(10, 100, 500, 1000, 5000, 10000)),
                        shiny::textInput(ns("search_subreddit"),
                                         "Subreddit(s) (comma-separated)"),
                        shiny::textInput(ns("search_author"),
                                         "User Search"),
                        shiny::actionButton(ns("search_button"),
                                            "Run Search",
                                            icon = shiny::icon("search")), # , verify_fa = FALSE
                        shiny::hr(),
                        shiny::downloadButton(ns("comments_download"), label = "Download Comments"),
                        shiny::hr(),
                        
                        # Input: Select a file ----
                        shiny::fileInput(ns("file1"), "Upload Comments",
                                         multiple = FALSE,
                                         accept = c("text/csv",
                                                    "text/comma-separated-values,text/plain",
                                                    ".csv"))
                        
                        
        ),
        
        shiny::tabPanel("Comments",
                        DT::dataTableOutput(ns("comments_table")) %>%  #DT::dataTableOutput
                          shinycssloaders::withSpinner()
        ),
        
        shiny::tabPanel("Subreddits",
                        fluidRow(
                          bs4Dash::box(width = 8, 
                                       collapsible = FALSE,
                                       shiny::tableOutput(ns("subreddit_table"))  %>%
                                         shinycssloaders::withSpinner()),
                          bs4Dash::box(width = 4, 
                                       collapsible = FALSE,
                                       shiny::plotOutput(ns("subreddit_plot")) %>%
                                         shinycssloaders::withSpinner())
                        )
        ),
        
        shiny::tabPanel("Authors",
                        fluidRow(
                          bs4Dash::box(width = 8, 
                                       collapsible = FALSE,
                                       shiny::tableOutput(ns("author_table")) %>%
                                         shinycssloaders::withSpinner()),
                          bs4Dash::box(width = 4,
                                       collapsible = FALSE,
                                       shiny::plotOutput(ns("author_plot")) %>%
                                         shinycssloaders::withSpinner())
                        )
        ),
        
        shiny::tabPanel("ngram Frequency",
                        fluidRow(
                          bs4Dash::box(
                            title = "Word Counts and Usage Rates",
                            width = 6, 
                            collapsible = FALSE,
                            #shiny::tableOutput(ns("freq1gram_table")) %>%
                            div(DT::dataTableOutput(ns("freq1gram_table")) %>% shinycssloaders::withSpinner(), style = "font-size: 90%; width:90%")
                            #DT::dataTableOutput(ns("freq1gram_table")) %>%
                            #shinycssloaders::withSpinner()
                          ),
                          bs4Dash::box(
                            title = "2-Gram Counts and Usage Rates",
                            width = 6, 
                            collapsible = FALSE,
                            shiny::tableOutput(ns("freq2gram_table")) %>%
                              shinycssloaders::withSpinner())
                        )
        ),
        
        shiny::tabPanel("Wordcloud",
                        fluidRow(
                          bs4Dash::box(
                            title = "Word cloud 200 single words, excluding top result (search term)",
                            width = 12,
                            height = "100%",
                            collapsible = FALSE,
                            wordcloud2::wordcloud2Output(
                              ns("wordcloud"),
                              height = "600px") %>%
                              shinycssloaders::withSpinner()
                          )
                        )
        ),
        
        shiny::tabPanel("Posts Over Time",
                        fluidRow(
                          bs4Dash::box(width = 6, 
                                       collapsible = FALSE,
                                       shiny::plotOutput(ns("time_plot")) %>%
                                         shinycssloaders::withSpinner()),
                          bs4Dash::box(width = 6, 
                                       collapsible = FALSE,
                                       shiny::plotOutput(ns("time_heatmap")) %>%
                                         shinycssloaders::withSpinner())
                        )
        ),
        
        shiny::tabPanel("COWO Processing",
                        shiny::h1("Process and Download Data for COWO"),
                        shiny::p("This algorithm processes the loaded Reddit comments as follows:"),
                        tags$ul(
                          tags$li("OPTIONALLY: Transforms a single n-gram into a 1-gram by replacing spaces with underscores."),
                          tags$li("Removes stop words."),
                          tags$li("Selects ONLY the comments (no author, subreddit, etc.)."),
                          tags$li("Creates a text file with one comment per line.")
                        ),
                        shiny::textInput(ns("cowo_2gram_input"), "Custom n-gram to preserve (EXPERIMENTAL)"),
                        shiny::downloadButton(ns("cowo_download"), label = "Download Data Processed for COWO")
        )
        
      ) # closes tabsetPanel
      
      #) # closes mainPanel
      #) # closes sidebarLayout
    )
  )
  
}

#' reddit Server Functions
#'
#' @noRd 
mod_reddit_server <- function(id){
  moduleServer( id, function(input, output, session){
    ns <- session$ns
    # set these up initially for download handler
    q <- size <- author <- subreddit <- NULL
    created_utc <- plot_adjust <- pct <- plot_colour <- word <- ngram <- word1 <- word2 <- created_datetime <- plot_date <- plot_day <- plot_hour <- text <- data <- n <- NULL
    
    
    # set up notifications
    notify <- function(msg, id = NULL, duration = NULL, type = "message") {
      showNotification(msg, id = id, duration = duration, closeButton = FALSE, type = type)
    }
    
    # set up empty data
    reddit_comments <- reactiveValues(data = dplyr::tribble(~id, ~author, ~body, ~subreddit, ~score, ~created_utc, ~created_datetime))
    
    ## SEARCH BUTTON
    
    
    observeEvent(input$search_button,
                 {
                   
                   # set up validation flag
                   valid <- TRUE
                   # GET INPUT
                   q <<- size <<- author <<- subreddit <<- NULL
                   
                   if (!is.null(input$search_q)) q <<- input$search_q
                   if (!is.null(input$search_size)) size <<- as.numeric(input$search_size)
                   if (!is.null(input$search_author)) author <<- input$search_author
                   if (!is.null(input$search_subreddit)) subreddit <<- input$search_subreddit
                   
                   # BASIC INPUT VALIDATION: must select school.
                   if (FALSE){
                     notify("Search parameters invalid.", type = "error")
                     valid <- FALSE
                   }
                   
                   # IF VALIDATED, DO THE THING
                   if (valid){
                     message_id <- notify(sprintf("Loading %s comments. Expect this to take at least %s seconds.", size, round(size/100) + 5), type = "message")
                     
                     # try the API
                     response <- try(pushshiftR::get_reddit_comments(q = q, size = size, fields = "id,author,body,subreddit,score,created_utc", author = author, subreddit = subreddit))
                     
                     # validate the response
                     if ("tbl_df" %in% class(response)) {
                       if (nrow(response) > 0){
                         # we got at least 1 result
                         reddit_comments$data <- response
                         
                         # UPDATE USER
                         message("Successful API query")
                         notify("Successful API query. Finished fetching data.", type = "message", duration = 5, id = message_id)
                         message(reddit_comments$data)
                         
                       } else { # 0 rows
                         message("no results found")
                         notify("API returned a response, but no results found.", type = "error", duration = 5, id = message_id)
                       }
                     }
                     
                     # if we got an error, we remove the notification and show an error notification
                     if ("try-error" %in% class(response)){
                       
                       message(response[1])
                       removeNotification(id)
                       showNotification("Error accessing Pushshift API.", type = "error")
                     }
                     
                     # On successful update:
                     # submitted <<- TRUE
                     # shiny::updateActionButton(session = getDefaultReactiveDomain(),
                     #                           inputId = "test_submit",
                     #                           label = "Results Submitted!",
                     #                           icon = icon("check"))
                     
                     
                   }
                   
                 })
    
    
    # Render the comments table
    output$comments_table <- DT::renderDataTable(reddit_comments$data) # DT::renderDataTable
    
    # Download button handler
    
    output$comments_download <- downloadHandler(
      filename = function() {
        stringr::str_replace_all(paste("reddit-comments-",q,"-", Sys.Date(), ".csv", sep=""), '"', "'")
      },
      content = function(file) {utils::write.csv(reddit_comments$data, file, row.names = FALSE)}#{readr::write_csv(reddit_comments$data, file)}
    )
    
    # Upload handler
    observeEvent(input$file1,{
      message("File uploaded")
      
      tryCatch({
        new_data <- utils::read.csv(input$file1$datapath) #readr::read_csv(input$file1$datapath)
        message(utils::head(new_data))
        # Input validation: make sure it has all the right columns
        if (!all(c("author", "body", "id", "subreddit", "created_utc", "created_datetime", "score") %in% names(new_data))) {
          notify("Error: Input not in correct format. Please use a .csv file downloaded from this web app.")
        } else {
          reddit_comments$data <- new_data %>%
            dplyr::mutate(created_datetime = as.POSIXct(created_utc, origin = "1970-01-01", tz = "EST"))
          notify("Comments successfully loaded from file.", duration = 5)
        }
        
      })
      
    })
    
    
    ####### SUBREDDITS
    # Render the subreddit table
    output$subreddit_table <- renderTable(
      
      reddit_comments$data %>%
        make_subreddit_table()
      
    )
    
    output$subreddit_plot <- renderPlot(
      
      reddit_comments$data %>%
        make_subreddit_plot()
      
    )
    
    
    
    ####### AUTHORS
    # Render the author table
    output$author_table <- renderTable(
      if (nrow(reddit_comments$data) > 0) {
        
        reddit_comments$data %>%
          dplyr::group_by(author) %>%
          dplyr::count(sort = TRUE) %>%
          dplyr::ungroup() %>%
          dplyr::mutate(percent = sprintf("%.1f%%", (n / sum(n))*100)) %>%
          dplyr::slice_head(n=20)
        
      } else {NULL}
    )
    
    output$author_plot <- renderPlot(
      if (nrow(reddit_comments$data) > 0) {
        
        reddit_comments$data %>%
          dplyr::group_by(author) %>%
          dplyr::count(sort = TRUE) %>%
          dplyr::ungroup() %>%
          dplyr::mutate(pct = sprintf("%.1f%%", 100 * n/sum(n))) %>%
          dplyr::mutate(plot_adjust = dplyr::if_else(n > .5 * max(n), n - .1 * (max(n) - min(n)), n + .1 * (max(n) - min(n)))) %>%
          dplyr::mutate(plot_colour = dplyr::if_else(n > .5 * max(n), "white", "black")) %>%
          dplyr::slice_head(n=10) %>%
          ggplot2::ggplot() +
          ggplot2::geom_col(ggplot2::aes(x=stats::reorder(author,n), y = n), fill = "lightblue") +
          ggplot2::geom_text(ggplot2::aes(x=stats::reorder(author,n), y = plot_adjust, label = pct, fontface = "bold", colour = plot_colour), colour = "black") +
          ggplot2::coord_flip() +
          ggplot2::theme_minimal() +
          ggplot2::labs(y="Count",
                        x = "Author",
                        title = "Top 10 Authors by Search Term Prevalence",
                        subtitle = paste0("Search terms: q=",q,"; subreddit=",subreddit,"; author=",author)) +
          ggplot2::scale_y_continuous(breaks = function(x) unique(floor(pretty(seq(0, (max(x) + 1) * 1.1)))))
        
      } else {NULL}
    )
    
    
    ## 1-GRAM and 2-GRAM FREQUENCIES
    # get all 1-grams, remove stop words, count, get top 10
    output$freq1gram_table <- DT::renderDataTable({
      reddit_comments$data %>%
        get_top_words()
    },
    
    extensions = 'Buttons',
    options = list(buttons = c('copy', 'csv', 'excel'),
                   dom = "lbtipB")
    
    )
    
    # get all 2-grams, remove any with stop words, count, get top 10
    output$freq2gram_table <- renderTable(
      reddit_comments$data %>%
        get_top_2grams()
    )
    
    ###################
    # word cloud
    output$wordcloud <- wordcloud2::renderWordcloud2({
      if (nrow(reddit_comments$data) > 0) {
        
        reddit_comments$data %>%
          dplyr::select(id, body) %>%
          tidytext::unnest_tokens(word, body) %>%
          dplyr::anti_join(custom_stop_words) %>%
          dplyr::group_by(word) %>% 
          dplyr::count(sort = TRUE) %>%
          dplyr::ungroup() %>%
          dplyr::slice(-1) %>%
          dplyr::slice_head(n=200) %>%
          wordcloud2::wordcloud2()
        
      } else {NULL}
    })
    
    
    ##################
    # TIME PLOTS
    output$time_plot <- renderPlot({
      
      reddit_comments$data %>%
        plot_reddit_posts_over_time()
      
    })
    
    output$time_heatmap <- renderPlot({
      
      reddit_comments$data %>%
        plot_reddit_heatmap()
      
    })
    
    
    ### PROCESS AND DOWNLOAD FOR COWO
    output$cowo_download <- downloadHandler(
      filename = function() {
        
        stringr::str_replace_all(paste("reddit-comments-cowo-",q,"-", Sys.Date(), ".txt", sep=""), '"', "'")
      },
      content = function(file) {
        
        message(sprintf("cowo_2gram_input = %s", input$cowo_2gram_input))
        
        cowo_text <- reddit_comments$data %>%
          dplyr::select(id, body)
        
        if (!is.null(input$cowo_2gram_input) & !(input$cowo_2gram_input == "")){
          
          #custom_2gram_input <- "coffee maker"
          cowo_2gram_replace <- stringr::str_replace(input$cowo_2gram_input, " ", "_")
          
          cowo_text <- cowo_text %>%
            dplyr::mutate(body = stringr::str_replace_all(body, input$cowo_2gram_input, cowo_2gram_replace))
        }
        
        cowo_text <- cowo_text %>%
          dplyr::select(id, body) %>%
          tidytext::unnest_tokens(word, body) %>%
          dplyr::anti_join(custom_stop_words) %>%
          #dplyr::group_by(id) %>%
          #nest(data = c(word)) %>%
          dplyr::group_nest(id, .key = "data") %>%
          dplyr::ungroup() %>%
          dplyr::mutate(text = purrr::map_chr(data, function(x) stringr::str_flatten(unlist(x), collapse = " "))) %>%
          dplyr::select(text)
        
        #readr::write_delim(cowo_text, file, col_names = FALSE)
        utils::write.table(cowo_text, file=file, sep="", row.names = FALSE, col.names = FALSE, quote=FALSE)
      }
    )
    
  })
}

## To be copied in the UI
# mod_reddit_ui("reddit_ui_1")

## To be copied in the server
# mod_reddit_server("reddit_ui_1")
BelangerAnalytics/socialastronomy.pbs4dash documentation built on Feb. 15, 2022, 8:06 a.m.