# R/GetComments.R

#' Get Reddit Comments
#'
#' Fetches the comments on a given reddit thread from the OAuth API and
#' flattens the top-level comments and every nested reply into a single
#' data frame.
#'
#' The request is authenticated with an object named `token`. It is not an
#' argument of this function, so it must already exist somewhere this function
#' can see it (for example in the calling session) before the call is made.
#'
#' @param fullname The reddit fullname of the thread (probably starts with
#'   `t3_`). The first three characters are dropped to obtain the ID that is
#'   placed in the request URL.
#' @param subreddit The subreddit in which the thread resides. Defaults to
#'   `"all"`.
#' @return A data frame holding the top-level comments followed by the nested
#'   replies, with columns converted by `type_convert()` using
#'   `RedditColTypes("comment")`.
#' @keywords reddit subreddit search API
#' @export
#' @examples
#' \dontrun{
#' GetComments("t3_buttbutt", "CatsStandingUp")
#' }
GetComments <- function(fullname, subreddit = "all") {
  suppressPackageStartupMessages(library(tidyverse))
  suppressPackageStartupMessages(library(httr))

  # substring(fullname, 4) strips the three-character type prefix
  threadURL <- paste0(
    "https://oauth.reddit.com/r/", subreddit,
    "/comments/", substring(fullname, 4), ".json"
  )
  print(paste("Fetching thread", fullname))

  ### Pause to respect API rules
  Sys.sleep(1.1)

  ### Get all of the information on the thread
  # `token` is resolved from the enclosing scope; it is not defined here
  rawResponse <- GET(
    threadURL,
    user_agent("screddr comment scraper v0.1"),
    config(token = token)
  )
  # Fail with the HTTP status instead of an obscure subsetting error later on
  stop_for_status(rawResponse, task = paste("fetch thread", fullname))
  threadResponse <- content(rawResponse)

  # Number of direct replies under one node of the parsed response.
  # A `replies` field of length one is treated as "no replies".
  CountReplies <- function(x) {
    if (length(x$data$replies) == 1) {
      0
    } else {
      length(x$data$replies$data$children)
    }
  }

  # Turn one record into a named list of character values, leaving out its
  # `replies` field so that it can become a data frame row.
  WithoutReplies <- function(x) {
    # Records without a `replies` field contribute nothing. The previous
    # `x[-which(...)]` form produced this result implicitly through an empty
    # negative index; it is spelled out here so the behaviour is deliberate.
    if (!"replies" %in% names(x)) {
      return(list())
    }
    output <- x[names(x) != "replies"]
    output[lengths(output) == 0] <- NA  # empty fields become missing values
    lapply(output, as.character)
  }

  # Collect every descendant of `node`: its direct replies first, then the
  # descendants of each of those replies, in order.
  InspectReplies <- function(node) {
    if (CountReplies(node) == 0) {
      return(tibble())
    }
    replies <- node$data$replies$data$children
    bind_rows(
      map_dfr(replies, ~ WithoutReplies(.$data)),
      map_dfr(replies, ~ InspectReplies(.))
    )
  }

  # The second element of the parsed response holds the comment listing
  threadChildren <- threadResponse[[2]]$data$children

  topComments <- map_dfr(threadChildren, ~ WithoutReplies(.$data))  # top level
  childComments <- map_dfr(threadChildren, ~ InspectReplies(.))     # second-or-deeper

  allComments2 <- bind_rows(topComments, childComments)  # top level first

  type_convert(allComments2, col_types = RedditColTypes("comment"))
}
# colindouglas/screddr documentation built on May 27, 2019, 1:08 p.m.