#' Search Subreddit Function
#'
#' Searches reddit for `searchTerm` within a specific subreddit and returns a
#' data frame in which each row is a thread and each column is an attribute
#' of that thread. Results are requested newest-first, 100 per page; paging
#' stops after the first page that is not full, or after 10 pages.
#'
#' The requests are authenticated with an object named `token` that is not an
#' argument of this function: it must be resolvable from the function's
#' enclosing environment (for example the global environment) at call time.
#'
#' @param searchTerm What you're searching for. Reserved URL characters
#'   (such as `&`, `#`, `+` and spaces) are percent-encoded before the
#'   request is made; a term that already contains `%XX` escapes is sent
#'   unchanged.
#' @param subreddit The subreddit in which you're searching. Defaults to
#'   `"all"`.
#' @return A data frame with one row per thread, with column types converted
#'   by `type_convert()` according to `RedditColTypes("thread")`.
#' @keywords reddit subreddit search API
#' @export
#' @examples
#' \dontrun{
#' SearchSubreddit("cats with dogs", "CatsStandingUp")
#' }
SearchSubreddit <- function(searchTerm, subreddit = "all") {
  # Kept from the original implementation: helpers defined elsewhere may rely
  # on these packages being attached.
  library(tidyverse)
  library(httr)

  # Results requested per page (the `limit` query parameter) and the maximum
  # number of pages fetched for a single search.
  pageSize <- 100L
  maxPages <- 10L

  ### Accessory function to remove lists from thread data
  # NULL fields become NA, list-valued fields are dropped, and every remaining
  # field is converted to character so that pages can be row-bound safely.
  CleanUpLists <- function(x) {
    types <- vapply(x, typeof, character(1))
    x[types == "NULL"] <- NA
    # Logical subsetting on purpose: `x[-which(...)]` would return an empty
    # list whenever no field is a list.
    lapply(x[types != "list"], as.character)
  }

  ### Accessory function to fetch one page of results as a data frame
  FetchPage <- function(url) {
    response <- GET(url,
                    user_agent("Reddit Comment Scraper"),
                    config(token = token))
    # Surface HTTP failures (expired token, rate limit, ...) instead of
    # silently treating them as "no results".
    warn_for_status(response)
    map_dfr(content(response)$data$children, ~ CleanUpLists(.$data))
  }

  ### The search URL shared by every page of results
  searchURL <- paste0(
    "https://oauth.reddit.com/r/", subreddit,
    "/search.json?q=",
    utils::URLencode(as.character(searchTerm), reserved = TRUE),
    "&sort=new&type=link&restrict_sr=TRUE&t=all&raw_json=1&limit=", pageSize
  )

  ### Perform the initial search API call
  threads <- FetchPage(searchURL)
  searchPage <- 1L
  message(paste("Finished page", searchPage, "of results for", searchTerm))

  ### Keep searching while the most recent page came back full
  # Only the size of the *last* page is tracked: testing the accumulated
  # row count would keep looping when there are exactly `pageSize` results.
  lastPageSize <- nrow(threads)
  while (lastPageSize == pageSize && searchPage < maxPages) {
    searchPage <- searchPage + 1L
    message(paste("Getting page", searchPage, "of results for", searchTerm))
    # Pause to respect API rules.
    Sys.sleep(1.1)
    ### Continue after the fullname of the last thread seen so far
    lastThread <- tail(threads$name, n = 1L)
    newThreads <- FetchPage(paste0(searchURL, "&after=", lastThread))
    lastPageSize <- nrow(newThreads)
    ### Combine the old data with the new
    threads <- bind_rows(threads, newThreads)
  }

  message(paste("Done searching for", searchTerm, "after", searchPage, "pages"))
  suppressMessages(type_convert(threads, col_types = RedditColTypes("thread")))
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.