#' Scrape poll data from RealClearPolitics
#'
#' Downloads a realclearpolitics.com polling page, extracts its poll table and
#' returns the cleaned polls together with a plot of the spread between the
#' first two result columns over time.
#'
#' The poll dates are parsed as month/day only, so the year has to be
#' inferred: each closing date is placed in the current year, or in the
#' previous year when the current year would put it in the future. Polls that
#' closed more than a year ago cannot be dated correctly from month/day alone.
#'
#' @param url The URL of the page with the html table of your data.
#' @return A list with two elements:
#'   \describe{
#'     \item{poll_data}{A data frame with the columns \code{poll},
#'       \code{poll_id}, \code{close_date}, \code{sample_size} (the first
#'       space-delimited token of the \code{Sample} column), the first two
#'       result columns divided by 100 (named as in the scraped table) and
#'       \code{spread} (first result column minus the second, divided by
#'       100).}
#'     \item{plot}{A ggplot object showing \code{spread} against
#'       \code{close_date} with a smoothed trend.}
#'   }
#' @export
#' @import ggplot2
#' @import dplyr
#' @import stringr
#' @import rvest
#' @examples
#' \dontrun{
#' trump_approval <- scrape_rcp(url = "https://www.realclearpolitics.com/epolls/other/president_trump_job_approval-6179.html")
#' }
scrape_rcp <- function(url) {
  url <- as.character(url)
  if (length(url) != 1L || is.na(url) || !nzchar(url)) {
    stop("`url` must be a single, non-empty string.", call. = FALSE)
  }

  # Pull every html table found on the page
  tables <- read_html(url) %>%
    html_nodes("table") %>%
    html_table()

  # The poll listing is taken from the fourth table on the page
  poll_table_index <- 4L
  if (length(tables) < poll_table_index) {
    stop(
      "Expected at least ", poll_table_index, " html tables at `url`, found ",
      length(tables), ".",
      call. = FALSE
    )
  }
  raw_table <- tables[[poll_table_index]]

  missing_cols <- setdiff(c("Poll", "Date", "Sample"), names(raw_table))
  if (length(missing_cols) > 0) {
    stop(
      "The poll table is missing expected column(s): ",
      paste(missing_cols, collapse = ", "), ".",
      call. = FALSE
    )
  }

  # Drop the two leading rows of the table
  # NOTE(review): presumably summary/average rows rather than individual
  # polls -- confirm against the page layout.
  raw_table <- raw_table[-(1:2), ]
  if (nrow(raw_table) == 0) {
    stop("The poll table contains no poll rows.", call. = FALSE)
  }

  # Format poll dates: keep the closing date (the part after the last "- ")
  month_day <- vapply(
    str_split(as.character(raw_table$Date), "- "),
    function(parts) utils::tail(parts, 1),
    character(1)
  )
  month_day <- trimws(month_day)

  # Only month/day is available. Use the current year, but move any date that
  # would land in the future back one year, since a poll cannot close after
  # today (e.g. December polls scraped in January).
  today <- Sys.Date()
  current_year <- as.integer(format(today, "%Y"))
  close_date <- as.Date(
    paste(current_year, month_day, sep = "/"),
    format = "%Y/%m/%d"
  )
  in_future <- !is.na(close_date) & close_date > today
  close_date[in_future] <- as.Date(
    paste(current_year - 1L, month_day[in_future], sep = "/"),
    format = "%Y/%m/%d"
  )

  # Identify horserace columns: everything that is not poll metadata
  meta_cols <- c("Poll", "Date", "Spread", "MoE", "Sample")
  hr <- raw_table[!names(raw_table) %in% meta_cols]
  if (ncol(hr) < 2L) {
    stop(
      "Expected at least two result columns in the poll table, found ",
      ncol(hr), ".",
      call. = FALSE
    )
  }

  # Extract with [[ ]] so a plain vector is returned whether html_table()
  # produced a data.frame or a tibble; coerce in case a column was read as
  # text (unparseable entries become NA with the usual coercion warning).
  as_number <- function(x) {
    if (is.numeric(x)) x else as.numeric(as.character(x))
  }
  first_share <- as_number(hr[[1]]) / 100
  second_share <- as_number(hr[[2]]) / 100
  spread <- first_share - second_share

  # Format sample size: keep the first space-delimited token
  sample_size <- vapply(
    str_split(as.character(raw_table$Sample), " "),
    function(parts) utils::head(parts, 1),
    character(1)
  )

  # Identify polls: strip asterisks and number pollsters by first appearance
  poll <- str_replace_all(raw_table$Poll, "[*]", "")
  poll_id <- match(poll, unique(poll))

  # Combine into one data frame
  poll_data <- data.frame(
    poll = poll,
    poll_id = poll_id,
    close_date = close_date,
    sample_size = sample_size,
    first_share = first_share,
    second_share = second_share,
    spread = spread
  )
  # Assign after construction so the scraped column names are kept verbatim
  names(poll_data)[5:6] <- names(hr)[1:2]

  # Plot the spread over time
  trend_plot <- ggplot(poll_data, aes(x = close_date)) +
    geom_hline(
      yintercept = 0,
      linetype = 2,
      colour = "darkgrey"
    ) +
    geom_point(
      aes(y = spread),
      colour = "red"
    ) +
    geom_smooth(
      aes(y = spread),
      fill = "pink",
      colour = "red"
    ) +
    xlab(" ") +
    theme(
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      panel.background = element_blank(),
      axis.line = element_line(colour = "black"),
      axis.text = element_text(colour = "black", size = 11),
      axis.title = element_text(colour = "black", size = 16, face = "bold"),
      plot.title = element_text(size = 20, face = "bold")
    )

  list(
    poll_data = poll_data,
    plot = trend_plot
  )
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.