#' Go Get IMDb Ratings
#'
#' Go get the data about your favorite show on IMDb. Grabs ratings for every episode of selected seasons.
#'
#' @param imdb_code String in URL that identifies the desired show (format: "tt<numbers>")
#' @param seasons Vector of the seasons to include
#' @return Dataframe of the desired show's episodes (tibble)
#' @details Breakdown of dataframe contents
#' \itemize{
#' \item{"show"}{: Name of the show}
#' \item{"season"}{: Season Number}
#' \item{"episode"}{: Episode Number}
#' \item{"air_date"}{: Date the episode originally aired}
#' \item{"title"}{: Name of the Episode}
#' \item{"rating"}{: IMDb rating for the episode}
#' \item{"votes"}{: How many votes the IMDb rating is based off of}
#' }
#' @examples
#' # The Magicians (https://www.imdb.com/title/tt4254242/)
#' grab_imdb_ratings("tt4254242", c(1:5))
#' @export
grab_imdb_ratings <- function(imdb_code, seasons) {
  # Grab rating data for a show on IMDb: scrape the episode list page of each
  # requested season and stack the per-season tables into one tibble.
  #
  # - imdb_code: url code for a given show (the "tt<number_string>" in the url)
  # - seasons: vector of desired seasons
  stopifnot(is.character(imdb_code), length(imdb_code) == 1L)

  # Progress goes through message() rather than print() so callers can
  # silence it with suppressMessages() and it stays out of captured output.
  message("Grabbing IMDb ratings for ", imdb_code)

  # Drop repeated seasons so no page is downloaded (or returned) twice.
  seasons <- unique(seasons)

  # One tibble per season, collected by position. Indexing the result list by
  # the season value itself would fail for season 0 and pad the list with
  # NULLs for sparse or large season numbers.
  # Rows are returned in the order the seasons were requested.
  season_tables <- lapply(seasons, function(season) {
    message("  season ", season)
    imdb_season_episodes(imdb_code, season)
  })

  # smoosh the list into one tibble (an empty `seasons` gives an empty tibble)
  dplyr::bind_rows(season_tables)
}

#' Scrape the episode table for a single season of a show
#'
#' @param imdb_code String in URL that identifies the desired show
#' @param season Season to fetch (interpolated into the `season=` query)
#' @return Tibble with columns show, season, episode, air_date, title,
#'   rating, votes; one row per scraped episode
#' @noRd
imdb_season_episodes <- function(imdb_code, season) {
  season_url <- glue::glue(
    "https://www.imdb.com/title/{imdb_code}/episodes?season={season}"
  )

  # go get the html
  html <- xml2::read_html(season_url)

  # isolate the desired data
  show <- imdb_node_text(html, ".parent a")
  title <- imdb_node_text(html, "#episodes_content strong a")
  rating <- as.numeric(
    imdb_node_text(html, ".ipl-rating-star.small .ipl-rating-star__rating")
  )
  # parse_number() pulls the numeric value out of the vote-count text
  votes <- readr::parse_number(
    imdb_node_text(html, ".ipl-rating-star__total-votes")
  )
  # remove periods before parsing
  # (May doesn't have a period like the rest: Apr., Oct.)
  air_date <- readr::parse_date(
    stringr::str_remove(imdb_node_text(html, ".airdate"), "[.]"),
    format = "%d %b %Y"
  )

  # NOTE(review): each column is scraped independently, so a page that lists
  # an episode without a rating/vote count would presumably give vectors of
  # different lengths and make tibble() error - confirm against a season with
  # unaired episodes.
  episodes <- tibble::tibble(show, air_date, title, rating, votes)

  # row_number() is safe for a season with zero episodes, where
  # seq(1, nrow(.)) would produce c(1, 0) and fail.
  episodes <- dplyr::mutate(
    episodes,
    season = season,
    episode = dplyr::row_number()
  )

  dplyr::select(episodes, show, season:episode, dplyr::everything())
}

#' Trimmed text of every node matching a CSS selector
#'
#' @param html Parsed document as returned by `xml2::read_html()`
#' @param css CSS selector string
#' @return Character vector, one element per matching node
#' @noRd
imdb_node_text <- function(html, css) {
  rvest::html_text(rvest::html_nodes(html, css), trim = TRUE)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.