#' Scrape exercises from bodybuilding.com
#'
#' Downloads result pages `1` to `pages` of the exercise finder on
#' bodybuilding.com and collects one row per exercise: its name, the targeted
#' muscle group, the numeric grade shown in the rating badge, and a rating
#' label derived from that grade. Exercises whose badge text is not a number
#' get an `NA` grade and the rating `"Not yet rated"`.
#'
#' One dot per page is printed to the console as a progress indicator.
#'
#' @param pages Number of result pages to scrape; a single positive whole
#'   number. Defaults to 73, the maximum at the time of writing.
#'
#' @return A tibble with the columns `exercises` (character), `muscles`
#'   (factor), `grade` (numeric) and `rating` (factor with the levels
#'   `"Excellent"`, `"Good"`, `"Average"`, `"Not yet rated"`).
#'
#' @export
#' @importFrom rvest html_nodes html_text
#' @importFrom xml2 read_html
#' @importFrom tibble tibble
#' @importFrom dplyr mutate case_when bind_rows
#'
#' @examples
#' \dontrun{
#' # Requires internet access.
#' df <- scrapeExercises(pages = 5)
#' head(df)
#' }
scrapeExercises <- function(pages = 73) {
  # Reject anything that is not a single positive whole number up front;
  # `1:pages` would otherwise count backwards (1, 0) for `pages = 0`.
  if (!is.numeric(pages) || length(pages) != 1L || is.na(pages) ||
      pages < 1 || pages != floor(pages)) {
    stop("`pages` must be a single positive whole number.", call. = FALSE)
  }

  url_base <- "https://www.bodybuilding.com/exercises/finder/"

  # Whitespace-trimmed text of every node on `page` matching selector `css`.
  node_text <- function(page, css) {
    trimws(rvest::html_text(rvest::html_nodes(page, css)))
  }

  # Download result page `i` and return its exercises as a tibble.
  scrape_page <- function(i) {
    cat(".")
    page <- xml2::read_html(paste0(url_base, i))

    # Strip the "Muscle Targeted:" label, line breaks and all spaces so only
    # the muscle name remains.
    muscles <- node_text(page, ".ExResult-muscleTargeted")
    muscles <- gsub("Muscle Targeted:\n", "", muscles)
    muscles <- gsub("\n", "", muscles)
    muscles <- gsub(" ", "", muscles)

    # The rating label is not scraped: the original author noted that the
    # '.ExRating-description--Excellent' selector only finds Excellent
    # exercises, so the label is derived from the grade further below.
    #
    # NOTE(review): the three selectors are matched independently. If a page
    # yields a different number of badges than headings, tibble() raises a
    # size error instead of misaligning rows -- confirm against the live
    # page structure.
    tibble::tibble(
      exercises = node_text(page, ".ExResult-resultsHeading"),
      muscles = muscles,
      grade = node_text(page, ".ExRating-badge")
    )
  }

  # Collect all pages first and bind once, rather than growing the result
  # inside a loop.
  exercise_df <- dplyr::bind_rows(lapply(seq_len(pages), scrape_page))

  rating_levels <- c("Excellent", "Good", "Average", "Not yet rated")

  # mutate() evaluates its arguments in order, so `rating` sees the numeric
  # `grade`. The NA check comes first and the final branch is a catch-all, so
  # no grade (e.g. 4.95) can fall between branches and end up as NA.
  exercise_df <- dplyr::mutate(
    exercise_df,
    grade = as.numeric(grade),
    rating = factor(
      dplyr::case_when(
        is.na(grade) ~ "Not yet rated",
        grade >= 8.0 ~ "Excellent",
        grade >= 5.0 ~ "Good",
        TRUE ~ "Average"
      ),
      levels = rating_levels
    ),
    muscles = factor(muscles)
  )

  # Last expression is returned visibly (the original ended on an assignment,
  # which made the result invisible at the console).
  janitor::clean_names(exercise_df, case = "lower_camel")
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.