Nothing
#' Get fbref Full Player Scouting Report
#'
#' Returns the scouting report for a selected player
#'
#' @param player_url the URL of the player (can come from fb_player_urls())
#' @param pos_versus either "primary" or "secondary" as fbref offer comparisons against multiple positions
#' @param time_pause the wait time (in seconds) between page loads
#'
#' @return returns a dataframe of a player's full scouting information for all seasons available on FBref
#'
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#'
#' @export
#'
#' @examples
#' \dontrun{
#' try({
#' fb_player_scouting_report(player_url = "https://fbref.com/en/players/d70ce98e/Lionel-Messi",
#' pos_versus = "primary")
#'
#' # to filter for the last 365 days:
#' fb_player_scouting_report(player_url = "https://fbref.com/en/players/d70ce98e/Lionel-Messi",
#' pos_versus = "primary") %>% dplyr::filter(scouting_period == "Last 365 Days")
#'
#' # to get secondary positions
#' fb_player_scouting_report(player_url = "https://fbref.com/en/players/d70ce98e/Lionel-Messi",
#' pos_versus = "secondary")
#'
#' # for the 2020-2021 La Liga season
#' fb_player_scouting_report(player_url = "https://fbref.com/en/players/d70ce98e/Lionel-Messi",
#' pos_versus = "secondary") %>% dplyr::filter(scouting_period == "2020-2021 La Liga")
#' })
#' }
fb_player_scouting_report <- function(player_url, pos_versus, time_pause=3) {
main_url <- "https://fbref.com"
# put sleep in as per new user agreement on FBref
Sys.sleep(time_pause)
player_page <- xml2::read_html(player_url)
player_name <- player_page %>% rvest::html_node("h1") %>% rvest::html_text() %>% stringr::str_squish()
main_cats <- player_page %>% rvest::html_nodes("#inner_nav") %>% rvest::html_nodes(".full.hasmore")
span_names <- main_cats %>% rvest::html_nodes("span") %>% rvest::html_text()
main_cats <- main_cats[grep("Scouting Report", span_names)]
# scout_level1_url <- main_cats %>% rvest::html_nodes("ul li") %>% .[1] %>%
# rvest::html_nodes("a") %>% rvest::html_attr("href") %>% paste0(main_url, .)
scout_level1_url <- main_cats %>% rvest::html_nodes("ul li") %>%
rvest::html_nodes("a") %>% rvest::html_attr("href") %>% paste0(main_url, .)
all_scout_pos <- data.frame()
for(each_scout_url in scout_level1_url) {
Sys.sleep(time_pause)
scout_pg <- xml2::read_html(each_scout_url)
period <- scout_pg %>% rvest::html_nodes("#all_scout") %>% rvest::html_nodes(".section_heading_text") %>% rvest::html_text() %>%
unique() %>% stringr::str_squish()
# .pkg_message("Scraping full scouting report for {player_name} for period: {period}")
outer <- scout_pg %>% rvest::html_nodes("#all_scout") %>% rvest::html_nodes(".filter.switcher") %>% rvest::html_nodes("div")
# if(pos_versus == "primary") {
# pos_versus <- 1
# tryCatch({versus <- outer %>% rvest::html_text() %>%
# stringr::str_squish() %>% .[1]}, error = function(e) {versus <- data.frame()})
# } else if (pos_versus == "secondary") {
# pos_versus <- 2
# tryCatch({versus <- outer %>% rvest::html_text() %>%
# stringr::str_squish() %>% .[2]}, error = function(e) {versus <- data.frame()})
# }
# else {
# stop(glue::glue("Select a correct 'pos_versus' value from either 'primary' or 'secondary'"))
# }
if(length(outer) == 1) {
pos_versus_idx <- 1
versus <- outer %>% rvest::html_text() %>%
stringr::str_squish()
} else if (length(outer) > 1) {
if(pos_versus != "primary") {
pos_versus_idx <- 2
} else {
pos_versus_idx <- 1
}
versus <- outer %>% rvest::html_text() %>%
stringr::str_squish() %>% .[pos_versus_idx]
} else {
stop(glue::glue("Full scouting report not available for {player_name}"))
}
scouting_all <- scout_pg %>% rvest::html_nodes("#all_scout") %>% rvest::html_nodes(".table_container")
scout_pos <- scouting_all[pos_versus_idx] %>%
rvest::html_nodes("table") %>% rvest::html_table() %>% data.frame()
missing_idx <- scout_pos[,1] != ""
scout_pos <- scout_pos[missing_idx,]
names(scout_pos) <- scout_pos[1,]
scout_pos <- scout_pos %>%
dplyr::rename(Per90=.data$`Per 90`)
df <- data.frame(Statistic="Standard", Per90="Standard", Percentile="Standard")
scout_pos <- rbind(df, scout_pos)
scout_pos$stat_group <- NA_character_
stat_names <- scout_pos$Statistic
idx <- grep("Statistic", stat_names)
stat_vct <- c()
for(i in 1:length(idx)) {
id <- idx[i]-1
st <- stat_names[id]
tryCatch(scout_pos[c(id:(idx[i+1]-2)), "stat_group"] <- st, error = function(e) scout_pos[c(id:nrow(scout_pos)), "stat_group"] <- st)
}
scout_pos$stat_group[is.na(scout_pos$stat_group)] <- st
scout_pos <- scout_pos[-c(idx, idx-1), ]
scout_pos <- scout_pos %>%
dplyr::mutate(Player=player_name,
Versus=gsub("vs. ", "", versus),
Per90 = gsub("\\+", "", .data[["Per90"]]) %>% gsub("\\%", "", .) %>% gsub("\\,", "", .) %>% as.numeric(),
Percentile = as.numeric(.data[["Percentile"]])) %>%
dplyr::select(.data[["Player"]], .data[["Versus"]], StatGroup=.data[["stat_group"]], dplyr::everything())
mins_played <- scout_pg %>% rvest::html_nodes(".footer") %>% rvest::html_nodes("strong") %>%
rvest::html_text() %>% gsub(" minutes", "", .) %>% as.numeric() %>% unique()
scout_pos <- scout_pos %>%
dplyr::mutate(BasedOnMinutes = mins_played)
scout_pos <- scout_pos %>%
dplyr::mutate(scouting_period = period)
all_scout_pos <- dplyr::bind_rows(all_scout_pos, scout_pos)
}
return(all_scout_pos)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.