README.md

bballR: Scrape basketball data from basketball-reference.com in R

The bballR package uses tidyverse packages to efficiently scrape data from basketball-reference and return it in the tidy data format.

Installation

devtools::install_github("bobbyingram/bballR")

Usage

Players

To scrape high-level player information for players in the basketball-reference player database use

library(bballR)
players <- scrape_all_players()
dplyr::glimpse(players)
#> Observations: 4,569
#> Variables: 10
#> $ Player       <chr> "Alaa Abdelnaby", "Zaid Abdul-Aziz", "Kareem Abdu...
#> $ PlayerId     <chr> "abdelal01", "abdulza01", "abdulka01", "abdulma02...
#> $ From         <int> 1991, 1969, 1970, 1991, 1998, 1997, 1977, 1957, 1...
#> $ To           <int> 1995, 1978, 1989, 2001, 2003, 2008, 1981, 1957, 1...
#> $ Pos          <chr> "F-C", "C-F", "C", "G", "F", "F", "F", "G", "F", ...
#> $ Ht           <chr> "6-10", "6-9", "7-2", "6-1", "6-6", "6-9", "6-7",...
#> $ Wt           <int> 240, 235, 225, 162, 223, 225, 220, 180, 195, 190,...
#> $ `Birth Date` <date> 1968-06-24, 1946-04-07, 1947-04-16, 1969-03-09, ...
#> $ College      <chr> "Duke University", "Iowa State University", "Univ...
#> $ HoF          <lgl> FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, F...

Teams

To scrape high-level information for teams in the basketball-reference team database use

all_teams <- scrape_teams()
active_teams <- scrape_teams(status = "active")
defunct_teams <- scrape_teams(status = "defunct")
dplyr::glimpse(all_teams)
#> Observations: 30
#> Variables: 14
#> $ Franchise   <chr> "Atlanta Hawks", "Boston Celtics", "Brooklyn Nets"...
#> $ FranchiseId <chr> "ATL", "BOS", "NJN", "CHA", "CHI", "CLE", "DAL", "...
#> $ Lg          <chr> "NBA", "NBA/BAA", "NBA/ABA", "NBA", "NBA", "NBA", ...
#> $ From        <int> 1950, 1947, 1968, 1989, 1967, 1971, 1981, 1968, 19...
#> $ To          <int> 2018, 2018, 2018, 2018, 2018, 2018, 2018, 2018, 20...
#> $ Yrs         <int> 69, 72, 51, 28, 52, 48, 38, 51, 70, 72, 51, 51, 48...
#> $ G           <int> 5451, 5624, 4121, 2229, 4195, 3868, 3049, 4121, 55...
#> $ W           <int> 2712, 3318, 1774, 980, 2177, 1815, 1534, 2051, 268...
#> $ L           <int> 2739, 2306, 2347, 1249, 2018, 2053, 1515, 2070, 28...
#> $ `W/L%`      <dbl> 0.498, 0.590, 0.430, 0.440, 0.519, 0.469, 0.503, 0...
#> $ Plyfs       <int> 46, 54, 26, 10, 35, 21, 21, 33, 41, 33, 31, 33, 13...
#> $ Div         <int> 11, 31, 5, 0, 9, 6, 3, 10, 11, 10, 5, 9, 2, 32, 0,...
#> $ Conf        <int> 0, 9, 2, 0, 6, 4, 2, 0, 5, 4, 4, 1, 0, 18, 0, 5, 2...
#> $ Champ       <int> 1, 17, 2, 0, 6, 1, 1, 0, 3, 5, 2, 3, 0, 16, 0, 3, ...

Seasons

To scrape summary player performance data for an individual season use

season <- 2017
per_game_2017 <- scrape_season_per_game(season)
per_100_2017 <- scrape_season_per_100_poss(season)
per_36_2017 <- scrape_season_per_36_minute(season)
adv_2017 <- scrape_season_advanced(season)
totals_2017 <- scrape_season_totals(season)
dplyr::glimpse(per_game_2017)
#> Observations: 595
#> Variables: 31
#> $ Season   <chr> "2016-17", "2016-17", "2016-17", "2016-17", "2016-17"...
#> $ Player   <chr> "Alex Abrines", "Quincy Acy", "Quincy Acy", "Quincy A...
#> $ PlayerId <chr> "abrinal01", "acyqu01", "acyqu01", "acyqu01", "adamss...
#> $ Pos      <chr> "SG", "PF", "PF", "PF", "C", "SG", "C", "C", "PF", "P...
#> $ Age      <int> 23, 26, 26, 26, 23, 31, 28, 28, 31, 27, 35, 26, 38, 3...
#> $ Tm       <chr> "OKC", "TOT", "DAL", "BRK", "OKC", "SAC", "NOP", "MIN...
#> $ G        <int> 68, 38, 6, 32, 80, 61, 39, 62, 72, 61, 71, 61, 12, 30...
#> $ GS       <int> 6, 1, 0, 1, 80, 45, 15, 0, 72, 5, 66, 25, 0, 0, 10, 2...
#> $ MP       <dbl> 15.5, 14.7, 8.0, 15.9, 29.9, 25.9, 15.0, 8.6, 32.4, 1...
#> $ FG       <dbl> 2.0, 1.8, 0.8, 2.0, 4.7, 3.0, 2.3, 0.7, 6.9, 1.3, 3.9...
#> $ FGA      <dbl> 5.0, 4.5, 2.8, 4.8, 8.2, 6.9, 4.6, 1.4, 14.6, 2.8, 8....
#> $ `FG%`    <dbl> 0.393, 0.412, 0.294, 0.425, 0.571, 0.440, 0.500, 0.52...
#> $ `3P`     <dbl> 1.4, 1.0, 0.2, 1.1, 0.0, 1.0, 0.0, 0.0, 0.3, 0.0, 0.2...
#> $ `3PA`    <dbl> 3.6, 2.4, 1.2, 2.6, 0.0, 2.5, 0.1, 0.0, 0.8, 0.0, 0.8...
#> $ `3P%`    <dbl> 0.381, 0.411, 0.143, 0.434, 0.000, 0.411, 0.000, NA, ...
#> $ `2P`     <dbl> 0.6, 0.9, 0.7, 0.9, 4.7, 2.0, 2.3, 0.7, 6.6, 1.3, 3.6...
#> $ `2PA`    <dbl> 1.4, 2.1, 1.7, 2.2, 8.2, 4.4, 4.5, 1.4, 13.8, 2.7, 7....
#> $ `2P%`    <dbl> 0.426, 0.413, 0.400, 0.414, 0.572, 0.457, 0.511, 0.52...
#> $ `eFG%`   <dbl> 0.531, 0.521, 0.324, 0.542, 0.571, 0.514, 0.500, 0.52...
#> $ FT       <dbl> 0.6, 1.2, 0.3, 1.3, 2.0, 1.4, 0.7, 0.2, 3.1, 0.4, 1.1...
#> $ FTA      <dbl> 0.7, 1.6, 0.5, 1.8, 3.2, 1.5, 1.0, 0.4, 3.8, 0.5, 1.8...
#> $ `FT%`    <dbl> 0.898, 0.750, 0.667, 0.754, 0.611, 0.892, 0.725, 0.68...
#> $ ORB      <dbl> 0.3, 0.5, 0.3, 0.6, 3.5, 0.1, 1.2, 0.8, 2.4, 1.7, 2.3...
#> $ DRB      <dbl> 1.0, 2.5, 1.0, 2.8, 4.2, 1.9, 3.4, 1.7, 4.9, 1.9, 3.2...
#> $ TRB      <dbl> 1.3, 3.0, 1.3, 3.3, 7.7, 2.0, 4.5, 2.5, 7.3, 3.6, 5.5...
#> $ AST      <dbl> 0.6, 0.5, 0.0, 0.6, 1.1, 1.3, 0.3, 0.4, 1.9, 0.9, 1.4...
#> $ STL      <dbl> 0.5, 0.4, 0.0, 0.4, 1.1, 0.3, 0.5, 0.4, 0.6, 0.3, 1.6...
#> $ BLK      <dbl> 0.1, 0.4, 0.0, 0.5, 1.0, 0.1, 0.6, 0.4, 1.2, 0.4, 0.4...
#> $ TOV      <dbl> 0.5, 0.6, 0.3, 0.6, 1.8, 0.7, 0.8, 0.3, 1.4, 0.5, 1.4...
#> $ PF       <dbl> 1.7, 1.8, 1.5, 1.8, 2.4, 1.7, 2.0, 1.4, 2.2, 1.3, 2.5...
#> $ `PS/G`   <dbl> 6.0, 5.8, 2.2, 6.5, 11.3, 8.4, 5.3, 1.7, 17.3, 2.9, 9...

Game Logs

To scrape an individual player's regular season game logs for a single season, you need the unique player Id from the players lookup:

player <- "Dwyane Wade"
wade <- dplyr::filter(players, Player == player)
wade_logs <- scrape_game_logs(wade$PlayerId, season)
dplyr::glimpse(wade_logs)
#> Observations: 82
#> Variables: 30
#> $ Season   <chr> "2016-17", "2016-17", "2016-17", "2016-17", "2016-17"...
#> $ Player   <chr> "Dwyane Wade", "Dwyane Wade", "Dwyane Wade", "Dwyane ...
#> $ G        <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, NA, 14, 15...
#> $ Date     <date> 2016-10-27, 2016-10-29, 2016-10-31, 2016-11-02, 2016...
#> $ Age      <dbl> 34.77596, 34.78142, 34.78689, 34.79235, 34.79781, 34....
#> $ Tm       <chr> "CHI", "CHI", "CHI", "CHI", "CHI", "CHI", "CHI", "CHI...
#> $ Opp      <chr> "BOS", "IND", "BRK", "BOS", "NYK", "IND", "ORL", "ATL...
#> $ GS       <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, NA, 1, 1, 1, 1...
#> $ MP       <dbl> 32.41667, 21.06667, 28.75000, 35.31667, 33.33333, 21....
#> $ FG       <int> 7, 4, 5, 5, 12, 1, 7, 10, 5, 5, 8, 7, 9, NA, 9, 11, 7...
#> $ FGA      <int> 18, 7, 13, 13, 20, 9, 12, 17, 17, 17, 18, 17, 20, NA,...
#> $ `FG%`    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NA, 0, 0, 0, 0...
#> $ `3P`     <int> 4, 0, 1, 0, 5, 0, 0, 2, 0, 0, 0, 0, 5, NA, 2, 1, 0, 1...
#> $ `3PA`    <int> 6, 0, 3, 3, 7, 2, 2, 5, 4, 2, 0, 2, 9, NA, 4, 3, 1, 4...
#> $ `3P%`    <int> 0, NA, 0, 0, 0, 0, 0, 0, 0, 0, NA, 0, 0, NA, 0, 0, 0,...
#> $ FT       <int> 4, 6, 1, 5, 6, 2, 2, 3, 3, 4, 3, 4, 5, NA, 2, 3, 3, 1...
#> $ FTA      <int> 4, 6, 1, 6, 8, 2, 2, 4, 3, 4, 5, 4, 6, NA, 2, 5, 4, 2...
#> $ `FT%`    <int> 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, NA, 1, 0, 0, 0...
#> $ ORB      <int> 2, 2, 0, 0, 2, 0, 1, 2, 1, 2, 2, 1, 3, NA, 1, 1, 1, 3...
#> $ DRB      <int> 4, 3, 3, 1, 8, 1, 2, 1, 6, 0, 3, 1, 6, NA, 2, 4, 3, 2...
#> $ TRB      <int> 6, 5, 3, 1, 10, 1, 3, 3, 7, 2, 5, 2, 9, NA, 3, 5, 4, ...
#> $ AST      <int> 5, 2, 4, 4, 1, 0, 4, 3, 4, 2, 5, 1, 3, NA, 2, 2, 6, 4...
#> $ STL      <int> 0, 2, 3, 2, 0, 1, 2, 5, 1, 1, 2, 0, 1, NA, 1, 3, 2, 2...
#> $ BLK      <int> 1, 0, 1, 0, 0, 0, 2, 0, 0, 1, 0, 1, 2, NA, 1, 1, 0, 0...
#> $ TOV      <int> 3, 0, 5, 0, 3, 2, 1, 3, 3, 2, 1, 1, 3, NA, 0, 1, 5, 4...
#> $ PF       <int> 4, 1, 3, 0, 3, 2, 3, 1, 1, 2, 2, 2, 2, NA, 3, 2, 3, 2...
#> $ PTS      <int> 22, 14, 12, 15, 35, 4, 16, 25, 13, 14, 19, 18, 28, NA...
#> $ GmSc     <int> 14, 16, 6, 12, 25, -3, 15, 22, 6, 5, 14, 9, 21, NA, 1...
#> $ `+/-`    <chr> "+15", "+16", "+11", "-10", "-24", "-15", "+19", "-8"...
#> $ PlayerId <chr> "wadedw01", "wadedw01", "wadedw01", "wadedw01", "wade...


bobbyingram/bballR documentation built on Dec. 11, 2019, 9:52 a.m.