#' Scrape Leaderboards from FanGraphs
#'
#' @description Scrapes all leaderboard statistics (basic and advanced) from
#'   FanGraphs.com by dispatching to the batter or the pitcher scraper.
#' @param year Season for which you want data.
#' @param league Option for limiting results to different leagues or overall
#'   results. Options are "al", "nl", or "all".
#' @param qual Either "y" to keep only batters/pitchers that qualified in the
#'   given season, or the minimum number of plate appearances/innings pitched
#'   required for inclusion. Defaults to "y".
#' @param ind Whether to break the seasons out individually or roll them up
#'   together. 1 = split seasons, 0 = aggregate seasons.
#' @param pit_bat Which leaderboard to scrape: "batter" or "pitcher".
#' @return The value returned by `fg_bat_mlb_leaders()` (for "batter") or
#'   `fg_pitch_mlb_leaders()` (for "pitcher").
#'
#' @export
#' @examples
#' \dontrun{fg_mlb_leaders(year = 2015, pit_bat = "batter")}
fg_mlb_leaders <- function(year, league = "all", qual = "y", ind = 0, pit_bat) {
  switch(pit_bat,
    batter = fg_bat_mlb_leaders(year, league, qual, ind),
    pitcher = fg_pitch_mlb_leaders(year, league, qual, ind),
    # Without a default branch switch() would silently return NULL.
    stop("`pit_bat` must be \"batter\" or \"pitcher\".", call. = FALSE)
  )
}
#' Scrape Batter Leaderboards from FanGraphs
#'
#' @description Downloads the FanGraphs batting leaderboard (basic and
#'   advanced statistics) for one season and returns it as a data frame with
#'   cleaned column names and the player id of every row.
#' @param year Season for which you want data.
#' @param league Option for limiting results to different leagues or overall
#'   results. Options are "al", "nl", or "all".
#' @param qual Either "y" to keep only batters that qualified in the given
#'   season, or the minimum number of plate appearances required for
#'   inclusion. Defaults to "y".
#' @param ind Whether to break the seasons out individually or roll them up
#'   together. 1 = split seasons, 0 = aggregate seasons.
#' @return A data frame with one row per leaderboard entry. The first column
#'   is `playerid`; when `ind = 0` a `Seasons` column holding `year` follows
#'   it. Percent and dollar signs are stripped from the values and the stat
#'   columns are converted to numeric.
#'
#' @importFrom xml2 read_html
#' @importFrom rvest html_nodes html_table html_attr
#'
#' @examples
#' \dontrun{fg_bat_mlb_leaders(year = 2015, qual = 400)}
fg_bat_mlb_leaders <- function(year, league = "all", qual = "y", ind = 0) {
  # `type` is the literal list "c,-1,3,4,...,286", built programmatically.
  stat_ids <- paste(c("c", -1, 3:286), collapse = ",")
  url <- paste0(
    "https://www.fangraphs.com/leaders.aspx?pos=all&stats=bat&lg=", league,
    "&qual=", qual, "&type=", stat_ids,
    "&season=", year, "&month=0&season1=", year, "&ind=", ind,
    "&team=&rost=&age=&filter=&players=&page=1_100000"
  )
  payload <- read_html(url)

  # The leaderboard is taken from the second-to-last <table> on the page.
  tables <- html_nodes(payload, "table")
  if (length(tables) < 2) {
    stop("Could not find the leaderboard table on the FanGraphs page.",
         call. = FALSE)
  }
  board <- tables[[length(tables) - 1]]
  leaders <- as.data.frame(html_table(board, fill = TRUE),
                           stringsAsFactors = FALSE)

  # Rows 1 and 3 are discarded; the row left on top holds the column headers.
  leaders <- leaders[-c(1, 3), ]
  col_names <- as.character(unlist(leaders[1, ], use.names = FALSE))
  leaders <- leaders[-1, ]
  rownames(leaders) <- NULL

  # Make the headers easier to use as column names.
  col_names <- gsub("%", "_pct", col_names, fixed = TRUE)
  col_names <- gsub(" (pfx)", "_pfx", col_names, fixed = TRUE)
  col_names <- gsub(" (pi)", "_pi", col_names, fixed = TRUE)
  col_names <- gsub("/", "_", col_names, fixed = TRUE)
  col_names <- ifelse(
    substr(col_names, nchar(col_names), nchar(col_names)) == ".",
    gsub("\\.", "_pct", col_names),
    col_names
  )

  # A handful of headers are overwritten by position; the split-season table
  # uses different positions than the aggregated one.
  special_names <- c("wRC_plus", "WPA_minus", "WPA_plus", "FBall_pct", "AgeRng")
  aggregate_seasons <- ind == 0
  if (aggregate_seasons) {
    special_idx <- c(62, 64, 65, 73, 202)
  } else {
    special_idx <- c(63, 65, 66, 74, 204)
  }
  if (length(col_names) < max(special_idx)) {
    stop("Unexpected FanGraphs table layout: found ", length(col_names),
         " columns, expected at least ", max(special_idx), ".",
         call. = FALSE)
  }
  col_names[special_idx] <- special_names
  names(leaders) <- col_names

  if (aggregate_seasons) {
    leaders <- cbind(Seasons = year, leaders)
  }

  # Strip "%" and "$" from every cell. lapply() keeps the data frame shape
  # even for a one-row leaderboard, where sapply() would collapse it to a
  # plain vector. Every column (including Seasons) ends up as character.
  leaders[] <- lapply(leaders, function(column) {
    gsub("$", "", gsub("%", "", column), fixed = TRUE)
  })

  # Dollar values written as "(1.2)" become "-1.2".
  leaders$Dol <- gsub("\\(", "-", leaders$Dol)
  leaders$Dol <- gsub("\\)", "", leaders$Dol)

  # Columns converted to numeric are selected by position, exactly as before.
  # NOTE(review): with ind = 0 the renamed AgeRng column sits at position 203
  # (202 plus the Seasons column) while the column left as text is 204; with
  # ind = 1 nothing is skipped. Confirm against the current page layout.
  if (aggregate_seasons) {
    numeric_idx <- c(5:203, 205:ncol(leaders))
  } else {
    numeric_idx <- 5:ncol(leaders)
  }
  leaders[numeric_idx] <- lapply(leaders[numeric_idx], as.numeric)

  # Player ids come from the links inside the leaderboard table: keep the
  # hrefs mentioning "playerid" and take the text between "=" and "&".
  hrefs <- html_attr(html_nodes(board, "a"), "href")
  slugs <- hrefs[grepl("playerid", hrefs)]
  playerid <- sub(".*[=] *(.*?) *[&].*", "\\1", slugs)
  if (length(playerid) != nrow(leaders)) {
    stop("Found ", length(playerid), " player ids for ", nrow(leaders),
         " leaderboard rows.", call. = FALSE)
  }

  leaders <- data.frame(
    playerid = playerid, leaders,
    check.names = FALSE, stringsAsFactors = FALSE
  )
  leaders
}
#' Scrape Pitcher Leaderboards from FanGraphs
#'
#' @description Downloads the FanGraphs pitching leaderboard (basic and
#'   advanced statistics) for one season and returns it as a data frame with
#'   cleaned column names and the player id of every row.
#' @param year Season for which you want data.
#' @param league Option for limiting results to different leagues or overall
#'   results. Options are "al", "nl", or "all".
#' @param qual Either "y" to keep only pitchers that qualified in the given
#'   season, or the minimum number of innings pitched required for inclusion.
#'   Defaults to "y".
#' @param ind Whether to break the seasons out individually or roll them up
#'   together. 1 = split seasons, 0 = aggregate seasons.
#' @return A data frame with one row per leaderboard entry. The first column
#'   is `playerid`; when `ind = 0` a `Seasons` column holding `year` follows
#'   it. Percent and dollar signs are stripped from the values and the stat
#'   columns are converted to numeric.
#'
#' @importFrom xml2 read_html
#' @importFrom rvest html_nodes html_table html_attr
#'
#' @examples
#' \dontrun{fg_pitch_mlb_leaders(year = 2015, qual = 150)}
fg_pitch_mlb_leaders <- function(year, league = "all", qual = "y", ind = 0) {
  # `type` is the literal list "c,-1,3,4,...,299", built programmatically.
  stat_ids <- paste(c("c", -1, 3:299), collapse = ",")
  url <- paste0(
    "https://www.fangraphs.com/leaders.aspx?pos=all&stats=pit&lg=", league,
    "&qual=", qual, "&type=", stat_ids,
    "&season=", year, "&month=0&season1=", year, "&ind=", ind,
    "&team=&rost=&age=&filter=&players=&page=1_100000"
  )
  payload <- read_html(url)

  # The leaderboard is taken from the second-to-last <table> on the page.
  tables <- html_nodes(payload, "table")
  if (length(tables) < 2) {
    stop("Could not find the leaderboard table on the FanGraphs page.",
         call. = FALSE)
  }
  board <- tables[[length(tables) - 1]]
  leaders <- as.data.frame(html_table(board, fill = TRUE),
                           stringsAsFactors = FALSE)

  # Rows 1 and 3 are discarded; the row left on top holds the column headers.
  leaders <- leaders[-c(1, 3), ]
  col_names <- as.character(unlist(leaders[1, ], use.names = FALSE))
  leaders <- leaders[-1, ]
  rownames(leaders) <- NULL

  # Make the headers easier to use as column names.
  col_names <- gsub("%", "_pct", col_names, fixed = TRUE)
  col_names <- gsub(" (pfx)", "_pfx", col_names, fixed = TRUE)
  col_names <- gsub(" (pi)", "_pi", col_names, fixed = TRUE)
  col_names <- gsub("/", "_", col_names, fixed = TRUE)
  col_names <- ifelse(
    substr(col_names, nchar(col_names), nchar(col_names)) == ".",
    gsub("\\.", "_pct", col_names),
    col_names
  )

  # A handful of headers are overwritten by position; the split-season table
  # uses different positions than the aggregated one.
  special_names <- c("Start_IP", "Relief_IP", "WPA_minus",
                     "WPA_plus", "FBall_pct", "AgeRng")
  aggregate_seasons <- ind == 0
  if (aggregate_seasons) {
    special_idx <- c(56, 58, 65, 66, 76, 217)
  } else {
    special_idx <- c(57, 59, 66, 67, 77, 218)
  }
  if (length(col_names) < max(special_idx)) {
    stop("Unexpected FanGraphs table layout: found ", length(col_names),
         " columns, expected at least ", max(special_idx), ".",
         call. = FALSE)
  }
  col_names[special_idx] <- special_names
  names(leaders) <- col_names

  if (aggregate_seasons) {
    leaders <- cbind(Seasons = year, leaders)
  }

  # Strip "%" and "$" from every cell. lapply() keeps the data frame shape
  # even for a one-row leaderboard, where sapply() would collapse it to a
  # plain vector. Every column (including Seasons) ends up as character.
  leaders[] <- lapply(leaders, function(column) {
    gsub("$", "", gsub("%", "", column), fixed = TRUE)
  })

  # Dollar values written as "(1.2)" become "-1.2".
  leaders$Dol <- gsub("\\(", "-", leaders$Dol)
  leaders$Dol <- gsub("\\)", "", leaders$Dol)

  # Columns converted to numeric are selected by position, exactly as before.
  # Position 218 is the renamed AgeRng column in both layouts and stays text.
  if (aggregate_seasons) {
    # Empty cells become NA in every column before the conversion.
    # NOTE(review): the split-season branch never did this, so its text
    # columns keep "" instead of NA; confirm whether that is intended.
    is.na(leaders) <- leaders == ""
    # NOTE(review): column 204 is also left as text here; confirm which
    # column that is in the current page layout.
    numeric_idx <- c(5:203, 205:217, 219:ncol(leaders))
  } else {
    numeric_idx <- c(5:217, 219:ncol(leaders))
  }
  leaders[numeric_idx] <- lapply(leaders[numeric_idx], as.numeric)

  # Player ids come from the links inside the leaderboard table: keep the
  # hrefs mentioning "playerid" and take the text between "=" and "&".
  hrefs <- html_attr(html_nodes(board, "a"), "href")
  slugs <- hrefs[grepl("playerid", hrefs)]
  playerid <- sub(".*[=] *(.*?) *[&].*", "\\1", slugs)
  if (length(playerid) != nrow(leaders)) {
    stop("Found ", length(playerid), " player ids for ", nrow(leaders),
         " leaderboard rows.", call. = FALSE)
  }

  leaders <- data.frame(
    playerid = playerid, leaders,
    check.names = FALSE, stringsAsFactors = FALSE
  )
  leaders
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.