###############################################################################
###############################################################################
#' Scrape Batted Ball Distance/Velocity Data from Baseball Savant / MLB Statcast
#'
#' This function allows you to scrape all leaderboard statistics from the Baseball Savant batted ball data leaderboard
#' @param bat_pitch either 'bat' or 'pit'
#' @param year YYYY number format
#' @param from.date YYYY-MM-DD format
#' @param qual Number of ABs that meets the qualification
#' @return data frame
#' @examples
#' statcast<-statcast_leaderboard('bat', 2016, "2016-04-10", 20)
#' @export
###############################################################################
statcast_leaderboard <-
function(bat_pitch = 'pit',year = NULL, from.date = NULL, qual = 20) {
library(rvest)
library(stringr)
library(sqldf)
options(warn = -1)
if (bat_pitch == 'bat') {
bat_pitch <- 'batter'
} else if (bat_pitch == 'pit') {
bat_pitch <- 'pitcher'
}
# default to current year unless specified
year <- ifelse(is.null(year),
ifelse(
as.numeric(format(Sys.time(), "%m%d")) <= 331,as.numeric(format(Sys.time(), "%Y")) -
1,as.numeric(format(Sys.time(), "%Y"))
)
,year)
# default to pull since APR 1st unless specified
from_date <-
ifelse(is.null(from.date),paste0(year,"-","04-01"),from.date)
# Scrap AVG distance
base_url <-
paste0(
"https://baseballsavant.mlb.com/statcast_search?hfPT=&hfZ=&hfGT=R%7C&hfPR=&hfAB=&stadium=&hfBBT=&hfBBL=&hfC=&season=",
"&player_type=",
bat_pitch,
"&hfOuts=&pitcher_throws=&batter_stands=&start_speed_gt=&start_speed_lt=&perceived_speed_gt=&perceived_speed_lt=&spin_rate_gt=&spin_rate_lt=&exit_velocity_gt=&exit_velocity_lt=&launch_angle_gt=&launch_angle_lt=&distance_gt=&distance_lt=&batted_ball_angle_gt=&batted_ball_angle_lt=&game_date_gt=",
from_date,
"&game_date_lt=&team=&position=&hfRO=&home_road=&hfInn=&min_pitches=0&min_results=0&group_by=name&sort_col=bbdist&sort_order=desc&min_abs=",
qual,
"&px1=&px2=&pz1=&pz2=#results"
)
df <- read_html(base_url)
df <-
df %>% html_nodes(xpath = '//*[@id="search_results"]') %>% html_table(fill = TRUE)
# guess_encoding(df)
avg_dist <- as.data.frame(df)
avg_dist <- avg_dist[,c(1:4)]
names(avg_dist) <-
c("Rank", "Name", "Events", "Avg_Distance")
avg_dist <- avg_dist[complete.cases(avg_dist),]
avg_dist$Avg_Distance <-
substr(avg_dist$Avg_Distance,1,nchar(avg_dist$Avg_Distance) - 3)
# Char to integer
for (i in c(3:ncol(avg_dist))) {
avg_dist[,i] <-
as.integer(as.character(avg_dist[,i]))
}
#Replacing Names that match Fangraphs
avg_dist$Name <-
str_replace(avg_dist$Name,"Byung Ho Park","Byung-ho Park")
avg_dist$Name <-
str_replace(avg_dist$Name,"Norichika Aoki","Nori Aoki")
# Scrap AVG Exit Velocity
base_url <-
paste0(
"https://baseballsavant.mlb.com/statcast_search?hfPT=&hfZ=&hfGT=R%7C&hfPR=&hfAB=&stadium=&hfBBT=&hfBBL=&hfC=&season=",
"&player_type=",
bat_pitch,
"&hfOuts=&pitcher_throws=&batter_stands=&start_speed_gt=&start_speed_lt=&perceived_speed_gt=&perceived_speed_lt=&spin_rate_gt=&spin_rate_lt=&exit_velocity_gt=&exit_velocity_lt=&launch_angle_gt=&launch_angle_lt=&distance_gt=&distance_lt=&batted_ball_angle_gt=&batted_ball_angle_lt=&game_date_gt=",
from_date,
"&game_date_lt=&team=&position=&hfRO=&home_road=&hfInn=&min_pitches=0&min_results=0&group_by=name&sort_col=exit_velocity&sort_order=desc&min_abs=",
qual,
"&px1=&px2=&pz1=&pz2=#results"
)
df <- read_html(base_url)
df <-
df %>% html_nodes(xpath = '//*[@id="search_results"]') %>% html_table(fill = TRUE)
# guess_encoding(df)
avg_exit <- as.data.frame(df)
avg_exit <- avg_exit[,c(1:4)]
names(avg_exit) <-
c("Rank", "Name", "Events", "Avg_Exit_Velocity")
avg_exit <- subset(avg_exit, select = c("Rank", "Name", "Events", "Avg_Exit_Velocity"))
avg_exit <- avg_exit[complete.cases(avg_exit),]
avg_exit$Avg_Exit_Velocity <-
substr(avg_exit$Avg_Exit_Velocity,1,nchar(avg_exit$Avg_Exit_Velocity) -
4)
# Char to numeric
for (i in c(3:ncol(avg_exit))) {
avg_exit[,i] <-
as.numeric(as.character(avg_exit[,i]))
}
#Replacing Names that match Fangraphs
avg_exit$Name <-
str_replace(avg_exit$Name,"Byung Ho Park","Byung-ho Park")
avg_exit$Name <-
str_replace(avg_exit$Name,"Norichika Aoki","Nori Aoki")
#combine distance and exit velocity
df <-
sqldf(
"select avg_dist.Name, avg_dist.Events, avg_dist.Avg_Distance, avg_exit.Avg_Exit_Velocity
from avg_dist
join avg_exit on lower(avg_exit.Name) = lower(avg_dist.Name)
where 1=1
order by avg_exit.Avg_Exit_Velocity desc"
)
return(df)
}
###############################################################################
###############################################################################
#' Scrape Batter Leaderboards from FanGraphs.com
#'
#' This function allows you to scrape all leaderboard statistics from FanGraphs.com.
#' @param bat_pitch either 'bat' or 'pit'
#' @param yearfrom First season for which you want data.
#' @param yearto Last season for which you want data. If multiple years selected, data returned will be aggregate data for the date range. If yearto = yearfrom, function will return single-season data.
#' @param qual Whether you want only batters that qualified in a given season, or the minimum number of plate appearances for inclusion. If you only want qualified hitters, use qual. If a minimumm number of plate appearaces, use the number desired.
#' @param split '0' - full season '1' - 7 days '2' - 15 days '3' - 30 days '30' - 1st half '31' - 2nd half '15' - Home '16' - Away '13' - vs LHP or LHB '14' - vs RHP or RHB
#' @return data frame
#' @examples
#' fgh<-fangraphs_leaderboard('pit',2016,2016, 60, 0)
#' @export
###############################################################################
fangraphs_leaderboard <-
function(bat_pitch = 'pit', yearfrom = NULL, yearto = NULL, qual = NULL, split = NULL) {
library(data.table)
library(XML)
library(stringr)
options(warn = -1)
qual <- ifelse(is.null(qual),"y",qual)
split <- ifelse(is.null(split),0,split)
yearfrom <- ifelse(
is.null(yearfrom),
ifelse(
as.numeric(format(Sys.time(), "%m%d")) <= 331,as.numeric(format(Sys.time(), "%Y")) -
1,as.numeric(format(Sys.time(), "%Y"))
)
,yearfrom
)
yearto <- ifelse(is.null(yearto),
ifelse(
as.numeric(format(Sys.time(), "%m%d")) <= 331,as.numeric(format(Sys.time(), "%Y")) -
1,as.numeric(format(Sys.time(), "%Y"))
)
,yearto)
if (bat_pitch == 'bat') {
end_params <- '&players=0&sort=4,d'
type <-
'c,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211'
} else if (bat_pitch == 'pit') {
end_params <- '&players=0&sort=9,d'
type <-
'c,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224'
}
base_url <-
paste0(
"http://www.fangraphs.com/leaders.aspx?pos=all&stats=",
bat_pitch,"&lg=all&qual=",
qual,"&type=",
type,"&season=",
yearto, "&month=",
split,"&season1=",
yearfrom, "&ind=0&team="
)
teams <- unlist(strsplit(as.character(1:30),','))
urls <- paste0(base_url, rep(teams, each = 1),
"&rost=1&age=0&filter=",end_params)
# Scrape
leaders <-
lapply(urls, function(x) {
data.table(
readHTMLTable(x, as.data.frame = TRUE, stringsAsFactors = FALSE)$LeaderBoard1_dg1_ctl00
)
})
# Combine Scrapes
fangraphs_leaders <- c()
for (i in 1:30) {
fangraphs_leaders <-
as.data.frame(rbind(fangraphs_leaders,leaders[[i]]))
}
# Rename columns
c <- as.matrix(names(fangraphs_leaders))
c <- gsub("%", "_pct", c, fixed = TRUE)
c <- gsub(" (pfx)", "_pfx", c, fixed = TRUE)
c <- gsub("+", "plus", c, fixed = TRUE)
c <- gsub("/", "_", c, fixed = TRUE)
c <- gsub("-", "_", c, fixed = TRUE)
c <- gsub("1b", "x1b", c, fixed = TRUE)
c <- gsub("2b", "x2b", c, fixed = TRUE)
c <- gsub("3b", "x3b", c, fixed = TRUE)
c <-
ifelse(substr(c, nchar(c) - 1 + 1, nchar(c)) == ".", gsub("\\.", "_pct", c), c)
names(fangraphs_leaders) <- c
# Remove percentages
for (i in c(1,4:ncol(fangraphs_leaders))) {
fangraphs_leaders[,i] <-
str_trim(str_replace_all(fangraphs_leaders[,i],"%",""))
}
# Char to Number
for (i in c(1,4:ncol(fangraphs_leaders))) {
fangraphs_leaders[,i] <-
as.numeric(as.character(fangraphs_leaders[,i]))
}
# replace NA with 0
fangraphs_leaders[is.na(fangraphs_leaders)] <- 0
return(fangraphs_leaders)
}
###############################################################################
###############################################################################
#' Returns a full list of MLB players eligibility
#' for hitters or pitchers for ESPN Standard leagues'
#' Retreives primary position only
#' @param bat_pitch either 'bat' or 'pit'
#' @return data frame
#' @export
###############################################################################
ESPN_eligibility <- function(bat_pitch) {
library(sqldf)
library(data.table)
library(XML)
library(stringr)
options(warn = -1)
if (bat_pitch == 'bat') {
end_params <- '&slotCategoryGroup=1'
} else if (bat_pitch == 'pit') {
end_params <- '&slotCategoryGroup=2'
}
base_url <-
paste0("http://games.espn.go.com/flb/tools/eligibility?startIndex=")
indx <- unlist(strsplit(as.character(seq(0, 900, 50)),','))
urls <-
paste0(base_url, rep(indx, each = 1), end_params)
# Scrape
ESPN <-
lapply(urls, function(x) {
data.table(readHTMLTable(x, as.data.frame = TRUE, stringsAsFactors = FALSE)$playertable_0[-1,]) # removing first row
})
# Combine Scrapes
eligibility <- c()
for (i in c(1:18)) {
eligibility <-
as.data.frame(rbind(eligibility,ESPN[[i]]))
}
# Rename columns
colnames(eligibility) <-
c(
"NameTeamPos","Name","C","x1B","x2B","x3B","SS","LF","CF","RF","DH","SP","RP"
)
# Remove DTD/SSPD/DL
eligibility$NameTeamPos <-
str_trim(gsub("DTD", "", eligibility$NameTeamPos))
eligibility$NameTeamPos <-
str_trim(gsub("SSPD", "", eligibility$NameTeamPos))
eligibility$NameTeamPos <-
str_trim(gsub("DL60", "", eligibility$NameTeamPos))
eligibility$NameTeamPos <-
str_trim(gsub("DL15", "", eligibility$NameTeamPos))
# Parse Name
eligibility$Name <- gsub(",.*$", "", eligibility$NameTeamPos)
eligibility$Name <-
str_trim(gsub('[^.a-zA-Z0-9]',' ',eligibility$Name))
eligibility <- sqldf(
"select eligibility.*,
case when C = 'PP' then 'C'
when x1B = 'PP' then '1B'
when x2B = 'PP' then '2B'
when x3B = 'PP' then '3B'
when SS = 'PP' then 'SS'
when LF = 'PP' then 'OF'
when CF = 'PP' then 'OF'
when RF = 'PP' then 'OF'
when DH = 'PP' then 'DH'
when SP = 'PP' then 'SP'
when RP = 'PP' then 'RP'
else '--' end as Position
from eligibility
"
)
return(eligibility)
}
###############################################################################
###############################################################################
#' Returns projections for hitters pr pitchers for WAR Games ESPN FB league (ONLY)
#' because column headings will be different for other leagues!!
#' @param bat_pitch either 'bat' or 'pit'
#' @param leagueID ESPN league ID
#' @examples
#' WARGames.Hitters<-WarGames_ESPN_proj('bat',86607)
#' @export
###############################################################################
WarGames_ESPN_proj <- function(bat_pitch,leagueID = 86607) {
library(sqldf)
library(data.table)
library(XML)
library(stringr)
options(warn = -1)
leagueID <- ifelse(is.null(leagueID),86607,86607)
if (bat_pitch == 'bat') {
end_params <- '&slotCategoryGroup=1'
cols <-
c(
"Number","NameTeamPos","Type","Name","AB","H","x2B","x3B","HR","BB","HBP","SAC","SB","CS","PTS"
)
} else if (bat_pitch == 'pit') {
end_params <- '&slotCategoryGroup=2'
cols <-
c(
"Number","NameTeamPos","Type","Name","GS","IP","H","HR","BB","HB","K","SV","HD","PTS"
)
}
base_url <-
paste0(
"http://games.espn.go.com/flb/tools/projections?leagueId="
,leagueID,end_params,"&startIndex="
)
indx <- unlist(strsplit(as.character(seq(0, 900, 40)),','))
urls <- paste0(base_url, rep(indx, each = 1))
# Scrape
ESPNproj <-
lapply(urls, function(x) {
data.table(readHTMLTable(x, as.data.frame = TRUE, stringsAsFactors = FALSE)$playertable_0[-1,]) # removing first row
})
# Combine Scrapes
ESPNprojections <- c()
for (i in c(1:15)) {
ESPNprojections <-
as.data.frame(rbind(ESPNprojections,ESPNproj[[i]]))
}
# Rename columns
colnames(ESPNprojections) <- cols
# Remove DTD/SSPD/DL
ESPNprojections$NameTeamPos <-
str_trim(gsub("DTD", "", ESPNprojections$NameTeamPos))
ESPNprojections$NameTeamPos <-
str_trim(gsub("SSPD", "", ESPNprojections$NameTeamPos))
ESPNprojections$NameTeamPos <-
str_trim(gsub("DL60", "", ESPNprojections$NameTeamPos))
ESPNprojections$NameTeamPos <-
str_trim(gsub("DL15", "", ESPNprojections$NameTeamPos))
# Char to Number
for (i in c(1,5:ncol(ESPNprojections))) {
ESPNprojections[,i] <-
as.numeric(as.character(ESPNprojections[,i]))
}
# replace NA with 0
ESPNprojections[is.na(ESPNprojections)] <- 0
# Clean Name
ESPNprojections$Name <-
gsub(",.*$", "", ESPNprojections$NameTeamPos)
ESPNprojections$Name <-
str_trim(gsub('[^.a-zA-Z0-9]',' ',ESPNprojections$Name))
ESPNprojections$PTS <- round(ESPNprojections$PTS)
# Sort by Points desc
out <- ESPNprojections[order(-ESPNprojections$PTS),]
return(out)
}
###############################################################################
###############################################################################
#' Returns projections for hitters or pitchers for a standard 5x5 ROTO leagues
#' @param bat_pitch either 'bat' or 'pit'
#' @param leagueID ESPN league ID
#' @examples
#' ESPN_hitters<-ESPN_proj('bat')
#'
###############################################################################
ESPN_proj <- function(bat_pitch,leagueID = 0) {
library(data.table)
library(XML)
library(stringr)
options(warn = -1)
leagueID <- ifelse(is.null(leagueID),0,0)
if (bat_pitch == 'bat') {
end_params <- '&slotCategoryGroup=1'
cols <- c("Number","NameTeamPos","R","HR","RBI","SB","AVG")
} else if (bat_pitch == 'pit') {
end_params <- '&slotCategoryGroup=2'
cols <- c("Number","NameTeamPos","K","W","SV","ERA","WHIP")
}
base_url <-
paste0(
"http://games.espn.go.com/flb/tools/projections?leagueId="
,leagueID,end_params,"&startIndex="
)
indx <- unlist(strsplit(as.character(seq(0, 900, 40)),','))
urls <- paste0(base_url, rep(indx, each = 1))
# Scrape
ESPNproj <-
lapply(urls, function(x) {
data.table(readHTMLTable(x, as.data.frame = TRUE, stringsAsFactors = FALSE)$playertable_0[-1,]) # removing first row
})
# Combine Scrapes
ESPNprojections <- c()
for (i in c(1:15)) {
ESPNprojections <-
as.data.frame(rbind(ESPNprojections,ESPNproj[[i]]))
}
# Rename columns
colnames(ESPNprojections) <- cols
# Remove DTD/SSPD/DL
ESPNprojections$NameTeamPos <-
str_trim(gsub("DTD", "", ESPNprojections$NameTeamPos))
ESPNprojections$NameTeamPos <-
str_trim(gsub("SSPD", "", ESPNprojections$NameTeamPos))
ESPNprojections$NameTeamPos <-
str_trim(gsub("DL60", "", ESPNprojections$NameTeamPos))
ESPNprojections$NameTeamPos <-
str_trim(gsub("DL15", "", ESPNprojections$NameTeamPos))
# Char to Number
for (i in c(1,5:ncol(ESPNprojections))) {
ESPNprojections[,i] <-
as.numeric(as.character(ESPNprojections[,i]))
}
# replace NA with 0
ESPNprojections[is.na(ESPNprojections)] <- 0
# Parse Name
ESPNprojections$Name <-
gsub(",.*$", "", ESPNprojections$NameTeamPos)
ESPNprojections$Name <-
str_trim(gsub('[^.a-zA-Z0-9]',' ',ESPNprojections$Name))
# Sort by Rank
out <- ESPNprojections[order(ESPNprojections$Number),]
return(out)
}
###############################################################################
###############################################################################
#' Scrape Pitching or Hitting data from Baseball Reference
#'
#' Original code from Bill Petti and modified as needed
#' @param bat_pitch either 'bat' or 'pit'
#' @param t1 First date data should be scraped from. Should take the form "YEAR-DAY-MONTH"
#' @param t2 Second date data should be scraped from. Should take the form "YEAR-DAY-MONTH"
#' @param n use number of days prior from today
#' @export
#' @examples
#' bb_p<-bbref_leaderboard('pit',"2015-05-10", "2015-06-20")
#' bb_h<-bbref_leaderboard('bat',n=30) # current year; last 30 days from today
bbref_leaderboard <- function(bat_pitch, t1 = NULL, t2 = NULL, n = 30) {
library(rvest)
library(dplyr)
library(stringr)
if (bat_pitch == 'bat') {
pos <- 'h'
base_url <-
paste0(
"http://www.baseball-reference.com/leagues/daily.cgi?user_team=&bust_cache=&type=",pos,
"&lastndays=",n,
"&dates=fromandto&fromandto=", t1, ".", t2,
"&level=mlb&franch=&stat=&stat_value=0"
)
df <- read_html(base_url)
df <-
df %>% html_nodes(xpath = '//*[@id="daily"]') %>% html_table(fill = TRUE)
df <- as.data.frame(df)[-c(1,3,5)]
names(df)[1:4] <- c("Name", "Age", "Level", "Team")
# Remove percentages
for (i in c(5:ncol(df))) {
df[,i] <-
str_trim(str_replace_all(df[,i],"%",""))
}
# Char to Number
for (i in c(5:ncol(df))) {
df[,i] <-
as.numeric(as.character(df[,i]))
}
# replace NA with 0
df[is.na(df)] <- 0
df$X1B <- with(df, H - (X2B + X3B + HR))
currentyear <- as.numeric(format(Sys.time(), "%Y"))
t1 <- ifelse(is.null(t1),currentyear,t1)
season <- substr(t1, 1, 4)
df$season <- season
df$uBB <- with(df, BB - IBB)
df$Team <- gsub(" $", "", df$Team, perl = T)
df <- filter_(df, ~ Name != "Name")
df <- arrange_(df, ~ desc(AB), ~ desc(OPS))
} else if (bat_pitch == 'pit') {
pos <- 'p'
base_url <-
paste0(
"http://www.baseball-reference.com/leagues/daily.cgi?user_team=&bust_cache=&type=",pos,
"&lastndays=",n,
"&dates=fromandto&fromandto=", t1, ".", t2,
"&level=mlb&franch=&stat=&stat_value=0"
)
df <- read_html(base_url)
df <-
df %>% html_nodes(xpath = '//*[@id="daily"]') %>% html_table(fill = TRUE)
df <- as.data.frame(df)[-c(1,3,5)]
names(df)[1:4] <- c("Name", "Age", "Level", "Team")
# Remove percentages
for (i in c(5:ncol(df))) {
df[,i] <-
str_trim(str_replace_all(df[,i],"%",""))
}
# Remove percentages
for (i in c(5:ncol(df))) {
df[,i] <-
str_trim(str_replace_all(df[,i],"%",""))
}
# Char to Number
for (i in c(5:ncol(df))) {
df[,i] <-
as.numeric(as.character(df[,i]))
}
# replace NA with 0
df[is.na(df)] <- 0
df$X1B <- with(df, H - (X2B + X3B + HR))
currentyear <- as.numeric(format(Sys.time(), "%Y"))
t1 <- ifelse(is.null(t1),currentyear,t1)
season <- substr(t1, 1, 4)
df$season <- season
df$uBB <- with(df, BB - IBB)
# Percent to decimal
df$Str <- df$Str / 100
df$StL <- df$StL / 100
df$StS <- df$StS / 100
df$GB.FB <- df$GB.FB / 100
df$SO_perc <- with(df, round(SO / BF,3))
df$uBB_perc <- with(df, round(uBB / BF,3))
df$SO_uBB <- with(df, round(SO_perc - uBB_perc))
df$Team <- gsub(" $", "", df$Team, perl = T)
df <- filter_(df, ~ Name != "Name")
df <- arrange_(df, ~ desc(IP), ~ desc(WHIP))
}
return(df)
}
###############################################################################
###############################################################################
#' Scrape Home Run Tracker leadersboards
#'
#' @details Limitations: Cannot change year. Always current
#' @param bat_pitch either 'bat' or 'pit'
#' @export
#' @examples
#' hrt<-HRtracker_leaderboard('bat')
HRtracker_leaderboard <- function(bat_pitch = 'bat') {
library(XML)
library(dplyr)
library(stringr)
library(griffun)
library(Hmisc)
if (bat_pitch == 'bat') {
# No doubt HR
url <-
"http://www.hittrackeronline.com/homeruns_special.php?league=&type=ND"
htmltbl <-
readHTMLTable(doc = url)
nd <- data.frame(htmltbl[5])
colnames(nd) <-
c("RawName","HR")
# Just enough HR
url <-
"http://www.hittrackeronline.com/homeruns_special.php?league=&type=JE"
htmltbl <-
readHTMLTable(doc = url)
je <- data.frame(htmltbl[5])
colnames(je) <-
c("RawName","HR")
# Lucky HR
url <-
"http://www.hittrackeronline.com/homeruns_special_luck.php?league=&type=L"
htmltbl <-
readHTMLTable(doc = url)
luck <- data.frame(htmltbl[5])
colnames(luck) <-
c("RawName","HR")
# Golden Sledgehammer
url <- "http://www.hittrackeronline.com/golden_sledgehammer.php"
htmltbl <-
readHTMLTable(doc = url)
gs <- data.frame(htmltbl[5])
colnames(gs) <-
c("RawName","Team","GoldenSledgeHammer","Avg Distance")
#Full outer join
m1 <- merge(x = gs, y = nd, by = "RawName", all = TRUE)
m2 <- merge(x = m1, y = je, by = "RawName", all = TRUE)
df <- merge(x = m2, y = luck, by = "RawName", all = TRUE)
colnames(df) <-
c(
"RawName","Team","GoldenSledgeHammer","Avg Distance","NoDoubt","JustEnough","Lucky"
)
}
else if (bat_pitch == 'pit') {
# No doubt HR
url <-
"http://www.hittrackeronline.com/homeruns_allowed_special.php?league=&type=ND"
htmltbl <-
readHTMLTable(doc = url)
nd <- data.frame(htmltbl[5])
colnames(nd) <-
c("RawName","HR")
# Just enough HR
url <-
"http://www.hittrackeronline.com/homeruns_allowed_special.php?league=&type=JE"
htmltbl <-
readHTMLTable(doc = url)
je <- data.frame(htmltbl[5])
colnames(je) <-
c("RawName","HR")
# Lucky HR
url <-
"http://www.hittrackeronline.com/homeruns_allowed_special_luck.php?league=&type=L"
htmltbl <-
readHTMLTable(doc = url)
luck <- data.frame(htmltbl[5])
colnames(luck) <-
c("RawName","HR")
# Golden Anvil
url <- "http://www.hittrackeronline.com/golden_anvil.php"
htmltbl <-
readHTMLTable(doc = url)
gs <- data.frame(htmltbl[5])
colnames(gs) <-
c("RawName","Team","GoldenAnvil","Avg Distance")
#Full outer join
m1 <- merge(x = gs, y = nd, by = "RawName", all = TRUE)
m2 <- merge(x = m1, y = je, by = "RawName", all = TRUE)
df <- merge(x = m2, y = luck, by = "RawName", all = TRUE)
colnames(df) <-
c(
"RawName","Team","GoldenAnvil","Avg Distance","NoDoubt","JustEnough","Lucky"
)
}
# Char to Number
for (i in c(3:ncol(df))) {
df[,i] <-
as.numeric(as.character(df[,i]))
}
# replace NA with 0
df[is.na(df)] <- 0
# Currently does NOT handle Jr.
df$Name <- reverse_name(df$RawName)
return(df)
}
###############################################################################
###############################################################################
#' Scrape Baseball Heatmaps leadersboards
#'
#' @details Limitations: Cannot change year. Always current
#' @param bat_pitch either 'bat' or 'pit'
#' @export
#' @examples
#' heatmaps<-Heatmaps_leaderboard('bat')
Heatmaps_leaderboard <- function(bat_pitch = 'bat') {
library(XML)
library(dplyr)
library(stringr)
library(griffun)
library(Hmisc)
library(sqldf)
if (bat_pitch == 'bat') {
url <- "http://www.baseballheatmaps.com/graph/distanceleader.php"
htmltbl <-
readHTMLTable(doc = url)
df <- data.frame(htmltbl[1])
colnames(df) <-
c("Rank","RawName","Stance","Year","Hits","Distance","Angle")
df$RawName <-
str_replace(df$RawName,"Stanton Michael","Stanton Giancarlo")
df$RawName <-
str_replace(df$RawName,"Davis Khristopher","Davis Khris")
df$RawName <-
str_replace(df$RawName,"Moreland Mitchell","Moreland Mitch")
df$RawName <-
str_replace(df$RawName,"Machado Manuel","Machado Manny")
df$RawName <-
str_replace(df$RawName,"Freeman Frederick","Freeman Freddie")
df$RawName <-
str_replace(df$RawName,"Park Byung Ho","Park Byung-ho")
# Currently does NOT handle Jr.
df$Name <- reverse_name(df$RawName)
# Resovle duplicates because of Stance
df <- sqldf(
"select Name, Year, sum(Hits) as Hits,
avg(Distance) as Distance,
avg(Angle) as Angle
from df
group by Name, Year
"
)
}
else if (bat_pitch == 'pit') {
url <-
"http://www.baseballheatmaps.com/graph/pitcherdistanceleader.php"
htmltbl <-
readHTMLTable(doc = url)
df <- data.frame(htmltbl[1])
colnames(df) <-
c("Rank","RawName","Stance","Year","Hits","Distance","Angle")
df$RawName <-
str_replace(df$RawName,"Fister Douglas","Fister Doug")
df$RawName <-
str_replace(df$RawName,"Archer Christopher","Archer Chris")
df$RawName <-
str_replace(df$RawName,"Bolsinger Michael","Bolsinger Mike")
df$RawName <-
str_replace(df$RawName,"Colome Alexander","Colome Alex")
df$RawName <-
str_replace(df$RawName,"Martinez Nicholas","Martinez Nick")
# Currently does NOT handle Jr.
df$Name <- reverse_name(df$RawName)
# Resolve duplicates because of Stance
df <- sqldf(
"select Name, Year, sum(Hits) as Hits,
avg(Distance) as Distance,
avg(Angle) as Angle
from df
group by Name, Year
"
)
}
# Char to Number
for (i in c(3:ncol(df))) {
df[,i] <-
as.numeric(as.character(df[,i]))
}
# replace NA with 0
df[is.na(df)] <- 0
return(df)
}
###############################################################################
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.