#' **Get KenPom Ratings**
#'
#' @param min_year First year of data to pull
#' @param max_year Last year of data to pull
#' @return Returns a tibble of ratings
#'
#' |col_name |types |
#' |:---------------|:---------|
#' |year |integer |
#' |rk |numeric |
#' |team |character |
#' |conf |character |
#' |w_l |character |
#' |adj_em |numeric |
#' |adj_o |numeric |
#' |adj_o_rk |numeric |
#' |adj_d |numeric |
#' |adj_d_rk |numeric |
#' |adj_t |numeric |
#' |adj_t_rk |numeric |
#' |luck |numeric |
#' |luck_rk |numeric |
#' |sos_adj_em |numeric |
#' |sos_adj_em_rk |numeric |
#' |sos_opp_o |numeric |
#' |sos_opp_o_rk |numeric |
#' |sos_opp_d |numeric |
#' |sos_opp_d_rk |numeric |
#' |ncsos_adj_em |numeric |
#' |ncsos_adj_em_rk |numeric |
#' |ncaa_seed |numeric |
#'
#' @importFrom cli cli_abort
#' @importFrom dplyr select filter mutate arrange bind_rows
#' @importFrom tidyr everything
#' @importFrom stringr str_remove str_replace str_trim
#' @import rvest
#' @export
#' @keywords Ratings
#' @family KenPom Ratings Functions
#'
#' @examples
#' \donttest{
#' try(kp_pomeroy_ratings(min_year = 2020, max_year = 2021))
#' }
kp_pomeroy_ratings <- function(min_year, max_year = most_recent_mbb_season()){
tryCatch(
expr = {
if (!has_kp_user_and_pw()) stop("This function requires a KenPom subscription e-mail and password combination,\n set as the system environment variables KP_USER and KP_PW.", "\n See ?kp_user_pw for details.", call. = FALSE)
browser <- login()
if (!(is.numeric(min_year) && nchar(min_year) == 4 && min_year >= 2001)) {
# Check if year is numeric, if not NULL
cli::cli_abort("Enter valid min_ as a number (YYYY), data only goes back to 2001")
}
years <- min_year:max_year
for (year in years) {
### Pull Data
url <- paste0("https://kenpom.com/index.php?y=", year)
page <- rvest::session_jump_to(browser, url)
Sys.sleep(5)
header_cols <- c("Rk", "Team", "Conf", "W-L",
"AdjEM", "AdjO", "AdjO.Rk", "AdjD", "AdjD.Rk",
"AdjT", "AdjT.Rk", "Luck", "Luck.Rk",
"SOS.AdjEM", "SOS.AdjEM.Rk", "SOS.OppO", "SOS.OppO.Rk",
"SOS.OppD", "SOS.OppD.Rk", "NCSOS.AdjEM", "NCSOS.AdjEM.Rk")
x <- (page %>%
xml2::read_html() %>%
rvest::html_elements("#ratings-table"))[[1]] %>%
rvest::html_table()
colnames(x) <- header_cols
suppressWarnings(
x <- x %>%
dplyr::filter(!is.na(as.numeric(.data$SOS.OppO)))
)
x <- dplyr::mutate(x,
"NCAA_Seed" = NA_integer_,
"NCAA_Seed" = sapply(.data$Team, function(arg) { as.numeric(gsub("[^0-9]", "", arg)) }),
"Team" = sapply(.data$Team, function(arg) {
stringr::str_trim(stringr::str_replace(stringr::str_remove(arg,'\\d+| \\*| \\*+'),'\\*+','')) }),
"Year" = year) %>%
as.data.frame()
x <- x %>%
dplyr::mutate_at(
c("Rk","AdjEM","AdjO","AdjO.Rk","AdjD","AdjD.Rk","AdjT","AdjT.Rk",
"Luck","Luck.Rk","SOS.AdjEM","SOS.AdjEM.Rk","SOS.OppO","SOS.OppO.Rk",
"SOS.OppD","SOS.OppD.Rk","NCSOS.AdjEM","NCSOS.AdjEM.Rk"), as.numeric) %>%
dplyr::select("Year", tidyr::everything())
### Store Data
if (year == min_year) {
kenpom <- x
} else {
kenpom <- dplyr::bind_rows(kenpom, x)
}
}
kenpom <- kenpom %>%
dplyr::arrange(-.data$Year, .data$Rk) %>%
janitor::clean_names()
},
error = function(e) {
message(glue::glue("{Sys.time()}: Invalid arguments or no pomeroy ratings data for {min_year} - {max_year} available!"))
},
warning = function(w) {
},
finally = {
}
)
return(kenpom)
}
#' **Get KenPom Efficiency and Tempo Summary**
#'
#' @param min_year First year of data to pull
#' @param max_year Last year of data to pull
#' @return Returns a tibble of efficiency and tempo ratings
#'
#' |col_name |types |
#' |:----------------------|:---------|
#' |team |character |
#' |conf |character |
#' |adj_t |numeric |
#' |adj_t_rk |numeric |
#' |raw_t |numeric |
#' |raw_t_rk |numeric |
#' |avg_poss_length_off |numeric |
#' |avg_poss_length_off_rk |numeric |
#' |avg_poss_length_def |numeric |
#' |avg_poss_length_def_rk |numeric |
#' |adj_o |numeric |
#' |adj_o_rk |numeric |
#' |raw_o |numeric |
#' |raw_o_rk |numeric |
#' |adj_d |numeric |
#' |adj_d_rk |numeric |
#' |raw_d |numeric |
#' |raw_d_rk |numeric |
#' |ncaa_seed |numeric |
#' |year |numeric |
#'
#' @importFrom cli cli_abort
#' @importFrom dplyr filter mutate mutate_at bind_rows
#' @importFrom stringr str_remove str_remove str_trim
#' @import rvest
#' @export
#' @keywords Efficiency
#' @family KenPom Ratings Functions
#'
#' @examples
#' \donttest{
#' try(kp_efficiency(min_year = 2020, max_year = 2021))
#' }
kp_efficiency <- function(min_year, max_year = most_recent_mbb_season()){
tryCatch(
expr = {
if (!has_kp_user_and_pw()) stop("This function requires a KenPom subscription e-mail and password combination,\n set as the system environment variables KP_USER and KP_PW.", "\n See ?kp_user_pw for details.", call. = FALSE)
browser <- login()
if (!(is.numeric(min_year) && nchar(min_year) == 4 && min_year >= 2001)) {
# Check if year is numeric, if not NULL
cli::cli_abort("Enter valid min_ as a number (YYYY), data only goes back to 2001")
}
years <- min_year:max_year
for (year in years) {
### Pull Data
url <- paste0("https://kenpom.com/summary.php?y=", year)
page <- rvest::session_jump_to(browser, url)
Sys.sleep(5)
if (year < 2010) {
header_cols <- c("Team", "Conf", "AdjT", "AdjT.Rk","RawT", "RawT.Rk",
"AdjO", "AdjO.Rk", "RawO", "RawO.Rk",
"AdjD", "AdjD.Rk", "RawD", "RawD.Rk")
x <- (page %>%
xml2::read_html() %>%
rvest::html_elements("#ratings-table"))[[1]] %>%
rvest::html_table()
x <- x[,1:14]
colnames(x) <- header_cols
suppressWarnings(
x <- x %>%
dplyr::filter(!is.na(as.numeric(.data$AdjO)))
)
x <- x %>%
dplyr::mutate(
AvgPossLengthOff = 0.0,
AvgPossLengthOff.Rk = 0,
AvgPossLengthDef = 0.0,
AvgPossLengthDef.Rk = 0)
x <- x[,c("Team", "Conf", "AdjT", "AdjT.Rk","RawT", "RawT.Rk",
"AvgPossLengthOff","AvgPossLengthOff.Rk",
"AvgPossLengthDef","AvgPossLengthDef.Rk",
"AdjO", "AdjO.Rk", "RawO", "RawO.Rk",
"AdjD", "AdjD.Rk", "RawD", "RawD.Rk")]
x <- dplyr::mutate(x,
"NCAA_Seed" = NA_integer_,
"NCAA_Seed" = sapply(.data$Team, function(arg) { as.numeric(gsub("[^0-9]", "", arg)) }),
"Team" = sapply(.data$Team, function(arg) {
stringr::str_trim(str_replace(stringr::str_remove(arg,'\\d+| \\*| \\*+'),'\\*+','')) }),
"Year" = year) %>%
dplyr::mutate_at(
c("AdjT", "AdjT.Rk","RawT", "RawT.Rk",
"AvgPossLengthOff","AvgPossLengthOff.Rk",
"AvgPossLengthDef","AvgPossLengthDef.Rk",
"AdjO", "AdjO.Rk", "RawO", "RawO.Rk",
"AdjD", "AdjD.Rk", "RawD", "RawD.Rk", "Year"),
as.numeric) %>%
as.data.frame()
} else {
header_cols <- c("Team", "Conf", "AdjT", "AdjT.Rk","RawT", "RawT.Rk",
"AvgPossLengthOff","AvgPossLengthOff.Rk",
"AvgPossLengthDef","AvgPossLengthDef.Rk",
"AdjO", "AdjO.Rk", "RawO", "RawO.Rk",
"AdjD", "AdjD.Rk", "RawD", "RawD.Rk")
x <- (page %>%
xml2::read_html() %>%
rvest::html_elements("#ratings-table"))[[1]] %>%
rvest::html_table()
x <- x[,1:18]
colnames(x) <- header_cols
suppressWarnings(
x <- x %>%
dplyr::filter(!is.na(as.numeric(.data$AdjO)))
)
x <- dplyr::mutate(x,
"NCAA_Seed" = NA_integer_,
"NCAA_Seed" = sapply(.data$Team, function(arg) { as.numeric(gsub("[^0-9]", "", arg)) }),
"Team" = sapply(.data$Team, function(arg) {
stringr::str_trim(stringr::str_replace(stringr::str_remove(arg,'\\d+| \\*| \\*+'),'\\*+','')) }),
"Year" = year) %>%
dplyr::mutate_at(
c("AdjT", "AdjT.Rk","RawT", "RawT.Rk",
"AvgPossLengthOff","AvgPossLengthOff.Rk",
"AvgPossLengthDef","AvgPossLengthDef.Rk",
"AdjO", "AdjO.Rk", "RawO", "RawO.Rk",
"AdjD", "AdjD.Rk", "RawD", "RawD.Rk", "Year"),
as.numeric) %>%
as.data.frame()
}
### Store Data
if (year == min_year) {
kenpom <- x
} else {
kenpom <- dplyr::bind_rows(kenpom, x)
}
}
kenpom <- kenpom %>%
dplyr::arrange(-.data$Year, .data$AdjT.Rk) %>%
janitor::clean_names()
},
error = function(e) {
message(glue::glue("{Sys.time()}: Invalid arguments or no efficiency data for {min_year} - {max_year} available!"))
},
warning = function(w) {
},
finally = {
}
)
return(kenpom)
}
#' **Get Four Factors Data**
#'
#' @param min_year First year of data to pull
#' @param max_year Last year of data to pull
#' @return Returns a tibble of four factors ratings
#'
#' |col_name |types |
#' |:---------------|:---------|
#' |team |character |
#' |conf |character |
#' |adj_t |numeric |
#' |adj_t_rk |numeric |
#' |adj_o |numeric |
#' |adj_o_rk |numeric |
#' |off_e_fg_pct |numeric |
#' |off_e_fg_pct_rk |numeric |
#' |off_to_pct |numeric |
#' |off_to_pct_rk |numeric |
#' |off_or_pct |numeric |
#' |off_or_pct_rk |numeric |
#' |off_ft_rate |numeric |
#' |off_ft_rate_rk |numeric |
#' |adj_d |numeric |
#' |adj_d_rk |numeric |
#' |def_e_fg_pct |numeric |
#' |def_e_fg_pct_rk |numeric |
#' |def_to_pct |numeric |
#' |def_to_pct_rk |numeric |
#' |def_or_pct |numeric |
#' |def_or_pct_rk |numeric |
#' |def_ft_rate |numeric |
#' |def_ft_rate_rk |numeric |
#' |ncaa_seed |numeric |
#' |year |numeric |
#'
#' @importFrom cli cli_abort
#' @importFrom dplyr select mutate filter bind_rows
#' @importFrom stringr str_remove str_replace str_trim
#' @import rvest
#' @export
#' @keywords Four Factors
#' @family KenPom Ratings Functions
#'
#' @examples
#' \donttest{
#' try(kp_fourfactors(min_year = 2020, max_year = 2021))
#' }
kp_fourfactors <- function(min_year, max_year = most_recent_mbb_season()){
tryCatch(
expr = {
if (!has_kp_user_and_pw()) stop("This function requires a KenPom subscription e-mail and password combination,\n set as the system environment variables KP_USER and KP_PW.", "\n See ?kp_user_pw for details.", call. = FALSE)
browser <- login()
if (!(is.numeric(min_year) && nchar(min_year) == 4 && min_year >= 2001)) {
# Check if year is numeric, if not NULL
cli::cli_abort("Enter valid min_ as a number (YYYY), data only goes back to 2001")
}
years <- min_year:max_year
for (year in years) {
### Pull Data
url <- paste0("https://kenpom.com/stats.php?",
"y=", year)
page <- rvest::session_jump_to(browser, url)
Sys.sleep(5)
header_cols <- c("Team", "Conf", "AdjT", "AdjT.Rk",
"AdjO", "AdjO.Rk", "Off.eFG.Pct", "Off.eFG.Pct.Rk",
"Off.TO.Pct", "Off.TO.Pct.Rk", "Off.OR.Pct", "Off.OR.Pct.Rk",
"Off.FTRate", "Off.FTRate.Rk",
"AdjD", "AdjD.Rk", "Def.eFG.Pct", "Def.eFG.Pct.Rk",
"Def.TO.Pct", "Def.TO.Pct.Rk", "Def.OR.Pct", "Def.OR.Pct.Rk",
"Def.FTRate", "Def.FTRate.Rk")
x <- (page %>%
xml2::read_html() %>%
rvest::html_elements(css = '#ratings-table'))[[1]] %>%
rvest::html_table()
x <- x[,1:24]
colnames(x) <- header_cols
suppressWarnings(
x <- x %>%
dplyr::filter(!is.na(as.numeric(.data$AdjO)))
)
x <- dplyr::mutate(x,
"NCAA_Seed" = NA_integer_,
"NCAA_Seed" = sapply(.data$Team, function(arg) { as.numeric(gsub("[^0-9]", "", arg)) }),
"Team" = sapply(.data$Team, function(arg) {
stringr::str_trim(stringr::str_replace(stringr::str_remove(arg,'\\d+| \\*| \\*+'),'\\*+','')) }),
"Year" = year) %>%
dplyr::mutate_at(
c("AdjT", "AdjT.Rk", "AdjO", "AdjO.Rk", "Off.eFG.Pct", "Off.eFG.Pct.Rk",
"Off.TO.Pct", "Off.TO.Pct.Rk", "Off.OR.Pct", "Off.OR.Pct.Rk",
"Off.FTRate", "Off.FTRate.Rk",
"AdjD", "AdjD.Rk", "Def.eFG.Pct", "Def.eFG.Pct.Rk",
"Def.TO.Pct", "Def.TO.Pct.Rk", "Def.OR.Pct", "Def.OR.Pct.Rk",
"Def.FTRate", "Def.FTRate.Rk", "Year"), as.numeric) %>%
as.data.frame()
### Store Data
if (year == min_year) {
kenpom <- x
} else {
kenpom <- dplyr::bind_rows(kenpom, x)
}
}
kenpom <- kenpom %>%
dplyr::arrange(-.data$Year,.data$AdjO.Rk) %>%
janitor::clean_names()
},
error = function(e) {
message(glue::glue("{Sys.time()}: Invalid arguments or no four factors data for {min_year} - {max_year} available!"))
},
warning = function(w) {
},
finally = {
}
)
return(kenpom)
}
#' **Get Team Points Distribution**
#'
#' @param min_year First year of data to pull
#' @param max_year Last year of data to pull
#' @return Returns a tibble of team points distributions
#'
#' |col_name |types |
#' |:---------------|:---------|
#' |team |character |
#' |conf |character |
#' |off_ft_pct |numeric |
#' |off_ft_pct_rk |numeric |
#' |off_fg_2_pct |numeric |
#' |off_fg_2_pct_rk |numeric |
#' |off_fg_3_pct |numeric |
#' |off_fg_3_pct_rk |numeric |
#' |def_ft_pct |numeric |
#' |def_ft_pct_rk |numeric |
#' |def_fg_2_pct |numeric |
#' |def_fg_2_pct_rk |numeric |
#' |def_fg_3_pct |numeric |
#' |def_fg_3_pct_rk |numeric |
#' |ncaa_seed |numeric |
#' |year |numeric |
#'
#' @importFrom cli cli_abort
#' @importFrom dplyr mutate filter bind_rows
#' @importFrom stringr str_remove str_replace str_trim
#' @import rvest
#' @export
#' @keywords Points
#' @family KenPom Ratings Functions
#'
#' @examples
#' \donttest{
#' try(kp_pointdist(min_year = 2020, max_year = 2021))
#' }
kp_pointdist <- function(min_year, max_year = most_recent_mbb_season()){
tryCatch(
expr = {
if (!has_kp_user_and_pw()) stop("This function requires a KenPom subscription e-mail and password combination,\n set as the system environment variables KP_USER and KP_PW.", "\n See ?kp_user_pw for details.", call. = FALSE)
browser <- login()
if (!(is.numeric(min_year) && nchar(min_year) == 4 && min_year >= 2001)) {
# Check if year is numeric, if not NULL
cli::cli_abort("Enter valid min_ as a number (YYYY), data only goes back to 2001")
}
years <- min_year:max_year
for (year in years) {
### Pull Data
url <- paste0("https://kenpom.com/pointdist.php?",
"y=", year)
page <- rvest::session_jump_to(browser, url)
Sys.sleep(5)
header_cols <- c("Team", "Conf",
"Off.FT.Pct", "Off.FT.Pct.Rk",
"Off.FG_2.Pct", "Off.FG_2.Pct.Rk",
"Off.FG_3.Pct", "Off.FG_3.Pct.Rk",
"Def.FT.Pct", "Def.FT.Pct.Rk",
"Def.FG_2.Pct", "Def.FG_2.Pct.Rk",
"Def.FG_3.Pct", "Def.FG_3.Pct.Rk")
x <- (page %>%
xml2::read_html() %>%
rvest::html_elements(css = '#ratings-table'))[[1]] %>%
rvest::html_table()
x <- x[,1:14]
colnames(x) <- header_cols
suppressWarnings(
x <- x %>%
dplyr::filter(!is.na(as.numeric(.data$Off.FT.Pct)))
)
x <- dplyr::mutate(x,
"NCAA_Seed" = NA_integer_,
"NCAA_Seed" = sapply(.data$Team, function(arg) { as.numeric(gsub("[^0-9]", "", arg)) }),
"Team" = sapply(.data$Team, function(arg) {
stringr::str_trim(stringr::str_replace(stringr::str_remove(arg,'\\d+| \\*| \\*+'),'\\*+','')) }),
"Year" = year) %>%
dplyr::mutate_at(
c("Off.FT.Pct", "Off.FT.Pct.Rk",
"Off.FG_2.Pct", "Off.FG_2.Pct.Rk",
"Off.FG_3.Pct", "Off.FG_3.Pct.Rk",
"Def.FT.Pct", "Def.FT.Pct.Rk",
"Def.FG_2.Pct", "Def.FG_2.Pct.Rk",
"Def.FG_3.Pct", "Def.FG_3.Pct.Rk","Year"), as.numeric) %>%
as.data.frame()
### Store Data
if (year == min_year) {
kenpom <- x
}else {
kenpom <- dplyr::bind_rows(kenpom, x)
}
}
kenpom <- kenpom %>%
janitor::clean_names()
},
error = function(e) {
message(glue::glue("{Sys.time()}: Invalid arguments or no point distribution data for {min_year} - {max_year} available!"))
},
warning = function(w) {
},
finally = {
}
)
return(kenpom)
}
#' **Get Heights, Experience, Bench and Continuity Data**
#'
#' @param min_year First year of data to pull
#' @param max_year Last year of data to pull
#' @return Returns a tibble of heights
#'
#' |col_name |types |
#' |:-------------|:---------|
#' |team |character |
#' |conf |character |
#' |avg_hgt |numeric |
#' |avg_hgt_rk |numeric |
#' |eff_hgt |numeric |
#' |eff_hgt_rk |numeric |
#' |c_hgt |numeric |
#' |c_hgt_rk |numeric |
#' |pf_hgt |numeric |
#' |pf_hgt_rk |numeric |
#' |sf_hgt |numeric |
#' |sf_hgt_rk |numeric |
#' |sg_hgt |numeric |
#' |sg_hgt_rk |numeric |
#' |pg_hgt |numeric |
#' |pg_hgt_rk |numeric |
#' |experience |numeric |
#' |experience_rk |numeric |
#' |bench |numeric |
#' |bench_rk |numeric |
#' |continuity |numeric |
#' |continuity_rk |numeric |
#' |ncaa_seed |numeric |
#' |year |integer |
#'
#' @importFrom cli cli_abort
#' @importFrom dplyr filter mutate bind_rows
#' @importFrom stringr str_remove str_replace str_trim
#' @import rvest
#' @export
#' @keywords Roster
#' @family KenPom Ratings Functions
#'
#' @examples
#' \donttest{
#' try(kp_height(min_year = 2020, max_year = 2021))
#' }
kp_height <- function(min_year,max_year = most_recent_mbb_season()){
tryCatch(
expr = {
if (!has_kp_user_and_pw()) stop("This function requires a KenPom subscription e-mail and password combination,\n set as the system environment variables KP_USER and KP_PW.", "\n See ?kp_user_pw for details.", call. = FALSE)
browser <- login()
if (!(is.numeric(min_year) && nchar(min_year) == 4 && min_year >= 2007)) {
# Check if year is numeric, if not NULL
cli::cli_abort("Enter valid min_year as a number (YYYY), data only goes back to 2007")
}
years <- min_year:max_year
for (year in years) {
### Pull Data
url <- paste0("https://kenpom.com/height.php?",
"y=", year)
page <- rvest::session_jump_to(browser, url)
Sys.sleep(5)
if (year < 2008) {
header_cols <- c("Team", "Conf",
"Avg.Hgt", "Avg.Hgt.Rk",
"Eff.Hgt", "Eff.Hgt.Rk",
"C.Hgt", "C.Hgt.Rk",
"PF.Hgt", "PF.Hgt.Rk",
"SF.Hgt", "SF.Hgt.Rk",
"SG.Hgt", "SG.Hgt.Rk",
"PG.Hgt", "PG.Hgt.Rk",
"Experience", "Experience.Rk",
"Bench", "Bench.Rk")
x <- (page %>%
xml2::read_html() %>%
rvest::html_elements(css = '#ratings-table'))[[1]] %>%
rvest::html_table()
x <- x[,1:20]
colnames(x) <- header_cols
x <- x %>%
dplyr::mutate(
Continuity = 0.0,
Continuity.Rk = 0)
x <- dplyr::mutate(x,
"NCAA_Seed" = NA_integer_,
"NCAA_Seed" = sapply(.data$Team, function(arg) { as.numeric(gsub("[^0-9]", "", arg)) }),
"Team" = sapply(.data$Team, function(arg) {
stringr::str_trim(stringr::str_replace(stringr::str_remove(arg,'\\d+| \\*| \\*+'),'\\*+','')) }),
"Year" = year) %>%
dplyr::mutate_at(
c("Avg.Hgt", "Avg.Hgt.Rk",
"Eff.Hgt", "Eff.Hgt.Rk",
"C.Hgt", "C.Hgt.Rk",
"PF.Hgt", "PF.Hgt.Rk",
"SF.Hgt", "SF.Hgt.Rk",
"SG.Hgt", "SG.Hgt.Rk",
"PG.Hgt", "PG.Hgt.Rk",
"Experience", "Experience.Rk",
"Bench", "Bench.Rk","Continuity","Continuity.Rk"), as.numeric) %>%
as.data.frame()
}else{
header_cols <- c("Team", "Conf",
"Avg.Hgt", "Avg.Hgt.Rk",
"Eff.Hgt", "Eff.Hgt.Rk",
"C.Hgt", "C.Hgt.Rk",
"PF.Hgt", "PF.Hgt.Rk",
"SF.Hgt", "SF.Hgt.Rk",
"SG.Hgt", "SG.Hgt.Rk",
"PG.Hgt", "PG.Hgt.Rk",
"Experience", "Experience.Rk",
"Bench", "Bench.Rk",
"Continuity", "Continuity.Rk")
x <- (page %>%
xml2::read_html() %>%
rvest::html_elements(css = '#ratings-table'))[[1]] %>%
rvest::html_table()
x <- x[,1:22]
colnames(x) <- header_cols
}
suppressWarnings(
x <- x %>%
dplyr::filter(!is.na(as.numeric(.data$Avg.Hgt)))
)
x <- dplyr::mutate(x,
"NCAA_Seed" = NA_integer_,
"NCAA_Seed" = sapply(.data$Team, function(arg) { as.numeric(gsub("[^0-9]", "", arg)) }),
"Team" = sapply(.data$Team, function(arg) {
stringr::str_trim(stringr::str_replace(stringr::str_remove(arg,'\\d+| \\*| \\*+'),'\\*+','')) }),
"Year" = year) %>%
dplyr::mutate_at(
c("Avg.Hgt", "Avg.Hgt.Rk",
"Eff.Hgt", "Eff.Hgt.Rk",
"C.Hgt", "C.Hgt.Rk",
"PF.Hgt", "PF.Hgt.Rk",
"SF.Hgt", "SF.Hgt.Rk",
"SG.Hgt", "SG.Hgt.Rk",
"PG.Hgt", "PG.Hgt.Rk",
"Experience", "Experience.Rk",
"Bench", "Bench.Rk","Continuity","Continuity.Rk"), as.numeric) %>%
as.data.frame()
### Store Data
if (year == min_year) {
kenpom <- x
} else {
kenpom <- dplyr::bind_rows(kenpom, x)
}
}
kenpom <- kenpom %>%
janitor::clean_names()
},
error = function(e) {
message(glue::glue("{Sys.time()}: Invalid arguments or no height data for {min_year} - {max_year} available!"))
},
warning = function(w) {
},
finally = {
}
)
return(kenpom)
}
#' **Get 2-Foul Participation Stats**
#'
#' @param min_year First year of data to pull
#' @param max_year Last year of data to pull
#' @return Returns a tibble of foul participation stats
#'
#' |col_name |types |
#' |:----------------------------|:---------|
#' |team |character |
#' |conf |character |
#' |two_foul_particpation_pct |numeric |
#' |two_foul_particpation_pct_rk |numeric |
#' |adj2fp |numeric |
#' |adj2fp_rk |numeric |
#' |two_foul_total_time |character |
#' |two_foul_total_time_rk |character |
#' |two_foul_time_on |character |
#' |two_foul_time_on_rk |character |
#' |bench_pct |numeric |
#' |bench_pct_rk |numeric |
#' |ncaa_seed |numeric |
#' |year |integer |
#'
#' @importFrom cli cli_abort
#' @importFrom dplyr mutate filter bind_rows
#' @importFrom stringr str_remove str_replace str_trim
#' @import rvest
#' @export
#' @keywords Foul Trouble
#' @family KenPom Ratings Functions
#'
#' @examples
#' \donttest{
#' try(kp_foul_trouble(min_year = 2020, max_year = most_recent_mbb_season()))
#' }
kp_foul_trouble <- function(min_year, max_year = most_recent_mbb_season()){
tryCatch(
expr = {
if (!has_kp_user_and_pw()) stop("This function requires a KenPom subscription e-mail and password combination,\n set as the system environment variables KP_USER and KP_PW.", "\n See ?kp_user_pw for details.", call. = FALSE)
browser <- login()
if (!(is.numeric(min_year) && nchar(min_year) == 4 && min_year >= 2010)) {
# Check if year is numeric, if not NULL
cli::cli_abort("Enter valid min_year as a number (YYYY), data only goes back to 2010")
}
years <- min_year:max_year
for (year in years) {
### Pull Data
url <- paste0("https://kenpom.com/foul_trouble.php?",
"y=", year)
page <- rvest::session_jump_to(browser, url)
Sys.sleep(5)
header_cols <- c("Team", "Conf", "TwoFoulParticpation.Pct",
"TwoFoulParticpation.Pct.Rk", "Adj2FP", "Adj2FP.Rk",
"TwoFoulTotalTime","TwoFoulTotalTime.Rk",
"TwoFoulTimeOn","TwoFoulTimeOn.Rk",
"Bench.Pct","Bench.Pct.Rk")
x <- (page %>%
xml2::read_html() %>%
rvest::html_elements(css = '#ratings-table'))[[1]] %>%
rvest::html_table()
x <- x[,1:12]
colnames(x) <- header_cols
suppressWarnings(
x <- x %>%
dplyr::filter(!is.na(as.numeric(.data$TwoFoulParticpation.Pct)))
)
x <- dplyr::mutate(x,
"NCAA_Seed" = NA_integer_,
"NCAA_Seed" = sapply(.data$Team, function(arg) { as.numeric(gsub("[^0-9]", "", arg)) }),
"Team" = sapply(.data$Team, function(arg) {
stringr::str_trim(stringr::str_replace(stringr::str_remove(arg,'\\d+| \\*| \\*+'),'\\*+','')) }),
"Year" = year) %>%
dplyr::mutate_at(
c("TwoFoulParticpation.Pct",
"TwoFoulParticpation.Pct.Rk", "Adj2FP", "Adj2FP.Rk",
"Bench.Pct","Bench.Pct.Rk"), as.numeric) %>%
as.data.frame()
### Store Data
if (year == min_year) {
kenpom <- x
}else {
kenpom <- dplyr::bind_rows(kenpom, x)
}
}
kenpom <- kenpom %>%
janitor::clean_names()
},
error = function(e) {
message(glue::glue("{Sys.time()}: Invalid arguments or no foul trouble data for {min_year} - {max_year} available!"))
},
warning = function(w) {
},
finally = {
}
)
return(kenpom)
}
#' **Get Team Stats**
#'
#' @param min_year First year of data to pull
#' @param max_year Last year of data to pull
#' @return Returns a tibble of team stats
#'
#' |col_name |types |
#' |:------------------|:---------|
#' |team |character |
#' |conf |character |
#' |off_fg_3_pct |numeric |
#' |off_fg_3_pct_rk |numeric |
#' |off_fg_2_pct |numeric |
#' |off_fg_2_pct_rk |numeric |
#' |off_ft_pct |numeric |
#' |off_ft_pct_rk |numeric |
#' |off_blk_pct |numeric |
#' |off_blk_pct_rk |numeric |
#' |off_stl_pct |numeric |
#' |off_stl_pct_rk |numeric |
#' |off_non_stl_pct |numeric |
#' |off_non_stl_pct_rk |numeric |
#' |off_a_pct |numeric |
#' |off_a_pct_rk |numeric |
#' |off_fg_3a_pct |numeric |
#' |off_fg_3a_pct_rk |numeric |
#' |adj_o |numeric |
#' |adj_o_rk |numeric |
#' |ncaa_seed |numeric |
#' |year |numeric |
#' |def_fg_3_pct |numeric |
#' |def_fg_3_pct_rk |numeric |
#' |def_fg_2_pct |numeric |
#' |def_fg_2_pct_rk |numeric |
#' |def_ft_pct |numeric |
#' |def_ft_pct_rk |numeric |
#' |def_blk_pct |numeric |
#' |def_blk_pct_rk |numeric |
#' |def_stl_pct |numeric |
#' |def_stl_pct_rk |numeric |
#' |def_non_stl_pct |numeric |
#' |def_non_stl_pct_rk |numeric |
#' |def_a_pct |numeric |
#' |def_a_pct_rk |numeric |
#' |def_fg_3a_pct |numeric |
#' |def_fg_3a_pct_rk |numeric |
#' |adj_d |numeric |
#' |adj_d_rk |numeric |
#'
#' @importFrom cli cli_abort
#' @importFrom dplyr filter mutate bind_rows
#' @importFrom stringr str_remove str_replace str_trim
#' @import rvest
#' @export
#' @keywords Team
#' @family KenPom Ratings Functions
#'
#' @examples
#' \donttest{
#' try(kp_teamstats(min_year = 2019, max_year =2021))
#' }
kp_teamstats <- function(min_year, max_year=most_recent_mbb_season()){
tryCatch(
expr = {
if (!has_kp_user_and_pw()) stop("This function requires a KenPom subscription e-mail and password combination,\n set as the system environment variables KP_USER and KP_PW.", "\n See ?kp_user_pw for details.", call. = FALSE)
browser <- login()
if (!(is.numeric(min_year) && nchar(min_year) == 4 && min_year >= 2001)) {
# Check if year is numeric, if not NULL
cli::cli_abort("Enter valid min_ as a number (YYYY), data only goes back to 2001")
}
years <- min_year:max_year
for (year in years) {
### Pull Data
url <- paste0("https://kenpom.com/teamstats.php?",
"y=", year, "&od=o")
page <- rvest::session_jump_to(browser, url)
Sys.sleep(5)
header_cols <- c("Team", "Conf",
"Off.FG_3.Pct", "Off.FG_3.Pct.Rk",
"Off.FG_2.Pct", "Off.FG_2.Pct.Rk",
"Off.FT.Pct", "Off.FT.Pct.Rk",
"Off.Blk.Pct", "Off.Blk.Pct.Rk",
"Off.Stl.Pct", "Off.Stl.Pct.Rk",
"Off.NonStl.Pct", "Off.NonStl.Pct.Rk",
"Off.A.Pct", "Off.A.Pct.Rk",
"Off.FG_3A.Pct", "Off.FG_3A.Pct.Rk",
"AdjO","AdjO.Rk")
x <- (page %>%
xml2::read_html() %>%
rvest::html_elements(css = '#ratings-table'))[[1]] %>%
rvest::html_table()
x <- x[,1:20]
colnames(x) <- header_cols
suppressWarnings(
x <- x %>%
dplyr::filter(!is.na(as.numeric(.data$Off.FG_3.Pct)))
)
x <- dplyr::mutate(x,
"NCAA_Seed" = NA_integer_,
"NCAA_Seed" = sapply(.data$Team, function(arg) { as.numeric(gsub("[^0-9]", "", arg)) }),
"Team" = sapply(.data$Team, function(arg) {
stringr::str_trim(stringr::str_replace(stringr::str_remove(arg,'\\d+| \\*| \\*+'),'\\*+','')) }),
"Year" = year) %>%
dplyr::mutate_at(
c("Off.FG_3.Pct", "Off.FG_3.Pct.Rk",
"Off.FG_2.Pct", "Off.FG_2.Pct.Rk",
"Off.FT.Pct", "Off.FT.Pct.Rk",
"Off.Blk.Pct", 'Off.Blk.Pct.Rk',
"Off.Stl.Pct", "Off.Stl.Pct.Rk",
"Off.NonStl.Pct", "Off.NonStl.Pct.Rk",
"Off.A.Pct", "Off.A.Pct.Rk",
"Off.FG_3A.Pct", "Off.FG_3A.Pct.Rk", "AdjO", "AdjO.Rk", "Year"), as.numeric) %>%
as.data.frame()
### Pull Data
url <- paste0("https://kenpom.com/teamstats.php?",
"y=", year, "&od=d")
page <- rvest::session_jump_to(browser, url)
d_header_cols <- c("Team", "Conf",
"Def.FG_3.Pct", "Def.FG_3.Pct.Rk",
"Def.FG_2.Pct", "Def.FG_2.Pct.Rk",
"Def.FT.Pct", "Def.FT.Pct.Rk",
"Def.Blk.Pct", 'Def.Blk.Pct.Rk',
"Def.Stl.Pct", "Def.Stl.Pct.Rk",
"Def.NonStl.Pct", "Def.NonStl.Pct.Rk",
"Def.A.Pct", "Def.A.Pct.Rk",
"Def.FG_3A.Pct", "Def.FG_3A.Pct.Rk",
"AdjD","AdjD.Rk")
y <- (page %>%
xml2::read_html() %>%
rvest::html_elements(css = '#ratings-table'))[[1]] %>%
rvest::html_table()
y <- y[,1:20]
colnames(y) <- d_header_cols
suppressWarnings(
y <- y %>%
dplyr::filter(!is.na(as.numeric(.data$Def.FG_3.Pct)))
)
y <- y %>%
dplyr::mutate_at(
c("Def.FG_3.Pct", "Def.FG_3.Pct.Rk",
"Def.FG_2.Pct", "Def.FG_2.Pct.Rk",
"Def.FT.Pct", "Def.FT.Pct.Rk",
"Def.Blk.Pct", 'Def.Blk.Pct.Rk',
"Def.Stl.Pct", "Def.Stl.Pct.Rk",
"Def.NonStl.Pct", "Def.NonStl.Pct.Rk",
"Def.A.Pct", "Def.A.Pct.Rk",
"Def.FG_3A.Pct", "Def.FG_3A.Pct.Rk",
"AdjD", "AdjD.Rk"), as.numeric) %>%
dplyr::mutate(
"Team" = sapply(.data$Team, function(arg) {
stringr::str_trim(stringr::str_replace(stringr::str_remove(arg,'\\d+| \\*| \\*+'),'\\*+','')) }),
"Year" = year) %>%
as.data.frame()
### Store Data
if (year == min_year) {
kenpom <- x %>%
dplyr::left_join(y, by = c("Team","Conf", "Year"))
} else {
z <- x %>%
dplyr::left_join(y, by = c("Team","Conf", "Year"))
kenpom <- dplyr::bind_rows(kenpom, z)
}
}
kenpom <- kenpom %>%
janitor::clean_names()
},
error = function(e) {
message(glue::glue("{Sys.time()}: Invalid arguments or no team stats data for {min_year} - {max_year} available!"))
},
warning = function(w) {
},
finally = {
}
)
return(kenpom)
}
#' **Get Player Stats Leaders by Metric**
#'
#' @param metric Used to get leaders for different metrics. Available values are: \cr
#' 'ORtg', 'Min', 'eFG', 'Poss', Shots', 'OR', 'DR', 'TO', 'ARate', 'Blk', \cr
#' 'FTRate', 'Stl', 'TS', 'FC40', 'FD40', '2P', '3P', 'FT'. \cr
#' Default is 'eFG'. 'ORtg' returns a list of four dataframes, as there are four tables: \cr
#' players that used more than 28 percent of possessions, more than 24 percent of possessions, more than 20 percent of possessions, and with no possession restriction.
#' @param conf Used to limit to players in a specific conference. Allowed values are: 'A10', 'ACC', 'AE', 'AMER', \cr
#' 'ASUN', 'B10', 'B12', 'BE', 'BSKY', 'BSTH', 'BW', 'CAA', 'CUSA', 'HORZ', 'IND', IVY', \cr
#' 'MAAC', 'MAC', 'MEAC', 'MVC', 'MWC', 'NEC', 'OVC', 'P12', 'PAT', 'SB', 'SC', 'SEC', 'SLND', \cr
#' 'SUM', 'SWAC', 'WAC', 'WCC'. \cr
#' If you try to use a conference that doesn't exist for a given season, like 'IND' and '2018', \cr
#' you'll get an empty table, as kenpom.com doesn't serve 404 pages for invalid table queries like that.\cr
#' No filter applied by default.
#' @param conf_only Used to define whether stats should reflect conference games only.\cr
#' Only available if specific conference is defined. Only available for season after 2013, FALSE by default.
#' @param year Year of data to pull (earliest year of data available: 2004)
#' @return Returns a tibble of player stats
#'
#' |col_name |types |
#' |:--------|:---------|
#' |rk |character |
#' |player |character |
#' |team |character |
#' |e_fg |character |
#' |hgt |character |
#' |wgt |character |
#' |yr |character |
#' |year |numeric |
#'
#' @importFrom cli cli_abort
#' @importFrom dplyr filter mutate
#' @importFrom stringr str_remove str_replace str_trim
#' @import rvest
#' @export
#' @keywords Player
#' @family KenPom Ratings Functions
#'
#' @examples
#' \donttest{
#' try(kp_playerstats(metric = 'eFG', conf_only = FALSE, year=2021))
#' }
kp_playerstats <- function(metric = 'eFG', conf = NULL, conf_only = FALSE, year=most_recent_mbb_season()){
tryCatch(
expr = {
if (!has_kp_user_and_pw()) stop("This function requires a KenPom subscription e-mail and password combination,\n set as the system environment variables KP_USER and KP_PW.", "\n See ?kp_user_pw for details.", call. = FALSE)
browser <- login()
m_list <- c('ORtg', 'Min', 'eFG', 'Poss', 'Shots', 'OR', 'DR', 'TO', 'ARate', 'Blk', 'FTRate', 'Stl',
'TS', 'FC40', 'FD40', '2P', '3P', 'FT')
url_ext <- c('ORtg','PctMin', 'eFG', 'PctPoss','PctShots', 'ORPct', 'DRPct','TORate', 'ARate', 'PctBlocks', 'FTRate',
'PctStls', 'TS', 'FCper40', 'FDper40', 'FG2Pct', 'FG3Pct', 'FTPct')
metrics_data <- data.frame(m_list, url_ext)
conf_list <- c('A10', 'ACC', 'AE', 'AMER', 'ASUN', 'B10', 'B12', 'BE', 'BSKY', 'BSTH', 'BW',
'CAA', 'CUSA', 'HORZ', 'IND', 'IVY', 'MAAC', 'MAC', 'MEAC', 'MVC', 'MWC', 'NEC',
'OVC', 'P12', 'PAT', 'SB', 'SC', 'SEC', 'SLND', 'SUM', 'SWAC', 'WAC', 'WCC')
metric_url <- metrics_data$url_ext[m_list == metric]
if (!(is.numeric(year) && nchar(year) == 4 && year >= 2004)) {
# Check if year is numeric, if not NULL
cli::cli_abort("Enter valid year as a number (YYYY), data only goes back to 2004")
}
if (metric == "ORtg") {
### Pull Data
url <- paste0("https://kenpom.com/playerstats.php?",
"y=", year,
"&s=", metric_url,
"&f=", conf,
"&c=", conf_only)
page <- rvest::session_jump_to(browser, url)
Sys.sleep(5)
header_cols <- c("Rk","Player","Team", metric,
"Hgt","Wgt","Yr")
y <- list()
for (i in 1:4) {
groups <- c("At least 28% of possessions used", "At least 24% of possessions used",
"At least 20% of possessions used", "All players")
x <- (page %>%
xml2::read_html() %>%
rvest::html_elements("table"))[[i]] %>%
rvest::html_table()
x <- x[,1:7]
colnames(x) <- header_cols
suppressWarnings(
x <- x %>%
dplyr::filter(!is.na(as.numeric(.data$Wgt)))
)
x <- dplyr::mutate(x,
"Team" = sapply(.data$Team, function(arg) {
stringr::str_trim(stringr::str_replace(stringr::str_remove(arg,'\\d+| \\*| \\*+'),'\\*+','')) }),
"Year" = year,
"Group" = groups[i]) %>%
as.data.frame() %>%
janitor::clean_names()
y <- c(y,list(x))
}
### Store Data
kenpom <- y
} else {
### Pull Data
url <- paste0("https://kenpom.com/playerstats.php?",
"y=", year,
"&s=", metric_url,
"&f=", conf,
"&c=", conf_only)
page <- rvest::session_jump_to(browser, url)
Sys.sleep(5)
header_cols <- c("Rk","Player","Team", metric,
"Hgt","Wgt","Yr")
x <- (page %>%
xml2::read_html() %>%
rvest::html_elements(css = '#ratings-table'))[[1]] %>%
rvest::html_table()
x <- x[,1:7]
colnames(x) <- header_cols
suppressWarnings(
x <- x %>%
dplyr::filter(!is.na(as.numeric(.data$Wgt)))
)
x <- dplyr::mutate(x,
"Team" = sapply(.data$Team, function(arg) {
stringr::str_trim(stringr::str_replace(stringr::str_remove(arg,'\\d+| \\*| \\*+'),'\\*+','')) }),
"Year" = year) %>%
as.data.frame()
### Store Data
kenpom <- x %>%
janitor::clean_names()
}
},
error = function(e) {
message(glue::glue("{Sys.time()}: Invalid arguments or no player stats data for {year} {metric} available!"))
},
warning = function(w) {
},
finally = {
}
)
return(kenpom)
}
#' **Get KPoY Leaders Tables**
#'
#' @param year Year of data to pull (earliest year of data available: 2011)
#' @return Returns a list of tibbles: "kPoYRatings", "GameMVPs"
#'
#' **KPoYRatings**
#'
#'
#' |col_name |types |
#' |:-----------|:---------|
#' |rk |integer |
#' |player |character |
#' |kpoy_rating |numeric |
#' |team |character |
#' |hgt |character |
#' |wgt |numeric |
#' |exp |character |
#' |home_town |character |
#' |year |numeric |
#' |group |character |
#'
#' **GameMVPs**
#'
#'
#' |col_name |types |
#' |:----------|:---------|
#' |rk |numeric |
#' |player |character |
#' |game_mv_ps |numeric |
#' |team |character |
#' |hgt |character |
#' |wgt |numeric |
#' |exp |character |
#' |home_town |character |
#' |year |numeric |
#' |group |character |
#'
#' @importFrom cli cli_abort
#' @importFrom dplyr select filter mutate
#' @importFrom tidyr separate
#' @import stringr
#' @import rvest
#' @export
#' @keywords Leaders
#' @family KenPom Ratings Functions
#'
#' @examples
#' \donttest{
#' try(kp_kpoy(year=2021))
#' }
#'
kp_kpoy <- function(year=most_recent_mbb_season()){
tryCatch(
expr = {
if (!has_kp_user_and_pw()) stop("This function requires a KenPom subscription e-mail and password combination,\n set as the system environment variables KP_USER and KP_PW.", "\n See ?kp_user_pw for details.", call. = FALSE)
browser <- login()
if (!(is.numeric(year) && nchar(year) == 4 && year >= 2011)) {
# Check if year is numeric, if not NULL
cli::cli_abort("Enter valid year as a number (YYYY), data only goes back to 2011")
}
### Pull Data
url <- paste0("https://kenpom.com/kpoy.php?",
"y=", year)
page <- rvest::session_jump_to(browser, url)
Sys.sleep(5)
y <- list()
for (i in 1:2) {
groups <- c("kPoY Rating", "Game MVP Leaders")
x <- (page %>%
xml2::read_html() %>%
rvest::html_elements(css = '#kpoy-table'))[[i]] %>%
rvest::html_table() %>%
as.data.frame()
if (i == 1) {
header_cols <- c("Rk","Player","kpoyRating")
} else {
header_cols <- c("Rk","Player","GameMVPs")
}
colnames(x) <- header_cols
x <- x %>%
tidyr::separate("Player",
into = c("Player","col"),
sep = ",",
extra = "merge")
x <- x %>%
dplyr::mutate(
Team = stringr::str_extract(
stringr::str_extract(.data$col,'[^\\d-\\d{1,}]+'),".+"),
Hgt = stringr::str_extract(
stringi::stri_extract_first_regex(.data$col, '[^,]+'),"\\d{1,}-\\d{1,}+"),
Wgt =
stringr::str_extract(.data$col,'\\d{3}'),
Exp = stringr::str_extract(.data$col, "Fr|So|Jr|Sr|r-Fr|r-So|r-Jr|r-Sr"),
HomeTown = stringr::str_extract(
stringi::stri_extract_last_regex(.data$col, '[^\u00b7]+'),".*")
)
suppressWarnings(
if (i == 1) {
x <- x %>%
dplyr::mutate_at(c("kpoyRating","Wgt"), as.numeric)
} else {
x <- x %>%
dplyr::mutate_at(c("GameMVPs","Wgt"), as.numeric)
}
)
x <- x %>%
dplyr::mutate(
"Year" = year,
"Group" = groups[i]) %>%
as.data.frame()
x <- x %>%
dplyr::select(-"col")
if (i == 2) {
replace_na_with_last <- function(x, p = is.na, d = 0){c(d, x)[cummax(seq_along(x)*(!p(x))) + 1]}
x$Rk <- replace_na_with_last(x$Rk)
}
x <- x %>%
janitor::clean_names()
y <- c(y,list(x))
}
### Store Data
kenpom <- y
names(kenpom) <- c("KPoYRatings", "GameMVPs")
},
error = function(e) {
message(glue::glue("{Sys.time()}: Invalid arguments or no KenPom player of the year data for {year} available!"))
},
warning = function(w) {
},
finally = {
}
)
return(kenpom)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.