knitr::opts_chunk$set(echo = T, message = F, warning = F, eval = F)
# libraries
library(tidyverse)
library(httr2)

Pinnacle API

Resources:

URLs

pinn_base_url <- "https://guest.api.arcadia.pinnacle.com/0.1/"

pinnacle_ids <- list(
  sports = list(
    "baseball"   = 3L,
    "basketball" = 4L,
    "handball"   = 18L,
    "hockey"     = 19L,
    "rugby"      = 27L,
    "soccer"     = 29L,
    "tennis"     = 33L,
    "formula1"   = 41L
  ),
  leagues = list(
    "nfl" = 889L,
    "mlb" = 246L,
    "nba" = 487L,
    "nhl" = 1456L
  )
)

Endpoints

pinn_endpts <- list(

  # markets: leagues
  straight = "leagues/{league_id}/markets/straight"
  matchup  = "leagues/{league_id}/matchups"          # get info on markets

  # markets: sports
  straight = "sports/{sport_id}/markets/straight"    # ?primaryOnly=false&withSpecials=false
  matchup  = "sports/{sport_id}/matchups"            # ?withSpecials=false

  # sub markets: players
  straight = "matchups/{matchup_id}/markets/related/straight"
  related  = "matchups/{matchup_id}/related"

  # queries
  # oddsFormat <- c("American", "Decimal")
  # since: Retrieve incremental updates (see line 632 <https://github.com/pinnacleapi/openapi-specification/blob/master/linesapi-oas.yaml>)

)
url <- "https://guest.api.arcadia.pinnacle.com/0.1/"

hockey_leagues_url <- "https://guest.api.arcadia.pinnacle.com/0.1/sports/19/leagues?all=false"

api_resp <- pinnacle_request(hockey_leagues_url)

hockey_leagues <- map_dfr(api_resp, ~compact(.x))

GET Request

See `httr2 Wrapping APIs Article for more information.

pinnacle_request <- function(url, flatten = FALSE) { #, ...) {

  # url <- "https://guest.api.arcadia.pinnacle.com/0.1/"

  query_lst <- list(
    # ...,
    all = "false"
  )

  # create request that uses the base API url
  api_req <- httr2::request(url) |>
    httr2::req_headers(
      "Referer" = "https://www.pinnacle.com/",
      "X-Session" = "Vqlf7eLQOFpEeUYxrXlGfd9YMXj7xYGH",
      "X-API-Key" = "CmX2KcMrXuFmNg6YFbmTxE0y9CIrOi0R"
    )  |>
    # add on additional paths (i.e. endpoints)
    httr2::req_url_path_append("sports") |> 
    httr2::req_url_path_append("19") |> 
    httr2::req_url_path_append("leagues") |> 
    # modify query parameters
    # httr2::req_url_query(`all` = FALSE) |> 
    httr2::req_url_query(!!!query_lst) |> 
    httr2::req_perform()

   # result returned as JSON
  api_resp <- api_req |>
    httr2::resp_body_json(simplifyVector = flatten)

  api_resp 

}
pinn_url <- glue::glue("{pinn_base_url}{pinn_league_id}{pinn_type}")

api_resp <- pinnacle_request(pinn_url)
api_resp

Munge

url <- list(
  base_url = "https://guest.api.arcadia.pinnacle.com/0.1",
  endpoints = "leagues"
)

url <- paste(url, sep = "", collapse = "/")

sport_id <- pinnacle_ids$sports$hockey
all_sports <- glue::glue("https://guest.api.arcadia.pinnacle.com/0.1/sports/{sport_id}/leagues?all=false")

api_resp <- pinnacle_request(url = all_sports)

resp_unnested <- api_resp |>
  map_dfr(~as_tibble(compact(.x)))

api_resp[[1]] |> names()
api_resp[[1]][["sport"]] |> names()

pluck(api_resp, 1, "id")
pluck(api_resp, 1, "sport", "id")

tibble(
  id = map_int(api_resp, ~pluck(.x, "id")),
  sport_id = map_int(api_resp, ~pluck(.x, "sport", "id"))
)

x <- api_resp[[1]] |>
  compact() |>
  as_tibble() |>
  mutate(sport = as_tibble(sport))

x$sport |> as_tibble()

league_id <- pinnacle_ids$leagues$mlb
url <- glue::glue("https://guest.api.arcadia.pinnacle.com/0.1/leagues/{league_id}/matchups")
api_resp <- pinnacle_request(url, flatten = T)

api_resp$parent |>
  as_tibble() |>
  unnest(participants)

Action Network API

Resources:

# action network urls
action_network_urls <- list(
  base       = "https://api.actionnetwork.com/web/v1",
  books      = "books",
  scoreboard = "scoreboard"
)

Endpoints

Books

# api request for 'books' endpoint
action_net_req_url <- list(
  action_network_urls$base,
  action_network_urls$books
) |> 
  paste0(collapse = "/") 

action_net_resp <- jsonlite::fromJSON(action_net_req_url, flatten = FALSE)

# api response tidier for 'books' endpoint
action_net_books <- action_net_resp |> 
  pluck("books") |> 
  as_tibble() |> 
  unnest_wider(meta) |> 
  unnest_wider(logos) |> 
  unnest_wider(deeplink) |>
  relocate(book_parent_id, .after = source_name) |> 
  arrange(id, display_name)

action_net_books

# ontario only books
action_net_books_ON <- action_net_books |> 
  unnest(states) |>
  mutate(states = map(states, ~as_tibble_col(.x, "states"))) |> 
  unnest(states) |> 
  filter(states == "ON") |> 
  distinct()

action_net_books_ON
action_network_api <- function(league_abbr = "all", date_id = Sys.Date()) {

  # api request for 'scoreboard' endpoint
  action_net_req_url <- list(
    base_url = action_network_urls$base,
    endpoint = action_network_urls$scoreboard,
    league = league_abbr
  ) |> 
    paste0(collapse = "/") 

  query_lst <- compact(
    list(
      bookIds = NULL,
      date = str_remove_all(date_id, "-")
    )
  )

  query_vec <- paste(names(query_lst), query_lst, sep = "=", collapse = "&")

  action_net_resp <- paste(action_net_req_url, query_vec, sep = "?") |> 
    jsonlite::fromJSON(action_net_req_url, flatten = TRUE)

  action_net_odds <- action_net_resp |> 
    pluck("all_games") |> 
    as_tibble()
    filter(league_name == league_abbr) |>
    select(games) |>
    unnest(games) 

  teams_tidied <- action_net_odds |>
    transmute(
      row_id = row_number(),
      teams = map(
        .x = teams, 
        .f = ~as_tibble(.x) |> 
          add_column(venue = c("away", "home"), .after = 5)
      )
    ) |> 
    unnest(teams) |> 
    pivot_wider(
      id_cols = row_id,
      names_from = venue,
      names_glue = "{venue}_{.value}",
      values_from = -c(row_id, venue)
    ) |> 
    nest(team_info = -c(row_id, contains("abbr"), contains("standings"))) |> 
    nest(standings = c(contains("standings")))

  odds_tidied <- map_dfr(action_net_odds$odds, ~as_tibble(.x), .id = "row_id") |> 
    mutate(row_id = as.integer(row_id))

  teams_tidied |> 
    select(-c(team_info, standings)) |> 
    inner_join(odds_tidied, by = "row_id") |> 
    relocate(type, .after = row_id) |>
    filter(type == "game")

}
league_abbr <- c("nhl", "mlb")

action_network_lines <- action_network_api("nhl")

action_network_lines |> view()
test_url <- "https://api.actionnetwork.com/web/v1/scoreboard/nhl?date=20220429"

json_raw <- jsonlite::fromJSON(test_url) |> 
  pluck("games") |> 
  as_tibble()

json_raw |> view()
json_raw |> names()

json_raw$boxscore |> 
  as_tibble() |>
  unnest_wider(strength) |> 
  rename(away_strength = away, home_strength = home) |> 
  unnest_auto(linescore)

book_ids <- action_net_books |> 
  distinct(id, display_name, abbr, source_name, book_parent_id)

map_dfr(json_raw$odds, ~as_tibble(.x), .id = "row_id") |> 
  mutate(row_id = as.integer(row_id)) |> 
  relocate(type, book_id, .after = row_id) |> 
  filter(type == "game") |> 
  distinct(book_id) |> 
  left_join(book_ids, by = c("book_id" = "id"))

Line Manipulation

Overview

Moneyline

Probability $G_{1} < G_{2}$ or $G_{1} > G_{2}$

Spread

Probability $G_{F} - G_{D} \geq 2$ or $G_{F} > G_{A}$, where:

Total

Probability $G_{H} + G_{A} < X$ or $G_{H} + G_{A} > X$


Example

# source script
# source(here::here("code/pinnacle_scrape.R"))

library(tidyverse)
library(nhldata)

date_id <- Sys.Date()

pinn_df <- tidy_pinnacle() |>
  filter(lubridate::as_date(date_time) == date_id) |> 
  select(-c(date_time, matchup_id)) |>
  mutate(underdog = if_else(home_points_pl < 0, "away", "home"), .before = 1)
lines_df <- pinn_df |> 
  filter(home_team == "CGY")

names(lines_df)

# moneyline (win probabilities)
away_ml <- lines_df[1, "away_imp_prob_ml", drop = T]
home_ml <- lines_df[1, "home_imp_prob_ml", drop = T]

sum(away_ml, home_ml)
away_ml; home_ml

# spread (goal difference probabilities)
underdog <- lines_df[1, "underdog", drop = T]
away_pl <- lines_df[1, "away_imp_prob_pl", drop = T]
home_pl <- lines_df[1, "home_imp_prob_pl", drop = T]

away_pl; home_pl; underdog

# totals (total goal probability)
total <- unique(c(lines_df[1, "under_points", drop = T], lines_df[1, "over_points", drop = T]))
under <- lines_df[1, "under_imp_prob", drop = T]
over <- lines_df[1, "over_imp_prob", drop = T]

total; under; over
# moneyline (win probabilities)
away_ml <- 0.3167146
home_ml <- 0.6832854

sum(away_ml, home_ml)
away_ml; home_ml

# spread (goal difference probabilities)
underdog <- "away"
away_pl <- 0.5538448
home_pl <- 0.4461552

sum(away_pl, home_pl)
away_pl; home_pl; underdog

# totals (total goal probability)
total <- 5.5
under <- 0.5568303
over <- 0.4431697

sum(under, over)
total; under; over

Each team's goals will be Poisson distributed with mean rate $\lambda$.

Totals

Find values for $\lambda_{A}$ and $\lambda_{H}$ that equate to:

goals_from_line <- function(par, away_ml, home_ml, away_pl, home_pl, underdog, total, under, over) {

  lambda_away <- par[1]
  lambda_home <- par[2]

  dog <- underdog
  chalk <- if_else(underdog == "away", "home", "away")


  # initialize data ----
  goals_crossed <- crossing(
    goals_away = seq.int(0L, 12L), 
    goals_home = seq.int(0L, 12L)
  ) 

  joint_pdf_df <- goals_crossed |> 
    mutate(
      goals_diff = .data[[paste0("goals_", chalk)]] - .data[[paste0("goals_", dog)]],
      total_goals = goals_away + goals_home,
      pdf_away = dpois(goals_away, lambda = lambda_away),
      pdf_home = dpois(goals_home, lambda = lambda_home),
      joint_pdf = pdf_away * pdf_home
    )


  # moneyline ----
  moneyline_pdf <- joint_pdf_df |>
    mutate(
      ml_winner = case_when(
        goals_away > goals_home ~ "away",
        goals_away < goals_home ~ "home",
        goals_away == goals_home ~ "tie",
        TRUE ~ NA_character_
      )  
    ) |> 
    group_by(ml_winner) |> 
    summarize(pdf = sum(joint_pdf)) |> 
    filter(ml_winner != "tie") |> 
    mutate(pdf = pdf / sum(pdf))

  # sum(moneyline_pdf$pdf)

  prob_ml_away <- sum(moneyline_pdf[moneyline_pdf$ml_winner == "away", "pdf", drop = T])
  prob_ml_home <- sum(moneyline_pdf[moneyline_pdf$ml_winner == "home", "pdf", drop = T])
  sse_moneyline <- sum((prob_ml_away - away_ml)^2, (prob_ml_home - home_ml)^2)


  # puckline (spread) ----
  puckline_pdf <- joint_pdf_df |> 
     mutate(
      pl_winner = case_when(
        goals_diff >= 2L ~ chalk,
        goals_diff <= 1L ~ dog,
        goals_diff == 0L ~ "tie",
        TRUE ~ NA_character_
      )  
    ) |>
    group_by(pl_winner) |> 
    summarize(pdf = sum(joint_pdf)) |> 
    filter(pl_winner != "tie") |> 
    mutate(pdf = pdf / sum(pdf))

  # sum(puckline_pdf$pdf)

  prob_pl_away <- sum(puckline_pdf[puckline_pdf$pl_winner == "away", "pdf", drop = T])
  prob_pl_home <- sum(puckline_pdf[puckline_pdf$pl_winner == "home", "pdf", drop = T])
  sse_puckline <- sum((prob_pl_away - away_pl)^2, (prob_pl_home - home_pl)^2)


  # totals ----
  totals_pdf <- joint_pdf_df |> 
    group_by(total_goals) |> 
    summarize(pdf = sum(joint_pdf))

  # sum(totals_pdf$pdf)

  prob_under <- sum(totals_pdf[totals_pdf$total_goals < total, "pdf", drop = T])
  prob_over <- sum(totals_pdf[totals_pdf$total_goals > total, "pdf", drop = T])
  sse_totals <- sum((prob_under - under)^2, (prob_over - over)^2)


  # sum of squared residuals ----
  sse <- sse_moneyline + sse_puckline + sse_totals

  sse

}

# use mle to find most likely value of theta (i.e. P(data|theta))
mle_fit <- optim(
  par = c(1, 1),         # initial values
  fn = goals_from_line, 
  away_ml = away_ml,
  home_ml = home_ml,
  away_pl = away_pl,
  home_pl = home_pl,
  underdog = underdog,
  total = total, 
  under = under, 
  over = over,
  method = "L-BFGS-B",
  lower = c(
    lambda_away = 1, 
    lambda_home = 1
  ),
  upper = c(
    lambda_away = 8, 
    lambda_home = 8
  )
)

mle_fit

lambda_away <- mle_fit$par[1]
lambda_home <- mle_fit$par[2]

lambda_away; lambda_home
sum(mle_fit$par)

away_goals_sim <- rpois(1e6, lambda = mle_fit$par[1])
home_goals_sim <- rpois(1e6, lambda = mle_fit$par[2])

# moneyline
away_ml_pr_raw <- mean(away_goals_sim > home_goals_sim) 
home_ml_pr_raw <- mean(away_goals_sim < home_goals_sim)
away_ml_pr <- away_ml_pr_raw / (away_ml_pr_raw + home_ml_pr_raw)
home_ml_pr <- home_ml_pr_raw / (away_ml_pr_raw + home_ml_pr_raw)

away_ml_pr; home_ml_pr
sum(away_ml_pr, home_ml_pr)


# puckline

if (underdog == "away") {

  away_pl_pr_raw <- mean(home_goals_sim - away_goals_sim <= 1)
  home_pl_pr_raw <- mean(home_goals_sim - away_goals_sim >= 2)
  away_pl_pr <- away_pl_pr_raw / (away_pl_pr_raw + home_pl_pr_raw)
  home_pl_pr <- home_pl_pr_raw / (away_pl_pr_raw + home_pl_pr_raw)

} 

if (underdog == "home") {

  away_pl_pr_raw <- mean(away_goals_sim - home_goals_sim <= 1)
  home_pl_pr_raw <- mean(away_goals_sim - home_goals_sim >= 2)
  away_pl_pr <- away_pl_pr_raw / (away_pl_pr_raw + home_pl_pr_raw)
  home_pl_pr <- home_pl_pr_raw / (away_pl_pr_raw + home_pl_pr_raw)

}

away_pl_pr; home_pl_pr
sum(away_pl_pr, home_pl_pr)

# under / over probabilities
ppois(total, lambda = sum(mle_fit$par), lower.tail = TRUE)
ppois(total, lambda = sum(mle_fit$par), lower.tail = FALSE)

tribble(
  ~bet,         ~away,    ~home,    ~away_pred, ~home_pred,
  "moneyline",  away_ml,  home_ml,  away_ml_pr, home_ml_pr, 
  "puckline",   away_pl,  home_pl,  away_pl_pr, home_pl_pr, 
  "totals",     NA_real_, NA_real_, NA_real_,   NA_real_
)
lines_fn <- function(away_ml, home_ml, away_pl, home_pl, underdog, total, under, over) {

  # use mle to find most likely value of theta (i.e. P(data|theta))
  mle_fit <- optim(
    par = c(1, 1),         # initial values
    fn = goals_from_line, 
    away_ml = away_ml,
    home_ml = home_ml,
    away_pl = away_pl,
    home_pl = home_pl,
    underdog = underdog,
    total = total, 
    under = under, 
    over = over,
    method = "L-BFGS-B",
    lower = c(
      lambda_away = 1, 
      lambda_home = 1
    ),
    upper = c(
      lambda_away = 8, 
      lambda_home = 8
    )
  )

  tibble(lambda_away = mle_fit$par[1], lambda_home = mle_fit$par[2])

}
x <- pinn_df |> 
  mutate(ex_goals = pmap(
    list(
      away_imp_prob_ml,
      home_imp_prob_ml,
      away_imp_prob_pl,
      home_imp_prob_pl,
      underdog,
      total = over_points,
      under_imp_prob,
      over_imp_prob
    ),
    ~lines_fn(
      away_ml = ..1, 
      home_ml = ..2, 
      away_pl = ..3, 
      home_pl = ..4,
      underdog = ..5, 
      total = ..6, 
      under = ..7, 
      over = ..8
    )
  ))

x |> 
  select(away_team, home_team, underdog, ex_goals) |> 
  unnest_wider(ex_goals)

tribble(
  ~distribution, ~rv_type,     ~k_outcomes, ~n_draws,   ~alt_name,
  "categorical", "discrete",   "$k$",       "single",   "generalized Bernoulli",
  "multinomial", "discrete",   "$k$",       "multiple", "", 
  "dirichlet",   "continuous", "",          "",         "multivariate beta"
) |> 
  inner_join(tribble(
    ~distribution, ~example,     
    "categorical", "",
    "multinomial", "Models probability of counts for each side of a $k$-sided die rolled $n$ times.",        
    "dirichlet",   ""
  ), by = "distribution")
library(nhldata)

nhl_id <- nhl_player_search("Matthews") |> 
  pull(player_id)

# file paths
f_path <- c(
  "game_stats" = "C:/mike_nanos/r_files/nhl-data/data/game_stats/game_stats_20212022.rds"
)

# date of interest
date_id <- Sys.Date()

# import and aggregate skater game log stats
stats <- read_rds(f_path[["game_stats"]]) |> 
  filter(!is.na(goals), pos != "G", date < date_id) |> 
  filter(player_id == nhl_id) |> 
  rename(nhl_id = player_id) |> 
  unnest(stats) 

stats2 <- stats |> 
  select(team, venue, opp, goals) |> 
  transmute(
    team, 
    scored = if_else(goals == 0L, 0L, 1L),
    venue,
    opp = if_else(opp == "TBL", opp, "OTHER")
  ) 

scoring_tbl <- stats2 |> 
  group_by(venue, opp) |> 
  summarize(
    scored = sum(scored),
    gp = n(),
    Ps = scored / gp,
    .groups = "drop"
  ) 

$$ \begin{align} P\left(A \mid B \right) &= \frac{P\left(A\right) \cdot P\left(B \mid A \right)}{P\left(B\right)} = \frac{P\left(A \cap B \right)}{P\left(B\right)}\

\end{align} $$

$$ P\left(A \mid B, C \right) = \frac{P\left(B,C \mid A \right) \cdot P\left(A\right)}{P\left(B\right)} $$

$$ P \left( S \mid OPP \right) = \frac{P \left( OPP \mid S \right) \cdot P \left( S \right)}{P \left( OPP \right)} $$

stats2 |> 
  filter(scored == 1L) |> 
  group_by(opp) |> 
  count(opp)

opp_given_S <- scoring_tbl |> 
  count(opp, wt = scored, name = "n_obs")

P_opp_given_S <- opp_given_S[opp_given_S$opp != "OTHER", "n_obs", drop = T] / sum(opp_given_S$n_obs)

P_S <- sum(scoring_tbl$scored) / sum(scoring_tbl$gp)


stats2 |> 
  count(scored, opp)

P_S <- 42 / 76
P_not_S <- (76 - 42) / 76

sum(P_S, P_not_S)

P_OPP_given_S <- 2 / 42
P_not_OPP_given_S <- (42 - 2) / 42

sum(P_OPP_given_S, P_not_OPP_given_S)

P_OPP_given_not_S <- 4 / 34
P_not_OPP_given_not_S <- (34 - 4) / 34

sum(P_OPP_given_not_S, P_not_OPP_given_not_S)

P_S_and_OPP <- P_S * P_OPP_given_S
P_not_S_and_OPP <- P_not_S * P_OPP_given_not_S

P_S_given_OPP <- (P_S_and_OPP) / sum(P_S_and_OPP, P_not_S_and_OPP)

P_S_given_OPP
# libraries ---------------------------------------------------------------

library(tidyverse)
library(latex2exp)
library(ggraph)
library(tidygraph)

mn::plot_theme()

source(here::here("code/create_prob_tree.R"))

prob_graph <- create_prob_tree(event_ids = c("S", "AWAY", "TBL")) 

# as_tibble(prob_graph)

plot_prob_tree(prob_graph)


dfx <- stats |> 
  transmute(
    date, 
    team, 
    venue, 
    opp = if_else(opp == "TBL", opp, "OTH"),
    goals,
    scored = if_else(goals == 0L, "Yes", "No")
  )

prob_list <- vector(mode = "list", length = 15L)

event_A <- dfx |> count(scored)
A <- mn::lookup(event_A, "Yes", scored, n)
not_A <- mn::lookup(event_A, "No", scored, n)

event_B <- dfx |> count(scored, venue)
B_given_A <- mn::lookup(filter(event_B, scored == "Yes"), "away", venue, n)
not_B_given_A <- mn::lookup(filter(event_B, scored == "Yes"), "home", venue, n)

event_C <- dfx |> count(scored, venue, opp)
C_given_A_B <- mn::lookup(filter(event_C, scored == "Yes" & venue == "away"), "TBL", opp, n)


prob_list[[2]] <- A / sum(event_A$n)
prob_list[[3]] <- not_A / sum(event_A$n)

prob_list[[4]] <- B_given_A / pull(filter(event_A, scored == "Yes"), n)
prob_list[[5]] <- not_B_given_A / pull(filter(event_A, scored == "Yes"), n)

prob_list[[8]] <- C_given_A_B / pull(filter(event_B, scored == "Yes" & venue == "away"), n)


prob_list

P_A_B_C <- prob_list[[2]] * prob_list[[4]] * prob_list[[8]]
P_Ac_B_C <- prob_list[[3]] * prob_list[[6]] * prob_list[[12]]

# final result
P_A_given_B_C <- P_A_B_C / (P_A_B_C + P_Ac_B_C)
P_A_given_B_C


miken97/oddsR documentation built on May 12, 2022, 12:37 a.m.