R/mlb_model.R

Defines functions get_posP_val get_posC1B_val get_pos2B_val get_pos3B_val get_posSS_val get_posOF_val get_posUtil_val get_pts get_sal get_injury get_team get_opp_team get_players_stacks

load(file = "./data/example_mlb.Rda")

library(tidyverse)

# set total salary
tot_sal <- 35000

dat <- example_mlb

dat <- dat %>% rename(first = `First Name`, last = `Last Name`, injury_flag = `Injury Indicator`, injury_desc = `Injury Details`, probable = `Probable Pitcher`, order = `Batting Order`)
dat <- dat %>% mutate(posP = as.integer(Position == "P"),
               posC1B = as.integer(Position == "C" | Position == "1B"),
               pos2B = as.integer(Position == "2B"),
               pos3B = as.integer(Position == "3B"),
               posSS = as.integer(Position == "SS"),
               posOF = as.integer(Position == "OF"),
               posUtil = as.integer(!Position == "P"))


get_posP_val <- function(rownum,poscol){
  dat %>% filter(row_number() == rownum) %>% pull(posP)
}
get_posC1B_val <- function(rownum,poscol){
  dat %>% filter(row_number() == rownum) %>% pull(posC1B)
}
get_pos2B_val <- function(rownum,poscol){
  dat %>% filter(row_number() == rownum) %>% pull(pos2B)
}
get_pos3B_val <- function(rownum,poscol){
  dat %>% filter(row_number() == rownum) %>% pull(pos3B)
}
get_posSS_val <- function(rownum,poscol){
  dat %>% filter(row_number() == rownum) %>% pull(posSS)
}
get_posOF_val <- function(rownum,poscol){
  dat %>% filter(row_number() == rownum) %>% pull(posOF)
}
get_posUtil_val <- function(rownum,poscol){
  dat %>% filter(row_number() == rownum) %>% pull(posUtil)
}


# need to update this to get expected points from projection website
get_pts <- function(rownum){
  dat %>% filter(row_number() == rownum) %>% pull(FPPG)
}

get_sal <- function(rownum){
  dat %>% filter(row_number() == rownum) %>% pull(Salary)
}

get_injury <- function(rownum){
  dat %>% filter(row_number() == rownum) %>% mutate(inj = as.integer(!is.na(injury_flag))) %>% pull(inj)
}

#get number of players
n = nrow(dat)


# start model
library(ompr)
library(ompr.roi)
library(ROI.plugin.glpk)


model <- MIPModel() %>%
  add_variable(x[i], i = 1:n, type = "binary") %>%
  # set objective (maximize points)
  set_objective(sum_expr(colwise(get_pts(i))* x[i], i = 1:n)) %>%
  add_constraint(sum_expr(colwise(get_sal(i))* x[i], i = 1:n) <= tot_sal) %>%
  add_constraint(sum_expr(colwise(get_posP_val(i))*x[i], i = 1:n) == 1) %>%
  add_constraint(sum_expr(colwise(get_posC1B_val(i))*x[i], i = 1:n) <= 2) %>%
  add_constraint(sum_expr(colwise(get_pos2B_val(i))*x[i], i = 1:n) <= 2) %>%
  add_constraint(sum_expr(colwise(get_pos3B_val(i))*x[i], i = 1:n) <= 2) %>%
  add_constraint(sum_expr(colwise(get_posSS_val(i))*x[i], i = 1:n) <= 2) %>%
  add_constraint(sum_expr(colwise(get_posOF_val(i))*x[i], i = 1:n) <= 4) %>%
  add_constraint(sum_expr(colwise(get_posC1B_val(i))*x[i], i = 1:n) >= 1) %>%
  add_constraint(sum_expr(colwise(get_pos2B_val(i))*x[i], i = 1:n) >= 1) %>%
  add_constraint(sum_expr(colwise(get_pos3B_val(i))*x[i], i = 1:n) >= 1) %>%
  add_constraint(sum_expr(colwise(get_posSS_val(i))*x[i], i = 1:n) >= 1) %>%
  add_constraint(sum_expr(colwise(get_posOF_val(i))*x[i], i = 1:n) >= 3) %>%
  add_constraint(sum_expr(colwise(get_posUtil_val(i))*x[i], i = 1:n) == 8) %>%
  #add injury constraint
  add_constraint(sum_expr(colwise(get_injury(i))*x[i], i = 1:n) == 0)


#we dont want pitchers and hitters to face each other
# from this codebase:
# https://github.com/dfs-with-r/coach/tree/master/R
penalty <- 33
total_teams <- length(unique(dat$Team))
get_team <- function(team,rownum){
  dat %>% filter(row_number() == rownum) %>% mutate(t = as.integer(Team == team)) %>% pull(t)
}
get_opp_team <- function(team,rownum){
  dat %>% filter(row_number() == rownum) %>% mutate(t = as.integer(Opponent == team)) %>% pull(t)
}


for (j in unique(dat$Team)) {
  model <- add_constraint(
    model,
    sum_expr(colwise(get_opp_team(j, i)*get_posP_val(i)*penalty +  get_team(j, i)*get_posUtil_val(i)) * x[i], i = 1:n) <= penalty)
}

# we want to stack
# from this code/paper:
# https://github.com/zlisto/Daily-Fantasy-Baseball-Contests-in-DraftKings/blob/master/baseball_formulations.jl
#STACK: at least stack_size hitters from at least 1 team, consecutive hitting order
#define a variable for each stack on each team.  This variable =1 if the stack on the team is used
#@defVar(m, used_stack_batters[i=1:num_teams,j=1:num_stacks], Bin)
num_stacks <- 9 #9 different variations of the stack that can occur for each team
model <- model %>% add_variable(y[j,k], j = 1:total_teams, k = 1:num_stacks, type = "binary")

#constraint for each stack, used or not
#@addConstraint(m, constraint_stack[i=1:num_teams,j=1:num_stacks], stack_size*used_stack_batters[i,j] <=
#                 sum{players_teams[t, i]*players_stacks[t, j]*(1-P[t])*players_lineup[t], t=1:num_players})
stack_size <- 4
#function that adds stack columns to our data table
# batting order is 1-9
# stack size determines length of stack e.g. if stack size is 4 batting order included is like 1,2,3,4 or 8,9,1,2
# thus no matter the stack size there will be 9 stacks for baseball for each team
dat <- dat %>% mutate(order = case_when(order == 0 ~ NA_real_,
                                        order == 9 ~ 0,
                                        is.na(order) ~ NA_real_,
                                        TRUE ~ as.double(order)),
                      stack1 = as.integer(order %in% 1:stack_size),
                      stack2 = as.integer(order %in% ((1:stack_size +1) %% 9)),
                      stack3 = as.integer(order %in% ((1:stack_size +2) %% 9)),
                      stack4 = as.integer(order %in% ((1:stack_size +3) %% 9)),
                      stack5 = as.integer(order %in% ((1:stack_size +4) %% 9)),
                      stack6 = as.integer(order %in% ((1:stack_size +5) %% 9)),
                      stack7 = as.integer(order %in% ((1:stack_size +6) %% 9)),
                      stack8 = as.integer(order %in% ((1:stack_size +7) %% 9)),
                      stack9 = as.integer(order %in% ((1:stack_size +8) %% 9)),
                      )

# function to get if a player is in a particular stack
get_players_stacks <- function(stacknum,rownum){
  dat %>% filter(row_number() == rownum) %>% pull(paste0("stack",stacknum))
}


for (te in unique(dat$Team)){
  for (st in 1:num_stacks){
    model <- model %>% add_constraint(penalty*(1-y[j,k]) + sum_expr(colwise(get_team(te, t)*get_players_stacks(st,t)*(1-get_posP_val(t)))*x[t], t = 1:n) - stack_size >= 0)
  }
}


#make sure at least one stack is used
#@addConstraint(m, sum{used_stack_batters[i,j], i=1:num_teams,j=1:num_stacks} >= 1)
model <- model %>% add_constraint(sum_expr(y[j,k], j=1:total_teams,k=1:num_stacks) >=1)

result <- solve_model(model, with_ROI(solver = "glpk", verbose = TRUE))
res <- dat %>% filter(row_number() %in% (result %>% get_solution(x[i]) %>% filter(value == 1) %>% .$i))
jasbner/dfs documentation built on Dec. 3, 2019, 8:10 p.m.