R/tournament.R

#' Play simulated tournament games
#' @param g a tournament
#' @param node index of node to play
#' @param series_length length of each series
#' @param ... currently ignored
#' @importFrom igraph vertex_attr neighbors
#' @importFrom stats runif
#' @importFrom dplyr arrange_
#' @importFrom utils head tail
#' @export
#'
#'
#'

play <- function(g, node = 1, series_length = 1, ...) {
#  cat(paste("checking node", node, "\n"))
  self_theta <- igraph::vertex_attr(g, "theta", index = node)
#  cat(paste("...node value is", self_theta, "\n"))
  if (!is.na(self_theta)) {
    return(g)
  } else {
    children <- igraph::neighbors(g, v = node, mode = "out")
    g1 <- play(g, node = children[1], series_length)
#    cat(paste("completed left search\n"))
    g2 <- play(g1, node = children[2], series_length)
#    cat(paste("completed right search\n"))
    node_attr <- igraph::vertex_attr(g2, index = children) %>%
      as.data.frame() %>%
    # higher theta should always have HFA
      dplyr::arrange_(~seed)
#    l_theta <- igraph::vertex_attr(g2, "theta", index = children[1])
#    r_theta <- igraph::vertex_attr(g2, "theta", index = children[2])
#    l_alpha <- igraph::vertex_attr(g2, "alpha", index = children[1])
#    r_alpha <- igraph::vertex_attr(g2, "alpha", index = children[2])
#    l_seed <- igraph::vertex_attr(g2, "seed", index = children[1])
#    r_seed <- igraph::vertex_attr(g2, "seed", index = children[2])
#    alpha <- igraph::vertex_attr(g2, "alpha_sport", index = children[1])

    series_prob <- series_probability_df(node_attr, series_length)
#    cat(series_prob)
#    cat(paste(l_seed, ":", l_theta, "vs.", r_seed, ":", r_theta, "series_prob = ", series_prob, "\n"))
    if (stats::runif(1) < series_prob) {
      winner <- utils::head(node_attr, 1)
    } else {
      winner <- utils::tail(node_attr, 1)
    }
    g2 <- g2 %>%
      igraph::set_vertex_attr("theta", index = node, value = winner$theta) %>%
      igraph::set_vertex_attr("alpha", index = node, value = winner$alpha) %>%
      igraph::set_vertex_attr("seed", index = node, value = winner$seed) %>%
      igraph::set_vertex_attr("alpha_sport", index = node, value = winner$alpha)
    return(g2)
  }
}

#' Calculate the probability of winning a series
#' @param theta1 value of theta
#' @param theta2 value of theta for other team
#' @param alpha1 HFA for team1
#' @param alpha2 HFA for team2
#' @param alpha HFA for overall sport
#' @param series_length number of games in the series
#' @param ... currently ignored
#' @importFrom stats dnbinom runif
#' @export
#' @examples
#' if (require(dplyr)) {
#'
#'   # should be around 40%
#'   series_probability(spurs_bulls$mean_theta[1], spurs_bulls$mean_theta[2],
#'                      spurs_bulls$mean_alpha[1], spurs_bulls$mean_alpha[2],
#'                      spurs_bulls$alpha_sport[1], series_length = 1)
#'   long_run_prob(spurs_bulls, n = 10)
#'
#'   series_probability(spurs_bulls$mean_theta[1], spurs_bulls$mean_theta[2],
#'                      spurs_bulls$mean_alpha[1], spurs_bulls$mean_alpha[2],
#'                      spurs_bulls$alpha_sport[1], series_length = 7)
#'   long_run_prob(spurs_bulls, n = 10, series_length = 7)
#'
#'   # should be nearly certain
#'   series_probability(spurs_bulls$mean_theta[1], spurs_bulls$mean_theta[2],
#'                      spurs_bulls$mean_alpha[1], spurs_bulls$mean_alpha[2],
#'                      spurs_bulls$alpha_sport[1], series_length = 99)
#'   long_run_prob(spurs_bulls, n = 10, series_length = 99)
#' }

series_probability <- function(theta1, theta2, alpha1, alpha2, alpha, series_length = 1, ...) {
#  cat(paste("series_length =", series_length, "\n"))

  # define a home and away probability of winning for the first team
  p <- c(theta1 - theta2 + alpha + alpha1,
         theta1 - theta2 + alpha - alpha2)

  # ilogit
  bt_prob <- exp(p) / (1 + exp(p))
  # Max number of games played at home and away for higher seeded team
  win <- c(ceiling(series_length / 2), floor(series_length / 2))

  # Joint distribution of games won at home and games won away.
  joint <- outer(stats::dbinom(0:win[1], win[1], bt_prob[1]),
                 stats::dbinom(0:win[2], win[2], bt_prob[2]))

  # Find the cells that we want to add up in joint
  #Probability that the team associated with theta1 wins.
  sum(joint[row(joint) + col(joint) - 2 >= win[1]])

  # cumulative probability of fewer than `win` failures
  # This assume each probability is the same for all games
  # sum(stats::dnbinom(0:(win - 1), win, prob = bt_prob))
}

#' @rdname series_probability
#' @param data a data frame like \code{\link{bigfour_end}}
#' @export
#' @examples
#'
#' # Spurs at home, single game
#' if (require(dplyr)) {
#'   x <- spurs_bulls %>%
#'     rename(theta = mean_theta, alpha = mean_alpha) %>%
#'     arrange(desc(theta))
#'
#'   series_probability_df(x, 1)
#'   series_probability_df(arrange(x, theta), 1)
#'
#'   # Spurs at home, seven game series
#'   series_probability_df(x, 7)
#'
#'   # Spurs at home, 99 game series
#'   series_probability_df(x, 99)
#'
#'   # monotonic increase for Spurs
#'   sapply(seq(1, 15, by = 2), series_probability_df,
#'          data = x)
#'
#'   # monotonic decrease for Bulls
#'   sapply(seq(1, 15, by = 2), series_probability_df,
#'          data = arrange(x, theta))
#' }

series_probability_df <- function(data, series_length = 1, ...) {
  if (nrow(data) != 2) {
    stop("data frame must have exactly two rows")
  }
  series_probability(data$theta[1], data$theta[2],
                     data$alpha[1], data$alpha[2],
                     data$alpha_sport[1], series_length = series_length)
}


#' Seed a tournament
#' @param data a data frame
#' @param ... currently ignored
#' @importFrom igraph make_tree set_vertex_attr degree
#' @export

seed_tournament <- function(data, ...) {
  n <- nrow(data)
  i <- 1:n
  num_games <- n - 1
  t <- igraph::make_tree(num_games + n)
  leaves <- igraph::degree(t, mode = "out") == 0
  t_idx <- tournament_ordering(i)
  t <- t %>%
    set_vertex_attr("theta", index = leaves, value = data$mean_theta[t_idx]) %>%
    set_vertex_attr("alpha", index = leaves, value = data$mean_alpha[t_idx]) %>%
    set_vertex_attr("alpha_sport", index = leaves, value = data$alpha_sport[t_idx]) %>%
    set_vertex_attr("seed", index = leaves, value = t_idx)
  return(t)
}

#' Set tournament ordering
#' @param x a vector of potential seeds
#' @importFrom utils head
#' @export
#' @examples
#' tournament_ordering(1:16)

tournament_ordering <- function(x) {
  if (length(x) <= 2) {
    return(sort(x))
  }
  if (length(x) == 4) {
    return(c(1, 4, 3, 2))
  }
  # match all the pairs
  y <- utils::head(c(rbind(x, rev(x))), length(x))

  odds <- y[seq(from = 1, to = length(y), by = 2)]
  idx <- tournament_ordering(odds)
  new_idx <- rep(idx + idx - 1, each = 2) + rep(c(0, 1), by = 2)
  return(y[new_idx])
}

#' Finalize tournament results
#' @param g a tournament
#' @param ... currently ignored
#' @importFrom igraph vcount as_data_frame
#' @importFrom tibble rownames_to_column
#' @importFrom dplyr mutate group_by summarize n
#' @export

finish_tournament <- function(g, ...) {
  n <- igraph::vcount(g)
  g %>%
    igraph::as_data_frame(what = "vertices") %>%
    tibble::rownames_to_column() %>%
    dplyr::mutate_(finish = ~ceiling(log2(as.numeric(rowname) + 1))) %>%
    dplyr::group_by_(~seed) %>%
    dplyr::summarize_(theta = ~mean(theta), finish = ~min(finish), num_games = ~n())
}

#' Simulate a single tournament
#' @param data a data frame
#' @param series_length number of games in each series
#' @param ... arguments passed to \code{\link{play}}
#' @rdname many_simulations
#' @export
#' @importFrom dplyr mutate %>%
#' @examples
#'
#' if (require(dplyr)) {
#' nba <- bigfour_end %>%
#'   filter(sport == "nba", season == 10) %>%
#'   arrange(desc(mean_theta)) %>%
#'   head(16)
#'
#' one_simulation(nba)
#' one_simulation(nba, series_length = 7)
#' one_simulation(nba, series_length = 99)
#'
#' res <- best16_2016 %>%
#'   group_by(sport) %>%
#'   do(many_simulations(., n = 10, series_length = 7))
#
#' res %>%
#'   group_by(sport) %>%
#'   summarize(cor(seed, finish))
#' }
#'

one_simulation <- function(data, series_length = 1, ...) {
  data %>%
    seed_tournament() %>%
    play(series_length = series_length, ...) %>%
    finish_tournament() %>%
    dplyr::mutate_(sport = ~unique(data$sport))
}

#' Simulate many tournaments
#' @param n number of simulations
#' @importFrom tibble as.tibble
#' @importFrom tidyr unnest
#' @importFrom dplyr mutate
#' @export

many_simulations <- function(data, n = 2, series_length = 1, ...) {
  replicate(n, expr = one_simulation(data, series_length = series_length)) %>%
    t() %>%
    tibble::as.tibble() %>%
    tidyr::unnest() %>%
    dplyr::mutate(rep_id = rep(1:n, each = nrow(data)))
}

#' Calculate long run probabilities
#' @param ... arguments passed to \code{\link{many_simulations}}
#' @importFrom dplyr group_by_ summarize_ mutate_ filter_
#' @export
#' @examples
#'
#' parity <- data.frame(
#'   sport = "nba",
#'   mean_theta = c(0.01, 0),
#'   mean_alpha = c(0, 0),
#'   alpha_sport = 0
#' )
#'
#' long_run_prob(parity, n = 100, series_length = 99)
#'
#' if (require(dplyr) && require(tidyr)) {
#'   best16_2016 %>%
#'     group_by(sport) %>%
#'     do(wpct = long_run_prob(., n = 10, series_length = 7)) %>%
#'     unnest()
#' }
#'
#'

long_run_prob <- function(...) {
  many_simulations(...) %>%
    dplyr::group_by_(~seed) %>%
    dplyr::summarize_(N = ~n(), wins = ~sum(finish == 1)) %>%
    dplyr::mutate_(wpct = ~wins / N) %>%
    dplyr::filter_(~seed == 1)
}
beanumber/tourneyr documentation built on May 27, 2019, 4:49 a.m.