Work/code/2020-04-TexasHoldem-Analysis-pt1.R

#########################################################################################################################
#                                                                                                                       #
#  Title: Basic Texas Hold'em
#  Author: Chris Maerzluft
#  Last Edit: August 11, 2019
#                                                                                                                       #
#########################################################################################################################
# Description ###########################################################################################################
#
# Summarises the basic facts about Texas Hold'em. That is, if games were to be fully played out by every player what are
#   the best hands or cards. I use simulated data to study this. In order to bring in gambling techniques I need other
#   data. Some sources for that include:
#     https://poker.stackexchange.com/questions/881/publicly-available-poker-stats
#     http://poker.cs.ualberta.ca/irc_poker_database.html
#     http://web.archive.org/web/20110205042259/http://www.outflopped.com/questions/286/obfuscated-datamined-hand-histories
#
# Approx size of plots in blog: 861 x 612
#
# Setup R ###############################################################################################################
# Clean environment
rm(list = ls(all = TRUE))
gc()

# Libraries Needed
library(dplyr)
library(tidyr)
library(ggplot2)
library(scales)
library(wesanderson)
library(grid)
library(gridExtra)
library(cowplot)
options(scipen = 999)

#########################################################################################################################
# Load Data #############################################################################################################
n_players_index <- 8 - 1
datasize <- 1
units <- "M"
files <- grep(sprintf("sim%s%sgames", datasize, units), list.files("data-raw/TexasHoldEm"), value = TRUE)
# Make sure 10 is moved to last spot
files <- files[order(nchar(files), files)]
games_list <- list()
for (i1 in files) {
  print(i1)
  load(sprintf("data-raw/TexasHoldEm/%s", i1))
  name <- gsub(sprintf("sim%s%sgames_", datasize, units), "", i1)
  name <- gsub("players.Rdata", "", name)
  games_list[[sprintf("players%s", name)]] <- games
}

#########################################################################################################################
# Plot Options ##########################################################################################################
plt_background <- "#272935"
plt_wordscolor <- "white"
plt_colorscale <- wes_palette("Zissou1", 100, type = "continuous")
plt_colordiscr5 <- wes_palette("Zissou1", 5, type = "discrete")
plt_colordiscr6 <- c("#8c510a", "#d8b365", "#f6e8c3", "#c7eae5", "#5ab4ac", "#01665e")

plt_theme <- theme(
  plot.background = element_rect(fill = plt_background, colour = plt_background),
  plot.title = element_text(hjust = 0.5, size = 24, colour = plt_wordscolor),
  plot.subtitle = element_text(hjust = 0.5, size = 18, colour = plt_wordscolor),
  plot.caption = element_text(hjust = 0, size = 12, colour = plt_wordscolor),

  legend.text = element_text(colour = plt_wordscolor),
  legend.title = element_text(colour = plt_wordscolor),
  legend.background = element_rect(fill = plt_background),
  legend.key = element_rect(fill = plt_background, color = plt_background),

  panel.background = element_blank(),
  panel.grid = element_blank(),

  axis.title = element_blank(),
  axis.text = element_text(size = 16, colour = plt_wordscolor),
  axis.ticks = element_blank(),
  axis.line = element_blank()
)

#########################################################################################################################
# Data handling #########################################################################################################
hand_lvls <- c("Straight Flush", "Four of a Kind", "Full House", "Flush", "Straight", "Three of a Kind", "Two Pair",
               "Pair", "High Card")
games_list <- lapply(games_list, function(x) {
  # x$flop_hand_type <- factor(x$flop_hand_type, levels = hand_lvls)
  # x$turn_hand_type <- factor(x$turn_hand_type, levels = hand_lvls)
  x$final_hand_type <- factor(x$final_hand_type, levels = hand_lvls)
  return(x)
})

#########################################################################################################################
# Table 1 ###############################################################################################################
hand_distributions_8players <- prop.table(table(games_list[[n_players_index]]$final_hand_type))
hand_distributions_8players <- sprintf("%0.4f%%", hand_distributions_8players*100)

#########################################################################################################################
# Plot 1 and 2 ##########################################################################################################
# Data calculation
hand_win_tables <- lapply(games_list, function(x) {
  # What are chances a hand wins given you have it
  win_given_hand <- as.data.frame(prop.table(table(x$final_hand_type, x$winner), margin = 1))
  colnames(win_given_hand) <- c("Hand", "Win", "Percentage")
  win_given_hand$Comparison <- "Conditional"
  # What are chances a hand wins ever
  win_overall <- as.data.frame(prop.table(table(x$final_hand_type, x$winner), margin = 2))
  colnames(win_overall) <- c("Hand", "Win", "Percentage")
  win_overall$Comparison <- "Overall"
  win_pct <- rbind(win_given_hand, win_overall)
  win_pct <- win_pct[win_pct$Win == TRUE, ]
  win_pct$Percentage[is.na(win_pct$Percentage)] <- 0
  win_pct$Comparison <- factor(win_pct$Comparison, levels = c("Overall", "Conditional"))

  return(win_pct)
})
# Plot
hand_win_plots <- lapply(seq_along(hand_win_tables), function(x, df_list, name) {
  tmp_dta <- df_list[[x]]
  # title <- paste(gsub("players", "", name[x]), "Players", sep = " ")
  # p <- ggplot(data = tmp_dta) +
  #   geom_bar(aes(x = Hand, y = Percentage, fill = Comparison), stat = "identity", position = "dodge") +
  #   geom_hline(yintercept = c(1, .75, .5, .25), linetype = "dashed", color = plt_wordscolor) +
  #   scale_y_continuous(labels = percent, limits = c(0, 1), position = "right") +
  #   scale_x_discrete(limits = rev(levels(tmp_dta$Hand))) +
  #   # scale_fill_manual("How often does a hand win:", values = plt_colordiscr5[c(3, 1)]) +
  #   scale_fill_manual("How often does a hand win?", values = plt_colordiscr6[c(2, 5)]) +
  #   plt_theme + theme(
  #     legend.position = "top",
  #     legend.text = element_text(size = 16),
  #     legend.title = element_text(size = 16)
  #   ) + coord_flip() #+ ggtitle(title)

  tmp_dta$Labels <- levels(tmp_dta$Hand)
  tmp_dta$Labels <- gsub("Straight Flush", "Straight\nFlush", tmp_dta$Labels)
  tmp_dta$Labels <- gsub("Four of a Kind", "Four of a\nKind", tmp_dta$Labels)
  tmp_dta$Labels <- gsub("Three of a Kind", "Three of a\nKind", tmp_dta$Labels)

  g.mid <- ggplot(tmp_dta) +
    geom_text(aes(x = 1, y = Hand, label = Labels), color = plt_wordscolor) +
    scale_y_discrete(limits = rev(levels(tmp_dta$Hand)), expand = c(0.082, 0.082)) +
    labs(title = "", x = NULL, y = NULL) +
    theme(
      plot.background = element_rect(fill = plt_background, colour = plt_background),
      axis.title = element_blank(),
      panel.grid = element_blank(),
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank(),
      panel.background = element_blank(),
      axis.text.x = element_text(color = NA),
      axis.ticks.x = element_line(color = NA),
      plot.margin = margin(t = 5, b = 5)
    )

  g1 <- ggplot(data = tmp_dta %>% filter(Comparison == "Overall")) +
    geom_bar(aes(x = Hand, y = Percentage), stat = "identity", fill = plt_colordiscr6[2]) +
    geom_hline(yintercept = c(1, .75, .5, .25), linetype = "dashed", color = plt_wordscolor) +
    scale_y_reverse(labels = percent) +
    scale_x_discrete(limits = rev(levels(tmp_dta$Hand))) +
    labs(title = "Overall") +
    theme(
      axis.title.x = element_blank(),
      axis.title.y = element_blank(),
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank(),
      plot.margin = margin(l = 15, r = 5, t = 5, b = 5)
    ) +
    plt_theme +
    coord_flip()

  g2 <- ggplot(data = tmp_dta %>% filter(Comparison == "Conditional")) +
    geom_bar(aes(x = Hand, y = Percentage), stat = "identity", fill = plt_colordiscr6[5]) +
    geom_hline(yintercept = c(1, .75, .5, .25), linetype = "dashed", color = plt_wordscolor) +
    scale_y_continuous(labels = percent) +
    scale_x_discrete(limits = rev(levels(tmp_dta$Hand))) +
    labs(title = "Conditional", x = NULL) +
    theme(
      axis.title.x = element_blank(),
      axis.title.y = element_blank(),
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank(),
      plot.margin = margin(l = 5, r = 15, t = 5, b = 5)
    ) +
    plt_theme +
    coord_flip()

  title <- ggdraw() +
    draw_label(
      paste(gsub("players", "", name[x]), "Players", sep = " "),
      size = 24,
      color = plt_wordscolor,
      fontface = "bold"
    ) +
    draw_line(
      c(0, 1), c(0.000000001, 0.000000001),
      color = plt_wordscolor,
      size = 2
    ) + theme(
      plot.background = element_rect(fill = plt_background, colour = plt_background)
    )

  plots <- plot_grid(g1, g.mid, g2, nrow = 1, rel_widths = c(45/100, 15/100, 45/100))
  p <- plot_grid(title, plots, ncol = 1, rel_heights = c(0.1, 1))

  return(p)
}, df_list = hand_win_tables, name = names(hand_win_tables))
win_distribution_2players <- hand_win_plots[[1]]
win_distribution_10players <- hand_win_plots[[9]]

#########################################################################################################################
# Plots 3 ###############################################################################################################
# Data Calculation
beaters <- function(gList, hand) {
  table_sizes <- as.numeric(gsub("players", "", names(gList)))
  tab <- do.call(rbind, lapply(seq_along(gList), function(x, df_list, n_players) {
    df <- df_list[[x]]
    loses <- df[df$hand_id %in% df$hand_id[df$winner == FALSE & df$final_hand_type == hand], ]
    dta <- as.data.frame(prop.table(table(loses$final_hand_type, loses$winner), margin = 2))
    colnames(dta) <- c("Hand", "Win", "Percentage")
    dta <- dta[dta$Win == TRUE & dta$Percentage > 0, ]
    dta$Players <- factor(n_players[x], levels = 2:10)
    return(dta)
  }, df_list = gList, n_players = table_sizes))
  return(tab)
}
fullhouse_loses <- beaters(games_list, "Full House")
flush_loses <- beaters(games_list, "Flush")
straight_loses <- beaters(games_list, "Straight")
kind3_loses <- beaters(games_list, "Three of a Kind")
fullhouse_loses$Loser <- "Full House"
flush_loses$Loser <- "Flush"
straight_loses$Loser <- "Straight"
kind3_loses$Loser <- "Three of a\nKind"
losers_8players <- rbind(fullhouse_loses, flush_loses, straight_loses, kind3_loses)
losers_8players <- losers_8players[losers_8players$Players == 8, ]
losers_8players$Hand <- droplevels(losers_8players$Hand)
losers_8players <- complete(losers_8players, Hand, Loser)
losers_8players$Percentage[is.na(losers_8players$Percentage)] <- 0
losers_8players$Loser <- factor(losers_8players$Loser, levels = gsub("Three of a Kind", "Three of a\nKind", hand_lvls))
losers_8players$Loser <- droplevels(losers_8players$Loser)

# Plot
beaters_8players <- ggplot(data = losers_8players) +
  geom_bar(aes(x = Loser, y = Percentage, fill = Hand), stat = "identity", position = "dodge") +
  geom_hline(yintercept = c(1, .75, .5, .25), linetype = "dashed", color = plt_wordscolor) +
  scale_fill_manual("Winning Hand", values = plt_colordiscr6) +
  scale_y_continuous(labels = percent, limits = c(0, 1)) +
  labs(title = "What Hands to Watch Out For") + plt_theme

#########################################################################################################################
# Plot 4 - 5 ################################################################################################################
# Data Calculation
pocket_information <- do.call(rbind, lapply(games_list, function(x) {
  # title <- "Pocket Cards Chances of Winning"
  x$pocket_card1 <- x$pocket_card1 %% 100
  x$pocket_card2 <- x$pocket_card2 %% 100
  x$Win <- ifelse(x$winner == TRUE, "Winner", "Loser")
  tmp <- x %>% group_by(pocket_card1, pocket_card2, suited_pocket, Win) %>%
    summarise(N = n()) %>%
    spread(Win, N, fill = 0) %>%
    arrange(pocket_card1, pocket_card2)
  tmp <- left_join(tmp, tmp, by = c("pocket_card1" = "pocket_card2", "pocket_card2" = "pocket_card1", "suited_pocket"))
  tmp$Winner <- ifelse(tmp$pocket_card1 == tmp$pocket_card2, tmp$Winner.x, tmp$Winner.x + tmp$Winner.y)
  tmp$Loser <- ifelse(tmp$pocket_card1 == tmp$pocket_card2, tmp$Loser.x, tmp$Loser.x + tmp$Loser.y)
  tmp <- tmp[, c("pocket_card1", "pocket_card2", "suited_pocket", "Winner", "Loser")]
  tmp$pct_get <- (tmp$Winner + tmp$Loser)/sum(tmp[, c("Winner", "Loser")])
  tmp$pct_win <- tmp$Winner/rowSums(tmp[, c("Winner", "Loser")])
  # Pairs
  tmp1 <- tmp[tmp$pocket_card1 == tmp$pocket_card2, ]
  # Unique Suited Cards
  tmp2 <- tmp[tmp$suited_pocket & tmp$pocket_card1 != tmp$pocket_card2, ]
  tmp2 <- tmp2[tmp2$pocket_card1 <= tmp2$pocket_card2, ]
  # Unique Non-suited Cards
  tmp3 <- tmp[!tmp$suited_pocket & tmp$pocket_card1 != tmp$pocket_card2, ]
  tmp3 <- tmp3[tmp3$pocket_card1 > tmp3$pocket_card2, ]
  tmp.new <- rbind(tmp1, tmp2, tmp3)

  # Control card values
  tmp.new$pocket_card1 <- gsub(11, "Ja", tmp.new$pocket_card1)
  tmp.new$pocket_card1 <- gsub(12, "Qu", tmp.new$pocket_card1)
  tmp.new$pocket_card1 <- gsub(13, "Ki", tmp.new$pocket_card1)
  tmp.new$pocket_card1 <- gsub(14, "Ac", tmp.new$pocket_card1)
  tmp.new$pocket_card2 <- gsub(11, "Ja", tmp.new$pocket_card2)
  tmp.new$pocket_card2 <- gsub(12, "Qu", tmp.new$pocket_card2)
  tmp.new$pocket_card2 <- gsub(13, "Ki", tmp.new$pocket_card2)
  tmp.new$pocket_card2 <- gsub(14, "Ac", tmp.new$pocket_card2)

  # Number of players
  tmp.new$Players <- max(table(x$hand_id[1:20])) # So we don't rely on player_id and don't have to table everything

  # Return
  tmp.new
}))
# Normalized versions of win percentages
pocket_information <- pocket_information %>%
  group_by(pocket_card1, pocket_card2) %>% mutate(
    min_win_pct = round(min(pct_win)*100, 0),
    max_win_pct = round(max(pct_win)*100, 0),
    avg_win_pct = round(mean(pct_win)*100, 0)
  ) %>%
  group_by(Players) %>% mutate(
    norm_win_pct = (min(pct_win) - pct_win)/(min(pct_win) - max(pct_win))
  ) %>% ungroup()
cards <- c("2", "3", "4", "5", "6", "7", "8", "9", "10", "Ja", "Qu", "Ki", "Ac")
pocket_information$pocket_card1 <- factor(pocket_information$pocket_card1, levels = cards)
pocket_information$pocket_card2 <- factor(pocket_information$pocket_card2, levels = cards)

# Plot
pocket_win_likeli <- lapply(unique(pocket_information$Players), function(x, df) {
  title <- "Chance of Winning with Pocket Cards"
  df <- df[df$Players == x, ]
  df$Label <- round(df$pct_win*100, 0)
  df$Label[df$pct_win <= quantile(df$pct_win, .75)] <- ""

  # Plot
  ggplot(data = df, aes(x = pocket_card1, y = pocket_card2)) +
    geom_tile(aes(fill = suited_pocket)) +
    geom_point(aes(color = pct_win, size = pct_win)) +
    geom_text(aes(x = pocket_card1, y = pocket_card2, label = Label), fontface = "bold", size = 4, color = "black") +
    scale_x_discrete(position = "top", limits = rev(levels(df$pocket_card1))) +
    scale_size_continuous("Probability of\nWinning", guide = "legend", range = c(1, 10), labels = percent) +
    scale_color_gradientn("Probability of\nWinning", guide = "legend", colours = plt_colorscale, labels = percent) +
    scale_fill_manual("Are Cards Suited?", values = c("FALSE" = "#707070", "TRUE" = plt_background), breaks = "FALSE", labels = "No") +
    # labs(title = title, subtitle = paste(x, "Players", sep = " "), x = "", y = "") +
    labs(title = NULL, x = "", y = "") +
    plt_theme
}, df = pocket_information)
pocket_win_likeli_8players <- pocket_win_likeli[[n_players_index]]

pocket_win_likeli_all <- ggplot(data = pocket_information, aes(x = pocket_card1, y = pocket_card2)) +
  geom_tile(aes(fill = suited_pocket)) +
  geom_point(data = pocket_information %>% filter(Players < 10), aes(colour = pct_win, size = pct_win)) +
  geom_point(data = pocket_information %>% filter(Players == 10), aes(colour = pct_win, size = pct_win), shape = 21, show.legend = FALSE, fill = plt_background) +
  scale_x_discrete(position = "top", limits = rev(levels(pocket_information$pocket_card1))) +
  scale_size_continuous("Probability of\nWinning", guide = "legend", range = c(1, 10), breaks = seq(0, 1, 0.2), labels = percent) +
  scale_color_gradientn("Probability of\nWinning", guide = "legend", colours = plt_colorscale, breaks = seq(0, 1, 0.2), labels = percent) +
  scale_fill_manual("Are Cards Suited?", values = c("FALSE" = "#707070", "TRUE" = plt_background), breaks = "FALSE", labels = "No") +
  labs(title = NULL, x = "", y = "") +
  plt_theme

#########################################################################################################################
# Plots not Used ########################################################################################################
# Makers 1 - how much more likely am I to have a final hand type if I start with pocket pairs
# pair_makers <- lapply(games_list, function(x, pocket_feature) {
#   results <- as.data.frame(prop.table(table(x$final_hand_type, x[, pocket_feature, drop = TRUE]), margin = 2))
#   colnames(results) <- c("Hand", pocket_feature, "Percentage")
#   results
# }, pocket_feature = "paired_pocket")
#
# plot_data <- pair_makers[[7]]
# ggplot(data = plot_data) +
#   geom_bar(aes(x = Hand, y = Percentage, fill = paired_pocket), stat = "identity", position = "dodge") +
#   plt_theme
#
# Makers 2 - how much more likely am I to have a final hand type if I start with suited pocket cards
# suited_makers <- lapply(games_list, function(x, pocket_feature) {
#   results <- as.data.frame(prop.table(table(x$final_hand_type, x[, pocket_feature, drop = TRUE]), margin = 2))
#   colnames(results) <- c("Hand", pocket_feature, "Percentage")
#   results
# }, pocket_feature = "suited_pocket")
#
# plot_data <- suited_makers[[7]]
# ggplot(data = plot_data) +
#   geom_bar(aes(x = Hand, y = Percentage, fill = suited_pocket), stat = "identity", position = "dodge") +
#   plt_theme

# # Plots
# title <- "Chance of Winning with Pocket Cards"
# win_likeli_all <- ggplot(data = pocket_information, aes(x = pocket_card1, y = pocket_card2)) +
#   geom_tile(aes(fill = suited_pocket)) +
#   geom_point(aes(colour = pct_win, size = pct_win)) +
#   scale_x_discrete(position = "top", limits = rev(levels(pocket_information$pocket_card1))) +
#   scale_size_continuous("Probability of\nWinning", guide = "legend", range = c(1, 20), breaks = seq(0, 1, 0.2), labels = percent) +
#   scale_color_gradientn("Probability of\nWinning", guide = "legend", colours = plt_colorscale, breaks = seq(0, 1, 0.2), labels = percent) +
#   scale_fill_manual("Are Cards Suited?", values = c("FALSE" = "#707070", "TRUE" = "#252732"), breaks = "FALSE", labels = "No") +
#   labs(title = title, subtitle = "2 - 10 Players", x = "", y = "",
#        caption = "The rings represents the probability that a person with the corresponding pocket cards will ultimately win the hand for a given number of players.\nLogically the smallest circle will be when there are more players (10 in this case) and the outer ring will be for the fewest number of players (2 in this case).") +
#   plt_theme
# win_likeli_all

#########################################################################################################################
# Save Results
save(
  hand_distributions_8players,
  win_distribution_2players, win_distribution_10players,
  beaters_8players,
  pocket_win_likeli_8players,
  pocket_win_likeli_all,
  # Output
  file = "Work/results/2020-01-TexasHoldem-Analysis-pt1.Rdata"
)

print("END")
#########################################################################################################################
#########################################################################################################################
#########################################################################################################################
#########################################################################################################################
#########################################################################################################################
cmaerzluft/TexasHoldEm documentation built on June 11, 2021, 2:46 p.m.