TexasHoldEm: Analysis of Texas Hold'Em

#########################################################################################################################
#                                                                                                                       #
#  Title: Basic Texas Hold'em
#  Author: Chris Maerzluft
#  Last Edit: August 11, 2019
#                                                                                                                       #
#########################################################################################################################
# Description ###########################################################################################################
#
# Summarises the basic facts about Texas Hold'em. That is, if games were to be fully played out by every player what are
#   the best hands or cards. I use simulated data to study this. In order to bring in gambling techniques I need other
#   data. Some sources for that include:
#     https://poker.stackexchange.com/questions/881/publicly-available-poker-stats
#     http://poker.cs.ualberta.ca/irc_poker_database.html
#     http://web.archive.org/web/20110205042259/http://www.outflopped.com/questions/286/obfuscated-datamined-hand-histories
#
# Approx size of plots in blog: 861 x 612
#
# Setup R ###############################################################################################################
# Clean environment
rm(list = ls(all = TRUE))
gc()

# Libraries Needed
library(dplyr)
library(tidyr)
library(ggplot2)
library(scales)
library(wesanderson)
library(grid)
library(gridExtra)
library(cowplot)
options(scipen = 999)

#########################################################################################################################
# Load Data #############################################################################################################
n_players_index <- 8 - 1
datasize <- 1
units <- "M"
files <- grep(sprintf("sim%s%sgames", datasize, units), list.files("data-raw/TexasHoldEm"), value = TRUE)
# Make sure 10 is moved to last spot
files <- files[order(nchar(files), files)]
games_list <- list()
for (i1 in files) {
  print(i1)
  load(sprintf("data-raw/TexasHoldEm/%s", i1))
  name <- gsub(sprintf("sim%s%sgames_", datasize, units), "", i1)
  name <- gsub("players.Rdata", "", name)
  games_list[[sprintf("players%s", name)]] <- games
}

#########################################################################################################################
# Plot Options ##########################################################################################################
plt_background <- "#272935"
plt_wordscolor <- "white"
plt_colorscale <- wes_palette("Zissou1", 100, type = "continuous")
plt_colordiscr5 <- wes_palette("Zissou1", 5, type = "discrete")
plt_colordiscr6 <- c("#8c510a", "#d8b365", "#f6e8c3", "#c7eae5", "#5ab4ac", "#01665e")

plt_theme <- theme(
  plot.background = element_rect(fill = plt_background, colour = plt_background),
  plot.title = element_text(hjust = 0.5, size = 24, colour = plt_wordscolor),
  plot.subtitle = element_text(hjust = 0.5, size = 18, colour = plt_wordscolor),
  plot.caption = element_text(hjust = 0, size = 12, colour = plt_wordscolor),

  legend.text = element_text(colour = plt_wordscolor),
  legend.title = element_text(colour = plt_wordscolor),
  legend.background = element_rect(fill = plt_background),
  legend.key = element_rect(fill = plt_background, color = plt_background),

  panel.background = element_blank(),
  panel.grid = element_blank(),

  axis.title = element_blank(),
  axis.text = element_text(size = 16, colour = plt_wordscolor),
  axis.ticks = element_blank(),
  axis.line = element_blank()
)

#########################################################################################################################
# Data handling #########################################################################################################
hand_lvls <- c("Straight Flush", "Four of a Kind", "Full House", "Flush", "Straight", "Three of a Kind", "Two Pair",
               "Pair", "High Card")
games_list <- lapply(games_list, function(x) {
  # x$flop_hand_type <- factor(x$flop_hand_type, levels = hand_lvls)
  # x$turn_hand_type <- factor(x$turn_hand_type, levels = hand_lvls)
  x$final_hand_type <- factor(x$final_hand_type, levels = hand_lvls)
  return(x)
})

#########################################################################################################################
# Table 1 ###############################################################################################################
hand_distributions_8players <- prop.table(table(games_list[[n_players_index]]$final_hand_type))
hand_distributions_8players <- sprintf("%0.4f%%", hand_distributions_8players*100)

#########################################################################################################################
# Plot 1 and 2 ##########################################################################################################
# Data calculation
hand_win_tables <- lapply(games_list, function(x) {
  # What are chances a hand wins given you have it
  win_given_hand <- as.data.frame(prop.table(table(x$final_hand_type, x$winner), margin = 1))
  colnames(win_given_hand) <- c("Hand", "Win", "Percentage")
  win_given_hand$Comparison <- "Conditional"
  # What are chances a hand wins ever
  win_overall <- as.data.frame(prop.table(table(x$final_hand_type, x$winner), margin = 2))
  colnames(win_overall) <- c("Hand", "Win", "Percentage")
  win_overall$Comparison <- "Overall"
  win_pct <- rbind(win_given_hand, win_overall)
  win_pct <- win_pct[win_pct$Win == TRUE, ]
  win_pct$Percentage[is.na(win_pct$Percentage)] <- 0
  win_pct$Comparison <- factor(win_pct$Comparison, levels = c("Overall", "Conditional"))

  return(win_pct)
})
# Plot
hand_win_plots <- lapply(seq_along(hand_win_tables), function(x, df_list, name) {
  tmp_dta <- df_list[[x]]
  # title <- paste(gsub("players", "", name[x]), "Players", sep = " ")
  # p <- ggplot(data = tmp_dta) +
  #   geom_bar(aes(x = Hand, y = Percentage, fill = Comparison), stat = "identity", position = "dodge") +
  #   geom_hline(yintercept = c(1, .75, .5, .25), linetype = "dashed", color = plt_wordscolor) +
  #   scale_y_continuous(labels = percent, limits = c(0, 1), position = "right") +
  #   scale_x_discrete(limits = rev(levels(tmp_dta$Hand))) +
  #   # scale_fill_manual("How often does a hand win:", values = plt_colordiscr5[c(3, 1)]) +
  #   scale_fill_manual("How often does a hand win?", values = plt_colordiscr6[c(2, 5)]) +
  #   plt_theme + theme(
  #     legend.position = "top",
  #     legend.text = element_text(size = 16),
  #     legend.title = element_text(size = 16)
  #   ) + coord_flip() #+ ggtitle(title)

  tmp_dta$Labels <- levels(tmp_dta$Hand)
  tmp_dta$Labels <- gsub("Straight Flush", "Straight\nFlush", tmp_dta$Labels)
  tmp_dta$Labels <- gsub("Four of a Kind", "Four of a\nKind", tmp_dta$Labels)
  tmp_dta$Labels <- gsub("Three of a Kind", "Three of a\nKind", tmp_dta$Labels)

  g.mid <- ggplot(tmp_dta) +
    geom_text(aes(x = 1, y = Hand, label = Labels), color = plt_wordscolor) +
    scale_y_discrete(limits = rev(levels(tmp_dta$Hand)), expand = c(0.082, 0.082)) +
    labs(title = "", x = NULL, y = NULL) +
    theme(
      plot.background = element_rect(fill = plt_background, colour = plt_background),
      axis.title = element_blank(),
      panel.grid = element_blank(),
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank(),
      panel.background = element_blank(),
      axis.text.x = element_text(color = NA),
      axis.ticks.x = element_line(color = NA),
      plot.margin = margin(t = 5, b = 5)
    )

  g1 <- ggplot(data = tmp_dta %>% filter(Comparison == "Overall")) +
    geom_bar(aes(x = Hand, y = Percentage), stat = "identity", fill = plt_colordiscr6[2]) +
    geom_hline(yintercept = c(1, .75, .5, .25), linetype = "dashed", color = plt_wordscolor) +
    scale_y_reverse(labels = percent) +
    scale_x_discrete(limits = rev(levels(tmp_dta$Hand))) +
    labs(title = "Overall") +
    theme(
      axis.title.x = element_blank(),
      axis.title.y = element_blank(),
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank(),
      plot.margin = margin(l = 15, r = 5, t = 5, b = 5)
    ) +
    plt_theme +
    coord_flip()

  g2 <- ggplot(data = tmp_dta %>% filter(Comparison == "Conditional")) +
    geom_bar(aes(x = Hand, y = Percentage), stat = "identity", fill = plt_colordiscr6[5]) +
    geom_hline(yintercept = c(1, .75, .5, .25), linetype = "dashed", color = plt_wordscolor) +
    scale_y_continuous(labels = percent) +
    scale_x_discrete(limits = rev(levels(tmp_dta$Hand))) +
    labs(title = "Conditional", x = NULL) +
    theme(
      axis.title.x = element_blank(),
      axis.title.y = element_blank(),
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank(),
      plot.margin = margin(l = 5, r = 15, t = 5, b = 5)
    ) +
    plt_theme +
    coord_flip()

  title <- ggdraw() +
    draw_label(
      paste(gsub("players", "", name[x]), "Players", sep = " "),
      size = 24,
      color = plt_wordscolor,
      fontface = "bold"
    ) +
    draw_line(
      c(0, 1), c(0.000000001, 0.000000001),
      color = plt_wordscolor,
      size = 2
    ) + theme(
      plot.background = element_rect(fill = plt_background, colour = plt_background)
    )

  plots <- plot_grid(g1, g.mid, g2, nrow = 1, rel_widths = c(45/100, 15/100, 45/100))
  p <- plot_grid(title, plots, ncol = 1, rel_heights = c(0.1, 1))

  return(p)
}, df_list = hand_win_tables, name = names(hand_win_tables))
win_distribution_2players <- hand_win_plots[[1]]
win_distribution_10players <- hand_win_plots[[9]]

#########################################################################################################################
# Plots 3 ###############################################################################################################
# Data Calculation
beaters <- function(gList, hand) {
  table_sizes <- as.numeric(gsub("players", "", names(gList)))
  tab <- do.call(rbind, lapply(seq_along(gList), function(x, df_list, n_players) {
    df <- df_list[[x]]
    loses <- df[df$hand_id %in% df$hand_id[df$winner == FALSE & df$final_hand_type == hand], ]
    dta <- as.data.frame(prop.table(table(loses$final_hand_type, loses$winner), margin = 2))
    colnames(dta) <- c("Hand", "Win", "Percentage")
    dta <- dta[dta$Win == TRUE & dta$Percentage > 0, ]
    dta$Players <- factor(n_players[x], levels = 2:10)
    return(dta)
  }, df_list = gList, n_players = table_sizes))
  return(tab)
}
fullhouse_loses <- beaters(games_list, "Full House")
flush_loses <- beaters(games_list, "Flush")
straight_loses <- beaters(games_list, "Straight")
kind3_loses <- beaters(games_list, "Three of a Kind")
fullhouse_loses$Loser <- "Full House"
flush_loses$Loser <- "Flush"
straight_loses$Loser <- "Straight"
kind3_loses$Loser <- "Three of a\nKind"
losers_8players <- rbind(fullhouse_loses, flush_loses, straight_loses, kind3_loses)
losers_8players <- losers_8players[losers_8players$Players == 8, ]
losers_8players$Hand <- droplevels(losers_8players$Hand)
losers_8players <- complete(losers_8players, Hand, Loser)
losers_8players$Percentage[is.na(losers_8players$Percentage)] <- 0
losers_8players$Loser <- factor(losers_8players$Loser, levels = gsub("Three of a Kind", "Three of a\nKind", hand_lvls))
losers_8players$Loser <- droplevels(losers_8players$Loser)

# Plot
beaters_8players <- ggplot(data = losers_8players) +
  geom_bar(aes(x = Loser, y = Percentage, fill = Hand), stat = "identity", position = "dodge") +
  geom_hline(yintercept = c(1, .75, .5, .25), linetype = "dashed", color = plt_wordscolor) +
  scale_fill_manual("Winning Hand", values = plt_colordiscr6) +
  scale_y_continuous(labels = percent, limits = c(0, 1)) +
  labs(title = "What Hands to Watch Out For") + plt_theme

#########################################################################################################################
# Plot 4 - 5 ################################################################################################################
# Data Calculation
pocket_information <- do.call(rbind, lapply(games_list, function(x) {
  # title <- "Pocket Cards Chances of Winning"
  x$pocket_card1 <- x$pocket_card1 %% 100
  x$pocket_card2 <- x$pocket_card2 %% 100
  x$Win <- ifelse(x$winner == TRUE, "Winner", "Loser")
  tmp <- x %>% group_by(pocket_card1, pocket_card2, suited_pocket, Win) %>%
    summarise(N = n()) %>%
    spread(Win, N, fill = 0) %>%
    arrange(pocket_card1, pocket_card2)
  tmp <- left_join(tmp, tmp, by = c("pocket_card1" = "pocket_card2", "pocket_card2" = "pocket_card1", "suited_pocket"))
  tmp$Winner <- ifelse(tmp$pocket_card1 == tmp$pocket_card2, tmp$Winner.x, tmp$Winner.x + tmp$Winner.y)
  tmp$Loser <- ifelse(tmp$pocket_card1 == tmp$pocket_card2, tmp$Loser.x, tmp$Loser.x + tmp$Loser.y)
  tmp <- tmp[, c("pocket_card1", "pocket_card2", "suited_pocket", "Winner", "Loser")]
  tmp$pct_get <- (tmp$Winner + tmp$Loser)/sum(tmp[, c("Winner", "Loser")])
  tmp$pct_win <- tmp$Winner/rowSums(tmp[, c("Winner", "Loser")])
  # Pairs
  tmp1 <- tmp[tmp$pocket_card1 == tmp$pocket_card2, ]
  # Unique Suited Cards
  tmp2 <- tmp[tmp$suited_pocket & tmp$pocket_card1 != tmp$pocket_card2, ]
  tmp2 <- tmp2[tmp2$pocket_card1 <= tmp2$pocket_card2, ]
  # Unique Non-suited Cards
  tmp3 <- tmp[!tmp$suited_pocket & tmp$pocket_card1 != tmp$pocket_card2, ]
  tmp3 <- tmp3[tmp3$pocket_card1 > tmp3$pocket_card2, ]
  tmp.new <- rbind(tmp1, tmp2, tmp3)

  # Control card values
  tmp.new$pocket_card1 <- gsub(11, "Ja", tmp.new$pocket_card1)
  tmp.new$pocket_card1 <- gsub(12, "Qu", tmp.new$pocket_card1)
  tmp.new$pocket_card1 <- gsub(13, "Ki", tmp.new$pocket_card1)
  tmp.new$pocket_card1 <- gsub(14, "Ac", tmp.new$pocket_card1)
  tmp.new$pocket_card2 <- gsub(11, "Ja", tmp.new$pocket_card2)
  tmp.new$pocket_card2 <- gsub(12, "Qu", tmp.new$pocket_card2)
  tmp.new$pocket_card2 <- gsub(13, "Ki", tmp.new$pocket_card2)
  tmp.new$pocket_card2 <- gsub(14, "Ac", tmp.new$pocket_card2)

  # Number of players
  tmp.new$Players <- max(table(x$hand_id[1:20])) # So we don't rely on player_id and don't have to table everything

  # Return
  tmp.new
}))
# Normalized versions of win percentages
pocket_information <- pocket_information %>%
  group_by(pocket_card1, pocket_card2) %>% mutate(
    min_win_pct = round(min(pct_win)*100, 0),
    max_win_pct = round(max(pct_win)*100, 0),
    avg_win_pct = round(mean(pct_win)*100, 0)
  ) %>%
  group_by(Players) %>% mutate(
    norm_win_pct = (min(pct_win) - pct_win)/(min(pct_win) - max(pct_win))
  ) %>% ungroup()
cards <- c("2", "3", "4", "5", "6", "7", "8", "9", "10", "Ja", "Qu", "Ki", "Ac")
pocket_information$pocket_card1 <- factor(pocket_information$pocket_card1, levels = cards)
pocket_information$pocket_card2 <- factor(pocket_information$pocket_card2, levels = cards)

# Plot
pocket_win_likeli <- lapply(unique(pocket_information$Players), function(x, df) {
  title <- "Chance of Winning with Pocket Cards"
  df <- df[df$Players == x, ]
  df$Label <- round(df$pct_win*100, 0)
  df$Label[df$pct_win <= quantile(df$pct_win, .75)] <- ""

  # Plot
  ggplot(data = df, aes(x = pocket_card1, y = pocket_card2)) +
    geom_tile(aes(fill = suited_pocket)) +
    geom_point(aes(color = pct_win, size = pct_win)) +
    geom_text(aes(x = pocket_card1, y = pocket_card2, label = Label), fontface = "bold", size = 4, color = "black") +
    scale_x_discrete(position = "top", limits = rev(levels(df$pocket_card1))) +
    scale_size_continuous("Probability of\nWinning", guide = "legend", range = c(1, 10), labels = percent) +
    scale_color_gradientn("Probability of\nWinning", guide = "legend", colours = plt_colorscale, labels = percent) +
    scale_fill_manual("Are Cards Suited?", values = c("FALSE" = "#707070", "TRUE" = plt_background), breaks = "FALSE", labels = "No") +
    # labs(title = title, subtitle = paste(x, "Players", sep = " "), x = "", y = "") +
    labs(title = NULL, x = "", y = "") +
    plt_theme
}, df = pocket_information)
pocket_win_likeli_8players <- pocket_win_likeli[[n_players_index]]

pocket_win_likeli_all <- ggplot(data = pocket_information, aes(x = pocket_card1, y = pocket_card2)) +
  geom_tile(aes(fill = suited_pocket)) +
  geom_point(data = pocket_information %>% filter(Players < 10), aes(colour = pct_win, size = pct_win)) +
  geom_point(data = pocket_information %>% filter(Players == 10), aes(colour = pct_win, size = pct_win), shape = 21, show.legend = FALSE, fill = plt_background) +
  scale_x_discrete(position = "top", limits = rev(levels(pocket_information$pocket_card1))) +
  scale_size_continuous("Probability of\nWinning", guide = "legend", range = c(1, 10), breaks = seq(0, 1, 0.2), labels = percent) +
  scale_color_gradientn("Probability of\nWinning", guide = "legend", colours = plt_colorscale, breaks = seq(0, 1, 0.2), labels = percent) +
  scale_fill_manual("Are Cards Suited?", values = c("FALSE" = "#707070", "TRUE" = plt_background), breaks = "FALSE", labels = "No") +
  labs(title = NULL, x = "", y = "") +
  plt_theme

#########################################################################################################################
# Plots not Used ########################################################################################################
# Makers 1 - how much more likely am I to have a final hand type if I start with pocket pairs
# pair_makers <- lapply(games_list, function(x, pocket_feature) {
#   results <- as.data.frame(prop.table(table(x$final_hand_type, x[, pocket_feature, drop = TRUE]), margin = 2))
#   colnames(results) <- c("Hand", pocket_feature, "Percentage")
#   results
# }, pocket_feature = "paired_pocket")
#
# plot_data <- pair_makers[[7]]
# ggplot(data = plot_data) +
#   geom_bar(aes(x = Hand, y = Percentage, fill = paired_pocket), stat = "identity", position = "dodge") +
#   plt_theme
#
# Makers 2 - how much more likely am I to have a final hand type if I start with suited pocket cards
# suited_makers <- lapply(games_list, function(x, pocket_feature) {
#   results <- as.data.frame(prop.table(table(x$final_hand_type, x[, pocket_feature, drop = TRUE]), margin = 2))
#   colnames(results) <- c("Hand", pocket_feature, "Percentage")
#   results
# }, pocket_feature = "suited_pocket")
#
# plot_data <- suited_makers[[7]]
# ggplot(data = plot_data) +
#   geom_bar(aes(x = Hand, y = Percentage, fill = suited_pocket), stat = "identity", position = "dodge") +
#   plt_theme

# # Plots
# title <- "Chance of Winning with Pocket Cards"
# win_likeli_all <- ggplot(data = pocket_information, aes(x = pocket_card1, y = pocket_card2)) +
#   geom_tile(aes(fill = suited_pocket)) +
#   geom_point(aes(colour = pct_win, size = pct_win)) +
#   scale_x_discrete(position = "top", limits = rev(levels(pocket_information$pocket_card1))) +
#   scale_size_continuous("Probability of\nWinning", guide = "legend", range = c(1, 20), breaks = seq(0, 1, 0.2), labels = percent) +
#   scale_color_gradientn("Probability of\nWinning", guide = "legend", colours = plt_colorscale, breaks = seq(0, 1, 0.2), labels = percent) +
#   scale_fill_manual("Are Cards Suited?", values = c("FALSE" = "#707070", "TRUE" = "#252732"), breaks = "FALSE", labels = "No") +
#   labs(title = title, subtitle = "2 - 10 Players", x = "", y = "",
#        caption = "The rings represents the probability that a person with the corresponding pocket cards will ultimately win the hand for a given number of players.\nLogically the smallest circle will be when there are more players (10 in this case) and the outer ring will be for the fewest number of players (2 in this case).") +
#   plt_theme
# win_likeli_all

#########################################################################################################################
# Save Results
save(
  hand_distributions_8players,
  win_distribution_2players, win_distribution_10players,
  beaters_8players,
  pocket_win_likeli_8players,
  pocket_win_likeli_all,
  # Output
  file = "Work/results/2020-01-TexasHoldem-Analysis-pt1.Rdata"
)

print("END")
#########################################################################################################################
#########################################################################################################################
#########################################################################################################################
#########################################################################################################################
#########################################################################################################################
cmaerzluft/TexasHoldEm documentation built on June 11, 2021, 2:46 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
cmaerzluft/TexasHoldEm
Analysis of Texas Hold'Em

Work/code/2020-04-TexasHoldem-Analysis-pt1.R
In cmaerzluft/TexasHoldEm: Analysis of Texas Hold'Em

R Package Documentation

Browse R Packages

We want your feedback!

cmaerzluft/TexasHoldEm Analysis of Texas Hold'Em

Work/code/2020-04-TexasHoldem-Analysis-pt1.R In cmaerzluft/TexasHoldEm: Analysis of Texas Hold'Em

R Package Documentation

Browse R Packages

We want your feedback!

cmaerzluft/TexasHoldEm
Analysis of Texas Hold'Em

Work/code/2020-04-TexasHoldem-Analysis-pt1.R
In cmaerzluft/TexasHoldEm: Analysis of Texas Hold'Em