#### Documented in plotProportions

## P(win|not tie) in terms of expit(lambda_i - lambda_j)
GenDavidsonTie <- function(p){
scale <- match("tie.scale", substring(names(coef), 1, 9), 0)
if (scale != 0) scale <- exp(coef[scale])
else scale <- 1
tie.mode <- match("tie.mode", substring(names(coef), 1, 8), 0)
if (tie.mode != 0) tie.mode <- coef["tie.mode"]
delta <- coef[match("tie.max", substring(names(coef), 1, 7))]
## first player is at home
weight1 <- plogis(tie.mode)
weight2 <- 1 - weight1
## plogis = expit
plogis(delta - scale * (weight1 * log(weight1) + weight2 * log(weight2)) +
scale * (weight1 * log(p) + weight2 * log(1-p)))
}

#tmp <- eval(substitute(player1), data, parent.frame())

#' Plot Proportions of Tied Matches and Non-tied Matches Won
#'
#' Plot proportions of tied matches and non-tied matches won by the first
#' player, within matches binned by the relative player ability, as expressed
#' by the probability that the first player wins, given the match is not a tie.
#' Add fitted lines for each set of matches, as given by the generalized
#' Davidson model.
#'
#' If home.adv is specified, the results are re-ordered if necessary so
#' that the home player comes first; any matches played on neutral ground are
#' omitted.
#'
#' First the probability that the first player wins given that the match is not
#' a tie is computed: \deqn{expit(home.adv + abilities[player1] -
#' abilities[player2])} where home.adv and abilities are
#' parameters from a generalized Davidson model that have been estimated on the
#' log scale.
#'
#' The matches are then binned according to this probability, grouping together
#' matches with similar relative ability between the first player and the
#' second player. Within each bin, the proportion of tied matches is computed
#' and these proportions are plotted against the mid-point of the bin. Then the
#' bins are re-computed omitting the tied games and the proportion of non-tied
#' matches won by the first player is found and plotted against the new
#' mid-point.
#'
#' Finally curves are added for the probability of a tie and the conditional
#' probability of win given the match is not a tie, under a generalized
#' Davidson model with parameters as specified by tie.max,
#' tie.scale and tie.mode.
#'
#' The function can also be used to plot the proportions of wins along with the
#' fitted probability of a win under the Bradley-Terry model.
#'
#' @param win a logical vector: TRUE if player1 wins, FALSE
#' otherwise.
#' @param tie a logical vector: TRUE if the outcome is a tie,
#' FALSE otherwise (NULL if there are no ties).
#' @param loss a logical vector: TRUE if player1 loses, FALSE
#' otherwise.
#' @param player1 an ID factor specifying the first player in each contest,
#' with the same set of levels as player2.
#' @param player2 an ID factor specifying the second player in each contest,
#' with the same set of levels as player2.
#' @param abilities the fitted abilities from a generalized Davidson model (or
#' @param home.adv if applicable, the fitted home advantage parameter from a
#' generalized Davidson model (or a Bradley-Terry model).
#' @param tie.max the fitted parameter from a generalized Davidson model
#' corresponding to the maximum tie probability.
#' @param tie.scale if applicable, the fitted parameter from a generalized
#' Davidson model corresponding to the scale of dependence of the tie
#' probability on the probability that player1 wins, given the outcome
#' is not a draw.
#' @param tie.mode if applicable, the fitted parameter from a generalized
#' Davidson model corresponding to the location of maximum tie probability, in
#' terms of the probability that player1 wins, given the outcome is not
#' a draw.
#' @param at.home1 a logical vector: TRUE if player1 is at home,
#' FALSE otherwise.
#' @param at.home2 a logical vector: TRUE if player2 is at home,
#' FALSE otherwise.
#' @param data an optional data frame providing variables required by the
#' model, with one observation per match.
#' @param subset an optional logical or numeric vector specifying a subset of
#' observations to include in the plot.
#' @param bin.size the approximate number of matches in each bin.
#' @param xlab the label to use for the x-axis.
#' @param ylab the label to use for the y-axis.
#' @param legend text to use for the legend.
#' @param col a vector specifying colours to use for the proportion of non-tied
#' matches won and the proportion of tied matches.
#' @param \dots further arguments passed to plot.
#' @return A list of data frames: \item{win}{ a data frame comprising
#' prop.win, the proportion of non-tied matches won by the first player
#' in each bin and bin.win, the mid-point of each bin. } \item{tie}{
#' (when ties are present) a data frame comprising prop.tie, the
#' proportion of tied matches in each bin and bin.tie, the mid-point of
#' each bin. }
#' @note This function is designed for single match outcomes, therefore data
#' aggregated over player pairs will need to be expanded.
#' @author Heather Turner
#' @seealso [GenDavidson()], [BTm()]
#' @keywords models nonlinear
#' @examples
#'
#' #### A Bradley-Terry example using icehockey data
#'
#' standardBT <- BTm(outcome = result,
#'                   player1 = visitor, player2 = opponent,
#'                   id = "team", data = icehockey)
#'
#' ## comparing teams on a "level playing field"
#' levelBT <- BTm(result,
#'                data.frame(team = visitor, home.ice = 0),
#'                data.frame(team = opponent, home.ice = home.ice),
#'                ~ team + home.ice,
#'                id = "team", data = icehockey)
#'
#' ## compare fit to observed proportion won
#' ## exclude tied matches as not explicitly modelled here
#' par(mfrow = c(1, 2))
#' plotProportions(win = result == 1, loss = result == 0,
#'                 player1 = visitor, player2 = opponent,
#'                 abilities = BTabilities(standardBT)[,1],
#'                 data = icehockey, subset = result != 0.5,
#'                 main = "Without home advantage")
#'
#' plotProportions(win = result == 1, loss = result == 0,
#'                 player1 = visitor, player2 = opponent,
#'                 at.home1 = 0, at.home2 = home.ice,
#'                 abilities = BTabilities(levelBT)[,1],
#'                 data = icehockey, subset = result != 0.5,
#'                 main = "With home advantage")
#'
#' #### A generalized Davidson example using football data
#' if (require(gnm)) {
#'
#'     ## subset to first and last season for illustration
#'     football <- subset(football, season %in% c("2008-9", "2012-13"))
#'
#'     ## convert to trinomial counts
#'     football.tri <- expandCategorical(football, "result", idvar = "match")
#'
#'     ## add variable to indicate whether team playing at home
#'     football.tri$at.home <- !logical(nrow(football.tri)) #' #' ## fit Davidson model #' Dav <- gnm(count ~ GenDavidson(result == 1, result == 0, result == -1, #' home:season, away:season, home.adv = ~1, #' tie.max = ~1, #' at.home1 = at.home, #' at.home2 = !at.home) - 1, #' eliminate = match, family = poisson, data = football.tri) #' #' ## fit shifted & scaled Davidson model #' shifScalDav <- gnm(count ~ #' GenDavidson(result == 1, result == 0, result == -1, #' home:season, away:season, home.adv = ~1, #' tie.max = ~1, tie.scale = ~1, tie.mode = ~1, #' at.home1 = at.home, #' at.home2 = !at.home) - 1, #' eliminate = match, family = poisson, data = football.tri) #' #' ## diagnostic plots #' main <- c("Davidson", "Shifted & Scaled Davidson") #' mod <- list(Dav, shifScalDav) #' names(mod) <- main #' alpha <- names(coef(Dav)[-(1:2)]) #' #' ## use football.tri data so that at.home can be found, #' ## but restrict to actual match results #' par(mfrow = c(1,2)) #' for (i in 1:2) { #' coef <- parameters(mod[[i]]) #' plotProportions(result == 1, result == 0, result == -1, #' home:season, away:season, #' abilities = coef[alpha], #' home.adv = coef["home.adv"], #' tie.max = coef["tie.max"], #' tie.scale = coef["tie.scale"], #' tie.mode = coef["tie.mode"], #' at.home1 = at.home, #' at.home2 = !at.home, #' main = main[i], #' data = football.tri, subset = count == 1) #' } #' } #' #' @importFrom graphics curve plot points #' @importFrom stats na.omit #' @export plotProportions <- function(win, tie = NULL, loss, player1, player2, abilities = NULL, home.adv = NULL, tie.max = NULL, tie.scale = NULL, tie.mode = NULL, at.home1 = NULL, at.home2 = NULL, data = NULL, subset = NULL, bin.size = 20, xlab = "P(player1 wins | not a tie)", ylab = "Proportion", legend = NULL, col = 1:2, ...){ call <- as.list(match.call()) var <- intersect(names(call), c("win", "tie", "loss", "player1", "player2", "at.home1", "at.home2")) var <- var[!vapply(call[var], is.null, logical(1))] dat <- with(data, do.call("data.frame", call[var])) if (!missing(subset)){ subset <- eval(substitute(subset), data, parent.frame()) dat <- subset(dat, subset) } if (!missing(tie) && sum(dat$tie) == 0) dat$tie <- NULL if (!is.null(home.adv) && (missing(at.home1) || missing(at.home2))) stop("at.home1 and at.home2 must be specified") if (!is.null(home.adv)){ ## exclude neutral contests, make sure home player is first dat <- subset(dat, at.home1 | at.home2) swap <- which(as.logical(dat$at.home2))
if (length(swap)) {
dat$win[swap] <- dat$loss[swap]
if (is.null(dat$tie)) dat$loss[swap] <- !dat$win[swap] else dat$loss[swap] <- !(dat$win[swap] | dat$tie[swap])
tmp <- dat$player1[swap] dat$player1[swap] <- dat$player2[swap] dat$player2[swap] <- tmp
dat$at.home1[swap] <- TRUE dat$at.home2[swap] <- FALSE
}
### get proportions
p <- with(dat, plogis(home.adv + abilities[as.character(player1)] -
abilities[as.character(player2)]))
## Depending on the distribution of p_ij (across all matches),
## divide the range of probabilities p_ij into discrete "bins", each
## of which has at least (say) 20 matches in it
getBins <- function(p, bin.size) {
## alternatively estimate bins to same size intervals
## at least bin.size - distribute extra evenly over range
min.size <- bin.size
n <- length(p)
r <- n %% min.size
size <- rep(min.size, n %/% min.size)
if (r > 0) {
step <- length(size)/r
extra <- round(seq(from = step/2 + 0.01,
to = step/2 + 0.01 + (r - 1)*step, by = step))
size[extra] <- min.size + 1
}
bin <- factor(rep(seq(length(size)), size))[match(p, sort(p))]
low <- sort(p)[cumsum(c(1, size[-length(size)]))] #first
high <- sort(p)[cumsum(size)] #last
mid <- (high - low)/2 + low
list(bin = bin, mid = mid)
}
winBin <- getBins(p, bin.size)
## Within each bin b, calculate
## d_b = proportion of matches in that bin that were drawn
if (!is.null(dat$tie)) { tieBin <- winBin tri <- with(dat, win - (!win & !tie)) d_b <- tapply(tri, tieBin$bin, function(x) sum(x == 0)/length(x))
## recompute bins omitting ties
winBin <- getBins(p[!dat$tie], bin.size) } ## h_b = proportion of *non-drawn* matches in that bin that were won ## by the home team if (!is.null(dat$tie)) {
h_b <- tapply(tri[!dat$tie], winBin$bin,
function(x) sum(x == 1)/length(x))
}
else h_b <- tapply(dat$win, winBin$bin, function(x) sum(x == 1)/length(x))

## Plot d_b and h_b against the bin midpoints, in a plot with
## axis limits both (0,1)
plot(h_b ~ winBin$mid, xlim = c(0, 1), ylim = c(0, 1), xlab = xlab, ylab = ylab, ...) if (missing(legend)) { if (is.null(dat$tie)) legend <- "Matches won"
else legend <- c("Non-tied matches won", "Matches tied")
}
legend("topleft", legend, col = col[c(1, 2[!missing(tie)])],
pch = 1)
if (!is.null(dat$tie)) points(d_b ~ tieBin$mid, col = col)

## Add to the plot the lines/curves
## y = x
## y = expit(log(nu * sqrt(p_ij * (1 - p_ij))))
## The d_b should lie around the latter curve, and the h_b should
## lie around the former line.  Any clear patterns of departure are
## of interest.
curve(I, 0, 1, add = TRUE)

env <- new.env()
environment(GenDavidsonTie) <- env
tie.max = unname(tie.max),
tie.scale = unname(tie.scale),
tie.mode = unname(tie.mode)))
assign("coef", coef, envir=env)
curve(GenDavidsonTie, 0, 1, col = col, add = TRUE)
out <- list(win = data.frame(prop.win = h_b, bin.win = winBin$mid)) if (!is.null(dat$tie))
out <- c(out, tie = data.frame(prop.tie = d_b, bin.tie = tieBin\$mid))
invisible(out)
}