Nothing
## ----setup, include = FALSE---------------------------------------------------
library(knitr)
eval_all <- FALSE # evaluate all timings and pltree?
extra <- requireNamespace("kableExtra")
# if render using rmarkdown, use output format to decide table format
table.format <- opts_knit$get("rmarkdown.pandoc.to")
if (!identical(table.format, "latex")) table.format <- "html"
opts_knit$set(knitr.table.format = table.format)
opts_chunk$set(message = FALSE)
## ---- echo = FALSE------------------------------------------------------------
as.matrix(data.frame(choice = c(1, 1, 1, 2, 2),
`item 1` = c(1, 0, 0, 1, 0),
`item 2` = c(0, 1, 0, 0, 1),
`item 3` = c(0, 0, 1, 0, 0),
count = c(0, 0, 1, 1, 0), check.names = FALSE))
## ----soc, eval = eval_all, echo = FALSE---------------------------------------
# library(PlackettLuce)
# # read in example data sets
# preflib <- "https://www.preflib.org/static/data/"
# netflix <- read.soc(file.path(preflib, "netflix/00004-00000101.soc"))
# tshirt <- read.soc(file.path(preflib, "shirt/00012-00000001.soc"))
# sushi <- read.soc(file.path(preflib, "sushi/00014-00000001.soc"))
## ----wrappers, eval = eval_all, echo = FALSE----------------------------------
# pl <- function(dat, ...){
# # convert ordered items to ranking
# R <- as.rankings(dat[,-1], "ordering")
# # fit without adding pseudo-rankings, weight rankings by count
# PlackettLuce(R, npseudo = 0, weights = dat$Freq)
# }
# hyper2 <- function(dat, ...){
# requireNamespace("hyper2")
# # create likelihood object based on ordered items and counts
# H <- hyper2::hyper2(pnames = paste0("p", seq_len(ncol(dat) - 1)))
# for (i in seq_len(nrow(dat))){
# x <- dat[i, -1][dat[i, -1] > 0]
# H <- H + hyper2::order_likelihood(x, times = dat[i, 1])
# }
# # find parameters to maximise likelihood
# p <- hyper2::maxp(H)
# structure(p, loglik = hyper2::loglik(H, p[-length(p)]))
# }
# plmix <- function(dat, ...){
# requireNamespace("PLMIX")
# # disaggregate data (no functionality for weights or counts)
# r <- rep(seq_len(nrow(dat)), dat$Freq)
# # maximum a posteriori estimate, with non-informative prior
# # K items in each ranking, single component distribution
# # default starting values do not always work so specify as uniform
# K <- ncol(dat) - 1
# PLMIX::mapPLMIX(as.matrix(dat[r, -1]), K = K, G = 1,
# init = list(p = rep.int(1/K, K)), plot_objective = FALSE)
# }
# pmr <- function(dat, ...){
# requireNamespace("pmr")
# # convert ordered items to ranking
# R <- as.rankings(dat[,-1], "ordering")
# # create data frame with counts as required by pl
# X <- as.data.frame(unclass(R))
# X$Freq <- dat$Freq
# capture.output(res <- pmr::pl(X))
# res
# }
# statrank <- function(dat, iter){
# requireNamespace("StatRank")
# # disaggregate data (no functionality for weights or counts)
# r <- rep(seq_len(nrow(dat)), dat$Freq)
# capture.output(res <- StatRank::Estimation.PL.MLE(as.matrix(dat[r, -1]),
# iter = iter))
# res
# }
## ----timings, eval = eval_all, echo = FALSE, message = FALSE, warning = FALSE----
# timings <- function(dat, iter = NULL,
# fun = c("pl", "hyper2", "plmix", "pmr", "statrank")){
# res <- list()
# for (nm in c("pl", "hyper2", "plmix", "pmr", "statrank")){
# if (nm %in% fun){
# res[[nm]] <- suppressWarnings(
# system.time(do.call(nm, list(dat, iter)))[["elapsed"]])
# } else res[[nm]] <- NA
# }
# res
# }
# netflix_timings <- timings(netflix, 6)
# tshirt_timings <- timings(tshirt, 341,
# fun = c("pl", "hyper2", "plmix", "statrank"))
# sushi_timings <- timings(sushi, 5,
# fun = c("pl", "hyper2", "plmix", "statrank"))
## ----save-timings, echo = FALSE-----------------------------------------------
if (eval_all){
saveRDS(netflix_timings, "netflix_timings.rds", version = 2)
saveRDS(tshirt_timings, "tshirt_timings.rds", version = 2)
saveRDS(sushi_timings, "sushi_timings.rds", version = 2)
} else {
netflix_timings <- readRDS("netflix_timings.rds")
tshirt_timings <- readRDS("tshirt_timings.rds")
sushi_timings <- readRDS("sushi_timings.rds")
}
## ----data-features, echo = FALSE----------------------------------------------
features <- cbind(c(1256, 30, 5000),
c(24, 30, 4926),
c(4, 11, 10))
dimnames(features) <- list(c("Netflix", "T-shirt", "Sushi"),
c("Rankings", "Unique rankings", "Items"))
kab <- kable(features,
caption = "(ref:features)",
booktabs = TRUE)
if (extra){
kableExtra::kable_styling(kab)
} else kab
## ----timings-kable, echo = FALSE, results = "asis"----------------------------
res <- data.frame(netflix = round(unlist(netflix_timings), 3),
tshirt = format(unlist(tshirt_timings), 3),
sushi = round(unlist(sushi_timings), 3),
stringsAsFactors = FALSE)
res["pmr", "tshirt"] <- "a"
res["pmr", "sushi"] <- "a"
res <- t(as.matrix(res))
dimnames(res) <- list(c("Netflix", "T-shirt", "Sushi"),
c("PlackettLuce",
"hyper2", "PLMIX", "pmr", "StatRank"))
kab <- kable(res, booktabs = TRUE, align = rep("r", 6), caption = "(ref:timings)",
escape = FALSE)
if (extra){
kab <- kableExtra::kable_styling(kab)
kab <- kableExtra::add_header_above(kab, c(" " = 1, "Time elapsed (s)" = 5))
kableExtra::add_footnote(kab, "Function fails to complete.",
notation = "alphabet")
} else {
cat("**Time elapsed (s)**\n", kab, "\n^a Function fails to complete.",
sep = "\n")
}
## ----timings-sub, eval = eval_all, echo = FALSE, message = FALSE, warning = FALSE----
# data(Data.Nascar, package = "StatRank")
# # add column of frequencies so the format the same as before
# nascar <- cbind(n = 1, Data.Nascar)
# nascar_timings <- timings(nascar, fun = c("pl", "hyper2"))
## ----save-timings-sub, echo = FALSE-------------------------------------------
if (eval_all){
saveRDS(nascar_timings, "nascar_timings.rds", version = 2)
} else {
nascar_timings <- readRDS("nascar_timings.rds")
}
## ----nascar, echo = FALSE-----------------------------------------------------
res <- round(unlist(nascar_timings)[c("pl", "hyper2")], 3)
res <- data.frame(Rankings = 36,
Items = 83,
`Items per ranking` = "42-43",
PlackettLuce = res[1],
hyper2 = res[2],
check.names = FALSE, row.names = NULL)
kab <- kable(res, booktabs = TRUE, align = rep("r", 5),
caption = "(ref:nascar)")
if (requireNamespace("kableExtra")){
kab <- kableExtra::kable_styling(kab)
kableExtra::add_header_above(kab, c("Features of NASCAR data" = 3,
"Time elapsed (s)" = 2))
} else kab
## ----package-summary, echo = FALSE--------------------------------------------
tab <- data.frame(Feature = c("Inference", "Disconnected networks",
"Ties", "Teams", "Heterogenous case"),
PlackettLuce = c("Frequentist", "Yes", "Yes", "No", "Trees"),
hyper2 = c("No", "No", "No", "Yes", "No"),
pmr = c("No", "No", "No", "No", "No"),
StatRank = c("No", "No", "No", "No", "No"),
PLMIX = c("Bayesian", "Yes", "No", "No", "Mixtures"))
kab <- kable(tab, booktabs = TRUE,
caption = "Features of packages for fitting the Plackett-Luce model.")
if (requireNamespace("kableExtra")){
kableExtra::kable_styling(kab)
} else kab
## -----------------------------------------------------------------------------
library(PlackettLuce)
head(pudding)
## -----------------------------------------------------------------------------
i_wins <- data.frame(Winner = pudding$i, Loser = pudding$j)
j_wins <- data.frame(Winner = pudding$j, Loser = pudding$i)
if (getRversion() < "3.6.0"){
n <- nrow(pudding)
ties <- data.frame(Winner = array(split(pudding[c("i", "j")], 1:n), n),
Loser = rep(NA, 15))
} else {
ties <- data.frame(Winner = asplit(pudding[c("i", "j")], 1),
Loser = rep(NA, 15))
}
head(ties, 2)
## -----------------------------------------------------------------------------
R <- as.rankings(rbind(i_wins, j_wins, ties),
input = "orderings")
head(R, 2)
tail(R, 2)
## -----------------------------------------------------------------------------
head(unclass(R), 2)
## -----------------------------------------------------------------------------
w <- unlist(pudding[c("w_ij", "w_ji", "t_ij")])
## -----------------------------------------------------------------------------
mod <- PlackettLuce(R, weights = w, npseudo = 0, maxit = 7)
coef(mod, log = FALSE)
## -----------------------------------------------------------------------------
summary(mod)
## -----------------------------------------------------------------------------
summary(mod, ref = NULL)
## -----------------------------------------------------------------------------
qv <- qvcalc(mod)
summary(qv)
## ----pudding-qv, fig.cap = "Worth of brands of chocolate pudding. Intervals based on quasi-standard errors."----
plot(qv, xlab = "Brand of pudding", ylab = "Worth (log)", main = NULL)
## -----------------------------------------------------------------------------
R <- matrix(c(1, 2, 0, 0,
2, 0, 1, 0,
1, 0, 0, 2,
2, 1, 0, 0,
0, 1, 2, 0), byrow = TRUE, ncol = 4,
dimnames = list(NULL, LETTERS[1:4]))
R <- as.rankings(R)
## -----------------------------------------------------------------------------
A <- adjacency(R)
A
## ----always-loses, fig.cap = "Network representation of toy rankings.", fig.small = TRUE, fig.width = 3.5, fig.height = 3.5----
library(igraph)
net <- graph_from_adjacency_matrix(A)
plot(net, edge.arrow.size = 0.5, vertex.size = 30)
## -----------------------------------------------------------------------------
connectivity(A)
## -----------------------------------------------------------------------------
R2 <- R[, -4]
R2
mod <- PlackettLuce(R2, npseudo = 0)
summary(mod)
## -----------------------------------------------------------------------------
mod2 <- PlackettLuce(R)
coef(mod2)
## -----------------------------------------------------------------------------
data(nascar)
nascar[1:2, ]
## -----------------------------------------------------------------------------
R <- as.rankings(nascar, input = "orderings", items = attr(nascar, "drivers"))
R[1:2]
## -----------------------------------------------------------------------------
keep <- seq_len(83)
R2 <- R[, keep]
mod <- PlackettLuce(R2, npseudo = 0)
## -----------------------------------------------------------------------------
avRank <- apply(R, 2, function(x) mean(x[x > 0]))
coefs <- round(coef(mod)[order(avRank[keep])], 2)
head(coefs, 3)
tail(coefs, 3)
## -----------------------------------------------------------------------------
mod2 <- PlackettLuce(R)
## -----------------------------------------------------------------------------
coefs2 <- round(coef(mod2), 2)
coefs2[names(coefs)[1:3]]
coefs2[names(coefs)[81:83]]
## -----------------------------------------------------------------------------
coefs2[84:87]
## -----------------------------------------------------------------------------
coef(summary(mod2))[84:87,]
## ----nascar-qv, fig.cap = "Ability of drivers based on NASCAR 2002 season. Intervals based on quasi-standard errors.", fig.wide = TRUE----
qv <- qvcalc(mod2)
qv$qvframe <- qv$qvframe[order(coef(mod2)),]
plot(qv, xlab = NULL, ylab = "Ability (log)", main = NULL,
xaxt="n", xlim = c(3, 85))
axis(1, at = seq_len(87), labels = rownames(qv$qvframe), las = 2, cex.axis = 0.6)
## ----beans-preparation, results = "hide"--------------------------------------
example("beans", package = "PlackettLuce")
## -----------------------------------------------------------------------------
n <- nrow(beans)
G <- group(R, index = rep(seq_len(n), 4))
format(head(G, 2), width = 50)
## ----pltree, eval = eval_all--------------------------------------------------
# beans$year <- factor(beans$year)
# tree <- pltree(G ~ ., data = beans[c("season", "year", "maxTN")],
# minsize = 0.05*n, maxdepth = 3)
# tree
## ----save-tree, echo = FALSE--------------------------------------------------
if (eval_all){
png("pltree.png", width = 11, height = 5, units = "in", res = 72)
plot(tree, names = FALSE, abbreviate = 2, ylines = 2)
dev.off()
}
## ----plot-pltree-code, eval = FALSE-------------------------------------------
# plot(tree, names = FALSE, abbreviate = 2, ylines = 2)
## ----plot-pltree, fig.wide = TRUE, fig.cap = "Worth parameters for the ten trial varieties and the local variety for each node in the Plackett-Luce tree. Varieties are 1: ALS 0532-6, 2: BRT 103-182, 3: INTA Centro Sur, 4: INTA Ferroso, 5: INTA Matagalpa, 6: INTA Precoz, 7: INTA Rojo, 8: INTA Sequia, 9: Local, 10: PM2 Don Rey, 11: SJC 730-79.", fig.width = 11, fig.height = 5, out.width = "\\textwidth", echo = FALSE----
include_graphics("pltree.png")
## ----show-soc-----------------------------------------------------------------
## ----soc, eval = FALSE--------------------------------------------------------
# library(PlackettLuce)
# # read in example data sets
# preflib <- "https://www.preflib.org/static/data/"
# netflix <- read.soc(file.path(preflib, "netflix/00004-00000101.soc"))
# tshirt <- read.soc(file.path(preflib, "shirt/00012-00000001.soc"))
# sushi <- read.soc(file.path(preflib, "sushi/00014-00000001.soc"))
## ----chunk2-------------------------------------------------------------------
## ----wrappers, eval = FALSE---------------------------------------------------
# pl <- function(dat, ...){
# # convert ordered items to ranking
# R <- as.rankings(dat[,-1], "ordering")
# # fit without adding pseudo-rankings, weight rankings by count
# PlackettLuce(R, npseudo = 0, weights = dat$Freq)
# }
# hyper2 <- function(dat, ...){
# requireNamespace("hyper2")
# # create likelihood object based on ordered items and counts
# H <- hyper2::hyper2(pnames = paste0("p", seq_len(ncol(dat) - 1)))
# for (i in seq_len(nrow(dat))){
# x <- dat[i, -1][dat[i, -1] > 0]
# H <- H + hyper2::order_likelihood(x, times = dat[i, 1])
# }
# # find parameters to maximise likelihood
# p <- hyper2::maxp(H)
# structure(p, loglik = hyper2::loglik(H, p[-length(p)]))
# }
# plmix <- function(dat, ...){
# requireNamespace("PLMIX")
# # disaggregate data (no functionality for weights or counts)
# r <- rep(seq_len(nrow(dat)), dat$Freq)
# # maximum a posteriori estimate, with non-informative prior
# # K items in each ranking, single component distribution
# # default starting values do not always work so specify as uniform
# K <- ncol(dat) - 1
# PLMIX::mapPLMIX(as.matrix(dat[r, -1]), K = K, G = 1,
# init = list(p = rep.int(1/K, K)), plot_objective = FALSE)
# }
# pmr <- function(dat, ...){
# requireNamespace("pmr")
# # convert ordered items to ranking
# R <- as.rankings(dat[,-1], "ordering")
# # create data frame with counts as required by pl
# X <- as.data.frame(unclass(R))
# X$Freq <- dat$Freq
# capture.output(res <- pmr::pl(X))
# res
# }
# statrank <- function(dat, iter){
# requireNamespace("StatRank")
# # disaggregate data (no functionality for weights or counts)
# r <- rep(seq_len(nrow(dat)), dat$Freq)
# capture.output(res <- StatRank::Estimation.PL.MLE(as.matrix(dat[r, -1]),
# iter = iter))
# res
# }
## ----chunk2-------------------------------------------------------------------
## ----timings, eval = FALSE----------------------------------------------------
# timings <- function(dat, iter = NULL,
# fun = c("pl", "hyper2", "plmix", "pmr", "statrank")){
# res <- list()
# for (nm in c("pl", "hyper2", "plmix", "pmr", "statrank")){
# if (nm %in% fun){
# res[[nm]] <- suppressWarnings(
# system.time(do.call(nm, list(dat, iter)))[["elapsed"]])
# } else res[[nm]] <- NA
# }
# res
# }
# netflix_timings <- timings(netflix, 6)
# tshirt_timings <- timings(tshirt, 341,
# fun = c("pl", "hyper2", "plmix", "statrank"))
# sushi_timings <- timings(sushi, 5,
# fun = c("pl", "hyper2", "plmix", "statrank"))
## ----beans-1------------------------------------------------------------------
data(beans)
head(beans[c("best", "worst")], 2)
## ----beans-2------------------------------------------------------------------
beans$middle <- complete(beans[c("best", "worst")],
items = c("A", "B", "C"))
head(beans[c("best", "middle", "worst")], 2)
## ----beans-3------------------------------------------------------------------
head(beans[c("variety_a", "variety_b", "variety_c")], 2)
## ----beans-4------------------------------------------------------------------
order3 <- decode(beans[c("best", "middle", "worst")],
items = beans[c("variety_a", "variety_b", "variety_c")],
code = c("A", "B", "C"))
## ----beans-6------------------------------------------------------------------
head(beans[c("var_a", "var_b", "var_c")], 2)
## ----beans-7------------------------------------------------------------------
trial_variety <- unlist(beans[c("variety_a", "variety_b", "variety_c")])
outcome <- unlist(beans[c("var_a", "var_b", "var_c")])
## -----------------------------------------------------------------------------
order2 <- data.frame(Winner = ifelse(outcome == "Worse",
"Local", trial_variety),
Loser = ifelse(outcome == "Worse",
trial_variety, "Local"),
stringsAsFactors = FALSE, row.names = NULL)
head(order2, 2)
## ----beans-8------------------------------------------------------------------
R <- rbind(as.rankings(order3, input = "ordering"),
as.rankings(order2, input = "ordering"))
head(R)
tail(R)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.