In tagteam/SuperRanker: Sequential Rank Agreement

knitr::opts_chunk$set(echo = TRUE)

Set up packages and global parameters

library(SuperRanker)
library(TopKLists)
library(MESS)

#Simulate a list of ranks
# K location of jump
# P = total length of predictors
# p = probability that top is permuted
simRank <- function(K, P, p) {
    topPermuted <- rbinom(1, 1, p)
    top <- (1-topPermuted) * (1:K) + topPermuted * sample(1:K)
    bottom <- sample((K+1):P)
    c(top, bottom)
}

blockSim <- function(blocksizes, maxlength) {
  x <- seq(maxlength)
  slut <- pmin(maxlength, c(cumsum(blocksizes), maxlength))
  start <- c(1, pmin(maxlength, cumsum(blocksizes)+1))
  unlist(sapply(seq(length(start)), function(i) { sample(seq(start[i], slut[i]))}))
}

# Example simulation
myrank <- sapply(1:3, function(i) simRank(K=10, P=100, p=0))

nsim <- 1000
N <- 400
listlength <- 1000
threshold <- 12     # Our threshold. q*
nlists <- 10       # Number of lists available
Klist <- c(3, 5, 8, 15, 20)  # How far the lists agree. The truth
nu <- 6 # Tuning parameter for topK. Should be somewhere between 2 and 10 as per their paper

Run simulations

simple <- function(K, nlists=nlists) {

res <- sapply(1:nsim, function(i) {

  # Simulate lists
  # Fixed parameters for now
  simlists <- bootstrappedlists(n=400, 
                                nlists=8, 
                                preal=15, 
                                listlength, 
                                effect=15)

  ## Return the results from sra and topK
  topk <- j0.multi(simlists, d=K, v=nu)$maxK
  myres <- c(
    min(which(sra(simlists, type="mad")>=K),listlength)-1,
    ifelse(is.na(topk), 1, topk)-1  # Becomes 0 later on
    )
  ## Indsæt predictor values her
  c(myres, length(unique(as.vector(simlists[0:myres[1],]))), length(unique(as.vector(simlists[0:myres[2],]))))
  })
res


# Store the results in a data frame

results <- data.frame(y=c(res[1,], res[2,]),
                       x=rep(c("sra", "topK"), times=rep(nsim, 2)),
                       np=c(res[3,], res[4,]),
                       K=K,
                       nlists=nlists)

results
}

Klist <- c(3, 5, 8, 10)
Knames <- c(`3` = "q=3",
            `5` = "q=5",
            `8` = "q=8", 
            `10` = "q=10"
                    )

www <- do.call(rbind, 
               lapply(Klist, function(K) simple(K, nlists=nlists)))

www <- www %>% group_by(x, K) %>% mutate(grp.mean.values = round(mean(np), 2))


p <- ggplot(www, aes(x=factor(x), y=y, fill=factor(x))) +
   geom_violin( scale="width", adjust=.4, bw=.7, kernel="rectangular")  + 
  facet_grid(. ~ K, labeller = as_labeller(Knames)) + xlab("") + ylab("Depth of agreement") + theme(legend.position="none") + theme_bw() + 
  stat_summary(aes(label=grp.mean.values), fun.y=mean, geom="text", size=6, fontface = "bold") + 
  theme(strip.text.x = element_text(size=16, face="bold"),
        text = element_text(size=14),
        panel.border = element_blank(),
        legend.position="none")  +  scale_fill_manual(values=c("#999999", "#E69F00", "#56B4E9"))
p

Now change the value of $K$ and run simulations again

Klist <- c(3, 5, 10, 50)

## Kør for fast antal prædiktorer men varierer antallet af lister
print(Klist)

www <- do.call(rbind, 
               lapply(Klist, function(K) simple(5, nlists=K)))

www <- www %>% group_by(x, nlists) %>% mutate(grp.mean.values = round(mean(np), 2))

save(www, file="www")
#www
#

Knames <- c(`3` = "L=3",
            `5` = "L=5",
            `10` = "L=10", 
            `50` = "L=50"
                    )


p <- ggplot(www, aes(x=factor(x), y=y, fill=factor(x))) +
   geom_violin( scale="width", adjust=.4, bw=.7, kernel="rectangular")  + 
  facet_grid(. ~ nlists, labeller = as_labeller(Knames)) + xlab("") + ylab("Depth of agreement") + theme(legend.position="none") + stat_summary(aes(label=grp.mean.values), fun.y=mean, geom="text", size=6, fontface = "bold") + theme_bw() + 
  theme(strip.text.x = element_text(size=16, face="bold"),
        panel.border = element_blank(),
        text = element_text(size=14), legend.position="none")  +  scale_fill_manual(values=c("#999999", "#E69F00", "#56B4E9"))
p