knitr::opts_chunk$set(echo = TRUE)
library(SuperRanker) library(TopKLists) library(MESS) #Simulate a list of ranks # K location of jump # P = total length of predictors # p = probability that top is permuted simRank <- function(K, P, p) { topPermuted <- rbinom(1, 1, p) top <- (1-topPermuted) * (1:K) + topPermuted * sample(1:K) bottom <- sample((K+1):P) c(top, bottom) } blockSim <- function(blocksizes, maxlength) { x <- seq(maxlength) slut <- pmin(maxlength, c(cumsum(blocksizes), maxlength)) start <- c(1, pmin(maxlength, cumsum(blocksizes)+1)) unlist(sapply(seq(length(start)), function(i) { sample(seq(start[i], slut[i]))})) } # Example simulation myrank <- sapply(1:3, function(i) simRank(K=10, P=100, p=0))
nsim <- 1000 N <- 400 listlength <- 1000 threshold <- 12 # Our threshold. q* nlists <- 10 # Number of lists available Klist <- c(3, 5, 8, 15, 20) # How far the lists agree. The truth nu <- 6 # Tuning parameter for topK. Should be somewhere between 2 and 10 as per their paper
simple <- function(K, nlists=nlists) { res <- sapply(1:nsim, function(i) { # Simulate lists # Fixed parameters for now simlists <- bootstrappedlists(n=400, nlists=8, preal=15, listlength, effect=15) ## Return the results from sra and topK topk <- j0.multi(simlists, d=K, v=nu)$maxK myres <- c( min(which(sra(simlists, type="mad")>=K),listlength)-1, ifelse(is.na(topk), 1, topk)-1 # Becomes 0 later on ) ## Indsæt predictor values her c(myres, length(unique(as.vector(simlists[0:myres[1],]))), length(unique(as.vector(simlists[0:myres[2],])))) }) res # Store the results in a data frame results <- data.frame(y=c(res[1,], res[2,]), x=rep(c("sra", "topK"), times=rep(nsim, 2)), np=c(res[3,], res[4,]), K=K, nlists=nlists) results } Klist <- c(3, 5, 8, 10) Knames <- c(`3` = "q=3", `5` = "q=5", `8` = "q=8", `10` = "q=10" ) www <- do.call(rbind, lapply(Klist, function(K) simple(K, nlists=nlists))) www <- www %>% group_by(x, K) %>% mutate(grp.mean.values = round(mean(np), 2)) p <- ggplot(www, aes(x=factor(x), y=y, fill=factor(x))) + geom_violin( scale="width", adjust=.4, bw=.7, kernel="rectangular") + facet_grid(. ~ K, labeller = as_labeller(Knames)) + xlab("") + ylab("Depth of agreement") + theme(legend.position="none") + theme_bw() + stat_summary(aes(label=grp.mean.values), fun.y=mean, geom="text", size=6, fontface = "bold") + theme(strip.text.x = element_text(size=16, face="bold"), text = element_text(size=14), panel.border = element_blank(), legend.position="none") + scale_fill_manual(values=c("#999999", "#E69F00", "#56B4E9")) p
Now change the value of $K$ and run simulations again
Klist <- c(3, 5, 10, 50) ## Kør for fast antal prædiktorer men varierer antallet af lister print(Klist) www <- do.call(rbind, lapply(Klist, function(K) simple(5, nlists=K))) www <- www %>% group_by(x, nlists) %>% mutate(grp.mean.values = round(mean(np), 2)) save(www, file="www") #www #
Knames <- c(`3` = "L=3", `5` = "L=5", `10` = "L=10", `50` = "L=50" ) p <- ggplot(www, aes(x=factor(x), y=y, fill=factor(x))) + geom_violin( scale="width", adjust=.4, bw=.7, kernel="rectangular") + facet_grid(. ~ nlists, labeller = as_labeller(Knames)) + xlab("") + ylab("Depth of agreement") + theme(legend.position="none") + stat_summary(aes(label=grp.mean.values), fun.y=mean, geom="text", size=6, fontface = "bold") + theme_bw() + theme(strip.text.x = element_text(size=16, face="bold"), panel.border = element_blank(), text = element_text(size=14), legend.position="none") + scale_fill_manual(values=c("#999999", "#E69F00", "#56B4E9")) p
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.