R/RankAggreg.R In RankAggreg: Weighted Rank Aggregation

Defines functions `RankAggreg`

````RankAggreg` <-
function(x, k, weights=NULL, method=c("CE", "GA"),
distance=c("Spearman", "Kendall"), seed=NULL, maxIter = 1000,
convIn=ifelse(method=="CE", 7, 30), importance=rep(1,nrow(x)),
rho=.1, weight=.25, N=10*k^2, v1=NULL,
popSize=100, CP=.4, MP=.01, verbose=TRUE, standardizeWeights = TRUE, ...)
{
if(!is.null(seed))
set.seed(seed)
method <- match.arg(method, c("CE", "GA"))
distance <- match.arg(distance, c("Spearman", "Kendall"))
argss <- list(...)
x <- x[,1:k]
orig.x <- x

orig.imp <- importance
importance <- importance/sum(importance) #rescale importance weights

distinct <- apply(x, 1, function(y) ifelse(length(unique(y)) < k, 1, 0))
if(sum(distinct) >= 1)
stop("Elements of Each Row Must Be Unique")
if(nrow(x)<2)
stop("X must have more than 1 row")
if(CP == 0 | MP == 0)
stop("Neither CP nor MP can be 0")

compr.list <- unique(sort(as.vector(x)))
n <- length(compr.list)
#compr.list <- rev(compr.list)
#cat(compr.list)

comp.list <- 1:n
x <- t(apply(x,1, function(xx) match(xx,compr.list)))

if(!is.null(weights)){
weights <- weights[,1:k]
#standardize weights:
if (standardizeWeights) {
weights <- t(apply(weights,1,function(z){if(max(z)==min(z))
rep(0, length(z)) else (z-min(z))/(max(z)-min(z))}))
for(i in 1:nrow(weights)) # make sure 1 is the best score for all lists
if(weights[i,k]!=0)
weights[i,] <- 1-weights[i,]
}
if(dim(x)[1] != dim(weights)[1] || dim(x)[2] != dim(weights)[2])
stop("Dimensions of x and weight matrices have to be the same")
}

if (k > n)
stop("k must be smaller or equal to n")

fyRes <- matrix(0,1,2)
colnames(fyRes) <- c("Minimums", "Medians")

if(method=="CE"){
v <- matrix(1/n,n,k)
if(!is.null(v1))
v <- as.matrix(v1)
y <- vector("numeric")

Nhat <- round(rho*N)
if (Nhat < 5)
stop("rho is too small")

t <- 1
resN <- matrix(0,N,k)
iter <- 0

repeat
{
cands <- matrix(.C("sampling", as.integer(N), as.double(v), as.integer(n),
as.integer(k), as.integer(resN), PACKAGE="RankAggreg")[[5]], N)

#mcmcProc(v, N, argss\$thin, argss\$burn.in, comp.list, verbose)

minf <- ifelse(t!=1, min(f.y), 0)

if(distance=="Spearman")
f.y <- spearman(x, cands, importance, weights)
else
f.y <- kendall(x, cands, importance, weights)

fy <- sort(f.y, ind=TRUE)
y[t] <- fy\$x[Nhat]
good.cand <- cands[f.y <= y[t],]

if(t==1)
fyRes[1,] <- c(min(f.y), median(f.y))
else
fyRes <- rbind(fyRes,c(min(f.y), median(f.y)))

v <- upd.prob(good.cand, v, weight, comp.list)

best.cand <- compr.list[cands[fy\$ix[1],]]
rm(cands) # clean up

y.l <- paste(best.cand, sep="", collapse=",")

if(verbose){
cat("\n", "Iteration", t, ": ",  c("Optimal value: ", min(f.y),
"\n Optimal List:  ", y.l, "\n"))
plotUpdate(f.y, fyRes, N, method)}

if(minf == min(f.y))
iter <- iter+1
else
iter <- 1

if(iter == convIn)
break

t <- t + 1

if (t > maxIter){
cat("Did not converge after ", maxIter, " iterations. Please increase sample size N\n")
break}
}
} else{
#generate initial population randomly
cands <- matrix(0, popSize, k)
for(i in 1:popSize)
cands[i,] <- sample(comp.list, k)

#calculate obj. fn
if(distance=="Spearman")
f.y <- spearman(x, cands, importance, weights)
else
f.y <- kendall(x, cands, importance, weights)

fyRes[1,] <- c(min(f.y), median(f.y))
best.cand <- compr.list[cands[which.min(f.y),]]
bestevery <- min(f.y)

conv=FALSE
t <- 1
iter <- 0
while(!conv){
#selection probability
minf <- min(f.y)
p.y <- (max(f.y)+1-f.y)/sum((max(f.y)+1-f.y))
cpy <- cumsum(p.y)

#select cands for the next generation
ind <- runif(popSize)
ind2 <- rep(0, popSize)
for(i in 1:popSize)
ind2[i] <- sum(ind[i] > cpy)+1
cands <- cands[ind2,]

# cross-over
pairstocross <- floor(popSize*CP/2)
samp <- sample(1:popSize, pairstocross*2)
pointsofcross <- sample(2:k, pairstocross, replace=TRUE)
for(i in 1:pairstocross){
if(pointsofcross[i] < k/2) {
swap <- 1:pointsofcross[i]
} else {
swap <- pointsofcross[i]:k
}
for(j in swap){
# this loop performs partially matched crossover (PMX) described in Section 10.5 of
# Data Mining: Concepts, Models, Methods, and Algorithms by Mehmed Kantardzic (2003)

t1 <- cands[samp[i],j]
t2 <- cands[samp[i+pairstocross],j]

if(!is.na(t3 <- match(t2, cands[samp[i],])))
cands[samp[i], t3] <- t1
if(!is.na(t3 <- match(t1, cands[samp[i+pairstocross],])))
cands[samp[i+pairstocross], t3] <- t2

cands[samp[i], j] <- t2
cands[samp[i+pairstocross], j] <- t1
}
}

# random mutations with probability MP
mutations <- round(popSize*k*MP)

rows <- sample(1:popSize, mutations, replace=TRUE)
cols <- sample(1:k, mutations, replace=TRUE)
switchWith <- sample(comp.list, mutations, replace=TRUE)

for(i in 1:mutations){
tempI <- cands[rows[i], cols[i]]
if(switchWith[i] %in% cands[rows[i],])
cands[rows[i],which(switchWith[i]==cands[rows[i],])] <- tempI
cands[rows[i], cols[i]] <- switchWith[i]
}

#calculate obj. fn
if(distance=="Spearman")
f.y <- spearman(x, cands, importance, weights)
else
f.y <- kendall(x, cands, importance, weights)

fyRes <- rbind(fyRes,c(min(f.y), median(f.y)))

y.l <- paste(compr.list[cands[which.min(f.y),]], sep="", collapse=",")
if(verbose){
cat("\n", "Iteration", t, ": ",  c("Optimal value: ", min(f.y),
"\n Optimal List:  ", y.l, "\n"))
plotUpdate(f.y, fyRes, popSize, method)}

if(minf == min(f.y))
iter <- iter+1
else
iter <- 1

if(iter == convIn)
conv=TRUE

if(min(f.y) < bestevery){
best.cand <- compr.list[cands[which.min(f.y),]]
bestevery <- min(f.y)
}
t <- t+1

if(t > maxIter){
cat("Did not converge after ", maxIter, " iterations.\n")
break}
}
}
rownames(fyRes) <- paste("Iter", 1:nrow(fyRes))
res <- list(top.list=best.cand, optimal.value=ifelse(method=="CE", fy\$x[1], bestevery),
sample.size = ifelse(method=="CE", N, popSize), num.iter=t, method=method, distance=distance,
importance=orig.imp, lists = orig.x, weights = weights, sample=f.y, summary = fyRes)
class(res) <- "raggr"
res
}
```

Try the RankAggreg package in your browser

Any scripts or data that you put into this service are public.

RankAggreg documentation built on May 2, 2019, 5:48 a.m.