# R/rank.R In SRCS: Statistical Ranking Color Scheme for Multiple Pairwise Comparisons

```# _________________________________________________________________________
## This function actually computes the rank of a set of algorithms using
## their performance results on a *single* problem configuration.
## It is based on the code published in the book chapter by I.G.Del Amo and D.Pelta
##
## @param data A vector with the results of the m algorithms, that is, the \code{n}
##   independent repetitions of the performance measure, for each algorithm.
##   Since we will be performing a paired test, the data must be carefully arranged
##   so that \code{data[j]} (j-th datum of the first group) is paired with all \code{data[j+Kn]}, K = 1,2,...,m
##   (these are the j-th datum of the remaining groups, where \code{n} is the number of samples of each group).
## @param group A factor vector for the data, indicating, for each corresponding
##     entry of \code{data}, the algorithm it belongs to.
## @param alpha Significance threshold for the tests. Defaults to 0.05
## @param max Boolean: should the performance measure be maximized?
##     TRUE means that the higher the performance measure,
##     the better. FALSE means the lower, the better. Defaults to TRUE.
## @return 	A dataframe object where every row refers to an algorith, and with the following columns:
##	  First two columns are the algorithm name and its rank, named "target" and "rank"
##	  Second two columns are the mean and std.dev. of the performance of the algorithm, named "mean" and "sd".
##    Finally: as many columns as algorithms, storing the p-value of every pairwise comparison of this algorithm with the rest,
##		named as the algorithm with ".pval" at the end.
## @examples
## dat <- c( 2.5, 2.3, 1.2, 0.7, 3.5, 4.1 )
## group <- factor(c("alg1", "alg1", "alg2", "alg2", "alg3", "alg3"))
## @references I.G. del Amo and D.A. Pelta.
##  SRCS: A Technique for Comparing Multiple Algorithms under Several Factors in Dynamic Optimization Problems
##  In: Metaheuristics for Dynamic Optimization. E. Alba A. Nakib, P. Siarry (Eds). Studies in Computational Intelligence 433, 61-77.
##  Springer, 2013.
# _________________________________________________________________________
.rank <- function(data, group, test, fun, correction, alpha = 0.05, maximize = TRUE, paired = FALSE, ...)
{
algorithms <- unique(group);
nalgorithms = length(algorithms);
rankFrame = data.frame(matrix(NA, nrow = length(algorithms), ncol = 2 + 2 + length(algorithms)));

rankFrame[,1] = as.factor(unlist(algorithms));

pvalColumnNames = paste0(algorithms,".pval");
names(rankFrame) = c("target","rank","mean","sd",pvalColumnNames);
rankFrame\$rank = 0;

alg.bool = list();
for(algorithm in algorithms){
v = (rankFrame\$target == algorithm);
alg.bool[[algorithm]] = which(v, arr.ind = TRUE);
}

# calculate the vector of means for all the algorithms' measures
means <- tapply(data, group, mean);
stdevs <- tapply(data, group, sd);

# perform a Kruskal-Wallis test to assess if there are differences
# among all the results
dataframe <- data.frame(group, data)
kruskal <- kruskal.test(data ~ group, data = dataframe)
if (!is.na( kruskal\$p.value) && kruskal\$p.value < alpha)
{
# post-hoc test: perform the indicated pairwise test with the desired correction method to assess individual differences
test.results = NULL;

## ---------------------------------
## APPLY THE SELECTED POST-HOC TEST
## ---------------------------------
if(test == "wilcoxon"){
test.results = pairwise.wilcox.test(data, group, p.adjust.method = correction, exact = FALSE, paired = paired);
}
if(test == "t"){
test.results = pairwise.t.test(data, group, p.adjust.method = correction, exact = FALSE, paired = paired);
}
if(test == "tukeyHSD"){
group2 = as.factor(gsub("-", "@", group)); # to avoid the case that the algorithm names already contain "-"
levels(group2) = gsub("-", "@", levels(group));
tempgroup = group2;
mypvalues = TukeyHSD(aov(data ~ tempgroup))\$tempgroup;
grouplevels = levels(group);

## Manually compose the p.values matrix
pvalue.matrix = matrix(NA, nrow = length(algorithms)-1, ncol = length(algorithms)-1);
colnames(pvalue.matrix) = grouplevels[1:(nlevels(group)-1)];
rownames(pvalue.matrix) = grouplevels[2:nlevels(group)];

separated = strsplit(rownames(mypvalues), "-");
for(myrow in 1:nrow(mypvalues)){
algs = gsub("@", "-", separated[[myrow]]);
pvalue.matrix[ algs, algs ] = mypvalues[myrow, "p adj"];
}
test.results = list(p.value = pvalue.matrix);
}

if(test == "custom"){
myargs = list(c(data, group), as.list(...));
test.results = do.call(fun, myargs);
}

for (algorithm1 in rownames(test.results\$p.value))
{
for (algorithm2 in colnames(test.results\$p.value))
{
if (!is.na( test.results\$p.value[algorithm1,algorithm2])){
if(test.results\$p.value[algorithm1,algorithm2] < alpha){
# there is a significant difference between algorithm1 and algorithm2;
# we need to identify which one is the best and which one the worst,
# we'll use the median for that purpose
if (means[algorithm1] > means[algorithm2])
{
best <- algorithm1
worst <- algorithm2
}
else
{
best <- algorithm2
worst <- algorithm1
}

if (!maximize)
{
# swap best and worst
tmp <- best
best <- worst
worst <- tmp
}

# update ranks
#rankFrame[rankFrame\$target == best, 2] = rankFrame[rankFrame\$target == best, 2] + 1;
#rankFrame[rankFrame\$target == worst, 2] = rankFrame[rankFrame\$target == worst, 2] - 1;
rankFrame[alg.bool[[best]], 2] = rankFrame[alg.bool[[best]], 2] + 1;
rankFrame[alg.bool[[worst]], 2] = rankFrame[alg.bool[[worst]], 2] - 1;
}
# store p-values of this comparison
#rankFrame[rankFrame\$target == algorithm1, paste0(algorithm2,".pval")] = test.results\$p.value[algorithm1,algorithm2];
#rankFrame[rankFrame\$target == algorithm2, paste0(algorithm1,".pval")] = test.results\$p.value[algorithm1,algorithm2];
rankFrame[alg.bool[[algorithm1]], paste0(algorithm2,".pval")] = test.results\$p.value[algorithm1,algorithm2];
rankFrame[alg.bool[[algorithm2]], paste0(algorithm1,".pval")] = test.results\$p.value[algorithm1,algorithm2];
}
}
}
for(algorithm in algorithms){
#store the mean and sd
#rankFrame[rankFrame\$target == algorithm, 3] = means[[algorithm]];
#rankFrame[rankFrame\$target == algorithm, 4] = stdevs[[algorithm]];
rankFrame[alg.bool[[algorithm]], 3] = means[[algorithm]];
rankFrame[alg.bool[[algorithm]], 4] = stdevs[[algorithm]];
}
}
return(rankFrame)
}

# ___________________________________________________________________________
## Same as .rank() but for several different performance columns. It ranks every column separately
##
## Argument data is not a vector of data, but a matrix of data where each COLUMN represents
## a different performance measure. nrows(data) must equal length(group).
##
## Returns a dataframe object with 1+ncol(data). First column is named "target" and contains unique(group).
## The rest are named "rank1", "rank2", ... "rankn", with n = ncol(data), i.e. the number of performance measures
# _________________________________________________________________________
.rankall <- function(alldata, group, test, fun, correction, alpha = 0.05, maximize = TRUE, paired = FALSE, ...){
algorithms <- unique(group);
nranks = ncol(alldata);
rankFrame = data.frame(matrix(NA, nrow = length(algorithms), ncol = 1 + nranks));

rankFrame[,1] = as.factor(unlist(algorithms));

names(rankFrame) = c("target",paste0("rank",seq(1:nranks)));
rankFrame[,2:(nranks + 1)] = 0;

alg.bool = list();
for(algorithm in algorithms){
v = (rankFrame\$target == algorithm);
alg.bool[[algorithm]] = which(v, arr.ind = TRUE);
}

means = sapply(X = names(alldata), FUN = function(algname) tapply(X = alldata[[algname]], INDEX = group, FUN = mean));

# perform a Kruskal-Wallis test to assess if there are differences
# among all the results
for(j in 1:nranks){
data = alldata[,j];
dataframe <- data.frame(group, data)
names(dataframe) = c("group", "data");
kruskal <- kruskal.test(data ~ group, data = dataframe)
if (!is.na( kruskal\$p.value) && kruskal\$p.value < alpha)
{
## ---------------------------------
## APPLY THE SELECTED POST-HOC TEST
## ---------------------------------
if(test == "wilcoxon"){
test.results = pairwise.wilcox.test(data, group, p.adjust.method = correction, exact = FALSE, paired = paired);
}
if(test == "t"){
test.results = pairwise.t.test(data, group, p.adjust.method = correction, exact = FALSE, paired = paired);
}
if(test == "tukeyHSD"){
group2 = as.factor(gsub("-", "@", group)); # to avoid the case that the algorithm names already contain "-"
levels(group2) = gsub("-", "@", levels(group));
tempgroup = group2;
mypvalues = TukeyHSD(aov(data ~ tempgroup))\$tempgroup;
grouplevels = levels(group);

## Manually compose the p.values matrix
pvalue.matrix = matrix(NA, nrow = length(algorithms)-1, ncol = length(algorithms)-1);
colnames(pvalue.matrix) = grouplevels[1:(nlevels(group)-1)];
rownames(pvalue.matrix) = grouplevels[2:nlevels(group)];

separated = strsplit(rownames(mypvalues), "-");
for(myrow in 1:nrow(mypvalues)){
algs = gsub("@", "-", separated[[myrow]]);
pvalue.matrix[ algs, algs ] = mypvalues[myrow, "p adj"];
}
test.results = list(p.value = pvalue.matrix);
}

if(test == "custom"){
myargs = list(c(data, group), as.list(...));
test.results = do.call(fun, myargs);
}

for (algorithm1 in rownames(test.results\$p.value))
{
for (algorithm2 in colnames(test.results\$p.value))
{
if (!is.na( test.results\$p.value[algorithm1,algorithm2])){
if(test.results\$p.value[algorithm1,algorithm2] < alpha){
# there is a significant difference between algorithm1 and algorithm2;
# we need to identify which one is the best and which one the worst,
# we'll use the median for that purpose
if (means[algorithm1,j] > means[algorithm2,j])
{
best <- algorithm1
worst <- algorithm2
}
else
{
best <- algorithm2
worst <- algorithm1
}

if (!maximize)
{
# swap best and worst
tmp <- best
best <- worst
worst <- tmp
}

# update ranks
rankFrame[alg.bool[[best]],  1+j] = rankFrame[alg.bool[[best]], 1+j] + 1;
rankFrame[alg.bool[[worst]], 1+j] = rankFrame[alg.bool[[worst]], 1+j] - 1;
}
}
}
}
}
};
return(rankFrame)
}
```

## Try the SRCS package in your browser

Any scripts or data that you put into this service are public.

SRCS documentation built on May 2, 2019, 8:34 a.m.