replaceMissingValues = function(x) {
lapply(x, function(y) {
if(is.null(y)) {
return((dplyr::tibble(acc = NaN, fw.abs = NaN)))
}
else {
return(y)
}
})
}
calcMeanAndQuantiles = function(data.aggr.spread) {
dataset = unique(data.aggr.spread$dataset)
# exclude full_predictor
mean_base = data.aggr.spread %>%
dplyr::filter(ids != "full_predictor")
# transform cols to numeric and exclude first col
base = sapply(mean_base[,c(-1,-2)], as.numeric)
means = as.data.frame(t(colMeans(base)))
quantiles = as.data.frame(apply(base, 2, function(x) quantile(x, na.rm = T)))
# browser()
# quantiles = sapply(quantiles, as.numeric)
# browser()
# round values
means[,c(2,4,6,8)] = sapply(means[,c(2,4,6,8)], function(x) (format(round(x, 1), nsmall = 1)))
means[,c(1,3,5,7)] = sapply(means[,c(1,3,5,7)], function(x) (format(round(x, 4), nsmall = 4)))
quantiles[,c(2,4,6,8)] = sapply(quantiles[,c(2,4,6,8)], function(x) (format(round(as.numeric(x), 1), nsmall = 1)))
quantiles[,c(1,3,5,7)] = sapply(quantiles[,c(1,3,5,7)], function(x) (format(round(as.numeric(x), 4), nsmall = 4)))
# browser()
means$ids = "mean"
means$dataset = rep(dataset, nrow(means))
quantiles$ids = paste(stringr::str_replace(row.names(quantiles), "%", "\\\\%"), "Quantile", sep = " ")
quantiles$dataset = rep(dataset, nrow(quantiles))
return(rbind(means, quantiles))
}
formatRows = function(data.aggr.spread) {
# browser()
if("k-NN_acc" %in% colnames(data.aggr.spread)) {
data.aggr.spread$`k-NN_acc` = (format(round(data.aggr.spread$`k-NN_acc`, 4), nsmall = 4))
data.aggr.spread$`k-NN_feats` = round(data.aggr.spread$`k-NN_feats`, 1)
}
else {
data.aggr.spread$`k-NN_acc` = rep(NA, nrow(data.aggr.spread))
data.aggr.spread$`k-NN_feats` = rep(NA, nrow(data.aggr.spread))
}
if("SVM_acc" %in% colnames(data.aggr.spread)) {
data.aggr.spread$SVM_acc = (format(round(data.aggr.spread$SVM_acc, 4), nsmall = 4))
data.aggr.spread$SVM_feats = round(data.aggr.spread$SVM_feats, 1)
}
else {
data.aggr.spread$SVM_acc = rep(NA, nrow(data.aggr.spread))
data.aggr.spread$SVM_feats = rep(NA, nrow(data.aggr.spread))
}
if("Decision Tree_acc" %in% colnames(data.aggr.spread)) {
data.aggr.spread$`Decision Tree_acc` = (format(round(data.aggr.spread$`Decision Tree_acc`, 4), nsmall = 4))
data.aggr.spread$`Decision Tree_feats` = round(data.aggr.spread$`Decision Tree_feats`, 1)
}
else {
data.aggr.spread$`Decision Tree_acc` = rep(NA, nrow(data.aggr.spread))
data.aggr.spread$`Decision Tree_feats` = rep(NA, nrow(data.aggr.spread))
}
if("Neural Network_acc" %in% colnames(data.aggr.spread)) {
data.aggr.spread$`Neural Network_acc` = (format(round(data.aggr.spread$`Neural Network_acc`, 4), nsmall = 4))
data.aggr.spread$`Neural Network_feats` = round(data.aggr.spread$`Neural Network_feats`, 1)
}
else {
data.aggr.spread$`Neural Network_acc` = rep(NA, nrow(data.aggr.spread))
data.aggr.spread$`Neural Network_feats` = rep(NA, nrow(data.aggr.spread))
}
return(data.aggr.spread)
}
table_results = function(resultDf, fullPredResults) {
# browser()
data.aggr.spread = resultDf %>%
dplyr::filter(ids != "full_predictor") %>%
dplyr::group_by(dataset, ids, classifier) %>%
dplyr::summarise(acc = mean(acc), feats = mean(feats)) %>%
dplyr::group_by(dataset, ids, classifier) %>%
tidyr::nest(feats, acc, .key = 'abs_acc') %>%
tidyr::spread(key = classifier, value = abs_acc) %>%
dplyr::mutate_each(replaceMissingValues) %>%
tidyr::unnest(.sep = '_')
# browser()
full_data.aggr.spread = fullPredResults %>%
dplyr::group_by(dataset, classifier) %>%
dplyr::summarise(acc = mean(acc), feats = mean(feats)) %>%
dplyr::group_by(dataset, classifier) %>%
tidyr::nest(feats, acc, .key = 'abs_acc') %>%
tidyr::spread(key = classifier, value = abs_acc) %>%
dplyr::mutate_each(replaceMissingValues) %>%
tidyr::unnest(.sep = '_')
full_data.aggr.spread$ids = rep("full_predictor", nrow(full_data.aggr.spread))
# browser()
data.aggr.spread = formatRows(data.aggr.spread)
full_data.aggr.spread = formatRows(full_data.aggr.spread)
# order by custom order
custom_order = c("variance",
"chi-squared",
"information gain",
"ReliefF",
"mRMR",
"FCBF",
"CFS",
"SFFS",
"SFBS",
"Genetic Algorithm",
"LASSO",
"Gram-Schmidt",
"ranger impurity",
"FASTCorrelation",
"HierarchicalClustering",
"PCA loading",
"PCA k-means",
"ALE-Plot (SVM)",
"ALE-Plot (nnet)",
"Surrogate-Tree (SVM)",
"Surrogate-Tree (nnet)",
"full_predictor"
)
data.aggr.spread = data.aggr.spread %>%
dplyr::arrange(factor(ids, levels = custom_order)) %>%
dplyr::select(ids, dataset, "k-NN_acc", "k-NN_feats", "SVM_acc", "SVM_feats", "Decision Tree_acc", "Decision Tree_feats", "Neural Network_acc", "Neural Network_feats")
return(rbind(data.aggr.spread, full_data.aggr.spread))
}
formatAggrResultsToLatex = function(data.aggr.spread) {
datasetname = unique(data.aggr.spread$dataset)
string = "\\begin{table}[caption=Results on DATASETNAME]\\begin{tabular}{lcccccccc}\\hline\\multicolumn{1}{c}{\\multirow{2}{*}{\\textbf{methods}}} & \\multicolumn{2}{c}{\\textbf{k-NN}} & \\multicolumn{2}{c}{\\textbf{SVM}} & \\multicolumn{2}{c}{\\textbf{Dec.}} & \\multicolumn{2}{c}{\\textbf{NNet}} \\\\ \\cmidrule{2-3} \\cmidrule{4-5} \\cmidrule{6-7} \\cmidrule{8-9} \\multicolumn{1}{c}{} & \\multicolumn{1}{l}{\\textbf{feat.}} & \\multicolumn{1}{l}{\\textbf{acc.}} & \\multicolumn{1}{l}{\\textbf{feat.}} & \\multicolumn{1}{l}{\\textbf{acc.}} & \\multicolumn{1}{l}{\\textbf{feat.}} & \\multicolumn{1}{l}{\\textbf{acc.}} & \\multicolumn{1}{l}{\\textbf{feat.}} & \\multicolumn{1}{l}{\\textbf{acc.}} \\\\ \\hline"
string = stringr::str_replace(string, "DATASETNAME", datasetname)
best_values = data.frame(
"k-NN_acc" = max(data.aggr.spread$`k-NN_acc`, na.rm = T),
"k-NN_feats" = min(data.aggr.spread$`k-NN_feats`, na.rm = T),
"SVM_acc" = max(data.aggr.spread$SVM_acc, na.rm = T),
"SVM_feats" = min(data.aggr.spread$SVM_feats, na.rm = T),
"Neural Network_acc" = max(data.aggr.spread$`Neural Network_acc`, na.rm = T),
"Neural Network_feats" = min(data.aggr.spread$`Neural Network_feats`, na.rm = T),
"Decision Tree_acc" = max(data.aggr.spread$`Decision Tree_acc`, na.rm = T),
"Decision Tree_feats" = min(data.aggr.spread$`Decision Tree_feats`, na.rm = T)
)
latex_cols = c(
"ids",
"k-NN_feats",
"k-NN_acc",
"SVM_feats",
"SVM_acc",
"Decision Tree_feats",
"Decision Tree_acc",
"Neural Network_feats",
"Neural Network_acc"
)
# append mean and median
# browser()
data.aggr.spread = rbind(data.aggr.spread, calcMeanAndQuantiles(data.aggr.spread))
for(index in 1:nrow(data.aggr.spread)) {
colValues = c()
for(colname in latex_cols) {
# store value temporarly
value = data.aggr.spread[index,colname]
# round if numeric
if(grepl("acc", colname) | grepl("feat", colname)) {
value = value[[1]]
colname_best_results = stringr::str_replace(colname,"-",".")
colname_best_results = stringr::str_replace(colname_best_results," ",".")
# browser()
# browser()
# print(paste(value, best_values[[colname]], sep= " "))
# check if value == best value
if(!grepl("Quantile", data.aggr.spread[index,1]$ids) && !is.na(value) && value == best_values[[colname_best_results]]) {
value = paste("\\textbf{", value, "}", sep="")
}
if(is.na(value) | is.nan(value) | value == "NaN") {
value = "-"
}
}
else {
value = replaceMethodName(value)
}
colValues = c(colValues, toString(value))
}
if(colValues[1] == "full_predictor") {
colValues[1] = "All Features"
tr = paste(colValues, collapse = " & ")
tr = paste("\\hline", tr, sep = "\n")
} else {
tr = paste(colValues, collapse = " & ")
}
if(index == 1) {
string = paste(string, tr, sep = "\n")
}
else {
string = paste(string, tr, sep = "\\\\ \n")
}
}
tableEnd = "\\hline \\end{tabular} \\end{table}"
string = paste(string, tableEnd, sep = "\\\\")
cat(string)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.