R/table_results.R

Defines functions replaceMissingValues calcMeanAndQuantiles formatRows table_results formatAggrResultsToLatex

replaceMissingValues = function(x) {
  lapply(x, function(y) {
    if(is.null(y)) {
      return((dplyr::tibble(acc = NaN, fw.abs = NaN)))
    }
    else {
      return(y)
    }
  })
}


calcMeanAndQuantiles = function(data.aggr.spread) {
  dataset = unique(data.aggr.spread$dataset)
  # exclude full_predictor
  mean_base = data.aggr.spread %>%
    dplyr::filter(ids != "full_predictor")

  # transform cols to numeric and exclude first col
  base = sapply(mean_base[,c(-1,-2)], as.numeric)

  means = as.data.frame(t(colMeans(base)))
  quantiles = as.data.frame(apply(base, 2, function(x) quantile(x, na.rm = T)))

  # browser()
  # quantiles = sapply(quantiles, as.numeric)

  # browser()

  # round values
  means[,c(2,4,6,8)] = sapply(means[,c(2,4,6,8)], function(x) (format(round(x, 1), nsmall = 1)))
  means[,c(1,3,5,7)] = sapply(means[,c(1,3,5,7)], function(x) (format(round(x, 4), nsmall = 4)))

  quantiles[,c(2,4,6,8)] = sapply(quantiles[,c(2,4,6,8)], function(x) (format(round(as.numeric(x), 1), nsmall = 1)))
  quantiles[,c(1,3,5,7)] = sapply(quantiles[,c(1,3,5,7)], function(x) (format(round(as.numeric(x), 4), nsmall = 4)))

  # browser()

  means$ids = "mean"
  means$dataset = rep(dataset, nrow(means))
  quantiles$ids = paste(stringr::str_replace(row.names(quantiles), "%", "\\\\%"), "Quantile", sep = " ")
  quantiles$dataset = rep(dataset, nrow(quantiles))

  return(rbind(means, quantiles))
}

formatRows = function(data.aggr.spread) {
  # browser()
  if("k-NN_acc" %in% colnames(data.aggr.spread)) {
    data.aggr.spread$`k-NN_acc` = (format(round(data.aggr.spread$`k-NN_acc`, 4), nsmall = 4))
    data.aggr.spread$`k-NN_feats` = round(data.aggr.spread$`k-NN_feats`, 1)
  }
  else {
    data.aggr.spread$`k-NN_acc` = rep(NA, nrow(data.aggr.spread))
    data.aggr.spread$`k-NN_feats` = rep(NA, nrow(data.aggr.spread))
  }

  if("SVM_acc" %in% colnames(data.aggr.spread)) {
    data.aggr.spread$SVM_acc = (format(round(data.aggr.spread$SVM_acc, 4), nsmall = 4))
    data.aggr.spread$SVM_feats = round(data.aggr.spread$SVM_feats, 1)
  }
  else {
    data.aggr.spread$SVM_acc = rep(NA, nrow(data.aggr.spread))
    data.aggr.spread$SVM_feats = rep(NA, nrow(data.aggr.spread))
  }

  if("Decision Tree_acc" %in% colnames(data.aggr.spread)) {
    data.aggr.spread$`Decision Tree_acc` = (format(round(data.aggr.spread$`Decision Tree_acc`, 4), nsmall = 4))
    data.aggr.spread$`Decision Tree_feats` = round(data.aggr.spread$`Decision Tree_feats`, 1)
  }
  else {
    data.aggr.spread$`Decision Tree_acc` = rep(NA, nrow(data.aggr.spread))
    data.aggr.spread$`Decision Tree_feats` = rep(NA, nrow(data.aggr.spread))
  }

  if("Neural Network_acc" %in% colnames(data.aggr.spread)) {
    data.aggr.spread$`Neural Network_acc` = (format(round(data.aggr.spread$`Neural Network_acc`, 4), nsmall = 4))
    data.aggr.spread$`Neural Network_feats` = round(data.aggr.spread$`Neural Network_feats`, 1)
  }
  else {
    data.aggr.spread$`Neural Network_acc` = rep(NA, nrow(data.aggr.spread))
    data.aggr.spread$`Neural Network_feats` = rep(NA, nrow(data.aggr.spread))
  }

  return(data.aggr.spread)
}

table_results = function(resultDf, fullPredResults) {
  # browser()
  data.aggr.spread = resultDf %>%
    dplyr::filter(ids != "full_predictor") %>%
    dplyr::group_by(dataset, ids, classifier) %>%
    dplyr::summarise(acc = mean(acc), feats = mean(feats)) %>%
    dplyr::group_by(dataset, ids, classifier) %>%
    tidyr::nest(feats, acc, .key = 'abs_acc') %>%
    tidyr::spread(key = classifier, value = abs_acc) %>%
    dplyr::mutate_each(replaceMissingValues) %>%
    tidyr::unnest(.sep = '_')

  # browser()

  full_data.aggr.spread = fullPredResults %>%
    dplyr::group_by(dataset, classifier) %>%
    dplyr::summarise(acc = mean(acc), feats = mean(feats)) %>%
    dplyr::group_by(dataset, classifier) %>%
    tidyr::nest(feats, acc, .key = 'abs_acc') %>%
    tidyr::spread(key = classifier, value = abs_acc) %>%
    dplyr::mutate_each(replaceMissingValues) %>%
    tidyr::unnest(.sep = '_')

  full_data.aggr.spread$ids = rep("full_predictor", nrow(full_data.aggr.spread))

  # browser()
  data.aggr.spread = formatRows(data.aggr.spread)
  full_data.aggr.spread = formatRows(full_data.aggr.spread)

  # order by custom order
  custom_order = c("variance",
                   "chi-squared",
                   "information gain",
                   "ReliefF",
                   "mRMR",
                   "FCBF",
                   "CFS",
                   "SFFS",
                   "SFBS",
                   "Genetic Algorithm",
                   "LASSO",
                   "Gram-Schmidt",
                   "ranger impurity",
                   "FASTCorrelation",
                   "HierarchicalClustering",
                   "PCA loading",
                   "PCA k-means",
                   "ALE-Plot (SVM)",
                   "ALE-Plot (nnet)",
                   "Surrogate-Tree (SVM)",
                   "Surrogate-Tree (nnet)",
                   "full_predictor"
  )

  data.aggr.spread = data.aggr.spread %>%
    dplyr::arrange(factor(ids, levels = custom_order)) %>%
    dplyr::select(ids, dataset, "k-NN_acc", "k-NN_feats", "SVM_acc", "SVM_feats", "Decision Tree_acc", "Decision Tree_feats", "Neural Network_acc", "Neural Network_feats")

  return(rbind(data.aggr.spread, full_data.aggr.spread))
}

formatAggrResultsToLatex = function(data.aggr.spread) {
  datasetname = unique(data.aggr.spread$dataset)
  string = "\\begin{table}[caption=Results on DATASETNAME]\\begin{tabular}{lcccccccc}\\hline\\multicolumn{1}{c}{\\multirow{2}{*}{\\textbf{methods}}} & \\multicolumn{2}{c}{\\textbf{k-NN}} & \\multicolumn{2}{c}{\\textbf{SVM}} & \\multicolumn{2}{c}{\\textbf{Dec.}} & \\multicolumn{2}{c}{\\textbf{NNet}} \\\\ \\cmidrule{2-3} \\cmidrule{4-5} \\cmidrule{6-7} \\cmidrule{8-9} \\multicolumn{1}{c}{} & \\multicolumn{1}{l}{\\textbf{feat.}} & \\multicolumn{1}{l}{\\textbf{acc.}} & \\multicolumn{1}{l}{\\textbf{feat.}} & \\multicolumn{1}{l}{\\textbf{acc.}} & \\multicolumn{1}{l}{\\textbf{feat.}} & \\multicolumn{1}{l}{\\textbf{acc.}} & \\multicolumn{1}{l}{\\textbf{feat.}} & \\multicolumn{1}{l}{\\textbf{acc.}} \\\\ \\hline"
  string = stringr::str_replace(string, "DATASETNAME", datasetname)

  best_values = data.frame(
    "k-NN_acc" = max(data.aggr.spread$`k-NN_acc`, na.rm = T),
    "k-NN_feats" = min(data.aggr.spread$`k-NN_feats`, na.rm = T),
    "SVM_acc" = max(data.aggr.spread$SVM_acc, na.rm = T),
    "SVM_feats" = min(data.aggr.spread$SVM_feats, na.rm = T),
    "Neural Network_acc" = max(data.aggr.spread$`Neural Network_acc`, na.rm = T),
    "Neural Network_feats" = min(data.aggr.spread$`Neural Network_feats`, na.rm = T),
    "Decision Tree_acc" = max(data.aggr.spread$`Decision Tree_acc`, na.rm = T),
    "Decision Tree_feats" = min(data.aggr.spread$`Decision Tree_feats`, na.rm = T)
  )

  latex_cols = c(
    "ids",
    "k-NN_feats",
    "k-NN_acc",
    "SVM_feats",
    "SVM_acc",
    "Decision Tree_feats",
    "Decision Tree_acc",
    "Neural Network_feats",
    "Neural Network_acc"
  )

  # append mean and median
  # browser()
  data.aggr.spread = rbind(data.aggr.spread, calcMeanAndQuantiles(data.aggr.spread))

  for(index in 1:nrow(data.aggr.spread)) {
    colValues = c()

    for(colname in latex_cols) {
      # store value temporarly
      value = data.aggr.spread[index,colname]

      # round if numeric
      if(grepl("acc", colname) | grepl("feat", colname)) {
        value = value[[1]]

        colname_best_results = stringr::str_replace(colname,"-",".")
        colname_best_results = stringr::str_replace(colname_best_results," ",".")
        # browser()

        # browser()
        # print(paste(value, best_values[[colname]], sep= " "))
        # check if value == best value
        if(!grepl("Quantile", data.aggr.spread[index,1]$ids) && !is.na(value) && value == best_values[[colname_best_results]]) {
          value = paste("\\textbf{", value, "}", sep="")
        }

        if(is.na(value) | is.nan(value) | value == "NaN") {
          value = "-"
        }
      }
      else {
        value = replaceMethodName(value)
      }

      colValues = c(colValues, toString(value))
    }

    if(colValues[1] == "full_predictor") {
      colValues[1] = "All Features"

      tr = paste(colValues, collapse = " & ")
      tr = paste("\\hline", tr, sep = "\n")
    } else {
      tr = paste(colValues, collapse = " & ")
    }

    if(index == 1) {
      string = paste(string, tr, sep = "\n")
    }
    else {
      string = paste(string, tr, sep = "\\\\ \n")
    }

  }


  tableEnd = "\\hline \\end{tabular} \\end{table}"
  string = paste(string, tableEnd, sep = "\\\\")

  cat(string)
}
creil94/FeatureSelectionDashboard documentation built on Nov. 4, 2019, 9:17 a.m.