library("astrid")
source("utilities_analysis.R")
library(e1071)
## Switches selecting which result sections are printed.
## -- tables for the synthetic datasets
res_synthetic <- TRUE
## -- table showing the summarised results from all of the UCI datasets
res_uci <- TRUE
## -- the credit a dataset (i) grouping and (ii) anonymisation example
res_credit_a_svm <- TRUE

## Prefix prepended to the result paths built below.
path_prefix <- "./"
## ==============================
if (res_synthetic) {
  ## Result tables for the synthetic dataset, one table per classifier
  ## (svm, randomForest, naiveBayes), all printed with the same settings.
  respath <- paste0(path_prefix, "results_synthetic")
  cat("Using path: ", respath, "\n\n\n")

  print_result_tables(
    respath, "synthetic", "svm",
    dataset_specs = FALSE, alpha = 0.05, full_tree = TRUE, R = 100
  )
  print_result_tables(
    respath, "synthetic", "randomForest",
    dataset_specs = FALSE, alpha = 0.05, full_tree = TRUE, R = 100
  )
  print_result_tables(
    respath, "synthetic", "naiveBayes",
    dataset_specs = FALSE, alpha = 0.05, full_tree = TRUE, R = 100
  )
}
## ==============================
if (res_uci) {
  ## Grouping tables built from the UCI results directory, first for the
  ## SVM classifier and then for the random forest classifier.
  respath_uci <- paste0(path_prefix, "results_uci_add")
  cat("Using path: ", respath_uci, "\n\n\n")

  cat("SVM\n\n")
  make_grouping_table(respath_uci, "svm")

  cat("\n\nRandom forest\n\n")
  make_grouping_table(respath_uci, "randomForest")
}
## ==============================
if (res_credit_a_svm) {
  ## Worked example on the credit-a dataset with the SVM classifier:
  ## (i) print the result tables, (ii) pick one tree and evaluate it as an
  ## anonymisation scheme (p-value, goodness, overlap with the original data).

  ## read the dataset
  ## Keep the directory separately so the same prefixed location is used both
  ## for readRDS() and for print_result_tables().
  respath_dir <- paste0(path_prefix, "results_uci")
  respath <- paste0(respath_dir, "/", "credit-a_svm.rds")
  cat("Using path: ", respath, "\n\n\n")
  res <- readRDS(respath)

  ## Pass the prefixed directory so that path_prefix is honoured here as well
  ## (a bare "results_uci" would silently ignore a non-default prefix).
  print_result_tables(
    respath_dir, "credit-a", "svm",
    dataset_specs = FALSE, alpha = 0.05, full_tree = TRUE, R = 100
  )

  ## choose the tree for which k = 10
  ## NOTE(review): the index used is 9; presumably tree_p starts at k = 2 so
  ## that element 9 corresponds to k = 10 -- confirm against the stored results.
  tree_anon <- res$results$tree_p[[9]]

  cat("Using the tree:\n\n")
  ## Replace each "(" in the LaTeX string with "\set{" and each ")" with "}".
  tmp <- tree_to_latex(tree_anon$tree)
  tmp <- gsub("\\(", "\\\\set{", tmp)
  tmp <- gsub("\\)", "}", tmp)
  cat(tmp, "\n\n")

  ## Calculate baseline p-value
  ## The RNG kind and seed are fixed first so the estimate is repeatable.
  ## NOTE(review): L'Ecuyer-CMRG is presumably chosen because the call below
  ## runs with parallel = TRUE -- confirm.
  RNGkind("L'Ecuyer-CMRG")
  set.seed(42)
  p0 <- sid_p_tree_es(
    res$data_train, res$data_validation,
    tree = tree_anon$tree, classifier = svm,
    Rmin = 250, Rmax = 500, parallel = TRUE, alpha = 0.05, z = 2.6
  )
  cat("p-value:\n\n")
  cat(round(p0, 2), "\n\n")

  cat("baseline goodness (train using unshuffled data):\n\n")
  a0 <- sid_get_goodness(res$data_train, res$data_validation, classifier = svm)
  cat(round(a0, 2), "\n\n")

  cat("anon goodness (train using shuffled data):\n\n")
  a <- sid_tree_goodness(
    res$data_train, res$data_validation,
    tree = tree_anon$tree, classifier = svm, R = 100
  )
  cat(round(a, 2), "\n\n")

  ## generate an anonymised dataset
  ## sqldf provides the SQL INTERSECT used in anon_quality(). Attach it once
  ## with library(), which stops with an error if the package is missing;
  ## require() would only return FALSE and the failure would surface later.
  library(sqldf)

  ## Overlap between the original data and one surrogate dataset.
  ##
  ## data_orig: data frame with the original data.
  ## tree:      tree passed to sid_gen_surrogate() to generate the surrogate.
  ##
  ## Returns the number of distinct rows occurring in both the surrogate and
  ## the original data (SQL INTERSECT), divided by nrow(data_orig).
  anon_quality <- function(data_orig, tree) {
    data_anon <- sid_gen_surrogate(tree, data_orig)
    ## sqldf resolves data_anon / data_orig from this function's environment.
    tmp <- sqldf("SELECT * FROM data_anon INTERSECT SELECT * from data_orig")
    nrow(tmp) / nrow(data_orig)
  }

  ## Average the overlap over 100 independently generated surrogates and
  ## report it as a percentage.
  aq <- replicate(100, anon_quality(res$data_train, tree_anon$tree))
  cat("Quality of anonymisation: \n\n")
  cat(round(100 * mean(aq), 2), "\n\n")
}
## ==============================
## Add the following code to your website.
## For more information on customizing the embed code, read Embedding Snippets.