PrivateEC methods are described in the following publication.
Trang T. Le, W. K. Simmons, M. Misaki, B.C. White, J. Savitz, J. Bodurka, and B. A. McKinney. "Differential privacy-based evaporative cooling feature selection and classification with Relief-F and Random Forests," Bioinformatics, Volume 33, Issue 18, 15 September 2017, Pages 2906–2913. free
knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) library(privateEC)
library(privateEC) n.samples <- 300 # 100 samples in train/holdout/test n.variables <- 100 # 100 features label <- "class" type <- "mainEffect" # main effect simulatios bias <- 0.6 # moderate effect size pct.signals <- 0.1 # pct functional features verbose <- FALSE data.sets <- createSimulation(num.samples = n.samples, num.variables = n.variables, pct.signals = pct.signals, label = label, bias = bias, pct.train = 1/3, pct.holdout = 1/3, pct.validation = 1/3, sim.type = type, save.file = NULL, verbose = verbose)
update.freq <- 5 # attributes evaporated per iteration num_tree <- 500 importance.algorithm <- "ReliefFequalK" pec.result <- privateEC(train.ds = data.sets$train, # training split holdout.ds = data.sets$holdout, # holdout split validation.ds = data.sets$validation, # validation set label = data.sets$label, # class label importance.algorithm = importance.algorithm, # ReliefF relief.k.method = "k_half_sigma", # ReliefF knn rf.ntree = num_tree, # random forest is.simulated = TRUE, # validation set bias = bias, # pEC privacy update.freq = update.freq, # num attr evaporated save.file = NULL, # save results signal.names = data.sets$signal.names, # functional attr names use.nestedCV = T, verbose = verbose)
knitr::kable(pec.result$algo.acc, caption = "Evaporation Iterations", row.names = FALSE, digits = 3)
plotRunResults(pec.result)
Best pEC model selected based on highest holdout accuracy. Train, holdout and independent validation accuracies of selected model shown below. Features in model are listed: simvar prefix variables are functional. Others are background.
multiple.max.indices <- which(pec.result$algo.acc$holdout.acc==max(pec.result$algo.acc$holdout.acc)) last.max <- multiple.max.indices[length(multiple.max.indices)] cat("\n Max Holdout Accuracy Step [",last.max,"]\n") cat("\n Accuracies: ") print(pec.result$algo.acc[last.max,]) cat("\n Selected Features \n [",pec.result$atts.remain[[last.max]],"]\n")
rncv_result <- regular_nestedCV(train.ds = rbind(data.sets$train,data.sets$holdout), validation.ds = data.sets$validation, label = data.sets$label, method.model = "classification", is.simulated = TRUE, ncv_folds = c(10, 10), param.tune = FALSE, learning_method = "rf", importance.algorithm = importance.algorithm, relief.k.method = "k_half_sigma", # ReliefF knn wrapper = "relief", inner_selection_percent = 0.2, inner_selection_positivescores = TRUE, num_tree = num_tree, verbose = verbose)
cat("\n Train Accuracy [",rncv_result$cv.acc,"]\n") cat("\n Validation Accuracy [",rncv_result$Validation,"]\n") cat("\n Selected Features \n [",rncv_result$Features,"]\n") cat("\n Elapsed Time [",rncv_result$Elapsed,"]\n")
cncv_result <- consensus_nestedCV(train.ds = rbind(data.sets$train,data.sets$holdout), validation.ds = data.sets$validation, label = data.sets$label, method.model = "classification", is.simulated = TRUE, ncv_folds = c(10, 10), param.tune = FALSE, learning_method = "rf", importance.algorithm = importance.algorithm, relief.k.method = "k_half_sigma", # ReliefF knn wrapper = "relief", inner_selection_percent = 0.2, inner_selection_positivescores = TRUE, num_tree = num_tree, verbose = verbose)
cat("\n Nested Cross-Validation Accuracy [",cncv_result$cv.acc,"]\n") cat("\n Validation Accuracy [",cncv_result$Validation,"]\n") cat("\n Selected Features \n [",cncv_result$Features,"]\n") cat("\n Elapsed Time [",cncv_result$Elapsed,"]\n")
library(privateEC) n.samples <- 300 # 100 samples in train/holdout/test n.variables <- 100 # 100 features label <- "qtrait" method.model <- "regression" type <- "mainEffect" # main effect simulatios bias <- 0.6 # moderate effect size pct.signals <- 0.1 # pct functional features verbose <- FALSE data.sets <- createSimulation(num.samples = n.samples, num.variables = n.variables, pct.signals = pct.signals, label = label, bias = bias, pct.train = 1/3, pct.holdout = 1/3, pct.validation = 1/3, sim.type = type, save.file = NULL, verbose = verbose)
update.freq <- 5 # attributes evaporated per iteration num_tree <- 500 importance.algorithm <- "RReliefFequalK" pec.result <- privateEC(train.ds = data.sets$train, # training split holdout.ds = data.sets$holdout, # holdout split validation.ds = data.sets$validation, # validation set label = data.sets$label, # class label method.model = method.model, # analysis goal importance.algorithm = importance.algorithm, # ReliefF relief.k.method = "k_half_sigma", # ReliefF knn rf.ntree = num_tree, # random forest is.simulated = TRUE, # validation set bias = bias, # pEC privacy update.freq = update.freq, # num attr evaporated save.file = NULL, # save results signal.names = data.sets$signal.names, # functional attr names use.nestedCV = T, verbose = verbose)
knitr::kable(pec.result$algo.acc, caption = "Evaporation Iterations", row.names = FALSE, digits = 3)
plotRunResults(pec.result)
Best pEC model selected based on highest holdout R-Squared. Train, holdout and independent validation R-Squareds of selected model shown below. Features in model are listed: simvar prefix variables are functional. Others are background.
multiple.max.indices <- which(pec.result$algo.acc$holdout.acc==max(pec.result$algo.acc$holdout.acc)) last.max <- multiple.max.indices[length(multiple.max.indices)] cat("\n Max Holdout R-Squared Step [",last.max,"]\n") cat("\n R-Squared: ") print(pec.result$algo.acc[last.max,]) cat("\n Selected Features \n [",pec.result$atts.remain[[last.max]],"]\n")
rncv_result <- regular_nestedCV(train.ds = rbind(data.sets$train,data.sets$holdout), validation.ds = data.sets$validation, label = data.sets$label, method.model = method.model, is.simulated = TRUE, ncv_folds = c(10, 10), param.tune = FALSE, learning_method = "rf", importance.algorithm = importance.algorithm, relief.k.method = "k_half_sigma", # ReliefF knn wrapper = "relief", inner_selection_percent = 0.2, inner_selection_positivescores = TRUE, num_tree = num_tree, verbose = verbose)
cat("\n Train R-Squared [",rncv_result$cv.acc,"]\n") cat("\n Validation R-Squared [",rncv_result$Validation,"]\n") cat("\n Selected Features \n [",rncv_result$Features,"]\n") cat("\n Elapsed Time [",rncv_result$Elapsed,"]\n")
cncv_result <- consensus_nestedCV(train.ds = rbind(data.sets$train,data.sets$holdout), validation.ds = data.sets$validation, label = data.sets$label, method.model = method.model, is.simulated = TRUE, ncv_folds = c(10, 10), param.tune = FALSE, learning_method = "rf", importance.algorithm = importance.algorithm, relief.k.method = "k_half_sigma", # ReliefF knn wrapper = "relief", inner_selection_percent = 0.2, inner_selection_positivescores = TRUE, num_tree = num_tree, verbose = verbose)
cat("\n Train R-Squared [",cncv_result$cv.acc,"]\n") cat("\n Validation R-Squared [",cncv_result$Validation,"]\n") cat("\n Selected Features \n [",cncv_result$Features,"]\n") cat("\n Elapsed Time [",cncv_result$Elapsed,"]\n")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.