PrivateEC methods are described in the following publication.
Trang T. Le, W. K. Simmons, M. Misaki, B.C. White, J. Savitz, J. Bodurka, and B. A. McKinney. "Differential privacy-based evaporative cooling feature selection and classification with Relief-F and Random Forests," Bioinformatics, Volume 33, Issue 18, 15 September 2017, Pages 2906–2913. free
knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) library(privateEC)
library(privateEC) n.samples <- 300 # 100 samples in train/holdout/test n.variables <- 100 # 100 features label <- "class" type <- "mainEffect" # main effect simulatios bias <- 0.6 # moderate effect size pct.signals <- 0.1 # pct functional features verbose <- FALSE data.sets <- createSimulation(num.samples = n.samples, num.variables = n.variables, pct.signals = pct.signals, label = label, bias = bias, pct.train = 1/3, pct.holdout = 1/3, pct.validation = 1/3, sim.type = type, save.file = NULL, verbose = verbose)
update.freq <- 5 # attributes evaporated per iteration num_tree <- 500 importance.algorithm <- "ReliefFequalK" pec.result <- privateEC(train.ds = data.sets$train, # training split holdout.ds = data.sets$holdout, # holdout split validation.ds = data.sets$validation, # independent validation set label = data.sets$label, # class label importance.algorithm = importance.algorithm, # feature selection method (ReliefF) relief.k.method = "k_half_sigma", # ReliefF nearest neighbor k estimate rf.ntree = num_tree, # random forest classifier is.simulated = TRUE, # indicates there will be a validation set bias = bias, # pEC privacy temperature noise update.freq = update.freq, # number of attributes to evaporate per step save.file = NULL, # save results signal.names = data.sets$signal.names, # functional variable names use.nestedCV = F, verbose = verbose)
knitr::kable(pec.result$algo.acc, caption = "Evaporation Iterations", row.names = FALSE, digits = 3)
plotRunResults(pec.result)
cat("\n Max Holdout Accuracy Step [",which.max(pec.result$algo.acc$holdout.acc),"]\n") cat("\n Accuracies: ") print(pec.result$algo.acc[which.max(pec.result$algo.acc$holdout.acc),]) cat("\n Selected Features \n [",pec.result$atts.remain[[which.max(pec.result$algo.acc$holdout.acc)]],"]\n")
rncv_result <- regular_nestedCV(train.ds = data.sets$train, validation.ds = data.sets$holdout, label = data.sets$label, is.simulated = TRUE, ncv_folds = c(10, 10), param.tune = FALSE, learning_method = "rf", importance.algorithm = importance.algorithm, num_tree = num_tree, verbose = verbose)
cat("\n Train Accuracy [",rncv_result$cv.acc,"]\n") cat("\n Validation Accuracy [",rncv_result$Validation,"]\n") cat("\n Selected Features \n [",rncv_result$Features,"]\n") cat("\n Elapsed Time [",rncv_result$Elapsed,"]\n")
cncv_result <- consensus_nestedCV(train.ds = data.sets$train, validation.ds = data.sets$holdout, label = data.sets$label, is.simulated = TRUE, ncv_folds = c(10, 10), param.tune = FALSE, learning_method = "rf", importance.algorithm = importance.algorithm, num_tree = num_tree, verbose = verbose)
cat("\n Train Accuracy [",cncv_result$cv.acc,"]\n") cat("\n Validation Accuracy [",cncv_result$Validation,"]\n") cat("\n Selected Features \n [",cncv_result$Features,"]\n") cat("\n Elapsed Time [",cncv_result$Elapsed,"]\n")
n.samples <- 100 n.variables <- 100 label <- "class" pct.imbalance = 0.56 type <- "mainEffect" importance.algorithm <- "ReliefFequalK" bias <- 0.4 pct.signals <- 0.1 update.freq <- 5 num_tree <- 500 verbose <- FALSE data.sets <- createSimulation(num.samples = n.samples, num.variables = n.variables, pct.signals = pct.signals, label = label, pct.imbalance = pct.imbalance, bias = bias, pct.train = 1/3, pct.holdout = 1/3, pct.validation = 1/3, sim.type = type, save.file = NULL, verbose = verbose)
pec.result <- privateEC(train.ds = data.sets$train, holdout.ds = data.sets$holdout, validation.ds = data.sets$validation, label = data.sets$label, importance.algorithm = importance.algorithm, rf.ntree = num_tree, is.simulated = TRUE, bias = bias, update.freq = update.freq, save.file = NULL, signal.names = data.sets$signal.names, verbose = verbose)
knitr::kable(pec.result$algo.acc, caption = "Algorithm Iterations", row.names = FALSE, digits = 3)
plotRunResults(pec.result)
cat("\n Selected Features \n [",pec.result$atts.remain[[which.max(pec.result$algo.acc$holdout.acc)]],"]\n")
rncv_result <- regular_nestedCV(train.ds = data.sets$train, validation.ds = data.sets$holdout, label = data.sets$label, is.simulated = TRUE, ncv_folds = c(10, 10), param.tune = FALSE, learning_method = "rf", importance.algorithm = importance.algorithm, num_tree = num_tree, verbose = verbose)
cat("\n Train Accuracy [",rncv_result$cv.acc,"]\n") cat("\n Validation Accuracy [",rncv_result$Validation,"]\n") cat("\n Selected Features \n [",rncv_result$Features,"]\n") cat("\n Elapsed Time [",rncv_result$Elapsed,"]\n")
cncv_result <- consensus_nestedCV(train.ds = data.sets$train, validation.ds = data.sets$holdout, label = data.sets$label, is.simulated = TRUE, ncv_folds = c(10, 10), param.tune = FALSE, learning_method = "rf", importance.algorithm = importance.algorithm, num_tree = num_tree, verbose = verbose)
cat("\n Train Accuracy [",cncv_result$cv.acc,"]\n") cat("\n Validation Accuracy [",cncv_result$Validation,"]\n") cat("\n Selected Features \n [",cncv_result$Features,"]\n") cat("\n Elapsed Time [",cncv_result$Elapsed,"]\n")
n.samples <- 100 n.variables <- 100 label <- "class" pct.imbalance = 0.5 pct.mixed = 0.5 mixed.type <- c("mainEffect", "interactionScalefree") importance.algorithm <- "ReliefFequalK" bias <- 0.4 pct.signals <- 0.1 update.freq <- 5 num_tree <- 500 verbose <- FALSE data.sets <- createMixedSimulation(num.samples = n.samples, num.variables = n.variables, pct.signals = pct.signals, label = label, pct.imbalance = pct.imbalance, pct.mixed = pct.mixed, mixed.type = mixed.type, bias = bias, pct.train = 1/3, pct.holdout = 1/3, pct.validation = 1/3, save.file = NULL, verbose = verbose)
pec.result <- privateEC(train.ds = data.sets$train, holdout.ds = data.sets$holdout, validation.ds = data.sets$validation, label = data.sets$label, importance.algorithm = importance.algorithm, rf.ntree = num_tree, is.simulated = TRUE, bias = bias, update.freq = update.freq, save.file = NULL, signal.names = data.sets$signal.names, verbose = verbose)
knitr::kable(pec.result$algo.acc, caption = "Algorithm Iterations", row.names = FALSE, digits = 3)
plotRunResults(pec.result)
cat("\n Selected Features \n [",pec.result$atts.remain[[which.max(pec.result$algo.acc$holdout.acc)]],"]\n")
rncv_result <- regular_nestedCV(train.ds = data.sets$train, validation.ds = data.sets$holdout, label = data.sets$label, is.simulated = TRUE, ncv_folds = c(10, 10), param.tune = FALSE, learning_method = "rf", importance.algorithm = importance.algorithm, num_tree = num_tree, verbose = verbose)
cat("\n Train Accuracy [",rncv_result$cv.acc,"]\n") cat("\n Validation Accuracy [",rncv_result$Validation,"]\n") cat("\n Selected Features \n [",rncv_result$Features,"]\n") cat("\n Elapsed Time [",rncv_result$Elapsed,"]\n")
cncv_result <- consensus_nestedCV(train.ds = data.sets$train, validation.ds = data.sets$holdout, label = data.sets$label, is.simulated = TRUE, ncv_folds = c(10, 10), param.tune = FALSE, learning_method = "rf", importance.algorithm = importance.algorithm, num_tree = num_tree, verbose = verbose)
cat("\n Train Accuracy [",cncv_result$cv.acc,"]\n") cat("\n Validation Accuracy [",cncv_result$Validation,"]\n") cat("\n Selected Features \n [",cncv_result$Features,"]\n") cat("\n Elapsed Time [",cncv_result$Elapsed,"]\n")
n.samples <- 100 n.variables <- 100 label <- "qtrait" type <- "mainEffect" importance.algorithm <- "RReliefFequalK" bias <- 0.4 pct.signals <- 0.1 update.freq <- 5 num_tree <- 500 verbose <- FALSE data.sets <- createSimulation(num.samples = n.samples, num.variables = n.variables, pct.signals = pct.signals, label = label, bias = bias, pct.train = 1/3, pct.holdout = 1/3, pct.validation = 1/3, sim.type = type, save.file = NULL, verbose = verbose)
pec.result <- privateEC(train.ds = data.sets$train, holdout.ds = data.sets$holdout, validation.ds = data.sets$validation, label = data.sets$label, importance.algorithm = importance.algorithm, rf.ntree = num_tree, is.simulated = TRUE, bias = bias, update.freq = update.freq, save.file = NULL, signal.names = data.sets$signal.names, verbose = verbose)
knitr::kable(pec.result$algo.acc, caption = "Algorithm Iterations", row.names = FALSE, digits = 3)
plotRunResults(pec.result)
cat("\n Selected Features \n [",pec.result$atts.remain[[which.max(pec.result$algo.acc$holdout.acc)]],"]\n")
rncv_result <- regular_nestedCV(train.ds = data.sets$train, validation.ds = data.sets$holdout, label = data.sets$label, is.simulated = TRUE, ncv_folds = c(10, 10), param.tune = FALSE, learning_method = "rf", importance.algorithm = importance.algorithm, num_tree = num_tree, verbose = verbose)
cat("\n Train R-squared [",rncv_result$cv.acc,"]\n") cat("\n Validation R-squared [",rncv_result$Validation,"]\n") cat("\n Selected Features \n [",rncv_result$Features,"]\n") cat("\n Elapsed Time [",rncv_result$Elapsed,"]\n")
cncv_result <- consensus_nestedCV(train.ds = data.sets$train, validation.ds = data.sets$holdou, label = data.sets$label, is.simulated = TRUE, ncv_folds = c(10, 10), param.tune = FALSE, learning_method = "rf", importance.algorithm = importance.algorithm, num_tree = num_tree, verbose = verbose)
cat("\n Train R-squared [",cncv_result$cv.acc,"]\n") cat("\n Validation R-squared [",cncv_result$Validation,"]\n") cat("\n Selected Features \n [",cncv_result$Features,"]\n") cat("\n Elapsed Time [",cncv_result$Elapsed,"]\n")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.