#' Exhaustively testing ensemble on new dataset
#'@description Predict using an ensemble classifier created from this package
#'
#'@param train_data Data to
#'@param test_data Data to predict with the ensemble
#'@param model_list List of RWeka models fit to train_data
#'@param weight_type vector containing bin weight types from {"bin weighted","bin dictator"}
#'@param comb_rule vector containing combination rules from {"majority vote","average posterior"}
#'@param bin_type vector containing bin types from {"standard","quantile","iterative quantile"}
#'@param bin_features vector containing names of columns to bin
#'@param nbins vector containing number of bins in each dimension for bin_features (will use all combinations of these values)
#'
#'@return test accuracy rates for all weighting/voting/binning specifications requested
#'@export
unbinned_testing <- function(train_data, test_data, model_list,true_classes){
# Make data.frame with all "treatment" combinations
results <- expand.grid(c("unweighted","weighted"),c("majority vote","average posterior"))
names(results) <- c("weight_type","comb_type")
results$accuracy <- NA
train_preds <- make_train_preds(train_data,model_list,true_classes)
# Loop over treatments and save results
for(i in 1:nrow(results)){
weightedEnsemble <- make_ensemble(train_preds = train_preds, model_list = model_list,
weightType = results$weight_type[i], comb_rule = results$comb_type[i])
results$accuracy[i] <- sum(predictEnsemble(weightedEnsemble, test_data) == test_data$true_class)/nrow(test_data)
}
return(results)
}
#' binned_testing
#'@description binned_testing
#'
#'@param train_data Data to
#'@param test_data Data to predict with the ensemble
#'@param model_list List of RWeka models fit to train_data
#'
#'@return
#'@export
binned_testing <- function(train_data, test_data, model_list, bin_features_list, nbins_list,true_classes){
# Make data.frame with all "treatment" combinations
results <- expand.grid(c("bin weighted","bin dictator"),c("majority vote","average posterior"),c("standard","quantile"),
1:length(nbins_list),1:length(bin_features_list))
names(results) <- c("weight_type","comb_type","bin_type","nbins","bin_features")
nbins_names <- sapply(nbins_list,function(x) paste(as.character(x), collapse=" X ") )
bin_features_names <- sapply(bin_features_list,function(x) paste(as.character(x), collapse=" X ") )
results$bin_pair_name <- bin_features_names[results$bin_features]
results$nbins_name <- nbins_names[results$nbins]
results$accuracy <- NA
train_preds <- make_train_preds(train_data,model_list,true_classes)
# Loop over treatments and save results
for(i in 1:nrow(results)){
weightedEnsemble <- make_ensemble(train_preds = train_preds, model_list = model_list,
weightType = results$weight_type[i], comb_rule = results$comb_type[i],
bin_type = results$bin_type[i], bin_features = bin_features_list[[results$bin_features[i]]],
nbins = nbins_list[[results$nbins[i]]])
results$accuracy[i] <- sum(predictEnsemble(weightedEnsemble, test_data) == test_data$true_class)/nrow(test_data)
}
return(results)
}
#' binned_testing
#'@description binned_testing
#'
#'@param train_data Data to
#'@param test_data Data to predict with the ensemble
#'@param model_list List of RWeka models fit to train_data
#'
#'@return
#'@export
iq_binned_testing <- function(train_data, test_data, model_list, bin_features_list, nbins_list,true_classes){
# Make data.frame with all "treatment" combinations
results <- expand.grid(c("bin weighted","bin dictator"),c("majority vote","average posterior"),c("iterative quantile"),
1:length(nbins_list),1:length(bin_features_list))
names(results) <- c("weight_type","comb_type","bin_type","nbins","bin_features")
nbins_names <- sapply(nbins_list,function(x) paste(as.character(x), collapse=" X ") )
bin_features_names <- sapply(bin_features_list,function(x) paste(as.character(x), collapse=" X ") )
results$bin_pair_name <- bin_features_names[results$bin_features]
results$nbins_name <- nbins_names[results$nbins]
results$accuracy <- NA
train_preds <- make_train_preds(train_data,model_list,true_classes)
# Loop over treatments and save results
for(i in 1:nrow(results)){
#!# start timer here
weightedEnsemble <- make_ensemble(train_preds = train_preds, model_list = model_list,
weightType = results$weight_type[i], comb_rule = results$comb_type[i],
bin_type = results$bin_type[i], bin_features = bin_features_list[[results$bin_features[i]]],
nbins = nbins_list[[results$nbins[i]]])
#!# end timer here and record into results data frame
results$accuracy[i] <- sum(predictEnsemble(weightedEnsemble, test_data) == test_data$true_class)/nrow(test_data)
}
return(results)
}
#' make_bin_feature_list_pairs
#'@description make_bin_feature_list_pairs
#'
#'@param train_data Data to
#'@param test_data Data to predict with the ensemble
#'@param model_list List of RWeka models fit to train_data
#'
#'@return
#'@export
make_bin_feature_list_pairs <- function(bin_features_all,ordered=FALSE){
bin_feature_list <- list(NULL)
counter=0
for(i in 1:(length(bin_features_all)-1)){
for(j in (i+1):length(bin_features_all)){
counter <- counter+1
bin_feature_list[[counter]] <- bin_features_all[c(i,j)]
if(ordered)
counter <- counter+1
bin_feature_list[[counter]] <- bin_features_all[c(j,i)]
}
}
return(bin_feature_list)
}
#' make_nbins_list_pairs
#'@description make_nbins_list_pairs
#'
#'@param train_data Data to
#'@param test_data Data to predict with the ensemble
#'@param model_list List of RWeka models fit to train_data
#'
#'@return
#'@export
make_nbins_list_pairs <- function(nbins_all, equal_bins=FALSE){
nbins_list <- list(NULL)
counter=0
if(equal_bins) {
for(i in 1:length(nbins_all)){
counter <- counter+1
nbins_list[[counter]] <- nbins_all[c(i,i)]
}
} else {
for(i in 1:length(nbins_all)){
for(j in i:length(nbins_all)){
counter <- counter+1
nbins_list[[counter]] <- nbins_all[c(i,j)]
if(i != j){
counter <- counter+1
nbins_list[[counter]] <- nbins_all[c(j,i)]
}
}
}
}
return(nbins_list)
}
#' testing_all_ensembles
#'@description testing_all_ensembles
#'
#'@param train_data Data to
#'@param test_data Data to predict with the ensemble
#'@param model_list List of RWeka models fit to train_data
#'
#'@return
#'@export
testing_all_ensembles <- function(train_data,test_data,model_types,bin_features_all,nbins_all,equal_bins=FALSE){
true_classes <- unique(c(levels(train_data$true_class), levels(test_data$true_class)))
model_list <- make_model_list(model_types, train_data)
# collect results of all unbinned ensemble options
unbinned_results <- unbinned_testing(train_data, test_data, model_list,true_classes)
# collect results of all rectangular binned ensemble options
nbins_list <- make_nbins_list_pairs(nbins_all,equal_bins)
bin_features_list <- make_bin_feature_list_pairs(bin_features_all, ordered=FALSE)
rect_binned_results <- binned_testing(train_data, test_data, model_list, bin_features_list, nbins_list,true_classes)
# collect results of all unbinned ensemble options
nbins_list <- make_nbins_list_pairs(nbins_all,equal_bins)
bin_features_list <- make_bin_feature_list_pairs(bin_features_all, ordered=TRUE)
iq_binned_results <- iq_binned_testing(train_data, test_data, model_list, bin_features_list, nbins_list,true_classes)
return(list(unbinned_results=unbinned_results,rect_binned_results=rect_binned_results,iq_binned_results=iq_binned_results))
}
#' cv_testing_all_ensembles
#'@description cv_testing_all_ensembles
#'
#'@param train_data Data to
#'@param test_data Data to predict with the ensemble
#'@param model_list List of RWeka models fit to train_data
#'
#'@return
#'@export
cv_testing_all_ensembles <- function(data,model_types,bin_features_all,nbins_all,equal_bins=FALSE, cv_K=10){
true_classes <- levels(data$true_class)
cv_index <- cv_cohorts(nrow(data),cv_K)
results_list <- list(NULL)
for(fold in 1:cv_K){
train_data <- data[cv_index!=fold, ]
test_data <- data[cv_index==fold, ]
results_list[[fold]] <- testing_all_ensembles(train_data,test_data,model_types,bin_features_all,nbins_all,equal_bins)
}
results_list_all <- results_list[[1]]
cv_weights <- sapply(1:cv_K, function(x) sum(cv_index==x))
results_list_all[[1]]$accuracy <- sapply(1:nrow(results_list[[1]][[1]]), function(i) weighted.mean(sapply(1:cv_K, function(fold) results_list[[fold]][[1]]$accuracy)[i,],w=cv_weights))
results_list_all[[2]]$accuracy <- sapply(1:nrow(results_list[[1]][[2]]), function(i) weighted.mean(sapply(1:cv_K, function(fold) results_list[[fold]][[2]]$accuracy)[i,],w=cv_weights))
results_list_all[[3]]$accuracy <- sapply(1:nrow(results_list[[1]][[3]]), function(i) weighted.mean(sapply(1:cv_K, function(fold) results_list[[fold]][[3]]$accuracy)[i,],w=cv_weights))
return(results_list_all)
}
#' eval_cv_test_results
#'@description eval_cv_test_results
#'
#'@param train_data Data to
#'
#'@return
#'@export
eval_cv_test_results <- function(cv_test_results_list){
cv_test_abalone$unbinned_results
cv_test_abalone$rect_binned_results %>%
group_by(weight_type, comb_type,bin_type) %>%
summarize(tuned_accuracy = max(accuracy),
nbins_name=nbins_name[which.max(accuracy)],
bin_pair_name=bin_pair_name[which.max(accuracy)])
cv_test_abalone$iq_binned_results %>%
group_by(weight_type, comb_type,bin_type) %>%
summarize(tuned_accuracy = max(accuracy),
nbins_name=nbins_name[which.max(accuracy)],
bin_pair_name=bin_pair_name[which.max(accuracy)])
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.