#' Heuristic method of comparing if two datasets are similar
#'
#' @param egalitarian egalitarian object
#' @param n number of runs of kmeans algorithm
#'
#' @return data frame
#'
#' @importFrom stats as.formula kmeans model.matrix
#'
#' @export
#'
global_similarity <- function(egalitarian, n) {
target_name <- dataset_ <- NULL
kmeans_results <- lapply(1:n,
function(x)
kmeans(model.matrix(as.formula(paste0(target_name, "~.")),
data = select(egalitarian$data, -dataset_)),
centers = 2))
kmeans_compare <- lapply(kmeans_results,
function(x)
data.frame(predicted = x$cluster,
true = egalitarian$data$dataset_))
results <- lapply(kmeans_compare, function(x) {
kmeans_acc <- prop.table(table(x), margin = 2)
if(kmeans_acc[1, 1] >= kmeans_acc[2, 1]) {
training_accuracy <- kmeans_acc[1, 1]
validation_accuracy <- kmeans_acc[2, 2]
} else {
training_accuracy <- kmeans_acc[2, 1]
validation_accuracy <- kmeans_acc[1, 2]
}
data.frame(training_detected = training_accuracy,
vaidation_detected = validation_accuracy)
})
results <- dplyr::bind_rows(results)
dplyr::summarise_all(results, mean)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.