#' Main function collecting the functions of MDRDist
#'
#' @param data A data.frame with integer categories, that contains the
#' observations which should be clustered. Missing data are to be encoded
#' as integer -9, all other categories have to be positive.
#' If the data contain a column "status", this column will be discarded.
#' @param working_dir An existing, empty directory in which to drop intermediate
#' stages
#' @param n_rounds The number of repeated MBMDR-calls
#' @param use_existing_models check, whether the working directory is empty and
#' throw an error, if not -- unless former models are allowed by this flag.
#' @param evaluate_models an option that stops computation after calling MB-MDR.
#' This function is intended to be used to calculate models once and run
#' several with several similarity weights afterwards.
#' @param similarity_weights how should different interaction-class-labels be
#' combined? This is a list of lists as generated by function similarity_weights
#' @param comparison_distribution which distribution should be assumed for
#' generated data? This argument should be passed as function out of the set
#' ( "compdist_estimated_uncorellated", "compdist_uniform")
#' @param model_assumption how should different interaction classes be
#' interpreted with respect to similarity? This argument should be passed as
#' function out of the set ( "all_same", "all_different")
#' @param further_options a structure to manipulate further options.
#' It is passed as a list of two-element-lists, where the first element describes
#' one of the parameters in function "set_options()" and the second element
#' the new value.
#'
#' @return The function returns
#' dist: a distance matrix (in dist-structure) and
#' importance: a table containing the names and frequency of most appearing
#' interactions
#' @export
#'
#' @import parallelMap
#' @import reshape2
#' @import checkmate
#'
mdr_dist <- function(data,
working_dir,
n_rounds,
use_existing_models = FALSE,
evaluate_models = TRUE,
similarity_weights = MDRDist_weights("first_test"),
comparison_distribution = compdist_estimated_uncorellated,
model_assumption = all_different,
further_options = NULL){
checkmate::assertDataFrame(x = data, types = "numeric", any.missing = FALSE)
checkmate::assertDirectory(x = working_dir)
checkmate::assertNumber(x = n_rounds, lower = 0, upper = 10000)
checkmate::assertLogical(x = use_existing_models)
checkmate::assertLogical(x = evaluate_models)
checkmate::assertClass(x = similarity_weights, classes = "similarity_weights")
checkmate::assertFunction(x = model_assumption)
checkmate::assertFunction(x = comparison_distribution)
checkmate::assertList(x = further_options, null.ok = TRUE)
working_dir <- path.expand(working_dir)
if(length(x = dir(working_dir)) > 0){
checkmate::assert(use_existing_models,
.var.name = "empty working directory")
warning("The working directory you choose was not empty. The programm
cannot guarantee, that the results are meaningful in any way!")
}
### end of checks
if(!is.null(further_options)){
names(further_options) <- sprintf("mdrdist_%s", names(further_options))
}
set_options()
options(further_options)
if(n_rounds > 0){
nothing_to_save <- parallelMap::parallelLapply(x = 1:n_rounds,
fun = repeat_mbmdr,
data = data,
working_dir = working_dir,
comparison_distribution = comparison_distribution,
further_options = further_options)
}
if(evaluate_models){
# read in models
calculated_models <- read_mbmdr(list.dirs(path = working_dir,
full.names = TRUE,
recursive = FALSE))
if(nrow(calculated_models) < 1) {
# catch the case, that no model is significant
dist <- matrix(1, nrow = nrow(data), ncol = nrow(data),
dimnames = list(rownames(data), rownames(data)))
calculated_models_reduced <- list()
} else {
# else continue with evaluation
calculated_models_reduced <- reduce_calculated_models(calculated_models = calculated_models)
calculated_models_reduced$models_int <- lapply(X = calculated_models_reduced$models,
FUN = model_assumption)
classified_data <- classify_data(mbmdr_return = calculated_models_reduced,
data = data)
similarity <- calculate_similarity(classified_data = classified_data,
similarity_weights = similarity_weights,
interaction_importance_weights = calculated_models_reduced$count)
if(max(as.vector(x=similarity)) > 1){
warning(sprintf(fmt = "Similarity was not scaled properly to 1! The maximum was actually %f.",
max(as.vector(x=similarity))))
}
dist <- (abs(1 - similarity) ^ getOption("mdrdist_dissimilarity_exponent"))
}
res <- list(dist = dist,
importance = calculated_models_reduced,
interaction = calculated_models)
class(res) <- "MDRDist-distance"
return(res)
} else{
print(sprintf("The models are saved in %s.",
working_dir))
return(working_dir)
}
}
repeat_mbmdr <- function(x, data, working_dir, comparison_distribution, further_options){
set_options()
options(further_options)
Data2 <- build_supervised_sample(data = data,
fraction_of_real_data = getOption("mdrdist_fraction_of_real_data"),
fraction_of_artificial_data = getOption("mdrdist_fraction_of_artificial_data"),
do_bootstrapping = getOption("mdrdist_do_bootstrapping"),
method = comparison_distribution)
nothing_to_save <- call_mbmdr(formula = status ~ .,
data = Data2,
working_dir = working_dir)
return(TRUE)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.