#' Check for problems in datasets
#'
#' @param training training dataset
#' @param validation validation dataset
#'
#' @return data frame
#'
#' @export
#'
check_data_validity <- function(training, validation) {
if(length(setdiff(colnames(training),
colnames(validation))) > 0) {
warning("Some variables are missing in the validation dataset. \n
Additional variables will be dropped. \n")
}
if(length(setdiff(colnames(validation),
colnames(training))) > 0) {
warning("There are additional variable present in the validation set. \n
These variables will be dropped. \n")
}
for(x in intersect(colnames(training), colnames(validation))) {
if(class(training[, x]) != class(validation[, x])) {
warning("Mismatched variable classes in columns ", x, "\n")
}
}
for(x in intersect(colnames(training), colnames(validation))) {
if(is.factor(training[, x])) {
if(length(setdiff(levels(training[, x]), levels(validation[, x]))) > 0 |
length(setdiff(levels(validation[, x]), levels(training[, x]))) > 0) {
warning("Unequal factor levels in column ", x, "\n")
}
}
}
}
#' Preprocess data and model before analysis.
#'
#' @param training_set Data frame on which model was trained.
#' @param validation_set Data frame with the same columns as training_set.
#' @param target Character, name of the response variable.
#' @param model Model to be explained by local explainers.
#' If NULL, only global comparisons of distributions can be
#' performed on the result object.
#'
#' @return list of class egalitarian that consists of
#' \item{data}{data object from `egalitarian` object}
#' \item{model}{model from `egalitarian` object}
#' \item{target_name}{name of the response variable}
#'
#' @importFrom dplyr mutate bind_rows
#'
#' @export
#'
egalitarian <- function(training_set, validation_set, target, model = NULL) {
check_data_validity(training_set, validation_set)
cols <- intersect(colnames(training_set), colnames(validation_set))
combined_datasets <- mutate(suppressWarnings(bind_rows(training_set[, cols],
validation_set[, cols])),
dataset_ = c(rep("training",
nrow(training_set)),
rep("validation",
nrow(validation_set))))
result <- list(data = combined_datasets,
model = model,
target_name = target)
class(result) <- c("egalitarian", class(result))
result
}
#' Generic print method for egalitarian class.
#'
#' @param x object of class egalitarian.
#' @param ... Currently ignored.
#'
#' @export
#'
print.egalitarian <- function(x, ...) {
cat("EgalitaRian object \n")
cat("Number of predictor variables: ", ncol(x$data) - 2, "\n")
cat("Number of observations in training set: ", sum(x$data$dataset_ == "training"), "\n")
cat("Number of observations in validation set: ", sum(x$data$dataset_ == "validation"), "\n")
if(is.null(x$model)) {
cat("Model object is not present")
} else {
cat("Model object is present \n")
cat("Model class: ", class(x$model))
}
invisible(x)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.