Nothing
#' @title Print outliers in human-readable format
#' @description Pretty-prints outliers as output by the `predict` function from a Bagged OutlierTrees
#' model (as generated by function `bagged.outliertrees`).
#' @param x Outliers as returned by predict method on an object from `bagged.outliertrees`.
#' @param outliers_print Maximum number of outliers to print.
#' @param ... No use.
#' @return The same input `x` that was passed (as `invisible`).
#' @seealso \link{bagged.outliertrees} \link{predict.bagged.outliertrees}
#' @examples
#' library(bagged.outliertrees)
#'
#' ### example dataset with interesting outliers
#' data(hypothyroid)
#'
#' ### fit a Bagged OutlierTrees model
#' model <- bagged.outliertrees(hypothyroid,
#' ntrees = 10,
#' subsampling_rate = 0.5,
#' z_outlier = 6,
#' nthreads = 1
#' )
#'
#' ### use the fitted model to find outliers in the training dataset
#' outliers <- predict(model,
#' newdata = hypothyroid,
#' min_outlier_score = 0.5,
#' nthreads = 1
#' )
#'
#' ### print the top-10 outliers in human-readable format
#' print(outliers, outliers_print = 10)
#' @export
print.bagged.outlieroutputs <- function(x, outliers_print = 15, ...) {
outliers_print <- min(outliers_print, length(x))
cat(sprintf(
"Reporting top %d outliers [out of %d found]\n\n",
outliers_print,
length(x)
))
for (i in names(x[1:outliers_print])) {
### print suspicious value
cat(sprintf("row [%s] - suspicious column: [%s] - ", i, x[[i]]$suspicious_value$column))
cat(sprintf("suspicious value: [%s]\n", x[[i]]$suspicious_value$value))
### print distribution
if (!is.na(as.numeric(x[[i]]$suspicious_value$value))) {
if (as.numeric(x[[i]]$suspicious_value$value) > as.numeric(x[[i]]$group_statistics$mean)) {
cat(sprintf(
"\tdistribution: %.2f%% <= [%s] - [mean: %s] - [sd: %s] - [norm. obs: %s]\n",
as.numeric(x[[i]]$group_statistics$pct) * 100.,
x[[i]]$group_statistics$thr,
x[[i]]$group_statistics$mean,
x[[i]]$group_statistics$sd,
x[[i]]$group_statistics$n_obs
))
} else {
cat(sprintf(
"\tdistribution: %.2f%% >= [%s] - [mean: %s] - [sd: %s] - [norm. obs: %s]\n",
as.numeric(x[[i]]$group_statistics$pct) * 100.,
x[[i]]$group_statistics$thr,
x[[i]]$group_statistics$mean,
x[[i]]$group_statistics$sd,
x[[i]]$group_statistics$n_obs
))
}
} else {
cat(sprintf(
"\tdistribution: %.2f%% in [%s]\n",
as.numeric(x[[i]]$group_statistics$pct) * 100.,
x[[i]]$group_statistics$thr
))
cat(sprintf(
"\t( [norm. obs: %s] - [prior_prob: %.2f%%] - [next smallest: %.2f%%] )\n",
x[[i]]$group_statistics$n_obs,
as.numeric(x[[i]]$group_statistics$sd) * 100.,
as.numeric(x[[i]]$group_statistics$mean) * 100.
))
}
### print conditions
if (length(x[[i]]$conditions) > 0) {
conditions <- as.data.frame(x[[i]]$conditions)
for (j in nrow(conditions)) {
switch(conditions$comparison[j],
"is NA" = {
cat(sprintf("\t\t[%s] is NA\n", conditions$column[j]))
},
"<=" = {
cat(sprintf(
"\t\t[%s] <= [%s] (value: %s)\n",
conditions$column[j], conditions$value_comp[j], conditions$value_this[j]
))
},
">" = {
cat(sprintf(
"\t\t[%s] > [%s] (value: %s)\n",
conditions$column[j], conditions$value_comp[j], conditions$value_this[j]
))
},
"=" = {
cat(sprintf("\t\t[%s] = [%s]\n", conditions$column[j], conditions$value_comp[j]))
},
"!=" = {
cat(sprintf(
"\t\t[%s] != [%s] (value: %s)\n",
conditions$column, conditions$value_comp[j], conditions$value_this[j]
))
},
"in" = {
cat(sprintf(
"\t\t[%s] in [%s] (value: %s)\n",
conditions$column[j], conditions$value_comp[j], conditions$value_this[j]
))
}
)
}
}
cat("\n\n")
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.