quiet <- function(fun, quiet = TRUE) {
if (!quiet)
return(fun)
sink(tempfile())
on.exit(sink())
invisible(force(fun))
}
set.tictoc <- function (which, id) {
now <- proc.time()["elapsed"]
aux <- getOption(which)
name <- sprintf("%s_%s", which, id)
if (is.null(aux))
aux <- list()
aux[[name]] <- now
return(aux)
}
tic <- function(id = 1, quiet = FALSE) {
tic <- set.tictoc("tic", id)
options(tic = tic)
if (!quiet)
message(sprintf("Tic `id = %s` start time: %s",
id, Sys.time()))
invisible(tic)
}
toc <- function (id = 1, msg = "Elapsed time:", units = TRUE,
signif = 3, quiet = FALSE) {
if (!sprintf("tic_%s", id) %in% names(getOption("tic")))
stop(sprintf("You need to tic(id = '%s') before you toc(id = '%s')",
id, id))
toc <- set.tictoc("toc", id)
options(toc = toc)
tic <- getOption("tic")[[paste0("tic_", id)]]
toc <- getOption("toc")[[paste0("toc_", id)]]
time <- as.numeric(toc - tic)
if (units) {
x <- time
u <- ifelse(x < 60, "s", ifelse(x < 3600, "m",
ifelse(x < 86400, "h", "d")))
d <- ifelse(x < 60, 1, ifelse(x < 3600, 60, ifelse(x <
86400, 3600, 86400)))
timer <- paste0(signif(time/d, signif), u)
}
else timer <- paste0(signif(time, signif), "s")
msg <- sprintf("%s %s", msg, timer)
if (!quiet)
message(msg)
res <- list(tic = tic, toc = toc, time = time, msg = msg)
invisible(res)
}
wafflecut <- function (x, intervals, buckets = intervals, na.bucket = NA,
unmatched.bucket = NA, out.as.factor = TRUE) {
l <- length(intervals)
if (l != length(buckets)) {
stop("FancyCut requires a 1-1 map from intervals to buckets")
}
if (!is.numeric(x))
stop("'x' must be numeric")
out <- rep(NA, length(x))
intervals_df <- nano:::parse_intervals(intervals)
for (index in 1:l) {
b <- buckets[index]
lower <- intervals_df$left[index]
upper <- intervals_df$right[index]
left <- intervals_df$left_strict[index]
right <- intervals_df$right_strict[index]
mask <- rep(FALSE, length(x))
if (left & right) {
mask <- x >= lower & x <= upper
}
if (left & !right) {
mask <- x >= lower & x < upper
}
if (!left & right) {
mask <- x > lower & x <= upper
}
if (!left & !right) {
mask <- x > lower & x < upper
}
out[mask] <- b
}
if (sum(is.na(x)) == 0L) {
na.bucket <- NULL
}
else {
out[is.na(x)] <- na.bucket
}
if (sum(is.na(out)) == 0L) {
unmatched.bucket <- NULL
}
else {
out[is.na(out)] <- unmatched.bucket
}
levels <- unique(c(buckets, na.bucket, unmatched.bucket))
if (out.as.factor) {
return(factor(out, levels = levels, exclude = NULL))
}
else {
return(out)
}
}
# define parse_intervals function. From fancycut package but seems to be missing from the
# package now, so manualy created function here.
parse_intervals <- function(intervals) {
rx <- "^\\s*(\\(|\\[)\\s*((?:[-+]?\\d*\\.?\\d+(?:[eE][-+]?\\d+)?)|(?:[-+]?Inf))\\s*,\\s*((?:[-+]?\\d*\\.?\\d+(?:[eE][-+]?\\d+)?)|(?:[-+]?Inf))\\s*(\\)|\\])\\s*$"
lindex <- regexec(rx, intervals)
lmatch <- regmatches(intervals, lindex)
nrows <- length(lmatch)
ncols <- sapply(lmatch, length)
mmatch <- matrix(NA_character_, nrow = nrows, ncol = 5)
for (x in 1:nrows) {
row <- lmatch[[x]]
n <- length(row)
if (n > 0) {
mmatch[x, 1:n] <- lmatch[[x]][1:n]
}
}
intervals_df <- data.frame(
interval = intervals,
left = as.numeric(mmatch[, 3]),
right = as.numeric(mmatch[, 4]),
left_strict = (mmatch[, 2] == '['),
right_strict = (mmatch[, 5] == ']'),
match_count = ncols,
stringsAsFactors = FALSE
)
# Fix if point values
rx <- "^[-+]?\\d*\\.?\\d+(?:[eE][-+]?\\d+)?$"
points <- grepl(rx, intervals)
intervals_df$interval[points] <- intervals[points]
intervals_df$left[points] <- as.numeric(intervals[points])
intervals_df$right[points] <- as.numeric(intervals[points])
intervals_df$right_strict[points] <- TRUE
intervals_df$left_strict[points] <- TRUE
intervals_df$match_count[points] <- 5
for (x in 1:nrows) {
if (intervals_df$match_count[x] != 5) {
warning(paste0('The interval "',intervals_df$interval[x],'" is malformed.'))
next
}
if (intervals_df$right[x] < intervals_df$left[x]) {
warning(paste0('The interval "',intervals_df$interval[x],'" has right < left.'))
}
if (intervals_df$right[x] == intervals_df$left[x] &
(!intervals_df$left_strict[x] | !intervals_df$right_strict[x])) {
warning(paste0('The interval "',intervals_df$interval[x],'" is malformed.'))
}
}
return(intervals_df)
}
# function copied from fancycut package
wafflecut <- function (x, intervals, buckets = intervals, na.bucket = NA,
unmatched.bucket = NA, out.as.factor = TRUE) {
l <- length(intervals)
if (l != length(buckets)) {
stop("FancyCut requires a 1-1 map from intervals to buckets")
}
if (!is.numeric(x))
stop("'x' must be numeric")
out <- rep(NA, length(x))
intervals_df <- nano:::parse_intervals(intervals)
for (index in 1:l) {
b <- buckets[index]
lower <- intervals_df$left[index]
upper <- intervals_df$right[index]
left <- intervals_df$left_strict[index]
right <- intervals_df$right_strict[index]
mask <- rep(FALSE, length(x))
if (left & right) {
mask <- x >= lower & x <= upper
}
if (left & !right) {
mask <- x >= lower & x < upper
}
if (!left & right) {
mask <- x > lower & x <= upper
}
if (!left & !right) {
mask <- x > lower & x < upper
}
out[mask] <- b
}
if (sum(is.na(x)) == 0L) {
na.bucket <- NULL
}
else {
out[is.na(x)] <- na.bucket
}
if (sum(is.na(out)) == 0L) {
unmatched.bucket <- NULL
}
else {
out[is.na(out)] <- unmatched.bucket
}
levels <- unique(c(buckets, na.bucket, unmatched.bucket))
if (out.as.factor) {
return(factor(out, levels = levels, exclude = NULL))
}
else {
return(out)
}
}
# prints each element of a vector separate
print_vector <- function(vec, sep = ", ") {
out <- vec[1]
for (i in 2:length(vec)) {
out <- paste(out, vec[i], sep = sep)
}
return(out)
}
# extracts meta data from model
model_meta <- function(model, data) {
model_info <- h2o:::.process_models_or_automl(model, data)
meta <- list()
meta[["x"]] <- model_info$x
meta[["y"]] <- model_info$y
meta[["is_classification"]] <- model_info$is_classification
meta[["is_multinomial_classification"]] <- model_info$is_multinomial_classification
meta[["distribution"]] <- model@parameters$distribution
meta[["hyper_params"]] <- model@allparameters[c("ntrees", "max_depth", "min_rows",
"learn_rate", "distribution",
"tweedie_power", "sample_rate",
"col_sample_rate",
"col_sample_rate_per_tree",
"min_split_improvement")]
meta[["hyper_params"]] <- meta[["hyper_params"]][!sapply(meta[["hyper_params"]], is.null)]
meta[["search_conditions"]] <- model@allparameters[c("stopping_rounds", "stopping_metric", "stopping_tolerance")]
meta[["max_runtime_secs"]] <- model@allparameters$max_runtime_secs
meta[["seed"]] <- model@allparameters$seed
meta[["description"]] <- ""
return(meta)
}
# calculates metrics from h2o model. This will be input into nano object
model_metrics <- function(model, data = NULL) {
# determine model type
if (class(model) == "H2ORegressionModel") {
model_type = "regression"
} else if (class(model) == "H2OMultinomialModel") {
model_type = "multinomial classification"
} else {
model_type = "binomial classification"
}
# assign list to hold metrics
metrics <- list(train_metrics = NULL,
test_metrics = NULL,
cv_metrics = NULL,
holdout_metrics = NULL)
# functions to calculate metrics
metric_fun <- function(fun, metric) {
# calculate train metrics
metrics$train_metrics[[metric]] <- fun(model, train = TRUE)
# calculate test metrics
if (!is.null(model@model$validation_metrics@metrics)) {
metrics$train_metrics[[metric]] <- fun(model, valid = TRUE)
}
# calculate cv metrics
if (!is.null(model@model$cross_validation_metrics@metrics)) {
metrics$cv_metrics[[metric]] <- fun(model, xval = TRUE)
}
return(metrics)
}
# calculate metrics for all model types
metrics <- metric_fun(h2o::h2o.r2 , "r2")
metrics <- metric_fun(h2o::h2o.mse , "mse")
metrics <- metric_fun(h2o::h2o.rmse, "rmse")
# calculate extra metrics if regression
if (model_type == "regression") {
metrics <- metric_fun(h2o::h2o.rmsle , "rmsle")
metrics <- metric_fun(h2o::h2o.mae , "mae")
metrics <- metric_fun(h2o::h2o.mean_residual_deviance, "mean_residual_dev")
}
# calculate extra metrics if classification
if (grepl("classification", model_type)) {
metrics <- metric_fun(h2o::h2o.mean_per_class_error, "mean_class_err")
metrics <- metric_fun(h2o::h2o.logloss , "logloss")
}
# calculate extra metrics if binomial classification
if (model_type == "binomial classification") {
metrics <- metric_fun(h2o::h2o.giniCoef , "gini_coef")
metrics <- metric_fun(h2o::h2o.mcc , "mcc")
metrics <- metric_fun(h2o::h2o.F1 , "f1")
metrics <- metric_fun(h2o::h2o.F0point5 , "f0point5")
metrics <- metric_fun(h2o::h2o.F2 , "f2")
metrics <- metric_fun(h2o::h2o.accuracy , "accuracy")
metrics <- metric_fun(h2o::h2o.auc , "auc")
metrics <- metric_fun(h2o::h2o.aucpr , "aucpr")
metrics <- metric_fun(h2o::h2o.kolmogorov_smirnov, "ks")
}
# calculate metrics for holdout dataset
if ("data_id" %in% names(data)) {
if ("holdout" %in% data[["data_id"]]) {
# calculate performance object for each data type
holdout_perf <- h2o::h2o.performance(model, h2o::as.h2o(data[data_id == "holdout"]))
# function to calculate holdout metric
metric_fun <- function(fun, metric) {
metrics[["holdout_metrics"]][[metric]] <- fun(holdout_perf)
return(metrics)
}
# calculate metrics
metrics <- metric_fun(h2o::h2o.r2 , "r2")
metrics <- metric_fun(h2o::h2o.mse , "mse")
metrics <- metric_fun(h2o::h2o.rmse , "rmse")
# calculate extra metrics if regression
if (model_type == "regression") {
metrics <- metric_fun(h2o::h2o.rmsle , "rmsle")
metrics <- metric_fun(h2o::h2o.mae , "mae")
metrics <- metric_fun(h2o::h2o.mean_residual_deviance, "mean_residual_dev")
}
# calculate extra metrics if classification
if (grepl("classification", model_type)) {
metrics <- metric_fun(h2o::h2o.mean_per_class_error, "mean_class_err")
metrics <- metric_fun(h2o::h2o.logloss , "logloss")
}
# calculate extra metrics if binomial classification
if (model_type == "binomial classification") {
metrics <- metric_fun(h2o::h2o.giniCoef , "gini_coef")
metrics <- metric_fun(h2o::h2o.mcc , "mcc")
metrics <- metric_fun(h2o::h2o.F1 , "f1")
metrics <- metric_fun(h2o::h2o.F0point5 , "f0point5")
metrics <- metric_fun(h2o::h2o.F2 , "f2")
metrics <- metric_fun(h2o::h2o.accuracy , "accuracy")
metrics <- metric_fun(h2o::h2o.auc , "auc")
metrics <- metric_fun(h2o::h2o.aucpr , "aucpr")
metrics <- metric_fun(h2o::h2o.kolmogorov_smirnov, "ks")
}
}
}
return(metrics)
}
# function to increase space of nano object if it is full
nano_increase <- function(nano) {
if (sum(sapply(nano$grid, is.logical)) == 0) {
nano$grid <- append(nano$grid , rep(list(NA) , 10))
nano$model <- append(nano$model , rep(list(NA) , 10))
nano$metric <- append(nano$metric , rep(list(NA) , 10))
nano$data <- append(nano$data , rep(data.table(NA), 10))
nano$varimp <- append(nano$varimp , rep(list(NA) , 10))
nano$pdp <- append(nano$pdp , rep(list(NA) , 10))
nano$ice <- append(nano$ice , rep(list(NA) , 10))
nano$interaction <- append(nano$interaction, rep(list(NA) , 10))
nano$meta <- append(nano$meta , rep(list(NA) , 10))
}
return(nano)
}
# check if already connection to h2o exists
check_h2o_connection <- function() {
assign("is_h2o_running", TRUE, .GlobalEnv)
tryCatch(
expr = {
h2o::h2o.init(startH2O = FALSE)
},
error = function(e){
assign("is_h2o_running", FALSE, .GlobalEnv)
}
)
return(is_h2o_running)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.