#' @title Calculates PDP for multiple models
#' @description Calculates partial dependency plots (PDPs) from multiple h2o models.
#' @param models a list of h2o models.
#' @param data a list of datasets.
#' @param vars a character vector of variables to create PDPs off.
#' @param row_index a numeric vector of dataset rows numbers to be used to calculate PDPs. To
#' use entire dataset, set to -1.
#' @return a list of data.tables containing the calculated PDPs for each model. Each data.table
#' has the outputs for each variable in `vars` combined into the one data.table.
#' @details Creates a list of data.tables. Each data.table corresponds to the calculated PDPs
#' values from a single model. In each data.table, contains the PDPs values for each variable
#' combined together into a single data.table.
#'
#' For creating pdps, it is recommended to instead use the \code{nano_pdp} function
#' which is a wrapper for a series of functions which creates pdps. It is able to create
#' pdps directly from a nano object, for both single and multi models, and has the option
#' to return plots of the pdps.
#' @examples
#' \dontrun{
#' if(interactive()){
#' library(h2o)
#' library(nano)
#'
#' h2o.init()
#'
#' # import dataset
#' data(property_prices)
#' train <- as.h2o(property_prices)
#'
#' # set the response and predictors
#' response <- "sale_price"
#' var <- setdiff(colnames(property_prices), response)
#'
#' # build grids
#' grid_1 <- h2o.grid(x = var,
#' y = response,
#' training_frame = train,
#' algorithm = "randomForest",
#' hyper_params = list(ntrees = 1:2),
#' nfolds = 3,
#' seed = 628)
#'
#' grid_2 <- h2o.grid(x = var,
#' y = response,
#' training_frame = train,
#' algorithm = "randomForest",
#' hyper_params = list(ntrees = 3:4),
#' nfolds = 3,
#' seed = 628)
#'
#' model_1 <- h2o.getModel(grid_1@model_ids[[1]])
#' model_2 <- h2o.getModel(grid_2@model_ids[[1]])
#'
#' # calculate pdp
#' nano_multi_pdp(models = list(model_1, model_2),
#' data = list(property_prices),
#' vars = c("lot_size", "income"))
#'
#' }
#' }
#' @rdname nano_multi_pdp
#' @export
nano_multi_pdp <- function (models, data, vars, row_index = -1) {
if (!is.list(models)) {
stop("`models` must be a list.",
call. = FALSE)
}
if (!all(grepl("H2O", sapply(models, function(x) as.vector(class(x))))) |
!all(grepl("Model", sapply(models, function(x) as.vector(class(x)))))) {
stop("`models` must be a list of h2o models.",
call. = FALSE)
}
if (!is.list(data)) {
stop("`data` must be a list.",
call. = FALSE)
}
# convert data to a list of h2oframes
if (length(data) == 1 & length(models) != 1) {
data <- rep(list(data[[1]]), length(models))
}
for (model in models) {
if (!all(vars %in% model@parameters$x)) {
stop("`vars` must be predictors in all the models in `models`.",
call. = FALSE)
}
}
# calculate pdps for each model, for each variable
result <- list()
for (i in 1:length(models)) {
model <- models[[i]]
data_mod <- data[[i]]
result[[i]] <- nano::nano_single_pdp(model = model,
data = data_mod,
vars = vars,
row_index = row_index)
}
return(result)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.