Nothing
ice = function(object, X, y,
predictor, predictfcn,
verbose = TRUE, frac_to_build = 1, indices_to_build = NULL,
num_grid_pts, logodds = FALSE, probit = FALSE, ...){
MAX_NUM_UNIQUE_PTS_NOMINAL = 5
#check for factor
if (inherits(X[, predictor], "factor") || inherits(X[, predictor], "character")){
stop("ICE does not support factor attributes")
}
if(!is.numeric(frac_to_build) || frac_to_build > 1 || frac_to_build < 0 ){
stop("frac_to_build must be in (0, 1]")
}
if(!missing(y) && inherits(y, "factor")){
stop("Do not pass y when it is categorical variable.")
}
if (logodds && probit){
stop("You must employ either logodds OR probit but not both.")
}
######## (1) check inputs
# (a) check for valid prediction routine...
if (!missing(predictfcn)){
fcn_args = names(formals(predictfcn))
if (!("object" %in% fcn_args && "newdata" %in% fcn_args)){
stop("predictfcn must have 'object' and 'newdata' arguments")
} else {
use_generic = FALSE
}
} else {
#check for default prediction associated with class(object)
#this is harder than it should be because some classes have class(object)
#return a vector (randomForest, for instance).
classvec = class(object) #may have multiple classes
found_predict = FALSE
i = 1
for (i in 1 : length(classvec)){
if (length(grep("predict", methods(class = classvec[i]))) > 0){
found_predict = TRUE
break
}
}
if (!found_predict){
stop("No generic predict method found for this object.")
} else {
use_generic = TRUE
}
}
######## (2)
N = nrow(X)
# grid points
#now create xj-to-predict values
xj = X[, predictor] #fix so predictor can be given as a name.
grid_pts = sort(X[, predictor])
# there are 3 cases: frac_to_build specificed, indices specified, or nothing specified.
# 1: check fraction to build
if (frac_to_build < 1){
# we don't sample randomly -- we ensure uniform sampling across
# quantiles of xj so as to not leave out portions of the dist'n of x.
order_xj = order(xj)
X = X[order_xj, , drop = FALSE] #ordered by column xj
nskip = round(1 / frac_to_build)
xj_sorted_indices_to_build = seq(1, N, by = nskip)
X = X[xj_sorted_indices_to_build, , drop = FALSE]
xj = X[, predictor]
grid_pts = sort(xj)
indices_to_build = order_xj[xj_sorted_indices_to_build]
} else {
#2: indices specified:
if (!missing(indices_to_build)){
#extract the indicies the user asks for first, THEN order the remaining
#X matrix by the xj's left in the sub-sample defined by the user.
if (frac_to_build < 1){
stop("\"frac_to_build\" and \"indices_to_build\" cannot both be specified simultaneously")
}
X = X[indices_to_build, , drop = FALSE]
xj = X[, predictor]
order_xj = order(xj)
#order the remaining by column xj
X = X[order_xj, ] #ordered by column xj
grid_pts = sort(xj)
xj = X[, predictor] #end if for indices checking.
} else { #3: nothing specified, so just re-order by xj
X = X[order(xj), , drop = FALSE]
xj = X[, predictor]
}
}
grid_pts = unique(grid_pts)
num_unique_pts = length(grid_pts)
#now handle less grid pts
if (!missing(num_grid_pts)){
if (num_grid_pts > num_unique_pts){
warning(paste("the number of grid points specified,", num_grid_pts, "is larger than the number of unique values,", num_unique_pts, "(defaulting to number of unique values)"))
} else {
grid_pts = grid_pts[round(seq(from = 1, to = num_unique_pts, length.out = num_grid_pts))]
}
}
# generate partials
if (use_generic){
actual_prediction = predict(object, X, ...)
} else {
actual_prediction = predictfcn(object = object, newdata = X)
}
if (inherits(actual_prediction, "factor")){
stop("The predict function must return probabilities (not levels of a factor).")
}
if (logodds || probit){
min_pred = min(actual_prediction)
max_pred = max(actual_prediction)
#do some p_hat \in [0, 1] checking
if (min_pred < 0){
stop("the logodds option is on but predict returns values less than 0 (these should be probabilities!)")
} else if (max_pred > 1){
stop("the logodds option is on but predict returns values greater than 1 (these should be probabilities!)")
}
if (min_pred == 0){
second_lowest = min(actual_prediction[actual_prediction > 0])
if (is.na(second_lowest)){
second_lowest = .0001
}
lowest_practical_prob = mean(c(second_lowest, 0))
actual_prediction[actual_prediction == 0] = lowest_practical_prob
#warning("At least one probability was predicted to be 0, ICEbox is using ", lowest_practical_prob, " for the value(s) instead.")
}
if (max_pred == 1){
second_highest = max(actual_prediction[actual_prediction < 1])
if (is.na(second_highest)){
second_highest = .9999
}
highest_practical_prob = mean(c(second_highest, 1)) #arbitrarily, halfway between 1 and second_highest
actual_prediction[actual_prediction == 1] = highest_practical_prob
#warning("At least one probability was predicted to be 1, ICEbox is using ", highest_practical_prob, " for the value(s) instead.")
}
if (logodds){
#centered logit formula
actual_prediction = log(actual_prediction) - (1 / 2) * (log(actual_prediction) + log(1 - actual_prediction))
} else if (probit){
actual_prediction = qnorm(actual_prediction)
}
}
ice_curves = matrix(NA, nrow = nrow(X), ncol = length(grid_pts))
colnames(ice_curves) = grid_pts
#Compute actual pdp. Note that this is averaged over the observations
#we sample, so this might be different from the 'true' pdp if frac_to_build < 0.
xvec_temp = X[ ,predictor] #actual setting of this column after ordering.
for (t in 1 : length(grid_pts)){
X[, predictor] = grid_pts[t]
if (use_generic){
ice_curves[, t] = predict(object, X, ...)
}
else{
ice_curves[, t] = predictfcn(object = object, newdata = X)
}
if(verbose){cat(".")}
}
#return X to its original state.
X[, predictor] = xvec_temp
#do logit if necessary
if (logodds || probit){
#prevent log(0) error
min_val = min(ice_curves)
max_val = max(ice_curves)
if (min_val < 0){
stop("logodds is TRUE but predict returns negative values (these should be probabilities!)")
}
if (min_val == 0){
second_lowest = min(ice_curves[ice_curves > 0])
if (is.na(second_lowest)){
second_lowest = .0001 #arbitrary epsilon value
}
lowest_practical_prob = mean(c(second_lowest, 0)) #arbitrarily, halfway between 0 and second_lowest
ice_curves[(ice_curves == 0)] = lowest_practical_prob
warning("At least one probability was predicted to be 0, ICEbox is using ", lowest_practical_prob, " for the value(s) instead.")
}
if (max_val == 1){
second_highest = max(ice_curves[ice_curves < 1])
if (is.na(second_highest)){
second_highest = .9999 #arbitrary 1 - epsilon value
}
highest_practical_prob = mean(c(second_highest, 1)) #arbitrarily, halfway between 1 and second_highest
ice_curves[(ice_curves == 1)] = highest_practical_prob
warning("At least one probability was predicted to be 1, ICEbox is using ", highest_practical_prob, " for the value(s) instead.")
}
if (logodds){
#centered logit formula
ice_curves = log(ice_curves) - (1 / 2) * (log(ice_curves) + log(1 - ice_curves))
} else if (probit){
ice_curves = qnorm(ice_curves)
}
}
if (verbose){cat("\n")}
if (!inherits(predictor, "character")){
xlab = paste("x", predictor, sep = "_") #x_1, x_2 etc.
} else {
xlab = predictor #the actual name of the feature.
}
if (num_unique_pts <= MAX_NUM_UNIQUE_PTS_NOMINAL){
nominal_axis = TRUE
} else {
nominal_axis = FALSE
}
range_y = NULL
sd_y = NULL
if(!missing(y)){
range_y = max(y) - min(y)
sd_y = sd(y)
} else if (!logodds && !probit){
range_y = (max(ice_curves) - min(ice_curves))
sd_y = sd(actual_prediction)
cat("y not passed, so range_y is range of ice curves and sd_y is sd of predictions on real observations\n")
}
#compute friedman's pdp:
pdp = apply(ice_curves, 2, mean) # pdp = average over the columns
if(missing(predictfcn)){
predictfcn=NULL
}
ice_obj = list(ice_curves = ice_curves, gridpts = grid_pts, predictor = predictor, xj = xj, actual_prediction = actual_prediction,
logodds = logodds, probit = probit, xlab = xlab, nominal_axis = nominal_axis, range_y = range_y, sd_y = sd_y, Xice = X, pdp = pdp,
indices_to_build = indices_to_build, frac_to_build = frac_to_build, predictfcn = predictfcn)
class(ice_obj) = "ice"
invisible(ice_obj)
}
#/c/Program\ Files/R/R-devel/bin/R.exe CMD INSTALL ICEbox -l "/c/Users/kapel/AppData/local/R/win-library/4.3"
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.