Nothing
#' @title Random Forest probability scaled partial dependency plots
#' @description Produces partial dependency plots with probability distribution based on scaled margin distances.
#'
#' @param x Object of class randomForest
#' @param pred.data Training data.frame used for constructing the plot,
#' @param xname Name of the variable for calculating partial dependence
#' @param which.class The class to focus on
#' @param w Weights to be used in averaging (if not supplied, mean is not weighted)
#' @param prob Scale distances to probabilities
#' @param plot (TRUE/FALSE) Plot results
#' @param smooth c(spline, loess) Apply spline.smooth or loess to
#' @param conf (TRUE/FALSE) Should confidence intervals be calculated for smoothing
#' @param smooth.parm An appropriate smoothing parameter passed to loess or smooth.spline
#' @param pts (FALSE/TRUE) Add raw points
#' @param raw.line (FALSE/TRUE) Plot raw line (non-smoothed)
#' @param rug Draw hash marks on plot representing deciles of x
#' @param n.pt Number of points on the grid for evaluating partial dependence.
#' @param xlab x-axis plot label
#' @param ylab y-axis plot label
#' @param main Plot label for main
#' @param ... Additional graphical parameters passed to plot
#
#' @return A list class object with fit x,y. If smooth=c("spline","loess") y represents smoothed scaled margin distance values
#'
#' @author Jeffrey S. Evans <jeffrey_evans<at>tnc.org>
#'
#' @references Evans J.S., M.A. Murphy, Z.A. Holden, S.A. Cushman (2011). Modeling species distribution and change using Random Forests CH.8 in Predictive Modeling in Landscape Ecology eds Drew, CA, Huettmann F, Wiersma Y. Springer
#' @references Baruch-Mordo, S., J.S. Evans, J. Severson, J. D. Naugle, J. Kiesecker, J. Maestas, & M.J. Falkowski (2013) Saving sage-grouse from the trees: A proactive solution to reducing a key threat to a candidate species Biological Conservation 167:233-241
#'
#' @examples
#' require(randomForest)
#' data(iris)
#' iris.rf <- randomForest(iris[,1:4], iris[,5])
#'
#' # plot all parameters
#' par(mfrow=c(2,2))
#' for(i in names(iris)[1:4]) {
#' rf.partial.prob(iris.rf, iris, i, "setosa", smooth="spline",
#' n.pt=70, smooth.parm = 0.5)
#' }
#'
#' # Plot spline and loess smoothing for one parameter, with raw points and line
#' par(mfrow=c(1,2))
#' rf.partial.prob(x = iris.rf, pred.data = iris, xname = "Sepal.Length",
#' which.class = "setosa", smooth = "spline", smooth.parm = 0.5,
#' n.pt = 70, pts = TRUE, raw.line = TRUE, rug = TRUE)
#'
#' rf.partial.prob(x = iris.rf, pred.data = iris, xname = "Sepal.Length",
#' which.class = "setosa", smooth = "loess", smooth.parm = 0.20,
#' n.pt = 70, pts = TRUE, raw.line = TRUE, rug = TRUE)
#'
#' @seealso \code{\link[stats]{smooth.spline}} for smooth.spline details on spar smoothing argument
#' @seealso \code{\link[stats]{loess}} for loess details of span smoothing argument
#'
#' @export
rf.partial.prob <- function(x, pred.data, xname, which.class, w, prob=TRUE, plot=TRUE,
smooth, conf = TRUE, smooth.parm = NULL, pts = FALSE,
raw.line = FALSE, rug=FALSE, n.pt, xlab, ylab, main, ...) {
if(!any(class(x) %in% c("randomForest","list"))) stop("x is not a randomForest object")
if (is.null(x$forest)) stop("Object does not contain an ensemble \n")
if(!x$type != "regression") stop("Regression not supported \n")
if (missing(which.class)) stop("Class name missing \n")
if (missing(xname)) stop("X Variable name missing \n")
if (missing(x)) stop("randomForest object missing \n")
if (missing(pred.data)) stop("New data missing \n")
focus <- charmatch(which.class, colnames(x$votes))
if (is.na(focus)) stop(which.class, "is not one of the class labels")
xv <- pred.data[, xname]
n <- nrow(pred.data)
if(missing(n.pt)) n.pt <- min(length(unique(pred.data[, xname])), 51)
if (missing(w)) w <- rep(1, n)
scale.dist <- function(d) { return( (exp(d) - min(exp(d))) / (max(exp(d)) - min(exp(d))) ) }
if(is.factor(xv) && !is.ordered(xv)) {
x.pt <- levels(xv)
y.pt <- numeric(length(x.pt))
for (i in seq(along = x.pt)) {
x.data <- pred.data
x.data[, xname] <- factor(rep(x.pt[i], n), levels = x.pt)
pr <- stats::predict(x, x.data, type = "prob")
y.pt[i] <- stats::weighted.mean(log(ifelse(pr[, focus] > 0, pr[, focus],
.Machine$double.eps)) - rowMeans(log(ifelse(pr > 0, pr,
.Machine$double.eps))),
w, na.rm=TRUE)
}
if( prob == TRUE) { y.pt <- scale.dist(y.pt) }
if (plot) {
if(missing(xlab)) xlab=xname
if(missing(ylab)) ylab=which.class
if(missing(main)) main="Partial Dependency Plot"
graphics::barplot(y.pt, width=rep(1,length(y.pt)), col="blue",
xlab=xlab, ylab=ylab, main=main,
names.arg=x.pt, ...)
}
}
if (is.numeric(xv)) {
x.pt <- seq(min(xv), max(xv), length=n.pt)
y.pt <- numeric(length(x.pt))
for (i in seq(along=x.pt)) {
x.data <- pred.data
x.data[, xname] <- rep(x.pt[i], n)
pr <- stats::predict(x, x.data, type = "prob")
y.pt[i] <- stats::weighted.mean(log(ifelse(pr[, focus] == 0, .Machine$double.eps,
pr[, focus])) - rowMeans(log(ifelse(pr == 0, .Machine$double.eps, pr))),
w, na.rm=TRUE)
}
if(prob == TRUE) { y.pt <- scale.dist(y.pt) }
if (plot == TRUE) {
if(missing(xlab)) xlab = xname
if(missing(ylab)) ylab = "probability"
if(missing(main)) main = paste("Partial Dependency Plot for Class",
which.class, sep=" - ")
if(!missing(smooth)) {
if(smooth == "spline") {
fit <- stats::smooth.spline(x.pt, y.pt, spar = smooth.parm, all.knots = TRUE)
fit.y <- fit$y
if(prob == TRUE) {
fit.y[fit.y < 0] <- 0
fit.y[fit.y > 1] <- 1
}
if(conf == TRUE) {
# Using jackknifed residuals for upper and lower 95% confidence interval
res <- (fit$yin - fit$y)/(1-fit$lev)
sigma <- sqrt(stats::var(res))
upper <- fit$y + 2.0 * sigma * sqrt(fit$lev)
lower <- fit$y - 2.0 * sigma * sqrt(fit$lev)
}
} else if (smooth == "loess") {
options(warn=-1)
if(is.null(smooth.parm)) smooth.parm = 0.75
fit <- stats::predict(stats::loess(y.pt ~ x.pt, span = smooth.parm),
se = TRUE, degree = 2)
options(warn=0)
fit.y <- fit$fit
if(prob == TRUE) {
fit.y[fit.y < 0] <- 0
fit.y[fit.y > 1] <- 1
}
if(conf == TRUE) {
lower <- fit$fit - stats::qt(0.975, fit$df) * fit$se
upper <- fit$fit + stats::qt(0.975, fit$df) * fit$se
}
} else {
warning("Not a valid option for smoothing type, options are: spline or loess")
}
if(conf == FALSE) {
graphics::plot(x.pt, fit.y, type = "l", xlab=xlab, lwd=0.75, lty=2, ylab=ylab,
main=main)
} else {
graphics::plot(x.pt, y.pt, type = "n", xlab=xlab, ylab=ylab, main=main)
graphics::polygon(c(x.pt, rev(x.pt)), c(upper, rev(lower)),
col=grDevices::rgb(0.85, 0.85, 0.85, 0.5))
graphics::lines(x.pt, fit.y, lty=3, col="gray42")
}
} else {
fit.y <- y.pt
graphics::plot(x.pt, y.pt, type = "l", xlab=xlab, ylab=ylab, main=main)
}
if( raw.line == TRUE) { graphics::lines(x.pt, y.pt, lty = 1, lwd=0.5, col="red") }
if( pts == TRUE) { graphics::points(x.pt, y.pt, pch=19, cex=0.35, col="black") }
if (rug == TRUE) {
if (n.pt > 10) {
graphics::rug(stats::quantile(xv, seq(0.1, 0.9, by=0.1)), side = 1)
} else {
graphics::rug(unique(xv, side = 1))
}
}
}
}
invisible(list(x=x.pt, y=fit.y))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.