Nothing
holdout.vimp.rfsrc <- function(formula, data,
ntree = function(p, vtry){1000 * p / vtry},
ntree.max = 2000,
ntree.allvars = NULL,
nsplit = 10,
ntime = 50,
mtry = NULL,
vtry = 1,
fast = FALSE,
verbose = TRUE,
...)
{
## --------------------------------------------------------------
##
## preliminary processing
##
## --------------------------------------------------------------
## pull unnamed parameters to pass to rfsrc
## some options cannot be used
dots <- list(...)
dots$formula <- dots$data <- dots$ntree <- dots$nsplit <-
dots$ntime <- dots$mtry <- dots$importance <- dots$fast <- NULL
## hidden options to speed calculations - missing data not handled for speed
#dots$terminal.quants <- dots$terminal.qualts <- TRUE
#dots$na.action <- "na.omit"
## add vtry to the list
dots$vtry <- vtry
## determine the grow interface - rfsrc or rfsrc.fast?
if (!fast) {
rfsrc.grow <- "rfsrc"
}
else {
rfsrc.grow <- "rfsrc.fast"
}
## get dimension p - make a fast single stump call for accurate dimension
p <- length(rfsrc(formula, data, nodedepth = 1, ntree = 1, splitrule = "random")$xvar.names)
## process ntree - is it a function or number?
if (!is.function(ntree) && !is.numeric(ntree)) {
stop("ntree must be a function or number specifying requested number of grow trees")
}
if (is.function(ntree)) {
ntree <- ntree(p, vtry)
}
## specify the hold out array
if (!is.null(ntree.allvars) && ntree.allvars <= 0) {
ntree.allvars <- NULL
}
dots$holdout.array <- make.holdout.array(vtry, p, ntree, ntree.allvars)
ntree <- ncol(dots$holdout.array)
## set block.size used for get.tree
block.size <- dots$block.size
dots$block.size <- NULL
## TBD TBD TBD currently not in effect TBD TBD TBD
block.size <- NULL
## --------------------------------------------------------------
##
## grow call - massive ntree
##
## --------------------------------------------------------------
## grow step
## set block.size to ntree to reduce computations
## the user specified block.size is only relevant for get.tree in predict
o <- do.call(rfsrc.grow, c(list(formula = formula, data = data, ntree = ntree,
importance = "none", block.size = ntree, nsplit = nsplit, mtry = mtry), dots))
##--------------------------------------------------------------
##
## holdout step - get.tree calls - can be slow
##
##--------------------------------------------------------------
## extract the holdout array
ho <- o$holdout.array
## used when ntree.alvars is not NULL
## determine the columns where all variables were used
## pull the corresponding trees
if (!is.null(ntree.allvars)) {
v.in <- which(colSums(ho, na.rm = TRUE) == 0)
pe.in <- predict(o, get.tree = v.in, block.size = block.size)
}
## pull the yvar names
ynms <- o$yvar.names
if (o$family == "surv" || o$family == "surv-CR") {
ynms <- ynms[1]
}
## verbose setup
if (verbose) pb <- txtProgressBar(min = 0, max = length(o$xvar.names), style = 3)
## -------------------------------------------------------
##
## loop over features
##
## -------------------------------------------------------
hvimp <- rbind(sapply(1:length(o$xvar.names), function(j) {
## progress bar
if (verbose && length(o$xvar.names) > 1) {
setTxtProgressBar(pb, j)
}
if (verbose && j == length(o$xvar.names)) {
cat("\n")
}
## determine hold out trees for v - this may fail in extreme cases
pt.out <- ho[j, ] == 1
## calculate PE for v held out and compare to all variables
if (sum(pt.out) > 0 && (!is.null(ntree.allvars) || sum(!pt.out) > 0)) {
## define the out trees - restrict size of v.out using ntree.max
## extract the ensembles for hold out trees
v.out <- which(pt.out)
v.out <- sample(v.out)[1:min(ntree.max, length(v.out))]
pe.out <- predict(o, get.tree = v.out, block.size = block.size)
## same as above for the "in" trees
if (is.null(ntree.allvars)) {
v.in <- which(!pt.out)
v.in <- sample(v.in)[1:min(ntree.max, length(v.in))]
pe.in <- predict(o, get.tree = v.in, block.size = block.size)
}
## loop over outcomes - in case this is a multivariate object
do.call(cbind, lapply(ynms, function(nn) {
## pull the hold out error rate
o.out <- coerce.multivariate(pe.out, nn)
err.out <- get.holdout.err(o.out, length(v.out))
## pull the error rate when v "is in"
o.in <- coerce.multivariate(pe.in, nn)
err.in <- get.holdout.err(o.in, length(v.in))
## here's the vimp
vmp <- err.out - err.in
dm <- length(get.holdout.err(coerce.multivariate(pe.out, nn), 1))
if (length(vmp) == 0) {
rep(NA, dm)
}
else {
cbind(vmp)
}
}))
}
else {## no holdout v or hold in v - return NA
do.call(cbind, lapply(ynms, function(nn) {
dm <- length(get.holdout.err(coerce.multivariate(o, nn), 1))
rep(NA, dm)
}))
}
}))
## pretty up for return
if (nrow(hvimp) == 1) {
hvimp <- c(hvimp)
names(hvimp) <- o$xvar.names
}
else {
if (o$family == "surv-CR") {
rownames(hvimp) <- colnames(o$err.rate)
}
else {
rownames(hvimp) <- ynms
}
colnames(hvimp) <- o$xvar.names
}
## return the goodies
hvimp
}
holdout.vimp <- holdout.vimp.rfsrc
## --------------------------------------------------------------
##
## function to extract holdout error rate
##
## --------------------------------------------------------------
get.holdout.err <- function(obj, j) {
err <- obj$err.rate
if (obj$family == "class") {
mean(cbind(err)[1:j, 1], na.rm = TRUE)
}
else if (obj$family == "surv-CR") {
colMeans(err[1:j,, drop = FALSE], na.rm = TRUE)
}
else {
mean(err[1:j], na.rm = TRUE)
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.