gg_vimp: Variable Importance (VIMP) data object

View source: R/gg_vimp.R

gg_vimpR Documentation

Variable Importance (VIMP) data object

Description

gg_vimp Extracts the variable importance (VIMP) information from a a rfsrc object.

Usage

gg_vimp(object, nvar, ...)

Arguments

object

A rfsrc object or output from vimp

nvar

argument to control the number of variables included in the output.

...

arguments passed to the vimp.rfsrc function if the rfsrc object does not contain importance information.

Value

gg_vimp object. A data.frame of VIMP measures, in rank order.

References

Ishwaran H. (2007). Variable importance in binary regression trees and forests, Electronic J. Statist., 1:519-537.

See Also

plot.gg_vimp rfsrc

vimp

Examples

## ------------------------------------------------------------
## classification example
## ------------------------------------------------------------
## -------- iris data
rfsrc_iris <- rfsrc(Species ~ ., data = iris,
                    importance = TRUE)
gg_dta <- gg_vimp(rfsrc_iris)
plot(gg_dta)

## ------------------------------------------------------------
## regression example
## ------------------------------------------------------------
## Not run: 
## -------- air quality data
rfsrc_airq <- rfsrc(Ozone ~ ., airquality,
                    importance = TRUE)
gg_dta <- gg_vimp(rfsrc_airq)
plot(gg_dta)

## End(Not run)

## -------- Boston data
data(Boston, package="MASS")
rfsrc_boston <- randomForestSRC::rfsrc(medv~., Boston,
                                       importance = TRUE)
gg_dta <- gg_vimp(rfsrc_boston)
plot(gg_dta)

## -------- Boston data
rf_boston <- randomForest::randomForest(medv~., Boston)
gg_dta <- gg_vimp(rf_boston)
plot(gg_dta)

## Not run: 
## -------- mtcars data
rfsrc_mtcars <- rfsrc(mpg ~ ., data = mtcars,
                      importance = TRUE)
gg_dta <- gg_vimp(rfsrc_mtcars)
plot(gg_dta)

## End(Not run)
## ------------------------------------------------------------
## survival example
## ------------------------------------------------------------
## Not run: 
## -------- veteran data
data(veteran, package = "randomForestSRC")
rfsrc_veteran <- rfsrc(Surv(time, status) ~ ., 
   data = veteran, 
   ntree = 100,
   importance = TRUE)

gg_dta <- gg_vimp(rfsrc_veteran)
plot(gg_dta)

## -------- pbc data
# We need to create this dataset
data(pbc, package = "randomForestSRC",) 
# For whatever reason, the age variable is in days... 
# makes no sense to me
for (ind in seq_len(dim(pbc)[2])) {
 if (!is.factor(pbc[, ind])) {
   if (length(unique(pbc[which(!is.na(pbc[, ind])), ind])) <= 2) {
     if (sum(range(pbc[, ind], na.rm = TRUE) == c(0, 1)) == 2) {
       pbc[, ind] <- as.logical(pbc[, ind])
     }
   }
 } else {
   if (length(unique(pbc[which(!is.na(pbc[, ind])), ind])) <= 2) {
     if (sum(sort(unique(pbc[, ind])) == c(0, 1)) == 2) {
       pbc[, ind] <- as.logical(pbc[, ind])
     }
     if (sum(sort(unique(pbc[, ind])) == c(FALSE, TRUE)) == 2) {
       pbc[, ind] <- as.logical(pbc[, ind])
     }
   }
 }
 if (!is.logical(pbc[, ind]) &
     length(unique(pbc[which(!is.na(pbc[, ind])), ind])) <= 5) {
   pbc[, ind] <- factor(pbc[, ind])
 }
}
#Convert age to years
pbc$age <- pbc$age / 364.24

pbc$years <- pbc$days / 364.24
pbc <- pbc[, -which(colnames(pbc) == "days")]
pbc$treatment <- as.numeric(pbc$treatment)
pbc$treatment[which(pbc$treatment == 1)] <- "DPCA"
pbc$treatment[which(pbc$treatment == 2)] <- "placebo"
pbc$treatment <- factor(pbc$treatment)
dta_train <- pbc[-which(is.na(pbc$treatment)), ]
# Create a test set from the remaining patients
pbc_test <- pbc[which(is.na(pbc$treatment)), ]

#========
# build the forest:
rfsrc_pbc <- randomForestSRC::rfsrc(
  Surv(years, status) ~ .,
 dta_train,
 nsplit = 10,
 na.action = "na.impute",
 forest = TRUE,
 importance = TRUE,
 save.memory = TRUE
)

gg_dta <- gg_vimp(rfsrc_pbc)
plot(gg_dta)

# Restrict to only the top 10.
gg_dta <- gg_vimp(rfsrc_pbc, nvar=10)
plot(gg_dta)

## End(Not run)

ehrlinger/ggRandomForests documentation built on Sept. 9, 2022, 6:55 p.m.