#' @title Calcuates variable importance measures for an rcRF object.
#'
#' @description This function accepts a forest object from the `rcRF()` function and estimates the importance of each
#' predictor. This is accomplished by considering each tree in the forest, obtaining the out-of-bag value for each predictor in that tree,
#' obtaining the permuted out-of-bag value for each predictor in the tree, and comparing the values. A larger discrepancy between
#' the original value and permuted value indicates the predictor is more important in predicting treatment. The function
#' returns the variables in order of importance along with the importance measure, scaled to be out of 1.
#'
#' @param rcRF.fit rcRF object from rcRF(). Required input.
#' @param n0 minimum number of treatment/control observations needed in a split to call a node terminal. Defaults to 2.
#' @param N0 minimum number of observations needed in a split to call a node terminal. Defaults to 20.
#' @param sort sort the variable importance measure? Defaults to TRUE.
#' @param details print details of each tree as the function progresses. Defaults to FALSE.
#' @param depth internal variable.
#' @param AIPWE indicator for AIPWE estimation.
#' @return Returns list of total (VI), efficacy (VI.Efficacy), and risk (VI.Risk)
#' ordered variable importance measure calculated for each splitting variable.
#' @import randomForest
#' @export
#' @examples
#' set.seed(123)
#' dat <- generateData()
#' # Build a forest with 100 trees
#' fit <- rcRF(data = dat,
#' split.var = 1:10,
#' ntree = 200,
#' risk.threshold = 2.75,
#' lambda = 1)
#' VI <- Variable.Importance.ITR(fit)
Variable.Importance.ITR <- function(rcRF.fit,
n0 = 5,
N0 = 20,
sort = TRUE,
details = FALSE,
depth = 1,
AIPWE = FALSE){
trees <- rcRF.fit$TREES
id.boots <- rcRF.fit$ID.Boots.Samples
# ARGUMENTS FOR MODEL SPECIFICATION
Model.Specification <- rcRF.fit$Model.Specification
dat0 <- Model.Specification$data
col.y <- Model.Specification$efficacy
col.r <- Model.Specification$risk
col.trt <- Model.Specification$col.trt
col.prtx <- Model.Specification$col.prtx
split.var <- Model.Specification$split.var
risk.threshold <- Model.Specification$risk.threshold
lambda <- Model.Specification$lambda
ctg <- Model.Specification$ctg
vnames <- colnames(dat0)[split.var]
#
ntree <- length(trees)
p <- length(split.var)
VI <- rep(0, p)
VI.Efficacy <- rep(0, p)
VI.Risk <- rep(0, p)
for (b in 1:ntree){
id.b <- id.boots[[b]]
dat.oob <- dat0[-sort(unique(id.b)), ]
n.oob <- nrow(dat.oob)
tre.b <- trees[[b]]
########## NOTE THAT revise.tree=T HERE! ##########
out0.b.Total <- send.down.VI.ITR(dat.new = dat.oob,
tre = tre.b,
col.y = col.y,
col.r = col.r,
col.trt = col.trt,
col.prtx = col.prtx,
lambda = lambda,
risk.threshold = risk.threshold,
ctg = ctg,
n0 = n0,
N0 = N0,
revise.tree = TRUE,
depth = depth,
AIPWE = AIPWE)
out0.b.Efficacy <- send.down.VI.ITR.Efficacy(dat.new = dat.oob,
tre = tre.b,
col.y = col.y,
col.r = col.r,
col.trt = col.trt,
col.prtx = col.prtx,
lambda = lambda,
risk.threshold = risk.threshold,
ctg = ctg,
n0 = n0,
N0 = N0,
revise.tree = TRUE,
depth = depth,
AIPWE = AIPWE)
out0.b.Risk <- send.down.VI.ITR.Risk(dat.new = dat.oob,
tre = tre.b,
col.y = col.y,
col.r = col.r,
col.trt = col.trt,
col.prtx = col.prtx,
lambda = lambda,
risk.threshold = risk.threshold,
ctg = ctg,
n0 = n0,
N0 = N0,
revise.tree = TRUE,
depth = depth,
AIPWE = AIPWE)
tre0.b.Total <- out0.b.Total$tre0
tre0.b.Efficacy <- out0.b.Efficacy$tre0
tre0.b.Risk <- out0.b.Risk$tre0
if (nrow(tre0.b.Total) > 0) { ### AVOID NULL TREES
Xs.b.Total <- sort(unique(na.omit(tre0.b.Total$var)))
Xs.b.Efficacy <- sort(unique(na.omit(tre0.b.Efficacy$var)))
Xs.b.Risk <- sort(unique(na.omit(tre0.b.Risk$var)))
G.oob.Total <- out0.b.Total$score # Overall score from original bootstrap tree
G.oob.Efficacy <- out0.b.Efficacy$score # Overall efficacy score from original bootstrap tree
G.oob.Risk <- out0.b.Risk$score # Overall risk score from original bootstrap tree
for (j in 1:p) {
if (details) print(j)
G.j.Total <- G.oob.Total # Initialize score for covariate j to be to bootstrap score value
G.j.Efficacy <- G.oob.Efficacy # Initialize score for covariate j to be to bootstrap score value
G.j.Risk <- G.oob.Risk # Initialize score for covariate j to be to bootstrap score value
col.xj <- split.var[j]
if (is.element(col.xj, Xs.b.Total)){
x.j <- dat.oob[, col.xj]
dat.permuted <- dat.oob
dat.permuted[ , col.xj] <- x.j[sample(1:n.oob,n.oob, replace=FALSE)]
########## NOTE THAT revise.tree=F HERE! ##########
out0.bj.Total <- send.down.VI.ITR(dat.new = dat.permuted,
tre = tre0.b.Total,
col.y = col.y,
col.r = col.r,
col.trt = col.trt,
col.prtx = col.prtx,
lambda = lambda,
risk.threshold = risk.threshold,
ctg = ctg,
n0 = n0,
N0 = N0,
revise.tree = FALSE,
depth = 1,
AIPWE = AIPWE)
out0.bj.Efficacy <- send.down.VI.ITR.Efficacy(dat.new = dat.permuted,
tre = tre0.b.Efficacy,
col.y = col.y,
col.r = col.r,
col.trt = col.trt,
col.prtx = col.prtx,
lambda = lambda,
risk.threshold = risk.threshold,
ctg = ctg,
n0 = n0,
N0 = N0,
revise.tree = FALSE,
depth = 1,
AIPWE = AIPWE)
out0.bj.Risk <- send.down.VI.ITR.Risk(dat.new = dat.permuted,
tre = tre0.b.Risk,
col.y = col.y,
col.r = col.r,
col.trt = col.trt,
col.prtx = col.prtx,
lambda = lambda,
risk.threshold = risk.threshold,
ctg = ctg,
n0 = n0,
N0 = N0,
revise.tree = FALSE,
depth = 1,
AIPWE = AIPWE)
tre0.bj.Total <- out0.bj.Total$tre0
tre0.bj.Efficacy <- out0.bj.Efficacy$tre0
tre0.bj.Risk <- out0.bj.Risk$tre0
G.j.Total <- ifelse(nrow(tre0.bj.Total) == 1, G.oob.Total, out0.bj.Total$score)
G.j.Efficacy <- ifelse(nrow(tre0.bj.Efficacy) == 1, G.oob.Efficacy, out0.bj.Efficacy$score)
G.j.Risk <- ifelse(nrow(tre0.bj.Risk) == 1, G.oob.Risk, out0.bj.Risk$score)
}
if (G.j.Total > G.oob.Total) G.j.Total <- G.oob.Total
if (G.j.Efficacy > G.oob.Efficacy) G.j.Efficacy <- G.oob.Efficacy
if (G.j.Risk < G.oob.Risk) G.j.Risk <- G.oob.Risk
##################### PREVENTS NEGATIVE IMPORTANCE VALUES
VI[j] <- VI[j] + abs((G.oob.Total - G.j.Total)/G.oob.Total)
VI.Efficacy[j] <- VI.Efficacy[j] + abs((G.oob.Efficacy - G.j.Efficacy)/G.oob.Efficacy)
VI.Risk[j] <- VI.Risk[j] + abs((G.j.Risk - G.oob.Risk)/G.oob.Risk)
}
}
}
names(VI) <- vnames
names(VI.Efficacy) <- vnames
names(VI.Risk) <- vnames
if (sort){
VI <- sort(VI, decreasing = TRUE)
VI.Efficacy <- sort(VI.Efficacy, decreasing = TRUE)
VI.Risk <- sort(VI.Risk, decreasing = TRUE)
}
VI <- VI/sum(VI)
VI.Efficacy <- VI.Efficacy/sum(VI.Efficacy)
VI.Risk <- VI.Risk/sum(VI.Risk)
out <- list(VI = VI,
VI.Efficacy = VI.Efficacy,
VI.Risk = VI.Risk)
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.