#' @importFrom data.table data.table
#' @importFrom Matrix colSums rowSums t
#' @importFrom utils combn
#' @importFrom fastmatch fmatch
#' @importFrom memoise memoise
intImportance <- function(int, nf, precision, weight) {
# Calculate importance metrics for an interaction across selected leaf nodes
# of a fitted random forest.
# prev: the prevalence of an interaction across all selected leaf nodes,
# weighted by observations in each leaf node.
# prec: the proportion of class-1 observations in leaf nodes containing the
# interaction.
# Determine which leaf nodes contain the given interaction
int.id <- Reduce(fast.intersect, nf[as.character(int)])
if (length(int.id) == 0)
return(data.frame(prev1=0, prev0=0, prec=0))
# Compute prevalence and precision for given interaction
weight1 <- weight * precision
sint1 <- sum(weight1[int.id])
s1 <- sum(weight1)
prev1 <- sint1 / s1
weight0 <- weight - weight1
sint0 <- sum(weight0[int.id])
s0 <- sum(weight0)
prev0 <- sint0 / s0
prec <- sint1 / (sint0 + sint1)
return(data.frame(prev1=prev1, prev0=prev0, prec=prec))
}
# Faster than base::intersect, assuming x and y don't contain duplications.
fast.intersect <- memoise(function(x, y) {
if (is.null(x) || is.null(y)) return(numeric(0))
y[fmatch(x, y, 0L)]
})
prevalence <- function(weight, idint) {
# Computes the prevalence in selected nodes
sint <- sum(weight[idint])
s <- sum(weight)
return(sint / s)
}
nodePrecision <- function(read.forest, y, count, weights=rep(1, length(y))) {
# Evaluate class proportion of class-1 observations in each leaf node.
if (is.factor(y)) y <- as.numeric(y) - 1
count.y <- Matrix::colSums(read.forest$node.obs * y * weights)
precision <- count.y / count
precision[count == 0] <- 0
return(precision)
}
subsetTest <- function(int, ints, importance) {
# Compare interaction importance metrics to simple baselines.
# Prevalence: expectation under independent selection.
# Precision: precision of lower-order interactions.
if (length(int) == 1) return(c(prev.test=0, prec.test=0))
id.int <- fmatch(list(int), ints)
# Evaluate prevalence relative to independent selection
id <- sapply(int, fmatch, ints)
prev.null <- sum(log(importance$prev1[id])) #prod(importance$prev1[id])
prev <- log(importance$prev1[id.int])
prev.test <- prev - prev.null
#if (is.infinite(prev.test)) prev.test <- 0
# Determine all s-1 order interactions to evaluate
ss <- combn(int, length(int) - 1, simplify=FALSE)
id <- sapply(ss, function(ii) fmatch(list(ii), ints))
prec.null <- log(importance$prec[id])
prec <- log(importance$prec[id.int])
prec.test <- min(prec - prec.null)
#if (is.infinite(prec.test)) prec.test <- 0
# Get prevalence of s-1 subsets
#prev.s <- log(importance$prev1[id])
#ss1 <- lapply(ss, function(ii) setdiff(int, ii))
#id1 <- sapply(ss1, function(ii) fmatch(list(ii), ints))
#prev.1 <- log(importance$prev1[id1])
#prev.null <- prev.s + prev.1
#prev.test <- min(prev - prev.null)
return(data.table(prev.test=prev.test, prec.test=prec.test))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.