Nothing
stat.split.rfsrc <- function(object, ...)
{
## is this a synthetic forest?
if (sum(inherits(object, c("rfsrc", "synthetic"), TRUE) == c(1, 2)) == 2) {
object <- object$rfSyn
}
## Incoming parameter checks. All are fatal.
if (is.null(object)) stop("Object is empty!")
if ((sum(inherits(object, c("rfsrc", "grow"), TRUE) == c(1, 2)) != 2) &&
(sum(inherits(object, c("rfsrc", "predict"), TRUE) == c(1, 2)) != 2))
stop("This function only works for objects of class `(rfsrc, grow)' or `(rfsrc, pred)',")
## Acquire the inbag counts. This is part of the grow object and
## not part of the forest object.
inbag <- object$inbag
## Acquire the native array. Make it global so we can access it during recursion.
if (is.null(object$node.stats)) {
stop("RF-SRC statistics content is NULL. Please re-run grow call with 'statistics=TRUE'")
}
else {
extendedNativeArray <- object$node.stats
}
## Acquire prune information
if (is.null(object$pstn.membership)) {
prune <- FALSE
}
else {
prune <- TRUE
}
if (is.null(object$forest)) {
stop("Forest is empty! Re-run grow call with forest set to 'TRUE'.")
}
## Discard the incoming grow object in favour of the forest.
object <- object$forest
extendedNativeArray <- cbind(object$nativeArray, extendedNativeArray)
xvar.names <- object$xvar.names
if (is.null(xvar.names)) {
stop("RFSRC xvar.names content is NULL. Please ensure the object is valid.")
}
if (is.null(object$xvar)) {
stop("RFSRC xvar content is NULL. Please ensure the object is valid.")
}
## Count the number of trees in the forest.
numTree <- length(as.vector(unique(extendedNativeArray$treeID)))
## Count the number of parameters in the data set.
numParm <- length(xvar.names)
## Acquire the incoming xvar matrix of dim [n] x [p]. Make it
## global so we can access it during recursion.
xvar <- object$xvar
## #######################################################################
## Loop through all trees. This is a LOCAL operation.
##
## Depends on external variables: numParm, inbag, prune
## #######################################################################
splitTreeObj <- mclapply(1:numTree, function(b) {
## Acquire the replicates for the current bootstrap.
inbag.local <- inbag[, b]
replicates <- NULL
## Grab a replicate from the xvar matrix and decrement its inbag
## occurrence. Do this until all replicates have been accounted
## for.
while (sum(inbag.local) > 0) {
replicates <- c(replicates, which(inbag.local > 0))
inbag.local <- sapply(1:length(inbag.local), function(k) { max(inbag.local[k] - 1, 0)})
}
## Create the recursive output object.
splitInfo <- vector("list", numParm)
names(splitInfo) <- xvar.names
recursiveObject <- list(offset = min(which(extendedNativeArray$treeID == b)),
splitInfo = splitInfo,
terminal = FALSE)
## Recursively parse the tree in the primary protocol.
return(spParseTree(recursiveObject, extendedNativeArray, xvar,
b,
depth=0,
membership = replicates,
ptnFlag = FALSE,
prune = prune))
})
## Assign the results to a list of length numTree.
result <- vector("list", numTree)
for (b in 1:numTree) {
## Clean the object up by removing internal variables used for recursion.
result[[b]] <- splitTreeObj[[b]]$splitInfo
}
invisible(result)
}
## #######################################################################
##
## PRIMARY RECURSION
##
## Note: Currently this function is ONLY valid for continuous splits.
## Fator splits will return a split end cut preference statistic of NA.
##
## INPUTS:
##
## b = treeID
## depth = zero (0) based depth of split
## membership = index of in-bag individual
##
## OUTPUTS:
##
## recursiveObject:
##
## splitinfo - list of length [numParm] with
## [[p]] = c(depth, parmID, nodeID, contPT, spltTY) where
##
## nodeID = internal node identifier
## parmID = x-var parameter identifier
## contPT = continuous split value
## mvcpSZ = mwcp size (factor flag)
##
## p = x-var parameter identifier
## depth = depth of split
##
## spltTY = split type for parent node
## bit 1 bit 0 meaning
## ----- ----- -------
## 0 0 0 = both daughters have valid splits
## 1 0 2 = only the left daughter is terminal
## 0 1 1 = only the right daughter is terminal
## 1 1 3 = both daughters are terminal
##
## spltEC = end cut preference statistic
## currently as follows:
## Let x1 < x2 < ... < xn
## Let s = xj be the actual split point where
## X <= s is a left node and
## X > s is a right node.
## Thus, 1 <= j <= n-1. Define
## ECP = min { |n-1-j|, |j-1| } / (n - 1)
##
##
## spltST = if prune = FLASE: split statistic on which the node was split
## = if prune = TRUE: pseudo-terminal node indicator
##
##
## Note: this function depends on the global variable xvar.
##
## #######################################################################
### Recursive function to determine first the split point information for a tree.
spParseTree <- function(recursiveObject, extendedNativeArray, xvar,
b,
depth,
membership,
ptnFlag,
prune = prune) {
## Weak consistency check to ensure that the iteration matches the treeID in the nativeArray record.
if(b != extendedNativeArray$treeID[recursiveObject$offset]) {
stop("Invalid nativeArray input record (treeID) at ", recursiveObject$offset, ". Please contact Technical Support.")
}
## Read the current nativeArray split information.
treeID <- extendedNativeArray$treeID[recursiveObject$offset]
nodeID <- extendedNativeArray$nodeID[recursiveObject$offset]
parmID <- extendedNativeArray$parmID[recursiveObject$offset]
mwcpSZ <- extendedNativeArray$mwcpSZ[recursiveObject$offset]
contPT <- extendedNativeArray$contPT[recursiveObject$offset]
spltST <- extendedNativeArray$spltST[recursiveObject$offset]
## We must continue to parse the native array until we reach a terminal node.
## Determine whether this is a terminal node.
if (parmID == 0) {
recursiveObject$terminal <- TRUE
}
else {
recursiveObject$terminal <- FALSE
}
## If we are pruning, we do not update the outputs if we have descended past a pseudo-terminal
## node. Thus, if the PTN flag has been turned on, it stays on for all nodes below
## that point, AFTER the update.
## Assume we update as the default.
if (prune) {
if (ptnFlag == FALSE) {
if (spltST == 1) {
ptnFlag = TRUE
}
}
}
updateFlag <- TRUE
if ((parmID == 0) || (ptnFlag)) {
updateFlag <- FALSE
}
if (updateFlag) {
if(mwcpSZ != 0) {
## Factor split: the EC statistic will not be coherent.
}
## Calculate the unique split points in this node on xvar that is being split.
split.vec <- sort(unique(xvar[unique(membership), parmID]))
## Determine the location of the split point in this vector.
split.idx <- which(split.vec == contPT)
spltEC <- min(length(split.vec) - 1 - split.idx, split.idx - 1) / (length(split.vec) - 1)
## Determine the left and right membership replicate vectors.
left.membership <- membership[which(xvar[membership, parmID] <= contPT)]
rght.membership <- membership[which(xvar[membership, parmID] > contPT)]
## Update the recursive object containing the split information.
## This will be the depth, node identifier, and (continuous) split
## value. We currently do not have the ability to return factor
## split values, but we do identify their occurrence.
localInfo <- c(treeID, nodeID, parmID, contPT, mwcpSZ, depth, 0, spltEC, spltST)
if (is.null(recursiveObject$splitInfo[[parmID]])) {
recursiveObject$splitInfo[[parmID]] <- rbind(recursiveObject$splitInfo[[parmID]], localInfo, deparse.level = 0)
## Attach names only once.
colnames(recursiveObject$splitInfo[[parmID]]) <- c("treeID", "nodeID", "parmID", "contPT", "mwcpSZ", "dpthID", "spltTY", "spltEC", "spltST")
}
else {
recursiveObject$splitInfo[[parmID]] <- rbind(recursiveObject$splitInfo[[parmID]], localInfo, deparse.level = 0)
}
## Get the row and column identifier for the split type statistic
## and use these below to update the statistic. This statistic depends
## on the daughter calls to the recursive function, and hence we cannot
## populate them at or before this point.
col.idx <- which(colnames(recursiveObject$splitInfo[[parmID]]) == "spltTY")
row.idx <- dim(recursiveObject$splitInfo[[parmID]])[1]
}
## Increment the offset regardless of terminal node status. We
## always parse the native forest object at each entry into the
## recursive routine after initializing the nodal information.
recursiveObject$offset <- recursiveObject$offset + 1
## Parse left and then right, if this is not a terminal node.
if (!recursiveObject$terminal) {
## Increment the (parsed) tree depth.
depth <- depth + 1
## Parse left:
recursiveObject <- spParseTree(recursiveObject, extendedNativeArray, xvar,
b, depth, left.membership, ptnFlag, prune)
## The following updates the split type as a result of a left
## split. This is only done if updates have been requested.
if(updateFlag) {
split.type <- recursiveObject$splitInfo[[parmID]][row.idx, ]
split.type[col.idx] <- split.type[col.idx] + 2
recursiveObject$splitInfo[[parmID]][row.idx, ] = split.type
recursiveObject$terminal <- FALSE
}
## Parse right:
recursiveObject <- spParseTree(recursiveObject, extendedNativeArray, xvar,
b, depth, rght.membership, ptnFlag, prune)
## The following updates the split type as a result of a left
## split. This is only done if updates have been requested.
if(updateFlag) {
split.type <- recursiveObject$splitInfo[[parmID]][row.idx, ]
split.type[col.idx] <- split.type[col.idx] + 1
recursiveObject$splitInfo[[parmID]][row.idx, ] = split.type
recursiveObject$terminal <- FALSE
}
}
return(recursiveObject)
}
stat.split <- stat.split.rfsrc
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.