Nothing
generic.predict.rfsrc <-
function(object,
newdata,
m.target = NULL,
importance = FALSE,
get.tree = NULL,
block.size = NULL,
importance.xvar,
na.action = c("na.omit", "na.impute", "na.random"),
outcome = c("train", "test"),
perf.type = NULL,
proximity = FALSE,
forest.wt = FALSE,
ptn.count = 0,
distance = FALSE,
var.used = c(FALSE, "all.trees", "by.tree"),
split.depth = c(FALSE, "all.trees", "by.tree"),
seed = NULL,
do.trace = FALSE,
membership = FALSE,
statistics = FALSE,
...)
{
univariate.nomenclature <- TRUE
## get any hidden options
user.option <- list(...)
## incoming parameter checks: all are fatal
if (missing(object)) {
stop("object is missing!")
}
## incoming object must be a grow forest or a forest object
if (sum(inherits(object, c("rfsrc", "grow"), TRUE) == c(1, 2)) != 2 &
sum(inherits(object, c("rfsrc", "forest"), TRUE) == c(1, 2)) != 2)
stop("this function only works for objects of class `(rfsrc, grow)' or '(rfsrc, forest)'")
## grow forests must have true forest information
if (sum(inherits(object, c("rfsrc", "grow"), TRUE) == c(1, 2)) == 2) {
if (is.forest.missing(object)) {
stop("Forest information for prediction is missing. Re-run rfsrc (grow call) with forest=TRUE")
}
}
## case specific performance
cse <- (user.option)
csv <- (user.option)
## verify importance (needed for jitt)
importance <- match.arg(as.character(importance)[1], c(FALSE, TRUE,
"none", "anti", "permute", "random", "anti.joint", "permute.joint", "random.joint"))
if (grepl("joint", importance)) {
vimp.joint <- TRUE
}
else {
vimp.joint <- FALSE
}
## verify na.action (needed for jitt)
na.action <- match.arg(na.action, c("na.omit", "na.impute", "na.random"))
if (inherits(object, "anonymous") && na.action == "na.impute") {
na.action <- "na.mean"
}
## verify other key options
outcome <- match.arg(outcome, c("train", "test"))
proximity <- match.arg(as.character(proximity), c(FALSE, TRUE, "inbag", "oob", "all"))
forest.wt <- match.arg(as.character(forest.wt), c(FALSE, TRUE, "inbag", "oob", "all"))
distance <- match.arg(as.character(distance), c(FALSE, TRUE, "inbag", "oob", "all"))
## set restore.mode and the ensemble option
## newdata missing --> restore.mode = TRUE
## outcome = "test" --> restore.mode = FALSE for R-processing but switched later for C-code function call
if (missing(newdata)) {
restore.mode <- TRUE
outcome <- "train"
ensemble <- "all"##remove option to select between "all", "oob", "inbag"
}
else {##there is test data present
restore.mode <- FALSE
## special treatment for outcome=="test" (which is really restore mode)
if (outcome == "test") {
ensemble <- "all"##remove option to select between "all", "oob", "inbag"
}
## standard prediction scenario on new test data - there is no OOB
else {
ensemble <- "inbag"
}
}
## check if this is an anonymous object
## coerce values as necessary
## graceful return if restore.mode = TRUE which is not allowed for anonymous
if (inherits(object, "anonymous")) {
anonymize.bits <- 2^26
outcome <- "train"
if (restore.mode) {
stop("in order to predict with anonymous forests please provide a test data set")
}
}
else {
anonymize.bits <- 0
}
## jitt
jitt <- is.hidden.jitt(user.option, importance, na.action, inherits(object, "anonymous"), partial = FALSE)
## rfq
rfq <- (user.option)
## quantile regression
gk.quantile <- (user.option)
prob <- (user.option)
prob.epsilon <- (user.option)
## vimp
vimp.threshold <- (user.option)
## case.depth
case.depth <- (user.option)
## set the family
family <- object$family
## pull the x-variable and y-outcome names from the grow object
xvar.names <- object$xvar.names
yvar.names <- object$yvar.names
subj.names <- object$subj.names
importance.xvar <- get.importance.xvar(importance.xvar, importance, object)
importance.xvar.idx <- match(importance.xvar, xvar.names)
var.used <- match.arg(as.character(var.used), c("FALSE", "all.trees", "by.tree"))
if (var.used == "FALSE") var.used <- FALSE
split.depth <- match.arg(as.character(split.depth), c("FALSE", "all.trees", "by.tree"))
if (split.depth == "FALSE") split.depth <- FALSE
## Currently we do not support split.depth for test data. TBD2
split.depth = FALSE
## initialize the seed
seed <- get.seed(seed)
## REDUCES THE OBJECT TO THE FOREST -- REDUCTION STARTS HERE
## hereafter we only need the forest and reassign "object" to the forest
## (TBD3) memory management "big.data" not currently implemented:
big.data <- FALSE
if (sum(inherits(object, c("rfsrc", "grow"), TRUE) == c(1, 2)) == 2) {
if (inherits(object, "bigdata")) {
big.data <- TRUE
}
object <- object$forest
}
else {
## object is already a forest
if (inherits(object, "bigdata")) {
big.data <- TRUE
}
}
## confirm version coherence
if (is.null(object$version)) {
cat("\n This function only works with objects created with the following minimum version of the package:")
cat("\n Minimum version: ")
cat("2.3.0")
cat("\n Your version: ")
cat("unknown")
cat("\n")
stop()
}
else {
object.version <- as.integer(unlist(strsplit(object$version, "[.]")))
installed.version <- as.integer(unlist(strsplit("3.2.2", "[.]")))
minimum.version <- as.integer(unlist(strsplit("2.3.0", "[.]")))
object.version.adj <- object.version[1] + (object.version[2]/10) + (object.version[3]/100)
installed.version.adj <- installed.version[1] + (installed.version[2]/10) + (installed.version[3]/100)
minimum.version.adj <- minimum.version[1] + (minimum.version[2]/10) + (minimum.version[3]/100)
## Minimum object version must be satisfied for us to proceed. This is the only way
## terminal node restoration is guaranteed, due to RNG coherency.
if (object.version.adj >= minimum.version.adj) {
## We are okay
}
else {
cat("\n This function only works with objects created with the following minimum version of the package:")
cat("\n Minimum version: ")
cat("2.3.0")
cat("\n Your version: ")
cat(object$version)
cat("\n")
stop()
}
}
## classification specific details related to rfq and perf.type
class.relfrq <- NULL
if (family == "class") {
## rfq specific details
if (!is.null(rfq)) {##predict has specified rfq
if (!rfq) {##predict does not want rfq
## nothing
}
else {##predict has requested rfq
class.relfrq <- table(object$yvar) / length(object$yvar)
}
}
if (is.null(rfq)) {##predict ambivalent about rfq
if (!object$rfq) {##grow did not use rfq
## nothing -> rfq = FALSE
}
else {##grow used rfq - use grow spec
class.relfrq <- table(object$yvar) / length(object$yvar)
rfq <- TRUE
}
}
## performance details
if (is.null(perf.type) && !is.null(object$perf.type)) {
perf.type <- object$perf.type
}
}
## recover the split rule
splitrule <- object$splitrule
## gk processing
if (!is.null(gk.quantile) || object$gk.quantile) {
if (is.null(gk.quantile)) {##predict ambivalent about gk - use grow spec
gk.quantile <- object$gk.quantile
}
}
## !!! here's where prob and prob.epsilon are set globally !!!
gk.quantile <- get.gk.quantile(gk.quantile)
prob.assign <- global.prob.assign(if (is.null(prob)) object$prob else prob,
if (is.null(prob.epsilon)) object$prob.epsilon else prob.epsilon,
gk.quantile, object$quantile.regr, splitrule, object$n)
## Determine the immutable yvar factor map which is needed for
## classification sexp dimensioning. But, first convert object$yvar
## to a data frame which is required for factor processing
#object$yvar <- as.data.frame(object$yvar)
#colnames(object$yvar) <- yvar.names
yfactor <- object$yvar.factor
## multivariate family details
m.target.idx <- get.outcome.target(family, yvar.names, m.target)
## short cut to get the y-outcome type and number of levels
yvar.types <- yfactor$types
yvar.nlevels <- yfactor$nlevels
yvar.numeric.levels <- yfactor$numeric.levels
## recover the individual subject identifiers, if they exist.
subj <- object$subj
## get event information for survival families
event.info <- object$event.info
event.type <- event.info$event.type
## CR.bits assignment
cr.bits <- get.cr.bits(family)
## determine the immutable xvar factor map
xfactor <- object$xvar.factor
any.xvar.factor <- (length(xfactor$factor) + length(xfactor$order)) > 0
## short cut to get the x-variable type and number of levels
xvar.types <- xfactor$types
xvar.nlevels <- xfactor$nlevels
xvar.numeric.levels <- xfactor$numeric.levels
## set dimensions
n.xvar <- length(xvar.names)
n <- object$n
r.dim <- event.info$rdim
## set flags appropriately for unsupervised forests
## there are layers of checks appearing later, so some of these are redundant
if (family == "unsupv") {
outcome <- "train"
perf.type <- "none"
importance <- "none"
}
## Override ptn.count by family. Pruning is based on variance,
## thus, this is not yet well-defined in [S] settings.
if (grepl("surv", family)) {
ptn.count <- 0
}
## ----------------------------------------------------------------
## From the native code's perspective, PRED mode can process one
## or two data sets. If one data set is sent in, we assume
## we wish to restore the forest with original-training or
## pseudo-training data. If two data sets are sent in, we
## assume we are sending in the original-training data set, and a
## test data set. When outcome="test" we make a call to the native
## code with only one data set (the test data which becomes pseudo-training
## data). We set the test outcome bit to allow the native
## code to distinguish this case from the case of the restore
## with the original training data.
## ----------------------------------------------------------------
##--------------------------------------------------------
##
## NON-RESTORE MODE PROCESSING (includes outcome=="test")
##
##--------------------------------------------------------
if (!restore.mode) {
## Filter the test data based on the formula
newdata <- newdata[, is.element(names(newdata), c(yvar.names, xvar.names)), drop = FALSE]
## impute.mean for anonymous forests
if (inherits(object, "anonymous") && na.action == "na.mean") {
newdata <- assign.impute.mean(newdata, object$impute.mean)
na.action <- "na.omit"
}
## Check that test/train factors are the same. If factor has an
## NA in its levels, remove it. Confirm factor labels overlap.
newdata <- rm.na.levels(newdata, xvar.names)
#newdata.xfactor <- extract.factor(newdata, xvar.names)
#if (!setequal(xfactor$factor, newdata.xfactor$factor)) {
# stop("x-variable factors from test data do not match original training data")
#}
#if (!setequal(xfactor$order, newdata.xfactor$order)) {
# stop("(ordered) x-variable factors from test data do not match original training data")
#}
any.outcome.factor <- family == "class"
if (family == "class+" | family == "mix+") {
if (length(intersect("R", yfactor$generic.types[m.target.idx])) == 0) {
any.outcome.factor <- TRUE
}
}
## If the outcomes contain factors we need to check that train/test y-outcomes are compatible
if (any.outcome.factor) {
if (sum(is.element(names(newdata), yvar.names)) > 0) {
newdata <- rm.na.levels(newdata, yvar.names)
#newdata.yfactor <- extract.factor(newdata, yvar.names)
#if (!setequal(yfactor$factor, newdata.yfactor$factor)) {
# stop("class outcome from test data does not match original training data")
#}
#if (!setequal(yfactor$order, newdata.yfactor$order)) {
# stop("(ordered) class outcome from test data does not match original training data")
#}
}
}
## one final (possibly redundant) check confirming coherence of train/test xvars
## previously commented out: but put we this back because error message is helpful
if (length(xvar.names) != sum(is.element(xvar.names, names(newdata)))) {
stop("x-variables in test data do not match original training data")
}
## coherence of train/test yvars (assuming test yvars are available)
yvar.present <- sum(is.element(yvar.names, names(newdata))) > 0
if (yvar.present && length(yvar.names) != sum(is.element(yvar.names, names(newdata)))) {
stop("y-variables in test data do not match original training data")
}
## Force test factor levels to equal grow factor levels
## this is crucial to ensuring an immutable map
if (any.xvar.factor) {
newdata <- check.factor(newdata, xfactor)
}
## If the outcomes contain factors we need to check factor coherence.
if (any.outcome.factor) {
if (yvar.present) {
newdata <- check.factor(newdata, yfactor)
}
}
## Extract test yvar names (if any) and xvar names.
if (yvar.present) {
fnames <- c(yvar.names, xvar.names)
}
else {
fnames <- xvar.names
}
## Data conversion to numeric mode
newdata <- finalizeData(fnames, newdata, na.action)
## Extract the test x-matrix and sort the columns as in the original training data.
## this accomodates incoming test x-matrix in a different order.
xvar.newdata <- as.matrix(newdata[, xvar.names, drop = FALSE])
n.newdata <- nrow(newdata)
## Save the row names for later overlay
newdata.row.names <- rownames(xvar.newdata)
## Process the y-outcomes and set their dimension
## note that in unsupervised mode there are no responses
## r.dim.newdata is set correctly by get.grow.event.info() in this case
if (yvar.present) {
yvar.newdata <- as.matrix(newdata[, yvar.names, drop = FALSE])
event.info.newdata <- get.grow.event.info(yvar.newdata, family, need.deaths = FALSE)
r.dim.newdata <- event.info.newdata$r.dim
## Survival specific coherency checks
## if there are no deaths, turn off performance values and VIMP
if (grepl("surv", family) && all(na.omit(event.info.newdata$cens) == 0)) {
perf.type <- "none"
importance <- "none"
}
## Ensure consistency of event types
if (grepl("surv", family) &&
length(setdiff(na.omit(event.info.newdata$cens), na.omit(event.info$cens))) > 1) {
stop("survival events in test data do not match training data")
}
}
else {
## Disallow outcome=TEST without y-outcomes
if (outcome == "test") {
stop("outcome=TEST, but the test data has no y values, which is not permitted")
}
## There are no outcomes.
r.dim.newdata <- 0
yvar.newdata <- NULL
perf.type <- "none"
importance <- "none"
}
## Remove xvar row and column names for proper processing by the native library
## does not apply when outcome = TEST because the xvar TEST data has been made NULL
if (outcome != "test") {
rownames(xvar.newdata) <- colnames(xvar.newdata) <- NULL
}
## We don't need the test data anymore
remove(newdata)
}
##--------------------------------------------------------
##
## RESTORE MODE PROCESSING (excludes outcome=="test")
##
##--------------------------------------------------------
else {
## There cannot be test data in restore mode
## The native code switches based on n.newdata being zero (0). Be careful.
n.newdata <- 0
r.dim.newdata <- 0
xvar.newdata <- NULL
yvar.newdata <- NULL
## Outcome is set to train for the native code
## Determine whether performance values are requested
outcome <- "train"
if (object$bootstrap == "none" || object$bootstrap == "by.node" || family == "unsupv") {
importance <- "none"
perf.type <- "none"
}
else {
## do nothing.
}
## restore hidden parameters
if (is.null(user.option$vimp.threshold)) {
vimp.threshold <- object$vimp.threshold
}
} ## ends restore.mode check
## ------------------------------------------------------------
## We have completed the restore/non-restore mode processing
## ------------------------------------------------------------
## Final processing of xvar and yvar test data
## depends on "outcome"
## outcome=train
if (outcome == "train") {
## data conversion for y-training data
yvar <- as.matrix(data.matrix(data.frame(object$yvar)))
## respect the training options related to bootstrapping:
sampsize <- round(object$sampsize(n))
case.wt <- object$case.wt
samp <- object$samp
## data conversion for x-training data
if (anonymize.bits == 0) {
xvar <- as.matrix(data.matrix(object$xvar))
rownames(xvar) <- colnames(xvar) <- NULL
}
else {
xvar <- NULL
}
}
if (outcome == "test") {
## outcome=test
## From the native code perspective we are in pseudo-restore mode
## From the R side, it is convenient to now pretend we are
## in restore mode, so we swap the training data out with the test data
## Performance is always requested for this setting
## swap the data
xvar <- xvar.newdata
yvar <- yvar.newdata
rownames(xvar) <- colnames(xvar) <- NULL
restore.mode <- TRUE
## set the dimensions
n <- nrow(xvar)
r.dim <- r.dim.newdata
## pretend there is no test data, but do *not* get rid of it
## we need (and use) this data *after* the native code call
n.newdata <- 0
r.dim.newdata <- 0
## set the sample size
## "swor" now handled because we now make sampsize a function of n
sampsize <- round(object$sampsize(n))
case.wt <- get.weight(NULL, n)
samp <- NULL
}
## initialize the number of trees in the forest
ntree <- object$ntree
## process the get.tree vector that specifies which trees we want
## to extract from the forest. This is only relevant to restore mode.
## The parameter is ignored in predict mode.
get.tree <- get.tree.index(get.tree, ntree)
## get performance and rfq, gk bits
perf.type <- get.perf(perf.type, FALSE, family)
perf.bits <- get.perf.bits(perf.type)
rfq <- get.rfq(rfq)
rfq.bits <- get.rfq.bits(rfq, family)
gk.quantile.bits <- get.gk.quantile.bits(gk.quantile)
statistics.bits <- get.statistics.bits(statistics)
bootstrap.bits <- get.bootstrap.bits(object$bootstrap)
## initialize the low bits
ensemble.bits <- get.ensemble.bits(ensemble)
importance.bits <- get.importance.bits(importance, perf.type)
proximity.bits <- get.proximity.bits(restore.mode, proximity)
distance.bits <- get.distance.bits(restore.mode, distance)
split.depth.bits <- get.split.depth.bits(split.depth)
var.used.bits <- get.var.used.bits(var.used)
outcome.bits <- get.outcome.bits(outcome)
case.depth.bits <- get.case.depth.bits(case.depth)
## Initalize the high bits
samptype.bits <- get.samptype.bits(object$samptype)
## forest weights
forest.wt.bits <- get.forest.wt.bits(restore.mode, object$bootstrap, forest.wt)
membership.bits <- get.membership.bits(membership)
terminal.qualts.bits <- get.terminal.qualts.predict.bits(object$terminal.qualts)
terminal.quants.bits <- get.terminal.quants.predict.bits(object$terminal.quants)
cse.bits = get.cse.bits(cse)
csv.bits = get.csv.bits(csv)
jitt.bits <- get.jitt.bits(jitt)
## set the data.pass flags: we do this here because the restore.mode flag is now finalized
## training data.pass acquires the grow data.pass flag
data.pass.bits <- get.data.pass.bits(object$data.pass)
## testing data.pass is na.action AND restore.mode dependent
if (restore.mode == FALSE) {
if (na.action == "na.omit") {
data.pass.predict.bits <- get.data.pass.predict.bits(TRUE)
}
else {
data.pass.predict.bits <- get.data.pass.predict.bits(FALSE)
}
}
else {
## we are in restore mode -> we are safe and the test data.pass flag irrelevant
data.pass.predict.bits <- get.data.pass.predict.bits(FALSE)
}
## We over-ride block-size in the case that get.tree is user specified
block.size <- min(get.block.size.bits(block.size, ntree), sum(get.tree))
## Turn off partial option.
partial.bits <- get.partial.bits(0)
## na.action bit
## in restore mode, we send in the protocol used for the training data.
if (restore.mode) {
na.action = object$na.action
}
na.action.bits <- get.na.action.bits(na.action)
do.trace <- get.trace.bits(do.trace)
## Check that hdim is initialized. If not, set it zero.
## This is necessary for backwards compatibility with 2.3.0
if (is.null(object$hdim)) {
hdim <- 0
}
else {
hdim <- object$hdim
}
## The pivot in this predict related function is different from
## the pivot used in the grow related function. In rfsrc we are
## referencing the list nativeOutput[[]]. Here we are referencing
## the $nativeArray[[]] object which is a massaged version of
## nativeOutput[[]]. Here, pivot points to the record PRIOR to
## parmID2. We adjust for the presence of interactions, and
## synthetics. The default chunk is parmIDx, contPTx, contPTRx,
## mwcpSZx, fsrecIDx.
## WARNING: Note that the maximum number of slots in the following
## foreign function call is 64. Ensure that this limit is not
## exceeded. Otherwise, the program will error on the call.
if (hdim == 0) {
pivot = 0
chunk = 0
}
## Start the C external timer.
ctime.external.start <- proc.time()
nativeOutput <- tryCatch({.Call("rfsrcPredict",
as.integer(do.trace),
as.integer(seed),
as.integer(ensemble.bits +
importance.bits +
bootstrap.bits +
proximity.bits +
split.depth.bits +
var.used.bits +
outcome.bits +
perf.bits +
rfq.bits +
cr.bits +
gk.quantile.bits +
statistics.bits +
anonymize.bits +
case.depth.bits),
as.integer(forest.wt.bits +
distance.bits +
samptype.bits +
na.action.bits +
membership.bits +
terminal.qualts.bits +
terminal.quants.bits +
cse.bits +
csv.bits +
data.pass.bits +
data.pass.predict.bits +
jitt.bits),
## >>>> start of maxi forest object >>>>
as.double(vimp.threshold),
as.integer(ntree),
as.integer(n),
list(as.integer(length(yvar.types)),
if (is.null(yvar.types)) NULL else as.character(yvar.types),
if (is.null(yvar.types)) NULL else as.integer(yvar.nlevels),
if (is.null(yvar.numeric.levels)) NULL else sapply(1:length(yvar.numeric.levels), function(nn) {as.integer(length(yvar.numeric.levels[[nn]]))}),
if (is.null(subj)) NULL else as.integer(subj),
if (is.null(event.type)) NULL else as.integer(length(event.type)),
if (is.null(event.type)) NULL else as.integer(event.type)),
if (is.null(yvar.numeric.levels)) {
NULL
}
else {
lapply(1:length(yvar.numeric.levels),
function(nn) {as.integer(yvar.numeric.levels[[nn]])})
},
if (is.null(yvar.types)) NULL else as.double(as.vector(yvar)),
list(as.integer(n.xvar),
as.character(xvar.types),
if (is.null(xvar.types)) NULL else as.integer(xvar.nlevels),
if (is.null(xvar.numeric.levels)) NULL else sapply(1:length(xvar.numeric.levels), function(nn) {as.integer(length(xvar.numeric.levels[[nn]]))}),
NULL,
NULL),
if (is.null(xvar.numeric.levels)) {
NULL
}
else {
lapply(1:length(xvar.numeric.levels),
function(nn) {as.integer(xvar.numeric.levels[[nn]])})
},
if (is.null(xvar)) NULL else as.double(as.vector(xvar)),
list(as.integer(length(case.wt)),
if (is.null(case.wt)) NULL else as.double(case.wt),
as.integer(sampsize),
if (is.null(samp)) NULL else as.integer(samp)),
list(if(is.null(event.info$time.interest)) as.integer(0) else as.integer(length(event.info$time.interest)),
if(is.null(event.info$time.interest)) NULL else as.double(event.info$time.interest)),
as.integer(object$totalNodeCount),
as.integer(object$leafCount),
list(as.integer(object$seed),
if (is.null(object$seedVimp)) NULL else as.integer(object$seedVimp),
as.integer(object$optLoGrow)),
as.integer(hdim),
## Object containing base learner settings. This is never NULL.
object$base.learner,
as.integer((object$nativeArray)$treeID),
as.integer((object$nativeArray)$nodeID),
as.integer((object$nativeArray)$nodeSZ),
as.integer((object$nativeArray)$brnodeID),
## This is hc_zero. It is never NULL.
list(as.integer((object$nativeArray)$parmID),
as.double((object$nativeArray)$contPT),
as.integer((object$nativeArray)$mwcpSZ),
as.integer((object$nativeArray)$fsrecID),
as.integer((object$nativeFactorArray)$mwcpPT)),
## This slot is hc_one_augm_intr. This slot can be NULL.
if (!is.null(object$base.learner)) {
if (object$base.learner$interact.depth > 1) {
list(as.integer((object$nativeArray)$pairCT),
as.integer((object$nativeArray)$augmXone),
as.integer((object$nativeArray)$augmXtwo))
} else { NULL }
} else { NULL },
## This slot is hc_one_augm_syth. This slot can be NULL.
if (!is.null(object$base.learner)) {
if (object$base.learner$synthetic.depth > 1) {
list(as.integer((object$nativeArray)$sythSZ),
as.integer((object$nativeArray)$augmXS))
} else { NULL }
} else { NULL },
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
as.integer(object$nativeArrayTNDS$tnRMBR),
as.integer(object$nativeArrayTNDS$tnAMBR),
as.integer(object$nativeArrayTNDS$tnRCNT),
as.integer(object$nativeArrayTNDS$tnACNT),
as.double((object$nativeArrayTNDS$tnSURV)),
as.double((object$nativeArrayTNDS$tnMORT)),
as.double((object$nativeArrayTNDS$tnNLSN)),
as.double((object$nativeArrayTNDS$tnCSHZ)),
as.double((object$nativeArrayTNDS$tnCIFN)),
as.double((object$nativeArrayTNDS$tnREGR)),
as.integer((object$nativeArrayTNDS$tnCLAS)),
## <<<< end of maxi forest object <<<<
list(if (is.null(m.target.idx)) as.integer(0) else as.integer(length(m.target.idx)),
if (is.null(m.target.idx)) NULL else as.integer(m.target.idx)),
as.integer(ptn.count),
NULL,
list(if (is.null(importance.xvar.idx)) as.integer(0) else as.integer(length(importance.xvar.idx)),
if (is.null(importance.xvar.idx)) NULL else as.integer(importance.xvar.idx)),
## Partial variables disabled.
list(as.integer(0),
as.integer(0),
as.integer(0),
NULL,
as.integer(0),
NULL,
NULL),
as.integer(n.newdata),
as.integer(r.dim.newdata),
as.double(if (outcome != "test") yvar.newdata else NULL),
as.double(if (outcome != "test") xvar.newdata else NULL),
as.integer(block.size),
list(if (is.null(prob.assign$prob)) as.integer(0) else as.integer(length(prob.assign$prob)),
if (is.null(prob.assign$prob)) NULL else as.double(prob.assign$prob),
if (is.null(prob.assign$prob.epsilon)) as.double(0) else as.double(prob.assign$prob.epsilon)),
as.integer(get.tree),
as.integer(get.rf.cores()))}, error = function(e) {
print(e)
NULL})
## Stop the C external timer.
ctime.external.stop <- proc.time()
## check for error return condition in the native code
if (is.null(nativeOutput)) {
stop("An error has occurred in prediction. Please turn trace on for further analysis.")
}
## determine missingness which is used for imputed data
## not done for anonymous forests or na.random imputation
if (!inherits(object, "anonymous") && na.action != "na.random") {
if (restore.mode) {
n.miss <- get.nmiss(xvar, yvar)
}
else {
n.miss <- get.nmiss(xvar.newdata, yvar.newdata)
}
}
else {
n.miss <- 0
}
## extract the imputed data if there was missingness
if (n.miss > 0) {
imputed.data <- matrix(nativeOutput$imputation, nrow = n.miss)
nativeOutput$imputation <- NULL
imputed.indv <- imputed.data[, 1]
imputed.data <- as.data.frame(imputed.data[, -1, drop = FALSE])
if ((r.dim.newdata > 0) | (perf.type != "none")) {
colnames(imputed.data) <- c(yvar.names, xvar.names)
}
else {
colnames(imputed.data) <- xvar.names
}
}
## post-process the test data
## for restore mode there is no test data *except* when outcome=TEST
if (!restore.mode | outcome == "test") {
## add row and column names to test xvar matrix
xvar.newdata <- as.data.frame(xvar.newdata)
rownames(xvar.newdata) <- newdata.row.names
colnames(xvar.newdata) <- xvar.names
## map xvar factors back to original values
xvar.newdata <- map.factor(xvar.newdata, xfactor)
if (perf.type != "none") {
## add column names to test response matrix
yvar.newdata <- as.data.frame(yvar.newdata)
colnames(yvar.newdata) <- yvar.names
## map response factors back to original values
yvar.newdata <- map.factor(yvar.newdata, yfactor)
}
}
## Map imputed data factors back to original values
if (n.miss > 0) {
imputed.data <- map.factor(imputed.data, xfactor)
if (perf.type != "none") {
imputed.data <- map.factor(imputed.data, yfactor)
}
}
## proximity
if (proximity != FALSE) {
if (restore.mode) {
prox.n <- n
}
else {
prox.n <- n.newdata
}
proximity.out <- matrix(0, prox.n, prox.n)
count <- 0
for (k in 1:prox.n) {
proximity.out[k,1:k] <- nativeOutput$proximity[(count+1):(count+k)]
proximity.out[1:k,k] <- proximity.out[k,1:k]
count <- count + k
}
nativeOutput$proximity <- NULL
}
else {
proximity.out <- NULL
}
## distance
if (distance != FALSE) {
if (restore.mode) {
dist.n <- n
}
else {
dist.n <- n.newdata
}
distance.out <- matrix(0, dist.n, dist.n)
count <- 0
for (k in 1:dist.n) {
distance.out[k,1:k] <- nativeOutput$distance[(count+1):(count+k)]
distance.out[1:k,k] <- distance.out[k,1:k]
count <- count + k
}
nativeOutput$distance <- NULL
}
else {
distance.out <- NULL
}
## forest weight matrix
if (forest.wt != FALSE) {
if (restore.mode) {
forest.wt.n <- c(n, n)
}
else {
forest.wt.n <- c(n.newdata, n)
}
forest.wt.out <- matrix(nativeOutput$weight, forest.wt.n, byrow = TRUE)
nativeOutput$weight <- NULL
}
else {
forest.wt.out <- NULL
}
## forest case.depth
if (case.depth != FALSE) {
if (restore.mode) {
case.depth.out <- matrix(nativeOutput$caseDepth, c(ntree, n), byrow = TRUE)
nativeOutput$caseDepth <- NULL
}
else {
case.depth.out <- matrix(nativeOutput$caseDepth, c(ntree, n.newdata), byrow = TRUE)
}
nativeOutput$caseDepth <- NULL
}
else {
case.depth.out <- NULL
}
n.observed = if (restore.mode) n else n.newdata
## membership
if (membership) {
membership.out <- matrix(nativeOutput$nodeMembership, c(n.observed, ntree))
nativeOutput$nodeMembership <- NULL
if (restore.mode) {
inbag.out <- matrix(nativeOutput$bootMembership, c(n.observed, ntree))
nativeOutput$bootMembership <- NULL
}
else {
inbag.out <- NULL
}
if (ptn.count > 0) {
ptn.membership.out <- matrix(nativeOutput$pstnMembership, c(n.observed, ntree))
nativeOutput$pstnMembership <- NULL
}
else {
ptn.membership.out <- NULL
}
}
else {
membership.out <- NULL
inbag.out <- NULL
ptn.membership.out <- NULL
}
## variables used
if (var.used != FALSE) {
if (var.used == "all.trees") {
var.used.out <- nativeOutput$varUsed
names(var.used.out) <- xvar.names
}
else {
var.used.out <- matrix(nativeOutput$varUsed, nrow = ntree, byrow = TRUE)
colnames(var.used.out) <- xvar.names
}
nativeOutput$varUsed <- NULL
}
else {
var.used.out <- NULL
}
## split depth
if (split.depth != FALSE) {
if (split.depth == "all.trees") {
split.depth.out <- array(nativeOutput$splitDepth, c(n, n.xvar))
}
else {
split.depth.out <- array(nativeOutput$splitDepth, c(n, n.xvar, ntree))
}
nativeOutput$splitDepth <- NULL
}
else {
split.depth.out <- NULL
}
## node statistics
if (statistics == TRUE) {
node.stats <- as.data.frame(cbind(nativeOutput$spltST))
names(node.stats) <- c("spltST")
}
else {
node.stats <- NULL
}
## make the output object
rfsrcOutput <- list(
call = match.call(),
family = family,
n = n.observed,
ntree = ntree,
yvar = (if ((outcome == "train" & restore.mode) | (perf.type != "none")) {
if (outcome == "train" & restore.mode)
amatrix.remove.names(object$yvar) else amatrix.remove.names(yvar.newdata)} else NULL),
yvar.names = yvar.names,
xvar = (if(outcome != "test" & restore.mode) object$xvar else xvar.newdata),
xvar.names = xvar.names,
leaf.count = nativeOutput$leafCount,
proximity = proximity.out,
forest = object,
forest.wt = forest.wt.out,
case.depth = case.depth.out,
distance = distance.out,
ptn.membership = ptn.membership.out,
membership = membership.out,
splitrule = splitrule,
inbag = inbag.out,
var.used = var.used.out,
imputed.indv = (if (n.miss>0) imputed.indv else NULL),
imputed.data = (if (n.miss>0) imputed.data else NULL),
split.depth = split.depth.out,
node.stats = node.stats,
block.size = block.size,
perf.type = perf.type,
ctime.internal = nativeOutput$cTimeInternal,
ctime.external = ctime.external.stop - ctime.external.start
)
## memory management
nativeOutput$leafCount <- NULL
remove(object)
remove(proximity.out)
remove(forest.wt.out)
remove(case.depth.out)
remove(distance.out)
remove(ptn.membership.out)
remove(membership.out)
remove(inbag.out)
if (n.miss > 0) remove(imputed.indv)
if (n.miss > 0) remove(imputed.data)
remove(var.used.out)
remove(split.depth.out)
remove(node.stats)
## Safe the outputs.
survOutput <- NULL
classOutput <- NULL
regrOutput <- NULL
if(vimp.joint) {
vimp.count <- 1
}
else {
vimp.count <- length(importance.xvar)
}
## family specific additions to the predict object
if (grepl("surv", family)) {
if ((length(event.info$event.type) > 1) &&
(splitrule != "l2.impute") &&
(splitrule != "logrankscore")) {
coerced.event.count <- length(event.info$event.type)
}
else {
coerced.event.count <- 1
}
if (family == "surv") {
## Right Censored names.
ens.names <- list(NULL, NULL)
mortality.names <- list(NULL, NULL)
err.names <- list(NULL, NULL)
vimp.names <- list(NULL, if (vimp.joint) "joint" else importance.xvar)
}
else {
## Competing Risk names.
ens.names <- list(NULL, NULL, c(paste("condCHF.", 1:length(event.info$event.type), sep = "")))
mortality.names <- list(NULL, paste("event.", 1:length(event.info$event.type), sep = ""))
cif.names <- list(NULL, NULL, c(paste("CIF.", 1:length(event.info$event.type), sep = "")))
err.names <- list(c(paste("event.", 1:length(event.info$event.type), sep = "")), NULL)
vimp.names <- list(paste("event.", 1:length(event.info$event.type), sep = ""),
if(vimp.joint) "joint" else importance.xvar)
}
## From the native code:
## "allEnsbCHF"
## "oobEnsbCHF"
## -> of dim [length(event.info$event.type)] x [RF_sortedTimeInterestSize] x [n]
## where [length(event.info$event.type)] may be equal to [1].
## To the R code:
## -> of dim [n] x [RF_sortedTimeInterestSize] x [length(event.info$event.type)]
chf <- (if (!is.null(nativeOutput$allEnsbCHF))
adrop3d.last(array(nativeOutput$allEnsbCHF,
c(n.observed, length(event.info$time.interest), length(event.info$event.type)),
dimnames=ens.names), length(event.info$event.type)) else NULL)
nativeOutput$allEnsbCHF <- NULL
survOutput <- list(chf = chf)
remove(chf)
chf.oob <- (if (!is.null(nativeOutput$oobEnsbCHF))
adrop3d.last(array(nativeOutput$oobEnsbCHF,
c(n.observed, length(event.info$time.interest), length(event.info$event.type)),
dimnames=ens.names), length(event.info$event.type)) else NULL)
nativeOutput$oobEnsbCHF <- NULL
survOutput = c(survOutput, chf.oob = list(chf.oob))
remove(chf.oob)
## From the native code:
## "allEnsbMRT"
## "oobEnsbMRT"
## -> of dim [length(event.info$event.type)] x [n]
## To the R code:
## -> of dim [n] x [length(event.info$event.type)]
predicted <- (if (!is.null(nativeOutput$allEnsbMRT))
adrop2d.last(array(nativeOutput$allEnsbMRT,
c(n.observed, length(event.info$event.type)), dimnames=mortality.names), coerced.event.count) else NULL)
nativeOutput$allEnsbMRT <- NULL
survOutput = c(survOutput, predicted = list(predicted))
remove(predicted)
predicted.oob <- (if (!is.null(nativeOutput$oobEnsbMRT))
adrop2d.last(array(nativeOutput$oobEnsbMRT,
c(n.observed, length(event.info$event.type)), dimnames=mortality.names), coerced.event.count) else NULL)
nativeOutput$oobEnsbMRT <- NULL
survOutput <- c(survOutput, predicted.oob = list(predicted.oob))
remove(predicted.oob)
## From the native code:
## "allEnsbSRV"
## "oobEnsbSRV"
## -> of dim [RF_sortedTimeInterestSize] x [n]
## To the R code:
## -> of dim [n] x [RF_sortedTimeInterestSize]
survival <- (if (!is.null(nativeOutput$allEnsbSRV))
matrix(nativeOutput$allEnsbSRV,
c(n.observed, length(event.info$time.interest))) else NULL)
nativeOutput$allEnsbSRV <- NULL
survOutput <- c(survOutput, survival = list(survival))
remove(survival)
survival.oob <- (if (!is.null(nativeOutput$oobEnsbSRV))
matrix(nativeOutput$oobEnsbSRV,
c(n.observed, length(event.info$time.interest))) else NULL)
nativeOutput$oobEnsbSRV <- NULL
survOutput <- c(survOutput, survival.oob = list(survival.oob))
remove(survival.oob)
## From the native code:
## "allEnsbCIF"
## "oobEnsbCIF"
## -> of dim [length(event.info$event.type)] x [RF_sortedTimeInterestSize] x [n]
## To the native code:
## -> of dim [n] x [RF_sortedTimeInterestSize] x [length(event.info$event.type)]
cif <- (if (!is.null(nativeOutput$allEnsbCIF))
array(nativeOutput$allEnsbCIF,
c(n.observed, length(event.info$time.interest), length(event.info$event.type)),
dimnames=cif.names) else NULL)
nativeOutput$allEnsbCIF <- NULL
survOutput <- c(survOutput, cif = list(cif))
remove(cif)
cif.oob <- (if (!is.null(nativeOutput$oobEnsbCIF))
array(nativeOutput$oobEnsbCIF,
c(n.observed, length(event.info$time.interest), length(event.info$event.type)),
dimnames=cif.names) else NULL)
nativeOutput$oobEnsbCIF <- NULL
survOutput = c(survOutput, cif.oob = list(cif.oob))
remove(cif.oob)
## From the native code:
## "perfSurv"
## -> of dim [ntree] x length(event.info$event.type)]
## To the R code:
## -> of dim [ntree] x length(event.info$event.type)]
if (!is.null(nativeOutput$perfSurv)) {
err.rate <- adrop2d.first(array(nativeOutput$perfSurv,
c(length(event.info$event.type), ntree),
dimnames=err.names),
coerced.event.count)
nativeOutput$perfSurv <- NULL
if (family == "surv-CR") {
survOutput = c(survOutput, err.rate = list(t(err.rate)))
}
else {
survOutput = c(survOutput, err.rate = list(err.rate))
}
remove(err.rate)
}
## From the native code:
## "blockSurv"
## -> of dim [block.cnt] x length(event.info$event.type)]
## To the R code:
## -> of dim [block.cnt] x length(event.info$event.type)]
if (!is.null(nativeOutput$blockSurv)) {
err.block.rate <- adrop2d.first(array(nativeOutput$blockSurv,
c(length(event.info$event.type), floor(ntree/block.size)),
dimnames=err.names),
coerced.event.count)
nativeOutput$blockSurv <- NULL
if (family == "surv-CR") {
survOutput = c(survOutput, err.block.rate = list(t(err.block.rate)))
}
else {
survOutput = c(survOutput, err.block.rate = list(err.block.rate))
}
remove(err.block.rate)
}
## From the native code:
## "vimpSurv"
## -> of dim [n.xvar] x length(event.info$event.type)]
## To the R code:
## -> of dim length(event.info$event.type)] x [n.xvar]
if (!is.null(nativeOutput$vimpSurv)) {
importance <- adrop2d.first(array(nativeOutput$vimpSurv,
c(length(event.info$event.type), vimp.count),
dimnames = vimp.names),
coerced.event.count)
nativeOutput$vimpSurv <- NULL
if (family == "surv-CR") {
survOutput = c(survOutput, importance = list(t(importance)))
}
else {
survOutput = c(survOutput, importance = list(importance))
}
remove(importance)
}
survOutput = c(
survOutput, list(
time.interest = event.info$time.interest,
ndead = (if (perf.type != "none") sum((if (restore.mode) yvar[, 2] else yvar.newdata[, 2]) !=0 , na.rm=TRUE) else NULL))
)
## When TRUE we revert to univariate nomenclature for all the outputs.
if(univariate.nomenclature) {
rfsrcOutput <- c(rfsrcOutput, survOutput)
}
else {
rfsrcOutput <- c(rfsrcOutput, survOutput = list(survOutput))
}
}
else {
## We consider "R", "I", and "C" outcomes. The outcomes are grouped
## by type and sequential. That is, the first "C" encountered in the
## response type vector is in position [[1]] in the classification output
## list, the second "C" encountered is in position [[2]] in the
## classification output list, and so on. The same applies to the
## regression outputs. We also have a mapping from the outcome slot back
## to the original response vector type, given by the following:
## Given yvar.types = c("R", "C", "R", "C", "R" , "I")
## regr.index[1] -> 1
## regr.index[2] -> 3
## regr.index[3] -> 5
## clas.index[1] -> 2
## clas.index[2] -> 4
## clas.index[3] -> 6
## This will pick up all "C" and "I".
class.index <- which(yvar.types != "R")
class.count <- length(class.index)
regr.index <- which(yvar.types == "R")
regr.count <- length(regr.index)
if (class.count > 0) {
classOutput <- vector("list", class.count)
## Names of the classification outputs.
names(classOutput) <- yvar.names[class.index]
## Vector to hold the number of levels in each factor response.
levels.count <- array(0, class.count)
## List to hold the names of levels in each factor response.
levels.names <- vector("list", class.count)
counter <- 0
for (i in class.index) {
counter <- counter + 1
## Note that [i] is the actual index of the y-variables and not a sequential iterator.
## The sequential iteratior is [counter]
levels.count[counter] <- yvar.nlevels[i]
if (yvar.types[i] == "C") {
## This an unordered factor.
## Here, we don't know the sequence of the unordered factor list, so we identify the factor by name.
levels.names[[counter]] <- yfactor$levels[[which(yfactor$factor == yvar.names[i])]]
}
else {
## This in an ordered factor.
## Here, we don't know the sequence of the ordered factor list, so we identify the factor by name.
levels.names[[counter]] <- yfactor$order.levels[[which(yfactor$order == yvar.names[i])]]
}
}
## Incoming error rates: T=tree R=response L=level
## T1R1L0 T1R1L1 T1R1L2 T1R1L3 T1R2L0 T1R2L1 T1R2L2, T2R1L0 T2R1L1 T2R1L2 T2R1L3 T2R2L0 T2R2L1 T2R2L2, ...
## In GROW mode, all class objects are represented in the tree offset calculation.
## In PRED mode, the offsets are dependent on the only those targets that are requested!
## Yeilds tree.offset = c(1, 8, ...)
tree.offset <- array(1, ntree)
## Sum of all level counts targetted classification. For example,
## if R1 has 3 levels, and R2 has 2 levels, and R3 has 6 levels, and
## only R1 and R3 and targetted, then levels.total = 9 + 3
levels.total <- 0
if (ntree > 1) {
## Iterate over all the target outcomes and map them to the class list.
for (i in 1:length(m.target.idx)) {
## This is the slot in the class list. The class list spans all the classification
## outputs, some of which may be NULL.
target.idx <- which (class.index == m.target.idx[i])
## Is the target a classification y-variable.
if (length(target.idx) > 0) {
levels.total <- levels.total + 1 + levels.count[target.idx]
}
}
tree.offset[2:ntree] <- levels.total
}
tree.offset <- cumsum(tree.offset)
## The block offset calculation mirrors the tree offset calculation, but differs in only the primary dimension.
block.offset <- array(1, floor(ntree/block.size))
if (floor(ntree/block.size) > 1) {
block.offset[2:floor(ntree/block.size)] <- levels.total
}
block.offset <- cumsum(block.offset)
## Incoming vimp rates: V=xvar R=response L=level
## V1R1L0 V1R1L1 V1R1L2 V1R1L3 V1R1L0 V1R2L1 V1R2L2, V2R1L0 V2R1L1 V2R1L2 V2R1L3 V2R2L0 V2R2L1 V2R2L2, ...
## Yeilds vimp.offset = c(1, 8, ...)
vimp.offset <- array(1, vimp.count)
if (vimp.count > 1) {
vimp.offset[2:vimp.count] <- levels.total
}
vimp.offset <- cumsum(vimp.offset)
iter.ensb.start <- 0
iter.ensb.end <- 0
## From the native code:
## "allEnsbCLS"
## "oobEnsbCLS"
## -> of dim [[class.count]] x [levels.count[]] x [n]
## where this is a ragged array.
## From the native code:
## "perfClas"
## -> of dim [ntree] x [class.count] x [1 + levels.count[]]
## where the slot [.] x [.] x [1] holds the unconditional error rate.
## Note that this is a ragged array.
## To the R code:
## -> of dim [[class.count]] x [ntree] x [1 + levels.count[]]
## From the native code:
## "blockClas"
## -> of dim [block.cnt] x [class.count] x [1 + levels.count[]]
## where the slot [.] x [.] x [1] holds the unconditional error rate.
## Note that this is a ragged array.
## To the R code:
## -> of dim [[class.count]] x [block.cnt] x [1 + levels.count[]]
## From the native code:
## "vimpClas"
## -> of dim [n.xvar] x [class.count] x [1 + levels.count]
## where the slot [.] x [.] x [1] holds the unconditional vimp.
## Note that this is a ragged array.
## To the R code:
## -> of dim [[class.count]] x [1 + levels.count] x [n.xvar]
## From the native code:
## "cseNum"
## -> of dim [resp.class.count] x [n]
## To the R code:
## -> of dim [[resp.class.count]] x [n]
## From the native code:
## "cseDen"
## -> of dim [n]
## To the R code:
## -> of dim [n]
## From the native code:
## "csvNum"
## -> of dim [n.xvar] x [resp.class.count] x [n]
## To the R code:
## -> of dim [[resp.class.count]] x [n] x [n.xvar]
## From the native code:
## "csvDen"
## -> of dim [n.xvar] x [n]
## To the R code:
## -> of dim x [n] x [n.xvar]
for (i in 1:length(m.target.idx)) {
target.idx <- which(class.index == m.target.idx[i])
if (length(target.idx) > 0) {
iter.ensb.start <- iter.ensb.end
iter.ensb.end <- iter.ensb.end + (levels.count[target.idx] * n.observed)
ens.names <- list(NULL, levels.names[[target.idx]])
err.names <- c("all", levels.names[[target.idx]])
vimp.names <- list(c("all", levels.names[[target.idx]]), if(vimp.joint) "joint" else importance.xvar)
predicted <- (if (!is.null(nativeOutput$allEnsbCLS))
array(nativeOutput$allEnsbCLS[(iter.ensb.start + 1):iter.ensb.end],
c(n.observed, levels.count[target.idx]), dimnames=ens.names) else NULL)
classOutput[[target.idx]] <- list(predicted = predicted)
response <- (if (!is.null(predicted)) get.bayes.rule(predicted, class.relfrq) else NULL)
classOutput[[target.idx]] <- c(classOutput[[target.idx]], class = list(response))
remove(predicted)
remove(response)
predicted.oob <- (if (!is.null(nativeOutput$oobEnsbCLS))
array(nativeOutput$oobEnsbCLS[(iter.ensb.start + 1):iter.ensb.end],
c(n.observed, levels.count[target.idx]), dimnames=ens.names) else NULL)
classOutput[[target.idx]] <- c(classOutput[[target.idx]], predicted.oob = list(predicted.oob))
response.oob <- (if (!is.null(predicted.oob)) get.bayes.rule(predicted.oob, class.relfrq) else NULL)
classOutput[[target.idx]] <- c(classOutput[[target.idx]], class.oob = list(response.oob))
remove(predicted.oob)
remove(response.oob)
## New case specific arrays, numerator:
cse.num <- (if (!is.null(nativeOutput$cseClas))
array(nativeOutput$cseClas[(iter.ensb.start + 1):iter.ensb.end], n) else NULL)
classOutput[[target.idx]] <- c(classOutput[[target.idx]], cse.num = list(cse.num))
remove(cse.num)
## New case specific arrays, denominator:
cse.den <- (if (!is.null(nativeOutput$cseDen))
array(nativeOutput$cseDen[(iter.ensb.start + 1):iter.ensb.end], n) else NULL)
classOutput[[target.idx]] <- c(classOutput[[target.idx]], cse.den = list(cse.den))
remove(cse.den)
if (!is.null(nativeOutput$perfClas)) {
err.rate <- array(0, c(1 + levels.count[target.idx], ntree))
for (j in 1: (1 + levels.count[target.idx])) {
err.rate[j, ] <- nativeOutput$perfClas[tree.offset]
tree.offset <- tree.offset + 1
}
row.names(err.rate) <- err.names
classOutput[[target.idx]] <- c(classOutput[[target.idx]], err.rate = list(t(err.rate)))
remove(err.rate)
}
if (!is.null(nativeOutput$blockClas)) {
err.block.rate <- array(0, c(1 + levels.count[target.idx], floor(ntree/block.size)))
for (j in 1: (1 + levels.count[target.idx])) {
err.block.rate[j, ] <- nativeOutput$blockClas[block.offset]
block.offset <- block.offset + 1
}
row.names(err.block.rate) <- err.names
classOutput[[target.idx]] <- c(classOutput[[target.idx]], err.block.rate = list(t(err.block.rate)))
remove(err.block.rate)
}
if (!is.null(nativeOutput$vimpClas)) {
importance <- array(0, c(1 + levels.count[target.idx], vimp.count), dimnames=vimp.names)
for (j in 1: (1 + levels.count[target.idx])) {
importance[j, ] <- nativeOutput$vimpClas[vimp.offset]
vimp.offset <- vimp.offset + 1
}
classOutput[[target.idx]] <- c(classOutput[[target.idx]], importance = list(t(importance)))
remove(importance)
}
## See GROW call for explanation of arrays:
csv.idx <- array(0, n.observed * n.xvar)
j2seq <- (target.idx - 1) * n.observed
for (j in 1:n.xvar) {
j1seq <- (j - 1) * n.observed
csv.idx[(j1seq + 1) : (j1seq + n.observed)] <- ( (j2seq + j1seq * class.count + 1) : (j2seq + j1seq * class.count + n.observed) )
}
## New case specific arrays, numerator:
csv.num <- (if (!is.null(nativeOutput$csvClas))
array(nativeOutput$csvClas[csv.idx], c(n.observed, n.xvar)) else NULL)
classOutput[[target.idx]] <- c(classOutput[[target.idx]], csv.num = list(csv.num))
remove(csv.num)
## New case specific arrays, denominator:
csv.den <- (if (!is.null(nativeOutput$csvDen))
array(nativeOutput$csvDen, c(n.observed, n.xvar)) else NULL)
classOutput[[target.idx]] <- c(classOutput[[target.idx]], csv.den = list(csv.den))
remove(csv.den)
}
}
nativeOutput$allEnsbCLS <- NULL
nativeOutput$oobEnsbCLS <- NULL
nativeOutput$perfClas <- NULL
nativeOutput$vimpClas <- NULL
nativeOutput$blockClas <- NULL
## When TRUE we revert to univariate nomenclature for all the outputs.
if(univariate.nomenclature) {
if ((class.count == 1) & (regr.count == 0)) {
names(classOutput) <- NULL
rfsrcOutput <- c(rfsrcOutput, unlist(classOutput, recursive=FALSE))
}
else {
rfsrcOutput <- c(rfsrcOutput, classOutput = list(classOutput))
}
}
else {
rfsrcOutput <- c(rfsrcOutput, classOutput = list(classOutput))
}
}
if (regr.count > 0) {
regrOutput <- vector("list", regr.count)
names(regrOutput) <- yvar.names[regr.index]
## Incoming: T=tree R=response
## T1R1 T1R2, T2R1 T2R2, T3R1 T3R2, ...
## Yeilds tree.offset = c(1, 3, 5)
tree.offset <- array(1, ntree)
if (ntree > 1) {
tree.offset[2:ntree] <- length(regr.index)
}
tree.offset <- cumsum(tree.offset)
## The block offset calculation mirrors the tree offset calculation, but differs in only the primary dimension.
block.offset <- array(1, floor(ntree/block.size))
if (floor(ntree/block.size) > 1) {
block.offset[2:floor(ntree/block.size)] <- length(regr.index)
}
block.offset <- cumsum(block.offset)
## Incoming vimp rates: V=xvar R=response L=level
## V1R1 V1R2, V2R1 V2R2, V3R1 V3R2, ...
## Yeilds vimp.offset = c(1, 3, 5, ...)
vimp.offset <- array(1, vimp.count)
if (vimp.count > 1) {
vimp.offset[2:vimp.count] <- length(regr.index)
}
vimp.offset <- cumsum(vimp.offset)
iter.ensb.start <- 0
iter.ensb.end <- 0
iter.qntl.start <- 0
iter.qntl.end <- 0
## From the native code:
## "allEnsbRGR"
## -> of dim [regr.count] x [obsSize]
## From the native code:
## "allEnsbQNT"
## "oobEnsbQNT"
## -> of dim [regr.count] x [length(prob)] x [n]
## From the native code:
## "perfRegr"
## -> of dim [regr.count] x [ntree]
## To the R code:
## -> of dim [[regr.count]] x [ntree]
## From the native code:
## "blockRegr"
## -> of dim [block.cnt] x [regr.count]
## To the R code:
## -> of dim [[regr.count]] x [block.cnt]
## From the native code:
## "vimpRegr"
## -> of dim [n.vxar] x [regr.count]
## To the R code:
## -> of dim [[regr.count]] x [n.xvar]
## From the native code:
## "cseNum"
## -> of dim [regr.count] x [n]
## To the R code:
## -> of dim [[regr.count]] x [n]
## From the native code:
## "cseDen"
## -> of dim [n]
## To the R code:
## -> of dim [n]
## From the native code:
## "csvNum"
## -> of dim [n.xvar] x [regr.count] x [n]
## To the R code:
## -> of dim [[regr.count]] x [n] x [n.xvar]
## From the native code:
## "csvDen"
## -> of dim [n.xvar] x [n]
## To the R code:
## -> of dim x [n] x [n.xvar]
for (i in 1:length(m.target.idx)) {
target.idx <- which(regr.index == m.target.idx[i])
if (length(target.idx) > 0) {
iter.ensb.start <- iter.ensb.end
iter.ensb.end <- iter.ensb.end + n.observed
iter.qntl.start <- iter.qntl.end
iter.qntl.end <- iter.qntl.end + (length(prob) * n.observed)
vimp.names <- if(vimp.joint) "joint" else importance.xvar
predicted <- (if (!is.null(nativeOutput$allEnsbRGR))
array(nativeOutput$allEnsbRGR[(iter.ensb.start + 1):iter.ensb.end], n.observed) else NULL)
regrOutput[[target.idx]] <- list(predicted = predicted)
remove(predicted)
predicted.oob <- (if (!is.null(nativeOutput$oobEnsbRGR))
array(nativeOutput$oobEnsbRGR[(iter.ensb.start + 1):iter.ensb.end], n.observed) else NULL)
regrOutput[[target.idx]] <- c(regrOutput[[target.idx]], predicted.oob = list(predicted.oob))
remove(predicted.oob)
## New case specific arrays, numerator:
cse.num <- (if (!is.null(nativeOutput$cseRegr))
array(nativeOutput$cseRegr[(iter.ensb.start + 1):iter.ensb.end], n) else NULL)
regrOutput[[target.idx]] <- c(regrOutput[[target.idx]], cse.num = list(cse.num))
remove(cse.num)
## New case specific arrays, denominator:
cse.den <- (if (!is.null(nativeOutput$cseDen))
array(nativeOutput$cseDen[(iter.ensb.start + 1):iter.ensb.end], n) else NULL)
regrOutput[[target.idx]] <- c(regrOutput[[target.idx]], cse.den = list(cse.den))
remove(cse.den)
quantile <- (if (!is.null(nativeOutput$allEnsbQNT))
array(nativeOutput$allEnsbQNT[(iter.qntl.start + 1):iter.qntl.end],
c(n.observed, length(prob))) else NULL)
regrOutput[[target.idx]] <- c(regrOutput[[target.idx]], quantile = list(quantile))
remove(quantile)
quantile.oob <- (if (!is.null(nativeOutput$oobEnsbQNT))
array(nativeOutput$oobEnsbQNT[(iter.qntl.start + 1):iter.qntl.end],
c(n.observed, length(prob))) else NULL)
regrOutput[[target.idx]] <- c(regrOutput[[target.idx]], quantile.oob = list(quantile.oob))
remove(quantile.oob)
if (!is.null(nativeOutput$perfRegr)) {
err.rate <- nativeOutput$perfRegr[tree.offset]
tree.offset <- tree.offset + 1
regrOutput[[target.idx]] <- c(regrOutput[[target.idx]], err.rate = list(err.rate))
remove(err.rate)
}
if (!is.null(nativeOutput$blockRegr)) {
err.block.rate <- nativeOutput$blockRegr[block.offset]
block.offset <- block.offset + 1
regrOutput[[target.idx]] <- c(regrOutput[[target.idx]], err.block.rate = list(err.block.rate))
remove(err.block.rate)
}
if (!is.null(nativeOutput$vimpRegr)) {
importance <- nativeOutput$vimpRegr[vimp.offset]
names(importance) <- vimp.names
vimp.offset <- vimp.offset + 1
regrOutput[[target.idx]] <- c(regrOutput[[target.idx]], importance = list(importance))
remove(importance)
}
## See GROW call for explanation of arrays:
csv.idx <- array(0, n.observed * n.xvar)
j2seq <- (target.idx - 1) * n.observed
for (j in 1:n.xvar) {
j1seq <- (j - 1) * n.observed
csv.idx[(j1seq + 1) : (j1seq + n.observed)] <- ( (j2seq + j1seq * regr.count + 1) : (j2seq + j1seq * regr.count + n.observed) )
}
## New case specific arrays, numerator:
csv.num <- (if (!is.null(nativeOutput$csvRegr))
array(nativeOutput$csvRegr[csv.idx], c(n.observed, n.xvar)) else NULL)
regrOutput[[target.idx]] <- c(regrOutput[[target.idx]], csv.num = list(csv.num))
remove(csv.num)
## New case specific arrays, denominator:
csv.den <- (if (!is.null(nativeOutput$csvDen))
array(nativeOutput$csvDen, c(n.observed, n.xvar)) else NULL)
regrOutput[[target.idx]] <- c(regrOutput[[target.idx]], csv.den = list(csv.den))
remove(csv.den)
}
}
nativeOutput$allEnsbRGR <- NULL
nativeOutput$oobEnsbRGR <- NULL
nativeOutput$allEnsbQNT <- NULL
nativeOutput$oobEnsbQNT <- NULL
nativeOutput$perfRegr <- NULL
nativeOutput$vimpRegr <- NULL
nativeOutput$blockRegr <- NULL
## When TRUE we revert to univariate nomenclature for all the outputs.
if(univariate.nomenclature) {
if ((class.count == 0) & (regr.count == 1)) {
names(regrOutput) <- NULL
rfsrcOutput <- c(rfsrcOutput, unlist(regrOutput, recursive=FALSE))
}
else {
rfsrcOutput <- c(rfsrcOutput, regrOutput = list(regrOutput))
}
}
else {
rfsrcOutput <- c(rfsrcOutput, regrOutput = list(regrOutput))
}
}
}
class(rfsrcOutput) <- c("rfsrc", "predict", family)
return(rfsrcOutput)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.