distRforest: Distribution-based Random Forest

Documented in xpred.rpart

##
##  Get a set of cross-validated predictions
##  Most of the setup is identical to the rpart routine
##
xpred.rpart <- function(fit, xval = 10L, cp, return.all = FALSE)
{
    if (!inherits(fit, "rpart")) stop("Invalid fit object")

    method <- fit$method
    method.int <- pmatch(method, c("anova", "poisson", "class", "user", "exp"))
    if (method.int == 5L) method.int <- 2L
    Terms <- fit$terms
    Y <- fit$y
    X <- fit$x
    wt <- fit$wt
    numresp <- fit$numresp

    if (is.null(Y) || is.null(X)) {
	m <- fit$model
	if (is.null(m)) {
	    m <- fit$call[match(c("", "formula", "data", "weights", "subset",
                                  "na.action"), names(fit$call), 0L)]
	    if (is.null(m$na.action)) m$na.action <- na.rpart
	    m[[1]] <- quote(stats::model.frame)
	    m <- eval.parent(m)
        }
	if (is.null(X)) X <- rpart.matrix(m)
	if (is.null(wt)) wt <- model.extract(m, "weights")
	if (is.null(Y)) {
	    yflag <- TRUE
	    Y <- model.extract(m, "response")
            offset <- attr(Terms, "offset")
	    if (method != "user") {
		init <- get(paste("rpart", method, sep = "."))(Y, offset, NULL)
		Y <- init$y
		numy <- if (is.matrix(Y)) ncol(Y) else 1L
            }
        } else {
	    yflag <- FALSE
            numy <- if (is.matrix(Y)) ncol(Y) else 1L
        }
    } else {
	yflag <- FALSE
        numy <- if (is.matrix(Y)) ncol(Y) else 1L
	offset <- 0L
    }

    nobs <- nrow(X)
    nvar <- ncol(X)
    if (length(wt) == 0) wt <- rep(1, nobs)

    cats <- rep(0, nvar)
    xlevels <- attr(fit, "xlevels")
    if (!is.null(xlevels))
        cats[match(names(xlevels), colnames(X))] <-
            unlist(lapply(xlevels, length))

    controls <- fit$control
    if (missing(cp)) {
	cp <- fit$cptable[, 1L]
	cp <- sqrt(cp * c(10, cp[-length(cp)]))
	cp[1L] <- (1 + fit$cptable[1L, 1L])/2
    }
#    ncp <- length(cp)

    if (length(xval) == 1L) {
	## make random groups
	xgroups <- sample(rep(1L:xval, length = nobs), nobs, replace = FALSE)
    } else if (length(xval) == nrow(X)) {
	xgroups <- xval
	xval <- length(unique(xgroups))
    } else {
	## Check to see if observations were removed due to missing
	if (!is.null(fit$na.action)) {
	    ## if na.rpart was used, then na.action will be a vector
	    temp <- as.integer(fit$na.action)
	    xval <- xval[-temp]
	    if (length(xval) == nobs) {
		xgroups <- xval
		xval <- length(unique(xgroups))
            } else stop("Wrong length for 'xval'")
        } else stop("Wrong length for 'xval'")
    }

    costs <- fit$call$costs
    if (is.null(costs)) costs <- rep(1, nvar)

    parms <- fit$parms
    if (method == "user") {
	mlist <- fit$functions

	## If yflag == TRUE, then y was retrieved from the original data and we
        ##   need to call init to check and possibly transform it.
        ## If FALSE, then we have one of the few differences with the
        ##  rpart setup
        if (yflag) {
            init <- if (length(parms) == 0L) mlist$init(Y, offset, , wt)
                    else mlist$init(Y, offset, parms, wt)
            Y <- init$Y
            numy <- init$numy
            parms <- init$parms
        } else {
            numy <- if (is.matrix(Y)) ncol(Y) else 1L
            init <- list(numresp = numresp, numy = numy, parms = parms)
        }
        keep <- rpartcallback(mlist, nobs, init) # assign to a variable to
                                                 # stop garbage collection
	method.int <- 4L # the fourth entry in func_table.h
    }

    ## Finally do the work
    if (is.matrix(Y))  Y <- as.double(t(Y)) else storage.mode(Y) <- "double"
    storage.mode(X) <- "double"
    storage.mode(wt) <- "double"
    temp <- as.double(unlist(parms))
    if (length(temp) == 0L) temp <- 0    # if NULL, pass a dummy
    pred <- .Call(C_xpred,
                  ncat = as.integer(cats * !fit$ordered),
                  method = as.integer(method.int),
                  as.double(unlist(controls)),
                  temp,
                  as.integer(xval),
                  as.integer(xgroups),
                  Y,
                  X,
                  wt,
                  as.integer(numy),
                  as.double(costs),
                  as.integer(return.all),
                  as.double(cp),
                  as.double(fit$frame[1L, "dev"]),
                  as.integer(numresp))

    if (return.all && numresp > 1L) {
        temp <- array(pred, dim = c(numresp, length(cp), nrow(X)),
                      dimnames = list(NULL, format(cp), rownames(X)))
        aperm(temp)                     # flip the dimensions
    } else
        matrix(pred, nrow = nrow(X), byrow = TRUE,
               dimnames = list(rownames(X), format(cp)))
}