Nothing
##method to compute forestFloor_regression
forestFloor_randomForest_regression <- function(rf.fit,
X,
Xtest=NULL,
calc_np = FALSE,
binary_reg = FALSE,
bootstrapFC = FALSE,
majorityTerminal = FALSE,
...
) {
otherArgs = list(...) #extra arguments
#check the rf.fitbject have a inbag
if(is.null(rf.fit$inbag)) stop("input randomForest-object have no inbag, set keep.inbag=T,
try, randomForest(X,Y,keep.inbag=T) for regression where Y is numeric
and, cinbag(X,Y,keep.inbag=T,keep.forest=T) for binary-class where Y is factor
..cinbag is from trimTrees package...
error condition: if(is.null(rf.fit$inbag))")
#merge X and Xtest if Xtest is provided
if(!is.null(Xtest)) {
isTrain = c(rep(T,dim(X)[1]),rep(F,dim(Xtest)[1])) #mark OOB-CV FC and ext. test FC
merged.list = Xtestmerger(X,Xtest,rf.fit$inbag,rf.fit$y) #test for compatability and merge
X = merged.list$bigX #rbind X and Xtest (specific factor merging)
rf.fit$inbag = merged.list$bigInbag #correct inbag matrix
rf.fit$y = merged.list$bigy #fill in dummy target values, not used as test is always OOB
rf.fit$oob.times = c(rf.fit$oob.times,rep(rf.fit$ntree,sum(!merged.list$isTrain)))
} else {
isTrain = rep(T,dim(X)[1])
}
#make node status as integer matrix
ns = rf.fit$forest$nodestatus
storage.mode(ns) = "integer"
#translate binary classification RF-object, to regression mode
if(rf.fit$type=="classification") {
if(length(levels(rf.fit$y))!=2) stop("must be binary classification to use regression mode.
error condition: if(length(levels(rf.fit$y))!=2")
print("RF is classification, converting factors/categories to numeric 0 an 1")
Y = as.numeric((rf.fit$y))-1
cat(" defining",levels(rf.fit$y)[1]," as 0\n defining",levels(rf.fit$y)[2],"as 1")
rf.fit$forest$leftDaughter = rf.fit$forest$treemap[,1,] #translate daughter representation to regression mode
rf.fit$forest$rightDaughter = rf.fit$forest$treemap[,2,]
ns[ns==1] = -3 ##translate nodestatus representation to regression mode
if(is.null("rf.fit$inbagCount") && (is.null(rf.fit$call$replace) || rf.fit$call$replace)) {
stop("cannot compute classification forestFloor for
randomForest::randomForest when trained with replace=T.
Train forest with cinbag::trimTrees instead of randomForest().
Or set reaplace = FALSE. The two functions are identical,
except cinbag() entails a more detailed inbag record, which is
needed to estimate binary node probabilities.")
}
if(!calc_np) {
#print("node predictions must be re-calculated for random forest of type classification, set calc_np=T)
#error conditions: if(!calc_np && rf.fit$type='classification')")
print(" ")
print("setting calc_np=TRUE")
calc_np=TRUE
}
if(is.null(rf.fit$inbagCount)) inbag = rf.fit$inbag else inbag = rf.fit$inbagCount
} else {
Y=rf.fit$y
inbag = rf.fit$inbag
}
#preparing data, indice-correction could be moved to C++
#a - This should be fethed from RF-object, flat interface
ld = rf.fit$forest$leftDaughter-1 #indice correction, first element is 0 in C++ and 1 in R.
storage.mode(ld) = "integer"
rd = rf.fit$forest$rightDaughter-1
storage.mode(rd) = "integer"
bv = rf.fit$forest$bestvar-1
storage.mode(bv) = "integer"
np = rf.fit$forest$nodepred
storage.mode(np) = "double"
bs = rf.fit$forest$xbestsplit
storage.mode(bs) = "double"
ib = inbag
storage.mode(ib) = "integer"
Yd = as.numeric(Y)
storage.mode(Yd) = "double"
ot = rf.fit$oob.times
storage.mode(ot) = "integer"
##recording types of variables
if(is.null(rf.fit$forest)) {
stop("rf.fit$forest is null, try set keep.forest=TRUE during training")
}
xlevels = unlist(lapply(rf.fit$forest$xlevels,length),use.names=F)
xl = xlevels
storage.mode(xl) = "integer"
varsToBeConverted = xlevels>1
##Converting X to Xd, all factors change to level numbers
Xd=X
for(i in 1:dim(Xd)[2]) {
if(varsToBeConverted[i]) {
Xd[,i] = as.numeric(Xd[,i])-1
}
}
Xd=as.matrix(Xd)
storage.mode(Xd) = "double"
#outout variable
localIncrements = Xd*0
storage.mode(localIncrements) = "double"
#should activities of nodes be reestimated(true) or reused from randomForest object(false)
calculate_node_pred=calc_np
# C++ function, recursively finding increments of all nodes of all trees
# where OOB samples are present. vars, obs and ntree is "passed by number"
# Anything else is passed by reference. Found increments are imediately
# summed to localIncrements matrix.
recTree(
#passed by number
vars=dim(X)[2],
obs=dim(X)[1],
ntree=rf.fit$ntree,
calculate_node_pred=calculate_node_pred,
#passed by reference
X=Xd, #training data, double matrix [obs,vars]
Y=Yd,
majorityTerminal=majorityTerminal,
leftDaughter = ld, #row indices of left subnodes, integer matrix [nrnodes,ntree]
rightDaughter = rd, #...
nodestatus = ns, #weather node is terminal or not,
xbestsplit = bs,
nodepred = np,
bestvar = bv,
inbag = ib,
varLevels = xl,
ot, #oob.times
localIncrements = localIncrements #output is written directly to localIncrements from C++
)
if(bootstrapFC) {
#Compute FC for random bootstrapping or stratification over
#local increments from training set to root nodes, by bootstrap/stratificaiton
#compute LIs with inbag samples
#manual root mean calculation
if(binary_reg) {
rootSum = apply(rf.fit$inbag*Y,2,sum) #vector, Y mean in each tree
rootMean = rootSum / apply(rf.fit$inbag,2,sum) # vector root predictions
} else{
#... or just fetch from rf object
rootMean = rf.fit$forest$nodepred[1,]
}
grandMean = mean(Y[isTrain]) #training set target mean, not including test
bootStrapLIs = rootMean - grandMean #vector, one LI for each tree
#sum LIs over OOB samples
OOB.indices = as.matrix(rf.fit$inbag == 0)
OOB.indices[!OOB.indices] = NA #ignore samples being inbag
OOB.bootStrapLIs = t(t(OOB.indices) * bootStrapLIs) #collect LI for each sample when OOB
bootstrapFC.col = apply(OOB.bootStrapLIs,1,mean,na.rm=TRUE) #sum LI into FCs
localIncrements = cbind(localIncrements,bootstrapFC=bootstrapFC.col) #bind bootstrap col
}
#class argument will not work if type is not 1
if(!is.null(otherArgs$impClass)) {
otherArgs$impType = 1
print("class has been set to something, passing along type=1")
}
#if(is.null(otherArgs$impType)) otherArgs$impType = 1
#randomForest::importance to fetch importance
imp = forestFloor::importanceExportWrapper( #got a lot of funnies, this wrapper should catch them
rf = rf.fit,
type = otherArgs$impType,
class = otherArgs$impClass,
scale = otherArgs$impScale
)
#writing out list
out = list(X=as.data.frame(X), #cast as data.frame
Y=Y,
importance = imp,
imp_ind = sort(imp,decreasing=TRUE,index.return=TRUE)$ix,
FCmatrix = localIncrements,
isTrain = isTrain
)
#check that only one importance column is exported
if(!is.null(dim(out$importance)) && dim(out$importance)[2]!=1) {
warning("only one importance measure should be exported,
set type=NULL, class=NULL, scale=FALSE")
out$importance = randomForest::importance(x=rf.fit,type=1,scale=FALSE)[,1]
out$imp_ind = imp_ind = sort(imp,decreasing=TRUE,index.return=TRUE)$ix
}
class(out) = "forestFloor_regression"
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.