R/OnliningRandomUniformForests.R
In randomUniformForest: Random Uniform Forests for Classification, Regression and Unsupervised Learning

Documented in rm.trees rUniformForest.big rUniformForest.combine rUniformForest.grow

# <OWNER> = Saip CISS
# <ORGANIZATION> = QueensBridge Quantitative
# <YEAR> = 2014

# LICENSE 
# BSD 3-CLAUSE LICENSE

# Copyright (c) 2014, Saip CISS
# All rights reserved.

# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

# 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.

# 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.

# 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.

# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# END OF LICENSE 

onlineCombineRUF <- function(rufObject1, rufObject2)
{
	rufObject.0 <- mergeLists(rufObject1, rufObject2, OOBEval = TRUE)	
	rufObject.0$regression = rufObject1$regression	
	if (!is.null(rufObject.0$percent.varExplained))
	{	rufObject.0$percent.varExplained = c(rufObject1$percent.varExplained, rufObject2$percent.varExplained)	}	
	if (!is.null(rufObject.0$OOB))
	{	
		if (is.character(rufObject.0$OOB))
		{	rufObject.0$OOB = rufObject1$OOB }
		else
		{
			p1 = ncol(rufObject1$OOB)
			p2 = ncol(rufObject2$OOB)
			tmp.OOB1 = rufObject1$OOB[,-p1]
			tmp.OOB2 = rufObject2$OOB[,-p2]
						
			if (p1 != p2)
			{
				if (p1 > p2)
				{
					tmp.OOB = tmp.OOB1
					matchIdx = rmNA(match(as.numeric(colnames(tmp.OOB2)), as.numeric(colnames(tmp.OOB1))))
					for (i in 1:length(matchIdx))
					{	
						tmp.OOB[matchIdx[i],matchIdx[i]] = tmp.OOB[matchIdx[i],matchIdx[i]] + tmp.OOB2[matchIdx[i],matchIdx[i]]
					}	
				}
				else
				{
					tmp.OOB = tmp.OOB2
					matchIdx = rmNA(match(as.numeric(colnames(tmp.OOB1)), as.numeric(colnames(tmp.OOB2))))
					for (i in 1:length(matchIdx))
					{	
						tmp.OOB[matchIdx[i],matchIdx[i]] = tmp.OOB[matchIdx[i],matchIdx[i]] + tmp.OOB1[matchIdx[i],matchIdx[i]]
					}				
				}
				class.error =  1 - diag(tmp.OOB)/rowSums(tmp.OOB)	
				rufObject.0$OOB = cbind(tmp.OOB, class.error)
			}
			else
			{			
				tmp.OOB = tmp.OOB1 + tmp.OOB2
				class.error =  1 - diag(tmp.OOB)/rowSums(tmp.OOB)	
				rufObject.0$OOB = cbind(tmp.OOB, class.error)
			}
		}
	}	
	tmp.rufObject.0 = rufObject.0	
	if (!is.null(rufObject.0$OOB.strengthCorr))
	{	
		rufObject.0$OOB.strengthCorr = NULL		
		if (!is.null(rufObject1$OOB.strengthCorr$PE.forest))
		{
			rufObject.0$OOB.strengthCorr$PE.forest = c(rufObject1$OOB.strengthCorr$PE.forest, rufObject2$OOB.strengthCorr$PE.forest)
			rufObject.0$OOB.strengthCorr$PE.max = c(rufObject1$OOB.strengthCorr$PE.max, rufObject2$OOB.strengthCorr$PE.max)
			rufObject.0$OOB.strengthCorr$PE.tree = c(rufObject1$OOB.strengthCorr$PE.tree, rufObject2$OOB.strengthCorr$PE.tree)
			rufObject.0$OOB.strengthCorr$mean.corr = c(rufObject1$OOB.strengthCorr$mean.corr, rufObject2$OOB.strengthCorr$mean.corr)
		}
		else
		{
			rufObject.0$OOB.strengthCorr$PE = c(rufObject1$OOB.strengthCorr$PE, rufObject2$OOB.strengthCorr$PE)
			rufObject.0$OOB.strengthCorr$avg.corr = c(rufObject1$OOB.strengthCorr$avg.corr, rufObject2$OOB.strengthCorr$avg.corr)
			rufObject.0$OOB.strengthCorr$strength = c(rufObject1$OOB.strengthCorr$strength, rufObject2$OOB.strengthCorr$strength)
			rufObject.0$OOB.strengthCorr$std.strength = c(rufObject1$OOB.strengthCorr$std.strength, rufObject2$OOB.strengthCorr$std.strength)
		}
	}	
	if (!is.null(rufObject.0$variableImportance))
	{	
		rufObject.0$variableImportance  = NULL
		tmp.var.imp.names = unique(c(as.character(rufObject1$variableImportance[,1]), as.character(rufObject2$variableImportance[,1])))
		p = length(tmp.var.imp.names)
		score = classFrequency = classEstimate = rep(0,p)		
		for (i in 1:p)
		{
			idx.1 = which(rufObject1$variableImportance[,1] == tmp.var.imp.names[i])
			idx.2 = which(rufObject2$variableImportance[,1] == tmp.var.imp.names[i])		    
			if (length(idx.1) > 0)
			{	
				score[i] = rufObject1$variableImportance[idx.1,2]				
				if (!rufObject.0$regression) 
				{
					if (any(is.na(rufObject1$variableImportance[,3])))
					{	rufObject1$variableImportance[,3] = as.vector(na.replace(matrix(rufObject1$variableImportance[,3]))) }
					classFrequency[i] = rufObject1$variableImportance[idx.1,4]
					classEstimate[i] = rufObject1$variableImportance[idx.1,3]
				}
			}			
			if  (length(idx.2) > 0)
			{	
				score[i] = score[i]  + rufObject2$variableImportance[idx.2, 2]				
				if (!rufObject.0$regression)
				{
					if (any(is.na(rufObject2$variableImportance[,3])))
					{	rufObject2$variableImportance[,3] = as.vector(na.replace(matrix(rufObject2$variableImportance[,3]))) }
					if (classEstimate[i] == rufObject2$variableImportance[idx.2,3])
					{	classFrequency[i] = 0.5*(classFrequency[i] + rufObject2$variableImportance[idx.2,4]) }
					else
					{
						if (!is.na(classFrequency[i]))	
						{	allClassFrequency = c(classFrequency[i], rufObject2$variableImportance[idx.2,4])	}
						else
						{	 allClassFrequency = rufObject2$variableImportance[idx.2,4]	 }						
						classFrequency[i] = allClassFrequency[which.max(allClassFrequency)]
						classEstimate[i] = if( allClassFrequency[1] == 0) { rufObject1$variableImportance[idx.2,3] }
						else { c(classEstimate[i], rufObject1$variableImportance[idx.2,3])[which.max(allClassFrequency)] } 
					}
				}
			}  
		}		
		if (!rufObject.0$regression)
		{
			var.imp.output = cbind(score, classEstimate, classFrequency, round(100*score/max(score),2), round(100*score/sum(score),0))
			var.imp.output =  data.frame(tmp.var.imp.names, var.imp.output)
			var.imp.output = var.imp.output[order(var.imp.output[,2], decreasing = TRUE),]					
			colnames(var.imp.output) = c("variables", "score", "class", "class.frequency", "percent", "percent.importance")
		}
		else
		{
			var.imp.output = cbind(score,  round(100*score/max(score),2), round(100*score/sum(score),0))
			var.imp.output =  data.frame(tmp.var.imp.names, var.imp.output)
			var.imp.output = var.imp.output[order(var.imp.output[,2], decreasing = TRUE),]					
			colnames(var.imp.output) = c("variables", "score", "percent", "percent.importance")
		}		
		rufObject.0$variableImportance = var.imp.output
	}
	if (!is.null(rufObject.0$OOB.strengthCorr)) { 	tmp.rufObject.0$OOB.strengthCorr = rufObject.0$OOB.strengthCorr	}	
	if (!is.null(rufObject.0$variableImportance))	{   tmp.rufObject.0$variableImportance  = rufObject.0$variableImportance  }	
	return(tmp.rufObject.0)
}

rUniformForest.big <- function(X, Y = NULL, xtest = NULL, ytest = NULL, nforest = 2, randomCut = FALSE, 
	reduceDimension = FALSE,
    reduceAll = FALSE, 			
	replacement = FALSE, 
	subsample = FALSE,	
	ntree = 100,
    nodesize = 1, 
	maxnodes = Inf,
	mtry = ifelse(bagging,ncol(X),floor(4/3*ncol(X))),
	regression = ifelse(is.factor(Y),FALSE, TRUE),
    subsamplerate = ifelse(regression, 0.7, 1),
	replace = ifelse(regression,FALSE,TRUE),
	OOB = TRUE,
	BreimanBounds = ifelse(OOB, TRUE, FALSE),
    depth = Inf,
    depthcontrol = NULL,
    importance = TRUE,	
	bagging = FALSE,
	unsupervised = FALSE, 
	unsupervisedMethod = c("uniform univariate sampling", "uniform multivariate sampling", "with bootstrap"),
	classwt = NULL,	
	oversampling = 0,
	targetclass = -1,
	outputperturbationsampling = FALSE,
    rebalancedsampling = FALSE,
	featureselectionrule = c("entropy", "gini", "random", "L1", "L2"),	
	randomcombination = 0,
	randomfeature = FALSE,
	categoricalvariablesidx = NULL,
	na.action = c("fastImpute", "accurateImpute", "omit"),
	logX = FALSE,
	classcutoff = c(0,0),
	subset = NULL,
	usesubtrees = FALSE,
	threads = "auto",
	parallelpackage = "doParallel")
{	
	if (!is.null(subset))
	{
		X = X[subset,]
		Y = Y[subset]
	}
	
	if (exists("categorical", inherits = FALSE)) {  categoricalvariablesidx = categorical }
	else { categorical = NULL  }
	if ((nrow(X)/nforest < 50) & !reduceDimension & !reduceAll) 
	{ stop("Too small size to achieve efficiency in learning")	}
	X <- fillVariablesNames(X)
	if (is.null(Y)) 
	{ 
		cat("Enter in unsupervised learning.\n")
		if (unsupervisedMethod[1] == "with bootstrap")
		{ 
			cat("'with bootstrap' is only needed as the second argument of 'method' option. Default option will be computed.\n")
			unsupervisedMethod[1] = "uniform univariate sampling"
		}
		XY <- unsupervised2supervised(X, method = unsupervisedMethod[1], bootstrap = if (length(unsupervisedMethod) > 1) { TRUE } else {FALSE })
		X = XY$X
		Y = as.factor(XY$Y)
		unsupervised = TRUE
	}	
    getFactors <- which.is.factor(X, count = TRUE) 	
	if (is.data.frame(X)) {  cat("X is a dataframe. String or factors have been converted to numeric values.\n")  }
	X <- NAfactor2matrix(X)	
	if (randomCut)
	{
		set.seed(sample(1e9,1))
		n = nrow(X)
		newIdx = sample(n,n)
		X = X[newIdx,]
		Y = Y[newIdx]
	}
	if ( length(which( (X == Inf) | (X == -Inf) ) ) > 0) 
	{ stop("Inf or -Inf values found in data. Learning can not be done.\nPlease replace them with NA in order to learn") }
	XY <- NATreatment(X, Y, na.action = na.action, regression = regression)
	X  <- XY$X
	Y  <- XY$Y
	rm(XY)
	if (randomcombination[1] > 0)
	{ 
		randomcombinationString = randomcombination[1]
		L.combination = length(randomcombination)
		if (L.combination%%3 != 0)
		{	weights = round(replicate(length(randomcombination)/2, runif(1)), 2)	}
		else
		{ 	
			weights = round(randomcombination[(L.combination + 1 - L.combination/3):L.combination],2)
			randomcombination = randomcombination[1:(L.combination - (L.combination/3))]
		}		
		for (i in 2:length(randomcombination)) {  randomcombinationString = paste(randomcombinationString, randomcombination[i], sep=",") }
		for (i in 1:length(weights)) {  randomcombinationString = paste(randomcombinationString, weights[i], sep=",") }
		if (!is.null(xtest)) { xtest <- randomCombination(xtest, combination = randomcombination, weights = weights) }
		randomcombinationObject = list(randomcombination, weights)
		X <- randomCombination(X, combination = randomcombinationObject[[1]], weights = randomcombinationObject[[2]])	
	}
	else
	{   randomcombinationObject = randomcombination }
	RUF.model <- randomUniformForestCore.big(X, Y, nforest, reduceDimension = reduceDimension, reduceAll = reduceAll, r.ntree = ntree, r.nodeMinSize = nodesize, r.maxNodes = maxnodes, r.depth = depth, r.depthControl = depthcontrol, r.features = mtry, r.bootstrap = replace, r.treeSubsampleRate = subsamplerate, r.overSampling = oversampling, r.outputPerturbation = outputperturbationsampling, r.targetClass = targetclass, 
	r.rebalancedSampling = rebalancedsampling, r.classcutoff = classcutoff, r.use.OOB = OOB,
	r.BreimanBounds = BreimanBounds, r.treeBagging = bagging, r.randomCombination = randomcombination, 
	r.randomFeature = randomfeature, r.variableImportance = importance, r.whichCatVariables = categoricalvariablesidx, r.featureSelectionRule = featureselectionrule, r.logX = logX, r.classwt = classwt, r.regression = regression,  r.useSubTrees = usesubtrees, r.threads = threads, r.parallelPackage = parallelpackage, replacement = FALSE, subsample = FALSE)		
	if (!is.null(classwt))
	{ 
		classwtString = classwt[1]
		for (i in 2:length(classwt)) { 	classwtString = paste(classwtString, classwt[i], sep=",") }	
	}	
    if (length(targetclass) > 1)
	{ 
		targetclassString = targetclass[1]
		for (i in 2:length(targetclass)) { 	targetclassString = paste(targetclassString, targetclass[i], sep=",") }	
	}	
	if (length(rebalancedsampling) > 1)
	{ 
		rebalancedsamplingString = rebalancedsampling[1]
		for (i in 2:length(rebalancedsampling)) 
		{ 	rebalancedsamplingString = paste(rebalancedsamplingString, rebalancedsampling[i], sep=",") }	
	}	
	if (RUF.model$regression)	{  classcutoff = c(0,0)  }	
	if (as.numeric(classcutoff[2]) == 0)	{  classcutoffString = FALSE }
	else
	{
		classcutoffString = levels(Y)[which(levels(Y) == as.character(classcutoff[1]))]
		classcutoffString = paste("Class ", classcutoffString, "," , as.numeric(classcutoff[2])*100, "%", sep ="")
	}	
	paramsObject = c(reduceDimension, reduceAll, nforest, ntree, mtry, nodesize, maxnodes, as.character(replace), 		bagging, depth, 
		ifelse(is.null(depthcontrol), length(depthcontrol)> 0, depthcontrol), OOB, importance, subsamplerate, 
		ifelse(is.null(classwt),  length(classwt) > 0, classwtString), classcutoffString, 
		ifelse((oversampling == 0),(oversampling != 0), oversampling), outputperturbationsampling, 
		ifelse(length(targetclass) > 1, targetclassString, targetclass), 
		ifelse(length(rebalancedsampling) > 1, rebalancedsamplingString, rebalancedsampling), 
		ifelse((randomcombination[1] == 0),(randomcombination != 0), randomcombinationString), randomfeature, 
		ifelse(is.null(categoricalvariablesidx), length(categoricalvariablesidx) > 0, length(categoricalvariablesidx)), featureselectionrule[1])	
	names(paramsObject) = c("reduceDimension", "reduceAll", "nforest", "ntree", "mtry", "nodesize", "maxnodes", "replace", "bagging", "depth", "depthcontrol",  "OOB", "importance", "subsamplerate", "classwt", "classcutoff", "oversampling", "outputperturbationsampling", "targetclass", "rebalancedsampling", "randomcombination", "randomfeature", "categorical variables", "featureselectionrule")	
	if (RUF.model$regression) 
	{ 
		paramsObject[24] = "Sum of squared residuals"
	    if (paramsObject[8] == "FALSE") { paramsObject[11] = FALSE }
	}		
	paramsObject = as.data.frame(paramsObject)
	RUF.model$logX = logX	
	RUFObject <- genericOutput(xtest, ytest, paramsObject, RUF.model, ytrain = Y, classcutoff = classcutoff)	
	RUFObject$logX = logX
	if(is.null(colnames(X))) 
	{  
		varNames = NULL
		for (i in 1:ncol(X)) { varNames = c(varNames,paste("V", i, sep="")) }
		RUFObject$variablesNames = varNames
	}
	else
	{	RUFObject$variablesNames = colnames(X)	}	
	if (randomcombination[1] > 0)
	{ 	
		tempVarNames = vector(length = length(randomcombination)/2)
		idx = 1
		for (i in 1:(length(randomcombination)/2)) 
		{ 
			tempVarNames[i] = paste("V", randomcombination[idx], "x", randomcombination[idx+1], sep="") 
			idx = idx + 2
		}
		RUFObject$variablesNames = c(RUFObject$variablesNames, tempVarNames)
	}	
	if (!is.null(Y)) { 	RUFObject$y = Y	 }
	if (!is.null(categoricalvariablesidx ))
	{  RUFObject$categoricalvariables = categoricalvariablesidx }
	if (unsupervised) 
	{ 
		RUFObject$unsupervised = TRUE 
		RUFObject$unsupervisedMethod  = if (length(unsupervisedMethod) == 3) 
										{ unsupervisedMethod[1] } 
										else 
										{ 
											if (length(unsupervisedMethod) == 1) { unsupervisedMethod } 
											else { unsupervisedMethod[1:2] } 
										}
	}
	RUFObject$call <- match.call()
	class(RUFObject) <- "randomUniformForest"	
	RUFObject
}

randomUniformForestCore.big <- function(X, Y, howManyForests, reduceDimension = FALSE, reduceAll = FALSE, 
	replacement = FALSE, 
	subsample = FALSE,
	r.nodeMinSize = 1,
	r.maxNodes = Inf, 
	r.depth = Inf,
	r.depthControl = NULL,
	r.features = floor(4/3*ncol(X)), 
	r.bootstrap = ifelse(r.regression, FALSE, TRUE), 
	r.overSampling = 0,
	r.outputPerturbation = FALSE,
	r.targetClass = 0,
	r.rebalancedSampling = FALSE,
	r.regression = TRUE, 
	r.ntree = 100, 
	r.use.OOB = TRUE,
	r.BreimanBounds = ifelse(r.use.OOB, TRUE, FALSE),
	r.treeSubsampleRate = ifelse(r.regression, 0.7, 1),
	r.variableImportance = TRUE,
	r.treeBagging = FALSE,
	r.classwt = NULL,
	r.classcutoff = c(0,0),
	r.randomCombination = 0,
	r.randomFeature = FALSE,
	r.whichCatVariables = NULL,
	r.featureSelectionRule = c("entropy", "gini", "random", "L1", "L2"),
	r.logX = FALSE,
	r.useSubTrees = FALSE,
	r.threads = "auto",
	r.parallelPackage = "doParallel")
{	
	nCovForests = -1
	r.ntree = round(r.ntree/(howManyForests + nCovForests + 1),0)		
	if (howManyForests > 1)
	{
		if (reduceAll)
		{
			X.list_kdimensions <- setManyDatasets(X, howManyForests, dimension = TRUE, replace = replacement, 
			subsample = subsample)
			X.list_kdatasets <- setManyDatasets(X, howManyForests, replace = replacement, subsample = subsample)		
			X.set = X[X.list_kdatasets[[1]], X.list_kdimensions[[1]]]
			Y.set = Y[X.list_kdatasets[[1]]] 
		}
		else
		{
			if (reduceDimension)
			{
				X.list_kdimensions <- setManyDatasets(X, howManyForests, dimension = reduceDimension, replace = replacement, subsample = subsample)
				X.set = X
				X.set[,-X.list_kdimensions[[1]]] = 0
				Y.set = Y
			}
			else
			{
				X.list_kdatasets <- setManyDatasets(X, howManyForests, replace = replacement, subsample = subsample)
				X.set = X[X.list_kdatasets[[1]],]
				Y.set = Y[X.list_kdatasets[[1]]] 
			}
		}
	}
	else
	{ X.set = X;  Y.set = Y }
	
	rUF1 <- randomUniformForestCore(X.set, trainLabels = Y.set, depth = r.depth, depthControl = r.depthControl, 
		ntree = r.ntree, nodeMinSize = r.nodeMinSize, maxNodes = r.maxNodes, features = r.features, 
		rf.bootstrap = r.bootstrap, use.OOB = r.use.OOB, BreimanBounds = r.BreimanBounds, threads = r.threads, rf.treeSubsampleRate = r.treeSubsampleRate, rf.overSampling = r.overSampling, rf.targetClass = r.targetClass, rf.rebalancedSampling = r.rebalancedSampling, variableImportance = r.variableImportance, 
		rf.treeBagging = r.treeBagging, rf.regression = r.regression, rf.randomCombination = r.randomCombination, 
		rf.randomFeature = r.randomFeature, whichCatVariables = r.whichCatVariables, logX = r.logX, classwt = r.classwt, 
		classCutOff = r.classcutoff, rf.outputPerturbationSampling = r.outputPerturbation, rf.featureSelectionRule = r.featureSelectionRule, useSubTrees = r.useSubTrees, parallelPackage = r.parallelPackage[1])
	
	if (howManyForests > 1)
	{
		for (i in 2:(howManyForests + nCovForests + 1))
		{
			{
				if (reduceAll)
				{	
					X.set = X[X.list_kdatasets[[i]],X.list_kdimensions[[i]]]
					Y.set = Y[X.list_kdatasets[[i]]] 
				}
				else
				{
					if (reduceDimension) 
					{ 
						X.set = X
						X.set[,-X.list_kdimensions[[i]]] = 0
					}
					else
					{
						X.set = X[X.list_kdatasets[[i]],]
						Y.set = Y[X.list_kdatasets[[i]]] 
					}
				}
			}
			rUF2 <- randomUniformForestCore(X.set, trainLabels = Y.set, depth = r.depth, depthControl = r.depthControl, ntree = r.ntree, nodeMinSize = r.nodeMinSize, maxNodes = r.maxNodes, features = r.features, 
			rf.bootstrap = r.bootstrap, use.OOB = r.use.OOB, BreimanBounds = r.BreimanBounds, threads = r.threads, rf.treeSubsampleRate = r.treeSubsampleRate, rf.overSampling = r.overSampling, 
			rf.targetClass = r.targetClass, rf.rebalancedSampling = r.rebalancedSampling,
			variableImportance = r.variableImportance, rf.treeBagging = r.treeBagging, rf.regression = r.regression, rf.randomCombination = r.randomCombination, rf.randomFeature = r.randomFeature, whichCatVariables = r.whichCatVariables, logX = r.logX, classwt = r.classwt, classCutOff = r.classcutoff, rf.outputPerturbationSampling = r.outputPerturbation, rf.featureSelectionRule = r.featureSelectionRule, useSubTrees = r.useSubTrees, parallelPackage = r.parallelPackage[1])			
			rUF1 <- onlineCombineRUF(rUF1, rUF2)
		}
	}	
	return(rUF1)
}

rUniformForest.merge <- function(X = NULL, Y = NULL, xtest = NULL, ytest = NULL, previousObject = NULL, 
	newObject = NULL,
	ntree = 100, 
	mtry = ifelse(bagging,ncol(X),floor(4/3*ncol(X))),
	nodesize = 1,
	maxnodes = Inf, 
    subsamplerate = 1,
    depth = Inf,
    depthcontrol = NULL,	    
	regression = ifelse(is.factor(Y),FALSE,TRUE),
	replace = ifelse(regression,FALSE,TRUE),
	OOB = ifelse(replace,TRUE,FALSE),
	BreimanBounds = ifelse(OOB, TRUE, FALSE),
    importance = TRUE,	
	bagging = FALSE,
	classwt = NULL,	
	unsupervised = FALSE, 
	unsupervisedMethod = c("uniform univariate sampling", "uniform multivariate sampling", "with bootstrap"),	
	oversampling = 0,
	targetclass = -1,
	outputperturbationsampling = FALSE,
    rebalancedsampling = FALSE,
	featureselectionrule = c("entropy", "gini", "random", "L1", "L2"),	
	randomcombination = 0,
	randomfeature = FALSE,
	categoricalvariablesidx = NULL,
	threads = "auto",
	logX = FALSE,
	classcutoff = c(0,0),
	subset = NULL,
	usesubtrees = FALSE,
	na.action = c("fastImpute", "accurateImpute", "omit"),
	parallelpackage = "doParallel"  )
{
	if (exists("categorical", inherits = FALSE)) {  categoricalvariablesidx = categorical }
	else { categorical = NULL  }
	
	if (!is.null(X) & !is.null(Y))
	{
		if (!is.null(subset))
		{
			X = X[subset,]
			Y = Y[subset]
		}		
		if (ntree < 2) { stop("Please use at least 2 trees for computing forest\n") }
		if ( (subsamplerate == 1) &  (replace == FALSE))  { OOB = FALSE }
		if (depth < 3) { stop("Stumps are not allowed. Minimal depth is 3, leading to 4 leaf nodes.\n") }
		if (BreimanBounds &  ntree > 500) { cat("Warning : Breiman Bounds, especially for multiclass problems, are computationnaly intensive.\n") }
		if (maxnodes < 6) { stop("Maximal number of nodes must be above 5.\n") }
		{
			X <- fillVariablesNames(X)			
			getFactors <- which.is.factor(X, count = TRUE)		 
			if (is.data.frame(X)) 
			{ 
				cat("X is a dataframe. String or factors have been converted to numeric values.\n") 
				X <- NAfactor2matrix(X)	
			}					
			if (!is.null(categoricalvariablesidx))
			{
				if (categoricalvariablesidx[1] == "all")
				{	
					factorVariables <- which(getFactors > 0)
					if (length(factorVariables) > 0) 	{ 	categoricalvariablesidx = factorVariables	}
					else
					{ 
					  cat("\nNo categorical variables found. Please type them manually, replacing in categoricalvariablesidx
					  option argument 'all' by a vector of the variables that need to be treated as categorical.\n")
					}
				}
				else 
				{  
					if (is.character(categoricalvariablesidx[1]))
					{	categoricalvariablesidx = sort(match(categoricalvariablesidx, colnames(X)))	}
				}
			}
			if ( length(which( (X == Inf) | (X == -Inf) ) ) > 0) 
			{ stop("Inf or -Inf values found in data. Learning can not be done.\nPlease replace them with NA in order to learn") }
			XY <- NATreatment(X, Y, na.action = na.action, regression = regression)
			X  <- XY$X
			Y  <- XY$Y
			rm(XY)
			if (randomcombination[1] > 0)
			{ 
				randomcombinationString = randomcombination[1]
				L.combination = length(randomcombination)
				if (L.combination%%3 != 0)
				{	weights = round(replicate(length(randomcombination)/2, runif(1)), 2)	}
				else
				{ 	
					weights = round(randomcombination[(L.combination + 1 - L.combination/3):L.combination],2)
					randomcombination = randomcombination[1:(L.combination - (L.combination/3))]
				}				
				for (i in 2:length(randomcombination)) 
				{  randomcombinationString = paste(randomcombinationString, randomcombination[i], sep=",") }			
				for (i in 1:length(weights)) 
				{  randomcombinationString = paste(randomcombinationString, weights[i], sep=",") }				
				if (!is.null(xtest)) { xtest <- randomCombination(xtest, combination = randomcombination, weights = weights) }				
				randomcombinationObject = list(randomcombination, weights)						
				X <- randomCombination(X, combination = randomcombinationObject[[1]], 
				weights = randomcombinationObject[[2]])				
			}
			else
			{ randomcombinationObject = randomcombination }
		}
	}
	RUF.model <- randomUniformForestCore.merge(X = X, Y = Y, previousObject = previousObject, newObject = newObject, 
		m.nodeMinSize = nodesize, m.maxNodes = maxnodes, m.depth = depth, m.depthControl = depthcontrol, 
		m.features = mtry, m.bootstrap = replace, m.overSampling = oversampling, 
		m.outputPerturbation = outputperturbationsampling, m.targetClass = targetclass, 
		m.rebalancedSampling = rebalancedsampling, m.regression = regression, m.ntree = ntree, m.use.OOB = OOB, m.BreimanBounds = BreimanBounds, m.treeSubsampleRate = subsamplerate, m.variableImportance = importance, 
		m.treeBagging = bagging, m.randomCombination = randomcombination, m.randomFeature = randomfeature, m.whichCatVariables = categoricalvariablesidx, m.featureSelectionRule = featureselectionrule[1], m.logX = logX, m.classwt = classwt, m.classcutoff = classcutoff, m.threads = threads, m.useSubTrees = usesubtrees,m.parallelPackage = parallelpackage[1])
	if (!is.null(classwt))
	{ 
		classwtString = classwt[1]
		for (i in 2:length(classwt)) { 	classwtString = paste(classwtString, classwt[i], sep=",") }	
	}	
	
	if (length(targetclass) > 1)
	{ 
		targetclassString = targetclass[1]
		for (i in 2:length(targetclass)) { 	targetclassString = paste(targetclassString, targetclass[i], sep=",") }	
	}
	if (length(rebalancedsampling) > 1)
	{ 
		rebalancedsamplingString = rebalancedsampling[1]
		for (i in 2:length(rebalancedsampling)) 
		{ 	rebalancedsamplingString = paste(rebalancedsamplingString, rebalancedsampling[i], sep=",") }	
	}
	if (RUF.model$regression)
	{	classcutoff = c(0,0)  }	
	if (as.numeric(classcutoff[2]) == 0)
	{	classcutoffString = FALSE }
	else
	{
		classcutoffString = levels(Y)[which(levels(Y) == as.character(classcutoff[1]))]
		classcutoffString = paste("Class ", classcutoffString, "," , as.numeric(classcutoff[2])*100, "%", sep ="")
	}	
	if (is.null(previousObject))
	{
		previousObject = filter.object(previousObject)
		newObject = filter.object(newObject)		
		if (rownames(previousObject$forestParams)[1] == "reduceDimension" )
		{		
			params.big = as.character(c(as.logical(c(previousObject$forestParams[[1]][1], previousObject$forestParams[[1]][2])),previousObject$forestParams[[1]][2]))
			names.big = c("reduceDimension", "reduceAll", "nforest")
			value.big = 3 
		}
		else {  params.big = names.big = NULL; value.big = 0 }		
		paramsObject = c(params.big, ntree, mtry, nodesize, maxnodes, as.character(replace), bagging, depth, 
		ifelse(is.null(depthcontrol), length(depthcontrol)> 0, depthcontrol), OOB, importance, subsamplerate, 
		ifelse(is.null(classwt), length(classwt) > 0, classwtString), classcutoffString,
		ifelse((oversampling == 0),(oversampling != 0), oversampling), outputperturbationsampling, 
		ifelse(length(targetclass) > 1, targetclassString, targetclass), 
		ifelse(length(rebalancedsampling) > 1, rebalancedsamplingString, rebalancedsampling), 
		ifelse((randomcombination[1] == 0),(randomcombination != 0), randomcombinationString), randomfeature,  
		ifelse(is.null(categoricalvariablesidx), length(categoricalvariablesidx) > 0, length(categoricalvariablesidx)), featureselectionrule[1])								
		if (RUF.model$regression) 
		{ 
			paramsObject[21 + value.big] = "Sum of squared residuals"
			if (paramsObject[5 + value.big] == "FALSE") { paramsObject[9 + value.big] = FALSE }
		}
		names(paramsObject) = c(names.big, "ntree", "mtry", "nodesize", "maxnodes", "replace", "bagging", "depth", "depthcontrol", "OOB", "importance", "subsamplerate", "classwt", "classcutoff", "oversampling", "outputperturbationsampling", "targetclass", "rebalancedsampling", "randomcombination", "randomfeature", "categorical variables", "featureselectionrule")	
		paramsObject = as.data.frame(paramsObject)			
		RUFObject <- genericOutput(xtest, ytest, paramsObject, RUF.model, ytrain = Y, classcutoff = classcutoff)		
		if (is.null(Y) &  !RUF.model$regression)
		{  RUFObject$classes =  previousObject$classes  }		
		RUF.model$logX = logX
		if (!is.null(newObject$y)) { RUFObject$y = newObject$y	}			
		RUFObject$variablesNames = colnames(X)		
		if (!is.null(categoricalvariablesidx ))
		{  RUFObject$categoricalvariables = categoricalvariablesidx } 	
	}
	else
	{ 	
		paramsObject = cbind(previousObject$forestParams, newObject$forestParams) 
		RUF.model$logX = RUF.model$logX[1]		
		RUFObject <- genericOutput(xtest, ytest, paramsObject, RUF.model, ytrain = Y, classcutoff = classcutoff)		
		if (is.null(Y) & !RUF.model$regression) {  RUFObject$classes = previousObject$classes  }		
		RUFObject$logX = RUF.model$logX
		RUFObject$y = c(previousObject$y,newObject$y)		
		RUFObject$variablesNames = previousObject$variablesNames
	}	
	if (randomcombination[1] > 0)
	{ 	
		tempVarNames = vector(length = length(randomcombination)/2)
		idx = 1
		for (i in 1:(length(randomcombination)/2)) 
		{ 
			tempVarNames[i] = paste("V", randomcombination[idx], "x", randomcombination[idx+1], sep="") 
			idx = idx + 2
		}
		RUFObject$variablesNames = c(RUFObject$variablesNames, tempVarNames)
	}
	RUFObject$call <- match.call()
	class(RUFObject) <- "randomUniformForest"	
	RUFObject
}
		
randomUniformForestCore.merge <- function(X = NULL,Y = NULL, previousObject = NULL, newObject = NULL, 
	m.nodeMinSize = 1,
	m.maxNodes = Inf,
	m.depth = Inf,
    m.depthControl = NULL,	
	m.features = floor(4/3*ncol(X)), 
	m.bootstrap = TRUE,
	m.overSampling = 0,
	m.outputPerturbation = FALSE,
	m.targetClass = 0,
	m.rebalancedSampling = FALSE,
	m.regression = TRUE, 
	m.ntree = 100, 
	m.use.OOB = TRUE,
	m.BreimanBounds = ifelse(m.use.OOB, TRUE, FALSE),
	m.treeSubsampleRate = 1,
	m.variableImportance = TRUE,
	m.treeBagging = FALSE,
	m.classwt = NULL,	
	m.randomCombination = 0,
	m.randomFeature = FALSE,
	m.whichCatVariables = NULL,
	m.featureSelectionRule = c("entropy", "gini", "random", "L1", "L2"),
	m.logX = FALSE,
	m.classcutoff = c(0,0),
	m.useSubTrees = FALSE, 
	m.threads = "auto",
	m.parallelPackage = "doParallel")
{
    if (is.null(previousObject)) { stop ("no object to merge") }	
	if (is.null(newObject))
	{
		previousParams = previousObject$forestParams		
		if (previousParams["importance" , 1] !=  m.variableImportance)	
		{  m.variableImportance = previousParams["importance", 1] }
		if (previousParams["OOB", 1] !=  m.use.OOB)	{   m.use.OOB  = previousParams["OOB", 1]  }		
		newObject <- randomUniformForestCore(X, trainLabels = Y, depth = m.depth, depthControl = m.depthControl, 
		ntree = m.ntree, nodeMinSize = m.nodeMinSize, maxNodes = m.maxNodes, features = m.features, 
		rf.bootstrap = m.bootstrap, use.OOB = m.use.OOB, BreimanBounds = m.BreimanBounds, threads = m.threads, rf.treeSubsampleRate = m.treeSubsampleRate, rf.overSampling = m.overSampling, rf.targetClass = m.targetClass, rf.rebalancedSampling = m.rebalancedSampling, variableImportance = m.variableImportance, 
		rf.treeBagging = m.treeBagging, rf.regression = m.regression, rf.randomCombination = m.randomCombination, 
		rf.randomFeature = m.randomFeature, whichCatVariables = m.whichCatVariables, 			rf.outputPerturbationSampling = m.outputPerturbation, logX = m.logX, classwt = m.classwt, 
		classCutOff = m.classcutoff, rf.featureSelectionRule = m.featureSelectionRule, useSubTrees = m.useSubTrees,
		parallelPackage = m.parallelPackage[1])
	}			
	previousObject = filter.forest(previousObject)
	newObject = filter.forest(newObject)	
	if (length(previousObject) != length(newObject))
	{	
		objectNames = c("OOB.strengthCorr", "OOB.votes", "OOB.predicts", "OOB",  "pred.error", "percent.varExplained")
		if (is.null(previousObject$variableImportance) | is.null(newObject$variableImportance))
		{	objectNames = c(objectNames, "variableImportance") }		
		cat("'OOB object' and\\or 'Breiman Bounds' of randomUniformForest are not present in all models,\n hence they both will be dropped in the combined model. To keep them, please enable 'OOB' (and disable 'Breiman Bounds') option\n in all your models, or enable (or disable) both options in all your models.\n")		
		previousObject = rmInAListByNames(previousObject, objectNames)
		newObject = rmInAListByNames(newObject,objectNames)
	}	
	return( onlineCombineRUF(previousObject, newObject) )
}

rUniformForest.combine <- function(...)
{
	object <- list(...)
	if  (length(object) == 1)	{	object = object[[1]]	}		
	n <- length(object)	
	if (n > 1)	{	bigObject <- rUniformForest.merge(previousObject = object[[1]], newObject = object[[2]])	}
	else 	{ stop ("combine need at least two random uniform forests objects") }	
	if (n > 2)
	{
		for (i in 3:n) { bigObject <- rUniformForest.merge(previousObject = bigObject, newObject = object[[i]]) }
	}
	return(bigObject)
}


rUniformForest.grow <- function(object, X, Y = NULL,  ntree = 100, threads = "auto")
{
	if (ntree < 10)	{ stop ("please provide at least 10 trees to add") }	
	object <- filter.object(object)
	paramsObject <- object$forestParams	
	if (is.null(Y))	{  Y = object$y[1:nrow(X)]  }	
	if (!is.null(object$predictionObject))
	{	
		object = rmInAListByNames(object, c("errorObject", "predictionObject"))
		class(object) = "randomUniformForest"	
	}	
	callArgs <- as.character(object$call)	
	if ((!object$forest$regression) & !is.factor(Y)) { Y = as.factor(Y) }
	BreimanBounds = TRUE
	if (is.null(object$forest$OOB.strengthCorr)) {	BreimanBounds = FALSE	}		
	if (length(callArgs) <= 3)
	{ 
		if ( !is.null(object$formula))
		{	object2 <- randomUniformForest.formula(object$formula, data = X, ntree = ntree, BreimanBounds = BreimanBounds)	}
		else
		{	object2 <- randomUniformForest.default(X, Y = Y, ntree = ntree, BreimanBounds = BreimanBounds) 	}
	}
	else
	{
		usesubtrees = if (length(which(callArgs == "usesubtrees")) > 0) { TRUE } else { FALSE }
		bagging = if (length(which(callArgs == "bagging")) > 0) { TRUE } else { FALSE }
		na.action = { if (length(which(callArgs == "omit")) > 0) { "omit" } 
					else 
					{ 
						if (length(which(callArgs == "accurateImpute")))
						{ "accurateImpute" }
						else
						{ "fastImpute"}
					} }
		
		classCutOffString  = as.character(paramsObject["classcutoff", 1])
		classcutoff = rep(0,2)
	    if (classCutOffString != "FALSE")
		{
			classCutOffString = rm.string(classCutOffString, "%")
			classCutOffString = rm.string(classCutOffString, "Class ")
			classCutOffString = strsplit(classCutOffString,",")
			classcutoff[1] = which(object$classes == classCutOffString[[1]][1])
			classcutoff[2] = as.numeric(classCutOffString[[1]][2])/100
		}		
		if ( !is.null(object$formula))
		{
			object2 <- randomUniformForest.formula(object$formula, data = X, ntree = ntree, 
				mtry = ifelse(bagging, ncol(X), as.numeric(as.character(paramsObject["mtry",1]))),
				usesubtrees = ifelse(usesubtrees, TRUE, FALSE),
				nodesize = as.numeric(as.character(paramsObject["nodesize",1])), 
				maxnodes = as.numeric(as.character(paramsObject["maxnodes",1])),
				depth = as.numeric(as.character(paramsObject["depth",1])),
				depthcontrol = if (is.logical(as.logical(as.character(paramsObject["depthcontrol",1])))) {  NULL } 
					else 
					{ 
						if (is.numeric(as.character(paramsObject["depthcontrol",1]))) 
						{	 as.numeric(as.character(paramsObject["depthcontrol",1])) }
						else { "random" }
					},
				regression = object$forest$regression,
				replace = as.logical(as.character(paramsObject["replace",1])),
				OOB = as.logical(as.character(paramsObject["OOB",1])),
				BreimanBounds = BreimanBounds,
				subsamplerate = as.numeric(as.character(paramsObject["subsamplerate",1])),
				importance = as.logical(as.character(paramsObject["importance",1])),
				bagging = ifelse(bagging, TRUE, FALSE),
				unsupervised = if (is.null(object$unsupervised)) { FALSE } else { TRUE }, 
				unsupervisedMethod = if (is.null(object$unsupervised)) { unsupervisedMethod = c("uniform univariate sampling", "uniform multivariate sampling", "with bootstrap") } else { object$unsupervisedMethod },
				classwt = if (as.character(paramsObject["classwt", 1]) == "FALSE") { NULL } 
					else {  as.numeric(strsplit(as.character(paramsObject["classwt", 1]),",")[[1]]) },
				oversampling = if (as.character(paramsObject["oversampling", 1]) == "FALSE") { 0 } 
					else { as.numeric(as.character(paramsObject["oversampling", 1])) },
				targetclass = as.numeric(as.character(paramsObject["targetclass", 1])),
				outputperturbationsampling = if (as.character(paramsObject["outputperturbationsampling", 1]) == "FALSE") { FALSE } 	else { TRUE },
				rebalancedsampling = if (as.character(paramsObject["rebalancedsampling", 1]) == "FALSE") 
					{ as.logical(as.character(paramsObject["rebalancedsampling", 1])) } 
					else 
					{ 
						if (length(paramsObject["rebalancedsampling", 1]) > 1)	{  as.numeric(as.character(paramsObject["rebalancedsampling", 1])) }
						else {  TRUE }
					},
				featureselectionrule = if (object$forest$regression) 
									   {	
											if (as.character(paramsObject["featureselectionrule", 1]) != "Sum of squared residuals" ) 
											{  "L1"  }
											else
											{ "L2"  }
									   }
									   else
									   {	as.character(paramsObject["featureselectionrule", 1])	},			
				randomcombination = if (as.character(paramsObject["randomcombination", 1]) == "FALSE") { 0 } 
					else {	as.numeric(strsplit(as.character(paramsObject["randomcombination", 1]),",")[[1]]) },
				randomfeature = if (as.character(paramsObject["randomfeature", 1]) == "FALSE") { FALSE } 
					else { TRUE },
				categoricalvariablesidx = if (as.character(paramsObject["categorical variables", 1]) == "FALSE") { NULL } 
					else { object$categoricalvariables },
				na.action = na.action,
				logX = object$forest$logX,			
				classcutoff = classcutoff,
				threads = threads,
				parallelpackage = "doParallel"
			)		
		}
		else
		{
			object2 <- randomUniformForest.default(X, Y = Y, ntree = ntree, 
				mtry = ifelse(bagging, ncol(X), as.numeric(as.character(paramsObject["mtry",1]))),
				usesubtrees = ifelse(usesubtrees, TRUE, FALSE),
				nodesize = as.numeric(as.character(paramsObject["nodesize",1])),
				maxnodes = as.numeric(as.character(paramsObject["maxnodes",1])),
				depth = as.numeric(as.character(paramsObject["depth",1])),
				depthcontrol = 	if (is.logical(as.logical(as.character(paramsObject["depthcontrol",1])))) 
								{  NULL } 
								else 
								{ 
									if (is.numeric(as.character(paramsObject["depthcontrol",1])))
									{  as.numeric(as.character(paramsObject["depthcontrol",1])) }
									else 
									{ "random" }
								},
				regression = object$forest$regression,
				replace = as.logical(as.character(paramsObject["replace",1])),
				OOB = as.logical(as.character(paramsObject["OOB",1])),
				BreimanBounds = BreimanBounds,
				subsamplerate = as.numeric(as.character(paramsObject["subsamplerate",1])),
				importance = as.logical(as.character(paramsObject["importance",1])),
				bagging = ifelse(bagging, TRUE, FALSE),
				unsupervised = if (is.null(object$unsupervised)) { FALSE } else { TRUE }, 
				unsupervisedMethod = if (is.null(object$unsupervised)) { unsupervisedMethod = c("uniform univariate sampling", "uniform multivariate sampling", "with bootstrap") } else { object$unsupervisedMethod },
				classwt = if (as.character(paramsObject["classwt", 1]) == "FALSE") { NULL } 
						else {  as.numeric(strsplit(as.character(paramsObject["classwt", 1]),",")[[1]]) },
				oversampling = if (as.character(paramsObject["oversampling", 1]) == "FALSE") { 0 } 
							else { as.numeric(as.character(paramsObject["oversampling", 1])) },
				targetclass = as.numeric(as.character(paramsObject["targetclass", 1])),
				outputperturbationsampling = if (as.character(paramsObject["outputperturbationsampling", 1]) == "FALSE") { FALSE } 
											 else { TRUE },
				rebalancedsampling = if (as.character(paramsObject["rebalancedsampling", 1]) == "FALSE") 
									{ as.logical(as.character(paramsObject["rebalancedsampling", 1])) } 
									else 
									{ 
										if (length(paramsObject["rebalancedsampling", 1]) > 1)	
										{  as.numeric(as.character(paramsObject["rebalancedsampling", 1])) }
										else 
										{  TRUE }
									},
				featureselectionrule = 	if (object$forest$regression) 
										{	
											if (as.character(paramsObject["featureselectionrule", 1]) != "Sum of squared residuals" ) 
											{  "L1"  }
											else
											{ "L2"  }
										}
										else
										{	as.character(paramsObject["featureselectionrule", 1])	},		
				randomcombination = if (as.character(paramsObject["randomcombination", 1]) == "FALSE") { 0 } 
									else {	as.numeric(strsplit(as.character(paramsObject["randomcombination", 1]),",")[[1]]) },
				randomfeature = if (as.character(paramsObject["randomfeature", 1]) == "FALSE") { FALSE } 
								else { TRUE },
				categoricalvariablesidx = if (as.character(paramsObject["categorical variables", 1]) == "FALSE") { NULL } 
										  else { object$categoricalvariables },
				na.action = na.action,
				logX = object$forest$logX,			
				classcutoff = classcutoff,
				threads = threads,
				parallelpackage = "doParallel"
			)
		}
	}	
	objectCombined <- rUniformForest.combine(object, object2)
	objectCombined$call = object$call
	cat("OOB evaluation might become obsolete if training sample remains unchanged.\n")
	return(objectCombined)
}

rm.trees <- function(rufObject, X = NULL, Y = NULL, 
	method = c("default", "random", "oldest", "newest", "optimal", "quantile"), 
	howMany = NULL, 
	rm.sample = 0.1)
{	
	tmp.rufObject = NULL
	if (!is.null(rufObject$time.elapse)) 
	{	
		tmp.rufObject = rufObject
		rufObject = filter.object(rufObject)	
	}
	ntree = if (!is.null(rufObject$object)) { length(rufObject$object$forest$object) } else { length(rufObject$forest$object) }
	regression = rufObject$forest$regression
	if (method[1] == "default")
	{			
		if (is.null(rufObject$forest$OOB.votes))
		{ stop("Forest can not be optimized without OOB data") }		
		treesAvgScore <- apply(rufObject$forest$OOB.votes, 2, function(Z) length(rmNA(match(Z, rufObject$y))))
		cuttingEdge <- round(quantile(treesAvgScore, rm.sample),0)		
		idx = which(treesAvgScore <= cuttingEdge)
		idx2 = c(idx,2*idx)		
		rufObject$forest$object = rm.InAList(rufObject$forest$object, idx2)
	}		
	if (method[1] == "random")
	{			
		rm.idx = floor(rm.sample*ntree)
		idx = sample(ntree,rm.idx)
		
		rufObject$forest$object = rm.InAList(rufObject$forest$object,idx)
	}	
	if (method[1] == "oldest")
	{			
		if (is.null(howMany)) { stop("No number of trees has been found") }
		if (!is.numeric(howMany)) { stop("number of trees must be numeric") }
		rufObject$forest$object = rm.InAList(rufObject$forest$object, (ntree - howMany + 1):ntree)
	}	
	if (method[1] == "newest")
	{			
		if (is.null(howMany)) { stop("No number of trees has been found") }
		if (!is.numeric(howMany)) { stop("number of trees must be numeric") }
		rufObject$forest$object = rm.InAList(rufObject$forest$object, 1:(ntree - howMany))
	}	
	if (method[1] == "quantile")
	{	
		ntreeLength = unlist(lapply(rufObject$forest$object, nrow))
		idx = which(ntreeLength >= quantile(ntreeLength, 1 - rm.sample/2) |  ntreeLength <= quantile(ntreeLength, rm.sample/2) )		
		rufObject$forest$object = rm.InAList(rufObject$forest$object,idx)
	}	
	if (method[1] == "optimal")
	{	
		if (!is.null(rufObject$OOB.votes)) 	{  OOBVotes = rufObject$OOB.votes	}
		else 
		{	
			if (is.null(X)) { stop( "there is no data for optimal forest") }
			else {  predictRF = predict(rufObject, X, type = "all"); OOBVotes = predictRF$all.votes;  OOBPredicts = predictRF$majority.vote  }
		}
		if (regression)
		{ 
			L2error = mean(rufObject$forest$OOB.strengthCorr$PE.tree, na.rm =TRUE)						
			if (!is.null(rufObject$forest$object$OOB.votes)) { OOBVotes[OOBVotes == Inf] = NA }
			L2errorTree = colMeans(abs(OOBVotes), na.rm = TRUE)
			idx = which(L2errorTree > L2error)
			if (length(idx) == ntree) {  idx = which(L2errorTree  > mean(L2errorTree) ) }
		}
		else
		{ 
			TreeImportance = localTreeImportance(rufObject, OOBVotes = OOBVotes, OOBPredicts = OOBPredicts)$pweights
		    idx = which(TreeImportance < mean(TreeImportance))
		}
		rufObject$forest$object = rm.InAList(rufObject$forest$object, idx)
	}
	cat("Trees have been removed, but old number of trees will be printed\n")
	if (is.null(tmp.rufObject))	{	return(rufObject) 	}
	else
	{   
		tmp.rufObject$object = rufObject
		return(tmp.rufObject)  
	}
}
#END OF FILE
Any scripts or data that you put into this service are public.
randomUniformForest documentation built on June 22, 2022, 1:05 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
randomUniformForest
Random Uniform Forests for Classification, Regression and Unsupervised Learning

R/OnliningRandomUniformForests.R
In randomUniformForest: Random Uniform Forests for Classification, Regression and Unsupervised Learning

Defines functions rm.trees rUniformForest.grow rUniformForest.combine randomUniformForestCore.merge rUniformForest.merge randomUniformForestCore.big rUniformForest.big onlineCombineRUF

Documented in rm.trees rUniformForest.big rUniformForest.combine rUniformForest.grow

Try the randomUniformForest package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

randomUniformForest Random Uniform Forests for Classification, Regression and Unsupervised Learning

R/OnliningRandomUniformForests.R In randomUniformForest: Random Uniform Forests for Classification, Regression and Unsupervised Learning

Defines functions rm.trees rUniformForest.grow rUniformForest.combine randomUniformForestCore.merge rUniformForest.merge randomUniformForestCore.big rUniformForest.big onlineCombineRUF

Documented in rm.trees rUniformForest.big rUniformForest.combine rUniformForest.grow

Try the randomUniformForest package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

randomUniformForest
Random Uniform Forests for Classification, Regression and Unsupervised Learning

R/OnliningRandomUniformForests.R
In randomUniformForest: Random Uniform Forests for Classification, Regression and Unsupervised Learning