# R/RandomUniformForestsCPP.R


# <OWNER> = Saip CISS
# <ORGANIZATION> = QueensBridge Quantitative
# <YEAR> = 2014

# LICENSE 
# BSD 3-CLAUSE LICENSE

# Copyright (c) 2014, Saip CISS
# All rights reserved.

# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:

# 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.

# 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.

# 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.

# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# END OF LICENSE 

randomUniformForest <- function(...) UseMethod("randomUniformForest")
	
importance <- function(object, ...) UseMethod("importance")

unsupervised <- function(object,...) UseMethod("unsupervised")
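
# Usage sketch for the S3 generics above (a hedged example; assumes the package
# is attached and uses the stock 'iris' dataset):
#   rf <- randomUniformForest(Species ~ ., data = iris, ntree = 50)
#   print(rf); summary(rf)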

randomUniformForest.formula <- function(formula, data = NULL, subset = NULL, ...)
{
	if (is.null(data)) stop("Please provide data.\n")
	if (!is.null(subset)) data = data[subset,]	
	data <- fillVariablesNames(data)	
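	# Build the model frame from the formula, take the design matrix without
	# the intercept column ([,-1]) and extract the response vector.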
	mf <- model.frame(formula = formula, data = as.data.frame(data))
	x <- model.matrix(attr(mf, "terms"), data = mf)[,-1]
	y <- model.response(mf)
	names(y) = NULL
	RUFObject <- randomUniformForest.default(x, Y = y, ...)
	RUFObject$call <- match.call()
	RUFObject$formula <- formula
	class(RUFObject) <- c("randomUniformForest.formula", "randomUniformForest")
	RUFObject
}

print.randomUniformForest <- function(x, biasCorrection = FALSE, ...)
{
	object <- x
	cat("Call:\n")
	print(object$call)
	cat("\n")
	cat("Type of random uniform forest: ") 
	if (!is.null(object$unsupervised))
	{	cat("Unsupervised learning\n") }
	else
	{
		if (object$forest$regression) 	{  cat("Regression\n")	}
		else { 	cat("Classification\n") }
	}
	cat("\n")
	print(object$forestParams)
	cat("\n")
	if (!is.null(object$forest$OOB))
	{ 
		cat("Out-of-bag (OOB) evaluation")
		if (!object$forest$regression)
		{
			if (!is.numeric(object$forest$pred.error))
			{	cat("\n", object$forest$pred.error, "\n", "Options used seem have to be reconsidered.","\n") }
			else
			{
				OOBErrorRate = mean(100*object$forest$pred.error)
				cat("\nOOB estimate of error rate: ", round(OOBErrorRate, 2),"%\n", sep ="")
				cat("OOB error rate bound (with 1% deviation): ",round(OOBErrorRate + OOBErrorRate*(1 - estimatePredictionAccuracy(floor(length(object$forest$OOB.predicts)*0.368))), 2),"%\n", sep = "")
				cat("\nOOB confusion matrix:\n")
				colnames(object$forest$OOB)[1:length(object$classes)] = object$classes
				rownames(object$forest$OOB)[1:length(object$classes)] = object$classes
				print(round(object$forest$OOB,4))			
				if ((length(object$classes) == 2) && (rownames(object$forestParams)[1] != "reduceDimension"))
				{	
					cat("\nOOB estimate of AUC: ", round(pROC::auc(as.numeric(object$y), as.numeric(object$forest$OOB.predicts))[[1]], 4), sep = "")
					cat("\nOOB estimate of AUPR: ", round(myAUC(as.numeric(object$forest$OOB.predicts), 
					as.numeric(object$y), falseDiscoveryRate = TRUE)$auc, 4),sep = "")
					cat("\nOOB estimate of F1-score: ", round(fScore(object$forest$OOB), 4),sep = "")
				}
				cat("\nOOB (adjusted) estimate of geometric mean:", round(gMean(object$forest$OOB),4),"\n")
				if (nrow(object$forest$OOB) > 2)
				{ 
					cat("OOB (adjusted) estimate of geometric mean for precision:", round(gMean(object$forest$OOB, precision = TRUE),4),"\n") 
				}
				else
				{	cat("OOB estimate of Kappa statistic:", round(kappaStat(object$forest$OOB), 4),"\n")	}			
				if (!is.null(object$forest$OOB.strengthCorr))
				{
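					# Breiman's (2001) bound: PE* <= rho_bar * (1 - s^2) / s^2, with rho_bar
					# the mean correlation between trees and s the strength (expected margin);
					# estimated below from the OOB statistics.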
					PE = rmNA(object$forest$OOB.strengthCorr$PE)
					strength = rmNA(object$forest$OOB.strengthCorr$strength)
					varStrength = object$forest$OOB.strengthCorr$std.strength^2
					treesAvgCorr = rmNA(object$forest$OOB.strengthCorr$avg.corr)
					classAsNumeric = as.numeric(object$y)
					classAsNumeric = classAsNumeric - min(classAsNumeric)
					bias = round(mean(object$forest$OOB.predicts -1)- mean(classAsNumeric), 4)
					
					if (biasCorrection)
					{
						strength = strength + bias/2
						PE = treesAvgCorr*(1 - strength^2)/strength^2
						cat("\nBreiman's bounds (with bias correction)")
						cat("\nExpected prediction error: ", round(mean(100*PE),2),"%\n", sep ="")
						cat("Upper bound: ", round(mean(100*rmNA(varStrength/strength^2)),2), "%\n", sep = "")
						cat("Average correlation between trees:", round(mean(round(treesAvgCorr,4)),4),"\n")
						cat("Strength (margin):", round(mean(round(rmNA(strength),4)),4), "\n")
						cat("Standard deviation of strength:", round(mean(sqrt(varStrength)),4),"\n")					
						cat("Expected bias:", bias, "\n")
					}
					else
					{
						cat("\nBreiman's bounds")
						cat("\nExpected prediction error: ", round(mean(100*PE),2),"%\n", sep ="")
						cat("Upper bound: ", round(mean(100*rmNA(varStrength/strength^2)),2), "%\n", sep = "")
						cat("Average correlation between trees:", round(mean(round(treesAvgCorr,4)),4),"\n")
						cat("Strength (margin):", round(mean(round(rmNA(strength),4)),4), "\n")
						cat("Standard deviation of strength:", round(mean(sqrt(varStrength)),4),"\n")
						cat("Expected bias:", bias, "\n(may affect the expected prediction error and the upper bound)\n")
					}
				}
			}
		}
		else
		{
			cat("\nMean of squared residuals:", round(mean(object$forest$pred.error),8), "\n")
			cat("Mean squared error bound (experimental):", round(object$forest$pred.error + object$forest$pred.error*(1- estimatePredictionAccuracy(floor(length(object$forest$OOB.predicts)*
			(1 - as.numeric(as.vector(object$forestParams["subsamplerate",1])))))), 6),"\n")
			if (length(object$forest$percent.varExplained) > 1)
			{	
				varExplained = object$forest$percent.varExplained
				for (i in 1:length(varExplained))
				{	varExplained[i] = paste(object$forest$percent.varExplained[i], "%", sep="")	}
				cat("Variance explained:", varExplained, "\n")	
			}
			else { cat("Variance explained: ", object$forest$percent.varExplained, "%\n", sep = "")	} #percent.varExplained
			cat("\nOOB residuals:\n")
			Residuals = summary(rmNA(object$forest$OOB.predicts - object$y))
			names(Residuals) = c("Min", "1Q", "Median", "Mean", "3Q", "Max")
			print(Residuals)
			cat("Mean of absolute residuals:", sum(abs(rmNA(object$forest$OOB.predicts - object$y)))/length(rmNA(object$y)),"\n")			
			if (!is.null(object$forest$OOB.strengthCorr))
			{
				cat("\nBreiman's bounds")
				cat("\nTheoretical prediction error:", round(rmNA(object$forest$OOB.strengthCorr$PE.forest),6) ,"\n")
				cat("Upper bound:", round(rmNA(object$forest$OOB.strengthCorr$PE.max),6),"\n")
				cat("Mean prediction error of a tree:", round(rmNA(object$forest$OOB.strengthCorr$PE.tree),6),"\n")
				cat("Average correlation between trees residuals:", round(rmNA(object$forest$OOB.strengthCorr$mean.corr),4),"\n")
				residuals_hat = vector(length = ncol(object$forest$OOB.votes))
				residuals_hat <- apply(object$forest$OOB.votes, 2, function(Z) mean(rmInf(Z) - mean(rmNA(object$forest$OOB.predicts))))		
				cat("Expected squared bias (experimental):", round(mean(rmNA(residuals_hat))^2,6),"\n")
			}
		}		
	}  
    if (!is.null(object$errorObject))
	{	
	    cat("\nTest set")
		if (!object$forest$regression)
		{
			cat("\nError rate: ")
			cat(round(100*object$errorObject$error,2), "%\n", sep="")
			cat("\nConfusion matrix:\n")
			print(round(object$errorObject$confusion,4))			
			if (!is.null(object$errorObject$AUC)) 
			{ 
				cat("\nArea Under ROC Curve:", round(object$errorObject$AUC,4))
				cat("\nArea Under Precision-Recall Curve:", round(object$errorObject$AUPR,4))
				cat("\nF1 score:", round(fScore(object$errorObject$confusion),4)) 	
			}
			cat("\nGeometric mean:", round(gMean(object$errorObject$confusion),4),"\n")
			if (nrow(object$errorObject$confusion) > 2)
			{ 
				cat("Geometric mean of the precision:", round(gMean(object$errorObject$confusion, precision = TRUE),4),"\n") 
			}
			else
			{	cat("Kappa statistic:", round(kappaStat(object$errorObject$confusion), 4),"\n")	}
		}
		else
		{	
			cat("\nMean of squared residuals: ", round(object$errorObject$error, 6), "\n", sep="")			
			cat("Variance explained: ", object$errorObject$percent.varExplained, "%\n\n", sep = "")		
			Residuals <- object$errorObject$Residuals
			names(Residuals) = c("Min", "1Q", "Median", "Mean", "3Q", "Max")
			cat("Residuals:\n")
			print(Residuals)
			cat("Mean of absolute residuals: ", round(rmNA(object$errorObject$meanAbsResiduals), 6), "\n", sep="")		
		} 		
	}
}

summary.randomUniformForest <- function(object, maxVar = 30, border = NA,...)
{	
	object <- filter.object(object)	
	if (!is.null(object$forest$variableImportance))
	{	
		par(las=1) 
		maxChar = floor(2 + max(nchar(object$variablesNames))/2)		
		par(mar=c(5, maxChar + 1,4,2))		
		varImportance1 = varImportance = object$forest$variableImportance		
		if (!object$forest$regression)
		{
			varImportance[,"class"] = object$classes[as.numeric(varImportance[,"class"])]
			varImportance1[,"class"] = varImportance[,"class"]
		}		
		nVar = nrow(varImportance)
		if (nVar > maxVar) { varImportance = varImportance[1:maxVar,] }	
		barplot( varImportance[nrow(varImportance):1,"percent.importance"], horiz = TRUE, 
		col = sort(heat.colors(nrow(varImportance)), decreasing = TRUE), 
		names.arg = varImportance[nrow(varImportance):1,"variables"], border = border,
		xlab = "Relative Influence (%)", 
		main = 	if (!object$forest$regression) 
				{ "Variable Importance based on Information Gain" }
				else 
				{ 
					if ( length(grep(as.character(object$forestParams[nrow(object$forestParams),1]), "absolute")) == 1 ) 
					{ "Variable Importance based on L1 distance" }
					else
					{ "Variable Importance based on L2 distance" }
				})
		abline(v = 100/nVar, col ='grey')
		cat("\nGlobal Variable importance:\n")
		if (!object$forest$regression)
		{
			cat("Note: most predictive features are ordered by 'score' and plotted. Most discriminant ones\nshould also be taken into account by looking 'class' and 'class.frequency'.\n\n")
		}
		print(varImportance1)
		cat("\n")
		cat("Average tree size (number of nodes) summary: ", "\n")
		nodesView = unlist(lapply(object$forest$object,nrow))
		print(floor(summary(nodesView)))
		cat("\n")
		cat("Average Leaf nodes (number of terminal nodes) summary: ", "\n")
		terminalNodesView = unlist(lapply(object$forest$object, function(Z) length(which(Z[,"status"] == -1))))
		print(floor(summary(terminalNodesView)))
		cat("\n")
		cat("Leaf nodes size (number of observations per leaf node) summary: ", "\n")
		print(summary(unlist(lapply(object$forest$object, function(Z) Z[which(Z[,"prediction"] != 0), "nodes"]) )))
		cat("\n")
		cat("Average tree depth :", round(log(mean(nodesView))/log(2),0), "\n")
		cat("\n")
		cat("Theoretical (balanced) tree depth :", round(log(length(object$y), base = 2),0), "\n")
		cat("\n")		
	}
	else
	{ 
		cat("Average tree size (number of nodes) summary: ", "\n")
		nodesView = unlist(lapply(object$forest$object,nrow))
		print(floor(summary(nodesView)))
		cat("\n")
		cat("Average Leaf nodes (number of terminal nodes) summary: ", "\n")
		terminalNodesView = unlist(lapply(object$forest$object, function(Z) length(which(Z[,"status"] == -1))))
		print(floor(summary(terminalNodesView)))
		cat("\n")
		cat("Leaf nodes size (number of observations per leaf node) summary: ", "\n")
		print(summary(unlist(lapply(object$forest$object, function(Z) Z[which(Z[,"prediction"] != 0), "nodes"]) )))
		cat("\n")
		cat("Average tree depth :", round(log(mean(nodesView), base = 2),0), "\n")
		cat("\n")
		cat("Theoretical (balanced) tree depth :", round(log(length(object$y), base = 2),0), "\n")
		cat("\n")		
	}
}

plot.randomUniformForest <- function(x, threads = "auto", ...) 
{
	object <- x
	if (!is.null(object$forest$OOB.votes))
	{
		Ytrain = object$y
		OOBMonitoring = object$forest$OOB.votes
		
		ff = L2Dist
		ffComment = "OOB mean squared error"
		if ( length(grep(as.character(object$forestParams[nrow(object$forestParams),1]), "absolute")) == 1 ) 
		{ 
			ff = L1Dist	
			ffComment = "OOB mean absolute error"
		}
				
		ZZ <- monitorOOBError(OOBMonitoring, Ytrain, regression = object$forest$regression, threads = threads, f = ff)
		
		if (object$forest$regression)
		{	plot(ZZ, type = 'l', lty=2, xlab = "Trees", ylab = ffComment, ...) 	}
		else
		{	    
			plot(apply(ZZ[,1:3],1, min), type='l', lty=2, col = "green", xlab = "Trees", ylab ="OOB error", ...)
			points(apply(ZZ[,1:3],1, mean), type='l', lty=3)
			points(apply(ZZ[,1:3],1, max), type='l', lty=3, col='red')
		}
		grid()
	}
	else
	{ print("no OOB data to plot")	}
}

getTree.randomUniformForest <- function(object, whichTree, labelVar = TRUE)
{
	if (labelVar)
	{
		Tree = data.frame(object$forest$object[[whichTree]])
		idx = which(Tree[, "split.var"] != 0)
		Tree[idx, "split.var"] = object$variablesNames[Tree[idx, "split.var"]]
		
		return(Tree)
	}
	else
	{  return(object$forest$object[[whichTree]])  }
}
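
# Usage sketch (assuming 'rf' is a fitted randomUniformForest object):
#   getTree.randomUniformForest(rf, whichTree = 1)        # variable names shown
#   getTree.randomUniformForest(rf, 1, labelVar = FALSE)  # raw numeric tree matrix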
		
predict.randomUniformForest <- function(object, X, 
	type = c("response", "prob", "votes", "confInt", "ranking", "quantile", "truemajority", "all"),
	classcutoff = c(0,0), 
	conf = 0.95,
	whichQuantile = NULL,
	rankingIDs = NULL,
	threads = "auto", 
	parallelpackage = "doParallel", ...) rUniformForestPredict(object, X, type = type, classcutoff = classcutoff, 
									conf = conf,
									whichQuantile = whichQuantile,
									rankingIDs = rankingIDs,
									threads = threads, 
									parallelpackage = parallelpackage, ...)
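
# Usage sketch (assuming 'rf' is a fitted classification forest and 'newX' a
# matrix or data frame bearing the training column names):
#   predict(rf, newX)                  # predicted labels
#   predict(rf, newX, type = "prob")   # per-class vote probabilities
#   predict(rf, newX, type = "votes")  # raw votes of each tree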

residualsRandomUniformForest <- function(object, Y = NULL) 
{
	object = filter.object(object)
	if (is.null(Y))
	{
		if (is.null(object$forest$OOB.predicts))
		{	stop("please enable OOB option when computing a random uniform forest") }
		else
		{	
			print("OOB residuals:")
			cat("\n")
			return(object$y - object$forest$OOB.predicts) 
		}
	}
	else
	{	
		if (is.numeric(object))
		{ return(object - Y) }
		else
		{ 
			if (!is.null(object$predictionObject$majority.vote))
			{ return(object$predictionObject$majority.vote - Y) }
			else
			{	stop("Please provide model responses to compute residuals")	}
		}
	}	
}

genericOutput <- function(xtest, ytest, paramsObject, RUF.model, ytrain = NULL, classcutoff = c(0,0))
{
	classes = NULL
	if (!is.null(ytest))
	{
		if (is.factor(ytest)) { YNames = classes = levels(ytest)   }
	}
	else
	{
		if (!is.null(ytrain))
		{
			if (!RUF.model$regression) 
			{ 
				ytrain = as.factor(ytrain) 
				YNames = classes = levels(ytrain)  
			}
		}
		else
		{
			if (!RUF.model$regression)  { YNames = classes = sort(unique(RUF.model$object[[2]][,6])[-1]) }
		}
	}		
	if (!is.null(RUF.model$OOB))
	{
		if (!RUF.model$regression && is.factor(ytest) && !is.character(RUF.model$OOB)) 
		{ row.names(RUF.model$OOB) = colnames(RUF.model$OOB)[-(length(YNames)+1)] = YNames }
	}
	if (!is.null(xtest)) 
	{ 
		classwtString = as.vector(paramsObject[which(row.names(paramsObject) == "classwt"),1])	
		classwt = FALSE
		if (is.na(as.logical(classwtString))) {  classwt = TRUE }
		
		if (!RUF.model$regression && (as.numeric(classcutoff[2]) != 0)) 
		{  
			classcutoff = c(which(classes == as.character(classcutoff[1])), as.numeric( classcutoff[2]))
		}				
		if (classcutoff[2] != 0 ) {  classcutoff[2] = 0.5/classcutoff[2] }		
		RUF.predicts <- randomUniformForestCore.predict(RUF.model, xtest, pr.classwt = classwt, 
		pr.imbalance = classcutoff)		 
		if (!is.null(ytest)) 
		{ 
			majorityVote = RUF.predicts$majority.vote
			if (!RUF.model$regression)
			{
				majorityVote = as.factor(majorityVote) 
				levels(majorityVote) = classes[as.numeric(levels(majorityVote))]				
			}
			errorObject <- someErrorType(majorityVote, ytest, regression = RUF.model$regression)
			if (!RUF.model$regression && is.factor(ytest)) 
			{ row.names(errorObject$confusion) = colnames(errorObject$confusion)[-(length(YNames)+1)] = YNames }			
			RUFObject = list(forest = RUF.model, predictionObject = RUF.predicts, errorObject = errorObject, forestParams = paramsObject, classes = classes) 
		}
		else
		{	
			RUFObject = list(forest = RUF.model, predictionObject = RUF.predicts, forestParams = paramsObject,  
			classes = classes) 
		}		
	}
	else
	{   RUFObject = list(forest = RUF.model, forestParams = paramsObject, classes = classes)	}	
	RUFObject
}

randomUniformForest.default <- function(X, Y = NULL, xtest = NULL, ytest = NULL, ntree = 100, 
	mtry = ifelse(bagging,ncol(X),floor(4/3*ncol(X))), 
	nodesize = 1,
	maxnodes = Inf,
    depth = Inf,
    depthcontrol = NULL,	    
	regression = ifelse(is.factor(Y), FALSE, TRUE),
	replace = ifelse(regression,FALSE,TRUE),
	OOB = TRUE,
	BreimanBounds = ifelse(OOB, TRUE, FALSE),
	subsamplerate = ifelse(regression,0.7,1),
    importance = TRUE,	
	bagging = FALSE,
	unsupervised = FALSE, 
	unsupervisedMethod = c("uniform univariate sampling", "uniform multivariate sampling", "with bootstrap"),
	classwt = NULL,
	oversampling = 0,
	targetclass = -1,
	outputperturbationsampling = FALSE,
    rebalancedsampling = FALSE,
	featureselectionrule = c("entropy", "gini", "random", "L2", "L1"),	
	randomcombination = 0,
	randomfeature = FALSE,
	categoricalvariablesidx = NULL,
	na.action = c("veryFastImpute", "fastImpute", "accurateImpute", "omit"),
	logX = FALSE,
	classcutoff = c(0,0),
	subset = NULL,
	usesubtrees = FALSE,
	threads = "auto",
	parallelpackage = "doParallel", 
	...)
{
	{
		if (threads != 1)
		{
			if (sample(9,1) == 9) { rm.tempdir() }
		}
		if (!is.null(subset))
		{
			X = X[subset,]
			Y = Y[subset]
		}
		if (exists("categorical", inherits = FALSE)) { categoricalvariablesidx = categorical }
		else { categorical = NULL  }
		if (is.null(Y) || (unsupervised == TRUE)) 
		{ 
			cat("Enter in unsupervised learning.\n")
			if (unsupervisedMethod[1] == "with bootstrap")
			{ 
				cat("'with bootstrap' is only needed as the second argument of 'unsupervisedMethod' option. Default option will be computed.\n")
				unsupervisedMethod[1] = "uniform univariate sampling"
			}
			XY <- unsupervised2supervised(X, method = unsupervisedMethod[1], bootstrap = if (length(unsupervisedMethod) > 1) { TRUE } else {FALSE })
			X = XY$X
			Y = as.factor(XY$Y)
			unsupervised = TRUE
		}
		if (ntree < 2) { stop("Please use at least 2 trees for computing forest.\n") }			
		if ( (subsamplerate == 1) &&  (replace == FALSE))  { OOB = FALSE }
		if (subsamplerate > 1) { replace = TRUE }
		if (depth < 3) { stop("Stumps are not allowed. Minimal depth is 3, leading, at most, to 8 leaf nodes.\n") }
		if (!is.null(classwt) && (!is.factor(Y) || regression)) 
		{ 
			cat("Class reweighing is not allowed for regression. Resetting to default values.\n")
			classwt = NULL
		}
		if (targetclass == 0) { stop("'targetclass' must take a strictly positive value") }
		if ( (BreimanBounds && (length(unique(Y)) > 2) && (ntree > 500)) || (BreimanBounds && (ntree > 500)) ) 
		{ cat("Note: Breiman's bounds (especially for multi-class problems) are computationally intensive.\n") }
		if (maxnodes < 6) { stop("Maximal number of nodes must be above 5.\n") }
		X <- fillVariablesNames(X)        
		getFactors <- which.is.factor(X, count = TRUE)
		if ( (sum(getFactors) > 0) && is.null(categoricalvariablesidx))
		{ 
			cat("Note: categorical variables are found in data. Please use option categoricalvariablesidx = 'all' to match them more closely.\n")
		}		 
		if (is.data.frame(X)) 
		{ 	cat("X is a data frame. String or factors have been converted to numeric values.\n") }
		X <- NAfactor2matrix(X, toGrep = "anythingParticular")
		if (!is.null(categoricalvariablesidx))
		{
			if (categoricalvariablesidx[1] == "all")
			{	
				factorVariables <- which(getFactors > 0)
				if (length(factorVariables) > 0) 	{ 	categoricalvariablesidx = factorVariables	}
				else {  cat("\nNo categorical variables found. Please type them manually\n")  }
			}
			else 
			{  
				if (is.character(categoricalvariablesidx[1]))
				{	categoricalvariablesidx = sort(match(categoricalvariablesidx, colnames(X)))	}
			}
		}
		if ( length(which( (X == Inf) | (X == -Inf) ) ) > 0) 
		{ stop("Inf or -Inf values found in data. Learning can not be done.\nRemove or replace them with NA in order to learn.\n") }
		XY <- NATreatment(X, Y, na.action = na.action, regression = regression)
		X  <- XY$X
		Y  <- XY$Y
		rm(XY)		
		if (randomcombination[1] > 0)
		{ 
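			# 'randomcombination' encodes pairs of variable indices, optionally followed
			# by one weight per pair (hence a length divisible by 3 when weights are
			# given); without weights, random signed uniform weights are drawn.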
			randomcombinationString = randomcombination[1]
			L.combination = length(randomcombination)
			if (L.combination%%3 != 0)
			{	weights = round(replicate(length(randomcombination)/2, sample(c(-1,1),1)*runif(1)), 2)	}
			else
			{ 	
				weights = round(randomcombination[(L.combination + 1 - L.combination/3):L.combination],2)
				randomcombination = randomcombination[1:(L.combination - (L.combination/3))]
			}			
			for (i in 2:length(randomcombination)) {  randomcombinationString = paste(randomcombinationString, randomcombination[i], sep=",") }
			for (i in 1:length(weights)) {  randomcombinationString = paste(randomcombinationString, weights[i], sep=",") }
			if (!is.null(xtest)) { xtest <- randomCombination(NAfactor2matrix(xtest, toGrep = "anythingParticular"), combination = randomcombination, weights = weights) }			
			randomcombinationObject = list(randomcombination, weights)					
			X <- randomCombination(X, combination = randomcombinationObject[[1]], weights = randomcombinationObject[[2]])			
		}
		else
		{   randomcombinationObject = randomcombination }
	}	
	RUF.model <- randomUniformForestCore(X, trainLabels = Y, ntree = ntree, nodeMinSize = nodesize, maxNodes = maxnodes, 
		features = mtry, rf.bootstrap = replace, depth = depth, depthControl = depthcontrol, 
		rf.treeSubsampleRate = subsamplerate, classwt = classwt, classCutOff = classcutoff, 
		rf.overSampling = oversampling, rf.targetClass = targetclass, rf.rebalancedSampling = rebalancedsampling, 
		rf.outputPerturbationSampling = outputperturbationsampling,	rf.randomCombination = randomcombinationObject, 
		rf.randomFeature = randomfeature, rf.treeBagging = bagging, rf.featureSelectionRule = featureselectionrule[1], 
		rf.regression = regression, use.OOB = OOB, BreimanBounds = BreimanBounds, variableImportance = importance, 
		whichCatVariables = categoricalvariablesidx, logX = logX, threads = threads, useSubTrees = usesubtrees,unsupervised = unsupervised,
		parallelPackage = parallelpackage[1])
	if (!is.null(classwt))
	{ 
		classwtString = classwt[1]
		for (i in 2:length(classwt)) { 	classwtString = paste(classwtString, classwt[i], sep=",") }	
	}	
	if (length(targetclass) > 1)
	{ 
		targetclassString = targetclass[1]
		for (i in 2:length(targetclass)) { 	targetclassString = paste(targetclassString, targetclass[i], sep=",") }	
	}	
	if (length(rebalancedsampling) > 1)
	{ 
		rebalancedsamplingString = rebalancedsampling[1]
		for (i in 2:length(rebalancedsampling)) 
		{ rebalancedsamplingString = paste(rebalancedsamplingString, rebalancedsampling[i], sep= ",") }	
	}	
	if (RUF.model$regression) {	classcutoff = c(0,0)  }	
	if (as.numeric(classcutoff[2]) == 0) {	classcutoffString = FALSE }
	else
	{
		classcutoffString = levels(Y)[which(levels(Y) == as.character(classcutoff[1]))]
		classcutoffString = paste("Class ", classcutoffString, "," , as.numeric(classcutoff[2])*100, "%", sep ="")
	}		
	paramsObject = c(ntree, mtry, nodesize, maxnodes, as.character(replace), bagging, depth, 
		ifelse(is.null(depthcontrol), length(depthcontrol)> 0, depthcontrol), OOB, importance, subsamplerate, 
		ifelse(is.null(classwt), length(classwt) > 0, classwtString), classcutoffString, 
		ifelse((oversampling == 0), (oversampling != 0), oversampling), outputperturbationsampling, 
		ifelse(length(targetclass) > 1, targetclassString, targetclass), 
		ifelse(length(rebalancedsampling) > 1, rebalancedsamplingString, rebalancedsampling), 
		ifelse((randomcombination[1] == 0),(randomcombination != 0), randomcombinationString), randomfeature, 
		ifelse(is.null(categoricalvariablesidx), length(categoricalvariablesidx) > 0, length(categoricalvariablesidx)), featureselectionrule[1])				
	if (RUF.model$regression) 
	{ 
		paramsObject[21] = if (featureselectionrule[1] == "L1") { "Sum of absolute residuals" }  else { "Sum of squared residuals" }
	    if ((paramsObject[5] == "FALSE") && (subsamplerate == 1)) { paramsObject[8] = FALSE }
	}	
	names(paramsObject) = c("ntree", "mtry", "nodesize", "maxnodes", "replace", "bagging", "depth", "depthcontrol", "OOB", "importance", "subsamplerate", "classwt", "classcutoff", "oversampling", "outputperturbationsampling", "targetclass", "rebalancedsampling", "randomcombination", "randomfeature", "categorical variables", "featureselectionrule")						
	paramsObject = as.data.frame(paramsObject)	
	RUF.model$logX = logX	
	if (!regression && !is.null(ytest) ) 
	{ 
		if (!is.factor(ytest))
		{ ytest = as.factor(ytest) }
	}
	RUFObject <- genericOutput(xtest, ytest, paramsObject, RUF.model, ytrain = Y, classcutoff = classcutoff)
	RUFObject$logX = logX
	if (!is.null(Y)) { 	RUFObject$y = Y	 }	
	if (is.null(colnames(X))) 
	{  
		varNames = NULL
		for (i in 1:ncol(X)) { varNames = c(varNames, paste("V", i, sep="")) }
		RUFObject$variablesNames = varNames
	}
	else
	{	RUFObject$variablesNames = colnames(X)	}	
	if (randomcombination[1] > 0)
	{ 	
		tempVarNames = vector(length = length(randomcombination)/2)
		idx = 1
		for (i in 1:(length(randomcombination)/2)) 
		{ 
			tempVarNames[i] = paste("V", randomcombination[idx], "x", randomcombination[idx+1], sep="") 
			idx = idx + 2
		}
		RUFObject$variablesNames = c(RUFObject$variablesNames, tempVarNames)
	}
	if (!is.null(categoricalvariablesidx)) {  RUFObject$categoricalvariables = categoricalvariablesidx }
	if (unsupervised) 
	{ 
		RUFObject$unsupervised = TRUE 
		RUFObject$unsupervisedMethod  = if (length(unsupervisedMethod) == 3) {unsupervisedMethod[1] } 
							else { if (length(unsupervisedMethod) == 1) { unsupervisedMethod } else { unsupervisedMethod[1:2] } }
	}
	RUFObject$call <- match.call()
	class(RUFObject) <- "randomUniformForest"
	RUFObject
}

randomUniformForestCore <- function(trainData, trainLabels = 0, 
	features = floor(4/3*(ncol(trainData))), 
	ntree = 100, 
	nodeMinSize = 1,
	maxNodes = Inf,
	depth = Inf,
    depthControl = NULL,
	splitAt = "random", 
	rf.regression = TRUE, 
	rf.bootstrap = ifelse(rf.regression, FALSE, TRUE), 
	use.OOB = TRUE,
	BreimanBounds = ifelse(use.OOB, TRUE, FALSE),
	rf.treeSubsampleRate = ifelse(rf.regression, 0.7, 1),
	rf.treeBagging = FALSE, 
	classwt = NULL,
	classCutOff = c(0,0),
	rf.overSampling = 0,
	rf.targetClass = -1,
	rf.outputPerturbationSampling = FALSE,
    rf.rebalancedSampling = FALSE,
	rf.featureSelectionRule = c("entropy", "gini", "random", "L2", "L1"),
	variableImportance = TRUE,	
	rf.randomCombination = 0,
	rf.randomFeature = FALSE,
	whichCatVariables = NULL,
	logX = FALSE,
	unsupervised = FALSE,
	useSubTrees = FALSE,
	threads = "auto",	
	parallelPackage = "doParallel",
	export = c("uniformDecisionTree", "CheckSameValuesInAllAttributes", "CheckSameValuesInLabels", "fullNode", "genericNode", "leafNode", "randomUniformForestCore.predict", "onlineClassify", "overSampling", "predictDecisionTree", "options.filter", "majorityClass", "randomCombination", "randomWhichMax", "which.is.na", "factor2vector", "outputPerturbationSampling", "rmNA", "count.factor", "find.idx", "classifyMatrixCPP", 
	"L2DistCPP", "checkUniqueObsCPP", "crossEntropyCPP", "giniCPP", "L2InformationGainCPP", 
	"entropyInformationGainCPP", "runifMatrixCPP", "NATreatment", "rmInf", "rm.InAList"), 
	...)
{
	set.seed(sample(ntree,1))
	n = nrow(trainData)
	p = ncol(trainData)
	if (exists("categorical", inherits = FALSE)) { whichCatVariables = categorical }
	else { categorical = NULL  }
	if (useSubTrees) 
	{ 
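		# Cap the depth of first-pass trees (at most 10, about half the balanced
		# depth log2(n)) so that sub-trees can be grown beneath them.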
		if (depth == Inf) { depth = min(10, floor(0.5*log(n)/log(2))) }
		if (rf.outputPerturbationSampling)
		{
			rf.outputPerturbationSampling = FALSE
			cat("'output perturbation sampling' is not currently compatible with 'usesubtrees'.\n Option has been reset.\n")
		}
	}
	if ((rf.randomFeature) && (features == "random") ) { stop("random feature is a special case of mtry = 'random'") }
	if (is.numeric(threads))
	{
		if (threads < 1) { stop("Number of threads must be positive") }
	}
	if (!is.matrix(trainData)) { trainData <- NAfactor2matrix(trainData, toGrep = "anythingParticular") }
	if (!is.null(whichCatVariables)) 
	{ 
	  cat("Considering use of Dummies (for example by using formula) for categorical variables might be an alternative.\n")
	  # rmFromCatIdx = apply(trainData[,whichCatVariables], 2,function(Z) length(table(Z)))
	  # whichCatVariables = whichCatVariables[which(rmFromCatIdx != 2)]
	  # if (length(whichCatVariables) == 0) {  whichCatVariables = NULL }
	}
	{
		if (!is.null(classwt)) 
		{ 
			classwt = classwt/sum(classwt) 
			if (is.na(sum(classwt))) { stop ("NA found in class weights.\n") }			
			if (sum(classwt) > 1.1)  { stop ("(inverse) weights do not sum to 1.\n") }
		}		
		if (length(trainLabels) != 1)
		{	
			if (n != length(trainLabels)) 
			{ stop ("X and Y don't have the same size.\n")	}	
		}		
		if (!is.matrix(trainData))
		{ stop ("X cannot be converted to a matrix. Please provide true matrix not data frame.") }		
		if (rf.treeSubsampleRate == 0) { rf.treeSubsampleRate = 1 }		
		if (rf.treeBagging) { features = min(features, p)  }
		if ( (rf.treeSubsampleRate < 0.5) && (n < 300) ) 
		{ rf.treeSubsampleRate = 0.5;  cat("Subsampled training set would be too small. Subsample rate has been set to 0.5.\n") }
		if (!is.character(nodeMinSize))
		{
			if ( (nodeMinSize != 1) && (nodeMinSize >  floor(n/4)) ) { stop("nodeMinSize is too high. Not suitable for a random forest.\n") }	
			if ( (nodeMinSize < 1) ) {  nodeMinSize  = 1; cat("Minimal node size has been set to 1.\n") }
		}		
		if ( features == 1 ) { rf.randomFeature = TRUE }
		if ( features < 1 )  
		{ 
			features = floor(4/3*p)  
			rf.treeBagging = FALSE
			cat("Error setting mtry. Resetting to default values.\n") 
		}			
		if (is.factor(trainLabels))
		{ 	
			if ((as.numeric(classCutOff[2]) != 0)) 
			{  	
				classCutOff = c(which(levels(trainLabels) == as.character(classCutOff[1])), 
				0.5/as.numeric(classCutOff[2])) 
				if (length(classCutOff) == 1)
				{ stop("Label not found. Please provide name of the label instead of its position.\n") }
			}					
			rf.regression = FALSE 
			labelsObject = factor2vector(trainLabels)
			trainLabels = as.numeric(as.vector(labelsObject$vector))
			if (length(unique(trainLabels)) == 1 ) { stop ("Y is a constant value. Thus, learning is not needed.\n")	}			
		}
		else
		{	
			if (rf.regression && (length(unique(trainLabels)) > 32)) 
			{ 
				if ((rf.treeSubsampleRate < 1) && (rf.bootstrap == TRUE))
				{ cat("For only accuracy, use option 'subsamplerate = 1' and 'replace = FALSE'\n") }				
				classCutOff =  c(0,0) 				
			}
			else
			{
				if (!rf.regression)
				{
					if ((as.numeric(classCutOff[2]) != 0)) 
					{  	
						classCutOff = c(which(levels(trainLabels) == as.character(classCutOff[1])), 0.5/as.numeric(classCutOff[2])) 
						if (length(classCutOff) == 1)
						{ stop("Label not found. Please provide name of the label instead of its position") }
					}					
					labelsObject = factor2vector(trainLabels)
					trainLabels = as.numeric(as.vector(labelsObject$vector))
					labelsObject$factors = levels(as.factor(trainLabels))
					if (length(unique(trainLabels)) == 1 ) { stop ("Y is a constant value. Thus, learning is not needed\n")	}
				}
				else
				{		
					if (length(unique(trainLabels)) <= 32)  
					{ 
						cat("Regression has been performed but there is less than 32 distinct values.\nRegression option could be set to FALSE, if classification is #required.\nIf outputs are factors, conversion is automatically done.\n")
						classCutOff = c(0,0)
					}
				}
			}
		}							
		if (!rf.regression)
		{
			rf.classes <- as.numeric(levels(as.factor(trainLabels)) )			
			if (as.character(rf.classes[1]) != labelsObject$factors[1]) 
			{	
				cat("Labels", labelsObject$factors, "have been converted to", rf.classes, 
				"for ease of computation and will be used internally as a replacement.\n")	
			}			
			if (!is.null(classwt))
			{ 	
				if (length(classwt) != length(rf.classes)) 
				{ stop("Length of class weights is not equal to length of classes.\n") }
			}
		}
		else
		{ 	rf.classes = trainLabels; rf.overSampling = 0; rf.rebalancedSampling = FALSE; classCutOff = c(0,0)  }		
		if (logX)
		{ 
		   if (is.null(whichCatVariables))  {  trainData <- generic.log(trainData) }
		   else   {  trainData[,-whichCatVariables] <- generic.log(trainData[,-whichCatVariables]) }
		}		
		if (!rf.regression)  
		{ 
			if ( (rf.featureSelectionRule[1] == "L1") || (rf.featureSelectionRule[1] == "L2") )
			{	
				rf.featureSelectionRule[1] = "entropy" 
				cat("Feature selection rule has been set to entropy.\n")
			}
		}
		else 
		{  
			if ( (rf.featureSelectionRule[1] == "entropy")  || (rf.featureSelectionRule[1] == "gini") ) 
			{	rf.featureSelectionRule[1] = "L2" }			
			if (!is.null(depthControl) && (!is.character(depthControl)))
			{	
				if (depthControl < 1)
				{
					depthControl = NULL
					cat("'depthcontrol' option is lower than its lower bound. Resetting to default value.\n")
				}				
			}			
		}
		if (!rf.bootstrap && (rf.treeSubsampleRate == 1)) {  use.OOB = FALSE }
	}
	{
		## #require(parallel)	
		max_threads = detectCores()		
		if (threads == "auto")
		{	
			if (max_threads == 2) { threads = max_threads }
			else {	threads  = max(1, max_threads - 1)  }
		}
		else
		{
			if (max_threads < threads) 
			{	cat("Note: number of threads is higher than logical threads in this computer.\n") }
		}
		
		{
			## #require(doParallel)			
			Cl = makePSOCKcluster(threads, type = "SOCK")
			registerDoParallel(Cl)
		}
		chunkSize  <-  ceiling(ntree/getDoParWorkers())
		smpopts  <- list(chunkSize = chunkSize)
	}
	rufObject = vector('list', ntree)
	if (use.OOB)
	{
		rufObject <- foreach(i = 1:ntree, .export = export, .options.smp = smpopts, .inorder = FALSE, 
		.multicombine = TRUE, .maxcombine = ntree) %dopar%
		{
			uniformDecisionTree(trainData, trainLabels, nodeMinSize = nodeMinSize, maxNodes = maxNodes, 
			treeFeatures = features, getSplitAt = splitAt, regression = rf.regression, bootstrap = rf.bootstrap, treeSubsampleRate = rf.treeSubsampleRate, treeDepth = depth, treeClasswt = classwt,
			treeOverSampling = rf.overSampling, targetClass = rf.targetClass, OOB = use.OOB, 
			treeRebalancedSampling = rf.rebalancedSampling, treeBagging = rf.treeBagging, 
			randomFeature = rf.randomFeature, treeCatVariables = whichCatVariables, 
			outputPerturbation = rf.outputPerturbationSampling, featureSelectionRule = rf.featureSelectionRule, treeDepthControl = depthControl, unsupervised = unsupervised)
		}		
		stopCluster(Cl)
		if (useSubTrees)
		{
			rufObject2 = vector('list', ntree)
			Cl = makePSOCKcluster(threads, type = "SOCK") 
			registerDoParallel(Cl)
						
			rufObject2 <- foreach(i = 1:ntree, .export = export, .options.smp = smpopts, .inorder = FALSE, 
			.multicombine = TRUE, .maxcombine = ntree) %dopar%
			{
				finalTree = list()
				OOBIdx = rufObject[[i]]$OOB.idx
				idxList = rufObject[[i]]$idxList
				trainData2 = trainData[rufObject[[i]]$followIdx,]
				trainLabels2 = trainLabels[rufObject[[i]]$followIdx]
				nodeVector = rufObject[[i]]$nodeVector
				lengthIdx = sapply(idxList, length)
				rmlengthIdx = which(lengthIdx < 2)
				nodeVector = if (length(rmlengthIdx) > 0) nodeVector[-rmlengthIdx] else nodeVector
				newIdxList <- if (length(rmlengthIdx) > 0) rm.InAList(idxList, rmlengthIdx) else idxList
				originalTree = rufObject[[i]]$Tree
				lengthNodeVector = length(nodeVector)
				newTrees = vector('list', lengthNodeVector)
								
				if ( (lengthNodeVector > 0) )
				{
					newTrees <- lapply(newIdxList, function(Z)
						{
							newTrainData = trainData2[Z, ,drop = FALSE]; newTrainLabels = trainLabels2[Z]
													
							uniformDecisionTree(newTrainData, newTrainLabels, nodeMinSize = nodeMinSize, 
							maxNodes = maxNodes, treeFeatures = features, getSplitAt = splitAt, 
							regression = rf.regression, bootstrap = FALSE, treeSubsampleRate = 1, treeDepth = Inf, treeClasswt = classwt, treeOverSampling = rf.overSampling, targetClass = rf.targetClass, OOB = FALSE, treeRebalancedSampling = rf.rebalancedSampling, treeBagging = rf.treeBagging, 
							randomFeature = rf.randomFeature, treeCatVariables = whichCatVariables, 
							outputPerturbation = rf.outputPerturbationSampling, 
							featureSelectionRule = rf.featureSelectionRule, treeDepthControl = NULL, 
							unsupervised = unsupervised, moreThreadsProcessing = FALSE, moreNodes = TRUE)
						}
					)
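					# Graft each sub-tree onto the original tree: shift daughter indices by
					# the current number of rows, zero them for the sub-tree leaves, append
					# the sub-tree rows, then replace the former node by the sub-tree root.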
					
					for (j in 1:lengthNodeVector)
					{
						toInsertTree = newTrees[[j]]$Tree 
						keepMaxIdx = nrow(originalTree)
						toInsertTree[,1:2] = toInsertTree[,1:2] + keepMaxIdx
						toInsertTree[toInsertTree[,"status"] == -1,1:2] = 0
						originalTree = rbind(originalTree, toInsertTree)
						originalTree[nodeVector[j],] = toInsertTree[1,]
					}
					rownames(originalTree) = 1:nrow(originalTree)
				}
				finalTree$Tree = originalTree
				finalTree$OOB.idx = OOBIdx
				finalTree
			}
			rufObject = rufObject2 
			stopCluster(Cl)
		}
		{
			new.rufObject = vector("list", ntree)
			new.rufObject$Tree = lapply(rufObject, function(Z) Z$Tree) 
			OOB.lengthVector = sapply(rufObject, function(Z) length(Z$OOB.idx))
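			# Stack one (observation index, tree index) row per out-of-bag case,
			# recording which trees left each observation out of their sample.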
			OOB.matrix = matrix(0L, sum(OOB.lengthVector), 2)
			jj = 1
			for (i in 1:ntree) 	
			{ 
				OOB.matrix[jj:(jj + OOB.lengthVector[i] - 1),] = 
				cbind(rufObject[[i]]$OOB.idx, rep(i,OOB.lengthVector[i]))
				jj = jj + OOB.lengthVector[i]
			} 
			OOB.val = sort(unique(OOB.matrix[,1])) 
			n.OOB = n 
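			# Inf marks (observation, tree) cells without an OOB vote, i.e. the
			# observation was in-bag for that tree.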
			OOB.votes2 = matrix(Inf, n.OOB, ntree)			
			if (is.null(classwt))
			{
				OOB.votes <- randomUniformForestCore.predict(new.rufObject$Tree, trainData, 
					pr.regression = rf.regression, classes = rf.classes, OOB.idx = TRUE, 
					pr.parallelPackage = parallelPackage[1], pr.imbalance = classCutOff, pr.threads = threads)			 
				for (j in 1:ntree)
				{
					idxJ = which(OOB.matrix[,2] == j)
					if (length(idxJ) > 0) {	OOB.votes2[OOB.matrix[idxJ,1],j] = OOB.votes[OOB.matrix[idxJ,1],j] }
				}
				OOB.object <- majorityClass(OOB.votes2, rf.classes, m.imbalance = classCutOff, 
					m.regression = rf.regression)
			}
			else
			{
				OOB.allWeightedVotes = matrix(Inf, n.OOB, ntree)
				OOB.votes <- randomUniformForestCore.predict(new.rufObject$Tree, trainData, 
					pr.regression = rf.regression, classes = rf.classes, pr.classwt = TRUE, OOB.idx = TRUE, pr.parallelPackage = parallelPackage[1], pr.imbalance = classCutOff, pr.threads = threads)			
				for (j in 1:ntree)
				{
					idxJ = which(OOB.matrix[,2] == j)
					if (length(idxJ) > 0) 	
					{	
						OOB.votes2[OOB.matrix[idxJ,1],j] = OOB.votes$all.votes[OOB.matrix[idxJ,1],j] 	
						OOB.allWeightedVotes[OOB.matrix[idxJ,1],j] = OOB.votes$allWeightedVotes[OOB.matrix[idxJ,1],j]
					}
				}				
				OOB.object <- majorityClass(OOB.votes2, rf.classes, m.classwt = OOB.allWeightedVotes, 
					m.imbalance = classCutOff, m.regression = rf.regression)			
			}			
			OOB.pred = OOB.object$majority.vote
			if (BreimanBounds)
			{
				strengthCorr.object <- strength_and_correlation(s.trainLabels = trainLabels, OOB.votes2, 
					OOB.object, rf.classes, s.regression = rf.regression, s.parallelPackage = parallelPackage[1], s.threads = threads)
			}
			if (rf.regression)
			{  
				OOB.confusion = "only prediction error for regression. See ..$pred.error"				
				MSE <- L2Dist(OOB.pred[OOB.val], trainLabels[OOB.val])/n.OOB				
				pred.error = round(MSE,4)
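				# Pseudo R-squared: share of variance explained, 1 - MSE/Var(y),
				# computed on the OOB sample and floored at zero.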
				percent.varExplained = max(0, 100*round(1 - MSE/var(trainLabels[OOB.val]),4))
			}
			else
			{
				if ( min(trainLabels) == 0) { OOB.pred = OOB.pred - 1 }			
				if ( (length(unique(trainLabels[OOB.val])) == 1) || (length(unique(OOB.pred[OOB.val])) == 1) )
				{	
					OOB.confusion = "Minority class can not be predicted or OOB predictions contain only one label."
					pred.error = "Minority class can not be predicted or OOB predictions contain only one label."
				}
				else
				{
					OOB.confusion <- confusion.matrix(OOB.pred[OOB.val], trainLabels[OOB.val])
					pred.error <- generalization.error(OOB.confusion)
				}
			}
		}
		if (variableImportance)
		{	
			Cl = makePSOCKcluster(threads, type = "SOCK")
			registerDoParallel(Cl)
			gen.rufObject = new.rufObject$Tree	
		}
		else
		{	
			if (rf.regression)
			{ 
				if (BreimanBounds)
				{
					return(list(object = new.rufObject$Tree, OOB = OOB.confusion, OOB.predicts = as.numeric(OOB.pred), 
					OOB.strengthCorr = strengthCorr.object, OOB.votes = OOB.votes2,	pred.error = pred.error, 
					percent.varExplained = percent.varExplained, regression = rf.regression) )
				}
				else
				{
					return(list(object = new.rufObject$Tree, OOB = OOB.confusion, OOB.predicts = as.numeric(OOB.pred), 
					OOB.votes = OOB.votes2,	pred.error = pred.error, percent.varExplained = percent.varExplained,  
					regression = rf.regression) )
				}
			}
			else
			{ 	
				if (BreimanBounds)
				{
					return(list(object = new.rufObject$Tree, OOB = OOB.confusion, OOB.predicts = as.numeric(OOB.pred), 
						OOB.strengthCorr = strengthCorr.object, OOB.votes = OOB.votes2, pred.error = pred.error, 
						regression = rf.regression) )
				}
				else
				{
					return(list(object = new.rufObject$Tree, OOB = OOB.confusion, OOB.predicts = as.numeric(OOB.pred), 
						OOB.votes = OOB.votes2, pred.error = pred.error, regression = rf.regression) )
				}					
			}
		}		
	} 
	else
	{
		if (!useSubTrees)
		{
			rufObject <- foreach(i = 1:ntree, .export = export, .options.smp  = smpopts, .inorder = FALSE, 
			.multicombine = TRUE, .maxcombine = ntree) %dopar%	
			{	
				uniformDecisionTree(trainData, trainLabels, nodeMinSize = nodeMinSize, maxNodes = maxNodes, 
				treeFeatures = features, getSplitAt = splitAt, regression = rf.regression, bootstrap = rf.bootstrap, treeSubsampleRate = rf.treeSubsampleRate, treeDepth = depth, treeClasswt = classwt, 
				treeOverSampling = rf.overSampling, targetClass = rf.targetClass, treeBagging = rf.treeBagging, 
				treeRebalancedSampling = rf.rebalancedSampling, randomFeature = rf.randomFeature, 
				treeCatVariables = whichCatVariables, outputPerturbation = rf.outputPerturbationSampling, 
				featureSelectionRule = rf.featureSelectionRule, treeDepthControl = depthControl, 
				unsupervised = unsupervised, moreThreadsProcessing = useSubTrees)$Tree
			}
		}		
		else
		{
			rufObject <- foreach(i = 1:ntree, .export = export, .options.smp  = smpopts, .inorder = FALSE, 
			.multicombine = TRUE, .maxcombine = ntree) %dopar%	
			{	
				uniformDecisionTree(trainData, trainLabels, nodeMinSize = nodeMinSize, maxNodes = maxNodes, 
				treeFeatures = features, getSplitAt = splitAt, regression = rf.regression, bootstrap = rf.bootstrap, treeSubsampleRate = rf.treeSubsampleRate, treeDepth = depth, treeClasswt = classwt, 
				treeOverSampling = rf.overSampling, targetClass = rf.targetClass, treeBagging = rf.treeBagging, 
				treeRebalancedSampling = rf.rebalancedSampling, randomFeature = rf.randomFeature, 
				treeCatVariables = whichCatVariables, outputPerturbation = rf.outputPerturbationSampling, 
				featureSelectionRule = rf.featureSelectionRule, treeDepthControl = depthControl, 
				unsupervised = unsupervised, moreThreadsProcessing = useSubTrees)
			}
						
			stopCluster(Cl)
			rufObject2 = vector('list', ntree)
			Cl = makePSOCKcluster(threads,  type = "SOCK") 
			registerDoParallel(Cl)
						
			rufObject2 <- foreach(i = 1:ntree, .export = export, .options.smp = smpopts, .inorder = FALSE, 
			.multicombine = TRUE, .maxcombine = ntree) %dopar%
			{
				OOBIdx = rufObject[[i]]$OOB.idx
				idxList = rufObject[[i]]$idxList
				trainData2 = trainData[rufObject[[i]]$followIdx,]
				trainLabels2 = trainLabels[rufObject[[i]]$followIdx]
				nodeVector = rufObject[[i]]$nodeVector
				lengthIdx = sapply(idxList, length)
				rmlengthIdx = which(lengthIdx < 2)
				nodeVector = if (length(rmlengthIdx) > 0) nodeVector[-rmlengthIdx] else nodeVector
				newIdxList <- if (length(rmlengthIdx) > 0) rm.InAList(idxList, rmlengthIdx) else idxList
				originalTree = rufObject[[i]]$Tree
				lengthNodeVector = length(nodeVector)
				newTrees = vector('list', lengthNodeVector)
				
				if (length(nodeVector) > 0)
				{
					newTrees <- lapply(newIdxList, function(Z)
						{
							newTrainData = trainData2[Z, ,drop = FALSE]; newTrainLabels = trainLabels2[Z]
							
							uniformDecisionTree(newTrainData, newTrainLabels, nodeMinSize = nodeMinSize, 
							maxNodes = maxNodes, treeFeatures = features, getSplitAt = splitAt, 
							regression = rf.regression, bootstrap = FALSE, treeSubsampleRate = 1, treeDepth = Inf, treeClasswt = classwt, treeOverSampling = rf.overSampling, targetClass = rf.targetClass, OOB = FALSE, treeRebalancedSampling = rf.rebalancedSampling, treeBagging = rf.treeBagging, 
							randomFeature = rf.randomFeature, treeCatVariables = whichCatVariables, 
							outputPerturbation = rf.outputPerturbationSampling,
							featureSelectionRule = rf.featureSelectionRule, treeDepthControl = NULL, 
							unsupervised = unsupervised, moreThreadsProcessing = FALSE, moreNodes = TRUE)
						}
					)
					
					for (j in 1:lengthNodeVector)
					{
						toInsertTree = newTrees[[j]]$Tree 
						keepMaxIdx = nrow(originalTree)
						toInsertTree[,1:2] = toInsertTree[,1:2] + keepMaxIdx
						toInsertTree[toInsertTree[,"status"] == -1,1:2] = 0
						originalTree = rbind(originalTree, toInsertTree)
						originalTree[nodeVector[j],] = toInsertTree[1,]
					}
					rownames(originalTree) = 1:nrow(originalTree)
				}				
				originalTree
			}
			rufObject = rufObject2 
		}
					
		if (variableImportance) {  gen.rufObject = rufObject	}
		else 
		{	
			stopCluster(Cl) 			
			return( list(object = rufObject, regression = rf.regression))	
		}
	}
	if (variableImportance)
	{
	   	if (ntree <= 100) {  threads = 1 }						
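		# Variable importance accumulates, over every split of every tree, the split
		# variable and its gain: information gain for classification, (scaled) L2
		# distance for regression.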
		varImpMatrix1 <- unlist(lapply(gen.rufObject, function(Z) Z[,"split var"]))		
		if (rf.regression) 	{  varImpMatrix2 <- unlist(lapply(gen.rufObject, function(Z) Z[,"L2Dist"]))/1000 }
		else {	varImpMatrix2 <- unlist(lapply(gen.rufObject, function(Z) Z[,"Gain"])) }			
		varImpMatrix <- cbind(varImpMatrix1, varImpMatrix2)
		if (!rf.regression)
		{   
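			# For each leaf, recover its predicted class and the variable used at the
			# parent split, so that importance can be broken down by class.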
			predMatrix <- foreach(i = 1:ntree, .options.smp = smpopts, .inorder = TRUE, .combine = rbind, 
			.multicombine = TRUE) %dopar%	
			{ 	
				predIdx = which(gen.rufObject[[i]][,"left daughter"] == 0)
				predClass = gen.rufObject[[i]][predIdx, "prediction"]				
				predVar = vector(length = length(predIdx))
				if (useSubTrees)
				{
					for (j in seq_along(predIdx))
					{ 
						predVar[j] = gen.rufObject[[i]][which(gen.rufObject[[i]][,"left daughter"] == predIdx[j] | gen.rufObject[[i]][,"right daughter"] == predIdx[j]), "split var"][1]
					}
				}
				else
				{
					for (j in seq_along(predIdx))
					{ 
						if ((predIdx[j] %% 2) == 0) 
						{ 
							predVar[j] = gen.rufObject[[i]][which(gen.rufObject[[i]][,"left daughter"] == predIdx[j]), "split var"] 
						}
						else 
						{	
							predVar[j] = gen.rufObject[[i]][which(gen.rufObject[[i]][,"right daughter"] == predIdx[j]), "split var"] 
						}
					}
				}			
				cbind(predVar, predClass)
			}	
		}
		stopCluster(Cl)
		varImpMatrix = varImpMatrix[-which(varImpMatrix[,1] == 0),]		
		na.gain = which(is.na(varImpMatrix[,2]))
		if (length(na.gain) > 0) { 	varImpMatrix = varImpMatrix[-na.gain,]	}			
		rf.var = unique(sortCPP(varImpMatrix[,1]))
		n.var = length(rf.var)			
		if (!rf.regression)
		{	
			var.imp.output <- matrix(NA, n.var, 4)
			for (i in 1:n.var)
			{  	
				classTable = sort(table(predMatrix[which(rf.var[i] == predMatrix[,1]),2]),decreasing = TRUE)
				classMax = as.numeric(names(classTable))[1]
				classFreq =	(classTable/sum(classTable))[1]
				var.imp.output[i,] = c(rf.var[i], sum(varImpMatrix[which(rf.var[i] == varImpMatrix[,1]),2]), 
					classMax, classFreq)
			}
		}
		else
		{	
			var.imp.output <- matrix(NA, n.var, 2)
			for (i in 1:n.var)
			{  	var.imp.output[i,] = c(rf.var[i], sum(varImpMatrix[which(rf.var[i] == varImpMatrix[,1]),2]))	}
		}
		var.imp.output = var.imp.output[order(var.imp.output[,2], decreasing = TRUE),]
		var.imp.output = round(cbind( var.imp.output, 100*var.imp.output[,2]/max(var.imp.output[,2])),2)
		percent.importance = round(100*var.imp.output[,2]/sum(var.imp.output[,2]),0)
		var.imp.output[,2] = round(var.imp.output[,2],0)		
		var.imp.output = cbind(var.imp.output, percent.importance)		
		if (rf.regression) 
		{	colnames(var.imp.output) = c("variables", "score", "percent", "percent importance")	}
		else
		{	
			colnames(var.imp.output) = c("variables", "score", "class", "class frequency", "percent", 
				"percent importance")
			row.names(var.imp.output) = NULL	
		}		
		var.imp.output = data.frame(var.imp.output)		
		if (!is.null(colnames(trainData))) 	{	var.imp.output[,1] = colnames(trainData)[var.imp.output[,1]]	}		
		if (use.OOB)
		{	
			if (rf.regression)
			{
				if (BreimanBounds)
				{
					return(list(object = gen.rufObject, OOB = OOB.confusion, OOB.predicts = as.numeric(OOB.pred), 
					OOB.strengthCorr = strengthCorr.object, OOB.votes = OOB.votes2, pred.error = pred.error, 
					percent.varExplained = percent.varExplained,  variableImportance = var.imp.output, 
					regression = rf.regression) )
				}
				else
				{
					return(list(object = gen.rufObject, OOB = OOB.confusion, OOB.predicts = as.numeric(OOB.pred), 
					OOB.votes = OOB.votes2, pred.error = pred.error, percent.varExplained = percent.varExplained, 
					variableImportance = var.imp.output, regression = rf.regression) )
				}
			}
			else
			{
				if (BreimanBounds)
				{
					return( list(object = gen.rufObject, OOB = OOB.confusion, OOB.predicts = as.numeric(OOB.pred), 
					OOB.strengthCorr = strengthCorr.object, OOB.votes = OOB.votes2, pred.error = pred.error, 
					variableImportance = var.imp.output, regression = rf.regression) )
				}
				else
				{
					return(list(object = gen.rufObject, OOB = OOB.confusion, OOB.predicts = as.numeric(OOB.pred), 
					OOB.votes = OOB.votes2, pred.error = pred.error, variableImportance = var.imp.output, regression = rf.regression) )	
				}
			}
		}
		else
		{	return(list(object = gen.rufObject, variableImportance = var.imp.output, regression = rf.regression) )	}
	}
}

rUniformForestPredict <- function(object, X, 
	type = c("response", "prob", "votes", "confInt", "ranking", "quantile", "truemajority", "all"),
	classcutoff = c(0,0), 
	conf = 0.95,
	whichQuantile = NULL,
	rankingIDs = NULL,
	threads = "auto", 
	parallelpackage = "doParallel", ...)
{
	object <- filter.object(object)
	X <- if (is.vector(X)) { t(X) } else  { X }
	X <- fillVariablesNames(X)	
	if (!is.null(object$formula) && (length(object$variablesNames) != ncol(X)))
	{
		mf <- model.frame(data = as.data.frame(cbind(rep(1, nrow(X)),X)))
		X <- model.matrix(attr(mf, "terms"), data = mf)[,-1]
	}
	else
	{
		if (length(object$variablesNames) != ncol(X)) 
		{	
			cat("Data to predict have not the same dimension than data that have been computed by the model.\n Relevant variables have been extracted.\n")
			newVars <- rmNA(match(object$variablesNames, colnames(X)))
			if (length(newVars) == 0) 
			{ 
				stop("No relevant variable has been found. Please give to both train and test data the same column names.\n") 
			}
			X = X[,newVars, drop = FALSE]
		}
	}
	classes = object$classes	
	flag1 = flag2 = 1
	for (i in 1:ncol(object$forestParams))
	{			
		classCutOffString = as.vector(object$forestParams[which(row.names(object$forestParams) == "classcutoff"), i])		
		if ((classCutOffString != "FALSE") && (as.numeric(classcutoff[2]) == 0))
		{
			classCutOffString = rm.string(classCutOffString, "%")
			classCutOffString = rm.string(classCutOffString, "Class ")
			classCutOffString = strsplit(classCutOffString, ",")
			classcutoff[1] = which(classes == classCutOffString[[1]][1])
			classcutoff[2] = 0.5/(as.numeric(classCutOffString[[1]][2])/100)
		}
		else
		{	
			if (as.numeric(classcutoff[2]) != 0) 
			{  	
				if (i == 1)
				{
					classcutoff <- c(which(classes == as.character(classcutoff[1])), 0.5/(as.numeric(classcutoff[2]))) 
					if (length(classcutoff) == 1) 
					{ stop("Label not found. Please provide name of the label instead of its position") }
				}
				else { cat("Only the last cutoff will be used in incremental random uniform forests.\n") }
			}		
		}				
		classwtString = as.vector(object$forestParams[which(row.names(object$forestParams) == "classwt"),i])	
		classwt = FALSE
		if (is.na(as.logical(classwtString))) {  classwt = TRUE; flag1 = flag2 = -1  }
		else 
		{	flag2 = 1	}		
		if (flag1 != flag2) 
		{ stop("Incremental random Uniform forest. Class reweighing must remain (or miss) in all forests.\n") }	
		randomCombinationString = as.vector(object$forestParams[which(row.names(object$forestParams) == "randomcombination"),i])
		if  ((randomCombinationString != "FALSE") )
		{	
			if (i == 1)
			{
				random.combination = as.numeric(unlist(strsplit(randomCombinationString, ",")))
				nbCombination = length(random.combination)/3
				X <- randomCombination(NAfactor2matrix(X, toGrep = "anythingParticular"), 
					combination = random.combination[1:(length(random.combination) - nbCombination)], 
					weights = random.combination[(length(random.combination) - nbCombination + 1):length(random.combination)])
			}
			else {	cat("Only first random combination will be used in incremental random uniform forest.\n") 	}
		}
	}	
	r.threads = threads	
	predObject <- randomUniformForestCore.predict(object, X, rf.aggregate = TRUE, pr.imbalance = classcutoff, pr.threads = threads, 	
	pr.classwt = classwt, pr.parallelPackage = parallelpackage[1])		
	object = filter.forest(object)	
	if (type[1] == "response") 
	{  
		predictedObject = predObject$majority.vote
		if (!is.null(classes)) 
		{  
			predictedObject = as.factor(predictedObject) 
			levels(predictedObject) = classes[as.numeric(levels(predictedObject))]
		}
	}	
	if (type[1] == "truemajority") 
	{
		{	
			nbObs = nrow(predObject$all.votes)
			trueMajorityVotes = rep(0,nbObs)
			alpha1 = (1 - conf)/2
			alpha2 = conf + alpha1			
			for (i in 1:nbObs)
			{
				expectedClasses = unlist(lapply(predObject$votes.data, function(Z) Z[i,1]))	
				votingMembers = unlist(lapply(predObject$votes.data, function(Z) Z[i,2]))
				if (object$regression)
				{
					votes = cbind(expectedClasses, votingMembers)
					colnames(votes) = c("expectedClasses", "votingMembers")
					trueMajorityVotes[i] = sum( votes[,"expectedClasses"]*votes[,"votingMembers"])/sum(votes[,"votingMembers"])
				}
				else
				{
					outliers = c(quantile(votingMembers, alpha1), quantile(votingMembers, alpha2))
					idxOutliers = which(votingMembers <= outliers[1] | votingMembers >= outliers[2])
					votes = cbind(expectedClasses[-idxOutliers], votingMembers[-idxOutliers])
					colnames(votes) = c("expectedClasses", "votingMembers")
					trueMajorityVotes[i] = which.max(by(votes, votes[,"expectedClasses"], sum))
				}
			}
			if (object$regression)	{ predictedObject = trueMajorityVotes }
			else {	predictedObject = as.factor(trueMajorityVotes); levels(predictedObject) = classes }
		}
	}	
	if (type[1] == "confInt")	
	{  
		if (!object$regression)
		{ stop( "confidence interval can only be computed for regression") }
		else
		{	
			alpha1 = (1 - conf)/2
			alpha2 = conf + alpha1
			Q_1 = apply(predObject$all.votes, 1, function(Z) quantile(Z, alpha1))
			Q_2 = apply(predObject$all.votes, 1, function(Z) quantile(Z, alpha2))
			SD = apply(predObject$all.votes, 1, function(Z) sd(Z))
			predictedObject = data.frame(cbind(predObject$majority.vote, Q_1, Q_2, SD))
			Q1Name = paste("LowerBound", "(q = ", round(alpha1,3), ")", sep ="")
			Q2Name = paste("UpperBound", "(q = ", round(alpha2,3), ")",sep ="")
			colnames(predictedObject) = c("Estimate", Q1Name, Q2Name, "standard deviation")
		}		
	}
	if (type[1] == "quantile")	
	{  
		if (!object$regression)
		{ stop( "quantile(s) can only be computed for regression") }
		else
		{	
			if (!is.numeric(whichQuantile)) 
			{ 
				stop( "Please use option 'whichQuantile', providing as argument a numeric value greater than 0 and lower than 1.\n") 
			}			
			if ( (whichQuantile <= 0) || (whichQuantile >= 1))  
			{ stop( "Please provide for 'whichQuantile' option, a numeric value than 0 and lower than 1.\n") }			
			predictedObject = apply(predObject$all.votes, 1, function(Z) quantile(Z, whichQuantile))
		}		
	}	
	if (type[1] == "all")	
	{  
		predictedObject = predObject
		if (object$regression)
		{ 
			stdDev = apply(predObject$all.votes, 1, sd)
			confIntObject = data.frame(cbind(predObject$majority.vote, apply(predObject$all.votes, 1, function(Z) quantile(Z,0.025)), 
			apply(predObject$all.votes, 1, function(Z) quantile(Z,0.975)), stdDev))			
			colnames(confIntObject) = c("Estimate", "LowerBound", "UpperBound", "Standard deviation")
			predictedObject$confidenceInterval = confIntObject
		}
	}	
	if (type[1] == "votes")  {	predictedObject = predObject$all.votes }	
	if ( (type[1] == "prob") && (!object$regression) )
	{	
		predictedObject = round(getVotesProbability2(predObject$all.votes, 1:length(classes)), 4) 
		colnames(predictedObject) = classes
	}	
	if ( (type[1] == "prob") && (object$regression) ) { stop("Probabilities can not be computed for regression") }	
	if ( (type[1] == "ranking") && (!object$regression) )
	{
		predictedObject = predictedObject2 = predObject$majority.vote
		if (!is.null(classes)) 
		{  
			if (!is.numeric(as.numeric(classes)))
			{	stop("Class are not numeric values or factor of numeric values. For Ranking, numeric values are needed as an equivalent of each class.") }
			else
			{
				minClass = min(as.numeric(classes))
				minPred = min(predictedObject)
				maxClass = max(as.numeric(classes))
				maxPred = max(predictedObject)				
				if (minClass < (minPred - 1))
				{	
					predictedObject[predictedObject != minPred] = predictedObject[predictedObject != minPred] - 1
					predictedObject = predictedObject - 1
				}
				else
				{
					if (minClass < minPred)
					{ 
				       predictedObject = predictedObject - 1
					}
				}		
				if (maxClass > maxPred)
				{	predictedObject[predictedObject == maxPred] = maxClass  }
			}
		}
		else { stop("Class not found. Please check if model is computed as classification, by looking if Y is set as factor.") }			
		numClasses = sort(unique(predObject$all.votes[,1]))
		probabilities = round(getVotesProbability2(predObject$all.votes, numClasses), 2) 
		colnames(probabilities) = classes
		countPredObject = table(predictedObject2)		
		majorityVote = as.numeric(names(which.max(countPredObject)))				
		n = length(predictedObject)
		followIdx = 1:n		
		if (!is.null(rankingIDs))
		{	
			rankingObject = cbind(followIdx, rankingIDs, predictedObject, probabilities)	
			if (length(classes) > 2)
			{  
				minoritiesProbability = rowSums(probabilities[,-majorityVote])
				rankingObject = cbind(rankingObject, minoritiesProbability)
			}			
			colnames(rankingObject)[1] = "idx"
			if (is.vector(rankingIDs) || is.factor(rankingIDs)) 
			{ 
				lengthRankingIDS = 1 
				colnames(rankingObject)[2] = "ID"
				colnames(rankingObject)[3] = "majority vote"
				cases = sort(unique(rankingIDs))
			}
			else 
			{ 
				lengthRankingIDS = ncol(rankingIDs)
				if (is.null(colnames(rankingIDs)))
				{	colnames(rankingObject)[2:(1+lengthRankingIDS)] = "ID"	}			
				
				colnames(rankingObject)[lengthRankingIDS+2] = "majority vote"
				cases = sort(unique(rankingIDs[,1]))
			}			
			if (length(classes) > 2)
			{  minorityIdx = which(colnames(rankingObject) == "minoritiesProbability") }
			else
			{ 	minorityIdx = which(colnames(rankingObject) == classes[-majorityVote])	}			
			lengthCases = length(cases)
			subCases = vector('list', lengthCases)
			for (i in 1:lengthCases)
			{	subCases[[i]] = which(rankingObject[,2] == cases[i]) }							
			rankingOutputObject <- matrix(NA, n, ncol(rankingObject) + 1)
			for (i in 1:lengthCases)
			{
				if (length(subCases[[i]]) > 1)
				{
					rankingOutputObject[subCases[[i]],] <- as.matrix(cbind(sortMatrix(rankingObject[subCases[[i]],], minorityIdx, decrease = TRUE), 
					1:length(subCases[[i]])))
				}
				else
				{	rankingOutputObject[subCases[[i]],] <- c(rankingObject[subCases[[i]],], 1)	}
			}			
			rankingOutputObject = as.data.frame(rankingOutputObject)			
			for (j in 1:ncol(rankingOutputObject))
			{
				if (is.factor(rankingOutputObject[,j]) && is.numeric(as.numeric(as.vector(rankingOutputObject[,j]))))
				{	rankingOutputObject[,j] = as.numeric(as.vector(rankingOutputObject[,j])) }
			}			
			colnames(rankingOutputObject) = c(colnames(rankingObject), "rank")
			predictedObject = sortMatrix(rankingOutputObject,1)	
		}
		else
		{	
			rankingObject = cbind(followIdx, predictedObject, probabilities)			
			if (length(classes) > 2)
			{  
				minoritiesProbability = rowSums(probabilities[,-majorityVote])
				rankingObject = cbind(rankingObject, minoritiesProbability)
			}			
			colnames(rankingObject)[1] = "idx"
			colnames(rankingObject)[2] = "majority vote"			
			if (length(classes) > 2)
			{  minorityIdx = which(colnames(rankingObject) == "minoritiesProbability") }
			else
			{	minorityIdx = which(colnames(rankingObject) == classes[-majorityVote]) }			
			rankingOutputObject = sortMatrix(rankingObject, minorityIdx, decrease = TRUE)	
			predictedObject = cbind(rankingOutputObject, 1:n)
			colnames(predictedObject)[ncol(predictedObject)] = "rank"
			predictedObject = sortMatrix(predictedObject,1)	
		}
	}		
	if ( (type[1] == "ranking") && (object$regression) )
	{ stop("Ranking currently available for classification tasks") }	
	predictedObject
}
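
# A minimal usage sketch for the predict method above (hedged: 'iris' is only an
# illustrative dataset and the option values are examples, not recommendations).
if (FALSE)
{
	data(iris)
	iris.ruf <- randomUniformForest(Species ~ ., data = iris, threads = 1)
	pred  <- predict(iris.ruf, iris[, -5], type = "response") # class labels
	prob  <- predict(iris.ruf, iris[, -5], type = "prob")     # class probabilities
	votes <- predict(iris.ruf, iris[, -5], type = "votes")    # raw votes of each tree
}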

randomUniformForestCore.predict <- function(object, X, 
rf.aggregate = TRUE, 
OOB.idx = FALSE, 
pr.imbalance = c(0,0),
pr.regression = TRUE,
pr.classwt = FALSE,  
classes = -1,
pr.export = c("onlineClassify", "predictDecisionTree", "majorityClass", "randomWhichMax", "rmNA", "mergeLists", "classifyMatrixCPP", "L2DistCPP", "checkUniqueObsCPP", "crossEntropyCPP", "giniCPP", "L2InformationGainCPP",	"entropyInformationGainCPP", "runifMatrixCPP", "NATreatment"), 
pr.threads = "auto", 
pr.parallelPackage = "doParallel")
{
	## #require(rUniformForestCppClass)	
	if (!OOB.idx)
	{
		if (is.matrix(X))
		{
			matchNA = (length(which(is.na(X))) > 0)		
			if (matchNA) 
			{ 
				cat("NA found in data. Fast imputation (means) is used for missing values. Please use one of many models available if accuracy is needed\n Or use na.impute() function with the option 'na.action = accurateImpute'.")
				X <- na.impute(X)
			}
		}
		else
		{ 
			X <- NAfactor2matrix(X, toGrep = "anythingParticular")	
			matchNA = (length(which(is.na(X))) > 0)		
			if (matchNA) 
			{ 
				cat("NA found in data. Fast imputation (means) is used for missing values. Please use one of many models available if accuracy is needed\n Or use na.impute() function with the option 'na.action = accurateImpute'.\n")
				X <- na.impute(X)
			}
		}
		if (!is.null(object$logX))
		{
			if (object$logX)
			{ 
			   if (is.null(object$categoricalvariables))  {  X <- generic.log(X) }
			   else  {  X[,-object$categoricalvariables] <- generic.log(X[,-object$categoricalvariables]) }
			}
		}		
		object = filter.forest(object)
	}				
	if (!is.null(object$OOB) && (!OOB.idx))
	{  OOB.predicts = object$OOB.predicts; pr.regression = object$regression; object = object$object }
	else
	{
		if (!OOB.idx) 	{ 	pr.regression = object$regression; object = object$object   }			
		if (!is.null(object$variableImportance) && (OOB.idx)) {	object = object$object	}		
	}			
	n = nrow(X)			
	if (!pr.regression)
	{
		if (classes[1] <  1)
		{
			classes = sort(as.numeric(rownames(table(object[[sample(1:length(object),1)]][,"prediction"]))))
			if (classes[1] == 0) {  classes = classes[-1]  }
		}			
		l.class = length(classes)
		class.occur = rep(0,l.class)
	}	
	{
		ntree = length(object)
		pred.X = vector("list", ntree)
		all.votes = nodes.length = nodes.depth = matrix(data = Inf, nrow = n, ncol = ntree)
		majority.vote = vector(length = n)		
		fullDim = ntree*n		
		if ((fullDim < 1e7) || (pr.threads == 1))
		{	pred.X <- lapply(object, function(Z) predictDecisionTree(Z, X))	 }
		else
		{
			{
				## #require(parallel)	
				max_threads = detectCores()				
				if (pr.threads == "auto")
				{	
					if (max_threads == 2) { pr.threads = max_threads }
					else {	pr.threads  = max(1, max_threads - 1)  }
				}
				else
				{
					if (max_threads < pr.threads) 
					{	cat("Note: number of threads is higher than logical threads in this computer.\n") }
				}
				## #require(doParallel)	
				Cl = makePSOCKcluster(pr.threads, type = "SOCK")
				registerDoParallel(Cl)
				chunkSize  <-  ceiling(n/getDoParWorkers())
				smpopts  <- list(chunkSize = chunkSize)
			}						
			pushObject <- function(object, X) lapply(object, function(Z) predictDecisionTree(Z, X))			
			pred.X <- foreach(X = iterators::iter(X, by ='row', chunksize = chunkSize), .combine = mergeLists, .export = pr.export) %dopar%
			pushObject(object, X)									
			stopCluster(Cl)
		}								
		for (i in 1:ntree)
		{	
			all.votes[,i] = pred.X[[i]][,1]
			nodes.length[,i] = pred.X[[i]][,2]
			nodes.depth[,i] = pred.X[[i]][,3]	
		}		
		if (pr.classwt)
		{
			allWeightedVotes = matrix(data = Inf, nrow = n, ncol = ntree)
			for (i in 1:ntree)	{  allWeightedVotes[,i] = object[[i]][nodes.depth[,i], "avgLeafWeight"]	}
		}
		else
		{  allWeightedVotes = NULL }
			
		if (pr.regression) {  majority.vote = rowMeans(all.votes)	}
		else {	majority.vote = majorityClass(all.votes, classes, m.imbalance = pr.imbalance, m.classwt = allWeightedVotes)$majority.vote  }
	}				
	if (rf.aggregate && (!OOB.idx))
	{	
		return(list(all.votes = all.votes, majority.vote = majority.vote, nodes.depth = nodes.depth, nodes.length = nodes.length, 
		votes.data = pred.X))	
	}	
	else
	{
		if (OOB.idx) 
		{  
			if (pr.classwt)	{ return(list(all.votes = all.votes, allWeightedVotes = allWeightedVotes))	}
			else { return(all.votes)	}
		}
		else  {  return(majority.vote) 	}		
	}
}

majorityClass <- function(all.votes, classes, m.regression = FALSE, m.imbalance = c(0,0), m.classwt = NULL, 
m.threads = "auto")
{
	if (m.regression)
	{	
		all.votes[all.votes == Inf] = NA  
		majority.vote = rowMeans(all.votes, na.rm = TRUE) 
		class.counts = NULL	
		return(list(majority.vote = majority.vote, class.counts = class.counts))
	}
	else
	{
		n = nrow(all.votes)
		majority.vote = vector(length = n)
		l.class = length(classes)
		class.occur = vector(length = l.class)
		class.countsMajorityVote = trueClass.counts = matrix(NA, n, l.class)
		for (i in 1:n)
		{ 
			if (!is.null(m.classwt))
			{	
				for (j in 1:l.class) 
				{
					idx = rmNA(which(all.votes[i,] == classes[j]))
					l.idx = length(idx)									
					if (l.idx > 0) {   class.occur[j] = sum(m.classwt[i,idx]) }
					else	{ class.occur[j] = 0 }
				}				
			}
			else
			{
				for (j in 1:l.class) { 	class.occur[j] <- sum(all.votes[i,] == classes[j])  }
			}				
			if (m.imbalance[1] != 0)
			{	class.occur[m.imbalance[1]] = floor(class.occur[m.imbalance[1]]*m.imbalance[2]) } 
			majority.vote[i] = randomWhichMax(class.occur)
			class.countsMajorityVote[i,] = c(class.occur[majority.vote[i]], class.occur[-majority.vote[i]])
			trueClass.counts[i,]  = class.occur
		}
		return(list(majority.vote = majority.vote, class.counts = class.countsMajorityVote, trueClass.counts = trueClass.counts))
	}	
}
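
# Hedged toy check of the voting logic above: 5 observations, 4 trees and
# classes {1, 2}; the votes are simulated, not real forest output.
if (FALSE)
{
	votes <- matrix(sample(1:2, 20, replace = TRUE), nrow = 5, ncol = 4)
	out <- majorityClass(votes, classes = 1:2)
	out$majority.vote     # per-observation majority label
	out$trueClass.counts  # per-observation vote counts for each class
}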

getVotesProbability  <- function(X, classes) (majorityClass(X, classes)$class.counts/ncol(X))

getVotesProbability2  <- function(X, classes) (majorityClass(X, classes)$trueClass.counts/ncol(X))

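# The function below follows Breiman (2001): writing 's' for the mean strength of
# the trees and 'rho' for their average correlation, the generalization error of
# the forest in classification is bounded by PE* <= rho * (1 - s^2) / s^2, which
# is the 'PE' value returned. For regression, the forest error is estimated as
# PE.forest = mean.corr * (mean standard deviation of a tree)^2.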
strength_and_correlation <- function(OOB.votes, OOB.object, 
	rf.classes, 
	s.trainLabels = NULL, 
	s.regression = FALSE, 
	output = NULL, 
	s.threads = "auto", 
	s.parallelPackage = "doParallel" )
{
	j = NULL
	dimOOBvotes = dim(OOB.votes)	
	n.OOB = dimOOBvotes[1]
	p.OOB = dimOOBvotes[2]	
	if (s.regression)
	{
		OOB.votes[OOB.votes == Inf] = NA		
		if (is.null(s.trainLabels))  { Y = rowMeans(OOB.votes, na.rm =TRUE)	}
		else {  Y = s.trainLabels  }			
		expectedSquaredErrorOverTrees = colMeans(  (OOB.votes - Y)^2, na.rm = TRUE)
		PE.tree = sum(expectedSquaredErrorOverTrees)/length(expectedSquaredErrorOverTrees)
		sd.T =  sqrt(expectedSquaredErrorOverTrees)
		mean.corr = mean(rowMeans(cor((Y - OOB.votes), use = "pairwise.complete.obs")))		
		if (is.na(mean.corr))
		{	
			cat("Not enough data to compute average correlation for trees. Error is then prediction error of a tree.\n")
			return(list(PE.forest = (mean(sd.T))^2,  PE.max = PE.tree, PE.tree = PE.tree, mean.corr = mean.corr))
		}
		else
		{	return(list(PE.forest = mean.corr*(mean(sd.T))^2,  PE.max = mean.corr*PE.tree, PE.tree = PE.tree, mean.corr = mean.corr))	}
	}
	else
	{
		## #require(parallel)		
		max_threads = detectCores()		
		if (s.threads == "auto")
		{	s.threads  = max(1, max_threads - 1)  }
		else
		{
			if (max_threads < s.threads) 
			{	cat("Note: number of threads is higher than logical threads in this computer.\n") }
		}
		{	
			## #require(doParallel)
			Cl <- makePSOCKcluster(s.threads, type ="SOCK")
			registerDoParallel(Cl)
		}
		chunkSize  <-  ceiling(p.OOB/getDoParWorkers())
		smpopts  <-  list(chunkSize = chunkSize)	
		p.new.OOB = apply(OOB.votes, 1, function(Z) sum(Z != Inf))			
		if (length(rf.classes) == 2)
		{
			Q.x.1 = OOB.object$class.counts[,1]/rowSums(OOB.object$class.counts)
			rawStrength = 2*Q.x.1 - 1			
			Tk.1 = Tk.2 = matrix (data = NA, ncol = p.OOB, nrow = n.OOB)
			OOB.votes.1 = cbind(OOB.votes, OOB.object$majority.vote)
			Tk.1 <- foreach(j = 1:p.OOB, .options.smp = smpopts, .combine = cbind, .multicombine = TRUE) %dopar%
			apply(OOB.votes.1[,-j], 1, function(Z) sum(Z[-p.OOB] == Z[p.OOB]))
			Tk.1 = Tk.1/p.new.OOB 
			Tk.2 = 1 - Tk.1
			stopCluster(Cl)
		}
		else
		{
			nn = rowSums(OOB.object$class.counts)
			Q.x.j = apply(OOB.object$class.counts[,-1], 1, max)
			Q.x.y = OOB.object$class.counts[,1]			
			rawStrength = (Q.x.y - Q.x.j)/nn
			maj.class.j = vector(length = n.OOB)
			for (i in 1:n.OOB)
			{  
				second.class.max = randomWhichMax(OOB.object$class.counts[i,-1])
				maj.class.j[i] = rf.classes[-OOB.object$majority.vote[i]][second.class.max]
			}
			OOB.votes.1 = cbind(OOB.votes, OOB.object$majority.vote)
			OOB.votes.2 = cbind(OOB.votes,  maj.class.j)			
			ZZ <- function(j) cbind(apply(OOB.votes.1[,-j], 1, function(Z) sum(Z[-p.OOB] == Z[p.OOB])), apply(OOB.votes.2[,-j], 1, 
				function(Z) sum(Z[-p.OOB] == Z[p.OOB])))					
			Tk <- foreach(j = 1:p.OOB, .options.smp = smpopts, .combine = cbind, .multicombine = TRUE) %dopar% ZZ(j)		
			mixIdx = getOddEven(1:ncol(Tk))			
			Tk.1 = Tk[,mixIdx$odd]
			Tk.2 = Tk[,mixIdx$even]
			Tk.1 = Tk.1/p.new.OOB
			Tk.2 = Tk.2/p.new.OOB
			stopCluster(Cl) 
		}				
		p1 = colMeans(Tk.1)
		p2 = colMeans(Tk.2)		
		strength = mean(rawStrength)
		varStrength =  var(rawStrength)
		# standard deviation of the raw margin: Var(rmg) = E[rmg^2] - (E[rmg])^2 = p1 + p2 - (p1 - p2)^2
		sd.T = (p1 + p2 - (p1 - p2)^2)^0.5		
		mean.corr = varStrength / (mean(sd.T))^2			
		PE.est = mean.corr*(1 -	strength^2)/strength^2		
		return(list(PE = PE.est, avg.corr = mean.corr, strength = strength, std.strength = sqrt(varStrength)))
	}
}

monitorOOBError  <- function(OOB.votes, Y, regression = FALSE, threads = "auto", f = L2Dist)
{
	j = NULL
	n = length(Y)
	n.RS = sqrt(n)
	p = ncol(OOB.votes)	
	## #require(parallel)	
	max_threads = detectCores()		
	if (threads == "auto")
	{	
		if (max_threads == 2) { threads = max_threads }
		else {	threads  = max(1, max_threads - 1)  }
	}
	else
	{
		if ((max_threads) < threads) 
		{	 cat("Note: number of threads is higher than logical threads in this computer\n.") }
	}	
	## #require(doParallel)			
	Cl = makePSOCKcluster(threads, type = "SOCK")
	registerDoParallel(Cl)	
	chunkSize  <-  ceiling(p/getDoParWorkers())
	smpopts  <- list(chunkSize  =  chunkSize)
	if (!regression)
	{
		Y = as.numeric(Y)
		classes = sort(unique(Y))
		S1 = sample(classes,1)
		S2 = classes[-S1][1]
		
		OOBmonitor <- foreach(j = 1:(p-1), 
		.export = c("generalization.error", "confusion.matrix", "majorityClass", "rmNA", "randomWhichMax"), 
		.options.smp = smpopts, .combine = rbind) %dopar%
		{
			Estimate <- generalization.error(confusion.matrix(majorityClass(OOB.votes[,1:(j+1)], classes)$majority.vote, Y))
			C1 <- generalization.error(confusion.matrix(majorityClass(OOB.votes[,1:(j+1)], classes, m.imbalance =c(1, 1.5))$majority.vote, Y))
			C2 <- generalization.error(confusion.matrix(majorityClass(OOB.votes[,1:(j+1)], classes, m.imbalance= c(2, 1.5))$majority.vote, Y))
			
			t(c(Estimate, C1, C2))
		}
		
		E0 <- generalization.error(confusion.matrix(majorityClass(matrix(OOB.votes[,1]), classes)$majority.vote, Y))
		C1 <- generalization.error(confusion.matrix(majorityClass(matrix(OOB.votes[,1]), classes, m.imbalance =c(1, 1.5))$majority.vote, Y))
		C2 <- generalization.error(confusion.matrix(majorityClass(matrix(OOB.votes[,1]), classes, m.imbalance= c(2, 1.5))$majority.vote, Y))
		OOBmonitor = rbind(t(c(E0, C1, C2)),OOBmonitor)
	}
	else
	{
		OOBmonitor <- foreach(j = 1:(p-1), .export = c("generalization.error", "confusion.matrix", "majorityClass", "rmNA", "L2Dist", "L1Dist"), .options.smp = smpopts, .combine = c) %dopar%
		{
			Z = majorityClass(OOB.votes[,1:(j+1)], 0, m.regression = TRUE)$majority.vote
			NAIdx = which(is.na(Z))
			if (length(NAIdx) > 0) { f(Z[-NAIdx], Y[-NAIdx])/length(Z[-NAIdx])}
			else { f(Z, Y)/length(Z)  }
		}		
		Z = majorityClass(matrix(OOB.votes[,1]), 0, m.regression = TRUE)$majority.vote
		NAIdx = which(is.na(Z))
		E0 = if (length(NAIdx) > 0) { f(Z[-NAIdx], Y[-NAIdx])/length(Z[-NAIdx])} else { f(Z, Y)/length(Z)  }
		OOBmonitor = c(E0,OOBmonitor)
	}	
	stopCluster(Cl)
	return(OOBmonitor)
}
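
# Hedged toy sketch for the OOB monitoring above: simulated votes (not a real
# forest), just to show the expected shapes of the inputs and output.
if (FALSE)
{
	Y <- factor(sample(1:2, 50, replace = TRUE))
	OOB.votes <- matrix(sample(1:2, 50*20, replace = TRUE), 50, 20)
	err <- monitorOOBError(OOB.votes, Y, regression = FALSE, threads = 1)
	plot(err[,1], type = "l", xlab = "trees", ylab = "OOB error estimate")
}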

weightedVote <- function(all.votes, idx = 2, granularity = 2)
{
	all.votes <- round(all.votes, granularity)	
	apply(all.votes, 1, function(Z)
		{
			A = sort(table(rmInf(Z)), decreasing = TRUE)
			B = as.numeric(names(A))[1:idx]
			sum( B * (B/sum(abs(B))) )
		}
	)
}
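
# Hedged toy sketch: weightedVote() blends, for each observation, the 'idx' most
# frequent (rounded) votes, each weighted by its share of their absolute sum.
if (FALSE)
{
	votes <- matrix(rnorm(10*100, mean = 5), 10, 100)
	weightedVote(votes, idx = 2, granularity = 1)
}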

weightedVoteModel <- function(votes, majorityVote, Y = NULL, nbModels = 1, idx = 1 , granularity = 1, train = TRUE, models.coeff = NULL)
{
	if (train)
	{
		if (is.null(Y))
		{ stop("output is neded to build model") }		
		if (nbModels == 1)
		{
			default.model = weightedVote(votes, idx = idx, granularity = granularity)
			lin.model = lm(Y ~ cbind(majorityVote, default.model))
		}
		else
		{
			models = matrix(data = NA, ncol = nbModels + 2, nrow = length(Y))			
			models[,1] = majorityVote
			models[,2] = weightedVote(votes, idx = idx, granularity = granularity)  
			for (j in 3:(nbModels+2))
			{	models[,j] = weightedVote(votes, idx = max(j,idx), granularity = j ) }
			lin.model = lm(Y ~ models)
		}		
		return(lin.model)
	}
	else
	{
		p = length(models.coeff)
		models = matrix(data = NA, ncol = p, nrow = length(majorityVote))			
		models[,1] = rep(1,length(majorityVote))
		models[,2] = majorityVote
		models[,3] = weightedVote(votes, idx = idx, granularity = granularity)		
		if (p > 3)
		{
			for (j in 4:p)
			{	models[,j] = weightedVote(votes,  idx =j  , granularity = j)	}
		}		
		newMajorityVote = apply(models,1, function(Z) sum(models.coeff*Z))		
		return(newMajorityVote )
	}
}
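
# Design note (hedged sketch): weightedVoteModel() linearly blends the forest
# mean estimate with one or more weightedVote() summaries by OLS against the
# observed responses, then reapplies the fitted coefficients at prediction time.
if (FALSE)
{
	Y <- rnorm(50, mean = 10)
	votes <- matrix(rnorm(50*30, mean = Y), 50, 30)
	meanVote <- rowMeans(votes)
	fit <- weightedVoteModel(votes, meanVote, Y = Y)
	blended <- weightedVoteModel(votes, meanVote, train = FALSE, 
		models.coeff = fit$coefficients)
}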

postProcessingVotes <- function(object, nbModels = 1, idx = 1, granularity = 1, predObject = NULL, 
swapPredictions = FALSE, X = NULL, Xtest = NULL, imbalanced = FALSE, OOB = FALSE,
method = c("cutoff", "bias", "residuals"), 
keep2ndModel = FALSE, 
largeData = FALSE, ...)
{
	object <- filter.object(object)
	if (rownames(object$forestParams)[1] == "reduceDimension") 
	{ stop("Post Processing does not work with objects coming from rUniformForest.big() function") }	
	if (OOB)
	{
		predObject = list()
		predObject$all.votes = object$forest$OOB.votes
		predObject$majority.vote = object$forest$OOB.predicts
	}	
	if (!object$forest$regression)
	{
		if (length(as.numeric(object$classes)) > 2)
		{	stop("Optimization currently works only for binary classification") }
		
		if (is.null(X))
		{	stop("Please provide test data") }
		else
		{
			if (is.null(predObject)) {	predObject <- predict(object, X, type = "all")	}
			else
			{
				if (is.null(predObject$all.votes)) 
				{ stop("Please provide full prediction object (option type = 'all' when calling predict() function).") }
				
				if (is.null(predObject$majority.vote)) 
				{ stop("Please provide full prediction object (option type = 'all' when calling predict() function).") }
			}
			
			majorityVotePosition = which.max(table(predObject$majority.vote))
			numClasses = sort(rmInf(unique(predObject$all.votes[,1])))
			probPred = round(getVotesProbability2(predObject$all.votes, numClasses), 4) 
			colnames(probPred) = object$classes
			
			if (imbalanced) 
			{ 
				cutoff =  1 - ( mean(probPred[,2])/mean(probPred[,1]) )
				predVotes = predict(object, X, classcutoff = c(object$classes[majorityVotePosition], cutoff))
			}
			else 
			{ 	
				if (method[1] == "cutoff")
				{
					cutoff = 0.5/2*( mean(probPred[,2])/mean(probPred[,1]) + mean(probPred[,1])/mean(probPred[,2]) ) 
					predVotes = predict(object, X, classcutoff = c(object$classes[majorityVotePosition], cutoff))
				}
				else
				{
					if (length(object$classes) > 2) 
					{ stop("'bias' method does not apply to multi-class classification.") }
					
					classAsNumeric = as.numeric(object$y)
					classAsNumeric = classAsNumeric - min(classAsNumeric)
					bias = round(mean(object$forest$OOB.predicts - 1) - mean(classAsNumeric), 4)
					probPred = predict(object, X, type = "prob")
					probPred[,2] = probPred[,2] - bias
					probPred[,1] = probPred[,1] + bias
					predVotes = apply(probPred, 1, which.max)
					predVotes = as.factor(object$classes[predVotes])
				}
			}			
			return(predVotes)
		}
	}	
	if (swapPredictions) 
	{ 
		object$forest$OOB.votes = predObject$forest$OOB.votes 
		object$forest$OOB.predicts = predObject$forest$OOB.predicts
	}	
	if (is.null(object$forest$OOB.votes)) 
	{ 
		stop("No OOB data for post processing. Please enable OOB option and subsamplerate or bootstrap ('replace' option) when computing model.") 
	}			
	if (is.null(object$predictionObject))
	{
		if (is.null(predObject)) { stop("Post processing can not be computed. Please provide prediction object") }
		else 
		{  
			if (is.null(predObject$majority.vote))
			{ 
				stop("Post processing can not be computed. Please provide full prediction object (type = 'all') when calling predict()") 
			}
			else
			{			
				meanEstimate = predObject$majority.vote
				allVotes = predObject$all.votes
			}
		}
	}
	else
	{   
		meanEstimate = object$predictionObject$majority.vote
		allVotes = object$predictionObject$all.votes
	}	
	Y = object$y
	OOBVotes = object$forest$OOB.votes	
	NAIdx = which.is.na(object$forest$OOB.predicts)	
	if (length(NAIdx) > 0)
	{
		Y = Y[-NAIdx]
		OOBVotes = OOBVotes[-NAIdx,]
		object$forest$OOB.predicts = object$forest$OOB.predicts[-NAIdx]
	}	
	if (method[1] == "residuals")
	{
		modelResiduals = object$forest$OOB.predicts - Y
		if (largeData) { rufResiduals = rUniformForest.big(X, modelResiduals, xtest = Xtest,...) }
		else { rufResiduals = randomUniformForest(X, modelResiduals, xtest = Xtest,...) }		
		newEstimates = object$predictionObject$majority.vote - rufResiduals$predictionObject$majority.vote
		if (keep2ndModel) { return(list(predictions = newEstimates, modelForResiduals = rufResiduals)) }
		else { return(newEstimates) }
	}
	else
	{
		OOBMeanEstimate = object$forest$OOB.predicts
		L2DistOOBMeanEstimate <- L2Dist(OOBMeanEstimate, Y)/length(Y)		
		OOBMedianEstimate <- apply(OOBVotes, 1, function(Z) median(rmInf(Z)))
		L2DistOOBMedianEstimate <- L2Dist(OOBMedianEstimate, Y)/length(Y)		
		weightedModel <- weightedVoteModel(OOBVotes, OOBMeanEstimate, Y = Y, nbModels = nbModels, idx = idx, granularity = granularity)
		OOBWeightedVoteEstimate <- weightedVoteModel(OOBVotes, OOBMeanEstimate, train = FALSE, models.coeff = weightedModel$coefficients)		
		NAOOBWeightedVoteEstimate <- which.is.na(OOBWeightedVoteEstimate)		
		if (length(NAOOBWeightedVoteEstimate) > 0) 
		{ OOBWeightedVoteEstimate[NAOOBWeightedVoteEstimate] = OOBMeanEstimate[NAOOBWeightedVoteEstimate] }		
		L2DistOOBWeightedVoteEstimate <- L2Dist(OOBWeightedVoteEstimate, Y)/length(Y)
		flagMedian =  ( (L2DistOOBMedianEstimate <= L2DistOOBMeanEstimate) | (L1Dist(OOBMedianEstimate, Y) <= L1Dist(OOBMeanEstimate, Y)) )
		flagWeighted = ( (L2DistOOBWeightedVoteEstimate <= L2DistOOBMeanEstimate) | (L1Dist(OOBWeightedVoteEstimate, Y) <= L1Dist(OOBMeanEstimate, Y)) )		
		if (flagMedian)
		{
			if (flagWeighted)
			{  weightedVoteEstimate <- weightedVoteModel(allVotes, meanEstimate, train = FALSE, models.coeff = weightedModel$coefficients) }
			else
			{  weightedVoteEstimate = rep(0, length(meanEstimate)) }
		}
		else
		{   
			weightedVoteEstimate <- weightedVoteModel(allVotes, meanEstimate, train = FALSE, models.coeff = weightedModel$coefficients) 
		}		
		medianEstimate <- apply(allVotes, 1, function(Z) median(rmInf(Z)))	
		NAWeightedVoteEstimate <- which.is.na(weightedVoteEstimate)
		if (length(NAWeightedVoteEstimate) > 0) { weightedVoteEstimate[NAWeightedVoteEstimate] = meanEstimate[NAWeightedVoteEstimate] }
		negativeBias = - (mean(OOBMeanEstimate) - Y)
		modelResiduals = Y - OOBMeanEstimate
		biasModel  = lm(modelResiduals ~ negativeBias) # Z5
		biasSign = sign(meanEstimate - medianEstimate)   
		biasSign[which(biasSign == 0)] = 1
		residuals_hat = vector(length = ncol(OOBVotes))	
		residuals_hat <- apply(OOBVotes, 2, function(Z) sum(rmInf(Z))/length(rmInf(Z))) - mean(OOBMeanEstimate)
		MeanNegativeBias = -mean(residuals_hat^2)
		biasCorrection = biasModel$coefficients[2]*biasSign*MeanNegativeBias + biasModel$coefficients[1]
		if (flagMedian)
		{ 
			if (flagWeighted) { return( 0.5*(medianEstimate + weightedVoteEstimate + biasCorrection)) }
			else { return(medianEstimate) }
		}
		else
		{ return(weightedVoteEstimate + biasCorrection) }
	}
}
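
# Hedged usage sketch (regression): the model must keep OOB information and a
# prediction object, e.g. by passing a test set when training; data are simulated.
if (FALSE)
{
	n <- 200; X <- matrix(rnorm(n*5), n, 5); Y <- X[,1] + rnorm(n)
	ruf <- randomUniformForest(X, Y, xtest = X, ytest = Y, threads = 1)
	newEstimates <- postProcessingVotes(ruf)
}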




localVariableImportance <- function(object, nbVariables = 2, Xtest = NULL, predObject = NULL, l.threads = "auto", l.parallelPackage = "doParallel")
{
	object = filter.object(object)
	rufObject = object$forest$object	
	if (is.null(predObject)) 
	{ 
		if (is.null(Xtest))
		{ stop("Local variable importance can not be computed. please provide test data.") }
		else
		{
			predObject = predict(object, Xtest, type = "all")
			majority.vote = as.numeric(predObject$majority.vote) 
			pred.rufObject = predObject$votes.data
		}
	}
	else 
	{  
		if (is.null(predObject$majority.vote))
		{ 
			stop("Local variable importance can not be computed. Please provide full prediction object (type = 'all') when calling predict()") 
		}
		else
		{			
			majority.vote = as.numeric(predObject$majority.vote) 
			pred.rufObject = predObject$votes.data
		}		
	}
	ntree = length(rufObject)
	if (is.null(pred.rufObject))
	{ stop ("no data to evaluate importance") }
	else
	{
		n = dim(pred.rufObject[[1]])[1]
		{
			## #require(parallel)
			threads = l.threads
			max_threads = detectCores()			
			if (threads == "auto")
			{	
				if (max_threads == 2) { threads = max_threads }
				else {	threads  = max(1, max_threads - 1)  }
			}
			else
			{
				if ((max_threads) < threads) 
				{	cat("Note: number of threads is higher than logical threads in this computer.\n") }
			}
			{
				## #require(doParallel)				
				Cl = makePSOCKcluster(threads, type = "SOCK")
				registerDoParallel(Cl)
			}
			chunkSize  <- ceiling(ntree/getDoParWorkers())
			smpopts  <- list(chunkSize = chunkSize)
		}
		b = NULL
		varMatrix <- foreach(b = 1:ntree, .options.smp = smpopts, .combine = rbind, .multicombine = TRUE) %dopar%	
		{ 	
			predVar = vector(length = n)
			for (i in 1:n)
			{
				predIdx = pred.rufObject[[b]][i,3]
				predVar[i] = rufObject[[b]][which(rufObject[[b]][,"left daughter"] == predIdx 
				| rufObject[[b]][,"right daughter"] == predIdx), "split var"][1]
			}						
			predVar
		}
		stopCluster(Cl)
		varImp = varFreq = matrix(data = NA, ncol = nbVariables, nrow = n)
		varImp <- t(apply(varMatrix, 2, function(Z) 
			as.numeric(names(sort(table(Z), decreasing = TRUE)[1:nbVariables])) ))
		varFreq <- t(apply(varMatrix, 2, function(Z)  sort(table(Z), decreasing = TRUE)[1:nbVariables]))/ntree	
		NAIdx = which(is.na(varImp), arr.ind = TRUE)		
		if (length(NAIdx[,1]) > 0)
		{	
			varImp[NAIdx] = apply(NAIdx, 1, function(Z) { 
												newZ = rmNA(varImp[Z])
												if (length(newZ) == 1) { rep(newZ, length(Z)) } else  { sample(newZ,1)  }
											}
							)
			varFreq[NAIdx] = apply(NAIdx, 1, function(Z) { 
												newZ = rmNA(varFreq[Z])
												if (length(newZ) == 1) { rep(newZ, length(Z)) } else  { sample(newZ,1)  }
											}
							)				
		}		
		objectNames = objectFrequencyNames = vector()
		for (j in 1:nbVariables)
		{	
			objectNames[j] = paste("localVariable", j, sep = "")
			objectFrequencyNames[j] = paste("localVariableFrequency", j, sep = "") 
		}	
		variableImportance.object = cbind(majority.vote, varImp, varFreq)				
		if (!object$forest$regression)
		{
			colnames(variableImportance.object) = c("class", objectNames, objectFrequencyNames)		
			classObject = as.numeric(names(table(variableImportance.object[,1])))		
			classMatrix = list()		
			for (i in 1:length(classObject))
			{		
				classMatrix[[i]] = round(sort(table(variableImportance.object[which(variableImportance.object[,1] == i),2])/
					sum(table(variableImportance.object[which(variableImportance.object[,1] == i),2])), decreasing = TRUE),2)
			}		
			orderedVar = unique(as.numeric(names(sort(unlist(classMatrix), decreasing = TRUE))))
			Variables = as.numeric(names(sort(unlist(classMatrix), decreasing = TRUE)))
			orderedImportance =  as.numeric(sort(unlist(classMatrix), decreasing = TRUE))			
			classVariableImportance = matrix(data = 0, ncol = length(classObject), nrow = length(orderedVar))
			for (i in 1:length(orderedVar))
			{	
				for (j in 1:length(classObject))
				{	
					idx = which( as.numeric(names(classMatrix[[j]])) == orderedVar[i])
					if (length(idx) > 0)
					{ classVariableImportance[i,j] = classMatrix[[j]][idx]	}
				}
			}		
			classVariableImportance = data.frame(classVariableImportance)
			rownames(classVariableImportance) = orderedVar
			colnames(classVariableImportance) = paste("Class", classObject, sep=" ")				
			return(list( obsVariableImportance = variableImportance.object, classVariableImportance = classVariableImportance ))
		}
		else
		{
			colnames(variableImportance.object) = c("estimate", objectNames, objectFrequencyNames)
			return(variableImportance.object)			
		}
	}
}
	
localTreeImportance <- function(rfObject, OOBPredicts = NULL, OOBVotes = NULL)
{
	rfObject = filter.object(rfObject)  
	if (!is.null(rfObject$OOB.votes))	{ OOBVotes = rfObject$OOB.votes }
	if (is.null(OOBVotes))	{ stop("OOB outputs missing. No optimization or weighted vote can be done.") }
	if (!is.null(rfObject$OOB.predicts))	{	OOBPredicts = rfObject$OOB.predicts	}	
	n = length(OOBPredicts)  
	wPlus = wMinus = NULL
	for ( i in seq_along(OOBPredicts))
	{
		wPlus = c(wPlus, which(OOBVotes[i,] == OOBPredicts[i]))
		wMinus = c(wMinus, which(OOBVotes[i,] != OOBPredicts[i]))
	}	
	object = list(pweights = (table(wPlus)/n), nweights = (table(wMinus)/n))	
	return(object)
}

importance.randomUniformForest <- function(object, maxVar = 30, maxInteractions = 3, Xtest = NULL, 
predObject = NULL, ...)
{
	object <- filter.object(object) 
	maxInteractions = max(2, maxInteractions)	
	if (!is.null(Xtest))
	{
		if (!is.null(object$formula) && (length(object$variablesNames) != ncol(Xtest)))
		{
			mf <- model.frame(formula = object$formula, data = as.data.frame(Xtest))
			Xtest <- model.matrix(attr(mf, "terms"), data = mf)[,-1]
			if (object$logX) { 	Xtest <- generic.log(Xtest) }		
		}
		else
		{		
			Xfactors <- which.is.factor(Xtest)
			catVarIdx <- which(rownames(object$paramsObject) == "categorical variables")
			Xtest <- NAfactor2matrix(Xtest, toGrep = "anythingParticular")
			matchNA = (length(which(is.na(Xtest))) > 0)				
			if (matchNA) 
			{ 
				cat("NA found in data. Fast imputation (means) is used for missing values\n")
				Xtest <- na.impute(Xtest)
			}			
			if (object$logX)
			{ 	
				trueFactorsIdx <- which(Xfactors == 1)
				if (length(trueFactorsIdx) ==  0)  {  Xtest <- generic.log(Xtest) }
				else  
				{  
					if (!is.null(object$paramsObject[1,catVarIdx]))
					{	
						cat("Warning : all categorical variables have been ignored for the logarithm transformation.\nIf only some of them have been defined as categorical, variable importance will be altered.\n To avoid it, please define logarithm transformation outside of the model or ignore categorical variables, or set them to 'all'.\n")
						Xtest[,-trueFactorsIdx] <- generic.log(Xtest[,-trueFactorsIdx])						
					}
					else
					{ 	Xtest <- generic.log(Xtest)	}
				}
			}
		}		
	}	
	if (!is.null(object$forest$variableImportance))
	{	
		par(las=1) 
		maxChar = floor(1 + max(nchar(object$variablesNames))/2)		
		par(mar=c(5, maxChar + 1,4,2)) 		
		varImportance1 = varImportance = object$forest$variableImportance
		if (!object$forest$regression)
		{
			varImportance[,"class"] = object$classes[as.numeric(varImportance[,"class"])]
			varImportance1[,"class"] = varImportance[,"class"]
		}		
		nVar = nrow(varImportance)
		nVarPlot = min(nVar, maxVar)
		if (nVar > maxVar) { varImportance = varImportance[1:maxVar,] }	
		barplot(varImportance[nVarPlot:1, "percent.importance"], horiz = TRUE, col = sort(heat.colors(nVarPlot), 
		decreasing = TRUE), 
		names.arg = varImportance[nVarPlot:1,"variables"], xlab = "Relative Influence (%)", 
		main = if (object$forest$regression)  { "Variable Importance based on 'Lp' distance" }  
		else  { "Variable Importance based on information gain" }, border = NA)
		abline(v = 100/nVar, col = 'grey')				
		localVarImportance <- localVariableImportance(object, nbVariables = maxInteractions, Xtest = Xtest, 
		predObject = predObject)		
		if (!object$forest$regression)
		{
			cat("\n1 - Global Variable Importance (", min(maxVar, nVar), " most important based on information gain) :\n", sep = "")
			cat("Note: most predictive features are ordered by 'score' and plotted. Most discriminant ones\nshould also be taken into account by looking 'class' and 'class.frequency'.\n\n")				
			rownames(localVarImportance$classVariableImportance) = object$variablesNames[as.numeric(rownames(localVarImportance$classVariableImportance))]
			colnames(localVarImportance$classVariableImportance) = paste("Class ", 
				object$classes[as.numeric(rm.string(colnames(localVarImportance$classVariableImportance), "Class"))], sep = "")				
			obsVarImportance = data.frame(localVarImportance$obsVariableImportance)
			obsVarImportance[,1] = object$classes[obsVarImportance[,1]]
		}
		else
		{	
			cat("\n1 - Global Variable Importance (", min(maxVar, nVar), 
			" most important based on 'Lp' distance) :\n", sep = "")
			obsVarImportance = data.frame(localVarImportance) 	
		}
		print(varImportance)		
		for (j in 2:(maxInteractions+1)) { obsVarImportance[,j] = object$variablesNames[obsVarImportance[,j]] }		
		obsVarImportance2 = obsVarImportance
		fOrder = sort(table(obsVarImportance2[,2]), decreasing = TRUE)
		sOrder = sort(table(obsVarImportance2[,3]), decreasing = TRUE)				
		W1 = mean(obsVarImportance2[,grep("localVariableFrequency1", colnames(obsVarImportance2))[1]])
		W2 = mean(obsVarImportance2[,grep("localVariableFrequency2", colnames(obsVarImportance2))[1]])		
		minDim2 = min(length(fOrder), length(sOrder))
		partialDependence = matrix(NA, minDim2, minDim2)
		for (i in 1:minDim2) {  partialDependence[,i] = fOrder[i]*W1/W2 + sOrder[1:minDim2]  }		
		colnames(partialDependence) = names(fOrder)[1:minDim2]
		rownames(partialDependence) = names(sOrder)[1:minDim2]
		partialDependence = partialDependence/(2*nrow(obsVarImportance2))
		SumpartialDependence = sum(partialDependence)
		partialDependence = partialDependence/(SumpartialDependence/ncol(partialDependence))
		avg1rstOrder = colMeans(partialDependence)
		avg2ndOrder = c(rowMeans(partialDependence),0)
		partialDependence = rbind(partialDependence, avg1rstOrder)
		partialDependence = cbind(partialDependence, avg2ndOrder)		
		minDim = min(10, minDim2)
		varImportanceOverInteractions = vector()
		for (i in 1:minDim2)
		{
			idx = which(rownames(partialDependence)[i] == colnames(partialDependence))
			if (length(idx) > 0)  
			{	
				varImportanceOverInteractions[i] = 0.5*(avg1rstOrder[i] + avg2ndOrder[idx] + 
					2*partialDependence[i,idx])  
			}
			else 
			{	varImportanceOverInteractions[i] = avg1rstOrder[i]	}			
			names(varImportanceOverInteractions)[i] = rownames(partialDependence)[i]
		}				
		varImportanceOverInteractions = sort(varImportanceOverInteractions, decreasing = TRUE)		
		cat("\n\n2 - Local Variable importance")
		cat("\nVariables interactions (", minDim, " most important variables at first (columns) and second (rows) order) :", sep = "") 
		cat("\nFor each variable (at each order), its interaction with others is computed.\n\n")
		print(round(partialDependence[c(1:minDim, nrow(partialDependence)),],4))	
		cat("\n\nVariable Importance based on interactions (", minDim, " most important) :\n", sep = "")
		print(round(varImportanceOverInteractions[1:minDim]/sum(varImportanceOverInteractions), 4))
		if (!object$forest$regression)
		{
			cat("\nVariable importance over labels (", minDim, " most important variables conditionally to each label) :\n", sep = "")
			print(localVarImportance$classVariableImportance[1:minDim,])		
			cat("\n\nSee ...$localVariableImportance$obsVariableImportance to get variable importance for each observation.", sep = "")			
			cat("\n\nCall clusterAnalysis() function to get a more compact and complementary analysis.\n Type '?clusterAnalysis' for help.", sep = "")			
		}
		else
		{   cat("\n\nSee ...$localVariableImportance to get variable importance for each observation.")	}
		
		cat("\n\nCall partialDependenceOverResponses() function to get partial dependence over responses\nfor each variable. Type '?partialDependenceOverResponses' for help.\n")	
	}
	else
	{ stop("no variable importance defined in random uniform forest") }	
	importanceObject = list(globalVariableImportance = varImportance1, localVariableImportance = localVarImportance, 
		partialDependence = partialDependence, variableImportanceOverInteractions = varImportanceOverInteractions)		
	class(importanceObject) <- "importance"	
	importanceObject
}
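
# Hedged usage sketch: global importance plus local importance evaluated on a
# test sample ('iris' is used for illustration only).
if (FALSE)
{
	data(iris)
	iris.ruf <- randomUniformForest(Species ~ ., data = iris, threads = 1)
	iris.imp <- importance(iris.ruf, Xtest = iris[, -5], maxInteractions = 2)
}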


partialImportance <- function(X, object, whichClass = NULL, threshold = NULL, thresholdDirection = c("low", "high"), border = NA, nLocalFeatures = 5)
{
	if (is.list(object$localVariableImportance))
	{
		Z = object$localVariableImportance$obsVariableImportance
		whichClassNames <- rm.string(names(object$localVariableImportance$class), "Class ")
		numericClassNames = as.numeric(as.factor(whichClassNames))		
		if (is.null(whichClass) ) { stop("Please provide a class.") }
		if (is.character(whichClass)) { whichClass = numericClassNames[which(whichClassNames == whichClass)] 	}
		whichClass2 = whichClassNames[which(numericClassNames == whichClass)]	
		idx = which(Z[,1] == whichClass)
		Z = Z[idx, ,drop = FALSE]		
		if (dim(Z)[1] <= 1) { stop("Not enough observations found using this class.") }
	}
	else
	{   
		Z = object$localVariableImportance	
		if (!is.null(threshold))
		{ 
			if (thresholdDirection == "low") { 	idx = which(Z[,1] <= threshold) }
			else {  idx = which(Z[,1] > threshold)  }
			Z = Z[idx,]
			
			if (dim(Z)[1] < 1) { stop("No observations found using this threshold.\n") }
		}
	}	
	Z = Z[,-1]
	idxLocalVar = grep("localVariable", colnames(X))
	idxPos = length(idxLocalVar)/2	
	countVars = sort(table(Z[,1:idxPos]), decreasing = TRUE)	
	obsObject = rmNA(countVars[1:nLocalFeatures]/sum(countVars))	
	XNames = colnames(X)
	par(las = 1)
	maxChar = floor(2 + max(nchar(XNames[as.numeric(names(sort(obsObject)))])))/2
	par(mar = c(5, maxChar + 2,4,2))
	barplot(sort(obsObject*100), horiz = TRUE, col = sort(heat.colors(length(obsObject)), decreasing = TRUE), 
	border = border, names.arg = XNames[as.numeric(names(sort(obsObject)))], xlab = "Relative influence (%)", 
		main = if (is.list(object$localVariableImportance)) { paste("Partial importance based on observations over class ", whichClass2, sep ="") }
		else 
		{
			if (!is.null(threshold)) 
			{	
				if (thresholdDirection == "low") { paste("Partial importance based on observations (with Response < ", round(threshold, 4), ")", sep ="") }
				else  { paste("Partial importance based on observations (with Response > ", round(threshold, 4), ")", sep ="") }					
			}
			else {	"Partial importance based on observations" }
		}
	)	
	cat("Relative influence: ", round(rmNA(sum(obsObject))*100, 2), "%\n", sep="")
	return(obsObject)
}
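
# Hedged usage sketch, reusing the 'iris.imp' object computed in the importance
# sketch above: partial importance restricted to one class.
if (FALSE)
{
	pI <- partialImportance(iris[, -5], iris.imp, whichClass = "setosa")
}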

partialDependenceOverResponses <- function(Xtest, importanceObject, 
whichFeature = NULL, 
whichOrder = c("first", "second", "all"), 
outliersFilter = FALSE, 
plotting = TRUE, 
followIdx = FALSE, 
maxClasses = if (is.null(whichFeature)) { 10 } 
else { max(10, which.is.factor(Xtest[, whichFeature, drop = FALSE], count = TRUE)) }, 
bg = "lightgrey")
{
	FeatureValue = Response = Class = Observations = NULL
	if (!is.null(whichFeature))
	{	
		if (is.character(whichFeature)) { whichFeature = which(colnames(Xtest) == whichFeature) }		
		if (length(whichFeature) > 1)
		{ 
			whichFeature = whichFeature[1]
			cat("Only one variable can be computed at the same time\n")
		}
	}	
	if (whichOrder[1] == "first") 	{ idxOrder = 2	}	
	if (whichOrder[1] == "second") 	{ idxOrder = 3	}	
	if (whichOrder[1] == "all") 	
	{ 
		if (is.matrix(importanceObject$localVariableImportance))
		{	idxOrder = 2:length(grep("localVariableFrequency", colnames(importanceObject$localVariableImportance)))	}
		else
		{	idxOrder = 2:length(grep("localVariableFrequency", colnames(importanceObject$localVariableImportance$obsVariableImportance)))	}
	}	
	idx = list()
	if (is.matrix(importanceObject$localVariableImportance)) {	importanceObjectMatrix = importanceObject$localVariableImportance }
	else { importanceObjectMatrix = importanceObject$localVariableImportance$obsVariableImportance }	
	if (is.null(whichFeature))
	{	whichFeature = as.numeric(names(which.max(table(importanceObjectMatrix[,idxOrder[1]]))))	}			
	idx[[1]] = which(importanceObjectMatrix[,idxOrder[1]] == whichFeature)			
	if (length(idxOrder) > 1)
	{
		for (i in 1:length(idxOrder))
		{	idx[[i+1]] = which(importanceObjectMatrix[,1+idxOrder[i]]== whichFeature)	}
	}	
	partialDependenceMatrix = cbind(Xtest[unlist(idx), whichFeature], importanceObjectMatrix[unlist(idx),1], unlist(idx))
	partialDependenceMatrix = sortMatrix(partialDependenceMatrix, 1)	
	NAIdx = unique(which(is.na(partialDependenceMatrix), arr.ind = TRUE)[,1])	
	if (length(NAIdx) > 0) {  partialDependenceMatrix = partialDependenceMatrix[-NAIdx,] }	
	if (outliersFilter && (!is.factor(Xtest[,whichFeature])))
	{
		highOutlierIdx =  which(partialDependenceMatrix[,1] > quantile(partialDependenceMatrix[,1],0.95))
		lowOutlierIdx =  which(partialDependenceMatrix[,1] < quantile(partialDependenceMatrix[,1],0.05))
		if (length(highOutlierIdx) > 0 || length(lowOutlierIdx) > 0) 
		{	partialDependenceMatrix = partialDependenceMatrix[-c(lowOutlierIdx,highOutlierIdx),]	}
	}	
	if (is.vector(partialDependenceMatrix) )
	{ stop ("Not enough points to plot partial dependencies. Please increase order of interaction when computing importance.") }	
	if (dim(partialDependenceMatrix)[1] < 10)
	{ stop ("Not enough points to plot partial dependencies. Please increase order of interaction when computing importance.") }
	else
	{	
		idx = partialDependenceMatrix[,3]
		partialDependenceMatrix = partialDependenceMatrix[,-3]
		flagFactor = 0
		smallLength = length(unique(Xtest[,whichFeature]))
		n = nrow(Xtest)		
		if ( ((smallLength < maxClasses) || is.factor(Xtest[,whichFeature])) && (smallLength/n <  1/5)) 
		{
			featureLevels = levels(as.factor(Xtest[,whichFeature]))
			testNumeric = is.numeric(as.numeric(featureLevels))
			if (testNumeric && length(rmNA(as.numeric(featureLevels))) > 0)
			{  flagFactor = 1 }
			else
			{	
				if (is.matrix(importanceObject$localVariableImportance))
				{  
					classFeature = unique(partialDependenceMatrix[,1])
					B = round(as.numeric(partialDependenceMatrix[,2]),4)		 
					A = as.numeric(factor2matrix(partialDependenceMatrix[,1, drop= FALSE]))
					partialDependenceMatrix = cbind(A,B)
					colnames(partialDependenceMatrix) = c("Class", "Response")
					flagFactor = 1 
					valueFeature = unique(A)
					
					referenceTab = cbind(classFeature, valueFeature)
					colnames(referenceTab) = c("category", "numeric value")
									
					cat("categorical values have been converted to numeric values :\n")
					print(referenceTab)
					cat("\n")
				}
				else
				{	partialDependenceMatrix[,1] = featureLevels[ as.numeric(as.factor(partialDependenceMatrix[,1]))] }
			}
		}
    }		
	if (plotting)
	{
		if (dim(partialDependenceMatrix)[1] < 1)
		{ stop ("Not enough points to plot partial dependencies. Please increase order of interaction when computing importance.") }	
		if (is.matrix(importanceObject$localVariableImportance))
		{
			## #require(ggplot2) || install.packages("ggplot2")
			if ( ((smallLength < maxClasses) || is.factor(Xtest[,whichFeature])) && (smallLength/n <  1/5)) 
		    {
				A = if (flagFactor) { as.factor(partialDependenceMatrix[,1]) } else { partialDependenceMatrix[,1] } 
				B = round(as.numeric(partialDependenceMatrix[,2]),4)				
				partialDependenceMatrix = data.frame(A , B)
				colnames(partialDependenceMatrix) = c("Class", "Response")
								
				plot(qplot(Class, Response, data = partialDependenceMatrix, geom = c("boxplot", "jitter"), 
				outlier.colour = "green", outlier.size = 2.5, fill= Class, main = "Partial dependence over predictor",
				xlab = colnames(Xtest)[whichFeature], ylab = "Response"))
			}
			else
			{			
				colnames(partialDependenceMatrix) = c("FeatureValue", "Response")
				partialDependenceMatrix = data.frame(partialDependenceMatrix)
			
				tt <- ggplot(partialDependenceMatrix, aes(x = FeatureValue, y = Response))
				plot(tt +  geom_point(colour = "lightblue") + stat_smooth(fill = "green", colour = "darkgreen", 
				size = 1) + 
				labs(title = "Partial dependence over predictor", x = colnames(Xtest)[whichFeature], y = "Response"))
			}
		}
		else
		{
			## #require(ggplot2) || install.packages("ggplot2")
			colnames(partialDependenceMatrix) = c("Observations", "Class")
			partialDependenceMatrix = data.frame(partialDependenceMatrix)
			variablesNames = unique(partialDependenceMatrix$Class)
			partialDependenceMatrix$Class = factor(partialDependenceMatrix$Class)
			levels(partialDependenceMatrix$Class) = colnames(importanceObject$localVariableImportance$classVariableImportance)[sort(variablesNames)]			
			if ( ((smallLength < maxClasses) || is.factor(Xtest[,whichFeature])) && (smallLength/n <  1/5))
			{   
				par(las=1)
				if (bg != "none") par(bg = bg)
				mosaicplot(t(table(partialDependenceMatrix)), color = sort(heat.colors(length(featureLevels)), 
			    decreasing = FALSE), border = NA, ylab = colnames(Xtest)[whichFeature],  xlab = "Class",
				main = "Partial dependence over predictor")
			}
			else
			{
				plot(qplot(Class, Observations, data = partialDependenceMatrix, geom = c("boxplot", "jitter"), 
				outlier.colour = "green", outlier.size = 2.5, fill= Class, main = "Partial dependence over predictor",
				xlab = "", ylab = colnames(Xtest)[whichFeature]))
			}
		}
	}
	else
	{
		if (!is.matrix(importanceObject$localVariableImportance))
		{	
			colnames(partialDependenceMatrix) = c("Observations", "Class")
			partialDependenceMatrix = data.frame(partialDependenceMatrix)
			variablesNames = unique(partialDependenceMatrix$Class)
			partialDependenceMatrix$Class = factor(partialDependenceMatrix$Class)
			levels(partialDependenceMatrix$Class) = colnames(importanceObject$localVariableImportance$classVariableImportance)[sort(variablesNames)]
		}
	}	
	if (followIdx)
	{	return(list(partialDependenceMatrix = partialDependenceMatrix, idx = as.numeric(idx) )) }
	else
	{  return(partialDependenceMatrix)  }
}	
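
# Hedged usage sketch, again reusing 'iris.imp' from the importance sketch above.
if (FALSE)
{
	pD <- partialDependenceOverResponses(iris[, -5], iris.imp, 
		whichFeature = "Petal.Length", whichOrder = "all")
}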

partialDependenceBetweenPredictors <- function(Xtest, importanceObject, features, 
whichOrder = c("first", "second", "all"), 
perspective = FALSE, 
outliersFilter = FALSE, 
maxClasses = max(10, which.is.factor(Xtest[,, drop = FALSE], count = TRUE)),
bg = "grey")
{
	Variable1 = Variable2 = SameClass = ..level.. = Response = NULL
	if (length(features) == 1) { stop("Please provide two or more features.") }
	if (length(features) > 2)
	{
		dependenceOverFeatures <- copulaLike(Xtest, importanceObject, whichFeatures = features, 
		whichOrder = whichOrder, maxClasses = maxClasses, outliersFilter = FALSE, bg = bg)
		cat("Dependence over chosen features (note that predictions act as a proxy for fixing dependencies)\nTo see interactions in details, call the function with two features\n")
		print(head(dependenceOverFeatures, 20))
		cat("...\n")
	}	
	## #require(ggplot2)
	graphics.off()	
	pD1 <- partialDependenceOverResponses(Xtest, importanceObject, whichFeature = features[1], whichOrder = whichOrder, 
		outliersFilter = outliersFilter, plotting = FALSE, followIdx = TRUE, maxClasses = maxClasses)	
	pD2 <- partialDependenceOverResponses(Xtest, importanceObject, whichFeature = features[2], whichOrder = whichOrder, 
		outliersFilter = outliersFilter, plotting = FALSE, followIdx = TRUE, maxClasses = maxClasses)	
	sameIdx2 = find.idx(pD1$idx, pD2$idx)
	sameIdx1 = find.idx(pD2$idx, pD1$idx)
	minDim = length(sameIdx1)	
	if ( (minDim < 10) || (length(sameIdx2) < 10)) { stop("Not enough points. Please use option whichOrder = 'all'") }	
	pD1 = pD1$partialDependenceMatrix; pD2 = pD2$partialDependenceMatrix;
	pD11 = factor2matrix(pD1)[sameIdx1,]; pD22 = factor2matrix(pD2)[sameIdx2,]		
	if (!is.matrix(importanceObject$localVariableImportance))
	{
		minN = min(nrow(pD11), nrow(pD22))
		pD11 = pD11[1:minN,]
		pD22 = pD22[1:minN,]		
		idx = ifelse(pD11[,2] == pD22[,2], 1, 0)
		Xi  = cbind(pD11[which(idx == 1), 1], pD22[which(idx == 1), 1])
		Xj  = cbind(pD11[which(idx == 0), 1], pD22[which(idx == 0), 1])		
		if (!is.character(features[1]))
		{ 
			fName1 = which(colnames(Xtest) == colnames(Xtest)[features[1]])
			fName2 = which(colnames(Xtest) == colnames(Xtest)[features[2]])
			features = colnames(Xtest)[c(fName1, fName2)]
		}		
		Xi = as.data.frame(Xi); colnames(Xi) = c("Variable1", "Variable2")
		Xj = as.data.frame(Xj); colnames(Xj) = c("Variable1", "Variable2")		
		Xi = cbind(Xi, rep(1, nrow(Xi)))
		Xj = cbind(Xj, rep(0, nrow(Xj)))		
		colnames(Xj)[3] = colnames(Xi)[3] = "SameClass"
		X = rbind(Xi, Xj)
		X[,3] = ifelse(X[,3] == 1, TRUE, FALSE)
		smallLength = length(unique(Xtest[,features[1]]))
		n = nrow(Xtest)	
		Xnumeric = X
		ggplotFlag1 = ggplotFlag2 = 1
		options(warn = -1)
		if ( ((smallLength < maxClasses) || is.factor(Xtest[,features[1]])) && (smallLength/n <  1/5)) 
		{
			featureLevels = levels(factor(Xtest[,features[1]]))
			testNumeric = is.numeric(as.numeric(featureLevels))
			if (testNumeric && length(rmNA(as.numeric(featureLevels))) > 0)	{ 	ggplotFlag1 = 0 	}
			else  { X[,1] = featureLevels[X[,1]] }			
		}		
		smallLength = length(unique(Xtest[,features[2]]))
		if ( ((smallLength < maxClasses) || is.factor(Xtest[,features[2]])) && (smallLength/n < 1/5)) 
		{
			featureLevels = levels(factor(Xtest[,features[2]]))
			testNumeric = is.numeric(as.numeric(featureLevels))
			if (testNumeric && length(rmNA(as.numeric(featureLevels))) > 0)	{ 	ggplotFlag2 = 0 	}
			else  { X[,2] = featureLevels[X[,2]] }		
		}
		options(warn = 0)
		dev.new() 
		tt <- ggplot(X, aes(x = Variable1, y = Variable2, colour = SameClass))		
		if (ggplotFlag1 == 1)
		{
			plot(tt + geom_point(size = 2) 
				+ labs(title = "Dependence between predictors", x = features[1], y = features[2])	
				+ scale_colour_manual("Same class", values = c("red", "green") )
				+ theme(axis.text.x = element_text(angle = 60, hjust = 1))
			)
		}
		else
		{
			plot(tt + geom_point(size = 2) 
				+ labs(title = "Dependence between predictors", x = features[1], y = features[2])	
				+ scale_colour_manual("Same class", values = c("red", "green") )
			)
		}
		dev.new()
		cde1 <- geom_histogram(position = "fill", binwidth = diff(range(Xnumeric[,1]))/4, alpha = 7/10)
		cde2 <- geom_histogram(position = "fill", binwidth = diff(range(Xnumeric[,2]))/4, alpha = 7/10)
		tt1 <- ggplot(X, aes(x = Variable1, fill = SameClass))		
		if (ggplotFlag1 == 1)
		{
			plot(tt1 + cde1 
				+ labs(title = "Class distribution", x = features[1], y = "Frequency")
				+ scale_fill_manual(paste("Same class as", features[2]),values = c("red", "lightgreen"))
				+ theme(axis.text.x = element_text(angle = 60, hjust = 1))
			)
		}
		else
		{
			plot(tt1 + cde1 
				+ labs(title = "Class distribution", x = features[1], y = "Frequency")
				+ scale_fill_manual(paste("Same class as", features[2]),values = c("red", "lightgreen"))
			)
		}		
		dev.new()
		tt1 <- ggplot(X, aes(x = Variable2, fill = SameClass))		
		if (ggplotFlag2 == 1)
		{
			plot(tt1 + cde2 
				+ labs(title = "Class distribution", x = features[2], y = "Frequency")
				+ scale_fill_manual(paste("Same class as", features[1]),values = c("red", "lightgreen"))
				+ theme(axis.text.x = element_text(angle = 60, hjust = 1))
			)
		}
		else
		{
			plot(tt1 + cde2 
				+ labs(title = "Class distribution", x = features[2], y = "Frequency")
				+ scale_fill_manual(paste("Same class as", features[1]),values = c("red", "lightgreen"))
			)
		}		
		if ( (length(unique(X[,1])) == 1) || (length(unique(X[,2])) == 1) )
		{  cat("\nOne of the variable does not have variance. Heatmap is not plotted.\n") }
		else
		{
			dev.new()
			tt2 <- ggplot(Xnumeric, aes( x = Variable1, y = Variable2, z = SameClass))	
				try(plot(tt2 + stat_density2d(aes(fill = ..level.., alpha =..level..), geom = "polygon") 
				+ scale_fill_gradient2(low = "lightyellow", mid = "yellow", high = "red")
				+ labs(title = "Heatmap of dependence between predictors", x = features[1], y = features[2])
			), silent = TRUE)
		}
		colnames(X) = c(features, "Same class")
	}
	else
	{
		intervals = cut(c(pD11[,2], pD22[,2]), minDim, labels = FALSE)
		pD11 = cbind(pD11,intervals[1:minDim])		
		if (nrow(pD22) != length(rmNA(intervals[(minDim + 1):(2*minDim)])))
		{
			sameN = min(nrow(pD22),length(rmNA(intervals[(minDim + 1):(2*minDim)])))
			pD22 = cbind(pD22[1:sameN,], rmNA(intervals[(minDim + 1):(2*minDim)])[1:sameN])
		}
		else
		{ 	pD22 = cbind(pD22, rmNA(intervals[(minDim + 1):(2*minDim)]))	}		
		minN = min(nrow(pD11), nrow(pD22))		
		Xi = sortMatrix(pD11,3)[1:minN,]
		Xj = sortMatrix(pD22,3)[1:minN,]
		Z  = (Xi[,2] + Xj[,2])/2		
		if (!is.character(features[1]))
		{ 
			fName1 = which(colnames(Xtest) == colnames(Xtest)[features[1]])
			fName2 = which(colnames(Xtest) == colnames(Xtest)[features[2]])
			features = colnames(Xtest)[c(fName1, fName2)]
		}		
		dev.new()
		XiXj = as.data.frame(cbind(Xi[,1], Xj[,1], Z)); colnames(XiXj) = c("Variable1", "Variable2", "Response")		
		tt <- ggplot(XiXj, aes(x = Variable1, y = Variable2, z = Response))
		ttMore <- tt + stat_density2d(aes(fill = ..level.., alpha =..level..), geom = "polygon") +
			scale_fill_gradient2(low = "lightyellow", mid = "yellow", high = "red") +
			labs(title = "Local Heatmap of dependence (with frequency and intensity of Response)", 
			x = features[1], y = features[2])
		try(plot(ttMore), silent= TRUE)		
		dev.new()
		fourQuantilesCut = cut(Z,4)
		XiXj[,3] = fourQuantilesCut		
		dataCuts = table(fourQuantilesCut)		
		if (length(which(dataCuts < 5)) > 0)
		{
			lowDataCuts = names(dataCuts[which(dataCuts < 5)])
			rmIdx = find.idx(lowDataCuts, XiXj[,3])			
			XiXj = XiXj[,-3]
			Z = Z[-rmIdx]
			fourQuantilesCut = cut(Z, 4)
			XiXj = cbind(XiXj[-rmIdx,], fourQuantilesCut)		
			dataCuts = table(fourQuantilesCut)
			colnames(XiXj)[3] = "Response"
		}
		X = cbind(XiXj, Z)
		colnames(X) = c(features, "Response in four quantile intervals", "Response")		
	    tt1 <- ggplot(XiXj, aes(x = Variable1, y = Variable2, z = Response)) 
		try(plot(tt1 + stat_density2d(aes(fill = ..level.., alpha =..level..), geom = "polygon") 
			+  scale_fill_gradient2(low = "lightyellow", mid = "yellow", high = "red")	
			+ labs(title = "Global Heatmap of dependence (with intensity of Response)", x = features[1], 
			y = features[2]) 
		), silent = TRUE)				
		dev.new()
		try(plot(tt + geom_point(aes(colour = Response, size = Response))
		    +  stat_smooth(fill = "lightgrey", colour = "grey", size = 1) 
			+ labs(title = "Dependence between predictors", x = features[1], y = features[2])		
			+ scale_colour_gradient2(low = "blue", mid = "green", high = "red") 	
		), silent = TRUE)
	}	
	#if (.Platform$OS.type == "windows") { arrangeWindows("vertical", preserve = FALSE) }
	# compute 'np' up front: it is also needed by the second-order block below,
	# even when no first-order interaction is found
	np = dim(importanceObject$partialDependence)
	idxf1 = which(colnames(importanceObject$partialDependence) == features[1])
	idxf2 = which(rownames(importanceObject$partialDependence) == features[2])	
	if ( (length(idxf1) > 0) && (length(idxf2) > 0) ) 
	{
		cat("\nLevel of interactions between ", features[1], " and ", features[2], " at first order: ", 
		round(importanceObject$partialDependence[idxf2, idxf1],4), "\n", "(", round(round(importanceObject$partialDependence[idxf2, idxf1],4)/max(importanceObject$partialDependence[-np[1], -np[2]])*100,2), "% of the feature(s) with maximum level", ")\n",  sep="")
	}
	else
	{  cat("\nFeatures do not appear to have strong co-influence on the response.\n") }	
	idxf1 = which(rownames(importanceObject$partialDependence) == features[1])	
	if (length(idxf1) > 0)	
	{ 
		idxf2 = which(colnames(importanceObject$partialDependence) ==  features[2]) 
		if (length(idxf2) > 0)	
		{	
			cat("Level of interactions between ", features[1], " and ", features[2], " at second order: ", 
			round(importanceObject$partialDependence[idxf1, idxf2],4), "\n", "(", round(round(importanceObject$partialDependence[idxf1, idxf2],4)/max(importanceObject$partialDependence[-np[1], -np[2]])*100,2), "% of the feature(s) with maximum level", ")\n",  sep="")
		}
	}
	
	if(!is.matrix(importanceObject$localVariableImportance))
	{	
		cat("\nClass distribution : for a variable of the pair, displays the estimated probability\nthat the considered variable has the same class than the other. If same class tends to be TRUE\nthen the variable has possibly an influence on the other (for the considered category or values) when predicting a label.\n\n")
		cat("Dependence : for the pair of variables, displays the shape of their dependence\nand the estimated agreement in predicting the same class, for the values that define dependence.\nIn case of categorical variables, cross-tabulation is used.\n\n")
		cat("Heatmap : for the pair of variables, displays the area where the dependence is the most effective.\nThe darker the colour, the stronger is the dependence.\n")
		cat("\nFrom the pair of variables, the one that dominates is, possibly, the one\nthat is the most discriminant one (looking 'Global variable Importance') and/or the one\nthat has the higher level of interactions(looking 'Variable Importance based on interactions').\n")
	}
	else
	{
		cat("\nDependence : for the pair of variables, displays the shape of their dependence\nand the predicted value (on average) of the response for the values taken by the pair.\n\n")
		cat("Heatmap : for the pair of variables, displays the area where the dependence is the most effective.\nThe darker the colour, the stronger is the dependence. First plot focuses on the intensity of the response\n, while the second considers both frequency (number of close predictions) and intensity.\n\n")	
	}		
	cat("\nPlease use the R menu to tile vertically windows in order to see all plots.\n\n")
	if (perspective && is.matrix(importanceObject$localVariableImportance))
	{
		dev.new()
		gridSize = 100; gridLag = 100
		x = XiXj[,1]
		y = XiXj[,2]
		z = Z			
		n = length(x)		
		xyz = cbind(x,y,z)
		if (n > 5*gridSize)
		{
			sampleIdx = sample(n, 500)
			xyz = xyz[sampleIdx,]
			n = length(sampleIdx)
		}
		xyz.byX = sortMatrix(xyz,1)
		xyz.byY = sortMatrix(xyz,2)			
		newX = xyz.byX[,1]
		newY = xyz.byY[,2]		
		dummyForRepX = dummyForRepY = rep(0,n)
		for (i in 2:n)
		{ 
			if (newX[i] == newX[i-1]) {	dummyForRepX[i] = 1 }
			if (newY[i] == newY[i-1]) {	dummyForRepY[i] = 1 }
		}			
		newIdx = which(dummyForRepY == 0 & dummyForRepX == 0)
		newX = newX[newIdx]
		newY = newY[newIdx]			
		if ( (gridSize + gridLag) > length(newX))
		{
			interp.1 = seq(min(newX) + 0.1, max(newX) - 0.1,length = gridSize + gridLag - length(newX))
			interp.2 = seq(min(newY) + 0.1, max(newY) - 0.1,length = gridSize + gridLag- length(newY))				
			newX = sort(c(newX, interp.1))
			newY = sort(c(newY, interp.2))
		}			
		duplicatesX = duplicated(newX)
		duplicatesY = duplicated(newY)			
		if (sum(duplicatesX) > 0)
		{
			newX = newX[!duplicatesX]
			newY = newY[!duplicatesX]
		}			
		if (sum(duplicatesY) > 0)
		{
			newY = newY[!duplicatesY]
			newX = newX[!duplicatesY]
		}			
		newXYZ = cbind(newX, newY, rep(NA, length(newX)))
		proxyM = fillNA2.randomUniformForest(rbind(xyz, newXYZ), nodesize = 2)						
		xyz.dim =  dim(xyz)
		nn = length(newX)						
		newZ = matrix(NA, nn, nn)			
		for (i in 1:nn)
		{
			for (j in 1:nn)
			{	newZ[i,j] = mean(proxyM[which(newX[i] == proxyM[,1] | newY[j] == proxyM[,2]), 3])	}
		}											
		L.smoothNewZ = t(apply(newZ, 1, function(Z) lagFunction(Z, lag = gridLag, FUN  = mean, inRange = TRUE)))
		C.smoothNewZ = apply(newZ, 2, function(Z) lagFunction(Z, lag = gridLag, FUN  = mean, inRange = TRUE))			
		smoothIdx = round(seq(1, nrow(L.smoothNewZ), length = gridLag),0)
		newX = newX[-smoothIdx]
		newY = newY[-smoothIdx]
		C.smoothNewZ = C.smoothNewZ[,-smoothIdx]
		L.smoothNewZ = L.smoothNewZ[-smoothIdx,]
		newZ = 0.5*(C.smoothNewZ + L.smoothNewZ)		
		# trim extreme values (below the 2.5% or above the 97.5% quantile), per column
		highOutlierIdx2 <- apply(newZ, 2, function(Z) which(Z >= quantile(Z, 0.975)))
		lowOutlierIdx2 <- apply(newZ, 2, function(Z) which(Z <= quantile(Z, 0.025)))
		outliersIdx2 <- unlist(c(lowOutlierIdx2, highOutlierIdx2))
		if (length(outliersIdx2) > 0) 
		{	
			newZ = newZ[-outliersIdx2, -outliersIdx2]  	
			newX = newX[-outliersIdx2]
			newY = newY[-outliersIdx2]
		}
		rm(lowOutlierIdx2)
		rm(highOutlierIdx2)
		# same trimming, per row
		highOutlierIdx2 <- apply(newZ, 1, function(Z) which(Z >= quantile(Z, 0.975)))
		lowOutlierIdx2 <- apply(newZ, 1, function(Z) which(Z <= quantile(Z, 0.025)))
		outliersIdx2 <- unlist(c(lowOutlierIdx2, highOutlierIdx2))
		if (length(outliersIdx2) > 0) 
		{	
			newZ = newZ[-outliersIdx2, -outliersIdx2]  	
			newX = newX[-outliersIdx2]
			newY = newY[-outliersIdx2]
		}			
		nNewZ = nrow(newZ)
		if (nNewZ > gridSize)
		{
			sampleIdx = sort(sample(nNewZ, gridSize))
			newX = newX[sampleIdx]
			newY = newY[sampleIdx]
			newZ = newZ[sampleIdx, sampleIdx]
		}
		if (bg != "none") par(bg = bg)
		flag = endCondition = 0
		lastANSWER = -40
		newX = as.matrix(newX)
		newY = as.matrix(newY)
		newZ = as.matrix(newZ)
		while (!endCondition)
		{
			if (!flag)
			{
				try(perspWithcol(newX, newY, newZ, heat.colors, nrow(newZ), theta = -40, phi = 20, xlab = features[1], ylab = features[2], zlab = "Response", main = "Dependence between predictors and effect over Response", ticktype = "detailed", box = TRUE, expand = 0.5, shade = 0.15), silent = FALSE)
			}			
			ANSWER <- readline(cat("To get another view of 3D representation\nplease give a number between -180 and 180 (default one = -40).\nType 'b' to remove border.\nTo see animation please type 'a'.\nType Escape to leave :\n"))
			if (!is.na(suppressWarnings(as.numeric(ANSWER))))
			{  
				flag = 1
				try(perspWithcol(newX, newY, newZ, heat.colors, nrow(newZ), theta = ANSWER, phi = 20, xlab = features[1], ylab = features[2], zlab = "Response", main = "Dependence between predictors and effect over Response", ticktype = "detailed", box = TRUE, expand = 0.5, shade = 0.15), 
				silent = FALSE)
				lastANSWER = ANSWER
			}
			else
			{ 
				if (as.character(ANSWER) == "b")
				{
					flag = 1
					try(perspWithcol(newX, newY, newZ, heat.colors, nrow(newZ), theta = lastANSWER, phi = 20, xlab = features[1], ylab = features[2], zlab = "Response", main = "Dependence between predictors and effect over Response", ticktype = "detailed", box = TRUE, expand = 0.5, border = NA, shade = 0.15), silent = FALSE)
				}
				else
				{
					if ( as.character(ANSWER) == "a")
					{
						flag = 1
						nn = nrow(newZ)
						circle = -180:180
						for (i in circle)
						{
							try(perspWithcol(newX, newY, newZ, heat.colors, nn, theta = i, phi = 20, 
							xlab = features[1], ylab = features[2], zlab = "Response", main = "Dependence between predictors and effect over Response", ticktype = "detailed", box = TRUE, border = NA, expand = 0.5, shade = 0.15), silent = FALSE)	
						}
					}				
					else				
					{ endCondition = 1	}
				}
			}
		}			
	}		
	return(X)
}
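
# Illustrative sketch (kept as comments so the file stays sourceable): a typical
# call chain ending in partialDependenceBetweenPredictors(). The data ('iris')
# and all parameter values below are examples, not defaults.
#
# data(iris)
# model <- randomUniformForest(Species ~ ., data = iris, threads = 1)
# imp <- importance(model, Xtest = iris[, -5])
# pd <- partialDependenceBetweenPredictors(iris[, -5], imp,
#	c("Petal.Length", "Petal.Width"), whichOrder = "all", perspective = FALSE)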

twoColumnsImportance <- function(importanceObjectMatrix)
{
	idx = length(grep("localVariableFrequency", colnames(importanceObjectMatrix))) - 1
	tmpImportanceObjectMatrix = importanceObjectMatrix[,1:2]
	if (idx > 0)
	{
		# stack each remaining variable column next to the response column
		for (i in 1:idx)
		{	tmpImportanceObjectMatrix = rbind(tmpImportanceObjectMatrix, importanceObjectMatrix[,c(1, 2 + i)])	}
	}
	return(tmpImportanceObjectMatrix)
}

plot.importance <- function(x, 
nGlobalFeatures = 30, 
nLocalFeatures = 5, 
Xtest = NULL, 
whichFeature = NULL, 
whichOrder = if (ncol(x$globalVariableImportance) > 5) 
	{  if (nrow(x$localVariableImportance$obsVariableImportance) > 1000) "first" else "all" } 
	else
	{  if (nrow(x$localVariableImportance) > 1000) "first" else "all" }, 
outliersFilter = FALSE, 
formulaInput = NULL, 
border = NA, 
...)
{
	cat("\nPlease use the R menu to tile vertically windows in order to see all plots.\n")
	object <- x
	if (nrow(x$partialDependence) < 3)
	{
		stop("Not enough (or no) interactions between variables. Please use partialDependenceOverResponses( )\n and partialDependenceBetweenPredictors( ) functions to deeper assess importance.") 
	}
	Variable = Response = NULL
	if (!is.null(Xtest))
	{
		if (!is.null(formulaInput))
		{
			mf <- model.frame(formula = formulaInput, data = as.data.frame(Xtest))
			Xtest <- model.matrix(attr(mf, "terms"), data = mf)[,-1]
			cat("Note: please note that categorical variables have lost their original values when using formula.\nIt is strongly recommended to not use formula if one wants to assess importance\n \n.")
		}
		else
		{		
			matchNA = (length(which(is.na(Xtest))) > 0)				
			if (matchNA) 
			{ 
				cat("NA found in data. Fast imputation (means) is used for missing values\n")
				Xtest <- na.impute(Xtest)
			}
		}
	}		
	maxVar = nGlobalFeatures
	maxVar2 = nLocalFeatures
	graphics.off()
	varImportance = object$globalVariableImportance
	n = nrow(varImportance)	
	if (n > maxVar) { varImportance = varImportance[1:maxVar,] }
	else { maxVar = n}	
	par(las = 1)
	maxChar = if (nLocalFeatures >= 10) { 11 } 
	else { floor(2 + max(nchar(as.character(object$globalVariableImportance[,1])))/2) }
	par(mar = c(5, maxChar + 1,4,2))
	barplot(varImportance[maxVar:1,"percent.importance"], horiz = TRUE, col = sort(heat.colors(maxVar), decreasing = TRUE), border = border,
	names.arg = varImportance[maxVar:1,"variables"], xlab = "Relative influence (%)", main = "Variable importance based on information gain")
	abline(v = 100/n, col = 'grey')	
	dev.new()
	par(las=1)
    par(mar = c(5,4,4,2))
	nbFeatures = ncol(object$partialDependence)
	newNbFeatures = min(maxVar2, nbFeatures -1)	
	if (newNbFeatures < (nbFeatures - 1))
	{
		OthersVariablesCol = colSums(object$partialDependence[(newNbFeatures+1):(nbFeatures -1), -nbFeatures, drop = FALSE])[1:newNbFeatures]
		OthersVariablesRow = rowSums(object$partialDependence[-nbFeatures, (newNbFeatures+1):(nbFeatures -1), drop = FALSE])[1:newNbFeatures]
		corner = mean(c(OthersVariablesCol, OthersVariablesRow))
		newPartialDependence = object$partialDependence[1:newNbFeatures,1:newNbFeatures]
		newPartialDependence =  rbind(cbind(newPartialDependence, OthersVariablesRow), c(OthersVariablesCol, corner))
		colnames(newPartialDependence)[ncol(newPartialDependence)] = "Other features"
		rownames(newPartialDependence)[nrow(newPartialDependence)] = "Other features"
		mosaicplot(t(newPartialDependence), color = sort(heat.colors(newNbFeatures + 1), decreasing = FALSE), 
		main = "Variables interactions over observations", ylab = "Most important variables at 2nd order", 
		xlab = "Most important variables at 1rst order", las = ifelse(maxChar > 10, 2,1), border = border)
	}
	else
	{
		mosaicplot(t(object$partialDependence[1:newNbFeatures,1:newNbFeatures]), color = sort(heat.colors(newNbFeatures), decreasing = FALSE), 
		las = ifelse(maxChar > 10, 2, 1), main = "Variables interactions over observations", ylab = "Most important variables at 2nd order", xlab = "Most important variables at 1st order", border = border)
	}	
	dev.new()
	par(las=1)
	par(mar=c(5,maxChar + 1,4,2))
	nbFeatures2 = min(maxVar2, length(object$variableImportanceOverInteractions))
	importanceOverInteractions = sort(object$variableImportanceOverInteractions, decreasing = TRUE)/sum(object$variableImportanceOverInteractions)*100
	barplot(importanceOverInteractions[nbFeatures2:1], horiz = TRUE, col = sort(heat.colors(nbFeatures2), decreasing = TRUE), 
		names.arg = names(importanceOverInteractions)[nbFeatures2:1], xlab = "Relative influence (%)", 
		main = "Variable importance based on interactions", border = border)
	abline(v = 100/n, col = 'grey')
	if (!is.matrix(object$localVariableImportance))
	{
		dev.new()
		par(las=1)
	    par(mar = c(5, maxChar + 1,4,2))
		nbFeatures3 = min(nbFeatures2, nrow(object$localVariableImportance$classVariableImportance))
		mosaicplot(t(object$localVariableImportance$classVariableImportance[1:nbFeatures3,,drop = FALSE]), 
			color = sort(heat.colors(nbFeatures3),
			decreasing = FALSE), main = "Variable importance over labels", border = border)
	}
	else
	{
		dev.new()
		## #require(ggplot2) || install.packages("ggplot2")
		ggData = twoColumnsImportance(object$localVariableImportance)
		mostImportantFeatures = as.numeric(names(sort(table(ggData[,2]), decreasing = TRUE)))	
		if (length(unique(ggData[,2])) > maxVar2)
		{	
			mostImportantFeatures = mostImportantFeatures[1:maxVar2]	
			ggData = ggData[find.idx(mostImportantFeatures, ggData[,2], sorting = FALSE),]
		}
		if (is.null(Xtest)) {	textX = paste("V", mostImportantFeatures, collapse = " ", sep="")	}
		else {	textX = paste( colnames(Xtest)[mostImportantFeatures], collapse = ", ", sep="")	}
		textX = paste("Index of most important variables [", textX, "]")
		colnames(ggData)[1] = "Response"
		colnames(ggData)[2] = "Variable"
		ggData = data.frame(ggData)
		if (!is.null(Xtest)) { 	ggData[,"Variable"] = colnames(Xtest)[ggData[,"Variable"]] 	}
		ggData[,"Variable"] = as.factor(ggData[,"Variable"])
		if (nrow(ggData) > 1000) 
		{ 
			randomSample = sample(nrow(ggData), 1000) 
			gg <- qplot(Variable, Response, data = ggData[randomSample,], geom = c("boxplot", "jitter"), 
			colour = Response, outlier.colour = "green", outlier.size = 1.5, fill = Variable)
			cat("1000 observations have been randomly sampled to plot 'Dependence on most important predictors'.\n")
		}
		else
		{	
			gg <- qplot(Variable, Response, data = ggData, geom = c("boxplot", "jitter"), colour = Response, outlier.colour = "green", outlier.size = 1.5, fill = Variable)	
		}		
		if (maxChar > 10)
		{
			plot(gg + labs(x ="", y = "Response", title = "Dependence on most important predictors") + 
			theme(axis.text.x = element_text(angle = 60, hjust = 1)))			
		}
		else
		{	
			plot(gg + labs(x ="", y = "Response", title = "Dependence on most important predictors"))
		}
	}	
	if (is.null(Xtest))	{	stop("partial dependence between response and predictor can not be computed without data") }
	else
	{
		endCondition = 0
		dev.new()
		pD <- partialDependenceOverResponses(Xtest, object, whichFeature = whichFeature, whichOrder = whichOrder, outliersFilter = outliersFilter,...)
		#if (.Platform$OS.type == "windows") { arrangeWindows("vertical", preserve = FALSE) }
		idxMostimportant = rmNA(match(names(object$variableImportanceOverInteractions), colnames(Xtest)))[1:nbFeatures2]
		mostimportantFeatures = colnames(Xtest)[idxMostimportant]
		while (!endCondition)
		{
			ANSWER <- readline(cat("To get partial dependence of most important features\ngive a column number\namong", idxMostimportant,
			"\n(", mostimportantFeatures, ")\nPress escape to quit\n"))
			if (!is.na(suppressWarnings(as.numeric(ANSWER))))
			{  
				whichFeature = as.numeric(ANSWER)
				if (whichFeature %in% idxMostimportant)
				{	
					pD <- partialDependenceOverResponses(Xtest, object, whichFeature = whichFeature, whichOrder = whichOrder, outliersFilter = outliersFilter,...)	
				}
				else
				{   stop("Please provide column index among most important. Partial Dependence can not be computed.") }				
			}
			else
			{	endCondition = 1	}
		}
	}	
}
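
# Illustrative sketch (comments only): plotting an importance object. When
# 'Xtest' is provided, plot.importance() becomes interactive and prompts for
# the column whose partial dependence should be displayed. 'model' and 'imp'
# are assumed to come from a chain like the one sketched above.
#
# plot(imp, Xtest = iris[, -5], nLocalFeatures = 4)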

print.importance <- function(x, ...)
{
	object <- x
	minDim = min(10,length(object$variableImportanceOverInteractions))
	# global importance
	if (!is.matrix(object$localVariableImportance))
	{
		cat("\n1 - Global Variable importance (", minDim, " most important based on information gain) :\n", sep = "")
		cat("Note: most predictive features are ordered by 'score' and plotted. Most discriminant ones\nshould also be taken into account by looking 'class' and 'class.frequency'.\n\n")
	}
	else
	{  
		cat("\n1 - Global Variable importance (", minDim, " most important based on 'Lp' distance) :\n", 
		sep = "") 
	}
	print(object$globalVariableImportance[1:minDim,])	
	
	cat("\n\n2 - Local Variable importance")
	cat("\nVariables interactions (", minDim, " most important variables at first (columns) and second (rows) order) :", sep = "")
	cat("\nFor each variable (at each order), its interaction with others is computed.\n\n")
	#interactions	
	print(round(object$partialDependence[c(1:minDim, nrow(object$partialDependence)),], 2))	
	#local importance
	cat("\n\nVariable Importance based on interactions (", minDim, " most important) :\n", sep = "")
	print(round(object$variableImportanceOverInteractions/sum(object$variableImportanceOverInteractions),2)[1:minDim])	
	#local importance over labels
	if (!is.matrix(object$localVariableImportance))
	{
		cat("\nVariable importance over labels (", minDim, 
		" most important variables conditionally to each label) :\n", sep = "")
		print(object$localVariableImportance$classVariableImportance[1:minDim,])
	
		cat("\n\nSee ...$localVariableImportance$obsVariableImportance to get variable importance for each observation.", sep = "")
		
		cat("\n\nCall clusterAnalysis() function to get a more compact and complementary analysis.\n Type '?clusterAnalysis' for help.", sep = "")	
	}
	else 
	{   cat("\n\nSee ...$localVariableImportance to get variable importance for each observation.")	}	
	# partial dependence (response vs predictor)
	cat("\n\nCall partialDependenceOverResponses() function to get partial dependence over responses\nfor each variable. Type '?partialDependenceOverResponses' for help.\n")	
}

combineRUFObjects <- function(rUF1, rUF2)
{
	rUF1 <- filter.object(rUF1)
	rUF2 <- filter.object(rUF2)
	return(onlineCombineRUF(rUF1, rUF2))	
}
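
# Illustrative sketch (comments only): combining two forests grown on the same
# task, e.g. in two sessions or on two machines. 'X' and 'Y' are placeholders.
#
# model1 <- randomUniformForest(X, Y, ntree = 50, threads = 1)
# model2 <- randomUniformForest(X, Y, ntree = 50, threads = 1)
# bigModel <- combineRUFObjects(model1, model2)  # a forest with 100 trees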

rankingTrainData <- function(trainData = NULL, trainLabels = NULL, testData = NULL, testLabels = NULL, ntree = 100,  thresholdScore = 2/3, nTimes = 2, ...)
{	
	score = tmpScore = rep(0, nrow(testData))	
	classes = unique(trainLabels)
	majorityClass = modX(trainLabels)	
	i = 1;	rmIdx = NULL; idx = 1:nrow(testData)
	while( (i <= nTimes) && (length(testData[,1]) >= 1) )
	{
		rUF <- randomUniformForestCore(trainData, trainLabels = as.factor(trainLabels), ntree = ntree, use.OOB = FALSE, rf.overSampling = -0.75, rf.targetClass = majorityClass, rf.treeSubsampleRate = 2/3)
		predictRUF <- randomUniformForestCore.predict(rUF, testData)	
		tmpScore = tmpScore + apply(predictRUF$all.votes, 1, function(Z)  length(which(Z != majorityClass)))
		score[idx] = score[idx] + tmpScore								
		rmIdx = which(tmpScore < (thresholdScore*ntree))		
		if (length(rmIdx) > 0)  {  testData = testData[-rmIdx,];  idx = idx[-rmIdx]; tmpScore = tmpScore[-rmIdx];   }
		i = i + 1
	}	
	return(score)
}
		
plotTreeCore <- function(treeStruct, rowNum = 1, height.increment = 1)
{
  if ( (treeStruct[rowNum, "status"] == -1) )
  {
    treeGraphStruct <- list()
    attr(treeGraphStruct, "members") <- 1
    attr(treeGraphStruct, "height") <- 0
    attr(treeGraphStruct, "label") <- if (treeStruct[rowNum,"prediction"] == 0) { "next node" } else 
		{ 	
			if (is.numeric(treeStruct[rowNum,"prediction"]))	
			{ round(treeStruct[rowNum,"prediction"],2) } else { treeStruct[rowNum,"prediction"]	}	
		}
	attr(treeGraphStruct, "leaf") <- TRUE
  }
  else
  {
	left <- plotTreeCore(treeStruct, treeStruct[rowNum, "left.daughter"], height.increment) #,  incDepth+1
    right <- plotTreeCore(treeStruct, treeStruct[rowNum, "right.daughter"], height.increment)#,  incDepth+1
    treeGraphStruct <- list(left,right)
    attr(treeGraphStruct, "members") <- attr(left, "members") + attr(right,"members")
    attr(treeGraphStruct,"height") <- max(attr(left, "height"),attr(right, "height")) + height.increment
    attr(treeGraphStruct, "leaf") <- FALSE	
	if (rowNum != 1)
	{     attr(treeGraphStruct, "edgetext") <- paste(treeStruct[rowNum, "split.var"] , " > " , round(treeStruct[rowNum, "split.point"],2), " ?", sep ="")  }
	else
	{	  
		attr(treeGraphStruct, "edgetext") <- paste(".[no].  .", 
		treeStruct[rowNum, "split.var"] , " > " , round(treeStruct[rowNum, "split.point"],2), ".                                                   .[yes]", sep="") 
	}
  }
  class(treeGraphStruct) <- "dendrogram"
  return(treeGraphStruct)
}

plotTreeCore2 <- function(treeStruct, rowNum = 1, height.increment = 1,  maxDepth = 100 )
{
  if ((treeStruct[rowNum, "status"] == -1) || (rowNum > maxDepth))
  {
    treeGraphStruct <- list()
    attr(treeGraphStruct, "members") <- 1
    attr(treeGraphStruct, "height") <- 0
    attr(treeGraphStruct, "label") <- if (treeStruct[rowNum,"status"] == 1) { "next node" } else 
		{ 	
			if (is.numeric(treeStruct[rowNum,"prediction"]))	
			{ round(treeStruct[rowNum,"prediction"],2) } else { treeStruct[rowNum,"prediction"]	}	
		}
	attr(treeGraphStruct, "leaf") <- TRUE
  }
  else
  {
    left <- plotTreeCore2(treeStruct, treeStruct[rowNum, "left.daughter"], height.increment) #,  incDepth+1
    right <- plotTreeCore2(treeStruct, treeStruct[rowNum, "right.daughter"], height.increment)#,  incDepth+1
    treeGraphStruct <- list(left,right)
    attr(treeGraphStruct, "members") <- attr(left, "members") + attr(right,"members")
    attr(treeGraphStruct,"height") <- max(attr(left, "height"),attr(right, "height")) + height.increment
    attr(treeGraphStruct, "leaf") <- FALSE	
	if (rowNum != 1)
	{     attr(treeGraphStruct, "edgetext") <- paste(treeStruct[rowNum, "split.var"] , " > " , round(treeStruct[rowNum, "split.point"],2), "?", sep ="")  }
	else
	{	  
		attr(treeGraphStruct, "edgetext") <- paste(treeStruct[rowNum, "split.var"] , " > " , round(treeStruct[rowNum, "split.point"],2), "?", ".                                     .", sep="") 
	}
  }
  class(treeGraphStruct) <- "dendrogram"
  return(treeGraphStruct)
}

plotTree <- function(treeStruct, rowNum = 1, height.increment = 1, maxDepth = 100, fullTree = FALSE, xlim = NULL, ylim= NULL, center = TRUE)
{
	if (fullTree)
	{	drawTreeStruct <- plotTreeCore(treeStruct, rowNum = rowNum , height.increment = height.increment)	}
	else
	{    drawTreeStruct <- plotTreeCore2(treeStruct, rowNum = rowNum , height.increment = height.increment,  maxDepth = maxDepth)	}	
	nP <- list(col = 3:2, cex = c(2.0, 0.75), pch = 21:22, bg =  c("light blue", "pink"), lab.cex = 0.75, lab.col = "tomato")	
	if (is.null(xlim))
	{	
		if (is.null(ylim)) { ylim = c(0,8) }
		plot(drawTreeStruct, center = center, leaflab ='perpendicular', edgePar = list(t.cex = 2/3, p.col = NA, p.lty = 0, lty =c( 2,5), 
			col = c("purple", "red"), lwd = 1.5), nodePar = nP, ylab = "Tree depth", xlab = "Predictions", 
			xlim = c(0, min(30,floor(nrow(treeStruct)/2))), ylim = ylim)
	}
	else
	{
		if (is.null(ylim)) { ylim = c(0,8) }
		plot(drawTreeStruct, center = center, leaflab ='perpendicular', edgePar = list(t.cex = 2/3, p.col = NA, p.lty = 0, lty =c( 2,5), 
			col = c("purple", "red"), lwd = 1.5), nodePar = nP, ylab = "Tree depth", xlab = "Predictions", xlim = xlim, ylim = ylim )
	}
}
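
# Illustrative sketch (comments only): drawing one tree of a trained forest
# ('model' is a placeholder). getTree.randomUniformForest() returns the tree
# structure expected by plotTree().
#
# oneTree <- getTree.randomUniformForest(model, 10)
# plotTree(oneTree, maxDepth = 30)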

fillNA2.randomUniformForest <- function(X, Y = NULL, ntree = 100, mtry = 1, nodesize = 10, 
categoricalvariablesidx = NULL, 
NAgrep = "", 
maxClasses = max(10, which.is.factor(X[,, drop = FALSE], count = TRUE)),
na.action = c("accurateImpute", "fastImpute", "veryFastImpute"), 
threads = "auto",
...)
{	
    i = NULL 
	n <- nrow(X)
	X <- fillVariablesNames(X)
	flagRownames = FALSE
	if (!is.null(rownames(X))) { rownamesX = rownames(X); flagRownames =  TRUE }	
	if (na.action[1] != "accurateImpute")
	{
		X = NAfactor2matrix(X)
		if (na.action[1] != "fastImpute") 
		{ 	X = na.replace(X, fast = FALSE)	}
		else
		{  X = na.replace(X, fast = TRUE) }
		cat("'fastImpute' or 'veryFastImpute' arguments always transform the original data into a pure R matrix.\n")
		if (flagRownames) { rownames(X) = rownamesX }
		return(X)
	}
	else
	{
		if (exists("categorical", inherits = FALSE)) { categoricalvariablesidx = categorical }
		else { categorical = NULL  }
		if (!is.null(Y)) { trueY = Y }
		trueX = X
		limitSize = if (n > 2000) { 50 } else { min(nodesize, 10) } 
		if (!is.matrix(X))
		{	
			flag = TRUE
			X.factors <- which.is.factor(X, maxClasses = maxClasses)
			X <- NAfactor2matrix(X, toGrep = NAgrep)
		}
		else
		{
			flag = FALSE
			X.factors = rep(0,ncol(X))
		}	
		NAIdx = which(is.na(X), arr.ind = TRUE)	
		if (dim(NAIdx)[1] == 0) { stop("No missing values found in data. Please use the NAgrep option to specify how missing values are coded.\nThe function checks for NA values and for the string given by NAgrep. Please also check that the string does not contain a space character.") }	
		processedFeatures = unique(NAIdx[,2])
		nbNA = table(NAIdx[,2])
		fullNAFeatures = which(nbNA == n)
		fullNAFeaturesLength = length(fullNAFeatures)
		if (fullNAFeaturesLength == length(processedFeatures))
		{	stop("All features have only NA in their values.\n") }
		else
		{	
			if (fullNAFeaturesLength > 0) 
			{	processedFeatures = processedFeatures[-fullNAFeatures] }
		}
		nFeatures = length(processedFeatures)	
		idx <- lapply(processedFeatures, function(Z) NAIdx[which(NAIdx[,2] == Z),1]) 
		validIdx <- sapply(idx, function(Z) (length(Z) >  limitSize))
		processedFeatures <- processedFeatures[which(validIdx == TRUE)]
		nFeatures = length(processedFeatures)
		invalidIdx <- which(validIdx == FALSE)
		if (length(invalidIdx) > 0)	{	idx <- rm.InAList(idx, invalidIdx)	}		
		if (nFeatures == 0)
		{	
			cat("Not enough missing values. Rough imputation is done. Please lower 'nodesize' value to increase accuracy.\n")
			return(na.replace(trueX, fast = TRUE))	
		}
		else
		{
			if (!is.null(Y)){  X = cbind(X,Y)	}
			X <- fillVariablesNames(X)
			X <- na.replace(X, fast = TRUE)			
			{
				## #require(parallel)	
				max_threads = min(detectCores(),4)		
				if (threads == "auto")
				{	
					if (max_threads == 2) { threads = min(max_threads, nFeatures) }
					else {	threads = max(1, max_threads)  }
				}
				else
				{
					if (max_threads < threads) 
					{	cat("Note: number of threads is higher than number of logical threads in this computer.\n") }
				}			
				threads = min(nFeatures, max_threads)
				## #require(doParallel)			
				Cl <- makePSOCKcluster(threads, type = "SOCK")
				registerDoParallel(Cl)		
				chunkSize <- ceiling(nFeatures/getDoParWorkers())
				smpopts  <- list(chunkSize = chunkSize)
			}				
			export = c("randomUniformForest.default", "rUniformForest.big", "randomUniformForestCore.big", "randomUniformForestCore", "predict.randomUniformForest", "rUniformForestPredict", "uniformDecisionTree", "CheckSameValuesInAllAttributes", "CheckSameValuesInLabels", "fullNode", "genericNode", "leafNode", "filter.object", "filter.forest", "randomUniformForestCore.predict", "onlineClassify", "overSampling", "predictDecisionTree", "options.filter", "majorityClass", "randomCombination", "randomWhichMax", "vector2matrix", "which.is.na", "which.is.factor", "factor2vector", "outputPerturbationSampling", "rmNA", "count.factor", "find.idx", "genericOutput", "fillVariablesNames", "is.wholenumber", "rm.tempdir", "setManyDatasets", "onlineCombineRUF", "mergeLists", "classifyMatrixCPP", "L2DistCPP", "checkUniqueObsCPP", "crossEntropyCPP", "giniCPP", "L2InformationGainCPP", "entropyInformationGainCPP", "runifMatrixCPP", "NAfactor2matrix", "factor2matrix", "as.true.matrix", "NAFeatures", "NATreatment", "rmInf", "rm.InAList")
			if (nrow(X) < 2001)
			{
				newX <- foreach( i= 1:nFeatures, .options.smp = smpopts, .inorder = FALSE, .combine = cbind, 
				.multicombine = TRUE, .export = export) %dopar%
				{
					if (X.factors[processedFeatures[i]] == 1)
					{
						rufObject <- randomUniformForest.default(X[-idx[[i]],-processedFeatures[i]], 
									Y = as.factor(X[-idx[[i]], processedFeatures[i]]), OOB = FALSE, importance = FALSE, ntree = ntree, mtry = mtry, nodesize = nodesize, threads = 1, categoricalvariablesidx = categoricalvariablesidx)
								
						X[idx[[i]], processedFeatures[i]] <- as.numeric(as.vector(predict.randomUniformForest(rufObject, 
							X[idx[[i]], -processedFeatures[i]])))
					}
					else
					{
						rufObject <- randomUniformForest.default(X[-idx[[i]],-processedFeatures[i]], Y = X[-idx[[i]], processedFeatures[i]], 
									OOB = FALSE, importance = FALSE, ntree = ntree, mtry = mtry, nodesize = nodesize, threads = 1,
									categoricalvariablesidx =  categoricalvariablesidx)
							
						X[idx[[i]], processedFeatures[i]] <- predict.randomUniformForest(rufObject, 
						X[idx[[i]], -processedFeatures[i]])	
					}							
					if (mean(is.wholenumber(rmNA(X[-idx[[i]], processedFeatures[i]]))) == 1) {  round(X[,processedFeatures[i]]) }
					else {  X[,processedFeatures[i]]  }	
				}			
				X[,processedFeatures] = newX
				stopCluster(Cl)				
			}
			else
			{
				newX <- foreach(i = 1:nFeatures, .options.smp = smpopts, .inorder = FALSE, .combine = cbind, 
				.multicombine = TRUE, .export = export) %dopar%
				{
					if (X.factors[processedFeatures[i]] == 1)
					{
						rufObject <- rUniformForest.big(X[-idx[[i]],-processedFeatures[i]], Y = as.factor(X[-idx[[i]], 
							processedFeatures[i]]), nforest = max(1, floor(nrow(X[-idx[[i]],])/2000)), replacement = TRUE, randomCut = TRUE, OOB = FALSE, importance = FALSE, ntree = ntree, mtry = mtry, 
							nodesize = nodesize, threads = 1, categoricalvariablesidx =  categoricalvariablesidx)
								
						X[idx[[i]], processedFeatures[i]] <- as.numeric(as.vector(predict.randomUniformForest(rufObject, 
							X[idx[[i]], -processedFeatures[i]])))
					}
					else
					{
						rufObject <- rUniformForest.big(X[-idx[[i]],-processedFeatures[i]], Y = X[-idx[[i]], processedFeatures[i]], 
							nforest = max(1, floor(nrow(X[-idx[[i]],])/2000)), replacement = TRUE, randomCut = TRUE, OOB = FALSE, 
							importance = FALSE, ntree = ntree, mtry = mtry, nodesize = nodesize, threads = 1, 
							categoricalvariablesidx = categoricalvariablesidx)
							
						X[idx[[i]], processedFeatures[i]] <- predict.randomUniformForest(rufObject, X[idx[[i]], -processedFeatures[i]])	
					}
								
					if (mean(is.wholenumber(rmNA(X[-idx[[i]], processedFeatures[i]]))) == 1) 
					{  round(X[,processedFeatures[i]]) }
					else  {  X[,processedFeatures[i]]  }	
				}
				X[,processedFeatures] = newX
				stopCluster(Cl)
			}			
			if (sum(X.factors) != 0)
			{
				factorsIdx = which(X.factors != 0)
				X = as.true.matrix(X)
				X = as.data.frame(X)
				for (j in 1:length(factorsIdx))
				{	
					k = factorsIdx[j]
					X[,k] = as.factor(X[,k])
					Xlevels = as.numeric(names(table(X[,k])))
					asFactorTrueX_k = as.factor(trueX[,k])
					oldLevels = levels(asFactorTrueX_k)
					for (i in 1:length(NAgrep))
					{
						levelToRemove = which(oldLevels == NAgrep[i])
						if (length(levelToRemove) > 0) 
						{ 
							if (NAgrep[i] == "")
							{	levels(X[,k]) = c("virtualClass", oldLevels[-levelToRemove])	}
							else
							{  levels(X[,k]) = oldLevels[-levelToRemove] }
						}
						else 
						{ 
							checkLevel = sample(Xlevels,1)
							if ((!is.numeric(trueX[,k])) && (checkLevel > 0) && (is.wholenumber(checkLevel)))
							{ levels(X[,k]) = oldLevels[Xlevels] }
						}
					}
				}				
			}
			for (j in 1:ncol(X))
			{
				classTrueX = class(trueX[,j])
				if ((classTrueX) == "character") X[,j] = as.character(X[,j])
			}
			if (flagRownames) { rownames(X) = rownamesX }
			return(X)
		}
	}
}

rufImpute = fillNA2.randomUniformForest
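
# Illustrative sketch (comments only): accurate imputation of a data frame 'X'
# (placeholder) whose missing values are coded both as NA and as the string "?"
# (declared through NAgrep).
#
# X.imputed <- fillNA2.randomUniformForest(X, NAgrep = "?", ntree = 50)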

# unsupervised Learning = unsupervised2supervised + rUF model + proximity + MDS/Spectral (+ gap) + kMeans/hClust 
unsupervised2supervised <- function(X, 
method = c("uniform univariate sampling", "uniform multivariate sampling"),
seed = 2014,
conditionalTo = NULL,
samplingFromGaussian = FALSE,  
bootstrap = FALSE)
{
	np = dim(X); n = np[1]; p = np[2]
	flag = FALSE
	syntheticTrainLabels1 = rep(0,n)
	if (!is.null(rownames(X))) { rowNamesX = rownames(X); flag = TRUE }
	if (is.data.frame(X))
	{	
		X = NAfactor2matrix(X, toGrep = "anythingParticular") 
		cat("X is a data frame and has been converted to a matrix.\n")
	}
	
	XX = matrix(data = NA, nrow = n, ncol = p)
	
	set.seed(seed)
	if (method[1] == "uniform univariate sampling")
	{	
		if (is.null(conditionalTo))
		{	XX <- apply(X, 2, function(Z) sample(Z, n, replace = bootstrap))  }
		else
		{
			if (length(conditionalTo) == nrow(X)) {	 ZZ = round(as.numeric(conditionalTo),0) }
			else {	ZZ = round(as.numeric(X[,conditionalTo[1]]),0) }
			ZZtable = as.numeric(names(table(ZZ)))
			XX = X
			for (i in 1:length(ZZtable))
			{
				idx = which(ZZ == ZZtable[i])
				if (samplingFromGaussian)
				{  XX[idx,] <- apply(X[idx, , drop = FALSE], 2, function(Z) rnorm(length(idx), mean(Z), sd(Z)))  }
				else				
				{	XX[idx,] <- apply(X[idx, , drop = FALSE], 2, function(Z) sample(Z, length(idx), replace = bootstrap))	}
			}
		}
		
	}
	else
	{
		if (is.null(conditionalTo))
		{
			for (j in 1:p)
			{
				XX[,j] = sample(X, n, replace = bootstrap)
				
				outOfRange = which( (XX[,j] > max(X[,j])) | (XX[,j] < min(X[,j])) )
				while (length(outOfRange) > 0)
				{	
					XX[outOfRange, j] = sample(X, length(outOfRange), replace = bootstrap)	
					outOfRange = which( (XX[,j] > max(X[,j])) | (XX[,j] < min(X[,j])) )
				}
			}
		}
		else
		{
			if (length(conditionalTo) == nrow(X)) {	 ZZ = round(as.numeric(conditionalTo),0) }
			else {	ZZ = round(as.numeric(X[,conditionalTo[1]]),0) }
			ZZtable = as.numeric(names(table(ZZ)))
			XX = X
			meanX = mean(X)
			sdX = sd(X)
			for (i in 1:length(ZZtable))
			{
				idx = which(ZZ == ZZtable[i])
				for (j in 1:p)
				{
					if (samplingFromGaussian)
					{  XX[idx,j] = rnorm(length(idx), meanX, sdX)	}
					else 
					{ 	XX[idx,j] = sample(X, length(idx), replace = bootstrap)	}
					
					outOfRange = which( (XX[idx,j] > max(X[,j])) | (XX[idx,j] < min(X[,j])) )
					while (length(outOfRange) > 0)
					{	
						XX[idx[outOfRange], j] = sample(X, length(outOfRange), replace = bootstrap)	
						outOfRange = which( (XX[idx[outOfRange],j] > max(X[,j])) | 
						(XX[idx[outOfRange],j] < min(X[,j])) )
					}
				}
			}
		}
	}
	newX = rbind(X, XX)
	if (flag) { rownames(newX) = c(rowNamesX, rowNamesX) }
	syntheticTrainLabels2 = rep(1, n)	
	Y = c(syntheticTrainLabels1, syntheticTrainLabels2)
	
	return(list(X = newX, Y = Y))	
}
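
# Illustrative sketch (comments only): the synthetic sample turns an unlabelled
# matrix 'X' (placeholder) into a two-class problem. Class 0 holds the n
# original rows and class 1 the n synthetic ones, so a forest trained on
# (s$X, s$Y) has to learn the structure of the original data.
#
# s <- unsupervised2supervised(X, method = "uniform univariate sampling")
# table(s$Y)  # n zeros (original rows) and n ones (synthetic rows)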

proximitiesMatrix <- function(object, fullMatrix = TRUE, Xtest = NULL, predObject = NULL, sparseProximities = TRUE,
OOB = FALSE, pthreads = "auto")
{ 
	object = filter.object(object)

	if (OOB) { object$predictionObject = NULL }
	
	{
		if (is.null(object$predictionObject))
		{
			if (is.null(predObject)) 
			{ 
				if (is.null(Xtest))	{ stop("Please provide test data.\n") }
				else
				{
					predObject <- predict.randomUniformForest(object, Xtest, type = "all")
					votesData = predObject$votes.data
				}
			}
			else 
			{  
				if (is.null(predObject$majority.vote)) 
				{ stop("Please provide full prediction object (type = 'all') when calling predict() function") }
				else 
				{	votesData = predObject$votes.data 	}
			}				
		}		
		else
		{	votesData = object$predictionObject$votes.data	}
	}
		
	n = nrow(votesData[[1]])  
	B = length(votesData)
	
	if ((n < 500) && is.character(pthreads)) { pthreads = 1 }
	else	
	{
		#require(parallel)	
		max_threads = detectCores()					
		if (pthreads == "auto")
		{	
			if (max_threads == 2) { pthreads = max_threads }
			else {	pthreads  = max(1, max_threads - 1)  }
		}
		else
		{
			if (max_threads < pthreads) 
			{  cat("Warning : number of threads is higher than logical threads in this computer.\n") }
		}			
		
		# require(doParallel)	
		Cl = makePSOCKcluster(pthreads, type = "SOCK")
		registerDoParallel(Cl)
	}
	
	operatorLimit = 3*800*n^2/(10000^2)
	# memory.limit() is Windows-only; elsewhere assume no hard limit
	memLimit = if (.Platform$OS.type == "windows") memory.limit() else Inf
	if (memLimit < operatorLimit)
	{ 
		cat("Proximity matrix is likely to be too big. Model will compute a 'n x B' matrix, with B = number of trees.\n")
		proxMatrix = matrix(0L, B, n)
		fullMatrix = FALSE
	}
	else
	{	proxMatrix = matrix(0L, n, n) }
		
	if (fullMatrix)
	{	
		proxMat = proxMatrix
		if (!sparseProximities)
		{
			if (pthreads == 1)
			{
				for (i in 1:n)
				{		
					for (b in 1:B)
					{
						common.idx = which(votesData[[b]][i,3] == votesData[[b]][,3])
						proxMatrix[i, common.idx] = 1L + proxMatrix[i,common.idx] 
					}
				}			
			}
			else
			{
				proxMatrix <- foreach(i = 1:n, .combine = rbind, .multicombine = FALSE) %dopar%
				{		
					for (b in 1:B)
					{
						common.idx = which(votesData[[b]][i,3] == votesData[[b]][,3])
						proxMat[i, common.idx] = 1L + proxMat[i, common.idx] 
					}
					proxMat[i,] 
				}
				stopCluster(Cl)
			}
		}
		else
		{
			if (pthreads == 1)
			{
				for (i in 1:n)
				{		
					for (b in 1:B)
					{
						common.idx = which(votesData[[b]][i,3] == votesData[[b]][,3])
						proxMatrix[i, common.idx] = 1L 
					}
				}			
			}
			else
			{
				proxMatrix <- foreach(i = 1:n, .combine = rbind, .multicombine = TRUE) %dopar%
				{		
					for (b in 1:B)
					{
						common.idx = which(votesData[[b]][i,3] == votesData[[b]][,3])
						proxMat[i, common.idx] = 1L 
					}
					proxMat[i,] 
				}
				stopCluster(Cl)
			}
		}
	}
	else
	{	
		proxMatrix = matrix(0L, n, B)
		proxMat = proxMatrix
		
		if (!sparseProximities)
		{
			if (pthreads == 1)
			{
				for (i in 1:n)
				{		
					for (b in 1:B)
					{
						common.idx = which((votesData[[b]][i,3] == votesData[[b]][,3]))
						proxMatrix[common.idx, b] = 1L + proxMatrix[common.idx,b] 
					}
				}
			}
			else
			{
				proxMatrix <- foreach(i = 1:n, .combine = rbind, .multicombine = TRUE) %dopar%
				{		
					for (b in 1:B)
					{
						common.idx = which((votesData[[b]][i,3] == votesData[[b]][,3]))
						proxMat[common.idx, b] = 1L + proxMat[common.idx,b] 
					}
					proxMat[i,]
				}
				stopCluster(Cl)
			}
		}
		else
		{
			if (pthreads == 1)
			{
				for (i in 1:n)
				{		
					for (b in 1:B)
					{
						common.idx = which((votesData[[b]][i,3] == votesData[[b]][,3]))
						proxMatrix[common.idx, b] = 1L 
					}
				}
			}
			else
			{
				proxMatrix <- foreach(i = 1:n, .combine = rbind, .multicombine = TRUE) %dopar%
				{		
					for (b in 1:B)
					{
						common.idx = which((votesData[[b]][i,3] == votesData[[b]][,3]))
						proxMat[common.idx, b] = 1L 
					}
					proxMat[i,]
				}
				stopCluster(Cl)
			}		
		}		
	}
	if (fullMatrix) { nn = n }
	else { nn = B }
	varNames = paste("C", 1:nn, sep = "")
              	
	colnames(proxMatrix) = varNames
	rownames(proxMatrix) = if (!is.null(rownames(Xtest))) { rownames(Xtest) } else { 1:n }
	
	if (sparseProximities) { return(proxMatrix) } else { return(proxMatrix/B) }
}
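
# Illustrative sketch (comments only): proximities of test observations under a
# trained forest ('model' and 'X' are placeholders). In sparse mode, entry
# (i, j) is 1 if observations i and j fall at least once in the same terminal
# node; otherwise proximities are averaged over the trees.
#
# proxMat <- proximitiesMatrix(model, Xtest = X, sparseProximities = TRUE, pthreads = 1)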


kBiggestProximities <- function(proximitiesMatrix, k) t(apply(proximitiesMatrix, 1, function(Z) 
sort(Z, decreasing = TRUE)[1:(k+1)][-1] ))

MDSscale <- function(proximitiesMatrix, metric = c("metricMDS", "nonMetricMDS"), dimension = 2, distance = TRUE, distanceMethod = c("euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski"),
plotting = TRUE, seed = 2014)
{
	set.seed(seed)
	if (metric[1] == "metricMDS")  
	{ 
		if (distance)  { d = stats::dist(1 - proximitiesMatrix, method = distanceMethod[1]) }
		else 
		{ 
			if (nrow(proximitiesMatrix) == ncol(proximitiesMatrix)) {  d = 1 - proximitiesMatrix  }
			else { d = stats::dist(1 - proximitiesMatrix); cat("Matrix is not a square matrix. Distance is used.\n") }
		}
		fit = stats::cmdscale(d, eig = TRUE, k = dimension) 
	}	
	else
	{	
		eps = 1e-6
		if (distance)  { d = eps + stats::dist(1 - proximitiesMatrix, method = distanceMethod[1]) }
		else 
		{ 
			if (nrow(proximitiesMatrix) == ncol(proximitiesMatrix)) {	d = 1 - (proximitiesMatrix - eps)  }
			else 
			{ 
				d = eps + stats::dist(1 - proximitiesMatrix, method = distanceMethod[1])
				cat("Matrix is not a square matrix. Distance is used.\n") 
			}
		}
		
		#require(MASS)
		fit = MASS::isoMDS(d, k = dimension)
	}
		
	if (plotting)
	{ 
		x <- fit$points[,1]
		y <- fit$points[,2]
		plot(x, y, xlab = "Coordinate 1", ylab = "Coordinate 2", main = metric[1], type = "p", lwd = 1, pch = 20)
	}
	
	return(fit)
}
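
# Illustrative sketch (comments only): embedding '1 - proximities' in two
# dimensions with metric MDS; 'proxMat' is assumed to come from
# proximitiesMatrix() above.
#
# fit <- MDSscale(proxMat, metric = "metricMDS", dimension = 2, plotting = FALSE)
# head(fit$points)  # one 2D coordinate per observation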

specClust <- function(proxMat, k = floor(sqrt(ncol(proxMat))))
{
	A = proxMat
	diag(A) = 0
	np = dim(A)
	n = np[1]
	p = np[2]
	D = matrix(0, n, p)
	diag(D) = (rowSums(A))^(-0.5)
	L = (D%*%A%*%D)
	
	X = eigen(L)$vectors[,1:k]
	Xcol = (colSums(X^2))^0.5

	# normalize each eigenvector (column) to unit norm; plain 'X/Xcol' would
	# recycle 'Xcol' along rows instead of columns
	return(sweep(X, 2, Xcol, "/"))
}
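
# specClust() follows the usual normalized spectral embedding: with affinity A
# (diagonal set to 0) and degree matrix D, it keeps the first k eigenvectors of
# L = D^(-1/2) A D^(-1/2), normalized column-wise, on which a clustering
# algorithm can then be run. Illustrative sketch (comments only), reusing
# 'proxMat' from above:
#
# spectralPoints <- specClust(proxMat, k = 2)
# clusters <- kMeans(spectralPoints, NULL, k = 2, plotting = FALSE)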

gap.stats <- function(fit, B = 100, maxClusters = 5, seed = 2014)
{
	set.seed(seed)
	#require(cluster)
	return(cluster::clusGap(fit, FUN = kmeans, K.max = max(2, maxClusters), B = B))	
}
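
# Illustrative sketch (comments only): choosing the number of clusters with the
# gap statistic, then clustering the MDS points ('fit' comes from MDSscale()).
#
# gapStat <- gap.stats(fit$points, B = 50, maxClusters = 6)
# clusters <- kMeans(fit$points, gapStat, plotting = FALSE)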

kMeans <- function(fit, gapStatFit, maxIters = 10, nstart = 1, plotting = TRUE, algorithm = NULL, k = NULL, 
reduceClusters = FALSE, seed = 2014)
{
	set.seed(seed)
	if (is.null(maxIters)) { maxIters = 10 }
	if (is.null(algorithm)) { algorithm = "Hartigan-Wong" }
	
	if (is.null(k))
	{
		k = cluster::maxSE(gapStatFit$Tab[, "gap"], gapStatFit$Tab[, "SE.sim"])
		if (k == 1) { cat("Only one cluster has been found.\nNumber of clusters has been set to 2.\n") ; k = 2 }
		if (k == length(gapStatFit$Tab[, "gap"])) 
		{  cat("\nNumber of clusters found is equal to the maximum number of clusters allowed.\n") }
	}
	fit.kMeans <- stats::kmeans(fit, k, iter.max = maxIters, algorithm = algorithm, nstart = nstart)
	
	if ( (k > 2) && reduceClusters)
	{
		clusterSizes = table(fit.kMeans$cluster)
		smallSizes = which(clusterSizes < 0.05*length(fit[,1]))
		nSmallSizes = length(smallSizes)
		if (nSmallSizes > 0)
		{ 
			smallSizes = sort(smallSizes, decreasing = TRUE)
			for (i in 1:nSmallSizes)
			{	
				fit.kMeans$cluster[which(fit.kMeans$cluster == smallSizes[i])] = 
				as.numeric(names(clusterSizes))[max(1, smallSizes[i] - 1)]
			}
		}		
	}
	
	if (plotting)
	{
		x = fit[,1]
		y = fit[,2]
		plot(x, y, xlab = "Coordinate 1", ylab = "Coordinate 2", main = "Multi-dimensional scaling", 
		type = "p", lwd = 1, pch = 20)
		for (i in 1:k)
		{  
			idx = which(fit.kMeans$cluster == i)
			points(x[idx], y[idx], type = "p", lwd = 1, pch = 20, col = i)
		}
	}
	
	return(fit.kMeans)
}

hClust <- function(proximities, method = NULL, plotting = TRUE, k = NULL, reduceClusters = FALSE, 
distanceMethod = c("euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski"),
seed = 2014)
{
	set.seed(seed)
	if (is.null(method)) { method = "complete" }
	d <- stats::dist(1 - proximities, method = distanceMethod[1])
	XClust <- stats::hclust(d, method = method,  members = NULL)
	
	if (is.null(k))
	{
		diffHeightsIdx = which.max(diff(XClust$height))
		height = (XClust$height[diffHeightsIdx] + XClust$height[diffHeightsIdx + 1])/2
		values <- stats::cutree(XClust, h = height)
		nbClusters = length(unique(values))
	}
	else
	{
		nbClusters = k
		values <- stats::cutree(XClust, k = nbClusters)
		height = NULL
	}
	
	if ( (nbClusters > 2) && reduceClusters)
	{
		clusterSizes = table(values)
		smallSizes = which(clusterSizes < 0.05*length(values))
		nSmallSizes = length(smallSizes)
		if (nSmallSizes > 0)
		{ 
			smallSizes = sort(smallSizes, decreasing = TRUE)
			for (i in 1:nSmallSizes)
			{ values[which(values == smallSizes[i])] = as.numeric(names(clusterSizes))[max(1, smallSizes[i] - 1)] }
		}		
	}
	
	if (plotting)
	{	
		showLabels = if (nrow(proximities) < 300) { TRUE } else { FALSE }
		plot(XClust, xlab = "Observations", labels = showLabels)
		# A2Rplot(XClust, k = nbClusters, boxes = TRUE, show.labels = FALSE, main = "Dendrogram")
		if (!is.null(height)) { abline(h = height , col='red') }
	}
	
    return(list(object = XClust, cluster = values))
}
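
# Illustrative sketch (comments only): hierarchical alternative to k-means.
# When 'k' is omitted, the dendrogram is cut at the largest jump between
# successive merge heights; 'proxMat' is the proximity matrix from above.
#
# hc <- hClust(proxMat, method = "ward.D2", plotting = FALSE)
# table(hc$cluster)  # cluster sizes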

observationsImportance <- function(X, importanceObject)
{
	X = factor2matrix(X)
	object = importanceObject$localVariableImportance$obsVariableImportance[,-1]
	idx = grep("localVariableFrequency",colnames(object))
	object = object[,-idx]
	n = nrow(X)
	XX = matrix(NA, n, ncol(object))
	for (i in 1:nrow(X))
	{	XX[i,] = X[i, object[i,]]	}
	
	rownames(XX) = 1:n
	return(XX)
}

print.unsupervised <- function(x, ...)
{
	object = x
	rm(x)
	Z = object$unsupervisedModel$cluster
	if (!is.factor(Z))
	{
		if (!is.null(object$unsupervisedModel$clusterOutliers))
		{	
			Z = c(Z, object$unsupervisedModel$clusterOutliers)				
			Z = sortDataframe( data.frame(Z, as.numeric(names(Z))), 2)
			Z = Z[,1]
		}
	}	
	ZZ = table(Z)
	names(attributes(ZZ)$dimnames) = NULL
	
	if (object$params["endModel"] == "MDS")
	{
		x = object$MDSModel$points[,1]
		y = object$MDSModel$points[,2]
		percentExplained = (interClassesVariance(x, Z) +  interClassesVariance(y, Z))/(variance(x) + variance(y))
		
		cat("Average variance between clusters (in percent of total variance): ", round(100*percentExplained,2), "%\n", sep = "")
				
		cat("Average silhouette: ", round(mean(cluster::silhouette(as.numeric(Z), dist(object$MDSModel$points))[,3]),4), "\n",  sep = "")
		cat("Clusters size:\n")
		print(ZZ)
		cat("Clusters centres (in the MDS coordinates):\n") 
		print(round(object$unsupervisedModel$centers,4))		
	}
		
	if ((object$params["endModel"] == "MDSkMeans") || (object$params["endModel"] == "SpectralkMeans"))
	{
		cat("Average variance between clusters (in percent of total variance): ", round(100*sum(object$unsupervisedModel$betweenss)/sum(object$unsupervisedModel$totss),2), "%\n", sep = "")
				
		cat("Average silhouette: ", round(mean(cluster::silhouette(Z, dist(object$MDSModel$points))[,3]),4), "\n",
		sep = "")
		cat("Clusters size:\n")
		print(ZZ)
		cat("Clusters centres (in the MDS coordinates):\n") 
		print(round(object$unsupervisedModel$centers,4))		
	}
	
	if (object$params["endModel"] == "MDShClust")
	{
		x = object$MDSModel$points[,1]
		y = object$MDSModel$points[,2]
		
		percentExplained1 = interClassesVariance(x, Z)/variance(x)
		percentExplained2 = interClassesVariance(y, Z)/variance(y)
		percentExplained  = round(0.5*(percentExplained1 + percentExplained2), 4)				
		cat("Average variance between clusters (in percent of total variance) : ", percentExplained*100, "%\n", sep = "")
		
		cat("Average silhouette : ", round(mean(cluster::silhouette(Z, dist(object$MDSModel$points))[,3]),4), "\n",  sep = "")
		cat("Clusters size:\n")
		print(ZZ)
		cat("Clusters centers (in the MDS coordinates):\n") 
		print(round(object$unsupervisedModel$centers,4))		
	}
}

plot.unsupervised <- function(x, importanceObject = NULL, xlim = NULL, ylim = NULL, coordinates = NULL,...)
{
	object = x
	rm(x)
	if (is.null(coordinates)) 
	{ 
		coordinates = c(as.numeric( substr(object$params["coordinates"], 1, 1)), 
		as.numeric( substr(object$params["coordinates"], 2, 2)) )
		if (ncol(object$MDSModel$points) == 2) 
		{ 
			if (coordinates[1] > 1)
			{ 
				cat(paste("Coordinates", coordinates[1], "and", coordinates[2], "have been plotted, but appear as Coordinate 1 and Coordinate 2.\n", sep=" "))
			}
			coordinates = 1:2 
		}		
	}
	else
	{
		if (ncol(object$MDSModel$points) == 2) coordinates = 1:2 
	}

	x = object$MDSModel$points[,coordinates[1]]
	y = object$MDSModel$points[,coordinates[2]]
	
	offsetY = 0
	if (!is.null(importanceObject))
	{
		clusterFeatures = importanceObject$localVariableImportance$classVariableImportance
		p = ncol(clusterFeatures)
		clusterFeaturesNames = NULL
		varNames = varValues = vector(length = p)
		for (j in 1:p)
		{
			clusterFeatures = sortDataframe(clusterFeatures, j, decrease = TRUE)
			idx = which(clusterFeatures[,j] > 0.05)
			varNames[j] = rownames(clusterFeatures)[idx[1]]
			varValues[j] = clusterFeatures[idx[[1]],j]
			clusterFeaturesNames = c(clusterFeaturesNames, paste(varNames[j], ":", round(100*varValues[j],0), "%",	sep ="" ))
		}
		offsetY = -abs(diff(range(y)))*0.1
	}
		
	clusters = object$unsupervisedModel$cluster
	
	if (object$params["endModel"] != "MDS")
	{
		if (!is.null(object$unsupervisedModel$clusterOutliers))
		{	
			clusters = c(clusters, object$unsupervisedModel$clusterOutliers)				
			clusters = sortDataframe( data.frame(clusters, as.numeric(names(clusters))),2)
			clusters = clusters[,1]
		}
	}
			
	offsetX = abs(diff(range(x)))*0.3
	uniqueClusters = sort(unique(clusters))
	
	dev.new()
	if (object$params["endModel"] == "MDShClust")
	{
		dev.off()
		#percentExplained = 0.5*(percentExplained1 + percentExplained2)
		nbClusters =  length(uniqueClusters)
		
		XClust = object$unsupervisedModel$object
		diffHeightsIdx = which.max(diff(XClust$height))
		height = (XClust$height[diffHeightsIdx] + XClust$height[diffHeightsIdx + 1])/2				
		showLabels = if (nrow(object$proximityMatrix) < 300) { TRUE } else { FALSE }
		plot(XClust, labels = showLabels)
		abline(h = height , col='red') 
		
		# A2Rplot(object$unsupervisedModel$object, k = nbClusters, boxes = TRUE, show.labels = FALSE, 
		# main = "Dendrogram", col.down = nbClusters:1)
		
		cat("Two graphics have been plotted. Please slide the window to see the second one.\n")
		dev.new()		
	}
	
	if (is.null(xlim)) { xlim = c(min(x) - offsetX, max(x) + offsetX) }
	if (is.null(ylim)) { ylim = c(min(y) + offsetY, max(y)) }
	
	if (length(grep("MDS", object$params["endModel"])) == 1)  
	{ plotTitle =  "Multidimensional scaling and Clusters representation" }
	else 
	{  plotTitle =  "Spectral decomposition and Clusters representation" }
	
	plot(x, y, xlab = paste("Coordinate", coordinates[1]), ylab = paste("Coordinate", coordinates[2]), 
	main = plotTitle, type = "p", lwd = 1, pch = 20, xlim = xlim, ylim = ylim)
	nbClusters = length(uniqueClusters)
	for (i in 1:nbClusters)
	{  
		idx = which(clusters == uniqueClusters[i])
		points(x[idx], y[idx], type = "p", lwd = 1, pch = 20, col = i)
		#if (object$params["endModel"] != "MDShClust")
		{	
			points(object$unsupervisedModel$centers[i,coordinates[1]], 
			object$unsupervisedModel$centers[i,coordinates[2]], lwd = 1, cex = 1.5, pch = 8, col = i)	
		}
	}
		
	if (!is.null(importanceObject))
	{ 	
		clusterNames = rm.string(colnames(clusterFeatures), "Class ")
		legend("topright", inset = .01, clusterNames, fill = 1:i, horiz = FALSE, border = NA, bty ="n")
		legend("bottomleft", cex = 0.7, as.character(clusterFeaturesNames), fill = 1:i, horiz = FALSE, border = NA, bty ="n")		
		print(clusterFeatures)		
	}
	else
	{ legend("topright", inset = .01, as.character(uniqueClusters), fill = 1:i, horiz = FALSE, border = NA, bty ="n")}
	
}

modifyClusters <- function(object, decreaseBy = NULL, increaseBy = NULL, seed = 2014)
{
	params = object$modelParams
	endModel = object$params["endModel"]
	nbClusters = object$nbClusters
	
	if (!is.null(decreaseBy))
	{	k = nbClusters - decreaseBy }
	
	if (!is.null(increaseBy))
	{	k = nbClusters + increaseBy }
	
	if (is.null(decreaseBy) && (is.null(increaseBy)))
	{	stop("Please increase or decrease number of clusters")  }
	
	if (k <= 1) 
	{ 
		cat("Only one cluster will remain. Model has not been modified.\n") 
		return(object)
	}
	else
	{
		endModelMetric = if (object$params["endModelMetric"] == "NULL") { NULL } 
		else { object$params["endModelMetric"] }
		Z = object$MDSModel$points
		
		if ( (endModel == "MDSkMeans") || (endModel == "SpectralkMeans") )
		{
			maxIters = if (object$params["maxIters"] == "NULL") { 10 } else { as.numeric(object$params["maxIters"]) }
			nstart = as.numeric(object$params["nstart"])
			X.model <- kMeans(Z, NULL, k = k, maxIters = maxIters, algorithm = endModelMetric, plotting = FALSE, reduceClusters = FALSE, seed = seed, nstart = nstart)
		}
		
		if (endModel == "MDShClust")
		{	
			X.model <- hClust(Z, method = endModelMetric, plotting = FALSE, k = k, reduceClusters = FALSE, seed = seed)
			centers = matrix(NA, k, ncol(Z))
			for (i in 1:k)
			{
				idx = which(X.model$cluster == i)
				centers[i,] = colMeans(Z[idx,, drop = FALSE])
			}
			X.model$centers = centers
		}
		object$unsupervisedModel = NULL
		object$unsupervisedModel = X.model  
		object$nbClusters = k
		
		return(object)
	}
}

splitClusters <- function(object, whichOnes, seed = 2014, ...) 
{
	cat("Note that outliers are not currently taken into account.\n")
	whichOnes = sort(whichOnes)
	
	endModel = object$params["endModel"]
	if (exists("endModelMetric", inherits = FALSE)) { endModelMetric = endModelMetric }
	else
	{
		if (object$params["endModelMetric"] == "NULL") { endModelMetric = NULL } 
		else { endModelMetric = object$params["endModelMetric"] }	
	}
	Z = object$MDSModel$points
	nClusters = length(unique(object$unsupervisedModel$cluster))
		
	for (i in 1:length(whichOnes))
	{
		idx = which(object$unsupervisedModel$cluster == whichOnes[i])	
		if (length(idx) == 0) { stop(paste("No elements available for cluster", i,".\n")) }
		
		if ( (endModel == "MDSkMeans") || (endModel == "SpectralkMeans") )
		{
			if (exists("maxIters", inherits = FALSE)) { maxIters = maxIters }
			else
			{
				if (object$params["maxIters"] == "NULL") { maxIters = 10 } 
				else { maxIters = as.numeric(object$params["maxIters"]) }
			}
			nstart = as.numeric(object$params["nstart"])
			X.model <- kMeans(Z[idx,], NULL, k = 2, maxIters = maxIters, algorithm = endModelMetric, plotting = FALSE, reduceClusters = FALSE, seed = seed, nstart = nstart)	
			object$unsupervisedModel$betweenss = 0.5*(object$unsupervisedModel$betweenss + X.model$betweenss)
			object$unsupervisedModel$totss = 0.5*(object$unsupervisedModel$totss + X.model$totss)
		}
		
		if (endModel == "MDShClust")
		{	
			X.model <- hClust(Z, method = endModelMetric, plotting = FALSE, k = 2, reduceClusters = FALSE, seed = seed)
			centers = matrix(NA, 2, ncol(Z))
			for (j in 1:2)
			{
				idx2 = which(X.model$cluster == j)
				centers[j,] = colMeans(Z[idx2, , drop = FALSE])
			}
			X.model$centers = centers
		}
		  
		idx3 = which(X.model$cluster == 2)
		object$unsupervisedModel$cluster[idx][idx3] = nClusters + i
	}
	
	nClusters = length(unique(object$unsupervisedModel$cluster))
	centers = matrix(NA, nClusters, ncol(Z))
	for (cl in 1:nClusters)
	{
		idx = which(object$unsupervisedModel$cluster == cl)
		centers[cl,] = colMeans(Z[idx, , drop = FALSE])
	}
	
	object$unsupervisedModel$centers = centers
	rownames(object$unsupervisedModel$centers) = 1:nClusters
		
	return(object)
}

mergeClusters <- function(object, whichOnes)
{
	whichOnes = sort(whichOnes)
	
	idx1 = which(object$unsupervisedModel$cluster == whichOnes[1])	
	object$unsupervisedModel$cluster[idx1] = whichOnes[2]
	
	flag = FALSE
	if (!is.null(object$unsupervisedModel$clusterOutliers))
	{  
		idx1 = which(object$unsupervisedModel$clusterOutliers == whichOnes[1])
		if (length(idx1) > 0) { object$unsupervisedModel$clusterOutliers[idx1] = whichOnes[2] }
		flag = TRUE
	}
		
	idx2 = which(object$unsupervisedModel$cluster == whichOnes[2])
	object$unsupervisedModel$centers[whichOnes[2],] = t(colMeans(object$MDSModel$points[idx2,]))
	object$unsupervisedModel$centers = object$unsupervisedModel$centers[-whichOnes[1],]
	
	idx = sort(unique(object$unsupervisedModel$cluster))
	for (i in 1:length(idx))
	{
		newIdx = which(object$unsupervisedModel$cluster == idx[i])
		object$unsupervisedModel$cluster[newIdx] = i
		if (flag)
		{  
			newOutliersIdx = which(object$unsupervisedModel$clusterOutliers == idx[i])
			if (length(newOutliersIdx) > 0)  { object$unsupervisedModel$clusterOutliers[newOutliersIdx] = i }
		}
	}
	rownames(object$unsupervisedModel$centers) = 1:length(idx)
	cat(paste("\ncluster ", idx, " has been renumbered to ", 1:length(idx), sep = ""))
	cat("\n")
	
	return(object)
}
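
# A minimal usage sketch for mergeClusters(), the inverse operation of splitClusters()
# (hypothetical; 'uObject2' comes from the splitClusters() sketch above):
# uObject3 = mergeClusters(uObject2, c(1, 3)) # merge clusters 1 and 3, then renumber all clusters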

clusteringObservations <- function(object, X, OOB = TRUE, predObject = NULL, importanceObject = NULL, 
baseModel = c("proximity", "proximityThenDistance", "importanceThenDistance"),
MDSmetric = c("metricMDS", "nonMetricMDS"),
endModel = c("MDS", "MDSkMeans", "MDShClust", "SpectralkMeans"),
...)
{
	clusterObject <- unsupervised.randomUniformForest(object, Xtest = X, importanceObject = importanceObject, predObject = predObject, baseModel = baseModel[1], MDSmetric = MDSmetric[1], endModel = endModel[1], 
	outliersFilter = FALSE, OOB = OOB, ...)
	plot(clusterObject, importanceObject = importanceObject, ...)
	return(clusterObject)
}
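
# A minimal usage sketch for clusteringObservations() (hypothetical; clusters and plots
# the observations of a supervised model, here using its OOB votes):
# rufObject = randomUniformForest(Species ~ ., data = iris)
# clusterObject = clusteringObservations(rufObject, iris[, -5], OOB = TRUE)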

as.supervised <- function(object, X, ...)
{
	if (!(inherits(object,"unsupervised"))) stop("Please provide an unsupervised randomUniformForest object.\n")
	n = nrow(X)
	clusters = object$unsupervisedModel$cluster
	if (!is.null(object$unsupervisedModel$clusterOutliers))
	{ clusters = c(clusters, object$unsupervisedModel$clusterOutliers) }
	if (is.null(names(clusters))) { names(clusters) = 1:n }
	clusters  = sortDataframe(data.frame(clusters, as.numeric(names(clusters))), 2)
	Y = as.factor(clusters[,1])
	
	if (n < 2000) { rUFObject <- randomUniformForest(X, Y, ...) }
	else { rUFObject <- rUniformForest.big(X, Y, nforest = max(2, floor(n/2000)), randomCut = TRUE, ...)  }
	
	return(rUFObject)	
}
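
# A minimal usage sketch for as.supervised() (hypothetical; turns the clusters of an
# 'unsupervised' object into labels and learns them with a supervised forest):
# supervisedObject = as.supervised(uObject, iris[, -5])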

update.unsupervised <- function(object, X, oldData = NULL, mapAndReduce = FALSE, updateModel = FALSE, ...)
{
	endModel = object$params["endModel"]
	clusters = as.numeric(object$params["clusters"])
	maxIters = if (object$params["maxIters"] == "NULL") NULL else as.numeric(object$params["maxIters"])
	endModelMetric = if (object$params["endModelMetric"] == "NULL") NULL else object$params["endModelMetric"]
	reduceClusters = if (object$params["reduceClusters"] == "FALSE") FALSE else TRUE
	seed = as.numeric(object$params["seed"])
	nstart = as.numeric(object$params["nstart"])
	
	if ( (endModel != "MDShClust") && (endModel != "MDSkMeans") && (endModel != "SpectralkMeans") )
	{ 
		stop("Update is only available for models which were using endModel = 'MDShClust', endModel = 'MDSkMeans' or endModel = 'SpectralkMeans' options") 
	}
	
	p = ncol(object$MDSModel$points)
	updateLearningMDS = predictedMDS = learningMDS = vector('list', p)
	if (is.null(object$largeDataLearningModel))
	{
		if (is.null(oldData))
		{ stop("Former data are needed to (learn MDS points and) update model.\n") }
		else
		{
			if (mapAndReduce)
			{	
				for (j in 1:p)
				{ learningMDS[[j]] = rUniformForest.big(oldData, object$MDSModel$points[,j], randomCut = TRUE, ...) } 
			}
			else
			{
				for (j in 1:p)
				{ learningMDS[[j]] = randomUniformForest(oldData, object$MDSModel$points[,j], ...)  }
			}
		}
	}
	else
	{
		for (j in 1:p)
		{ learningMDS[[j]] = object$largeDataLearningModel[[j]] }
	}
	
	formerMDSPoints = object$MDSModel$points
	for (j in 1:p)
	{  predictedMDS[[j]] = predict(learningMDS[[j]], X) }
	
	newMDSPoints = do.call(cbind, predictedMDS)		
	Z = rbind(formerMDSPoints, newMDSPoints)
	
	if (updateModel)
	{
		if (mapAndReduce)
		{	
			for (j in 1:p)
			{  updateLearningMDS[[j]] = rUniformForest.big(X, predictedMDS[[j]], randomCut = TRUE, ...) }
		}
		else
		{
			for (j in 1:p)	{ updateLearningMDS[[j]] = randomUniformForest(X, predictedMDS[[j]], ...) } 
		}
		
		for (j in 1:p) 	{ learningMDS[[j]] = rUniformForest.combine(learningMDS[[j]], updateLearningMDS[[j]]) }
	}
	
	if ( (endModel[1] == "MDSkMeans") || (endModel[1] == "SpectralkMeans") )
	{
		X.model <- kMeans(Z, NULL, k = clusters, maxIters = maxIters, plotting = FALSE, algorithm = endModelMetric, reduceClusters = reduceClusters, seed = seed, nstart = nstart)			
	}
			
	if (endModel[1] == "MDShClust")  
	{	
		X.model <- hClust(Z, method = endModelMetric, plotting = FALSE, k = clusters, 
		reduceClusters = reduceClusters,  seed = seed)
		centers = matrix(NA, clusters, ncol(Z))
		for (i in 1:clusters)
		{
			idx = which(X.model$cluster == i)
			centers[i,] = colMeans(Z[idx,, drop = FALSE])
		}
		X.model$centers = centers
	}
	
	object$MDSModel$points = Z
	largeDataLearningModel = learningMDS
	unsupervisedObject = list(proximityMatrix = object$proximityMatrix, MDSModel = object$MDSModel, 
		unsupervisedModel = X.model, largeDataLearningModel = largeDataLearningModel, gapStatistics = NULL, 
		rUFObject = object$rUFObject, nbClusters = clusters, params = object$params) 	
	
	class(unsupervisedObject) <- "unsupervised"
	
	unsupervisedObject
}
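
# A minimal usage sketch for update.unsupervised() (hypothetical; learn on a first batch,
# then update with new data, providing the former data so that MDS points can be learned):
# idx = 1:100
# uObject = unsupervised.randomUniformForest(iris[idx, -5], endModel = "MDSkMeans", clusters = 3)
# uObjectUpdated = update.unsupervised(uObject, iris[-idx, -5], oldData = iris[idx, -5])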

combineUnsupervised <- function(...)
{
	object <- list(...)
	n = length(object)
	
	i = 1
	endModel = object[[i]]$params["endModel"]
	clusters = as.numeric(object[[i]]$params["clusters"])
	maxIters = if (object[[i]]$params["maxIters"] == "NULL") NULL else as.numeric(object[[i]]$params["maxIters"])
	endModelMetric = if (object[[i]]$params["endModelMetric"] == "NULL") NULL else object[[i]]$params["endModelMetric"]
	reduceClusters = if (object[[i]]$params["reduceClusters"] == "FALSE") FALSE else TRUE
	seed = as.numeric(object[[i]]$params["seed"])
	nstart = as.numeric(object[[i]]$params["nstart"])
	
	if ( (endModel != "MDShClust") && (endModel != "MDSkMeans") && (endModel != "SpectralkMeans") )
	{ 
		stop("Combine is only available for models which were using endModel = 'MDShClust', endModel = 'MDSkMeans' or endModel = 'SpectralkMeans' options") 
	}
		
	Z = NULL
	for (i in 1:n)
	{   Z = rbind(Z, object[[i]]$MDSModel$points)  }
		
	if ( (endModel[1] == "MDSkMeans") || (endModel != "SpectralkMeans"))
	{
		X.model <- kMeans(Z, NULL, k = clusters, maxIters = maxIters, plotting = FALSE, algorithm = endModelMetric,  reduceClusters = reduceClusters, seed = seed, nstart = nstart)			
	}
			
	if (endModel[1] == "MDShClust")  
	{	
		X.model <- hClust(Z, method = endModelMetric, plotting = FALSE, k = clusters, 
		reduceClusters = reduceClusters, seed = seed)
		X.gapstat = NULL	
	}
	
	X.scale = list()
	X.scale$points = Z
		
	unsupervisedObject = list(proximityMatrix = NULL, MDSModel = X.scale, 
		unsupervisedModel = X.model, largeDataLearningModel = NULL, gapStatistics = NULL, 
		rUFObject = NULL, nbClusters = clusters, params = object[[1]]$params) 	
	
	class(unsupervisedObject) <- "unsupervised"
	
	unsupervisedObject
}
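
# A minimal usage sketch for combineUnsupervised() (hypothetical; combine models trained
# on disjoint chunks of the same data):
# uObject1 = unsupervised.randomUniformForest(iris[1:75, -5], endModel = "MDSkMeans", clusters = 3)
# uObject2 = unsupervised.randomUniformForest(iris[76:150, -5], endModel = "MDSkMeans", clusters = 3)
# uCombined = combineUnsupervised(uObject1, uObject2)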

scalingMDS <- function(object)
{
	if (is.null(object$MDSModel$standardizedPoints))
	{	object$MDSModel$points = standardize(object$MDSModel$points) }
	else
	{   object$MDSModel$points = object$MDSModel$standardizedPoints 	}
	
	return(object)
}	
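
# A minimal usage sketch for scalingMDS() (hypothetical; standardizes the MDS points
# of an 'unsupervised' object, e.g. before plotting or re-clustering):
# uObjectScaled = scalingMDS(uObject)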

updateCombined.unsupervised <- function(object, X, mapAndReduce = FALSE, ...)
{
	if (!is.null(object$largeDataLearningModel))
	{	
		stop("The function can only rebuild a learning model of MDS (or spectral) points for formerly combined objects.\n") 
	}
	
	if (nrow(X) !=  nrow(object$MDSModel$points))
	{ 	stop("MDS (or spectral) points and data do not have the same number of rows.\n") }		
	
	p = ncol(object$MDSModel$points)
	learningMDS = vector('list', p)
	if (mapAndReduce)
	{	
		for (j in 1:p)
		{	learningMDS[[j]] = rUniformForest.big(X, object$MDSModel$points[,j], randomCut = TRUE, ...) }
	}
	else
	{
		for (j in 1:p)
		{	learningMDS[[j]] = randomUniformForest(X, object$MDSModel$points[,j], ...) }
	}
	
	unsupervisedObject = list(proximityMatrix = NULL, MDSModel = object$MDSModel, 
		unsupervisedModel = object$unsupervisedModel, largeDataLearningModel = learningMDS, 
		gapStatistics = object$gapStatistics, rUFObject = object$rUFObject, nbClusters = object$nbClusters, 
		params = object$params) 	
	
	class(unsupervisedObject) <- "unsupervised"
	
	unsupervisedObject
}
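
# A minimal usage sketch for updateCombined.unsupervised() (hypothetical; rebuilds a learning
# model of the MDS points for a combined object, using the data matching its points):
# uCombined2 = updateCombined.unsupervised(uCombined, iris[1:150, -5])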

clusterAnalysis <- function(object, X, components = 2, maxFeatures = 2, clusteredObject = NULL, categorical = NULL, 
OOB = FALSE)
{
	flagList = regression = FALSE
	if (!is.null(clusteredObject)) 
	{ 
		if (is.list(clusteredObject)) 
		{ 
			flagList = TRUE 
			regression = clusteredObject$forest$regression
		}
		else
		{	
			if (is.numeric(clusteredObject)) { regression = TRUE }
		}
	}
		
	
	p = ncol(X)
	if (regression)
	{  featuresAndObs = as.data.frame(object$localVariableImportance) }
	else
	{  featuresAndObs = as.data.frame(object$localVariableImportance$obsVariableImportance)	}
	p.new = floor((ncol(featuresAndObs)-1)/2)	
	if (p.new < components) 
	{ 
		cat("Maximal number of components is set to 2. Increase 'maxInteractions' option\nin 'importance()' function in order to assign more components.\n")
		components = p.new
	}
	n = nrow(X)
	varIdx = c(1:(components+1), (p.new+2):(p.new + components + 1))
	   
	if (inherits(clusteredObject, "unsupervised")[1]) { clusterName = "Clusters" } 
	else { clusterName = "Class" }
		
	frequencyFeaturesIdx = grep("Frequency", colnames(featuresAndObs))
	featuresNames = apply(featuresAndObs[,-c(1,frequencyFeaturesIdx)], 2, function(Z) colnames(X)[Z])
	featuresAndObs[,-c(1,frequencyFeaturesIdx)] = featuresNames
	
	if (regression)
	{
		colnames(featuresAndObs)[1] = "class"
		featuresAndObsProxyForClasses = cut(featuresAndObs[,1], 4)		
		if (flagList) {	clusteredObject$classes = levels(featuresAndObsProxyForClasses)	}
		else { clusteredObject = cut(clusteredObject, 4) }
		featuresAndObs[,1] = as.numeric(featuresAndObsProxyForClasses)
	}

	groupedAnalysis = aggregate(featuresAndObs[,2:min((components+1), p.new+1)], list(featuresAndObs$class), 
	   function(Z) names(sort(table(Z), decreasing = TRUE)), simplify = FALSE)
	   
	groupedAnalysisNew = groupedAnalysis
	pp = ncol(groupedAnalysis)
	for (j in 2:pp)
	{	
		groupedAnalysisNew[[j]] = lapply(groupedAnalysis[[j]], 
		function(Z) as.character(Z[1:(min(length(Z),maxFeatures))])) 
	}
	groupedAnalysis = groupedAnalysisNew
	 	
	groupedFrequencies = aggregate(featuresAndObs[,(2 + p.new):min((p.new + 1 + components), 2*p.new+1)], 
	list(featuresAndObs$class), function(Z) round(mean(Z),4))
	  
	colnames(groupedAnalysis)[1] = colnames(groupedFrequencies)[1] = colnames(featuresAndObs)[1] = clusterName
	newNames = vector(length = pp-1)
	for (i in 2:ncol(groupedAnalysis))	{  newNames[i-1] = paste("Component", i-1, sep = "") }
	colnames(groupedAnalysis)[-1] = newNames
	colnames(groupedFrequencies)[-1] = rm.string(colnames(groupedFrequencies)[-1], "localVariable")
	
	if (flagList)
	{
		if (!is.null(clusteredObject$classes))
		{
			featuresAndObs[,1] = clusteredObject$classes[featuresAndObs[,1]]
			groupedAnalysis[,1] = clusteredObject$classes[groupedAnalysis[,1]]
			groupedFrequencies[,1] = clusteredObject$classes[groupedFrequencies[,1]]
		}
	}
	else
	{
		if (is.factor(clusteredObject))
		{
			featuresAndObs[,1] = levels(clusteredObject)[featuresAndObs[,1]]
			groupedAnalysis[,1] = levels(clusteredObject)[groupedAnalysis[,1]]
			groupedFrequencies[,1] = levels(clusteredObject)[groupedFrequencies[,1]]
		}
	}
	
	cat("Clustered observations:\n")
	print(head(featuresAndObs[,varIdx]))
	cat("...\n\nMost influential features by component (", maxFeatures," features per component):\n", sep="")
	print(groupedAnalysis)
	cat("\nComponent frequencies (", components, " out of ", p," possible ones):\n", sep="")
	print(groupedFrequencies)
	cat("\n\n")	
		
	if (!is.null(clusteredObject))
	{
		aggregateCategorical = aggregateNumerical = NULL
		if (inherits(clusteredObject, "unsupervised")[1]) { Class = mergeOutliers(clusteredObject) }
		else 
		{ 
			if (inherits(clusteredObject, "randomUniformForest.formula")[1] || inherits(clusteredObject, "randomUniformForest")[1]) 
			{
				if (OOB)
				{
					if (is.null(clusteredObject$classes)) { stop("'clusteredObject' is not a classification model") }
					
					if (length(clusteredObject$forest$OOB.predicts) == n)
					{  Class = clusteredObject$classes[clusteredObject$forest$OOB.predicts] }
					else
					{  stop("OOB predictions have not their length equal to the number of rows in the data") }
				}
				else 
				{  
					if (is.null(clusteredObject$classes)) { stop("'clusteredObject' is not a classification model") }
					
					if (!is.null(clusteredObject$predictionObject))
					{	Class = clusteredObject$classes[clusteredObject$predictionObject$majority.vote]	}				
					else
					{  stop("Predictions have not their length equal to the number of rows in the data") }	
				}
			}
			else
			{ 	Class = clusteredObject  }
			Class = as.factor(Class)			
		}
		classSize = tabulate(Class)
		#names(classSize) = NULL
		if (is.null(categorical))  
		{ 
			categorical = which.is.factor(X, maxClasses = n) 
			keepIdx = which(categorical == 0)
		}
		else
		{
			if (is.character(categorical))
			{   categorical = which(colnames(X) %in% categorical) }
			
			keepIdx = (1:p)[-categorical]
		}
			
		pp = length(keepIdx)
		
		if (pp > 0)
		{
			localImportanceVariables = names(object$variableImportanceOverInteractions)
			keepIdx = keepIdx[rmNA(match(rmNA(match(localImportanceVariables, colnames(X))), keepIdx))]
			nFeatures = min(10, length(keepIdx))
			aggregateNumerical = aggregate(X[,keepIdx], list(Class), sum)
			aggregateNumerical = cbind(aggregateNumerical[,1], classSize, aggregateNumerical[,-1])
			colnames(aggregateNumerical)[1] = clusterName
			colnames(aggregateNumerical)[2] = "Size"
			colnames(aggregateNumerical)[-c(1,2)] = colnames(X)[keepIdx]
			cat("Numerical features aggregation (", min(10, nFeatures)," most important ones by their interactions):\n", sep="")
			if (nFeatures < 10) { print(aggregateNumerical) }
			else {  print(aggregateNumerical[,1:10]) }
			
			cat("\n")
			averageNumerical = aggregate(X[,keepIdx], list(Class), function(Z) round(mean(Z), 4))
			averageNumerical = cbind(averageNumerical[,1], classSize, averageNumerical[,-1])
			colnames(averageNumerical)[1] = clusterName
			colnames(averageNumerical)[2] = "Size"
			colnames(averageNumerical)[-c(1,2)] = colnames(X)[keepIdx]
			cat("Numerical features average (", min(10, nFeatures) ," most important ones by their interactions):\n", sep="")
			if (nFeatures < 10) { print(averageNumerical) }
			else {  print(averageNumerical[,1:10]) }
			
			cat("\n")
			standardDeviationNumerical = aggregate(X[,keepIdx], list(Class), function(Z) round(sd(Z), 4))
			standardDeviationNumerical = cbind(standardDeviationNumerical[,1], classSize, standardDeviationNumerical[,-1])
			colnames(standardDeviationNumerical)[1] = clusterName
			colnames(standardDeviationNumerical)[2] = "Size"
			colnames(standardDeviationNumerical)[-c(1,2)] = colnames(X)[keepIdx]
			cat("Numerical features (standard) deviation (", nFeatures ," most important ones by their interactions):\n", sep="")
			if (nFeatures < 10) { print(standardDeviationNumerical) }
			else {  print(standardDeviationNumerical[,1:10]) }
			
			if (pp < p)
			{
				flag = FALSE
				keepIdx2 = (1:p)[-keepIdx]
				keepIdx2 = keepIdx2[rmNA(match(rmNA(match(localImportanceVariables, colnames(X))), keepIdx2))]
				if (length(keepIdx2) == 0) {  keepIdx2 = (1:p)[-keepIdx]; flag = TRUE }
				nFeatures = min(10, length(keepIdx2))
				aggregateCategorical = aggregate(X[,keepIdx2], list(Class), function(Z) names(which.max(table(Z))))
				aggregateCategorical = cbind(aggregateCategorical[,1], classSize, aggregateCategorical[,-1])
				colnames(aggregateCategorical)[1] = clusterName
				colnames(aggregateCategorical)[2] = "Size"
				colnames(aggregateCategorical)[-c(1,2)] = colnames(X)[keepIdx2]
				if (flag)
				{ cat("\nCategorical features aggregation (", min(10, nFeatures) ," first features):\n", sep = "")	}
				else
				{ 	cat("\nCategorical features aggregation (", min(10, nFeatures)," most important ones by their interactions):\n", sep = "")	}
				if (nFeatures < 10) { print(aggregateCategorical) }
				else {  print(aggregateCategorical[,1:10]) }
				cat("\n")
			}
		}
			
		if (pp == 0)
		{
			keepIdx2 = (1:p)
			nFeatures = min(10, length(keepIdx2))
			aggregateCategorical = aggregate(X[,keepIdx2], list(Class), function(Z) names(which.max(table(Z))))
			aggregateCategorical = cbind(aggregateCategorical[,1], classSize, aggregateCategorical[,-1])
			colnames(aggregateCategorical)[1] = clusterName
			colnames(aggregateCategorical)[2] = "Size"
			colnames(aggregateCategorical)[-c(1,2)] = colnames(X)[keepIdx2]
			cat("Categorical features aggregation (", nFeatures ," first features):\n", sep = "")
			if (nFeatures < 10) { print(aggregateCategorical) }
			else {  print(aggregateCategorical[,1:10]) }
		}
						
		return(list(featuresAndObs = featuresAndObs, clusterAnalysis = groupedAnalysis, 
		componentAnalysis = groupedFrequencies, numericalFeaturesAnalysis = aggregateNumerical, categoricalFeaturesAnalysis = aggregateCategorical))
	}
	else
	{
		return(list(featuresAndObs = featuresAndObs, clusterAnalysis = groupedAnalysis, 
		componentAnalysis = groupedFrequencies))
	}
}
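
# A minimal usage sketch for clusterAnalysis() (hypothetical; 'impObject' is an importance
# object computed on a supervised model):
# rufObject = randomUniformForest(Species ~ ., data = iris)
# impObject = importance(rufObject, Xtest = iris[, -5], maxInteractions = 4)
# analysis = clusterAnalysis(impObject, iris[, -5], components = 3, maxFeatures = 3,
# clusteredObject = rufObject, OOB = TRUE)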


rm.coordinates <- function(object, whichOnes, nstart = 1, maxIters = NULL, seed = NULL)
{
	Z = object$MDSModel$points = object$MDSModel$points[, -whichOnes, drop = FALSE]
	object$unsupervisedModel$centers = object$unsupervisedModel$centers[, -whichOnes, drop = FALSE] 
	
	if (is.null(seed)) { seed = as.numeric(object$params["seed"]) }
	if (is.null(maxIters)) { maxIters = 10 }
		
	if (object$params["baseModel"] == "MDShClust")
	{
		X.model <- hClust(Z, method = NULL, plotting = FALSE, k = as.numeric(object$params["clusters"]), 
		reduceClusters = FALSE, seed = seed)
		X.gapstat = NULL
		nbClusters = length(unique(X.model$cluster))
		centers = matrix(NA, nbClusters, ncol(Z))
		for (i in 1:nbClusters)
		{
			idx = which(X.model$cluster == i)
			centers[i,] = colMeans(Z[idx, ,drop = FALSE])
		}
		X.model$centers = centers
	}
	else
	{
		if (nstart == 1) { nstart = as.numeric(object$params["nstart"]) }
		X.model <- kMeans(Z, NULL, k = as.numeric(object$params["clusters"]), nstart = nstart,
		maxIters = maxIters, plotting = FALSE, algorithm = NULL, reduceClusters = FALSE, seed = seed)		
	}
	object$unsupervisedModel = X.model
	
	return(object)
}
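
# A minimal usage sketch for rm.coordinates() (hypothetical; assumes an 'unsupervised'
# object built with, e.g., metricDimension = 3; drops the third coordinate and recomputes
# the clustering on the remaining ones):
# uObject3D = unsupervised.randomUniformForest(iris[, -5], metricDimension = 3, clusters = 3)
# uObjectReduced = rm.coordinates(uObject3D, 3)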

unsupervised.randomUniformForest <- function(object, 
baseModel = c("proximity", "proximityThenDistance", "importanceThenDistance"),
endModel = c("MDSkMeans", "MDShClust", "MDS", "SpectralkMeans"),
endModelMetric = NULL,
samplingMethod = c("uniform univariate sampling", "uniform multivariate sampling", "with bootstrap"),
MDSmetric = c("metricMDS", "nonMetricMDS"),
proximityMatrix = NULL,
fullProximityMatrix = TRUE,
sparseProximities = FALSE,
outliersFilter = FALSE, 
Xtest = NULL, 
predObject = NULL, 
metricDimension = 2,
distance = c("euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski"), 
coordinates = c(1,2),
bootstrapReplicates = 100,
clusters = NULL,
maxIters = NULL,
nstart = 1,
importanceObject = NULL,
maxInteractions = 2,
reduceClusters = FALSE, 
maxClusters = 5,
mapAndReduce = FALSE,
OOB = FALSE,
subset = NULL,
seed = 2014,
uthreads = "auto",
...)
{
	if (!is.null(predObject))
	{
		if (is.null(predObject$votes.data))
		{ stop("predObject must be provided using option type = 'all' when calling the predict() function.\n") }
	}	
	
	if (!is.null(Xtest)) 
	{ 	
		if (!is.null(subset)) { Xtest = Xtest[subset,] }
		Xtest = NAfactor2matrix(Xtest, toGrep = "anythingParticular")
		if (any(is.na(Xtest))) { stop("NA found in the data. Please handle them before running the model.\n") }
	}
	
	flagBig = FALSE
	objectClass = class(object)
	if ( all(objectClass != "randomUniformForest") )
	{
		if (!is.null(subset)) { object = object[subset,] }		
		X = object
		n = nrow(X)
		
		if ( (n > 2000) || mapAndReduce) 
		{
			set.seed(seed)
			subsetIdx = sample(n, min(2000, floor(n/2)))
			bigX = X[-subsetIdx, ]
			X = X[subsetIdx,]
			n = nrow(X)
			flagBig  = TRUE
		}
		
		if (samplingMethod[1] == "with bootstrap")
		{ 
			cat("'with bootstrap' is only needed as the second argument of 'method' option. Default option will be computed.\n")
			samplingMethod[1] = "uniform univariate sampling"
		}
		XY <- unsupervised2supervised(X, method = samplingMethod[1], seed = seed, 
			bootstrap = if (length(samplingMethod) > 1) { TRUE } else {FALSE })
		
		cat("Created synthetic data giving them labels.\n") 
		if ((n > 2000) || mapAndReduce)
		{	
			rUF.model <- rUniformForest.big(XY$X, as.factor(XY$Y), BreimanBounds = FALSE, unsupervised = TRUE, unsupervisedMethod = samplingMethod[1], randomCut = TRUE,...)  
		}
		else
		{ 	
			rUF.model <- randomUniformForest(XY$X, as.factor(XY$Y), BreimanBounds = FALSE, 
			unsupervised = TRUE, unsupervisedMethod = samplingMethod[1],...)  
		}
		
		if (is.null(Xtest)) { Xtest = XY$X[1:n,] }
	}
	else
	{	
		if (any(objectClass == "randomUniformForest")) { rUF.model = object	}
		else { stop("Data are missing or object is not of class randomUniformForest.") }
		n = if (is.null(object$unsupervised)) { length(object$y) } else {  length(object$y)/2 }
	}
		
	if (!is.null(Xtest)) 
	{ 
		n = nrow(Xtest) 
		if ( ((n > 2000) || mapAndReduce) && !flagBig ) 
		{
			set.seed(seed)
			subsetIdx = sample(n, min(2000, floor(n/2)))
			bigX = Xtest[-subsetIdx, ]
			Xtest = Xtest[subsetIdx,]
			n = nrow(Xtest)
			flagBig  = TRUE
		}		
	}	
			
	if (baseModel[1] != "importanceThenDistance")
	{
		if (is.null(proximityMatrix))
		{
			proxMat <- proximitiesMatrix(rUF.model, fullMatrix = fullProximityMatrix, Xtest = Xtest, 
			predObject = predObject, sparseProximities = sparseProximities, pthreads = uthreads, OOB = OOB) 
		}
		else
		{ proxMat = proximityMatrix }
	}
	else
	{
		if (!is.null(importanceObject)) { imp.rUFModel = importanceObject }
		else { imp.rUFModel <- importance(rUF.model, Xtest = Xtest, maxInteractions = maxInteractions) }
		proxMat <- observationsImportance(Xtest, imp.rUFModel) 
	}
	
	p = ncol(proxMat)
	
	grepMDS = length(grep("MDS", endModel[1]))
	grepSpectral = length(grep("Spectral", endModel[1]))
	if ((grepMDS == 1) || (grepSpectral == 1))
	{
		if (grepSpectral == 1)
		{
			X.scale = list()
			cat("Spectral clustered points have been put on ...$MDSModel for compatibility with new points and others clustering objects that have to be updated.\n\n")
			Z <- specClust(proxMat, k = max(3, metricDimension))
			Z = Z[, coordinates]			
		}
		else
		{
			distanceProxy = TRUE
			if (baseModel[1] == "proximity") { 	distanceProxy = FALSE }		
			X.scale <- MDSscale(proxMat, metric = MDSmetric[1], dimension = metricDimension, distance = distanceProxy, distanceMethod = distance[1], plotting = FALSE, seed = seed)
			
			Z = X.scale$points[, ,drop = FALSE]
			
			if (dim(Z)[2] == 0) 
			stop("MDS cannot be achieved. The data can probably not be clustered using this (randomUniformForest) dissimilarity matrix.\nPlease reconsider the options used.\n")
		}

		if (flagBig)
		{
			pZ = ncol(Z)
			X.scale.ruf = vector('list', pZ)
			newZ = matrix(NA, nrow(Z) + nrow(bigX), pZ)
			if (mapAndReduce)
			{
				cat("Entered in regression mode to learn MDS/spectral points.\n")
				for (i in 1:pZ)	
				{ 
					X.scale.ruf[[i]] = rUniformForest.big(Xtest, Z[,i], randomCut = TRUE,...) 	
					newZ[-subsetIdx,i] = predict(X.scale.ruf[[i]], bigX)
					newZ[subsetIdx,i] = Z[,i]
				}
			}
			else
			{
				for (i in 1:pZ)	
				{ 
					X.scale.ruf[[i]] = randomUniformForest(Xtest, Z[,i], ...) 
					newZ[-subsetIdx,i] = predict(X.scale.ruf[[i]], bigX)
					newZ[subsetIdx,i] = Z[,i]
				}			
			}
			Z = newZ
		}
				
		nn = nrow(Z)
		if (endModel[1] == "MDS") { outliersFilter = FALSE }
		if (outliersFilter)
		{
			idx1 = which( (Z[,1] < quantile(Z[,1], 0.025)) & (Z[,2] < quantile(Z[,2], 0.025)) ) 
			idx2 = which( (Z[,1] > quantile(Z[,1], 0.975)) & (Z[,2] > quantile(Z[,2], 0.975)))
			idx12 = c(idx1, idx2)
			if (length(idx12) > 0)	
			{ 
				cat("Outliers have been removed for the two first coordinates.\n")
				Z = Z[-idx12,]  
			}
			nn = nrow(Z)
		}
		
		if (is.null(predObject))
		{
			if (endModel[1] == "MDS")
			{
				X.gapstat = NULL 
				if (is.null(object$unsupervised))
				{
					if (OOB)
					{
						if (!is.null(object$forest$OOB.predicts))
						{	predictions = object$forest$OOB.predicts }
						else
						{ 
							cat("OOB option has been called, but there is currently no OOB classifier. Hence true labels have been used.\n") 
							predictions = object$y
						}						
					}
					else
					{	
						if (is.null(object$predictionObject)) { predictions = object$y }
						else { 	predictions = object$predictionObject$majority.vote }				
					}
					if (!is.null(subset)) { predictions = predictions[subset] }		
					classes = sort(unique(predictions))
					k = length(classes)
					
					centers = matrix(0, k, ncol(Z))
					for (i in 1:k)
					{	
						idx = which(predictions == classes[i])
						centers[i,] = colMeans(Z[idx,])
					}						
										
					rownames(centers) = as.character(classes)
					X.model = list(cluster = predictions, centers = centers)
				}
				else
				{
					cat("Clustering can not be done except in the supervised mode.\nPlease send ...$MDSModel$points to a clustering algorithm.\n")
					X.model = list(cluster = NULL, centers = NULL)
				}
			}
					
			if ( (endModel[1] == "MDSkMeans") || (endModel[1] == "SpectralkMeans") )
			{
				if (is.null(clusters))  
				{	X.gapstat <- gap.stats(Z, B = bootstrapReplicates, maxClusters = maxClusters, seed = seed) 	}
				else 
				{ X.gapstat = NULL }
				
				X.model <- kMeans(Z, X.gapstat, k = clusters, maxIters = maxIters, plotting = FALSE, 
				algorithm = endModelMetric, reduceClusters = reduceClusters, seed = seed, nstart = nstart)

				if (endModel[1] == "SpectralkMeans") { X.scale$points = Z }
			}
			
			if (endModel[1] == "MDShClust")  
			{	
				X.model <- hClust(Z, method = endModelMetric, plotting = FALSE, k = clusters, 
				reduceClusters = reduceClusters, distanceMethod = distance[1], seed = seed)
				X.gapstat = NULL
				nbClusters = length(unique(X.model$cluster))
				centers = matrix(NA, nbClusters, ncol(Z))
				for (i in 1:nbClusters)
				{
					idx = which(X.model$cluster == i)
					centers[i,] = colMeans(Z[idx,, drop = FALSE])
				}
				X.model$centers = centers
			}

			if (outliersFilter)
			{
				if (length(idx12) > 0)
				{
					if ( (endModel[1] == "MDSkMeans") || (endModel[1] == "SpectralkMeans") ) 
					{	clusterOutliers <- which.is.nearestCenter(Z[c(idx1,idx2),], X.model$centers)	}
					if (endModel[1] == "MDShClust")  
					{	
						nbClusters = length(unique(X.model$cluster))
						centers = matrix(NA, nbClusters, ncol(Z))
						for (i in 1:nbClusters)
						{
							idx = which(X.model$cluster == i)
							centers[i,] = colMeans(Z[idx,, drop = FALSE])
						}
						
						clusterOutliers <- which.is.nearestCenter(Z[c(idx1,idx2),], centers)
					}
					names(clusterOutliers) = c(idx1,idx2)
					X.model$clusterOutliers = clusterOutliers
				}
			}
		}
		else
		{
			predictions = predObject$majority.vote
			classes = sort(unique(predictions))
			k = length(classes)
			centers = matrix(0, k, ncol(Z))
			for (i in 1:k)
			{	
				idx = which(predictions == classes[i])
				centers[i,] = colMeans(Z[idx,])
			}
			rownames(centers) = as.character(classes)
			X.model = list(cluster = predictions, centers = centers)
			X.gapstat = NULL 
		}
	}
	else
	{
		if (endModel[1] == "kMeans")
		{
			X.scale = NULL
			if (is.null(clusters))  
			{	
				X.gapstat <- gap.stats(proxMat[sample(n, floor(n/2)),, drop = FALSE], B = bootstrapReplicates, maxClusters = maxClusters,  seed = seed) 
			}
			else 
			{ X.gapstat = NULL }		
			X.model <- kMeans(proxMat, X.gapstat, k = clusters, maxIters = maxIters, plotting = FALSE, algorithm = endModelMetric, seed = seed, nstart = nstart)
		}
		
		if (endModel[1] == "hClust")  
		{	
			X.scale = NULL
			X.model <- hClust(proxMat, method = endModelMetric, plotting = FALSE, k = clusters, 
			distanceMethod = distance[1], seed = seed)
			X.gapstat = NULL 
		}
	}
	clusters = length(unique(X.model$cluster))	
		
	modelParams = c(baseModel[1], endModel[1],  if (is.null(endModelMetric)) { "NULL" } else { endModelMetric }, 
	samplingMethod[1], MDSmetric[1], is.null(Xtest), is.null(predObject), metricDimension, concatCore(as.character(coordinates)), bootstrapReplicates, clusters, if (is.null(maxIters)) { "NULL" } else { maxIters }, 
	maxInteractions, maxClusters, mapAndReduce, outliersFilter, reduceClusters, seed, sparseProximities, nstart)

	names(modelParams) = c("baseModel", "endModel", "endModelMetric", "samplingMethod", "MDSmetric", "Xtest", "predObject", "metricDimension", "coordinates", "bootstrapReplicates", "clusters", "maxIters", "maxInteractions", "maxClusters", "mapAndReduce", "outliersFilter", "reduceClusters", "seed", "sparseProximities", "nstart")
		
	if (flagBig) 
	{ 	
		X.scale$points = Z
		largeDataLearningModel = X.scale.ruf
		unsupervisedObject = list(proximityMatrix = proxMat, MDSModel = X.scale, unsupervisedModel = X.model, largeDataLearningModel = largeDataLearningModel, gapStatistics = X.gapstat, rUFObject = rUF.model, 
		nbClusters = clusters, params = modelParams) 	
	}
	else
	{
		unsupervisedObject = list(proximityMatrix = proxMat, MDSModel = X.scale, unsupervisedModel = X.model, gapStatistics = X.gapstat, rUFObject = rUF.model, nbClusters = clusters, params = modelParams) 	
	}
	
	class(unsupervisedObject) <- "unsupervised"
	
	unsupervisedObject
}
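
# A minimal usage sketch for unsupervised.randomUniformForest() (hypothetical; the full
# unsupervised pipeline on the iris data):
# X = iris[, -5]
# uObject = unsupervised.randomUniformForest(X, baseModel = "proximity", endModel = "MDSkMeans",
# clusters = 3, seed = 2014)
# print(uObject)
# plot(uObject)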
# END OF FILE
