Nothing
#' Binary classification with Rotation Forest (Rodriguez en Kuncheva, 2006)
#'
#' \code{rotationForest} implements an ensemble method where each base classifier (tree) is fit on the principal components of the variables of random partitions of the feature set.
#'
#' @param x A data frame of predictors (numeric, or integer). Categorical variables need to be transformed to indicator (dummy) variables. At minimum \code{x} requires two columns.
#' @param y A factor containing the response vector. Only \{0,1\} is allowed.
#' @param K The number of variable subsets. The default is the value \code{K} that results in three features per subset.
#' @param L The number of base classifiers (trees using the \code{rpart} package). The default is 10.
#' @param verbose Boolean. Should information about the subsets be printed?
#' @param ... Arguments to \code{rpart.control}. First run \code{library(rpart)}.
#' @examples
#' data(iris)
#' y <- as.factor(ifelse(iris$Species[1:100]=="setosa",0,1))
#' x <- iris[1:100,-5]
#' rF <- rotationForest(x,y)
#' predict(object=rF,newdata=x)
#' @references Rodriguez, J.J., Kuncheva, L.I., 2006. Rotation forest: A new classifier ensemble method. IEEE Trans. Pattern Anal. Mach. Intell. 28, 1619-1630. doi:10.1109/TPAMI.2006.211
#' @seealso \code{\link{predict.rotationForest}}
#' @return An object of class \code{rotationForest}, which is a list with the following elements:
#' \item{models}{A list of trees.}
#' \item{loadings}{A list of loadings.}
#' \item{columnnames}{Column names of x.}
#' @author Michel Ballings and Dirk Van den Poel, Maintainer: \email{Michel.Ballings@@GMail.com}
#' @keywords classification
rotationForest <- function(x,y,K=round(ncol(x)/3,0),L=10, verbose=FALSE, ...){
if (K >= ncol(x)) stop("K should not be greater than or equal to the number of columns in x")
if (all(sapply(x, is.numeric)) == FALSE) stop("All features in x need to be of type numeric")
if (K > floor(ncol(x) / 2)) stop(cat('The maximum K for your data is',floor(ncol(x) / 2)))
if (K == 0) stop('The minimum K is 1')
sets <- rep(floor(ncol(x)/K),K)
overflow <- rep(1, ncol(x)-sum(sets))
overflow <- c(overflow, rep(0,K-length(overflow)))
overflow <- overflow[sample(1:length(overflow),length(overflow),FALSE)]
subsetsizes <- sets + overflow
if (verbose) cat("Number of subsets: ", K ,". Number of variables per respective subset: ", sep="") ; if (verbose) cat(subsetsizes, "\n", sep=" ")
fact <- factor(rep(1:K, subsetsizes))
#reset K as to the remainder of the number of features and K should is 0
#%% moduls
# while (ncol(x) %% K != 0) {
# K <- K - 1
# }
#
# #calculate number of features M=n/K per subset
# M <- round(ncol(x)/K)
predicted <- list()
fit <- numeric()
Ri <- list()
Ria <- list()
fit <- list()
predicted <- matrix(NA,nrow=nrow(x),ncol=L)
subsets <- list()
SelectedClass<- list()
IndependentsClassSubset<- list()
IndependentsClassSubsetBoot<- list()
pcdata <- list()
loadings <- list()
#For i=1..L=number of classifiers
for (i in 1:L) {
#############prepare the rotation matrix RAi
#Split F(the feature set) randomly into K subsets: Fi,j (for j= 1...K)
#randomize order of variables
# Independents <- x[,sample(1:ncol(x),ncol(x))]
# n <- 0
subsets[[i]] <- list()
SelectedClass[[i]]<- list()
IndependentsClassSubset[[i]]<- list()
IndependentsClassSubsetBoot[[i]]<- list()
pcdata[[i]] <- list()
loadings[[i]] <- list()
varIndPerSubset <- split(sample(1:ncol(x),ncol(x),FALSE), fact)
#For j=1...K (this is n in subsets[[n]]
for (j in seq(1,K)) {
#let Xi,j be the dataset X for the feature set Fi,j
# n <- n + M
# subsets[[i]][[j]] <- data.frame(Independents[,(n-(M-1)):n],y)
# subsets[[i]][[j]] <- data.frame(x[,(n-(M-1)):n],y)
subsets[[i]][[j]] <- data.frame(x[,varIndPerSubset[[j]]],y)
#Eliminate from Xi,j a random subset of classes
SelectedClass[[i]][[j]]<- as.integer(sample(levels(as.factor(y)),1))
IndependentsClassSubset[[i]][[j]] <- subsets[[i]][[j]][subsets[[i]][[j]]$y == SelectedClass[[i]][[j]],]
#Select a bootstrap sample from Xi,j of size 75% of the number of objects in Xi,j. Denote the new set by XijBoot
IndependentsClassSubsetBoot[[i]][[j]] <- IndependentsClassSubset[[i]][[j]][sample(1:dim(IndependentsClassSubset[[i]][[j]])[1], round(0.75*nrow(IndependentsClassSubset[[i]][[j]])), replace = TRUE),]
#Apply PCA on XijBoot to obtain the coefficients in a matrix Ci,j (here called loadings)
# pcdata[[i]][[j]] <- princomp(IndependentsClassSubsetBoot[[i]][[j]][,!colnames(IndependentsClassSubsetBoot[[i]][[j]]) %in% "y"])
pcdata[[i]][[j]] <- prcomp(IndependentsClassSubsetBoot[[i]][[j]][,!colnames(IndependentsClassSubsetBoot[[i]][[j]]) %in% "y"])
# loadings[[i]][[j]] <- pcdata[[i]][[j]]$loadings[,]
loadings[[i]][[j]] <- pcdata[[i]][[j]]$rotation
#rename columns with coefficients so they can be merged properly
#colnames(loadings[[i]][[j]]) <- gsub("V", "a",dimnames(loadings[[i]][[j]])[[1]])
colnames(loadings[[i]][[j]]) <- dimnames(loadings[[i]][[j]])[[1]]
#add unique ID and name it rowID (for merging)
loadings[[i]][[j]] <- data.frame(dimnames(loadings[[i]][[j]])[[1]],loadings[[i]][[j]])
colnames(loadings[[i]][[j]])[1] <- "rowID"
}
#Arrange the Ci,j, for j=1...K in a Rotation matrix Ri
#pregrow data frame C (Coefficients = loadings)
#because the number of columns and rows can differ we should do a loop across all loading subsets and sum the nrow's and ncol's
#Ri <- data.frame(matrix(nrow=(n*dim(loadings[[j]])[[1]]), ncol=(2*dim(loadings[[j]])[[2]]-(j-1))) )
Ri[[i]] <- Reduce(function(x, y) merge(x, y, by='rowID',all=TRUE),loadings[[i]])
#Construct Ria by rearranging the columns of Ri so as to match the order of features in F (Feature set)
#replace NA's with 0
Ri[[i]][is.na(Ri[[i]])] <- 0
#at this point we have to sort everything (rows and columns) for subsequent multiplication
#sort rows and columns
Ria[[i]] <- Ri[[i]][order(match(Ri[[i]]$rowID,colnames(x))),order(match(colnames(Ri[[i]]),colnames(x)))]
rownames(Ria[[i]]) <- Ria[[i]]$rowID
Ria[[i]]$rowID <- NULL
final <- data.frame(as.matrix(x) %*% as.matrix(Ria[[i]]),y)
#############build classifier Di using (X Ria, y) as the training set
fit[[i]] <- rpart(y~ ., method="class", data=final,...)
}
res <- list(models=fit, loadings=Ria, columnnames=colnames(x))
class(res) <- "rotationForest"
res
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.