Nothing
# Metaheuristics
capture.output(library(GA))
#' @author Francisco Aragón Royón
#' @title Genetic Algorithm
#' @description Generates a search function based on a genetic algorithm. This function is called internally within the \code{\link{searchAlgorithm}} function. The geneticAlgorithm method \insertCite{yang1998feature}{FSinR} starts with an initial population of solutions and at each step applies a series of operators to the individuals in order to obtain new and better population of individuals. These operators are selection, crossing and mutation methods. This method uses the GA package implementation \insertCite{GAPkg1}{FSinR} \insertCite{GAPkg2}{FSinR}.
#'
#' @param popSize The popuplation size
#' @param pcrossover The probability of crossover between individuals
#' @param pmutation The probability of mutation between individuals
#' @param maxiter The number of iterations
#' @param run Number of consecutive iterations without fitness improvement to stop the algorithm
#' @param verbose Print the partial results in each iteration. This functionality is not available if the objective of the evaluation method is to minimize the target value (e.g. regression methods)
#'
#' @return Returns a search function that is used to guide the feature selection process.
#'
#' @references
#' \insertAllCited{}
#' @importFrom Rdpack reprompt
#' @export
#' @import GA
#' @importFrom utils capture.output
#'
#' @examples
#'\dontrun{
#'
#' ## The direct application of this function is an advanced use that consists of using this
#' # function directly and performing a search process in a feature space
#' ## Classification problem
#'
#' # Generates the filter evaluation function
#' filter_evaluator <- filterEvaluator('determinationCoefficient')
#'
#' # Generates the search function with Genetic algorithm
#' ga_search <- geneticAlgorithm()
#' # Performs the search process directly (parameters: dataset, target variable and evaluator)
#' ga_search(iris, 'Species', filter_evaluator)
#' }
geneticAlgorithm <- function(popSize=20, pcrossover = 0.8, pmutation = 0.1, maxiter = 100, run = 100, verbose = FALSE){
geneticAlgorithmSearch <- function(data, class, featureSetEval){
if (attr(featureSetEval, 'kind') == "Individual measure") {
stop('Only feature set measures can be used');
}
# Extract and eliminate the class to have only the features in the variable 'features'
column.names <- names(data)
class.position <- which(column.names == class)
features <- column.names[-class.position]
# Check for maximization-minimization
metricTarget <- attr(featureSetEval,'target')
if(metricTarget=="maximize"){
max <- TRUE
}else if(metricTarget=="minimize"){
max <- FALSE
}else{ # Metric is not specified
# Wrapper methods use by default RMSE for regression and Acuraccy for classification (in filter methods the metric is always specified)
max <- ifelse(is.factor(data[,class]), TRUE, FALSE)
}
# Fitness function
fitness <- function(ind, data, class){
# Features selected
feat <- features[which(ind==1)]
if(length(feat)==0){ # No features implies bad solution
if(max){ # Classification
value <- 0
}else{ # Regression
value <- -Inf
}
}else{ # Evaluation
if(max){ # Classification -> Maximize
value <- featureSetEval(data, class, feat)
}else{ # Regression -> Minimize
value <- -(featureSetEval(data, class, feat))
}
}
return(value)
}
# Genetic Algorithm
if(!verbose){
capture.output(GA <- GA::ga(type = "binary", fitness = fitness, data=data, class=class, nBits = length(features),
popSize = popSize,
pcrossover =pcrossover,
pmutation = pmutation,
maxiter = maxiter,
run = run)
)
}else{
if(max){
GA <- GA::ga(type = "binary", fitness = fitness, data=data, class=class, nBits = length(features),
popSize = popSize,
pcrossover =pcrossover,
pmutation = pmutation,
maxiter = maxiter,
run = run)
}else{
capture.output(GA <- GA::ga(type = "binary", fitness = fitness, data=data, class=class, nBits = length(features),
popSize = popSize,
pcrossover =pcrossover,
pmutation = pmutation,
maxiter = maxiter,
run = run)
)
}
}
# List with results
res <- list(NULL)
res[[1]] <- GA@solution
colnames(res[[1]]) <- features
res[[2]] <- GA@fitnessValue
if(!max){
res[[2]] <- -res[[2]]
}
ncol <- ncol(data)
res[[3]] <- matrix(cbind(GA@population,GA@fitness), ncol=ncol, dimnames = list(c(), c(paste0("x", 1:((ncol(data)-1))), c("fitness"))) )
names(res) <- c("bestFeatures","bestFitness","population")
if(!max){
res[[3]][,'fitness'] <- -res[[3]][,'fitness']
}
res
}
attr(geneticAlgorithmSearch,'shortName') <- "ga"
attr(geneticAlgorithmSearch,'name') <- "Genetic Algorithm"
return(geneticAlgorithmSearch)
}
#' @author Francisco Aragón Royón
#' @title Simulated Annealing
#' @description Generates a search function based on simulated annealing. This function is called internally within the \code{\link{searchAlgorithm}} function. The simulatedAnnealing method \insertCite{KirkpatrickGelattVecchi1983}{FSinR} starts with a certain set of features and in each iteration modifies an element of the previous feature vector and decreases the temperature. If the energy of the new feature vector is better than that of the old vector, it is accepted and moved towards it, otherwise it is moved towards the new vector according to an acceptance probability. The algorithm ends when the minimum temperature has been reached. Additionally, a number of internal iterations can be performed within each iteration of the algorithm. In this case, the same temperature value of the outer iteration is used for the inner iterations
#'
#' @param start Binary vector with the set of initial features
#' @param temperature Temperature initial
#' @param temperature_min Temperature to stops in the outer loop
#' @param reduction Temperature reduction in the outer loop
#' @param innerIter Number of iterations of inner loop. By default no inner iterations are established
#' @param verbose Print the partial results in each iteration
#'
#' @return Returns a search function that is used to guide the feature selection process.
#'
#' @references
#' \insertAllCited{}
#' @importFrom Rdpack reprompt
#'
#' @importFrom stats runif
#' @export
#'
#' @examples
#'\dontrun{
#'
#' ## The direct application of this function is an advanced use that consists of using this
#' # function directly and performing a search process in a feature space
#' ## Classification problem
#'
#' # Generates the filter evaluation function
#' filter_evaluator <- filterEvaluator('determinationCoefficient')
#'
#' # Generates the search function with Simulated annealing
#' sa_search <- simulatedAnnealing()
#' # Performs the search process directly (parameters: dataset, target variable and evaluator)
#' sa_search(iris, 'Species', filter_evaluator)
#' }
simulatedAnnealing <- function(start=NULL, temperature=1, temperature_min=0.01, reduction=0.6, innerIter=1, verbose = FALSE) {
simulatedAnnealingSearch <- function(data, class, featureSetEval) {
if (attr(featureSetEval, 'kind') == "Individual measure") {
stop('Only feature set measures can be used');
}
generateRandomNeighbor <- function(subset){
repeat{
pos <- sample(1:length(subset), 1)
if(!all(subset[-pos]==0)){
break
}
}
subset[pos] <- abs(subset[pos]-1)
subset
}
acceptanceProbability <- function(old, new, t){
exp(-abs(new-old)/t)
#exp(-(new-old)/t)
#exp(-((new-old)/t))
}
# Check parameters
t <- temperature
if(temperature < 0) stop('Temperature less than 0')
t_min <- temperature_min
if(t_min < 0) stop('Minimum temperature less than 0')
if(t_min > temperature) stop('Minimum temperature greater than temperature')
alpha <- reduction
if(t_min < 0) stop('Temperature descent less than 0')
if(innerIter < 0) stop('InnerIter less than 0')
# If the initial feature set is not passed, it is generated randomly
if(is.null(start)){
start <- sample(0:1,ncol(data)-1,replace=TRUE)
}
if(all(start==0)){
repeat{
start <- sample(0:1,ncol(data)-1,replace=TRUE)
if(!all(start==0)){
break
}
}
}
# The data should be a data.frame
data <- data.frame(data)
# Extract and eliminate the class to have only the features in the variable 'features'
column.names <- names(data)
class.position <- which(column.names == class)
features <- column.names[-class.position]
# Check for maximization-minimization
metricTarget <- attr(featureSetEval,'target')
if(metricTarget=="maximize"){
max <- TRUE
}else if(metricTarget=="minimize"){
max <- FALSE
}else{ # Metric is not specified
# Wrapper methods use by default RMSE for regression and Acuraccy for classification (in filter methods the metric is always specified)
max <- ifelse(is.factor(data[,class]), TRUE, FALSE)
}
# List with results
res <- list(NULL)
res[[1]] <- NA
res[[2]] <- NA
res[[3]] <- start
res[[4]] <- NA
ncol <- 4 + length(start)
res[[5]] <- matrix(rep(NA,ncol), ncol=ncol, dimnames = list(c(),c(c("n_outter", "temperature","move?"), paste0("x", 1:(length(start))), c("energy"))))
if(innerIter>1){
res[[6]] <- list(matrix(rep(NA,ncol), ncol=ncol, dimnames = list(c(),c(c("n_inner", "temperature","move?"), paste0("x", 1:(length(start))), c("energy")))))
}else{
res[[6]] <- NA
}
names(res) <- c("bestFeatures","bestFitness","initialVector","initialEnergy","traceOutter","traceInner")
#
if(all(start==0)){
repeat{
start <- sample(0:1,ncol(data)-1,replace=TRUE)
if(!all(start==0)){
old_subset <- start
res[[3]] <- start
break
}
}
}else{
old_subset <- start
}
old_cost <- featureSetEval(data, class, features[which(old_subset==1)])
res$initialEnergy <- old_cost
best.set <- old_subset
best.value <- old_cost
if(verbose){
cat(paste("SA | InitialVector = ",paste(old_subset,collapse="")," | Energy = ",round(old_cost,7),"\n"))
}
# Number of outer iterations
o <- 1
while(t>t_min){
# Number of inner iterations
i <- 1
moveOutter <- FALSE
while(i <= innerIter){
moveInner <- FALSE
new_subset <- generateRandomNeighbor(old_subset) # Generate new Neighbor
new_cost <- featureSetEval(data, class, features[which(new_subset==1)]) # Evaluate the energy
if(max){ # Classification -> maximize
move <- new_cost>old_cost
}else{ # Regression -> minimize
move <- new_cost<old_cost
}
if(move){ # Move
old_subset <- new_subset
old_cost <- new_cost
if(max){ # Classification -> maximize
best <- new_cost>best.value
}else{ # Regression -> minimize
best <- new_cost<best.value
}
if(best){ # Actualize best value
best.set <- new_subset
best.value <- new_cost
}else if(new_cost==best.value){
best.set <- rbind(best.set,new_subset)
}
moveOutter <- TRUE
moveInner <- TRUE
}else{
if(new_cost==best.value){
best.set <- rbind(best.set,new_subset)
}
a <- acceptanceProbability(old_cost, new_cost, t)
ran <- runif(1, 0, 1)
if(a > ran){ # Move
old_subset <- new_subset
old_cost <- new_cost
moveOutter <- TRUE
moveInner <- TRUE
} # Else don't move
}
# Save the inner iteration results
if(innerIter>1){
if(i==1){
if(o==1){
res$traceInner[[o]][i,] <- c(i, t, moveInner, old_subset, old_cost)
}else{
res$traceInner[[o]] <- matrix(c(i, t, moveInner, old_subset, old_cost), ncol=ncol, dimnames = list(c(),c(c("n_inner", "temperature","move?"), paste0("x", 1:(length(start))), c("energy"))))
}
names(res$traceInner)[o] <- paste("Outter",o,sep="")
}else{
res$traceInner[[o]] <- rbind(res$traceInner[[o]], c(i, t, moveInner, old_subset, old_cost))
}
}
i <- i + 1
}
# Save the outer iteration results
if(o==1){
res$traceOutter[o,] <- c(o, t, moveOutter, old_subset, old_cost)
}else{
res$traceOutter <- rbind(res$traceOutter, c(o, t, moveOutter, old_subset, old_cost))
}
if(verbose){
cat(paste("SA | iter = ",o," | Temp = ",round(t,4)," | Vector = ",paste(old_subset,collapse="")," | Energy = ",round(old_cost,7),"\n"))
}
# Updates the temperature
t <- t * alpha
o <- o + 1
}
if(is.null(nrow(best.set))){
res$bestFeatures <- matrix(best.set, ncol=length(features), byrow=FALSE, dimnames=list(c(),features))
}else{
res$bestFeatures <- matrix(unique(best.set), ncol=length(features), byrow=FALSE, dimnames=list(c(),features))
}
colnames(res$bestFeatures) <- features
res$bestFitness <- best.value
return(res)
}
attr(simulatedAnnealingSearch,'shortName') <- "sa"
attr(simulatedAnnealingSearch,'name') <- "Simmulated Annealing"
return(simulatedAnnealingSearch)
}
#' @author Francisco Aragón Royón
#' @title Whale Optimization Algorithm (Binary Whale Optimization Algorithm)
#' @description Generates a search function based on the whale optimization algorithm. This function is called internally within the \code{\link{searchAlgorithm}} function. Binary Whale Optimization Algorithm \insertCite{Kumar2018}{FSinR} is an algorithm that simulates the social behavior of humpback whales. This algorithm employs a binary version of the bubble-net hunting strategy. The algorithm starts with an initial population of individuals, and in each iteration updates the individuals according to several possible actions: Encircling prey, Bubble-net attacking or Search for prey
#'
#' @param population The number of whales population
#' @param iter The number of iterations of the algorithm
#' @param verbose Print the partial results in each iteration
#'
#' @return Returns a search function that is used to guide the feature selection process.
#'
#' @references
#' \insertAllCited{}
#' @importFrom Rdpack reprompt
#' @export
#'
#' @examples
#'\dontrun{
#'
#' ## The direct application of this function is an advanced use that consists of using this
#' # function directly and performing a search process in a feature space
#' ## Classification problem
#'
#' # Generates the filter evaluation function
#' filter_evaluator <- filterEvaluator('determinationCoefficient')
#'
#' # Generates the search function with WOA
#' woa_search <- whaleOptimization()
#' # Performs the search process directly (parameters: dataset, target variable and evaluator)
#' woa_search(iris, 'Species', filter_evaluator)
#' }
whaleOptimization <- function(population=10, iter=10, verbose = FALSE) {
whaleOptimizationSearch <- function(data, class, featureSetEval) {
if (attr(featureSetEval, 'kind') == "Individual measure") {
stop('Only feature set measures can be used');
}
# Check parameters
if(population <= 1) stop('The population should be at least 2')
if(population < 1) stop('The number of iterations should be at least 1')
# Extract and eliminate the class to have only the features in the variable 'features'
column.names <- names(data)
class.position <- which(column.names == class)
features <- column.names[-class.position]
# Check for maximization-minimization
metricTarget <- attr(featureSetEval,'target')
if(metricTarget=="maximize"){
max <- TRUE
}else if(metricTarget=="minimize"){
max <- FALSE
}else{ # Metric is not specified
# Wrapper methods use by default RMSE for regression and Acuraccy for classification (in filter methods the metric is always specified)
max <- ifelse(is.factor(data[,class]), TRUE, FALSE)
}
# Generate and initializate the population of whales
whales <- matrix(NA, nrow=population, ncol=length(features),dimnames=list(paste0("Whale", 1:population),features))
for(i in 1:nrow(whales)){
repeat{
random <- sample(0:1, length(features), replace = TRUE)
if(sum(random!=0)){
break
}
}
whales[i,] <- random
}
# Calculate the fitness of each search agents (the fitness of each whale)
whalesFitness <- rep(NA, population)
for(i in 1:nrow(whales)){
whalesFitness[i] <- featureSetEval(data, class, features[which(whales[i,]==1)])
}
# Best search agent (best whale's fitness)
if(max){
posBestSearchAgent <- which(whalesFitness==max(whalesFitness))
if(length(posBestSearchAgent)>1){ # If there's more than one...
aux <- apply(whales[posBestSearchAgent,], 1, sum) # The one with the fewest number of features
posBestSearchAgent <- posBestSearchAgent[which(aux==min(aux))]
if(length(posBestSearchAgent)>1){ # If there's more than one...
posBestSearchAgent <- sample(posBestSearchAgent, size=1) # Randomly
}
}
bestSearchAgent <- whales[posBestSearchAgent,]
bestFitness <- max(whalesFitness)
}else{
posBestSearchAgent <- which(whalesFitness==min(whalesFitness))
if(length(posBestSearchAgent)>1){ # If there's more than one...
aux <- apply(whales[posBestSearchAgent,], 1, sum) # The one with the fewest number of features
posBestSearchAgent <- posBestSearchAgent[which(aux==min(aux))]
if(length(posBestSearchAgent)>1){ # If there's more than one...
posBestSearchAgent <- sample(posBestSearchAgent, size=1) # Randomly
}
}
bestSearchAgent <- whales[posBestSearchAgent,]
bestFitness <- min(whalesFitness)
}
# List with results
res <- list(NULL)
res[[1]] <- NA
res[[2]] <- NA
res[[3]] <- list(NULL)
res[[3]][[1]] <- cbind(whales,whalesFitness)
names(res) <- c("bestFeatures","bestFitness","popIter")
names(res[[3]])[1] <- "InitialIteration"
if(verbose==TRUE){
cat(paste("WOA | BestInitialWhale = ", paste(bestSearchAgent,collapse="")," | BestFitness = ",bestFitness, " | MeanFitness", mean(whalesFitness),"\n"))
}
# For each iteration...
for (t in 1:iter){
# The a parameter is updated
a <- 2-t*(2/iter) # a decrease from 2 to 0 over the course of iterations
# For each whale...
for(w in 1:nrow(whales)){
# Repeats if the resulting search space is all 0
repeat{
# The parameters are updated
r1 <- runif(1) # Random value between 0 and 1
A <- 2*a*r1-a # Vector A
r2 <- runif(1) # Random value between 0 and 1
C <- 2*r2 # Vector C
p <- runif(1) # Random number to select the attacking method
# Parameters for Spiral updating position
#b <- 1 # Constant number for defining the shape of the spiral
#l <- runif(1, -1, 1) # Random number in [-1,1]
# Salvar por si acaso lo deja todo a cero
# For each feature...
for (j in 1:ncol(whales)){
if(p<0.5){ # Shrinking encircling prey
if(abs(A)<1){ # Encircling prey (shrinking encircling)
D <- abs(C*bestSearchAgent[j]-whales[w,j])
cstep <- (1)/(1+exp(-10*(A*D-0.5)))
whales[w,j] <- ifelse(p<cstep,1-(whales[w,j]),whales[w,j])
#whales[w,j] <- bestSearchAgent[j]-A*D
}else if(abs(A)>=1){ # Search for prey (exploration phase)
# Choose a random whale
repeat{
randomWhale <- floor(nrow(whales)*runif(1)+1)
if(randomWhale!=j){
break
}
}
D <- abs(C*whales[randomWhale,][j]-whales[w,j])
cstep <- (1)/(1+exp(-10*(A*D-0.5)))
whales[w,j] <- ifelse(p<cstep,1-(whales[w,j]),whales[w,j])
#whales[w,j] <- whales[randomWhale,][j]-A*D
}
}else if(p >= 0.5){ # Spiral position updating
D <- abs(bestSearchAgent[j]-whales[w,j]) # Distance between the whale and prey
cstep <- (1)/(1+exp(-10*(A*D-0.5)))
whales[w,j] <- ifelse(p<cstep,1-(whales[w,j]),whales[w,j])
#whales[w,j] <- D*exp(b*l)*cos(l*2*pi)+bestSearchAgent[j]
}
}
if(sum(whales[w,])!=0){
break
}
}
}
# Calculate the new fitness of each search agents (the fitness of the whales)
for(i in 1:nrow(whales)){
whalesFitness[i] <- featureSetEval(data, class, features[which(whales[i,]==1)])
}
# Check the new fitness
if(max){
if(bestFitness < max(whalesFitness)){
# New best search agent (best whale's fitness)
posBestSearchAgent <- which(whalesFitness==max(whalesFitness))
if(length(posBestSearchAgent)>1){ # If there's more than one...
aux <- apply(whales[posBestSearchAgent,], 1, sum) # The one with the fewest number of features
posBestSearchAgent <- posBestSearchAgent[which(aux==min(aux))]
if(length(posBestSearchAgent)>1){ # If there's more than one...
posBestSearchAgent <- sample(posBestSearchAgent, size=1) # Randomly
}
}
bestSearchAgent <- whales[posBestSearchAgent,]
bestFitness <- max(whalesFitness)
}
}else{
if(bestFitness > max(whalesFitness)){
# New best search agent (best whale's fitness)
posBestSearchAgent <- which(whalesFitness==min(whalesFitness))
if(length(posBestSearchAgent)>1){ # If there's more than one...
aux <- apply(whales[posBestSearchAgent,], 1, sum) # The one with the fewest number of features
posBestSearchAgent <- posBestSearchAgent[which(aux==min(aux))]
if(length(posBestSearchAgent)>1){ # If there's more than one...
posBestSearchAgent <- sample(posBestSearchAgent, size=1) # Randomly
}
}
bestSearchAgent <- whales[posBestSearchAgent,]
bestFitness <- min(whalesFitness)
}
}
if(verbose==TRUE){
cat(paste("WOA | iter = ",t," | BestWhale = ",paste(bestSearchAgent,collapse="")," | BestFitness = ",bestFitness, " | MeanFitness", mean(whalesFitness),"\n"))
}
res[[3]][[t+1]] <- cbind(whales, whalesFitness)
names(res[[3]])[t+1] <- paste("Iteration",t,sep="")
}
res[[1]] <- bestSearchAgent
res[[2]] <- bestFitness
# Modify the output
res[[1]] <- matrix(res[[1]], nrow=1, byrow=FALSE, dimnames=list(c(),features))
res
}
attr(whaleOptimizationSearch,'shortName') <- "woa"
attr(whaleOptimizationSearch,'name') <- "Whale Optimization Algorithm"
return(whaleOptimizationSearch)
}
#' @author Francisco Aragón Royón
#' @title Ant Colony Optimization (Advanced Binary Ant Colony Optimization)
#' @description Generates a search function based on the ant colony optimization. This function is called internally within the \code{\link{searchAlgorithm}} function. The Ant Colony Optimization (Advanced Binary Ant Colony Optimization) \insertCite{Kashef2015}{FSinR} algorithm consists of generating in each iteration a random population of individuals (ants) according to the values of a pheromone matrix (which is updated each iteration according to the paths most followed by the ants) and a heuristic (which determines how good is each path to follow by the ants). The evaluation measure is calculated for each individual. The algorithm ends once the established number of iterations has been reached
#'
#' @param population The number of ants population
#' @param iter The number of iterations
#' @param a Parameter to control the influence of the pheromone (If a=0, no pheromone information is used)
#' @param b Parameter to control the influence of the heuristic (If b=0, the attractiveness of the movements is not taken into account)
#' @param p Rate of pheromone evaporation
#' @param q Constant to determine the amount of pheromone deposited by the best ant. This amount is determined by the Q/F equation (for minimization) where F is the cost of the solution (F/Q for maximization)
#' @param t0 Initial pheromone level
#' @param tmin Minimum pheromone value
#' @param tmax Maximum pheromone value
#' @param mode Heuristic information measurement. 1 -> min redundancy (by default). 2-> max-relevance and min-redundancy. 3-> feature-feature. 4-> based on F-score
#' @param verbose Print the partial results in each iteration
#'
#' @return Returns a search function that is used to guide the feature selection process.
#'
#' @references
#' \insertAllCited{}
#' @importFrom Rdpack reprompt
#' @importFrom stats cor.test
#' @export
#'
#' @examples
#'\dontrun{
#'
#' ## The direct application of this function is an advanced use that consists of using this
#' # function directly and performing a search process in a feature space
#' ## Classification problem
#'
#' # Generates the filter evaluation function with ACO
#' filter_evaluator <- filterEvaluator('determinationCoefficient')
#'
#' # Generates the search function
#' aco_search <- antColony()
#' # Performs the search process directly (parameters: dataset, target variable and evaluator)
#' aco_search(iris, 'Species', filter_evaluator)
#' }
antColony <- function(population=10, iter=10, a=1, b=1, p=0.2, q=1, t0=0.2, tmin=0, tmax=1, mode=1, verbose = FALSE) {
antColonySearch <- function(data, class, featureSetEval) {
if (attr(featureSetEval, 'kind') == "Individual measure") {
stop('Only feature set measures can be used');
}
# Check parameters
if(population <= 1) stop('The population should be at least 2')
if(iter < 1) stop('The number of iterations should be at least 1')
if(p > 1 || p <= 0) stop('The rate of pheromone evaporation should be between the range (0,1]')
if(tmin>=tmax) stop('The maximum value of the pheromone may not be less than the minimum value')
if(t0<tmin || t0>tmax) stop('The initial pheromone level should be between the range [tmin,tmax]]')
if(a<0) stop('The value of a should be >= 0')
if(b<0) stop('The value of b should be >= 0')
if(mode>4||mode<1) stop('Invalid mode')
if( !all( lapply(data[1:(ncol(data)-1)], is.numeric)==TRUE ) ) stop('All features must contain numerical values and not character, boolean, or factor type values since heuristics work only with numerical values')
# Extract and eliminate the class to have only the features in the variable 'features'
column.names <- names(data)
class.position <- which(column.names == class)
features <- column.names[-class.position]
# Check for maximization-minimization
metricTarget <- attr(featureSetEval,'target')
if(metricTarget=="maximize"){
max <- TRUE
}else if(metricTarget=="minimize"){
max <- FALSE
}else{ # Metric is not specified
# Wrapper methods use by default RMSE for regression and Acuraccy for classification (in filter methods the metric is always specified)
max <- ifelse(is.factor(data[,class]), TRUE, FALSE)
}
## FUNCTIONS
# Function to initialize the heuristic values between nodes
initializeHeuristic <- function(mode){
# Matrix of general heuristics between features (correlation between i and j)
graphHeuristicInfoAux <- matrix(rep(NA,length(features)*length(features)),ncol=length(features), nrow=length(features), dimnames=list(features,features))
for(i in 1:length(features)){
for(j in 1:length(features)){
if(i!=j){
graphHeuristicInfoAux[j,i] <- cor.test(data[,i], data[,j], method="pearson")$estimate[[1]] # Calculate the pearson's correlation between features
}
}
}
# Matrix of general heuristics between features (correlation between i and y)
graphHeuristicInfoAux2 <- matrix(rep(NA,length(features)),ncol=length(features), nrow=1, dimnames=list("y",features))
for(i in 1:length(features)){
if(max){
graphHeuristicInfoAux2[1,i] <- cor.test(data[,i], as.numeric(data[,length(data)]), method="pearson")$estimate[[1]] # Calculate the pearson's correlation between features
}else{
graphHeuristicInfoAux2[1,i] <- cor.test(data[,i], data[,length(data)], method="pearson")$estimate[[1]] # Calculate the pearson's correlation between features
}
}
# Matrix for final heuristic between subnodes
if(mode==1){ # Min redundancy
graphHeuristicInfo <- matrix(rep(NA,(length(features)*2)*(length(features)*2)),ncol=length(features)*2, nrow=length(features)*2, dimnames=list(do.call(paste0, expand.grid(features, c(0, 1))),do.call(paste0, expand.grid(features, c(0, 1))))) # The reading is from top to bottom
for(i in 1:(length(features)*2)){
for(j in 1:(length(features)*2)){
# Check if the characteristics are the same
if(i%%length(features)!=0){
featureI <- features[i%%length(features)]
}else{
featureI <- features[length(features)]
}
if(j%%length(features)!=0){
featureJ <- features[j%%length(features)]
}else{
featureJ <- features[length(features)]
}
if(featureI!=featureJ){ # Not between the same feature
if(j<=length(features)){ # To no selected (zeros)
if(i%%length(features)!=0){
graphHeuristicInfo[j,i] <- abs(graphHeuristicInfoAux[j,i%%length(features)])
}else{
graphHeuristicInfo[j,i] <- abs(graphHeuristicInfoAux[j,length(features)])
}
}else{ # To selected (ones)
if(i%%length(features)!=0){
if(j%%length(features)!=0){
graphHeuristicInfo[j,i] <- 1 - abs(graphHeuristicInfoAux[(j%%length(features)),(i%%length(features))])
}else{
graphHeuristicInfo[j,i] <- 1 - abs(graphHeuristicInfoAux[(length(features)),(i%%length(features))])
}
}else{
if(j%%length(features)!=0){
graphHeuristicInfo[j,i] <- 1 - abs(graphHeuristicInfoAux[(j%%length(features)),length(features)])
}else{
graphHeuristicInfo[j,i] <- 1 - abs(graphHeuristicInfoAux[(length(features)),length(features)])
}
}
}
}
}
}
}else if(mode==2){ # Max-relevance and min-redundancy
graphHeuristicInfo <- matrix(rep(NA,(length(features)*2)*(length(features)*2)),ncol=length(features)*2, nrow=length(features)*2, dimnames=list(do.call(paste0, expand.grid(features, c(0, 1))),do.call(paste0, expand.grid(features, c(0, 1))))) # The reading is from top to bottom
for(i in 1:(length(features)*2)){
for(j in 1:(length(features)*2)){
# Check if the characteristics are the same
if(i%%length(features)!=0){
featureI <- features[i%%length(features)]
}else{
featureI <- features[length(features)]
}
if(j%%length(features)!=0){
featureJ <- features[j%%length(features)]
}else{
featureJ <- features[length(features)]
}
if(featureI!=featureJ){ # Not between the same feature
if(j<=length(features)){ # To no selected (zeros)
if(i%%length(features)!=0){
graphHeuristicInfo[j,i] <- sqrt( abs(graphHeuristicInfoAux[j,i%%length(features)]) * ( 1 - abs(graphHeuristicInfoAux2[j]) ) )
}else{
graphHeuristicInfo[j,i] <- sqrt( abs(graphHeuristicInfoAux[j,length(features)]) * ( 1 - abs(graphHeuristicInfoAux2[j]) ) )
}
}else{ # To selected (ones)
if(i%%length(features)!=0){
if(j%%length(features)!=0){
graphHeuristicInfo[j,i] <- sqrt( ( 1 - abs(graphHeuristicInfoAux[j%%length(features),i%%length(features)]) ) * ( abs(graphHeuristicInfoAux2[j%%length(features)]) ) )
}else{
graphHeuristicInfo[j,i] <- sqrt( ( 1 - abs(graphHeuristicInfoAux[length(features),i%%length(features)]) ) * ( abs(graphHeuristicInfoAux2[length(features)]) ) )
}
}else{
if(j%%length(features)!=0){
graphHeuristicInfo[j,i] <- sqrt( ( 1 - abs(graphHeuristicInfoAux[j%%length(features),length(features)]) ) * ( abs(graphHeuristicInfoAux2[j%%length(features)]) ) )
}else{
graphHeuristicInfo[j,i] <- sqrt( ( 1 - abs(graphHeuristicInfoAux[length(features),length(features)]) ) * ( abs(graphHeuristicInfoAux2[length(features)]) ) )
}
}
}
}
}
}
}else if(mode==3){ # feature–feature
graphHeuristicInfo <- matrix(rep(NA,(length(features)*2)*(length(features)*2)),ncol=length(features)*2, nrow=length(features)*2, dimnames=list(do.call(paste0, expand.grid(features, c(0, 1))),do.call(paste0, expand.grid(features, c(0, 1))))) # The reading is from top to bottom
for(i in 1:(length(features)*2)){
for(j in 1:(length(features)*2)){
# Check if the characteristics are the same
if(i%%length(features)!=0){
featureI <- features[i%%length(features)]
}else{
featureI <- features[length(features)]
}
if(j%%length(features)!=0){
featureJ <- features[j%%length(features)]
}else{
featureJ <- features[length(features)]
}
if(featureI!=featureJ){ # Not between the same feature
if(i<=length(features)&&(j<=length(features))){ # Zero to zero
graphHeuristicInfo[j,i] <- 1 - abs(graphHeuristicInfoAux2[j])
}
if(i<=length(features)&&(j>length(features))){ # Zero to one
graphHeuristicInfo[j,i] <- abs(graphHeuristicInfoAux2[j-length(features)])
}
if(i>length(features)&&(j<=length(features))){ # One to zero
graphHeuristicInfo[j,i] <- abs(graphHeuristicInfoAux[i-length(features),j])
}
if(i>length(features)&&(j>length(features))){ # One to one
graphHeuristicInfo[j,i] <- 1 - abs(graphHeuristicInfoAux[i-length(features),j-length(features)])
}
}
}
}
}else{ # based on F-score
# Matrix of general heuristics between features (fscore between i and y)
graphHeuristicInfoAux3 <- matrix(rep(NA,length(features)),ncol=length(features), nrow=1, dimnames=list("y",features))
for(i in 1:length(features)){
# numerator
discriminationAmongCategories <- lapply(levels(data[,class.position]), function(x){
( mean( data[which(data[,class.position]==x), i] ) - mean(data[,i]) ) ^ 2
})
# denominator
discriminatioWithinEachCategory <- lapply(levels(data[,class.position]), function(x){
first <- (1 / ((length(data[which(data[,class.position]==x),i])) - 1) )
esima <- data[which(data[,class.position]==x),i]
media <- mean(data[which(data[,class.position]==x),i])
second <- lapply(esima, function(x){
( x - media ) ^ 2
})
first*sum(unlist(second))
})
# fscore
graphHeuristicInfoAux3[i] <- ( sum( unlist(discriminationAmongCategories) ) ) / ( sum( unlist(discriminatioWithinEachCategory) ) )
}
graphHeuristicInfo <- matrix(rep(NA,(length(features)*2)*(length(features)*2)),ncol=length(features)*2, nrow=length(features)*2, dimnames=list(do.call(paste0, expand.grid(features, c(0, 1))),do.call(paste0, expand.grid(features, c(0, 1))))) # The reading is from top to bottom
for(i in 1:(length(features)*2)){
for(j in 1:(length(features)*2)){
# Check if the characteristics are the same
if(i%%length(features)!=0){
featureI <- features[i%%length(features)]
}else{
featureI <- features[length(features)]
}
if(j%%length(features)!=0){
featureJ <- features[j%%length(features)]
}else{
featureJ <- features[length(features)]
}
if(featureI!=featureJ){ # Not between the same feature
if(i<=length(features)&&(j<=length(features))){ # Zero to zero
graphHeuristicInfo[j,i] <- (1/length(features)) * sum(graphHeuristicInfoAux3)
}
if(i<=length(features)&&(j>length(features))){ # Zero to one
graphHeuristicInfo[j,i] <- graphHeuristicInfoAux3[j-length(features)]
}
if(i>length(features)&&(j<=length(features))){ # One to zero
graphHeuristicInfo[j,i] <- (1/length(features)) * sum(graphHeuristicInfoAux3)
}
if(i>length(features)&&(j>length(features))){ # One to one
graphHeuristicInfo[j,i] <- graphHeuristicInfoAux3[j-length(features)]
}
}
}
}
}
return(graphHeuristicInfo)
}
# Function to initialize the pheromone values between nodes
initializePheromone <- function(){
# Matrix for final pheromone between subnodes
graphPheromone <- matrix(rep(NA,(length(features)*2)*(length(features)*2)),ncol=length(features)*2, nrow=length(features)*2, dimnames=list(do.call(paste0, expand.grid(features, c(0, 1))),do.call(paste0, expand.grid(features, c(0, 1))))) # The reading is from top to bottom
for(i in 1:(length(features)*2)){
for(j in 1:(length(features)*2)){
# Check if the characteristics are the same
if(i%%length(features)!=0){
featureI <- features[i%%length(features)]
}else{
featureI <- features[length(features)]
}
if(j%%length(features)!=0){
featureJ <- features[j%%length(features)]
}else{
featureJ <- features[length(features)]
}
if(featureI!=featureJ){ # Not between the same feature
if(j<=length(features)){ # To no selected (zeros)
if(i%%length(features)!=0){
graphPheromone[j,i] <- t0
}else{
graphPheromone[j,i] <- t0
}
}else{ # To selected (ones)
if(i%%length(features)!=0){
if(j%%length(features)!=0){
graphPheromone[j,i] <- t0
}else{
graphPheromone[j,i] <- t0
}
}else{
if(j%%length(features)!=0){
graphPheromone[j,i] <- t0
}else{
graphPheromone[j,i] <- t0
}
}
}
}
}
}
return(graphPheromone)
}
# Function to generate a solution to an ant
generateSolutions <- function(graphHeuristicInfo, graphPheromone){
# Nodes visited for the ant
nodesToVisit <- matrix(rep("not visited",length(features)),ncol=length(features), nrow=1, dimnames=list("nodes",features))
# Features selected for the ant
antFeatures <- matrix(rep(NA,length(features)),ncol=length(features), nrow=1, dimnames=list("selection",features))
# Select in a random way the node of the features graph to start the ant
randomStartFeature <- sample(1:length(features), 1)
actualNode <- randomStartFeature
# Add the node visited to the variables
nodesToVisit[randomStartFeature] <- "visited"
antFeatures[randomStartFeature] <- sample(0:1,1) # In a random way
# Road followed by the ant
road <- matrix(rep(NA,length(features)*2),ncol=length(features)*2, nrow=1, dimnames=list(c("position"),do.call(paste0, expand.grid(features, c(0, 1)))))
road[(actualNode)+(length(features)*antFeatures[actualNode])] <- 1
# Until all variables are visited...
for(j in 1:(length(features)-1)){
# New node selection (only nodes not visited before)
nodeFrom <- actualNode+(length(features)*antFeatures[actualNode])
heuristic <- graphHeuristicInfo[, nodeFrom]
pheromone <- graphPheromone[, nodeFrom]
# Matrix for the values for the probability of transition
transitionProbability <- matrix(rep(NA,length(features)*2),ncol=length(features)*2, nrow=1, dimnames=list(c("transitionProbability"),do.call(paste0, expand.grid(features, c(0, 1))))) # Generate the values for the probability of transition
# Nodes not visited
nodes <- which(nodesToVisit=="not visited")
for(i in nodes){ # Generates the values of the transition functions
heuristicsZeros <- apply( as.matrix(heuristic[1:length(features)]), 1, function(x){x^b}) # To zeros
pheromonesZeros <- apply( as.matrix(pheromone[1:length(features)]), 1, function(x){x^a}) # To zeros
heuristicsOnes <- apply( as.matrix(heuristic[(length(features)+1):(length(features)*2)]), 1, function(x){x^b}) # To ones
pheromonesOnes <- apply( as.matrix(pheromone[(length(features)+1):(length(features)*2)]), 1, function(x){x^a}) # To ones
transitionProbability[i] <- ( (heuristic[i]^b)*(pheromone[i]^a) ) / ( sum(heuristicsZeros*pheromonesZeros, na.rm=TRUE) + sum(heuristicsOnes*pheromonesOnes, na.rm=TRUE))
transitionProbability[i+length(features)] <- ( (heuristic[i+length(features)]^b)*(pheromone[i+length(features)]^a) ) / ( sum(heuristicsZeros*pheromonesZeros, na.rm=TRUE) + sum(heuristicsOnes*pheromonesOnes, na.rm=TRUE))
}
# Selects the node with the highest probability
maxProbability <- which(transitionProbability==max(transitionProbability, na.rm=TRUE))
if(length(maxProbability)>1){
maxProbability <- sample(maxProbability, 1)
}
nextFeatureAux <- colnames(transitionProbability)[maxProbability]
nextFeature <- substr(nextFeatureAux, 0, nchar(nextFeatureAux)-1)
nextValue <- as.integer(substr(nextFeatureAux, nchar(nextFeatureAux), nchar(nextFeatureAux)))
# Adds the node to the list of visited nodes
nodesToVisit[which(colnames(nodesToVisit)==nextFeature)] <- "visited"
antFeatures[which(colnames(nodesToVisit)==nextFeature)] <- nextValue
# Updates the current node
actualNode <- which(colnames(nodesToVisit)==nextFeature)
road[(actualNode)+(length(features)*antFeatures[actualNode])] <- (j+1)
}
return(list(antFeatures,road))
}
# Function to generate the fitness of each ant
generateFitness <- function(ants){
fitness <- rep(NA, population)
for(i in 1:nrow(ants)){
fitness[i] <- featureSetEval(data, class, features[which(ants[i,]==1)])
}
return(fitness)
}
# Function to update the pheromone level
pheromoneUpdate <- function(graphPheromone, road, cost){
# Pheromone evaporation
for(i in 1:(length(features)*2)){
for(j in 1:(length(features)*2)){
if(!is.na(graphPheromone[j,i])){
graphPheromone[j,i] <- ((1-p)*graphPheromone[j,i])
}
}
}
# Pheromone deposition
for(i in 1:(length(features)-1)){
iniAux <- names(road)[which(road==i)]
endAux <- names(road)[which(road==(i+1))]
ini <- which(colnames(graphPheromone)==iniAux)
end <- which(rownames(graphPheromone)==endAux)
if(max){
graphPheromone[end,ini] <- graphPheromone[end, ini] + (cost/q)
}else{
graphPheromone[end,ini] <- graphPheromone[end,ini] + (q/cost)
}
}
# Pheromone between limits (max-min)
for(i in 1:(length(features)*2)){
for(j in 1:(length(features)*2)){
if(!is.na(graphPheromone[j,i])){
if(graphPheromone[j,i] > tmax){
graphPheromone[j,i] <- tmax
}else if(graphPheromone[j,i] < tmin){
graphPheromone[j,i] <- tmin
}
}
}
}
return(graphPheromone)
}
## PARAMETERS INITIALIZATION
# Ants population
ants <- matrix(rep(NA,population*length(features)),ncol=length(features), nrow=population, dimnames=list(paste0("Ant", 1:population),features))
antsRoads <- matrix(rep(NA,population*length(features)*2),ncol=length(features)*2, nrow=population, dimnames=list(paste0("Ant", 1:population),do.call(paste0, expand.grid(features, c(0, 1)))))
# Initialize heuristic between nodes
graphHeuristic <- initializeHeuristic(mode)
# Initialize pheromone between nodes
graphPheromone <- initializePheromone()
# Initialize the list with results
res <- list(NULL)
res[[1]] <- NA
res[[2]] <- NA
res[[3]] <- list(NULL)
res[[4]] <- list(NULL)
names(res) <- c("bestFeatures","bestFitness","antsIter","pheromoneIter")
## ITERATIONS
# In each iteration of the algorithm...
for(i in 1:iter){
# Generate candidate solutions
for(j in 1:population){
aux <- generateSolutions(graphHeuristic, graphPheromone)
ants[j,] <- aux[[1]]
antsRoads[j,] <- aux[[2]]
}
# Calculate the fitness of each ants
antsFitness <- rep(NA, population)
antsFitness <- generateFitness(ants)
# The ant with the best fitness
if(max){ # For maximization
bestAnt <- which(antsFitness==max(antsFitness))
if(length(bestAnt)>1){
bestAnt <- sample(bestAnt, 1)
}
# Save the best fitness
if(i==1){
bestFeatures <- ants[bestAnt,]
bestFitness <- max(antsFitness)
}else{
if(max(antsFitness)>bestFitness){
bestFeatures <- ants[bestAnt,]
bestFitness <- max(antsFitness)
}
}
}else{ # For minimization
bestAnt <- which(antsFitness==min(antsFitness))
if(length(bestAnt)>1){
bestAnt <- sample(bestAnt, 1)
}
# Save the best fitness
if(i==1){
bestFeatures <- ants[bestAnt,]
bestFitness <- min(antsFitness)
}else{
if(min(antsFitness)<bestFitness){
bestFeatures <- ants[bestAnt,]
bestFitness <- min(antsFitness)
}
}
}
# Pheromones actualization and evaporation
if(max){
graphPheromone <- pheromoneUpdate(graphPheromone, antsRoads[bestAnt,], max(antsFitness))
}else{
graphPheromone <- pheromoneUpdate(graphPheromone, antsRoads[bestAnt,], min(antsFitness))
}
if(verbose){
if(max){
cat(paste("ACO | iter = ",i," | BestIterAnt = ",paste(bestFeatures,collapse="")," | BestIterFitness = ",max(antsFitness), " | MeanIterFitness", mean(antsFitness)," | BestGlobalAnt",bestFitness," \n"))
}else{
cat(paste("ACO | iter = ",i," | BestIterAnt = ",paste(bestFeatures,collapse="")," | BestIterFitness = ",min(antsFitness), " | MeanIterFitness", mean(antsFitness)," | BestGlobalAnt",bestFitness," \n"))
}
}
# Save the population
res[[3]][[i]] <- cbind(ants,antsFitness)
names(res$antsIter)[[i]] <- paste("Iter",i,sep="")
# Save the pheromone
res[[4]][[i]] <- graphPheromone
names(res$pheromoneIter)[[i]] <- paste("Iter",i,sep="")
}
res[[1]] <- bestFeatures
res[[2]] <- bestFitness
# Modify the output
res[[1]] <- matrix(res[[1]], nrow=1, byrow=FALSE, dimnames=list(c(),features))
return(res)
}
attr(antColonySearch,'shortName') <- "aco"
attr(antColonySearch,'name') <- "Ant Colony Optimization"
return(antColonySearch)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.