predomicspkg: Interpretable Prediction in Omics Data

Documented in crossing2 crossingIndividual_v1 crossingIndividual_v2 crossingIndividual_v3 evolve1 evolve2m evolve3m get_IndividualToBeMutated get_Parents individualGetDenseVec individual_vec_v1 individual_vec_v2 individual_vec_v3 mutate2 mutator_v1 mutator_v2 population2 rankFeatures resetTags select selectIndividualToKeep selector_v1 selector_v2 tag_Couples tag_select tag_SelectElite tag_SelectRandom tag_ToBeMutated

################################################################
#  _____       _                   ____            _           #
# |_   _|     | |                 / __ \          (_)          #
#   | |  _ __ | |_ ___  __ _ _ __| |  | |_ __ ___  _  ___ ___  #
#   | | | '_ \| __/ _ \/ _` | '__| |  | | '_ ` _ \| |/ __/ __| #
#   | |_| | | | ||  __| (_| | |  | |__| | | | | | | | (__\__ \ #
# |_____|_| |_|\__\___|\__, |_|   \____/|_| |_| |_|_|\___|___/ #
#                       __/ |                                  #
#                      |___/                                   #
################################################################

################################################################
# @script: terga2.lib.R                                          
# @author: Edi Prifti
# @author: Lucas Robin
# @date: August 2016                                                    
################################################################


######################################################
###               CLASS POPULATION               #####
######################################################

#' Generate and Initialize a Population for Evolutionary Computation
#'
#' This function generates an initial population of individuals for evolutionary
#' algorithms, potentially using predefined parameters and sources, and
#' evaluates their fitness.
#'
#' @param X A matrix or data frame of feature values, where each row represents
#'   a feature and each column represents a sample.
#' @param y A response vector or target variable for supervised learning. The
#'   length of \code{y} should match the number of columns in \code{X}.
#' @param clf A classifier object that includes parameters for generating and
#'   evaluating individuals. Expected to contain `params` and possibly `coeffs_`
#'   elements.
#' @param featEval An optional named vector of feature evaluation metrics. If
#'   \code{NULL}, an NA-filled vector will be created with the same names as the
#'   row names of \code{X}.
#'
#' @details The function initializes an evolutionary population by creating
#' individuals based on signs, injecting predefined individuals (if specified),
#' loading additional individuals from files, and generating new individuals
#' with random or specific feature models. After generation, the function
#' evaluates and sorts the population based on fitness.
#'
#' @return A list representing the initialized and evaluated population, sorted
#'   by fitness. Returns \code{NULL} if the population fails to meet validity
#'   checks.
#'
#' @examples
#' \dontrun{
#' X <- matrix(rnorm(100), nrow = 10) # Random features
#' y <- sample(c(0, 1), 10, replace = TRUE) # Random binary response
#' clf <- list(params = list(randomSigns = FALSE, language = "bin", size_pop_random = 5, parallel.local = FALSE, in_pop = "NULL", popSourceFile = "NULL", verbose = TRUE, evolve_k1 = FALSE))
#' pop <- population2(X, y, clf)
#' }
#'
#' @import foreach
#' @import doRNG
#' @export
population2 <- function(X, y, clf, featEval = NULL)
{
  # initialize featuEval
  if(is.null(featEval))
  {
    featEval <-rep(NA, length(rownames(X)))
    names(featEval) <- rownames(X)
  }
  
  pop <- list()
  if(!clf$params$randomSigns) # if we need to specify signs
  {
    if(clf$params$language=="bin" | clf$params$language=="bininter") 
    {
      signs  <- rep(1, nrow(X))
    }else # for the other languages get the signs
    {
      if(!is.null(clf$coeffs_))
      {
        signs <- clf$coeffs_
      }else # we compute here otherwise
      {
        signs  <- getSign(X = X, y = y, clf = clf)  # +1, -1 for the ternary learning    
      }
    }
  } else 
  {
    signs = NULL
  }
  
  if(clf$params$size_pop_random!=0){
    # create individuals using the signs
    if(clf$params$parallel.local)
    {
      #print("OUI!!!!!!!!!!!!! paralle!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
      pop <- foreach(i = 1:clf$params$size_pop_random) %dorng%
        {
          individual(X, y, clf, signs = signs, eval.all = FALSE)
        }
    } else
    {
      for(i in 1:clf$params$size_pop_random)
      {
        pop[[i]] <- individual(X, y, clf, signs = signs, eval.all = FALSE)
      }
    }
  }
  
  
  # If there are individuals to inject in the population 
  # passed by the clf
  if(!(length(clf$params$in_pop)==1 & all(clf$params$in_pop=="NULL")))
  {
    pop_add <- clf$params$in_pop
    pop[(length(pop) +1):(length(pop) + length(pop_add))] <- pop_add
    
    if(clf$params$verbose)
    {
      cat("\t\tseeding population with ",length(pop_add)," injected individuals","\n")
    }
  }
  
  # or from a file
  if(clf$params$popSourceFile!="NULL")
  {
    if(length(clf$params$popSourceFile)==1)
    {
      popFromFile <- loadPopulation(clf$params$popSourceFile[[1]])
      pop[(length(pop) +1):(length(pop) + length(popFromFile))] <- popFromFile
      
      if(clf$params$verbose)
      {
        cat("\t\tseeding population with ",length(popFromFile)," individuals loaded from file","\n")
      }
    }else
    {
      # if more than one file
      for(i in 1:length(clf$params$popSourceFile))
      {
        popFromFile <- loadPopulation(clf$params$popSourceFile[[i]])
        pop[(length(pop) +1):(length(pop) + length(popFromFile))] <- popFromFile
        
        if(clf$params$verbose)
        {
          cat("\t\tseeding population with ",length(popFromFile)," individuals loaded from file ",i,"\n")
        }
      } # end for
    } # end more files
  } # end load from files
  
  # generate randomly created individuals
  if(clf$params$size_pop_random!=0)
  {
    # make other individuals
    if(!clf$params$evolve_k1)
    {
      # generate all the single feature models
      singleFeatMod <- generateAllSingleFeatureModel(X, y, clf, eval.all = FALSE)
      # add them to the main initial population
      pop[(length(pop) +1):(length(pop) + length(singleFeatMod))] <- singleFeatMod
      
      if(clf$params$verbose)
      {
        cat("\t\tseeding population with ",length(singleFeatMod)," randomly created individuals ","\n")
      }
    }
  } # end random generation
  
  if(!isPopulation(pop))
  {
    return(NULL)
  }
  
  # evaluate only the population but only the evalToFit and what it needs to be computed
  pop <- evaluatePopulation(X, y, clf, pop, eval.all = FALSE)
  pop <- unique(pop) # keep only unique samples
  
  # sort the population by fitness
  pop <- sortPopulation(pop, evalToOrder = "fit_")
  
  if(clf$params$verbose)
  {
    cat("\t\tThe final population is composed of ",length(pop)," individuals","\n")
  }
  return(pop)
}


#' Generate a Sparse Individual Vector for Evolutionary Computation
#'
#' This function generates an individual vector representation for evolutionary
#' algorithms. The vector is generated with specified sparsity, based on binary
#' or ternary language, depending on the configuration in the classifier object
#' (`clf`).
#'
#' @param clf A classifier object containing parameters that guide the vector
#'   generation, including `language`, `size_world`, `sparsity.mean`,
#'   `sparsity.min`, and `sparsity.max`.
#' @param signs An optional vector of signs used to set or adjust the values in
#'   the generated individual vector. Defaults to \code{NULL}.
#'
#' @details The function creates a vector of length `clf$params$size_world`,
#' with elements initialized based on a specified probability distribution. For
#' binary language, the vector elements are either 1 or 0, while for ternary
#' language, they can be -1, 0, or 1. The function ensures that the vector
#' respects specified minimum and maximum sparsity levels
#' (`clf$params$sparsity.min` and `clf$params$sparsity.max`), adjusting the
#' number of non-zero elements as needed.
#'
#' @return A numeric vector of length `clf$params$size_world`, representing an
#'   individual with the desired sparsity and language settings.
#'
#' @examples
#' \dontrun{
#' clf <- list(params = list(language = "ternary", size_world = 100, sparsity.mean = 0.1, sparsity.min = 5, sparsity.max = 20))
#' individual <- individual_vec_v1(clf)
#' }
#'
#' @export
individual_vec_v1 <- function(clf, signs = NULL) # changer à dense
{
  prob_1 <- clf$params$sparsity.mean/clf$params$size_world
  prob <- c(prob_1/2, 1 - prob_1, prob_1/2)
  
  # Check binary language
  if(clf$params$language=="bin" | clf$params$language=="bininter") # for binary language
  {
    res <- sample(1, clf$params$size_world, replace = TRUE, prob = prob)
  }else # for ternary language
  {
    res <- sample(-1:1, clf$params$size_world, replace = TRUE, prob = prob)  
  }
  
  effectiveSpar <- length(which(res != 0))
  if(effectiveSpar < clf$params$sparsity.min)
  {
    if(clf$params$sparsity.min < clf$params$sparsity.max)
    {
      ind2Add.nb <- sample((clf$params$sparsity.min - effectiveSpar):(clf$params$sparsity.max - effectiveSpar), 1)
    } else
    {
      ind2Add.nb <- clf$params$sparsity.min - effectiveSpar
    }
    notNull <- which(res != 0)
    world <- (1:clf$params$size_world)
    if(length(notNull) > 0)
      world <- world[-notNull]
    
    ind2Add <- sample(world, ind2Add.nb)
    
    # Check binary language
    if(clf$params$language=="bin" | clf$params$language=="bininter") # for binary language
    {
      res[ind2Add] <- sample(c(1), length(ind2Add), replace = TRUE)
    }else # for ternary language
    {
      res[ind2Add] <- sample(c(-1,1), length(ind2Add), replace = TRUE)
    }
    
  } else if(effectiveSpar > clf$params$sparsity.max) 
  {
    if(clf$params$sparsity.min < clf$params$sparsity.max)
    {
      ind2rm.nb <- sample((effectiveSpar - clf$params$sparsity.max):(effectiveSpar - clf$params$sparsity.min), 1)
    } else 
    {
      ind2rm.nb <- effectiveSpar - clf$params$sparsity.max
    }
    notNull <- which(res != 0)
    ind2Rm <- sample(notNull, ind2rm.nb)
    res[ind2Rm] <- 0
  }
  return(res)
}


#' Generate a Sparse Individual Vector with Specified Density
#'
#' This function generates an individual vector with a specific sparsity level,
#' based on parameters defined in a classifier object (`clf`). It supports
#' binary and ternary language configurations and allows for sign customization.
#'
#' @param clf A classifier object that contains parameters for generating the
#'   vector, such as `size_world`, `sparsity.min`, `sparsity.max`,
#'   `sparsity.mean`, and `language`.
#' @param signs An optional vector of signs used to populate the individual
#'   vector at specified indices. If \code{NULL}, the function randomly assigns
#'   values based on the language setting in \code{clf}.
#'
#' @details The function constructs a vector of length `clf$params$size_world`
#' and assigns values based on either binary or ternary language, as specified
#' in `clf$params$language`. The sparsity level is controlled by `sparsity.min`
#' and `sparsity.max`, or by `sparsity.mean` if the minimum and maximum sparsity
#' levels are equal. If `signs` is provided, it will populate the vector at
#' selected indices.
#'
#' If `clf$params$size_world` is not set, a warning is issued (if warnings are
#' enabled in `clf`) and the function returns \code{NULL}.
#'
#' @return A numeric vector of length `clf$params$size_world` with elements set
#'   based on sparsity and language parameters. Returns \code{NULL} if
#'   `size_world` is not properly set.
#'
#' @examples
#' \dontrun{
#' clf <- list(params = list(language = "bin", size_world = 50, sparsity.mean = 10, sparsity.min = 5, sparsity.max = 15, warnings = TRUE))
#' individual <- individual_vec_v2(clf)
#' }
#'
#' @export
individual_vec_v2 <- function(clf, signs = NULL)
{
  if(clf$params$size_world == "NULL")
  {
    if(clf$params$warnings) warning("individual_vec_v2: the size_world is not set while expected")
    return(NULL)
  }
  
  if(clf$params$sparsity.min != clf$params$sparsity.max)
  {
    notNull.number    <- sample(clf$params$sparsity.min:clf$params$sparsity.max, 1)
  } else
  {
    notNull.number    <- clf$params$sparsity.mean
  }
  notNull.ind         <- sample(1:clf$params$size_world, notNull.number)
  res                 <- rep(0, clf$params$size_world)
  
  if(is.null(signs)) 
  {
    # Check binary language
    if(clf$params$language=="bin" | clf$params$language=="bininter") # for binary language
    {
      res[notNull.ind]  <- sample(c(1), notNull.number, replace = TRUE)
    }else # for ternary language
    {
      res[notNull.ind]  <- sample(c(-1, 1), notNull.number, replace = TRUE)
    }
    
  } else 
  {
    res[notNull.ind]  <- signs[notNull.ind]
  }
  return(res)
}


#' Generate Sparse Individual Vector with Indices and Coefficients
#'
#' This function generates a sparse individual vector representation for
#' evolutionary algorithms, returning a list with the indices and coefficients
#' of non-zero elements. The values and sparsity level are based on the
#' parameters defined in a classifier object (`clf`).
#'
#' @param clf A classifier object containing parameters for generating the
#'   vector, including `size_world`, `sparsity.min`, `sparsity.max`, and
#'   `language`.
#' @param signs An optional vector of signs (not used in this function but
#'   included for consistency with other versions).
#'
#' @details The function randomly selects a number of non-zero elements within
#' the range specified by `sparsity.min` and `sparsity.max`. It then assigns
#' values to these elements based on `clf$params$language`: for binary language,
#' only 1s are used; for ternary language, values are -1 or 1.
#'
#' @return A list with two elements:
#' \describe{
#'   \item{indices}{A vector of indices corresponding to non-zero elements in the generated vector.}
#'   \item{coeffs}{A vector of coefficients (1 or -1) at the specified indices.}
#' }
#'
#' @examples
#' \dontrun{
#' clf <- list(params = list(language = "ternary", size_world = 50, sparsity.min = 5, sparsity.max = 15))
#' individual <- individual_vec_v3(clf)
#' print(individual)
#' }
#'
#' @export
individual_vec_v3 <- function(clf, signs = NULL)
{
  notNull.number   <- sample(clf$params$sparsity.min:clf$params$sparsity.max, 1)
  notNull.ind      <- sample(1:clf$params$size_world, notNull.number)
  
  # Check binary language
  if(clf$params$language=="bin" | clf$params$language=="bininter") # for binary language
  {
    res <- sample(c(1), notNull.number, replace = TRUE)
  }else # for ternary language
  {
    res <- sample(c(-1, 1), notNull.number, replace = TRUE)
  }
  return(list(indices = notNull.ind, coeffs = res))
}


####### Tests effectué sur les deux fonctions précédentes : #################################################################
# microbenchmark::microbenchmark(                                                                                           #
#   test <- foreach(k = 1:1000) %do% {individual_vec_v1(clf.terga)},                                                        #
#   test <- foreach(k = 1:1000) %do% {individual_vec_v2(clf.terga)},                                                        #
#   test <- foreach(k = 1:1000) %do% {individual_vec_v3(clf.terga)},                                                        #
#   times = 100                                                                                                             #
# )                                                                                                                         #
# Unit: milliseconds                                                                                                        #
#                                                            expr      min       lq     mean   median       uq max    neval #
# test <- foreach(k = 1:1000) %do% {individual_vec_v1(clf.terga)} 243.6722 249.3285 254.2703 252.2820 255.5914 325.4101 100 #
# test <- foreach(k = 1:1000) %do% {individual_vec_v2(clf.terga)} 246.8161 253.1758 260.6690 255.8954 260.4250 370.5941 100 #
# test <- foreach(k = 1:1000) %do% {individual_vec_v3(clf.terga)} 244.4738 250.4082 257.0116 252.7833 256.8256 357.6657 100 #
#############################################################################################################################


#' Select Elite Individuals in a Population
#'
#' This function selects a specified number of elite individuals from a
#' population by marking them as selected. The population is sorted by fitness,
#' and the top individuals are tagged based on the `nbToSelect` parameter.
#'
#' @param clf A classifier object that may contain parameters for selection
#'   (though it is not directly used in this function).
#' @param pop A list representing the population, where each individual is an
#'   element with a `selected` attribute and a `fit_` fitness attribute.
#' @param nbToSelect An integer specifying the number of individuals to mark as
#'   elite in the population.
#'
#' @details The function sorts the population by the `fit_` attribute
#' (presumably representing fitness) in descending order. It then iterates
#' through the sorted list, marking the top `nbToSelect` individuals as
#' `selected` by setting their `selected` attribute to `TRUE`. If the population
#' is empty, a warning is issued and the function returns \code{NULL}.
#'
#' @return The modified population list with the `selected` attribute set to
#'   \code{TRUE} for the top elite individuals.
#'
#' @examples
#' \dontrun{
#' pop <- list(
#'   list(fit_ = 0.9, selected = FALSE),
#'   list(fit_ = 0.8, selected = FALSE),
#'   list(fit_ = 0.7, selected = FALSE)
#' )
#' clf <- list() # Placeholder for classifier object
#' pop <- tag_SelectElite(clf, pop, nbToSelect = 2)
#' # Check which individuals are selected
#' sapply(pop, function(ind) ind$selected)
#' }
#'
#' @export
tag_SelectElite <- function(clf, pop, nbToSelect)
{
  pop <- sortPopulation(pop, evalToOrder = "fit_")
  
  if(is.null(pop))
  {
    warning("... ... resetTags: the population is empty\n")
    return(NULL)
  }
  
  nbSelected <- 0
  for(i in 1:length(pop))
  {
    if(nbSelected < nbToSelect)
    {
      if(!pop[[i]]$selected)
      {
        pop[[i]]$selected <- TRUE
        nbSelected <- nbSelected +1
      }
    }
  }
  return(pop)
}


#' Randomly Select Individuals in a Population
#'
#' This function randomly selects a specified number of unselected individuals
#' from a population and marks them as selected. The function can operate in
#' parallel if specified in the classifier parameters.
#'
#' @param clf A classifier object that contains parameters for selection,
#'   including `params$warnings` (to control warnings) and
#'   `params$parallel.local` (for parallel execution).
#' @param pop A list representing the population, where each individual has a
#'   `selected` attribute.
#' @param nbToSelect An integer specifying the number of individuals to randomly
#'   select and mark as selected.
#'
#' @details The function identifies individuals in the population who have not
#' been selected (i.e., their `selected` attribute is `FALSE`). It then randomly
#' selects up to `nbToSelect` of these unselected individuals and sets their
#' `selected` attribute to `TRUE`. If fewer than `nbToSelect` unselected
#' individuals are available, it selects all unselected individuals.
#'
#' If parallel processing is enabled (`clf$params$parallel.local` is `TRUE`),
#' selection is performed in parallel using `foreach` and `doRNG`. If the
#' population is empty, the function issues a warning (if `clf$params$warnings`
#' is `TRUE`) and returns \code{NULL}.
#'
#' @return The modified population list with randomly selected individuals
#'   marked as `selected = TRUE`.
#'
#' @examples
#' \dontrun{
#' pop <- list(
#'   list(fit_ = 0.9, selected = FALSE),
#'   list(fit_ = 0.8, selected = FALSE),
#'   list(fit_ = 0.7, selected = FALSE)
#' )
#' clf <- list(params = list(warnings = TRUE, parallel.local = FALSE))
#' pop <- tag_SelectRandom(clf, pop, nbToSelect = 2)
#' # Check which individuals are selected
#' sapply(pop, function(ind) ind$selected)
#' }
#'
#' @import foreach
#' @import doRNG
#' @export
tag_SelectRandom <- function(clf, pop, nbToSelect)
{
  if(is.null(pop))
  {
    if(clf$params$warnings) warning("resetTags: the population is empty")
    return(NULL)
  }
  
  indexNonSelected <- list()
  for(i in 1:length(pop))
  {
    if(!pop[[i]]$selected)
    {
      indexNonSelected[[length(indexNonSelected) +1]] <- i
    }
  }
  
  if(length(indexNonSelected) < nbToSelect)
  {
    selection <- sample(indexNonSelected, length(indexNonSelected))  
  }else
  {
    selection <- sample(indexNonSelected, nbToSelect)
  }
  
  if(clf$params$parallel.local)
  {
    pop <- foreach(i = 1:length(pop)) %dorng%
      {
        if(i %in% selection)
        {
          pop[[i]]$selected <- TRUE
        }
        return(pop[[i]])
      }
  } else
  {
    for(i in 1:length(pop))
    {
      if(i %in% selection)
      {
        pop[[i]]$selected <- TRUE
      }
    }
  }
  return(pop)
}


#' Pair Individuals in Population as Mating Couples
#'
#' This function pairs individuals in a population to form mating couples,
#' assigning each pair a `mate` attribute. Each individual can be matched with
#' one other individual from the specified parents list.
#'
#' @param pop A list representing the population, where each individual has a
#'   `mate` attribute to indicate their paired individual.
#' @param parents A vector of indices or identifiers for individuals in the
#'   population that are eligible to be paired.
#'
#' @details The function randomly pairs individuals from the `parents` list. For
#'   each couple, it assigns the `mate` attribute of one individual to the
#'   other’s index, and vice versa. If an individual’s `mate` attribute is
#'   positive, it indicates the index of their partner. If it’s negative, it
#'   indicates their partner’s index with a negative sign.
#'
#'   If the population is empty, the function issues a warning and returns
#'   \code{NULL}.
#'
#' @return The modified population list, with `mate` attributes assigned for
#'   paired individuals.
#'
#' @examples
#' \dontrun{
#' pop <- list(
#'   list(fit_ = 0.9, selected = FALSE, mate = NULL),
#'   list(fit_ = 0.8, selected = FALSE, mate = NULL),
#'   list(fit_ = 0.7, selected = FALSE, mate = NULL),
#'   list(fit_ = 0.6, selected = FALSE, mate = NULL)
#' )
#' parents <- c(1, 2, 3, 4)
#' pop <- tag_Couples(pop, parents)
#' # Check mates for each individual
#' sapply(pop, function(ind) ind$mate)
#' }
#'
#' @export
tag_Couples <- function(pop, parents)
{
  if(is.null(pop))
  {
    if(clf$params$warnings) warning("resetTags: the population is empty")
    return(NULL)
  }
  
  for(i in 1:(length(parents)/2))
  {
    if(length(parents) > 1)
    {
      couple <- sample(parents, 2)
      couple <- couple[order(couple)]
      parents <- parents[-which(parents == couple[[1]])]
      parents <- parents[-which(parents == couple[[2]])]
      
      pop[[couple[1]]]$mate <- couple[2]
      pop[[couple[2]]]$mate <- -couple[1]
    }
  }
  return(pop)
}


#' Tag Individuals for Mutation in a Population
#'
#' This function tags a specified number of individuals in the population for
#' mutation, while optionally protecting certain individuals from being
#' selected. The tagged individuals are marked with a `toBeMutated` attribute
#' set to `TRUE`.
#'
#' @param pop A list representing the population, where each individual has a
#'   `toBeMutated` attribute.
#' @param mutate_size An integer specifying the number of individuals to mark
#'   for mutation.
#' @param protected An optional vector of indices representing individuals that
#'   should not be tagged for mutation. Defaults to \code{NULL}.
#'
#' @details The function selects `mutate_size` individuals from the population,
#' excluding any indices specified in `protected`. If `mutate_size` exceeds the
#' number of non-protected individuals, the function issues a warning and
#' reduces the mutation count to the maximum available.
#'
#' If the number of protected individuals is too large to allow sufficient
#' mutations, the function removes some individuals from the `protected` list as
#' necessary to meet the mutation quota.
#'
#' If the population is empty, the function issues a warning (if
#' `clf$params$warnings` is `TRUE`) and returns \code{NULL}.
#'
#' @return The modified population list with `toBeMutated` attributes set to
#'   `TRUE` for the selected individuals.
#'
#' @examples
#' \dontrun{
#' pop <- list(
#'   list(fit_ = 0.9, toBeMutated = FALSE),
#'   list(fit_ = 0.8, toBeMutated = FALSE),
#'   list(fit_ = 0.7, toBeMutated = FALSE),
#'   list(fit_ = 0.6, toBeMutated = FALSE)
#' )
#' protected <- c(1, 2)
#' pop <- tag_ToBeMutated(pop, mutate_size = 2, protected = protected)
#' # Check which individuals are tagged for mutation
#' sapply(pop, function(ind) ind$toBeMutated)
#' }
#'
#' @export
tag_ToBeMutated <- function(pop, mutate_size, protected = NULL)
{
  if(is.null(pop))
  {
    if(clf$params$warnings) warning("resetTags: the population is empty")
    return(NULL)
  }
  
  all.indexes <- seq_len(length(pop))
  
  if(!is.null(protected))
  {
    # sanity check
    if(!all(protected %in% all.indexes))
    {
      stop("tag_ToBeMutated: protected indices don't match the population")
    }
    possible.to.mutate <- (length(pop)-length(protected))
    
    # TODO fix negative delta ?????
    delta = round(mutate_size) - possible.to.mutate
    
    if(round(mutate_size) > possible.to.mutate)
    {
      if(clf$params$warnings) warning(paste("tag_ToBeMutated: the protected individuals don't leave enough to mutate: nb protected",
                                            length(protected), " and nb available", possible.to.mutate,"mutating less ..."))
      protected <- protected[1:(length(protected) - delta)] # stop protecting some to assure the mutation quota.
    }
    tosample.indexes <- which(!all.indexes %in% protected)
  }else # if none is protected
  {
    tosample.indexes <- all.indexes
  }
  
  # select mutate_size indivuduals out of the not protected
  indexToBeMutated <- sample(tosample.indexes, mutate_size)
  # set the indexes to be mutated
  for(i in indexToBeMutated)
  {
    pop[[i]]$toBeMutated <- TRUE
  }
  
  return(pop)
}


#' Select and Tag Individuals in a Population for Evolutionary Operations
#'
#' This function selects individuals in a population for evolutionary
#' operations, including elite and random selection, pairing individuals for
#' mating, and tagging some individuals for mutation. The selection process is
#' based on specified percentages in the classifier object (`clf`).
#'
#' @param X A matrix or data frame of feature values, where each row represents
#'   a feature and each column represents a sample.
#' @param y A response vector or target variable for supervised learning. The
#'   length of \code{y} should match the number of columns in \code{X}.
#' @param clf A classifier object containing parameters for selection, including
#'   `select_perc`, `select_percByMethod`, `size_pop`, and `mutate_size`.
#' @param pop A list representing the population, where each individual has
#'   attributes including `mate` and `toBeMutated`.
#'
#' @details The function begins by sorting the population based on fitness
#' (`fit_` attribute). It then calculates the number of individuals to be
#' selected for mutation based on `select_perc` in `clf`. Selection is split
#' into two methods, elite and random, as specified by `select_percByMethod`,
#' and individuals are tagged for mating within these groups.
#'
#' Following selection, individuals are paired for mating, with each selected
#' individual having a `mate` attribute set. The function then calls
#' `tag_ToBeMutated` to mark some individuals for mutation based on
#' `mutate_size` from `clf`.
#'
#' @return The modified population list, where selected individuals are tagged
#'   with `mate` attributes for mating and `toBeMutated` attributes for
#'   mutation.
#'
#' @examples
#' \dontrun{
#' X <- matrix(rnorm(100), nrow = 10) # Random features
#' y <- sample(c(0, 1), 10, replace = TRUE) # Random binary response
#' clf <- list(params = list(select_perc = 50, select_percByMethod = c(30, 20), size_pop = 10, mutate_size = 10))
#' pop <- list(
#'   list(fit_ = 0.9, mate = list(), toBeMutated = FALSE),
#'   list(fit_ = 0.8, mate = list(), toBeMutated = FALSE),
#'   # Add additional individuals as needed
#' )
#' pop <- tag_select(X, y, clf, pop)
#' # Check which individuals are selected for mating and mutation
#' sapply(pop, function(ind) ind$mate)
#' sapply(pop, function(ind) ind$toBeMutated)
#' }
#'
#' @export
tag_select <- function(X, y, clf, pop)
{
  pop <- sortPopulation(pop, evalToOrder = "fit_")
  
  nb2BeMutated <- ceiling(clf$params$select_perc * clf$params$size_pop/100)
  nbByMethod   <- sapply(clf$params$select_percByMethod,        # On supose qu'il n'y a que 2 methodes de selection,
                         function(x) {x * nb2BeMutated/100})    # et qu'elles sont : elite et random
  selectElite   <- 1:nbByMethod[1]
  
  selectRandom <- sample(length(pop), nbByMethod[2])
  for(x in selectElite)
  {
    # At the current version an individual can be crossed with itself
    pop[[x]]$mate[length(pop[[x]]$mate)+1] <- sample(unique(selectElite, selectRandom), 1) 
  }
  for(x in selectRandom)
  {
    # At the current version an individual can be crossed with itself
    pop[[x]]$mate[length(pop[[x]]$mate)+1] <- sample(unique(selectElite, selectRandom), 1)
  }
  
  # tag to be mutated
  pop <- tag_ToBeMutated(pop = pop, mutate_size = (clf$params$size_pop * clf$params$mutate_size / 100), protected = NULL)
  
  return(pop)
}



#' Retrieve Selected Individuals as Parents
#'
#' This function identifies and returns the indices of individuals in the
#' population who are marked as selected, which can be used as parents in
#' evolutionary processes.
#'
#' @param pop A list representing the population, where each individual has a
#'   `selected` attribute indicating if they are selected.
#'
#' @details The function uses `populationGet_X` to retrieve individuals with the
#' `selected` attribute set to `TRUE`. It returns the indices of these
#' individuals, allowing them to be used as parents in subsequent evolutionary
#' steps.
#'
#' @return A vector of indices corresponding to individuals in the population
#'   who are marked as selected.
#'
#' @examples
#' \dontrun{
#' pop <- list(
#'   list(fit_ = 0.9, selected = TRUE),
#'   list(fit_ = 0.8, selected = FALSE),
#'   list(fit_ = 0.7, selected = TRUE)
#' )
#' parents <- get_Parents(pop)
#' print(parents) # Should return indices of selected individuals
#' }
#'
#' @export
get_Parents <- function(pop)
{
  return(which(populationGet_X(element2get = "selected", toVec = TRUE, na.rm = TRUE)(pop)))
}


#' Retrieve Individuals Marked for Mutation
#'
#' This function identifies and returns the indices of individuals in the
#' population who are marked for mutation, as indicated by the `toBeMutated`
#' attribute.
#'
#' @param pop A list representing the population, where each individual has a
#'   `toBeMutated` attribute indicating if they are marked for mutation.
#'
#' @details The function uses `populationGet_X` to retrieve individuals with the
#' `toBeMutated` attribute set to `TRUE`. It returns the indices of these
#' individuals, allowing them to be processed for mutation in subsequent
#' evolutionary steps.
#'
#' @return A vector of indices corresponding to individuals in the population
#'   who are marked for mutation.
#'
#' @examples
#' \dontrun{
#' pop <- list(
#'   list(fit_ = 0.9, toBeMutated = TRUE),
#'   list(fit_ = 0.8, toBeMutated = FALSE),
#'   list(fit_ = 0.7, toBeMutated = TRUE)
#' )
#' toBeMutated <- get_IndividualToBeMutated(pop)
#' print(toBeMutated) # Should return indices of individuals marked for mutation
#' }
#'
#' @export
get_IndividualToBeMutated <- function(pop)
{
  return(which(populationGet_X(element2get = "toBeMutated", toVec = TRUE, na.rm = TRUE)(pop)))
}


#' Reset Tags for All Individuals in a Population
#'
#' This function resets specific tags for all individuals in a population,
#' including `selected`, `toBeMutated`, and `mate` attributes. It can be used to
#' clear or initialize these tags before further processing.
#'
#' @param pop A list representing the population, where each individual is
#'   expected to have attributes `selected`, `toBeMutated`, and `mate`.
#' @param selected A logical value to set for the `selected` attribute of each
#'   individual. Defaults to \code{FALSE}.
#' @param toBeMutated A logical value to set for the `toBeMutated` attribute of
#'   each individual. Defaults to \code{FALSE}.
#' @param mate A value to set for the `mate` attribute of each individual.
#'   Defaults to \code{-1}.
#'
#' @details The function iterates through each individual in the population and
#' sets their `selected`, `toBeMutated`, and `mate` attributes to the specified
#' values. This function is useful for resetting the population's status before
#' selection and mutation operations.
#'
#' If `pop` is \code{NULL}, the function issues a warning (if
#' `clf$params$warnings` is enabled) and returns \code{NULL}. If `pop` is not a
#' list, an error is raised.
#'
#' @return The modified population list with updated `selected`, `toBeMutated`,
#'   and `mate` attributes for each individual.
#'
#' @examples
#' \dontrun{
#' pop <- list(
#'   list(fit_ = 0.9, selected = TRUE, toBeMutated = TRUE, mate = 2),
#'   list(fit_ = 0.8, selected = TRUE, toBeMutated = FALSE, mate = 3),
#'   list(fit_ = 0.7, selected = FALSE, toBeMutated = TRUE, mate = -1)
#' )
#' pop <- resetTags(pop, selected = FALSE, toBeMutated = FALSE, mate = -1)
#' # Check updated population
#' str(pop)
#' }
#'
#' @export
resetTags <- function(pop, selected = FALSE, toBeMutated = FALSE, mate = -1)
{
  
  if(is.null(pop))
  {
    if(clf$params$warnings) warning("resetTags: the population is empty")
    return(NULL)
  }
  
  if(!is.list(pop))
  {
    stop("resetTags: the population is not a list")
  }
  
  for(i in 1:length(pop))
  {
    if(!is.null(selected)) pop[[i]]$selected <- selected
    if(!is.null(toBeMutated)) pop[[i]]$toBeMutated <- toBeMutated
    if(!is.null(mate)) pop[[i]]$mate <- mate
  }
  return(pop)
}



#' Evolve a Population in an Evolutionary Algorithm
#'
#' This function performs one cycle of evolution on a population, including
#' evaluation, selection, crossover, and mutation steps, based on the specified
#' classifier parameters. It returns the evolved population after these
#' operations.
#'
#' @param X A matrix or data frame of feature values, where each row represents
#'   a feature and each column represents a sample.
#' @param y A response vector or target variable for supervised learning. The
#'   length of \code{y} should match the number of columns in \code{X}.
#' @param clf A classifier object containing parameters for evolution, including
#'   options for crossover, mutation, and verbose output.
#' @param pop A list representing the population, where each individual has
#'   attributes like `fit_` for fitness.
#' @param featEval A vector or list containing feature evaluation metrics, used
#'   in mutation to assess feature significance.
#'
#' @details The function first evaluates the fitness of the current population
#' if it has not been done. It then selects the best individual and proceeds
#' through the evolutionary steps:
#'
#' 1. **Evaluation**: If fitness values are missing, the population is
#' evaluated. 2. **Selection & Crossover**: If crossover is enabled
#' (`clf$params$cross`), parents are selected and crossover is performed to
#' generate new individuals. 3. **Mutation**: If mutation is enabled
#' (`clf$params$mutate`), some individuals are selected for mutation based on
#' `mutate_size`, and mutations are applied using `featEval`.
#'
#' After each step, the population is re-evaluated, and the best individual is
#' updated. If `verbose` is enabled, details about the best individual are
#' printed.
#'
#' @return The evolved population list with updated individuals and fitness
#'   values.
#'
#' @examples
#' \dontrun{
#' X <- matrix(rnorm(100), nrow = 10) # Random features
#' y <- sample(c(0, 1), 10, replace = TRUE) # Random binary response
#' clf <- list(params = list(cross = TRUE, mutate = TRUE, mutate_size = 10, verbose = TRUE))
#' pop <- list(
#'   list(fit_ = NA, indices_ = 1:5),
#'   list(fit_ = NA, indices_ = 6:10)
#' )
#' featEval <- runif(10)
#' evolved_pop <- evolve1(X, y, clf, pop, featEval)
#' }
#'
#' @export
evolve1 <- function(X, y, clf, pop, featEval) 
{
  if(anyNA(lapply(pop, function(x) x$fit_)))
    pop <- evaluatePopulation(X, y, clf, pop, eval.all = FALSE)
  
  # 1. Evaluate the current population
  # evaluation <- getFitPopulation(pop)
  evaluation <- populationGet_X(element2get = "fit_", toVec = TRUE, na.rm = TRUE)(pop)
  best_individual <- pop[[which.max(abs(evaluation))]]
  evolved_pop <- pop
  
  #   if (clf$params$verbose) cat("\tBest individual: ", best_individual$indices_,
  #                               "\t\tAUC:\t", best_individual$fit_, "\n")
  
  # 2 Select the parents of the next generation (indexes in the population)
  if(clf$params$cross)
  {
    parents <- select(clf, pop)
    evolved_pop <- crossing2(X, y, clf, evolved_pop, parents)
    evolved_pop <- evaluatePopulation(X, y, clf, evolved_pop, eval.all = FALSE)
    evaluation <- populationGet_X(element2get = "fit_", toVec = TRUE, na.rm = TRUE)(evolved_pop)
  }
  
  # 4. Mutate the new population
  if(clf$params$mutate)
  {
    selection     <- !(1:length(evaluation)) %in% which.max(abs(evaluation))
    selection.ind <- sample(which(selection), ceiling(sum(selection)*clf$params$mutate_size/100))
    evolved_pop   <- mutate2(X, y, clf, evolved_pop, selection.ind, featEval)
    evolved_pop   <- evaluatePopulation(X, y, clf, evolved_pop, force.re.evaluation = TRUE, eval.all = FALSE)
    evaluation <- populationGet_X(element2get = "fit_", toVec = TRUE, na.rm = TRUE)(evolved_pop)
  }
  
  best_individual <- evolved_pop[[which.max(abs(evaluation))]]
  if (clf$params$verbose) 
  {
    if(isModel(best_individual))
    {
      try(print(printModel(mod = best_individual, method = clf$params$print_ind_method, score = "fit_")), silent = TRUE)
    }
  }
  
  return(evolved_pop)
}



#' Evolve a Population with Advanced Selection, Crossover, and Mutation
#'
#' This function performs one generation of evolution on a population,
#' implementing selection, crossover, and mutation with customizable methods. It
#' returns the evolved population after these operations.
#'
#' @param X A matrix or data frame of feature values, where each row represents
#'   a feature and each column represents a sample.
#' @param y A response vector or target variable for supervised learning. The
#'   length of \code{y} should match the number of columns in \code{X}.
#' @param clf A classifier object containing parameters for evolutionary
#'   operations, including options for selection percentages, mutation size,
#'   population size, parallelization, and debugging.
#' @param pop A list representing the population, where each individual has
#'   attributes like `fit_`, `selected`, `mate`, and `toBeMutated`.
#' @param featEval A vector or list containing feature evaluation metrics, used
#'   to guide mutation operations.
#' @param generation An integer representing the current generation number in
#'   the evolutionary process.
#'
#' @details The `evolve2m` function implements a detailed evolutionary cycle
#' with the following steps:
#'
#' 1. **Population Sorting**: The population is sorted by fitness, and
#' individuals are evaluated if necessary. 2. **Tagging for Selection**:
#' Individuals are tagged for elite and random selection based on selection
#' percentages in `clf$params$select_percByMethod`. 3. **Tagging for Mutation**:
#' Some individuals are tagged for mutation based on `mutate_size`. 4.
#' **Parallel and Sequential Processing**: If parallel processing is enabled
#' (`clf$params$parallel.local`), crossover and mutation are performed in
#' parallel; otherwise, they are processed sequentially. 5. **Mating and
#' Mutation**: Selected individuals are paired for mating, and some are mutated
#' based on the feature evaluation metrics (`featEval`). 6. **Population Pruning
#' and Expansion**: The evolved population is pruned to retain the top 2/3 of
#' individuals by fitness, then expanded by adding new individuals or retaining
#' some from the previous population if necessary.
#'
#' The population is further cleaned, sorted, and prepared for the next
#' generation. If `debug` mode is enabled in `clf`, various messages are printed
#' to track the evolution process.
#'
#' @return The evolved population list, containing updated individuals with
#'   modified `fit_`, `selected`, `mate`, and `toBeMutated` attributes.
#'
#' @examples
#' \dontrun{
#' X <- matrix(rnorm(100), nrow = 10) # Random features
#' y <- sample(c(0, 1), 10, replace = TRUE) # Random binary response
#' clf <- list(params = list(
#'   select_perc = 50, select_percByMethod = c(30, 20), size_pop = 10,
#'   mutate_size = 10, cross = TRUE, mutate = TRUE, parallel.local = FALSE, debug = TRUE
#' ))
#' pop <- list(
#'   list(fit_ = NA, indices_ = 1:5, selected = FALSE, toBeMutated = FALSE, mate = NULL),
#'   list(fit_ = NA, indices_ = 6:10, selected = FALSE, toBeMutated = FALSE, mate = NULL)
#' )
#' featEval <- runif(10)
#' evolved_pop <- evolve2m(X, y, clf, pop, featEval, generation = 1)
#' }
#'
#' @export
evolve2m <- function(X, y, clf, pop, featEval, generation)
{
  pop <- sortPopulation(pop, evalToOrder = "fit_")
  if(!isPopulation(obj = pop)) return(NULL)
  
  if(clf$params$debug) {print(paste("evolve2m after sort:", length(pop)))}
  
  nb2BeMutated <- ceiling(clf$params$select_perc * clf$params$size_pop/100)
  nbByMethod   <- lapply(clf$params$select_percByMethod,        # On supose qu'il n'y a que 2 methodes de selection,
                         function(x) {x * nb2BeMutated/100})    # et qu'elles sont : elite et random
  
  pop <- resetTags(pop)
  
  # for(i in 1:length(nbByMethod))
  # {
  pop <- tag_SelectElite(clf, pop, nbByMethod[[1]])
  pop <- tag_SelectRandom(clf, pop, nbByMethod[[2]])
  # }
  indexListOfAvailableParents <- get_Parents(pop)
  pop <- tag_Couples(pop, indexListOfAvailableParents)
  
  mutate_size <- (length(pop) * clf$params$mutate_size / 100)
  pop <- tag_ToBeMutated(pop = pop, mutate_size = mutate_size, protected = NULL)
  
  # indexListOfIndividualToBeMutated <- get_IndividualToBeMutated(pop)
  
  if(clf$params$debug) {print(paste("evolve2m after tagging:", length(pop)))}
  
  if(clf$params$parallel.local)
  {
    newPop <- foreach(i = 1:length(pop)) %dorng%
      {
        child <- list()
        mutatedIndiv <- list()
        
        if(myAssertNotNullNorNa(pop[[i]]$selected) & myAssertNotNullNorNa(pop[[i]]$mate) & myAssertNotNullNorNa(pop[[i]]$toBeMutated))
        {
          # Appliquer les operations sur pop[[i]]
          if(pop[[i]]$selected && (pop[[i]]$mate > 0))
          {
            child_1 <- clf$functions$crosser(X, y, clf, pop[[i]], pop[[pop[[i]]$mate]])
            
            # if parents have the same language
            if(pop[[i]]$language == pop[[pop[[i]]$mate]]$language)
            {
              # make only one child
              child[[length(child) +1]]<-evaluateModel(mod = child_1, X = X, y = y, clf = clf, eval.all = FALSE, force.re.evaluation = TRUE)
            }else
            {
              # otherwise make two identical with each of the parent's languages
              child_2 <-child_1
              child_1$language <- pop[[i]]$language
              child_2$language <- pop[[pop[[i]]$mate]]$language
              child[[length(child) +1]]<-evaluateModel(mod = child1, X = X, y = y, clf = clf, eval.all = FALSE, force.re.evaluation = TRUE)
              child[[length(child) +1]]<-evaluateModel(mod = child2, X = X, y = y, clf = clf, eval.all = FALSE, force.re.evaluation = TRUE)
            }
          }
          
          if(pop[[i]]$toBeMutated)
          {
            mutatedIndiv <- clf$functions$mutator(X, y, clf, pop, pop[[i]], 1:clf$params$size_world, featEval)
            mutatedIndiv <- evaluateModel(mod = mutatedIndiv, X = X, y = y, clf = clf, eval.all = FALSE, force.re.evaluation = TRUE)
          }
        } # end iff test
        
        return(list(child = child, mutatedIndiv = mutatedIndiv))
      }
    
    mutatedPop <- lapply(newPop, function(x)
    {
      if(length(x$mutatedIndiv) > 0)
      {
        return(x$mutatedIndiv)
      } else
      {
        return(NULL)
      }
    })
    mutatedPop <- mutatedPop[!sapply(mutatedPop, is.null)]
    
    children <- list()
    for(l in 1:length(newPop))
    {
      if(length(newPop[[l]]$child) > 0)
      {
        for(m in 1:length(newPop[[l]]$child))
        {
          children[[length(children)+1]] <- newPop[[l]]$child[[m]]
        }
      }
    }
    
    newPop <- c(children, mutatedPop)
    
  }else # if not parallel
  {
    newPop <- list()
    # for each of the size_pop best individuals (smaller then the real population size)
    for(i in 1:length(pop))
    {
      if(myAssertNotNullNorNa(pop[[i]]$selected) & myAssertNotNullNorNa(pop[[i]]$mate) & myAssertNotNullNorNa(pop[[i]]$toBeMutated))
      {
        if(pop[[i]]$selected && (pop[[i]]$mate > 0))
        {
          # create the first child
          child_1 <- clf$functions$crosser(X, y, clf, pop[[i]], pop[[pop[[i]]$mate]])
          
          # if parents languages are the same we keep only one child
          if(pop[[i]]$language == pop[[pop[[i]]$mate]]$language)
          {
            child <- evaluateModel(mod = child_1, X = X, y = y, clf = clf, eval.all = FALSE, force.re.evaluation = TRUE)
            newPop[[length(newPop) +1]]  <- child
            
          }else # if they have different languages (we create two identical children, each with one language)
          {
            child_2 <- child_1
            child_1$language <- pop[[i]]$language
            child_2$language <- pop[[pop[[i]]$mate]]$language
            newPop[[length(newPop) +1]] <- evaluateModel(mod = child_1, X = X, y = y, clf = clf, eval.all = FALSE, force.re.evaluation = TRUE)  
            newPop[[length(newPop) +1]] <- evaluateModel(mod = child_2, X = X, y = y, clf = clf, eval.all = FALSE, force.re.evaluation = TRUE)
          }
        }
        
        if(pop[[i]]$toBeMutated)
        {
          mutatedIndiv <- clf$functions$mutator(X, y, clf, pop, individual_to_be_mutated = pop[[i]], all_genes = c(1:clf$params$size_world), featEval)
          newPop[[length(newPop) +1]] <- evaluateModel(mod = mutatedIndiv, X = X, y = y, clf = clf, eval.all = FALSE, force.re.evaluation = TRUE)  
        }
      }
    } # end for each model in the population size
  } # end if no parallel
  
  if(clf$params$debug) {print(paste("evolve2m after crossing/mutation:", length(pop)))}
  
  pop <- sortPopulation(pop, evalToOrder = "fit_")
  pop <- pop[1:min(length(pop),round(clf$params$size_pop*2/3))] # get the 2/3 population or the size of the pop if smaller
  
  if(clf$params$debug) {print(paste("evolve2m after selection:", length(pop)))}
  
  # set population size bigger at the first generation
  if(generation==0) { clf$params$size_pop <- round(clf$params$size_pop*3/2) }
  
  n_rest <- clf$params$size_pop - length(pop)
  # add in the rest of the population 1/3, the best models
  pop <- c(pop, sortPopulation(newPop, evalToOrder = "fit_")[1:n_rest])
  #pop[(length(pop)+1):(length(pop)+ n_rest)] <- sortPopulation(newPop, evalToOrder = "fit_")[1:n_rest]
  # new2keep <-  selectIndividualToKeep(clf, newPop)
  # pop[(length(pop)+1):(length(pop)+ length(new2keep))] <- new2keep
  
  # add the whole new population
  #pop[(length(pop)+1):(length(pop) + length(newPop))] <- newPop
  pop <- c(pop, newPop)
  pop <- cleanPopulation(pop = pop, clf = clf)
  pop <- unique(pop) # clean
  pop <- sortPopulation(pop, evalToOrder = "fit_") # sort
  pop <- resetTags(pop) # reset
  
  if(clf$params$debug) {print(paste("evolve2m after merging the new pop:", length(pop)))}
  
  return(pop)
}



#' Evolve a Population with Advanced Mating and Mutation
#'
#' This function performs one generation of evolution on a population, including
#' advanced mating and mutation, with optional parallel processing. It returns
#' the evolved population after these operations.
#'
#' @param X A matrix or data frame of feature values, where each row represents
#'   a feature and each column represents a sample.
#' @param y A response vector or target variable for supervised learning. The
#'   length of \code{y} should match the number of columns in \code{X}.
#' @param clf A classifier object containing parameters for evolution, including
#'   options for mutation size, parallel processing, and debugging.
#' @param pop A list representing the population, where each individual has
#'   attributes like `mate` and `toBeMutated`.
#' @param featEval A vector or list containing feature evaluation metrics, used
#'   to guide mutation.
#'
#' @details The `evolve3m` function performs an evolutionary cycle on the
#' population with the following steps:
#'
#' 1. **Population Evaluation**: Each individual in the population is evaluated
#' if not already done, and `mate` and `toBeMutated` attributes are initialized.
#' 2. **Tagging for Selection and Mating**: Individuals are tagged based on
#' selection criteria from `clf`. 3. **Crossover and Mutation**: For individuals
#' with mates, offspring are generated using the `crosser` function. If an
#' individual is marked for mutation, the `mutator` function is applied.
#'
#' The function supports parallel processing, where crossover and mutation steps
#' are performed concurrently if `clf$params$parallel.local` is `TRUE`. After
#' crossover and mutation, children and mutated individuals are combined into a
#' new population. If `debug` mode is enabled, messages track the evolution
#' process.
#'
#' @return The evolved population list, including new offspring and mutated
#'   individuals.
#'
#' @examples
#' \dontrun{
#' X <- matrix(rnorm(100), nrow = 10) # Random features
#' y <- sample(c(0, 1), 10, replace = TRUE) # Random binary response
#' clf <- list(params = list(
#'   mutate_size = 10, parallel.local = FALSE, debug = TRUE
#' ))
#' pop <- list(
#'   list(fit_ = NA, indices_ = 1:5, selected = FALSE, toBeMutated = FALSE, mate = NULL),
#'   list(fit_ = NA, indices_ = 6:10, selected = FALSE, toBeMutated = FALSE, mate = NULL)
#' )
#' featEval <- runif(10)
#' evolved_pop <- evolve3m(X, y, clf, pop, featEval)
#' }
#'
#' @export
evolve3m <- function(X, y, clf, pop, featEval) 
{
  
  if(clf$params$debug) # DEBUG
  {
    pop.lengths <- list()
    pop.lengths["pop"] <- length(pop)
  }
  # evaluate the population
  pop <- evaluatePopulation(X, y, clf, pop, eval.all = FALSE)
  
  # set two empty parameters to each individual (mate and toBeMutated)
  for(i in 1:length(pop))
  {
    pop[[i]]$mate <- c()
    pop[[i]]$toBeMutated <- FALSE
  }
  
  # Tag the population
  pop <- tag_select(X, y, clf, pop)
  
  ###################################################################################################################                         
  # function(x) {x * nb2BeMutated/100})    # et qu'elles sont : elite et random
  
  if(clf$params$parallel.local)
  {
    newPop <- foreach(i = 1:length(pop)) %dorng%
      {
        children <- list()
        mutatedIndiv <- list()
        # Appliquer les operations sur pop[[i]]
        if(length(pop[[i]]$mate) > 0)
        {
          for(j in 1:length(pop[[i]]$mate))
          {
            
            
            child<-clf$functions$crosser(X, y, clf, pop[[i]],  pop[[pop[[i]]$mate[j]]])
            if(pop[[i]]$language==pop[[pop[[i]]$mate[j]]]$language){
              children[[length(children) +1]]<-evaluateModel(mod = child, X = X, y = y, clf = clf, eval.all = TRUE, force.re.evaluation = TRUE)
            }else
            {
              child_2 <-child
              child$language <- pop[[i]]$language
              child_2$language <- pop[[pop[[i]]$mate[j]]]$language
              children[[length(children) +1]]<-evaluateModel(mod = child, X = X, y = y, clf = clf, child, eval.all = TRUE, force.re.evaluation = TRUE)
              children[[length(children) +1]]<-evaluateModel(mod = child_2, X = X, y = y, clf = clf, eval.all = TRUE, force.re.evaluation = TRUE)
            }
            
          }
        }
        
        if(pop[[i]]$toBeMutated)
        {
          mutatedIndiv <- clf$functions$mutator(X, y, clf, pop, pop[[i]], 1:clf$params$size_world, featEval)
          mutatedIndiv <- evaluateModel(mod = mutatedIndiv, X = X, y = y, clf = clf, eval.all = TRUE)
        }
        
        return(list(children = children, mutatedIndiv = mutatedIndiv))
      }
    
    mutatedPop <- lapply(newPop, function(x) 
    {
      if(length(x$mutatedIndiv) > 0)
      {
        return(x$mutatedIndiv)
      } else
      {
        return(NULL)
      }
    })
    mutatedPop <- mutatedPop[!sapply(mutatedPop, is.null)]
    
    if(clf$params$debug) # DEBUG
    {
      pop.lengths["mutated_pop"] <- length(mutatedPop)
    }
    
    children <- list()
    for(l in 1:length(newPop))
    {
      if(length(newPop[[l]]$children) > 0)
      {
        for(m in 1:length(newPop[[l]]$children))
        {
          children[[length(children)+1]] <- newPop[[l]]$children[[m]]
        }
      }
    }
    cat("Taille de la population : ", length(children) + length(mutatedPop), "\n")
    newPop <- c(children, mutatedPop)
    
    if(clf$params$debug) # DEBUG
    {
      pop.lengths["newPop"] <- length(newPop)
    }
    
  } else # else if not in parallel
  {
    newPop <- list()
    for(i in 1:length(pop))
    {
      if(length(pop[[i]]$mate) > 0) # if there at least one mate this is to become a parent
      {
        for(j in 1:length(pop[[i]]$mate)) # for all mates
        {
          if(pop[[i]]$language == pop[[pop[[i]]$mate[j]]]$language){
            child <- clf$functions$crosser(X=X, y=y, clf=clf, parent1=pop[[i]],  parent2=pop[[pop[[i]]$mate[j]]])
            newPop[[length(newPop) +1]] <- evaluateModel(mod = child, X = X, y = y, clf = clf, eval.all = TRUE, force.re.evaluation = TRUE)
          }else # if parents are different (in terms of languages)
          { # compute two metisse childs
            parent1 <- pop[[i]]
            parent2 <- pop[[pop[[i]]$mate[j]]]
            # create a crossed individual
            child_1 <- clf$functions$crosser(X=X, y=y, clf=clf, parent1=parent1,  parent2=parent2)
            # duplicate it
            child_2 <- child_1
            child_1$languge <- parent1$language
            child_2$languge <- parent2$language
            newPop[[length(newPop) +1]] <- evaluateModel(mod = child_1, X = X, y = y, clf = clf, eval.all = TRUE, force.re.evaluation = TRUE)
            newPop[[length(newPop) +1]] <- evaluateModel(mod = child_2, X = X, y = y, clf = clf, eval.all = TRUE, force.re.evaluation = TRUE)
          }
          
        } # end mate loop
      }
      
      if(pop[[i]]$toBeMutated)
      {
        mutatedIndiv <- clf$functions$mutator(X, y, clf, pop, pop[[i]], 1:clf$params$size_world, featEval)
        newPop[[length(newPop) +1]] <- evaluateModel(mod = mutatedIndiv, X = X, y = y, clf = clf, eval.all = TRUE)
      } # end mutation
    }
    
    if(clf$params$debug) # DEBUG
    {
      pop.lengths["newPop"] <- length(newPop)
    }
  }
  
  
  #t<-length(newPop)
  #newPop[(length(newPop)+1):(length(newPop)+length(pop))]<-pop
  #newPop <- unique(newPop)
  #newPop <- sortPopulation(newPop, evalToOrder = "fit_")[1:t]
  
  if(clf$params$debug) # DEBUG
  {
    print(unlist(pop.lengths))
  }
  
  return(newPop)
}


#' Mutate an Individual's Genes in the Population
#'
#' This function mutates a specified individual's genes by altering a subset of
#' them according to a defined mutation rate and gene reservoir.
#'
#' @param X A matrix or data frame of feature values, where each row represents
#'   a feature and each column represents a sample.
#' @param y A response vector or target variable for supervised learning. The
#'   length of \code{y} should match the number of columns in \code{X}.
#' @param clf A classifier object containing parameters for mutation, including
#'   `mutate_rate` and `size_world`.
#' @param pop The current population list, used as context for the mutation but
#'   not directly modified by this function.
#' @param individual_to_be_mutated A list representing the individual to be
#'   mutated, with attributes `eval.sparsity` (sparsity level) and `indices_`
#'   (genes).
#' @param all_genes A vector of all possible gene indices, representing the
#'   complete set of genes that could be selected during mutation.
#' @param featEval A vector or list of feature evaluation metrics, used to guide
#'   the mutation.
#'
#' @details The function calculates the number of genes to mutate (`perc`) based
#' on the individual’s sparsity level and the mutation rate defined in `clf`. If
#' the number of genes to mutate exceeds the available gene reservoir, the
#' mutation rate is adjusted accordingly. Genes for mutation are chosen from
#' `all_genes` and replaced in the individual's genome.
#'
#' The mutation process includes:
#' - Calculating mutation degree based on mutation rate and sparsity.
#' - Selecting genes for mutation and replacing them with new genes from the reservoir.
#'
#' @return A mutated individual list, representing the modified genome of the
#'   input individual.
#'
#' @examples
#' \dontrun{
#' X <- matrix(rnorm(100), nrow = 10) # Random features
#' y <- sample(c(0, 1), 10, replace = TRUE) # Random binary response
#' clf <- list(params = list(mutate_rate = 20, size_world = 100))
#' individual_to_be_mutated <- list(eval.sparsity = 5, indices_ = sample(1:100, 5))
#' all_genes <- 1:100
#' featEval <- runif(10)
#' mutated_individual <- mutator_v1(X, y, clf, pop = NULL, individual_to_be_mutated, all_genes, featEval)
#' }
#'
#' @export
mutator_v1 <- function(X, y, clf, pop, individual_to_be_mutated, all_genes, featEval)
{
  # degree of mutation (number of genes to be mutated)
  perc <- ceiling(individual_to_be_mutated$eval.sparsity * clf$params$mutate_rate/100)
  
  # the size of the reservoir
  size_reservoir <- clf$params$size_world - individual_to_be_mutated$eval.sparsity
  
  if(perc > size_reservoir)  # if the gene reservoir is smaller than the number of genes to mutate, 
  {                          # lower the mutation rate to the size of the reservoir
    perc <- size_reservoir
  }
  # identify the idexes of the genes to be mutated
  index_genes_to_mutate <- sample(individual_to_be_mutated$eval.sparsity, perc)
  # the unique genes remaining in the reservoir that are not in the untouched part of the genome
  # It is to be noted that the genes to mutate are part of this reservoir and some of them can be 
  # picked again, which lowers the percentage of mutation
  remaining_unique_genes <- all_genes[!all_genes %in% individual_to_be_mutated$indices_[-index_genes_to_mutate]]
  # draw new genes (some of them might be old)
  new_genes <- sample(x = remaining_unique_genes, size = perc)
  # the mutated individual
  mutated_individual <- individual_to_be_mutated$indices_
  mutated_individual[index_genes_to_mutate] <- new_genes
  mutated_individual <- sort(mutated_individual)
  
  return(individual(X, y, clf, ind = mutated_individual))
}


#' Mutate an Individual's Genes Based on Feature Evaluation
#'
#' This function mutates a specified individual's genes, choosing mutation
#' actions based on feature evaluation probabilities rather than a fixed
#' mutation rate. Actions include modifying, adding, or removing genes,
#' depending on mutation conditions and individual sparsity.
#'
#' @param X A matrix or data frame of feature values, where each row represents
#'   a feature and each column represents a sample.
#' @param y A response vector or target variable for supervised learning. The
#'   length of \code{y} should match the number of columns in \code{X}.
#' @param clf A classifier object containing parameters for mutation, including
#'   `current_seed`, `language`, and `sparsity`.
#' @param pop The current population list, used as context for mutation but not
#'   directly modified by this function.
#' @param individual_to_be_mutated A list representing the individual to mutate,
#'   with attributes `indices_` (genes) and `coeffs_` (gene coefficients).
#' @param all_genes A vector of all possible gene indices, representing the
#'   complete set of genes that could be selected during mutation.
#' @param featEval A vector or list of feature evaluation metrics, used to guide
#'   mutation probabilities for selecting features.
#'
#' @details The mutation process is guided by `featEval`, which assigns a
#' probability for each feature to be selected for mutation based on its
#' evaluation score. The mutation action is chosen based on the individual’s
#' sparsity and classifier settings:
#'
#' - **Modify**: Inverts the coefficient of a randomly selected gene within the individual.
#' - **Add**: Adds a new gene to the individual, drawn from `all_genes` based on feature evaluation probabilities.
#' - **Remove**: Deletes a randomly selected gene from the individual.
#'
#' The mutation action is determined based on the individual’s current sparsity
#' and the classifier's settings. If no suitable genes remain for mutation, the
#' function returns the individual unchanged.
#'
#' @return A mutated individual list with updated `indices_` and `coeffs_`,
#'   representing the modified genome of the input individual.
#'
#' @examples
#' \dontrun{
#' X <- matrix(rnorm(100), nrow = 10) # Random features
#' y <- sample(c(0, 1), 10, replace = TRUE) # Random binary response
#' clf <- list(params = list(current_seed = 42, language = "ternary", sparsity = c(5, 10)))
#' individual_to_be_mutated <- list(indices_ = sample(1:100, 5), coeffs_ = rep(1, 5))
#' all_genes <- 1:100
#' featEval <- runif(10)
#' mutated_individual <- mutator_v2(X, y, clf, pop = NULL, individual_to_be_mutated, all_genes, featEval)
#' }
#'
#' @export
mutator_v2 <- function(X, y, clf, pop, individual_to_be_mutated, all_genes, featEval) #This one does not use the mutate rate
{
  ## start idea for feature mutation based on their evaluation 
  featEval[is.na(featEval)] <- rep(max(featEval, na.rm = TRUE), length(featEval[is.na(featEval)])) # for the NA fill them up
  featProba <- featEval/sum(featEval)
  
  if(any(featEval < 0))
  {
    stop("mutator_v2: Negative probabiliy in featProba")
  }
  
  # which mutation event to apply
  set.seed(clf$params$current_seed + sum(individual_to_be_mutated$indices_)) # fix the seed this individual
  
  if(clf$params$language == "bin" | clf$params$language == "bininter")
  {
    mutation2apply <- sample(c("add", "rm"), 1)
  }else
  {
    mutation2apply <- sample(c("modif", "add", "rm"), 1)
  }
  
  if(max(clf$params$sparsity) == min(clf$params$sparsity))
  {
    mutation2apply <- "modif"
  } else if((mutation2apply == "add") &&
            (length(individual_to_be_mutated$indices_) >= max(clf$params$sparsity)))
  {
    mutation2apply <- "rm"
  } else if((mutation2apply == "rm") && 
            (length(individual_to_be_mutated$indices_) <= min(clf$params$sparsity)))
  {
    mutation2apply <- "add"
  }
  switch(mutation2apply, 
         modif= # mutate
           {
             # select the feature to modify
             gene2Modify <- sample.int(length(individual_to_be_mutated$indices_), 1)
             # create a copy to manipulate
             mutated_individual <- individual_to_be_mutated
             # change the coefficient of that index
             mutated_individual$coeffs_[[gene2Modify]] <- individual_to_be_mutated$coeffs_[[gene2Modify]] * -1
             return(mutated_individual)
           },
         add= # insert
           {
             # which genes are outside the individuals from which we can draw
             remaining_unique_genes <- all_genes[!all_genes %in% individual_to_be_mutated$indices_]
             
             # If enough genes are left to draw from
             if(length(remaining_unique_genes) > 1) 
             {
               if(length(unique(remaining_unique_genes)) < length(remaining_unique_genes))
               {
                 stop("mutator_v2: list of genes from which to choose contains identical values")
               }
               
               # sample a gene do to add
               set.seed(clf$params$current_seed + sum(individual_to_be_mutated$indices_)) # fix the seed this individual  
               gene2Add <- sample(all_genes, 1, prob = featProba)
               
               if(!gene2Add %in% individual_to_be_mutated$indices_)
               {
                 indices <- individual_to_be_mutated$indices_
                 indices[[length(indices) +1 ]] <- gene2Add
                 indices <- indices[order(indices)]
                 return(individual(X, y, clf, ind = indices))
               } else
               {
                 return(individual_to_be_mutated)
               }
             } else
             {
               if(clf$params$warnings) warning("mutator_v2: Not enough features remaining for the insertion mutation. Nothing happened")
               return(individual_to_be_mutated)
             }
           },
         rm= # delete
           {
             # sample a gene do to remove
             set.seed(clf$params$current_seed + sum(individual_to_be_mutated$indices_)) # fix the seed this individual 
             gene2Rm <- sample.int(length(individual_to_be_mutated$indices_), 1)
             mutated_individual <- individual_to_be_mutated
             mutated_individual$indices_ <- individual_to_be_mutated$indices_[-gene2Rm]
             mutated_individual$coeffs_  <- individual_to_be_mutated$coeffs_[-gene2Rm]
             mutated_individual$names_   <- individual_to_be_mutated$names_[-gene2Rm]
             return(mutated_individual)
           }
  ) # end switch
}



#' Rank Features Based on Model Fitness
#'
#' This function evaluates and ranks features in the population based on their
#' presence in individual models and their associated fitness scores. It updates
#' a feature evaluation metric (`featEval`) to reflect the average fitness of
#' models in which each feature appears.
#'
#' @param X A matrix or data frame of feature values, where each row represents
#'   a feature and each column represents a sample.
#' @param y A response vector or target variable for supervised learning. The
#'   length of \code{y} should match the number of columns in \code{X}.
#' @param clf A classifier object containing parameters for evaluation,
#'   including `evalToFit`, which specifies the fitness attribute in each model.
#' @param pop A list representing the population, where each individual model
#'   contains `indices_` (genes or features used in the model) and `fit_`
#'   (fitness score).
#' @param featEval A numeric vector of feature evaluation scores, where each
#'   element corresponds to a feature in \code{X}. This vector is updated based
#'   on the average fitness of models that include each feature.
#'
#' @details The function iterates over each model in the population (`pop`) and
#' each feature (gene) in the model. For each feature, it updates `featEval` by
#' averaging the current feature score with the fitness score of the model in
#' which it appears. The result is an updated ranking of features based on their
#' presence in high-fitness models.
#'
#' The function checks for negative values in `featEval` and raises an error if
#' any are found, as negative values indicate an issue in the evaluation
#' process.
#'
#' @return A numeric vector `featEval`, with updated values reflecting the
#'   average fitness scores of models that include each feature.
#'
#' @examples
#' \dontrun{
#' X <- matrix(rnorm(100), nrow = 10) # Random features
#' y <- sample(c(0, 1), 10, replace = TRUE) # Random binary response
#' clf <- list(params = list(evalToFit = "fit_"))
#' pop <- list(
#'   list(indices_ = c(1, 3, 5), fit_ = 0.8),
#'   list(indices_ = c(2, 4, 6), fit_ = 0.7)
#' )
#' featEval <- rep(0, 10)
#' updatedFeatEval <- rankFeatures(X, y, clf, pop, featEval)
#' }
#'
#' @export
rankFeatures <- function(X, y, clf, pop, featEval)
{
  featuresInModels <- populationGet_X(element2get = "indices_", toVec = FALSE, na.rm = FALSE)(pop)
  for(modIndice in 1:length(featuresInModels))
  {
    listFeat <- featuresInModels[[modIndice]]
    for(i in 1:length(listFeat))
    {
      #featEval[listFeat[i]] <- mean(c(featEval[listFeat[i]], pop[[modIndice]][[clf$params$evalToFit]]), na.rm = TRUE)
      featEval[listFeat[i]] <- mean(c(featEval[listFeat[i]], pop[[modIndice]]$fit_), na.rm = TRUE) # to take into account the penalized fit
      featEval <- featEval
      i <- i + 1
    }
  }
  
  if(any(featEval < 0 & !is.na(featEval)))
  {
    stop("rankFeatures, negative evaluation")
  }
  
  return(featEval)
}


#' Apply Mutation to a Selected Subset of the Population
#'
#' This function performs mutation on a selected subset of individuals in a
#' population, using feature evaluations to guide mutation decisions. It returns
#' the mutated population.
#'
#' @param X A matrix or data frame of feature values, where each row represents
#'   a feature and each column represents a sample.
#' @param y A response vector or target variable for supervised learning. The
#'   length of \code{y} should match the number of columns in \code{X}.
#' @param clf A classifier object containing parameters for mutation, including
#'   `size_world` (total number of genes) and `parallel.local` (to enable
#'   parallel computation).
#' @param pop A list representing the population, where each individual has
#'   attributes relevant to mutation.
#' @param selection A vector of indices representing the individuals in `pop`
#'   that should be mutated.
#' @param featEval A numeric vector of feature evaluation scores, which will be
#'   updated to reflect the mutation's effect on feature importance.
#'
#' @details The `mutate2` function first ranks features using `rankFeatures`,
#' updating `featEval` based on model fitness in the population. It then
#' iterates over the selected individuals (`selection`), applying mutation to
#' each. If parallel processing is enabled (`clf$params$parallel.local`),
#' mutations are performed concurrently.
#'
#' The mutation process uses the classifier's `mutator` function to alter gene
#' indices or coefficients in each selected individual based on feature
#' evaluation probabilities.
#'
#' @return A list representing the mutated population, with the individuals in
#'   `selection` modified according to mutation operations.
#'
#' @examples
#' \dontrun{
#' X <- matrix(rnorm(100), nrow = 10) # Random features
#' y <- sample(c(0, 1), 10, replace = TRUE) # Random binary response
#' clf <- list(params = list(size_world = 10, parallel.local = FALSE),
#'             functions = list(mutator = mutator_v1))
#' pop <- list(
#'   list(indices_ = c(1, 3, 5), fit_ = 0.8),
#'   list(indices_ = c(2, 4, 6), fit_ = 0.7)
#' )
#' selection <- 1:2
#' featEval <- rep(0, 10)
#' mutated_pop <- mutate2(X, y, clf, pop, selection, featEval)
#' }
#'
#' @export
mutate2 <- function(X, y, clf, pop, selection, featEval) 
{
  mutated_pop <- pop                                          ###
  all_genes <- (1:clf$params$size_world) # all genes       ######
  #####################################
  featEval <- rankFeatures(X, y, clf, pop, featEval) ##################### Work to do here ##
  #####################################
  #print(summary(featEval))                                ######
  if(clf$params$parallel.local)                               ###
  { # if // computing
    res <- foreach(i = selection)  %dorng% 
      {
        individual_to_be_mutated <- pop[[i]]
        mutated_indiv <- clf$functions$mutator(X, y, clf, pop, individual_to_be_mutated, all_genes, featEval)
        return(mutated_indiv)
      }
    mutated_pop[selection] <- res 
  } else
  {
    for (i in selection)
    {
      individual_to_be_mutated <- pop[[i]]
      mutated_pop[[i]] <- clf$functions$mutator(X, y, clf, pop, individual_to_be_mutated, all_genes, featEval)
    }
  }
  
  return(mutated_pop)
}


#' Create a New Individual by Crossing Genes from Two Parents
#'
#' This function generates a new individual by combining genes from two parent
#' individuals. It samples a subset of genes from the parents' combined gene
#' pool, with the child’s gene sparsity determined based on the parents'
#' sparsity levels.
#'
#' @param X A matrix or data frame of feature values, where each row represents
#'   a feature and each column represents a sample.
#' @param y A response vector or target variable for supervised learning. The
#'   length of \code{y} should match the number of columns in \code{X}.
#' @param clf A classifier object containing parameters for generating a new
#'   individual.
#' @param parent1 A list representing the first parent, which includes
#'   `indices_` (genes) and `eval.sparsity` (gene sparsity).
#' @param parent2 A list representing the second parent, also with `indices_`
#'   and `eval.sparsity`.
#'
#' @details The function combines the genes of `parent1` and `parent2` into a
#' unique pool and selects a subset for the child. The child’s sparsity level is
#' randomly determined based on the range between the two parents'
#' `eval.sparsity`. If the parents have identical sparsity, a subset is sampled
#' within the parent's sparsity limit.
#'
#' **Note**: The function currently has limitations and may need further development to handle edge cases and differing gene pools effectively.
#'
#' @return A list representing the new individual, with a subset of genes
#'   (`indices`) inherited from the parents.
#'
#' @examples
#' \dontrun{
#' X <- matrix(rnorm(100), nrow = 10) # Random features
#' y <- sample(c(0, 1), 10, replace = TRUE) # Random binary response
#' clf <- list() # Placeholder for classifier settings
#' parent1 <- list(indices_ = c(1, 3, 5, 7), eval.sparsity = 4)
#' parent2 <- list(indices_ = c(2, 4, 6, 8), eval.sparsity = 4)
#' child <- crossingIndividual_v1(X, y, clf, parent1, parent2)
#' print(child)
#' }
#'
#' @export
crossingIndividual_v1 <- function(X, y, clf, parent1, parent2) #not working for the moment
{
  parents_gene_reservoir <- sort(unique(c(parent1$indices_, parent2$indices_)))
  # unique vars
  
  if(parent1$eval.sparsity != parent2$eval.sparsity)
  {
    child.sparsity <- sample(parent1$eval.sparsity:parent2$eval.sparsity, 1)
  } else
  {
    child.sparsity <- parent1$eval.sparsity - sample(0:parent1$eval.sparsity, 1)
  }
  
  indices <- sort(parents_gene_reservoir[sample(x = length(parents_gene_reservoir), 
                                                size = child.sparsity, replace = FALSE)])
  return(individual(X, y, clf, ind = indices))
}



#' Create a New Individual by Combining Gene Vectors from Two Parents
#'
#' This function generates a new individual by combining dense gene vectors from
#' two parent individuals. It applies sparsity control to ensure the child’s
#' gene vector meets specified minimum and maximum sparsity constraints.
#'
#' @param X A matrix or data frame of feature values, where each row represents
#'   a feature and each column represents a sample.
#' @param y A response vector or target variable for supervised learning. The
#'   length of \code{y} should match the number of columns in \code{X}.
#' @param clf A classifier object containing parameters for generating a new
#'   individual, including `sparsity.min` and `sparsity.max` to control
#'   sparsity.
#' @param parent1 A list representing the first parent, containing attributes
#'   necessary for obtaining a dense gene vector.
#' @param parent2 A list representing the second parent, also containing
#'   attributes necessary for obtaining a dense gene vector.
#'
#' @details The function creates a dense gene vector (`child`) by taking half of
#' the gene vector from `parent1` and half from `parent2`. After combining, it
#' applies sparsity control:
#'
#' - **If the child’s sparsity is below the minimum** (`sparsity.min`), additional genes are randomly added from the parents' combined gene reservoir until the minimum sparsity is reached.
#' - **If the child’s sparsity exceeds the maximum** (`sparsity.max`), excess genes are randomly removed until the sparsity meets the maximum threshold.
#'
#' This approach ensures that the child’s gene vector adheres to the specified
#' sparsity constraints, creating a balanced and controlled gene distribution.
#'
#' @return A list representing the new individual, with a gene vector created by
#'   combining and adjusting genes from both parents.
#'
#' @examples
#' \dontrun{
#' X <- matrix(rnorm(100), nrow = 10) # Random features
#' y <- sample(c(0, 1), 10, replace = TRUE) # Random binary response
#' clf <- list(params = list(sparsity.min = 3, sparsity.max = 6))
#' parent1 <- list() # Placeholder, assuming `individualGetDenseVec` can process this
#' parent2 <- list() # Placeholder, assuming `individualGetDenseVec` can process this
#' child <- crossingIndividual_v2(X, y, clf, parent1, parent2)
#' print(child)
#' }
#'
#' @export
crossingIndividual_v2 <- function(X, y, clf, parent1, parent2)
{
  size <- nrow(X)
  parent1_dense <- individualGetDenseVec(parent1, size)
  parent2_dense <- individualGetDenseVec(parent2, size)
  
  child <- c(parent1_dense[1:(size/2)], parent2_dense[(size/2):size])
  
  ###### Controle de la sparsité ######
  sparsity <- length(which(child != 0))
  if(sparsity < clf$params$sparsity.min)
  {
    reservoir <- parent1_dense + parent2_dense
    reservoir <- which(reservoir != 0)
    genes2add <- sample(reservoir, clf$params$sparsity.min - sparsity) ### A changer si ça marche
    ### afin de garder une sparsité homogène
    child[genes2add] <- sample(c(-1, 1), clf$params$sparsity.min - sparsity, replace = TRUE)
  } else if(sparsity > clf$params$sparsity.max)
  {
    reservoir <- which(child != 0)
    genes2rm  <- sample(reservoir, sparsity - clf$params$sparsity.max) ### Pareil
    child[genes2rm] <- 0
  }
  
  return(individual(X, y, clf, coeffs = child))
}



#' Create a New Individual by Sampling Genes from Two Parents with Controlled
#' Sparsity
#'
#' This function generates a new individual by sampling genes from the combined
#' gene pool of two parents, with controlled sparsity based on each parent's
#' sparsity level. It ensures sufficient unique features in the gene pool to
#' meet the child's sparsity requirements.
#'
#' @param X A matrix or data frame of feature values, where each row represents
#'   a feature and each column represents a sample.
#' @param y A response vector or target variable for supervised learning. The
#'   length of \code{y} should match the number of columns in \code{X}.
#' @param clf A classifier object containing parameters for generating a new
#'   individual.
#' @param parent1 A list representing the first parent, containing attributes
#'   `indices_` (gene indices) and `eval.sparsity` (sparsity level).
#' @param parent2 A list representing the second parent, also containing
#'   `indices_` and `eval.sparsity`.
#'
#' @details The function starts by creating a combined feature reservoir
#' (`feat_reserv`) from the gene indices of both parents. It checks for adequate
#' unique features to satisfy the maximum sparsity of either parent. If either
#' parent lacks enough unique genes to meet its declared sparsity, the function
#' raises an error.
#'
#' The child’s sparsity level is randomly chosen from the sparsity of one of the
#' parents. If the feature reservoir has duplicates, probabilities are assigned
#' based on feature frequency. The function then samples a subset of genes from
#' the reservoir for the child, ensuring the child’s sparsity matches the chosen
#' level.
#'
#' @return A list representing the new individual, with a subset of genes
#'   sampled from both parents. If insufficient unique genes are available, the
#'   function raises an error.
#'
#' @examples
#' \dontrun{
#' X <- matrix(rnorm(100), nrow = 10) # Random features
#' y <- sample(c(0, 1), 10, replace = TRUE) # Random binary response
#' clf <- list() # Placeholder for classifier settings
#' parent1 <- list(indices_ = c(1, 3, 5, 7), eval.sparsity = 4)
#' parent2 <- list(indices_ = c(2, 4, 6, 8), eval.sparsity = 4)
#' child <- crossingIndividual_v3(X, y, clf, parent1, parent2)
#' print(child)
#' }
#'
#' @export
crossingIndividual_v3 <- function(X, y, clf, parent1, parent2)
{
  # Get the all the features in the parents
  feat_reserv <- c(parent1$indices_, parent2$indices_)
  feat_reserv <- feat_reserv[order(feat_reserv)]
  
  if(length(unique(feat_reserv)) < max(parent1$eval.sparsity,parent2$eval.sparsity))
  {
    if(length(unique(parent1$indices_)) < parent1$eval.sparsity)
    {
      stop(paste("Parent1 has sparsity", parent1$eval.sparsity, "but only", 
                 length(unique(parent1$indices_)), "unique features"))
    } else if(length(unique(parent2$indices_)) < parent2$eval.sparsity)
    {
      stop(paste("Parent2 has sparsity", parent2$eval.sparsity, "but only", 
                 length(unique(parent2$indices_)), "unique features"))
    } else
    {
      stop("One of the parents already has a feature list with a few identical values")
    }
  }
  
  if(length(unique(feat_reserv)) > 1)
  {
    if(length(feat_reserv) != length(unique(feat_reserv)))
    {
      prob <- lapply(unique(feat_reserv), 
                     function(x, feat_reserv) 
                     {sum(x == feat_reserv)/length(feat_reserv)}, feat_reserv)
    } else
    {
      prob <- rep(1/length(feat_reserv), length(feat_reserv))
    }
    child.spar <- sample(c(parent1$eval.sparsity,parent2$eval.sparsity), 1)
    
    if(length(unique(feat_reserv)) < child.spar)
    {
      stop(paste("Error in crossingIndividual_v3, not enought differents variables in both parents. 
                 Only", length(unique(feat_reserv)), "in the parents, but", child.spar, 
                 "required in the child\n", sep = " "))
    }
    
    child.indices <- sample(unique(feat_reserv), child.spar, prob = prob)
    child.indices <- child.indices[order(child.indices)]
    return(individual(X, y, clf, ind = child.indices))
  } else 
  {
    return(parent1)
  }
}


#' Generate Offspring by Pairing Parents in a Population
#'
#' This function generates a new set of offspring by pairing parents from a
#' population. It randomly selects pairs of parents to create offspring through
#' a specified crossover function. The result is a combined list of the original
#' parents and their offspring.
#'
#' @param X A matrix or data frame of feature values, where each row represents
#'   a feature and each column represents a sample.
#' @param y A response vector or target variable for supervised learning. The
#'   length of \code{y} should match the number of columns in \code{X}.
#' @param clf A classifier object containing parameters for crossover, including
#'   `parallel.local` (to enable parallel computation) and a `crosser` function.
#' @param pop A list representing the current population, where each individual
#'   can be selected for pairing.
#' @param parents A list of selected parents (indices or individuals) from the
#'   population to be used for pairing and offspring generation.
#'
#' @details The function creates pairs of parents from `parents` and uses the
#' `crosser` function defined in `clf$functions` to generate offspring for each
#' pair. If there aren’t enough unique parents, it reuses the full list to
#' ensure the required number of couples. Offspring generation can be
#' parallelized if `clf$params$parallel.local` is set to `TRUE`.
#'
#' The process includes:
#' - **Pairing**: Randomly pairs parents from `parents`.
#' - **Offspring Generation**: Calls the `crosser` function to create offspring for each pair.
#' - **Parallelization**: Uses parallel computation if enabled, otherwise processes sequentially.
#'
#' @return A combined list of the original parents and their offspring.
#'
#' @examples
#' \dontrun{
#' X <- matrix(rnorm(100), nrow = 10) # Random features
#' y <- sample(c(0, 1), 10, replace = TRUE) # Random binary response
#' clf <- list(params = list(parallel.local = FALSE),
#'             functions = list(crosser = crossingIndividual_v3)) # Assume `crossingIndividual_v3` is defined
#' pop <- list(
#'   list(indices_ = c(1, 3, 5), fit_ = 0.8),
#'   list(indices_ = c(2, 4, 6), fit_ = 0.7)
#' )
#' parents <- 1:2
#' combined_population <- crossing2(X, y, clf, pop, parents)
#' print(combined_population)
#' }
#'
#' @export
crossing2 <- function(X, y, clf, pop, parents) 
{
  # select the needed number of couples
  number_couples <- length(pop) - length(parents)
  old_pop <- parents
  
  couples <- list()
  for(i in 1:number_couples) 
  {
    if(length(old_pop) < 2) # If there is no more parents available in the list we start again with 
      old_pop <- parents    # the full population of parents
    selectedParents <- sample(1:length(old_pop), 2)
    couples[[i]] <- old_pop[selectedParents]
    old_pop <- old_pop[-selectedParents]
  }
  
  children <- list()
  # parallel computing
  if(clf$params$parallel.local)
  {
    children <- foreach(i = 1:number_couples)  %dorng% 
      {
        child <- clf$functions$crosser(X, y, clf, couples[[i]][[1]], couples[[i]][[2]])
        return(child)
      }
  } else
  {
    for (i in 1:number_couples)
    {
      children[[i]] <- clf$functions$crosser(X, y, clf, couples[[i]][[1]], couples[[i]][[2]])
    }
  }
  return(c(parents, children))
}

#########################
#                       #
#  ###################  #
#  #### SELECTION ####  #
#  ###################  #
#                       #
#########################

#' Select Top Individuals from a Population Based on Fitness
#'
#' This function selects a specified number of top individuals from a population
#' based on their fitness scores. It sorts the population in descending order of
#' fitness and returns the highest-ranking individuals.
#'
#' @param pop A list representing the population, where each individual has a
#'   `fit_` attribute that represents its fitness score.
#' @param number An integer specifying the number of individuals to select from
#'   the sorted population.
#' @param clf A classifier object containing any additional parameters needed
#'   for selection (though not directly used in this function).
#'
#' @details The function sorts the population in descending order by the `fit_`
#' attribute and returns the top `number` individuals. This selection strategy
#' prioritizes individuals with the highest fitness scores, making it useful for
#' selecting elites in evolutionary algorithms.
#'
#' @return A list of the top `number` individuals from the population, sorted by
#'   fitness.
#'
#' @examples
#' \dontrun{
#' pop <- list(
#'   list(fit_ = 0.9, indices_ = 1:5),
#'   list(fit_ = 0.8, indices_ = 6:10),
#'   list(fit_ = 0.7, indices_ = 11:15)
#' )
#' clf <- list() # Placeholder for classifier settings
#' selected_individuals <- selector_v1(pop, number = 2, clf)
#' print(selected_individuals)
#' }
#'
#' @export
selector_v1 <- function(pop, number, clf)
{
  # evaluation <- getFitPopulation(population)
  # ind <- order(evaluation,decreasing = T)[1:number]
  return(sortPopulation(pop, evalToOrder = "fit_")[1:number])
  # return(population[ind])
}



#' Randomly Select Individuals from a Population
#'
#' This function randomly selects a specified number of individuals from a
#' population without considering their fitness scores. It is a simple selection
#' strategy that introduces diversity by choosing individuals at random.
#'
#' @param pop A list representing the population from which individuals are
#'   selected.
#' @param number An integer specifying the number of individuals to select
#'   randomly from the population.
#' @param clf An optional classifier object that can contain additional
#'   parameters for selection (not directly used in this function).
#'
#' @details The function selects `number` individuals randomly from the
#' population `pop`, without replacement. This approach does not prioritize
#' fitness and can be useful for introducing genetic diversity in evolutionary
#' algorithms.
#'
#' @return A list of randomly selected individuals from the population.
#'
#' @examples
#' \dontrun{
#' pop <- list(
#'   list(fit_ = 0.9, indices_ = 1:5),
#'   list(fit_ = 0.8, indices_ = 6:10),
#'   list(fit_ = 0.7, indices_ = 11:15)
#' )
#' selected_individuals <- selector_v2(pop, number = 2)
#' print(selected_individuals)
#' }
#'
#' @export
selector_v2 <- function(pop, number, clf = NULL)
{ #TODO: changer ce selecteur
  ind <- sample(x = 1:length(pop), size  =  number, replace = FALSE)
  return(pop[ind])
}


#' Select Individuals from a Population Using Multiple Selection Methods
#'
#' This function selects individuals from a population using multiple selection
#' methods specified in the classifier. It divides the selection based on
#' specified percentages for each method, allowing a combination of selection
#' strategies.
#'
#' @param clf A classifier object containing parameters and functions for
#'   selection, including:
#' - `select_perc`: The percentage of the population to be selected.
#' - `size_pop`: The total size of the population.
#' - `select_percByMethod`: A vector of percentages defining the proportion of individuals to select by each method.
#' - `functions$selector`: A list of selection functions to apply, corresponding to each method in `select_percByMethod`.
#' @param pop A list representing the population from which individuals are
#'   selected.
#'
#' @details The function calculates the number of individuals to select
#' (`nb2BeMutated`) based on `select_perc` and the population size. It then
#' splits the selection across methods, as defined in `select_percByMethod`. For
#' each method, the respective selector function (from `clf$functions$selector`)
#' is applied to a subset of the population, excluding individuals already
#' selected by previous methods.
#'
#' The result is a list of selected individuals, combining the results of each
#' selection method.
#'
#' @return A list of selected individuals from the population, chosen by
#'   multiple selection methods.
#'
#' @examples
#' \dontrun{
#' pop <- list(
#'   list(fit_ = 0.9, indices_ = 1:5),
#'   list(fit_ = 0.8, indices_ = 6:10),
#'   list(fit_ = 0.7, indices_ = 11:15)
#' )
#' clf <- list(
#'   params = list(select_perc = 50, size_pop = 10, select_percByMethod = c(60, 40)),
#'   functions = list(selector = list(selector_v1, selector_v2))
#' )
#' selected_individuals <- select(clf, pop)
#' print(selected_individuals)
#' }
#'
#' @export
select <- function(clf, pop)
{
  nb2BeMutated <- ceiling(clf$params$select_perc * clf$params$size_pop/100)
  nbByMethod   <- lapply(clf$params$select_percByMethod, 
                         function(x) {x * nb2BeMutated/100})
  res <- rep(NA, nb2BeMutated)
  start <- 1
  end   <- nbByMethod[[1]]
  popTMP <- pop
  for(i in 1:length(nbByMethod))
  {
    popTMP <- pop[which(!(popTMP %in% res))]
    res[start:end] <- clf$functions$selector[[i]](popTMP, nbByMethod[[i]], clf)
    start <- end +1
    end <- end + nbByMethod[[i]]
  }
  return(res)
}


#' Select the Best Individual by Sparsity Level from a Population
#'
#' This function selects the best individual from a population for each
#' specified sparsity level, based on fitness. It groups individuals by their
#' sparsity, then identifies the highest-ranking individual in each group.
#'
#' @param clf A classifier object containing parameters for selection, including
#'   `sparsity`, which defines the target sparsity levels.
#' @param pop A list representing the population, where each individual has
#'   attributes like `eval.sparsity` (sparsity level) and `fit_` (fitness
#'   score).
#'
#' @details The function first groups individuals in `pop` by their sparsity
#' levels, as specified in `clf$params$sparsity`. For each group, it selects the
#' individual with the highest fitness score (`fit_`). Only sparsity levels with
#' at least one individual in the population are considered.
#'
#' This approach allows for maintaining a diverse set of individuals across
#' different sparsity levels, focusing on the best-performing individual within
#' each sparsity category.
#'
#' @return A list of the best individuals for each sparsity level specified in
#'   `clf$params$sparsity`, selected based on fitness.
#'
#' @examples
#' \dontrun{
#' pop <- list(
#'   list(eval.sparsity = 3, fit_ = 0.9),
#'   list(eval.sparsity = 4, fit_ = 0.8),
#'   list(eval.sparsity = 3, fit_ = 0.85)
#' )
#' clf <- list(params = list(sparsity = c(3, 4, 5)))
#' best_individuals <- selectIndividualToKeep(clf, pop)
#' print(best_individuals)
#' }
#'
#' @export
selectIndividualToKeep <- function(clf, pop)
{
  spar <- populationGet_X(element2get = "eval.sparsity", toVec = TRUE, na.rm = TRUE)(pop)
  popBySparsity <- lapply(clf$params$sparsity, function(k, spar, pop) 
  {
    pop[which(spar == k)]
  }, spar, pop)
  popBySparsity <- popBySparsity[sapply(popBySparsity, function(x)
  {
    if(length(x) > 0)
    {
      TRUE
    }else
    {
      FALSE
    }
  })]
  bestBySparsity <- lapply(popBySparsity, function(pop, evalToFit) {getTheBestIndividual(pop, evalToFit)}, "fit_")
  return(bestBySparsity)
}

#####
#####
# A garder dans un coin pour visualiser les fit : 
# plot(sapply(population, function(x) x$fit_)[order(sapply(population, function(x) x$fit_))])
#
#####
#####

#' Convert an Individual's Sparse Gene Representation to a Dense Vector
#'
#' This function converts an individual's sparse gene representation (where only
#' specific indices have non-zero values) into a dense vector of a specified
#' size. It creates a vector with values from the individual's coefficients at
#' the specified indices and zeros elsewhere.
#'
#' @param individual A list representing an individual, with attributes:
#'   - `indices_`: A vector of indices where the individual has non-zero gene values.
#'   - `coeffs_`: A vector of coefficients corresponding to the genes at `indices_`.
#' @param size An integer specifying the size of the dense vector to be
#'   returned.
#'
#' @details The function initializes a dense vector (`res`) of length `size`
#' with all elements set to zero. It then populates this vector with the values
#' from `individual$coeffs_` at the positions specified in
#' `individual$indices_`. The result is a dense vector representation of the
#' individual's gene information.
#'
#' @return A numeric vector of length `size`, with non-zero values at positions
#'   specified by `individual$indices_` and zeroes elsewhere.
#'
#' @examples
#' \dontrun{
#' individual <- list(indices_ = c(2, 4, 6), coeffs_ = c(1.5, -0.5, 2.0))
#' size <- 10
#' dense_vector <- individualGetDenseVec(individual, size)
#' print(dense_vector) # Should show a vector of length 10 with specified values at indices 2, 4, and 6
#' }
#'
#' @export
individualGetDenseVec <- function(individual, size)
{
  res <- rep(0, size)
  res[individual$indices_] <- individual$coeffs_
  return(res)
}
predomics/predomicspkg documentation built on Dec. 11, 2024, 11:06 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
predomics/predomicspkg
Interpretable Prediction in Omics Data

R/terga2.lib.R
In predomics/predomicspkg: Interpretable Prediction in Omics Data

R Package Documentation

Browse R Packages

We want your feedback!

predomics/predomicspkg Interpretable Prediction in Omics Data

R/terga2.lib.R In predomics/predomicspkg: Interpretable Prediction in Omics Data

R Package Documentation

Browse R Packages

We want your feedback!

predomics/predomicspkg
Interpretable Prediction in Omics Data

R/terga2.lib.R
In predomics/predomicspkg: Interpretable Prediction in Omics Data