R/2_breed_aux_func.R

###############################################################################
#          GGGGGGGGGGGGG               AAA
#       GGG::::::::::::G              A:::A
#     GG:::::::::::::::G             A:::::A
#    G:::::GGGGGGGG::::G            A:::::::A
#   G:::::G       GGGGGG           A:::::::::A
#  G:::::G                        A:::::A:::::A
#  G:::::G                       A:::::A A:::::A
#  G:::::G    GGGGGGGGGG        A:::::A   A:::::A
#  G:::::G    G::::::::G       A:::::A     A:::::A
#  G:::::G    GGGGG::::G      A:::::AAAAAAAAA:::::A
#  G:::::G        G::::G     A:::::::::::::::::::::A
#   G:::::G       G::::G    A:::::AAAAAAAAAAAAA:::::A
#    G:::::GGGGGGGG::::G   A:::::A             A:::::A
#     GG:::::::::::::::G  A:::::A               A:::::A
#       GGG::::::GGG:::G A:::::A                 A:::::A
#          GGGGGG   GGGGAAAAAAA                   AAAAAAA
###############################################################################
# STAT 243
# Ruitong Zhu
# Jinxin Qu
# Rowan Morse
# Jared Bennett

###############################################################################
## Auxilary functions used inside the Breed() function
###############################################################################

#' Calculate Chromosome Fitness
#'
#' Calculate a relative fitness based on the rankscore of each chromosome.
#'
#' @usage Fitness(scores)
#'
#' @param scores A vector of the score for each chromosome, as calculated
#' from scoreFunc().
#'
#' @details This function calculates a relative fitness for each chromosome
#' based on the output of scoreFunc(). It follows the formula from
#' Givens_Hoeting_CH3 section 3.4.2.1, providing a better optimization than
#' the scoreFunc() alone. \cr \cr WARNING: This function scores the same way as
#' scoreFunc(), ie, if AIC() is the scoreFunc(), then lower scores are better,
#' and this function will follow that convention.
#'
#' @return A vector of the relative fitness of each
#' chromosome.
#'
#' @examples
#' set.seed(42)
#' test_Scores <- rnorm(x = 10, mean = 10, sd = 1)
#' Fitness(test_Scores)
#'
#' @export
Fitness <- function(scores=scores){

  Pop <- length(scores)

  #formula from Givens_Hoeting_CH3 section 3.4.2.1
  return(2*rank(x = -scores)/( Pop*(Pop+1) ) )
}
#' Chromosome Crossover
#'
#' Make 2 individual parent chromosomes crossover when breeding offsprings.
#'
#' @usage crossover(Chromes,parent1, parent2,numgenes)
#'
#' @param Chromes A matrix, whose each row representing an individual chromosome
#' and each column representing Pop.
#'
#' @param parent1,parent2 Numeric vectors represents individual parent chromosome.
#'
#' @param numgenes Number of genes in each chromosome.
#'
#' @details This function makes two individual parent chromosomes
#' perfrom crossover when breeding next generation.
#' Note that the crossover randomly pick several break points,
#' perform break and recombination on each set of parents.
#'
#' @return A matrix with each column representing the
#' offsprings from a process of crossover.
#'
#' @examples
#' test_Chromes <-  matrix(data = sample(c(0,1), size = Pop*genes, replace = TRUE), nrow = Pop, ncol = genes)
#' test_parent1 <- sample(x = 1:(nPop-1), size = nPop-1, replace = TRUE, prob = 1/relscore)
#' test_parent2 <- secondParents(firstParents=firstParents, method=method, nPop=nPop, relscore=relscore)
#' test_numgenes <- ncol(test_Chromes)
#' crossover(test_Chromes,parent1,parent2,numgenes)
#' @export
Crossover <- function(Chromes=population, parent1=firstParents, parent2=secondParents, numgenes=ngenes){

  #pick break points.
  breakPtL <- sample(x = 1:(numgenes-1), size = length(x = parent1), replace = TRUE)
  breakPtR <- breakPtL+1

  #perform break and recombination on each set of parents
  returnChroms <- vapply(X = 1:length(x = parent1), FUN = function(x){
    c(Chromes[parent1[x],1:breakPtL[x]], Chromes[parent2[x],breakPtR[x]:numgenes])
  }, FUN.VALUE = numeric(length = numgenes))


  return(t(x = returnChroms))
}
#' Get Second Parent
#'
#' Generates a list of parents to breed with the first parents selected. Makes
#' sure that a parent is not breeding with itself. Called within the Breed()
#' function
#'
#' @usage getSecondParents(firstParents, method, nPop, relScore)
#'
#' @param firstParents List of the first parents in each mating.
#'
#' @param method One of two scoring methods to determine the second parent.
#' \itemize{
#'   \item{oneScore: }{Second parent is chosen at random.}
#'   \item{twoScore: }{Second parent is chosen based on relative fitness.}
#' }
#'
#' @param nPop Number of chromosomes in the population. Equal to the length of firstParents.
#'
#' @param relScore Relative fitness score as calculated by Fitness(). Lower is better.
#'
#' @details This function ensures that the second parent in a crossover is distinct
#' from the first. It uses one of two methods to determine the second parent.
#' This will be slower than randomly picking the second parent but could provide
#' better performance. Function assumes that the worse member of the previous generation
#' has already been removed.
#'
#' @return A vector indicating the row position in the population matrix of the
#' parents that will breed with the firstParents vector.
#'
#' @examples
#' set.seed(42)
#' test_parent1 <-  matrix(data = sample(c(0,1), size = 16, replace = TRUE), nrow = 4, ncol = 4)
#' test_nPop <- length(test_parent1)
#' test_relScore <- rnorm(x = 4, mean = 10, sd = 1)
#' getSecondParents(firstParents=test_parent1, method="oneScore", nPop=4, relScore=test_relScore)
#'
#' @export
getSecondParents <- function(firstParents=firstParents, method=method, nPop=nPop, relScore=relScore) {
  #  store here
  secondParents <- vector(length=length(firstParents))
  if (method=="oneScore") {
    for (i in 1:length(secondParents))
    {
      #  remove corresponding parent from sample
      x <- c(1:(nPop-1))[-firstParents[i]]
      #  choose 1
      #  repeat
      secondParents[i] <- sample(x, size=1)
    }

  } else if (method=="twoScore")
  {
    for (i in 1:length(secondParents))
    {

      x <- c(1:(nPop-1))[-firstParents[i]]
      relscorePartial <- relScore[-firstParents[i]]
      secondParents[i] <- sample(x, size=1, prob=relscorePartial)

    }
  }

  return(secondParents)
}
#' Tournament Selection
#'
#' Uses the tournament method to generate a list of parents
#' to be used in crossover.
#' Tournament size used is k=2.
#' It is called within Breed function.
#'
#' @usage tournamentSelection(population=population, scores=scores, nPop=nPop)
#'
#' @param population current chromosome population as a matrix
#' with each row representing a chromosome and each value indicating the presence (1)
#' or absence (0) of a specific trait
#'
#' @param scores scores for the current chromosomes in the population
#' a lower score is better
#'
#' @param nPop current number of chromosomes in the population
#' If you do not remove the worst chromosome before using
#' You could set this to nPop1 to get results
#'
#' @details The function generates a list of parents using the tournament
#' method with size 2.  The current population is broken into groups of 2
#' (if it is odd one member is randomly selected to sit out the tournament).
#' For each group of 2 chromosomes the one with the lowest score is chose as a parent.
#' This is repeated until the desired number of parents ((nPop-1)*2) has been generated.
#'
#' @return A vector indicating the row positions in the population matrix
#' of the parents that will be bred using Crossover.
#'
#' @examples
#' PopChrom <- matrix(data = sample(x = c(0,1), size = 6*3, replace = TRUE),
#'                    nrow = 6, ncol = 3)
#' PopChrom <- safetyFunc(population = PopChrom)
#'
#' #assumes 1 chrom was left out of of sample and reserved for next gen.
#'
#' parents <- tournamentSelection(population=PopChrom, scores=c(4,50,3,100,4), nPop=6)
#'
#' @export
tournamentSelection <- function(population=population, scores=scores, nPop=nPop)
{
  #  k=2 is a commonly used value and is easy to work with
  #  k=2 is a good balance between selecting the best parents
  #  and preserving diversity
  #  feel free to change this, just a little more complicated
  k <- 2

  #  put results in here
  parents <- rep(NA,length=(2*(nPop-1)))

  #  number of groups of k within 1 tournament
  tourSize <-  floor((nPop-1)/2)

  numTour <- (2*(nPop-1))/tourSize
  #  based on k=2
  #  number of tournaments will always be 2

  #  set up annoying indexes
  j <- 0
  k <- j+1

  for (i in 1:floor(numTour)) {

    in1 <- j*tourSize+1
    in2 <- k*tourSize

    parents[in1:in2] <- holdTournaments(tourSize=tourSize, scores=scores, nPop=nPop)

    j <- j+1
    k <- k+1
  }

  #  length of extra spaces has to be < one tournamet size
  #  run once (computationally cheap) and randomly select some
  if (anyNA(parents)) {
    fill <- holdTournaments(tourSize=tourSize, scores=scores, nPop=nPop)
    parents[which(is.na(parents))] <- sample(x=fill, size=length(which(is.na(parents))), replace=FALSE)
  }
  return(parents)
}
#'  holdTournaments
#'  Calculates the winners of each tournament.
#'  Called within tournamentSelection
#'
#' @usage holdTournaments(tourSize=tourSize, scores=scores, nPop=nPop)
#'
#' @param tourSize Size of the tournament.
#' This should be floor((nPop-1)/k) where k is the number
#' of chromosomes in each competition.  This algorithm supports k=2
#'
#' @param scores scores for the chromosomes.
#' A lower score is better in terms of fitness
#'
#' @param nPop current size of the population of chromosomes
#'
#' @details For each set of 2 chromosomes within a tournament
#' the one with the lowest score is returned as the winner.
#'
#' @return A vector of length tourSize that indicates the winners
#' (by their row location in the population matrix) of each competition
#' within the tournament.
#'
#' @examples
#' assumes 1 chrom was left out of of sample and reserved for next gen.
#'
#' holdTournaments(tourSize=3, scores=c(10,100,200,50,3,400), nPop=7)
#' holdTournaments(tourSize=4, scores=c(1,5,30,38,50,301,473,4,90), nPop=10)
#'
#' @export
holdTournaments <- function(tourSize=tourSize, scores=scores, nPop=nPop) {
  k <- 2
  winners <- rep(NA, length(tourSize))

  partitions <- sample(x=1:(nPop-1), size=(nPop-1), replace=FALSE)

  #if nPop-1 is odd
  #one gets left out of tournament
  #last place is static but which one ends up there is random
  #so this is randomly selected
  if ((nPop-1) %% k!=0) {
    partitions <- partitions[-length(partitions)]
  }
  #  get the scores for the competing chromosomes
  partScores <- scores[partitions]

  #  index for subsetting
  i <- 1
  #  index for parent
  j <- 1

  while (i <= length(partitions)) {
    #  hold the "tournament"
    tour <- partitions[i:(i+1)]
    #  Get the winner
    winner <- which.min(partScores[i:(i+1)])
    #  Winner becomes a parent
    winners[j] <- tour[winner]

    i <- i+2
    j <- j+1
  }
  return(winners)
}

#' Match Parents for Tournament selection
#'
#' Takes parent vector generated by tournament
#' and pairs the parents for use in Crossover
#'
#' @usage matchParents(parents=parents, nPop=nPop)
#'
#' @param parents a vector indicating parent chromosomes by their row
#' index in the population matrix.  They must be generated by tournament
#' selection or the function will return an error.
#'
#' @param nPop size of the population of chromosomes
#'
#' @details The function separates the tournament winners
#' randomly into parent pairs for use in Crossover.  A parent will
#' never be matched to itself.
#'
#' @return a list the first element of which is a vector containing one set of
#' parents and the second elementn of which contains their mates.
#'
#' @examples
#' PopChrom <- matrix(data = sample(x = c(0,1), size = 6*3, replace = TRUE), nrow = 6, ncol = 3)
#' PopChrom <- safetyFunc(population = PopChrom)
#'
#' # assumes 1 chrom was left out of of sample and reserved for next gen
#'
#' parents <- tournamentSelection(population=PopChrom, scores=c(4,50,3,100,4), nPop=6)
#'
#' matchParents(parents=parents, nPop=6)
#' @export
matchParents <- function(parents=parents, nPop=nPop)
{

  firstParents <- rep(NA, (nPop-1))
  secondParents <- rep(NA, (nPop-1))

  #  Separating into pairs
  #  because of size/number of tournaments
  #  will always be able to separate in this way
  #  (there can be on more than (2*(nPop-1)/tourSize)1 of any 1 parent)

  y <- c()
  for (i in 1:(nPop-1))
  {

    #  we want to pair the one with the highest frequency first
    #  this finds a pattern that allows all of them to be matched
    mode <- which.max(tabulate(parents))

    firstParents[i] <- mode

    if ((length(y)==0) && (i>1) ) {
      stop("Please only enter parent vector generated using tournaments")
    }
    #  pair with another one that is not the same value

    #  possible choices
    y <- parents[-which(parents==firstParents[i])]

    #randomly choose 1

    secondParents[i] <- y[sample(x=c(1:length(y)), size=1, replace=FALSE)]

    #  update parents to remove used values
    remove <- c(which(firstParents[i]==parents)[1], which(secondParents[i]==parents)[1])

    parents <- parents[-remove]
  }
  return(list(firstParents, secondParents))
}
GilChrist19/GA documentation built on May 13, 2019, 5:32 p.m.