R/1_select_aux_func.R

###############################################################################
#          GGGGGGGGGGGGG               AAA
#       GGG::::::::::::G              A:::A
#     GG:::::::::::::::G             A:::::A
#    G:::::GGGGGGGG::::G            A:::::::A
#   G:::::G       GGGGGG           A:::::::::A
#  G:::::G                        A:::::A:::::A
#  G:::::G                       A:::::A A:::::A
#  G:::::G    GGGGGGGGGG        A:::::A   A:::::A
#  G:::::G    G::::::::G       A:::::A     A:::::A
#  G:::::G    GGGGG::::G      A:::::AAAAAAAAA:::::A
#  G:::::G        G::::G     A:::::::::::::::::::::A
#   G:::::G       G::::G    A:::::AAAAAAAAAAAAA:::::A
#    G:::::GGGGGGGG::::G   A:::::A             A:::::A
#     GG:::::::::::::::G  A:::::A               A:::::A
#       GGG::::::GGG:::G A:::::A                 A:::::A
#          GGGGGG   GGGGAAAAAAA                   AAAAAAA
###############################################################################
# STAT 243
# Ruitong Zhu
# Jinxin Qu
# Rowan Morse
# Jared Bennett

###############################################################################
# Auxilary functions for select()
###############################################################################

#' Safety Function
#'
#' This function ensures no chromosomes are empty
#'
#' @usage safetyFunc(population)
#'
#' @param population The list of chromosomes in the current population.
#'
#' @details This function ensures that no chromosomes are empty. Models cannot
#' have a dependent variable defined with no explanatory variables, therefore no
#' chromosomes can be empty.
#'
#' @return A matrix of chromosomes. This overwrites the chromosomes currently in
#' the population.
#'
#' @examples
#' set.seed(42)
#' test_PopChrom <- matrix(sample(c(0,1), 16, replace=TRUE), ncol = 4, nrow = 4)
#' safetyFunc(test_PopChrom)
#'
#' @export
safetyFunc <- function(population=PopChrom)  {

  #make sure there are no 0 chromosomes
  zeroRow <- (rowSums(x = population)) == 0

  if(any(zeroRow))  {
    #get which chromosomes are 0
    replaceR <- which(zeroRow)

    #find the least used locus.
    # in a tie, the first one found is returned.
    leastCol <- which.min(x = colSums(x = population) )

    #place something in each row, at the least used locus
    population[replaceR, leastCol] <- 1
  }

  return(population)
}
#' Perform Model
#'
#' This function performs the fitting model provided by the user
#'
#' @usage PerformModel(data, expVar, depVar, population, mFunc)
#'
#' @param data Dataframe of data provided by the user. This must be a dataframe,
#' it must have the dependent variable as the first column, and subsequent
#' columns are the free variables.
#'
#' @param expVar String list of explanatory variables, pulled from the
#' column names of data.
#'
#' @param depVar Single string for the dependent variable. It is pulled
#' from the first column name of data.
#'
#' @param population Matrix of chromosomes currently in the population.
#' These define what combinations of explanatory variables to run the model on.
#'
#' @param mFunc The user-supplied model to run on the data. This defaults to
#' lm().
#'
#' @details This function applies the defined model to the data, using depVar
#' as the dependent variable (observations) and some combination of expVar
#' as defined by the chromosomes from population.
#'
#' @return A list of mFunc objects.
#'
#' @examples
#' test_data <- mtcars
#' tese_eVar <- colnames(test_data)[2:NCOL(test_data)]
#' test_dVar <- colnames(test_data)[1]
#' test_PopChrom <- matrix(data = c(1,1,1,0,0,1,0,0,1,1), nrow = 1, ncol = ncol(mtcars) - 1)
#' test_model <- lm
#' PerformModel(test_data,tese_eVar,test_dVar,test_PopChrom,test_model)
#'
#' @export
PerformModel <- function(data=data, expVar=eVar, depVar=dVar, population=PopChrom, mFunc=model){
  #This needs to take extra args for model
  # do we want to include/exclude the intercept?


  #combine eVar
  # these do the same thing, vapply is marginally faster. Can it be better/more legible?
  halfModel <- apply(X = population, MARGIN = 1, FUN = function(x){paste0(expVar[as.logical(x = x)], collapse = "+")})
  #vapply(X = 1:5, FUN = function(x){paste0(eVar[as.logical(x = PopChrom[x,])], collapse = "+")}, FUN.VALUE = character(1))

  #This adds the y~stuff
  fullModel <- paste(depVar, halfModel, sep = "~")


  #This part calculates the model and puts it in a string to return
  retStuff <- lapply(X = fullModel, FUN = mFunc, data = data.frame(data))

  #way not faster on test stuff, maybe better in larger problems?
  #library(parallel)
  #mclapply(X = fullModel, FUN = lm, data = data.frame(data), mc.cores = 4)

  return(retStuff)
}
#' Mutate Chromosome
#'
#' Make mutations in the chromosomes
#'
#' @usage Mutate(population,mutRate)
#'
#' @param population Matrix of chromosomes currently in the population.
#'
#' @param mutRate A double given from the user that defines the mutation rate.
#'
#' @details This function creates binomially distributed mutations on
#' chromosomes at the rate specified by the user.
#'
#' @return A matrix of chromosomes in the population. This overwrites the current
#' population chromosomes.
#'
#' @examples
#' set.seed(42)
#' test_PopChrom <- sample(c(0,1),1000,replace=T)
#' Mutate(test_PopChrom, 0.01)
#'
#' @export
Mutate <- function(population=PopChrom, mutRate=mutRate){
  #Generate random mutations at the specified rate
  Mutations <- rbinom(n = length(population), size = 1, prob = mutRate)

  #find which sites receive a mutation
  index <- which(Mutations==1)

  #mutate the sites in the population. if 0, make 1, if 1, make 0
  population[index] <- ifelse(test = population[index]==1, yes = 0, no = 1)

  return(population)
}
#' Breed
#'
#' Perform recombination on chromosomes
#'
#' @usage Breed(population, method, scores, nPop, nGenes)
#'
#' @param population Matrix of chromosomes currently in the population.
#'
#' @param method One of three methods for choosing which chromosomes pair
#' before crossover.
#' \itemize{
#'   \item{oneScore: }{One parent is picked proportional to score, the second is random.}
#'   \item{twoScore: }{Both parents are picked proportional to score.}
#'   \item{tournament: }{Tournament selection method.}
#' }
#'
#' @param scores List of scores for each model, generated by scoreFunc().
#'
#' @param nPop Integer defining the population size. Equal to the length
#' of population and scores.
#'
#' @param nGenes Integer number of genes provided by the user or calculated
#' prior to running the model.
#'
#' @details This function performs crossovers between chromosomes to generate the
#' next generation of population. It performs an elite style recombination, where
#' the best scoring chromosome is always kept and the worst is removed, then
#' applies one of 3 methods to determine the crossovers between the remaining
#' chromosomes. Crossovers are then performed randomly within each pair.
#'
#' @return A matrix of chromosomes, with nRow=nPop and nCol=nGenes.
#'
#' @examples
#' set.seed(42)
#' test_PopChrom <- matrix(sample(c(0,1),16,replace=T), ncol = 4, nrow = 4)
#' test_Method <- "tournament"
#' test_Score <- rnorm(n = 4, mean = 10, sd = 1)
#' test_nPop <- 4
#' test_nGenes <- 4
#' Breed(test_PopChrom, test_Method, test_Scores, test_nPop, test_nGenes)
#'
#' @export
Breed <- function(population=PopChrom, method=methodBreed, scores=grades, nPop=Pop, nGenes=genes){
  if (is.element(method, c("oneScore", "twoScore", "tournament"))==FALSE) {
    stop("Please enter an appropriate method for parent selection.")
  }

  returnDat <- matrix(data = 0, nrow = nPop, ncol = nGenes)

  #perform elite style recombination
  # this saves the best result for a run
  elite <- which.min(x = scores)

  # put the elite at the top of the return
  returnDat[1, ] <- population[elite, ]

  #remove the worst individual so they can't mate
  worst <- which.max(x = scores)
  population <- population[-worst,]
  scores <- scores[-worst]

  #get relative fitness of individuals
  relscore <- Fitness(scores = scores)

  if (method != "tournament") {

    #get first parents
    # this is weighted towards the best individual (the same as our elite one)
    # one less than total pop because we are saving 1
    firstParents <- sample(x = 1:(nPop-1), size = nPop-1, replace = TRUE, prob = relscore)

    # get second parent
    # This is random or weighted by relative score (can choose)
    # Prevents 2 of same chromosome from breeding
    # one less than total pop because we are saving one
    secondParents <- getSecondParents(firstParents=firstParents, method=method, nPop=nPop, relScore=relscore)
  } else if (method=="tournament") {

    #  use tournament selection to get list of all parents
    parents <- tournamentSelection(population=population, scores=scores, nPop=nPop)
    matched <- matchParents(parents=parents, nPop=nPop)
    firstParents <- matched[[1]]
    secondParents <- matched[[2]]

    }

  # perform crossover mating, don't apply to the elite
  returnDat[2:nPop, ] <- Crossover(Chromes=population, parent1=firstParents, parent2=secondParents, numgenes=nGenes)

  return(returnDat)
}
#' Plot Genetic Algorithm
#'
#' This function plots the history of the genetic algorithm performed
#'
#' @usage GA_Plot(gaObject, title, xAxis, yAxis, color, ...)
#'
#' @param gaObject Object returned by select() function.
#'
#' @param title The title of the plot. Default is "Model Score History".
#'
#' @param xAxis The x-axis label. Default is "Generation".
#'
#' @param yAxis The y-axis label. Default is "Score".
#'
#' @param color The color of the points. Default is "green" for genetics.
#'
#' @param ... Extra arguments to be passed to plot function.
#'
#' @details This function accepts a select() object and plots the score history.
#' Several default options are given. The plot type is fixed.
#'
#' @return Plots the full history of the genetic algorithm.
#'
#' @examples
#' set.seed(42)
#' test_Data <- data.frame( matrix(data = runif(n = 900, min = 0, max = 100), nrow = 100, ncol = 9) )
#' colnames(test_Data) <- c("Y", LETTERS[1:8])
#' holdObject <- select(data=test_Data, model=lm, generations=100)
#' GA_Plot(holdObject)
#'
#' @export
GA_Plot <- function(gaObject, title="Model Score History", xAxis="Generation", yAxis="Score", color="green", ...){
  #get number of rows and columns
  numRows <- length(gaObject$history[[1]])
  numCols <- length(gaObject$history)

  #unlist the data and fill a matrix with it
  matrixPoints <- matrix(data = unlist(gaObject$history), nrow = numRows,
                         ncol = numCols)
  xScale <- matrix(data = 1:numCols, nrow = numRows, ncol = numCols, byrow = TRUE)

  matplot(x=xScale, y = matrixPoints, type = "p", lty = 0, lwd = 2, pch = 16,
          col = color, cex = 1.0, xlim = c(1,numCols), ylim = c(min(matrixPoints), max(matrixPoints)),
          xlab = xAxis, ylab = yAxis, main=title)

}
GilChrist19/GA documentation built on May 13, 2019, 5:32 p.m.