###############################################################################
# GGGGGGGGGGGGG AAA
# GGG::::::::::::G A:::A
# GG:::::::::::::::G A:::::A
# G:::::GGGGGGGG::::G A:::::::A
# G:::::G GGGGGG A:::::::::A
# G:::::G A:::::A:::::A
# G:::::G A:::::A A:::::A
# G:::::G GGGGGGGGGG A:::::A A:::::A
# G:::::G G::::::::G A:::::A A:::::A
# G:::::G GGGGG::::G A:::::AAAAAAAAA:::::A
# G:::::G G::::G A:::::::::::::::::::::A
# G:::::G G::::G A:::::AAAAAAAAAAAAA:::::A
# G:::::GGGGGGGG::::G A:::::A A:::::A
# GG:::::::::::::::G A:::::A A:::::A
# GGG::::::GGG:::G A:::::A A:::::A
# GGGGGG GGGGAAAAAAA AAAAAAA
###############################################################################
# STAT 243
# Ruitong Zhu
# Jinxin Qu
# Rowan Morse
# Jared Bennett
###############################################################################
# Auxilary functions for select()
###############################################################################
#' Safety Function
#'
#' This function ensures no chromosomes are empty
#'
#' @usage safetyFunc(population)
#'
#' @param population The list of chromosomes in the current population.
#'
#' @details This function ensures that no chromosomes are empty. Models cannot
#' have a dependent variable defined with no explanatory variables, therefore no
#' chromosomes can be empty.
#'
#' @return A matrix of chromosomes. This overwrites the chromosomes currently in
#' the population.
#'
#' @examples
#' set.seed(42)
#' test_PopChrom <- matrix(sample(c(0,1), 16, replace=TRUE), ncol = 4, nrow = 4)
#' safetyFunc(test_PopChrom)
#'
#' @export
safetyFunc <- function(population=PopChrom) {
#make sure there are no 0 chromosomes
zeroRow <- (rowSums(x = population)) == 0
if(any(zeroRow)) {
#get which chromosomes are 0
replaceR <- which(zeroRow)
#find the least used locus.
# in a tie, the first one found is returned.
leastCol <- which.min(x = colSums(x = population) )
#place something in each row, at the least used locus
population[replaceR, leastCol] <- 1
}
return(population)
}
#' Perform Model
#'
#' This function performs the fitting model provided by the user
#'
#' @usage PerformModel(data, expVar, depVar, population, mFunc)
#'
#' @param data Dataframe of data provided by the user. This must be a dataframe,
#' it must have the dependent variable as the first column, and subsequent
#' columns are the free variables.
#'
#' @param expVar String list of explanatory variables, pulled from the
#' column names of data.
#'
#' @param depVar Single string for the dependent variable. It is pulled
#' from the first column name of data.
#'
#' @param population Matrix of chromosomes currently in the population.
#' These define what combinations of explanatory variables to run the model on.
#'
#' @param mFunc The user-supplied model to run on the data. This defaults to
#' lm().
#'
#' @details This function applies the defined model to the data, using depVar
#' as the dependent variable (observations) and some combination of expVar
#' as defined by the chromosomes from population.
#'
#' @return A list of mFunc objects.
#'
#' @examples
#' test_data <- mtcars
#' tese_eVar <- colnames(test_data)[2:NCOL(test_data)]
#' test_dVar <- colnames(test_data)[1]
#' test_PopChrom <- matrix(data = c(1,1,1,0,0,1,0,0,1,1), nrow = 1, ncol = ncol(mtcars) - 1)
#' test_model <- lm
#' PerformModel(test_data,tese_eVar,test_dVar,test_PopChrom,test_model)
#'
#' @export
PerformModel <- function(data=data, expVar=eVar, depVar=dVar, population=PopChrom, mFunc=model){
#This needs to take extra args for model
# do we want to include/exclude the intercept?
#combine eVar
# these do the same thing, vapply is marginally faster. Can it be better/more legible?
halfModel <- apply(X = population, MARGIN = 1, FUN = function(x){paste0(expVar[as.logical(x = x)], collapse = "+")})
#vapply(X = 1:5, FUN = function(x){paste0(eVar[as.logical(x = PopChrom[x,])], collapse = "+")}, FUN.VALUE = character(1))
#This adds the y~stuff
fullModel <- paste(depVar, halfModel, sep = "~")
#This part calculates the model and puts it in a string to return
retStuff <- lapply(X = fullModel, FUN = mFunc, data = data.frame(data))
#way not faster on test stuff, maybe better in larger problems?
#library(parallel)
#mclapply(X = fullModel, FUN = lm, data = data.frame(data), mc.cores = 4)
return(retStuff)
}
#' Mutate Chromosome
#'
#' Make mutations in the chromosomes
#'
#' @usage Mutate(population,mutRate)
#'
#' @param population Matrix of chromosomes currently in the population.
#'
#' @param mutRate A double given from the user that defines the mutation rate.
#'
#' @details This function creates binomially distributed mutations on
#' chromosomes at the rate specified by the user.
#'
#' @return A matrix of chromosomes in the population. This overwrites the current
#' population chromosomes.
#'
#' @examples
#' set.seed(42)
#' test_PopChrom <- sample(c(0,1),1000,replace=T)
#' Mutate(test_PopChrom, 0.01)
#'
#' @export
Mutate <- function(population=PopChrom, mutRate=mutRate){
#Generate random mutations at the specified rate
Mutations <- rbinom(n = length(population), size = 1, prob = mutRate)
#find which sites receive a mutation
index <- which(Mutations==1)
#mutate the sites in the population. if 0, make 1, if 1, make 0
population[index] <- ifelse(test = population[index]==1, yes = 0, no = 1)
return(population)
}
#' Breed
#'
#' Perform recombination on chromosomes
#'
#' @usage Breed(population, method, scores, nPop, nGenes)
#'
#' @param population Matrix of chromosomes currently in the population.
#'
#' @param method One of three methods for choosing which chromosomes pair
#' before crossover.
#' \itemize{
#' \item{oneScore: }{One parent is picked proportional to score, the second is random.}
#' \item{twoScore: }{Both parents are picked proportional to score.}
#' \item{tournament: }{Tournament selection method.}
#' }
#'
#' @param scores List of scores for each model, generated by scoreFunc().
#'
#' @param nPop Integer defining the population size. Equal to the length
#' of population and scores.
#'
#' @param nGenes Integer number of genes provided by the user or calculated
#' prior to running the model.
#'
#' @details This function performs crossovers between chromosomes to generate the
#' next generation of population. It performs an elite style recombination, where
#' the best scoring chromosome is always kept and the worst is removed, then
#' applies one of 3 methods to determine the crossovers between the remaining
#' chromosomes. Crossovers are then performed randomly within each pair.
#'
#' @return A matrix of chromosomes, with nRow=nPop and nCol=nGenes.
#'
#' @examples
#' set.seed(42)
#' test_PopChrom <- matrix(sample(c(0,1),16,replace=T), ncol = 4, nrow = 4)
#' test_Method <- "tournament"
#' test_Score <- rnorm(n = 4, mean = 10, sd = 1)
#' test_nPop <- 4
#' test_nGenes <- 4
#' Breed(test_PopChrom, test_Method, test_Scores, test_nPop, test_nGenes)
#'
#' @export
Breed <- function(population=PopChrom, method=methodBreed, scores=grades, nPop=Pop, nGenes=genes){
if (is.element(method, c("oneScore", "twoScore", "tournament"))==FALSE) {
stop("Please enter an appropriate method for parent selection.")
}
returnDat <- matrix(data = 0, nrow = nPop, ncol = nGenes)
#perform elite style recombination
# this saves the best result for a run
elite <- which.min(x = scores)
# put the elite at the top of the return
returnDat[1, ] <- population[elite, ]
#remove the worst individual so they can't mate
worst <- which.max(x = scores)
population <- population[-worst,]
scores <- scores[-worst]
#get relative fitness of individuals
relscore <- Fitness(scores = scores)
if (method != "tournament") {
#get first parents
# this is weighted towards the best individual (the same as our elite one)
# one less than total pop because we are saving 1
firstParents <- sample(x = 1:(nPop-1), size = nPop-1, replace = TRUE, prob = relscore)
# get second parent
# This is random or weighted by relative score (can choose)
# Prevents 2 of same chromosome from breeding
# one less than total pop because we are saving one
secondParents <- getSecondParents(firstParents=firstParents, method=method, nPop=nPop, relScore=relscore)
} else if (method=="tournament") {
# use tournament selection to get list of all parents
parents <- tournamentSelection(population=population, scores=scores, nPop=nPop)
matched <- matchParents(parents=parents, nPop=nPop)
firstParents <- matched[[1]]
secondParents <- matched[[2]]
}
# perform crossover mating, don't apply to the elite
returnDat[2:nPop, ] <- Crossover(Chromes=population, parent1=firstParents, parent2=secondParents, numgenes=nGenes)
return(returnDat)
}
#' Plot Genetic Algorithm
#'
#' This function plots the history of the genetic algorithm performed
#'
#' @usage GA_Plot(gaObject, title, xAxis, yAxis, color, ...)
#'
#' @param gaObject Object returned by select() function.
#'
#' @param title The title of the plot. Default is "Model Score History".
#'
#' @param xAxis The x-axis label. Default is "Generation".
#'
#' @param yAxis The y-axis label. Default is "Score".
#'
#' @param color The color of the points. Default is "green" for genetics.
#'
#' @param ... Extra arguments to be passed to plot function.
#'
#' @details This function accepts a select() object and plots the score history.
#' Several default options are given. The plot type is fixed.
#'
#' @return Plots the full history of the genetic algorithm.
#'
#' @examples
#' set.seed(42)
#' test_Data <- data.frame( matrix(data = runif(n = 900, min = 0, max = 100), nrow = 100, ncol = 9) )
#' colnames(test_Data) <- c("Y", LETTERS[1:8])
#' holdObject <- select(data=test_Data, model=lm, generations=100)
#' GA_Plot(holdObject)
#'
#' @export
GA_Plot <- function(gaObject, title="Model Score History", xAxis="Generation", yAxis="Score", color="green", ...){
#get number of rows and columns
numRows <- length(gaObject$history[[1]])
numCols <- length(gaObject$history)
#unlist the data and fill a matrix with it
matrixPoints <- matrix(data = unlist(gaObject$history), nrow = numRows,
ncol = numCols)
xScale <- matrix(data = 1:numCols, nrow = numRows, ncol = numCols, byrow = TRUE)
matplot(x=xScale, y = matrixPoints, type = "p", lty = 0, lwd = 2, pch = 16,
col = color, cex = 1.0, xlim = c(1,numCols), ylim = c(min(matrixPoints), max(matrixPoints)),
xlab = xAxis, ylab = yAxis, main=title)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.