R/Gen.Alg.R
In HMP: Hypothesis Testing and Power Calculations for Comparing Metagenomic Samples from HMP

Documented in Gen.Alg

Gen.Alg <-
function(data, covars, iters=50, popSize=200, earlyStop=0, dataDist="euclidean", covarDist="gower", 
		verbose=FALSE, plot=TRUE, minSolLen=NULL, maxSolLen=NULL, custCovDist=NULL, penalty=0){
	if(missing(data) || (missing(covars) && is.null(custCovDist)))
		stop("data and/or covars are missing.")
	
	# Check for any bad numbers
	if(iters <= 0)
		stop("iters must be an integer greater than 0")
	if(popSize <= 0)
		stop("popSize must be an integer greater than 0")
	if(earlyStop < 0)
		stop("earlyStop must be an integer greater than or equal to 0")
	if(penalty < 0 || penalty > 1)
		stop("penalty must be between 0 and 1")
	
	# Check distances
	if(dataDist != "euclidean" && dataDist != "gower")
		stop("data.dist must be euclidean or gower.")
	if(covarDist != "euclidean" && covarDist != "gower")
		stop("covars.dist must be euclidean or gower.")
	
	# Define size
	size <- ncol(data)
	
	# Check stopping rules
	if(!is.null(minSolLen))
		if(minSolLen < 0 || minSolLen >= size)
			stop("minSolLen must be 0 or greater and less than the number of columns in data.")
	if(!is.null(maxSolLen))
		if(maxSolLen <= 0 || maxSolLen > size)
			stop("maxSolLen must be greater than 0 and less than or equal to the number columns in data.")
	if(!is.null(maxSolLen) && !is.null(minSolLen))
		if(maxSolLen < minSolLen)
			stop("maxSolLen must be bigger than minSolLen.")
	
	# Define some variables for use in the GA loop
	mutationChance <- 1/(size+1)
	elitism <- floor(popSize/5)
	evalSumm <- matrix(NA, iters, 6)
	newPopSize <- popSize - elitism
	newPopulation <- matrix(NA, newPopSize, size)
	parentProb <- stats::dnorm(1:popSize, mean=0, sd=(popSize/3))
	
	if(verbose){
		print("X. Current Step : Current Time Taken")
		runningTime <- proc.time()
		print(paste("1. Calculating Distances:", round((proc.time() - runningTime)[3], 3)))
	}
	
	# Set up our base distance matrix
	if(is.null(custCovDist)){
		covarDists <- vegan::vegdist(covars, covarDist)
	}else{
		covarDists <- custCovDist
	}
	
	# Get each columns distance contribution
	colDists <- vector("list", ncol(data))
	for(i in 1:ncol(data))
		colDists[[i]] <- vegan::vegdist(data[,i], dataDist)
	if(dataDist == "euclidean")
		colDists <- lapply(colDists, function(x) x^2)
	
	if(verbose)
		print(paste("2. Creating Starting Data:", round((proc.time() - runningTime)[3], 3)))
	
	# Create our starting data
	population <- gaCreation(data, popSize)
	
	if(verbose)
		print(paste("3. Scoring Starting Data:", round((proc.time() - runningTime)[3], 3)))
	
	# Score and sort
	evalVals <- rep(NA, popSize)
	for(e in 1:popSize)
		evalVals[e] <- gaScoring(population[e,], covarDists, colDists, dataDist, penalty, minSolLen, maxSolLen)
	population <- population[order(evalVals, decreasing=TRUE),]
	bestScoreValue <- max(evalVals)
	bestScoreCounter <- 0
	
	if(verbose)
		print(paste("4. Running Iterations:", round((proc.time() - runningTime)[3], 3)))
	
	# Run GA
	ptr <- proc.time()
	for(i in 1:iters){
		if(verbose){
			if(i %% round(iters/10) == 0)
				print(paste("Iteration - ", i, ": ", round((proc.time() - runningTime)[3], 3), sep=""))
		}
		# Cross over to fill rest of new population
		for(child in 1:newPopSize){
			parentIDs <- sample(1:popSize, 2, prob=parentProb)
			parents <- population[parentIDs,]
			crossOverPoint <- sample(0:size, 1)
			if(crossOverPoint == 0){
				newPopulation[child,] <- parents[2,]
			}else if(crossOverPoint == size){
				newPopulation[child,] <- parents[1,]
			}else{
				newPopulation[child,] <- c(parents[1,][1:crossOverPoint], parents[2,][(crossOverPoint+1):size])
			}
		}
		
		# Mutate all but elite
		if(mutationChance > 0){
			population[(elitism+1):popSize,] <- apply(newPopulation, 2, function(x){ifelse(stats::runif(newPopSize) < mutationChance, 1-x, x)})
		}else{
			population[(elitism+1):popSize,] <- newPopulation
		}
		
		# Score and sort our new solutions
		for(e in 1:popSize)
			evalVals[e] <- gaScoring(population[e,], covarDists, colDists, dataDist, penalty, minSolLen, maxSolLen)
		population <- population[order(evalVals, decreasing=TRUE),]
		evalSumm[i,] <- summary(evalVals)
		
		# Check if we want to stop early
		if(bestScoreValue == max(evalVals)){
			bestScoreCounter <- bestScoreCounter + 1
		}else{
			bestScoreCounter <- 0
			bestScoreValue <- max(evalVals)
		}
		
		if(bestScoreCounter == earlyStop && earlyStop != 0)
			break
	}	
	gaTime <- (proc.time() - ptr)[3]
	
	if(verbose)
		print(paste("5. Prettying Results", round((proc.time() - runningTime)[3], 3)))
	
	# Pretty up our data for returning
	rownames(population) <- paste("Solution", 1:nrow(population))
	colnames(population) <- colnames(data)
	rownames(evalSumm) <- paste("Iteration", 1:nrow(evalSumm))
	colnames(evalSumm) <- c("Worst", "25%ile", "Median", "Mean", "75%ile", "Best")
	
	evalVals <- matrix(evalVals[order(evalVals, decreasing=TRUE)], 1, length(evalVals))
	colnames(evalVals) <- paste("Solution ", 1:length(evalVals))
	rownames(evalVals) <- "Score"
	
	# Get selected columns using a consensus
	selIndex <- which(population[1,] == 1)
	sel <- colnames(data)[selIndex]
	
	# Get the nonselected columns
	nonSel <- colnames(data)[-selIndex]
	
	# Plot scoring summary
	if(plot)
		gaPlot(evalSumm)
	
	return(list(scoreSumm=evalSumm, solutions=population, scores=evalVals, time=gaTime, selected=sel, nonSelected=nonSel, selectedIndex=selIndex))
}

Any scripts or data that you put into this service are public.

HMP documentation built on Aug. 31, 2019, 5:05 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

HMP
Hypothesis Testing and Power Calculations for Comparing Metagenomic Samples from HMP

R/Gen.Alg.R
In HMP: Hypothesis Testing and Power Calculations for Comparing Metagenomic Samples from HMP

Defines functions Gen.Alg

Documented in Gen.Alg

Try the HMP package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

HMP Hypothesis Testing and Power Calculations for Comparing Metagenomic Samples from HMP

R/Gen.Alg.R In HMP: Hypothesis Testing and Power Calculations for Comparing Metagenomic Samples from HMP

Defines functions Gen.Alg

Documented in Gen.Alg

Try the HMP package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

HMP
Hypothesis Testing and Power Calculations for Comparing Metagenomic Samples from HMP

R/Gen.Alg.R
In HMP: Hypothesis Testing and Power Calculations for Comparing Metagenomic Samples from HMP