# R/selac.R

######################################################################################################################################
######################################################################################################################################
### SELAC -- SELection on Amino acids and/or Codons
######################################################################################################################################
######################################################################################################################################

#written by Jeremy M. Beaulieu and Brian O'Meara

###LOAD REQUIRED PACKAGES -- eventually move to namespace:
## only set to TRUE when testing. Set to FALSE when committing changes
# if(FALSE){
#     library(ape)
#     library(expm)
#     library(nnet)
#     library(nloptr)
#     library(seqinr)
#     library(phangorn)
#     library(MASS)
#     library(parallel)
#     library(Rcpp)
#     library(RcppArmadillo)
#     library(inline)
#     library(deSolve)
#     ##load compiled library independent of working directory
#     wd <- getwd();
#     ##get last part of wd that ends in 'selac'
#     selac.dir <- regmatches(wd, regexpr(".*/selac", wd))
#     if(length(selac.dir) == 0) selac.dir <- "./selac"
#     so.locale <- paste(selac.dir, "/src/selacHMM.so",sep="")
#     dyn.load(so.locale)
#     rm(selac.dir, so.locale)
# }

# Use seqinr coding of nucleotides: see ?n2s: 0 -> "a", 1 -> "c", 2 -> "g", 3 -> "t"

#Numcodes:
# 1 standard
# 2 vertebrate.mitochondrial
# 3 yeast.mitochondrial
# 4 protozoan.mitochondrial+mycoplasma
# 5 invertebrate.mitochondrial
# 6 ciliate+dasycladaceal
# 9 echinoderm+flatworm.mitochondrial
# 10 euplotid
# 11 bacterial+plantplastid
# 12 alternativeyeast
# 13 ascidian.mitochondrial
# 14 alternativeflatworm.mitochondrial
# 15 blepharism
# 16 chlorophycean.mitochondrial
# 21 trematode.mitochondrial
# 22 scenedesmus.mitochondrial
# 23 thraustochytrium.mitochondrial
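
# A quick sketch (not run) of the seqinr coding conventions used throughout this file;
# the example sequence is arbitrary:
if (FALSE) {
    library(seqinr)
    n2s(0:3)                               # "a" "c" "g" "t"
    translate(s2c("atgaaa"), numcode = 1)  # "M" "K" under the standard code
}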


######################################################################################################################################
######################################################################################################################################
### A collection of constants used by various functions
######################################################################################################################################
######################################################################################################################################


.codon.sets <- matrix(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3), ncol=3)


.codon.set.translate <- matrix(c("a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "a", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "c", "g", "g", "g", "g", "g", "g", "g", "g", "g", "g", "g", "g", "g", "g", "g", "g", "t", "t", "t", "t", "t", "t", "t", "t", "t", "t", "t", "t", "t", "t", "t", "t",
"a", "a", "a", "a", "c", "c", "c", "c", "g", "g", "g", "g", "t", "t", "t", "t", "a", "a", "a", "a", "c", "c", "c", "c", "g", "g", "g", "g", "t", "t", "t", "t", "a", "a", "a", "a", "c", "c", "c", "c", "g", "g", "g", "g", "t", "t", "t", "t", "a", "a", "a", "a", "c", "c", "c", "c", "g", "g", "g", "g", "t", "t", "t", "t",
"a", "c", "g", "t", "a", "c", "g", "t", "a", "c", "g", "t", "a", "c", "g", "t", "a", "c", "g", "t", "a", "c", "g", "t", "a", "c", "g", "t", "a", "c", "g", "t", "a", "c", "g", "t", "a", "c", "g", "t", "a", "c", "g", "t", "a", "c", "g", "t", "a", "c", "g", "t", "a", "c", "g", "t", "a", "c", "g", "t", "a", "c", "g", "t"), ncol=3)


.codon.name <- c("aaa" ,"aac" ,"aag" ,"aat" ,"aca" ,"acc" ,"acg" ,"act" ,"aga" ,"agc" ,"agg" ,"agt" ,"ata" ,"atc" ,"atg" ,"att" ,"caa" ,"cac" ,"cag" ,"cat", "cca" ,"ccc",
"ccg" ,"cct" ,"cga" ,"cgc" ,"cgg" ,"cgt" ,"cta" ,"ctc" ,"ctg" ,"ctt" ,"gaa" ,"gac" ,"gag" ,"gat" ,"gca" ,"gcc" ,"gcg" ,"gct" ,"gga" ,"ggc", "ggg" ,"ggt",
"gta" ,"gtc" ,"gtg" ,"gtt" ,"taa" ,"tac" ,"tag" ,"tat" ,"tca" ,"tcc" ,"tcg" ,"tct" ,"tga" ,"tgc" ,"tgg" ,"tgt" ,"tta" ,"ttc" ,"ttg" ,"ttt")


TranslateCodon <- function(codon.string, numcode) {
    return(translate(s2c(codon.string), numcode=numcode))
}


.aa.translation <- list(sapply(.codon.name, TranslateCodon, numcode=1),
sapply(.codon.name, TranslateCodon, numcode=2),
sapply(.codon.name, TranslateCodon, numcode=3),
sapply(.codon.name, TranslateCodon, numcode=4),
sapply(.codon.name, TranslateCodon, numcode=5),
sapply(.codon.name, TranslateCodon, numcode=6),
sapply(.codon.name, TranslateCodon, numcode=9),
sapply(.codon.name, TranslateCodon, numcode=10),
sapply(.codon.name, TranslateCodon, numcode=11),
sapply(.codon.name, TranslateCodon, numcode=12),
sapply(.codon.name, TranslateCodon, numcode=13),
sapply(.codon.name, TranslateCodon, numcode=14),
sapply(.codon.name, TranslateCodon, numcode=15),
sapply(.codon.name, TranslateCodon, numcode=16),
sapply(.codon.name, TranslateCodon, numcode=21),
sapply(.codon.name, TranslateCodon, numcode=22),
sapply(.codon.name, TranslateCodon, numcode=23))


.unique.aa <- c("K", "N", "T", "R", "S", "I", "M", "Q", "H", "P", "L", "E", "D", "A", "G", "V", "*", "Y", "C", "W", "F")

#Maps a seqinr numcode to its index in the .aa.translation list above (unused numcodes fall back to the standard code):
.numcode.translation.idx <- c(1:6, 1, 1, 7:14, rep(1,4), 15:17)


######################################################################################################################################
######################################################################################################################################
### Various functions used by main function:
######################################################################################################################################
######################################################################################################################################

CreateAADistanceMatrixOriginal <- function(alpha=1.829272, beta=0.101799, gamma=0.0003990333, aa.properties=NULL, normalize=TRUE) {
    if(is.null(aa.properties)) {
        #     aa.properties <- structure(c(0, 2.75, 1.38, 0.92, 0, 0.74, 0.58, 0, 0.33, 0, 0,
        # 1.33, 0.39, 0.89, 0.65, 1.42, 0.71, 0, 0.13, 0.2, 8.1, 5.5, 13,
        # 12.3, 5.2, 9, 10.4, 5.2, 11.3, 4.9, 5.7, 11.6, 8, 10.5, 10.5,
        # 9.2, 8.6, 5.9, 5.4, 6.2, 31, 55, 54, 83, 132, 3, 96, 111, 119,
        # 111, 105, 56, 32.5, 85, 124, 32, 61, 84, 170, 136), .Dim = c(20L,
        # 3L), .Dimnames = list(c("Ala", "Cys", "Asp", "Glu", "Phe", "Gly",
        # "His", "Ile", "Lys", "Leu", "Met", "Asn", "Pro", "Gln", "Arg",
        # "Ser", "Thr", "Val", "Trp", "Tyr"), c("c", "p", "v"))) #properties from Grantham paper
        aa.properties <- structure(c(0, 2.75, 1.38, 0.92, 0, 0.74, 0.58, 0, 0.33, 0, 0,
        1.33, 0.39, 0.89, 0.65, 1.42, 0.71, 0, 0.13, 0.2, 8.1, 5.5, 13,
        12.3, 5.2, 9, 10.4, 5.2, 11.3, 4.9, 5.7, 11.6, 8, 10.5, 10.5,
        9.2, 8.6, 5.9, 5.4, 6.2, 31, 55, 54, 83, 132, 3, 96, 111, 119,
        111, 105, 56, 32.5, 85, 124, 32, 61, 84, 170, 136), .Dim = c(20L,
        3L), .Dimnames = list(c("A", "C", "D", "E", "F", "G",
        "H", "I", "K", "L", "M", "N", "P", "Q", "R",
        "S", "T", "V", "W", "Y"), c("c", "p", "v"))) #properties from Grantham paper
    }
    n.states <- dim(aa.properties)[1]
    if(n.states != 20) {
        warning(paste("aa.properties given", n.states, "states, normally there are 20 amino acids"))
    }
    aa.distances <- matrix(0,nrow=n.states,ncol=n.states)
    for (i in sequence(n.states)) {
        for (j in sequence(n.states)) {
            aa.distances[i, j] <- (alpha*(aa.properties[i,1] - aa.properties[j,1])^2 + beta*(aa.properties[i,2]-aa.properties[j,2])^2+gamma*(aa.properties[i,3]-aa.properties[j,3])^2)^(1/2)
        }
    }
    if(normalize) {
        aa.distances <- aa.distances / (sum(aa.distances) / (n.states*n.states - n.states)) #normalize so mean is 1 across the non-diagonal entries
    }
    rownames(aa.distances) <- rownames(aa.properties)
    colnames(aa.distances) <- rownames(aa.properties)
    return(aa.distances)
}
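
# Example usage (not run): with the default Grantham properties and weights this gives a
# 20 x 20 symmetric distance matrix whose off-diagonal mean is scaled to 1.
if (FALSE) {
    d.grantham <- CreateAADistanceMatrixOriginal()
    dim(d.grantham)        # 20 20
    d.grantham["S", "F"]   # weighted property distance between serine and phenylalanine
}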


CreateAADistanceMatrix <- function(alpha=1.829272, beta=0.101799, gamma=0.0003990333, aa.properties=NULL, normalize=FALSE, poly.params=NULL, k=0) {
    if(is.null(aa.properties)) {
        #     aa.properties <- structure(c(0, 2.75, 1.38, 0.92, 0, 0.74, 0.58, 0, 0.33, 0, 0,
        # 1.33, 0.39, 0.89, 0.65, 1.42, 0.71, 0, 0.13, 0.2, 8.1, 5.5, 13,
        # 12.3, 5.2, 9, 10.4, 5.2, 11.3, 4.9, 5.7, 11.6, 8, 10.5, 10.5,
        # 9.2, 8.6, 5.9, 5.4, 6.2, 31, 55, 54, 83, 132, 3, 96, 111, 119,
        # 111, 105, 56, 32.5, 85, 124, 32, 61, 84, 170, 136), .Dim = c(20L,
        # 3L), .Dimnames = list(c("Ala", "Cys", "Asp", "Glu", "Phe", "Gly",
        # "His", "Ile", "Lys", "Leu", "Met", "Asn", "Pro", "Gln", "Arg",
        # "Ser", "Thr", "Val", "Trp", "Tyr"), c("c", "p", "v"))) #properties from Grantham paper
        aa.properties <- structure(c(0, 2.75, 1.38, 0.92, 0, 0.74, 0.58, 0, 0.33, 0, 0,
        1.33, 0.39, 0.89, 0.65, 1.42, 0.71, 0, 0.13, 0.2, 8.1, 5.5, 13,
        12.3, 5.2, 9, 10.4, 5.2, 11.3, 4.9, 5.7, 11.6, 8, 10.5, 10.5,
        9.2, 8.6, 5.9, 5.4, 6.2, 31, 55, 54, 83, 132, 3, 96, 111, 119,
        111, 105, 56, 32.5, 85, 124, 32, 61, 84, 170, 136), .Dim = c(20L,
        3L), .Dimnames = list(c("A", "C", "D", "E", "F", "G",
        "H", "I", "K", "L", "M", "N", "P", "Q", "R",
        "S", "T", "V", "W", "Y"), c("c", "p", "v"))) #properties from Grantham paper
    }
    n.states <- dim(aa.properties)[1]
    if(n.states != 20) {
        warning(paste("aa.properties given", n.states, "states, normally there are 20 amino acids"))
    }
    aa.distances <- matrix(0,nrow=n.states,ncol=n.states)
    if(k > 0){
        for (i in sequence(n.states)) {
            for (j in sequence(n.states)) {
                aa.distances[i, j] <- PolynomialTransform(x=(alpha*(aa.properties[i,1] - aa.properties[j,1])^2 + beta*(aa.properties[i,2]-aa.properties[j,2])^2+gamma*(aa.properties[i,3]-aa.properties[j,3])^2)^(1/2), xi=0, poly.params=poly.params, k=1)
            }
        }
    }else{
        for (i in sequence(n.states)) {
            for (j in sequence(n.states)) {
                aa.distances[i, j] <- PolynomialTransform(x=(alpha*(aa.properties[i,1] - aa.properties[j,1])^2 + beta*(aa.properties[i,2]-aa.properties[j,2])^2+gamma*(aa.properties[i,3]-aa.properties[j,3])^2)^(1/2), xi=0, poly.params=NULL, k=0)
            }
        }
    }
    if(normalize) {
        aa.distances <- aa.distances / (sum(aa.distances) / (n.states*n.states - n.states)) #normalize so mean is 1 across the non-diagonal entries
    }
    rownames(aa.distances) <- rownames(aa.properties)
    colnames(aa.distances) <- rownames(aa.properties)
    return(aa.distances)
}


###TEMPORARY### Used to test whether other distances are worth using.
GenerateAAProperties <- function(rows){
    mike.table <- read.delim("table.of.aa.attributes.from.Sharma.et.al.2013.csv", sep=",")
    aa.properties <- structure(unlist(t(mike.table[rows,-1])), .Dim = c(20L, 3L), .Dimnames = list(c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"), c("c", "p", "v")))
    return(aa.properties)
}


# Follows Grantham distance equations for generating starting values
GetAADistanceStartingParameters <- function(aa.properties){
    if(is.null(aa.properties)){
        aa.properties <- structure(c(0, 2.75, 1.38, 0.92, 0, 0.74, 0.58, 0, 0.33, 0, 0,
        1.33, 0.39, 0.89, 0.65, 1.42, 0.71, 0, 0.13, 0.2, 8.1, 5.5, 13,
        12.3, 5.2, 9, 10.4, 5.2, 11.3, 4.9, 5.7, 11.6, 8, 10.5, 10.5,
        9.2, 8.6, 5.9, 5.4, 6.2, 31, 55, 54, 83, 132, 3, 96, 111, 119,
        111, 105, 56, 32.5, 85, 124, 32, 61, 84, 170, 136), .Dim = c(20L, 3L), .Dimnames = list(c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"), c("c", "p", "v"))) #properties from Grantham paper
    }
    average.chemical.distance <- c()
    for(i in 1:3){
        average.chemical.distance <- c(average.chemical.distance, mean(dist(aa.properties[,i])))
    }
    weighting.factors <- (1 / average.chemical.distance)^2
    return(weighting.factors)
}
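
# A quick sketch (not run): this returns one weight per property column (c, p, v), i.e.
# (1 / mean pairwise property distance)^2, presumably seeding the alpha, beta, and gamma
# weights used in the distance matrices above.
if (FALSE) {
    GetAADistanceStartingParameters(aa.properties = NULL)  # three weighting factors
}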


PolynomialTransform <- function(x, xi, poly.params, k){
    if(k == 0){
        mk2_1 <- xi + x
    }
    if(k == 1){
        mk2_1 <- xi + x + poly.params[1]*(x^2) + poly.params[2]*(x^3)
    }
    ##Not necessary yet:
    #if(k == 2){
    #	mk2_1 <- xi + (coef.vec[2-1,]/2)*x + (coef.vec[3-1,]/3)*(x^2) + (coef.vec[4-1]/4)*(x^3) + (coef.vec[5-1]/5)*(x^4) + (coef.vec[6-1]/6)*(x^5)
    #}
    #if(k == 3){
    #	mk2_1 <- xi + (coef.vec[2-1,]/2)*x + (coef.vec[3-1,]/3)*(x^2) + (coef.vec[4-1]/4)*(x^3) + (coef.vec[5-1]/5)*(x^4) + (coef.vec[6-1]/6)*(x^5) + (coef.vec[7-1]/7)*(x^6) + (coef.vec[8-1]/8)*(x^7)
    #}
    ####################

    return(mk2_1)
}
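
# A minimal sketch (not run) of the two supported transforms; the poly.params values here are
# arbitrary illustrations:
if (FALSE) {
    PolynomialTransform(x = 2, xi = 0, poly.params = NULL, k = 0)          # 2
    PolynomialTransform(x = 2, xi = 0, poly.params = c(0.1, 0.01), k = 1)  # 2 + 0.1*2^2 + 0.01*2^3 = 2.48
}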


#Using matrix algebra from pg. 28.
CalculatePolynomialCoefficients <- function(alpha.poly, beta.poly, k){
    #coef.mat is the matrix of coefficients:
    coef.vec <- 1
    #k.set is a sequence of k ending with the max k which is specified at the function call:
    k.set = 1:k
    #phi is a transformation of alpha and beta:
    phi.poly = alpha.poly^2 + beta.poly
    #Matrix multiplication order does not matter for answer, but order matters for efficiency:
    for(k.index in 1:k){
        coef.vec <- CreatePolynomialMatrix(alpha.poly[k.index], phi.poly[k.index], k.set[k.index]) %*% coef.vec
    }
    return(coef.vec)
}


#See pg. 25-28 for structure of matrices. Easy.
CreatePolynomialMatrix <- function(alpha, phi, k){
    T.mat.k <- matrix(0, 2*k+1, 2*k-1)
    for(column.index in 1:(2*k-1)){
        if(column.index == 1){
            T.mat.k[1, column.index] = 1
            T.mat.k[2, column.index] = -2 * alpha
            T.mat.k[3, column.index] = phi
        }else{
            T.mat.k[column.index, column.index] = 1
            T.mat.k[column.index+1, column.index] = -2 * alpha
            T.mat.k[column.index+2, column.index] = phi
        }
    }
    return(T.mat.k)
}


CreateNucleotideMutationMatrix <- function(rates, model="JC", base.freqs=NULL) {
    if(model == "JC") {
        nuc.mutation.rates <- matrix(data=rates[1], nrow=4, ncol=4)
        rownames(nuc.mutation.rates) <- n2s(0:3)
        colnames(nuc.mutation.rates) <- n2s(0:3)
        diag(nuc.mutation.rates) <- 0
        diag(nuc.mutation.rates) <- -rowSums(nuc.mutation.rates)
        if(!is.null(base.freqs)){
            diag(nuc.mutation.rates) = 0
            nuc.mutation.rates = t(nuc.mutation.rates * base.freqs)
            diag(nuc.mutation.rates) = -rowSums(nuc.mutation.rates)
        }
        return(nuc.mutation.rates)
    }
    if(model == "HKY") {
        index <- matrix(NA, 4, 4)
        rates <- c(1,rates)
        sel <- col(index) < row(index)
        index[sel] <- c(1,2,1,1,2,1)
        index <- t(index)
        index[sel] <- c(1,2,1,1,2,1)
        nuc.mutation.rates <- matrix(0, nrow=4, ncol=4)
        nuc.mutation.rates<-matrix(rates[index], dim(index))
        rownames(nuc.mutation.rates) <- n2s(0:3)
        colnames(nuc.mutation.rates) <- n2s(0:3)
        diag(nuc.mutation.rates) <- 0
        diag(nuc.mutation.rates) <- -rowSums(nuc.mutation.rates)
        if(!is.null(base.freqs)){
            diag(nuc.mutation.rates) = 0
            nuc.mutation.rates = t(nuc.mutation.rates * base.freqs)
            diag(nuc.mutation.rates) = -rowSums(nuc.mutation.rates)
        }
        return(nuc.mutation.rates)
    }
    if(model == "GTR") {
        index <- matrix(NA, 4, 4)
        np <- 5
        sel <- col(index) < row(index)
        sel[4,3] = FALSE
        index[sel] <- 1:np
        index <- t(index)
        index[sel] <- 1:np
        nuc.mutation.rates <- matrix(0, nrow=4, ncol=4)
        nuc.mutation.rates<-matrix(rates[index], dim(index))
        rownames(nuc.mutation.rates) <- n2s(0:3)
        colnames(nuc.mutation.rates) <- n2s(0:3)
        nuc.mutation.rates[4,3] <- nuc.mutation.rates[3,4] <- 1
        diag(nuc.mutation.rates) <- 0
        diag(nuc.mutation.rates) <- -rowSums(nuc.mutation.rates)
        if(!is.null(base.freqs)){
            diag(nuc.mutation.rates) = 0
            nuc.mutation.rates = t(nuc.mutation.rates * base.freqs)
            diag(nuc.mutation.rates) = -rowSums(nuc.mutation.rates)
        }
        return(nuc.mutation.rates)
    }
    if(model == "UNREST") {
        index <- matrix(NA, 4, 4)
        np <- 12
        index[col(index) != row(index)] <- 1:np
        nuc.mutation.rates <- matrix(0, nrow=4, ncol=4)
        nuc.mutation.rates<-matrix(rates[index], dim(index))
        rownames(nuc.mutation.rates) <- n2s(0:3)
        colnames(nuc.mutation.rates) <- n2s(0:3)
        nuc.mutation.rates[3,4] = 1
        diag(nuc.mutation.rates) <- 0
        diag(nuc.mutation.rates) <- -rowSums(nuc.mutation.rates)
        #Next we take our rates and find the homogeneous solution to Q*pi=0 to determine the base freqs:
        #base.freqs <- Null(nuc.mutation.rates)
        #Rescale base.freqs so that they sum to 1:
        #base.freqs.scaled <- c(base.freqs/sum(base.freqs))
        #base.freqs.scaled.matrix <- rbind(base.freqs.scaled, base.freqs.scaled, base.freqs.scaled, base.freqs.scaled)
        #diag(nuc.mutation.rates) <- 0
        #Rescale Q to account for base.freqs:
        #nuc.mutation.rates <- nuc.mutation.rates * base.freqs.scaled.matrix
        #diag(nuc.mutation.rates) <- -rowSums(nuc.mutation.rates)

        return(nuc.mutation.rates)
    }
}
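
# Example usage (not run): a JC matrix with a single rate, and an HKY matrix where the supplied
# rate scales transitions (a<->g, c<->t) relative to transversions fixed at 1. The base.freqs
# values are arbitrary and assumed to be in the same a, c, g, t order as the row/column names.
if (FALSE) {
    CreateNucleotideMutationMatrix(rates = 1, model = "JC")
    CreateNucleotideMutationMatrix(rates = 2, model = "HKY", base.freqs = c(0.3, 0.2, 0.2, 0.3))
}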


CreateCodonMutationMatrixIndex <- function() {
    nuc.rates.index = matrix(1:16, 4, 4)
    #codon.sets <- CreateCodonSets()
    n.codons <- dim(.codon.sets)[1]
    codon.mutation.rates <- matrix(data=0, nrow=n.codons, ncol=n.codons)
    rownames(codon.mutation.rates) <- rep("",n.codons)
    colnames(codon.mutation.rates) <- rep("",n.codons)
    for (i in sequence(n.codons)) {
        for (j in sequence(n.codons)) {
            if(sum(.codon.sets[i,] == .codon.sets[j,])==2) { #means that two of the bases match
                mismatch.position <- which(.codon.sets[i,] != .codon.sets[j,])
                codon.mutation.rates[i,j] <- nuc.rates.index[1+.codon.sets[i,mismatch.position], 1+.codon.sets[j, mismatch.position]] #nucs numbered from 0:3, rows are 1:4, thus the add 1
            }
        }
        #codon.name <- paste(n2s(as.numeric(.codon.sets[i,])), collapse="")
        rownames(codon.mutation.rates)[i] <- .codon.name[i]
        colnames(codon.mutation.rates)[i] <- .codon.name[i]
    }
    codon.mutation.rates[codon.mutation.rates==0] = NA
    return(codon.mutation.rates)
}


CreateCodonMutationMatrixIndexEvolveAA <- function() {
    codon.mut.index.mat <- CreateCodonMutationMatrixIndex()

    n.codons <- dim(.codon.sets)[1]
    mat.dim <- 21*n.codons
    evolv.codon.mut.index.mat <- matrix(data=0, nrow=mat.dim, ncol=mat.dim)

    for(i in 1:21)
    {
        index.vec.diag <- (1+(i-1)*64):(64+(i-1)*64)
        # fill in a complete column of matrices; this has to be done before the matrix on the diagonal
        evolv.codon.mut.index.mat[index.vec.diag, -index.vec.diag] <- diag(17, 64, 64)#do.call(cbind, replicate(20, diag(18, 64, 64), simplify = FALSE))
        evolv.codon.mut.index.mat[index.vec.diag, index.vec.diag] <- codon.mut.index.mat # fill in matrix on the diagonal
    }

    evolv.codon.mut.index.mat[evolv.codon.mut.index.mat == 0] = NA
    return(evolv.codon.mut.index.mat)
}


CreateCodonMutationMatrix <- function(nuc.mutation.rates) {
    #codon.sets <- CreateCodonSets()
    n.codons <- dim(.codon.sets)[1]
    codon.mutation.rates <- matrix(data=0, nrow=n.codons, ncol=n.codons)
    rownames(codon.mutation.rates) <- rep("",n.codons)
    colnames(codon.mutation.rates) <- rep("",n.codons)
    for (i in sequence(n.codons)) {
        for (j in sequence(n.codons)) {
            if(sum(.codon.sets[i,] == .codon.sets[j,])==2) { #means that two of the bases match
                mismatch.position <- which(.codon.sets[i,] != .codon.sets[j,])
                codon.mutation.rates[i,j] <- nuc.mutation.rates[1+.codon.sets[i,mismatch.position], 1+.codon.sets[j, mismatch.position]] #nucs numbered from 0:3, rows are 1:4, thus the add 1
            }
        }
        #codon.name <- paste(n2s(as.numeric(.codon.sets[i,])), collapse="")
        rownames(codon.mutation.rates)[i] <- .codon.name[i]
        colnames(codon.mutation.rates)[i] <- .codon.name[i]

    }
    diag(codon.mutation.rates) <- 0
    diag(codon.mutation.rates) <- -rowSums(codon.mutation.rates)
    rownames(codon.mutation.rates) <- colnames(codon.mutation.rates) <- .codon.name
    return(codon.mutation.rates)
}
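
# Example usage (not run): build the 64 x 64 codon mutation matrix from a nucleotide mutation
# matrix; only codon pairs differing at a single position receive a nonzero rate.
if (FALSE) {
    nuc.Q <- CreateNucleotideMutationMatrix(rates = 2, model = "HKY")
    codon.Q <- CreateCodonMutationMatrix(nuc.Q)
    codon.Q["aaa", "aag"]  # single-position change (a -> g transition at the third site)
    codon.Q["aaa", "acg"]  # two positions differ, so the rate is 0
}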


CreateCodonMutationMatrixMutSel <- function(omega.par, fitness.pars, nuc.mutation.rates, numcode) {
    #codon.sets <- CreateCodonSets()
    n.codons <- dim(.codon.sets)[1]
    codon.mutation.rates <- matrix(data=0, nrow=n.codons, ncol=n.codons)
    rownames(codon.mutation.rates) <- rep("",n.codons)
    colnames(codon.mutation.rates) <- rep("",n.codons)
    #codon.set.translate <- apply(.codon.sets, 2, n2s)
    #codon.name <- apply(.codon.set.translate, 1, paste, collapse="")
    aa.translations <- .aa.translation[[numcode]][.codon.name]
    for (i in sequence(n.codons)) {
        for (j in sequence(n.codons)) {
            if(aa.translations[i] == aa.translations[j]){ #synonymous
                if(sum(.codon.sets[i,] == .codon.sets[j,])==2) { #means that two of the bases match
                    mismatch.position <- which(.codon.sets[i,] != .codon.sets[j,])
                    matched.position <- which(.codon.sets[i,] == .codon.sets[j,])
                    if((fitness.pars[j]-fitness.pars[i]) == 0){
                        codon.mutation.rates[i,j] = nuc.mutation.rates[1+.codon.sets[i,mismatch.position], 1+.codon.sets[j, mismatch.position]]
                    }else{
                        codon.mutation.rates[i,j] <- nuc.mutation.rates[1+.codon.sets[i,mismatch.position], 1+.codon.sets[j, mismatch.position]] * ((fitness.pars[j] - fitness.pars[i]) / (1-exp(fitness.pars[i] - fitness.pars[j])))
                    }
                }
            }else{ #nonsynonymous
                if(sum(.codon.sets[i,] == .codon.sets[j,])==2) { #means that two of the bases match
                    mismatch.position <- which(.codon.sets[i,] != .codon.sets[j,])
                    matched.position <- which(.codon.sets[i,] == .codon.sets[j,])
                    if((fitness.pars[j]-fitness.pars[i]) == 0){
                        codon.mutation.rates[i,j] = omega.par * nuc.mutation.rates[1+.codon.sets[i,mismatch.position], 1+.codon.sets[j, mismatch.position]]
                    }else{
                        codon.mutation.rates[i,j] <- omega.par * nuc.mutation.rates[1+.codon.sets[i,mismatch.position], 1+.codon.sets[j, mismatch.position]] * ((fitness.pars[j] - fitness.pars[i]) / (1-exp(fitness.pars[i] - fitness.pars[j])))
                    }
                }
            }
        }
    }
    #Remove stop codon rates -- they should be removed already, but just in case...
    codon.mutation.rates[which(aa.translations == "*"),] = codon.mutation.rates[,which(aa.translations == "*")] = 0
    #Now let us finish up the matrix:
    rownames(codon.mutation.rates) <- colnames(codon.mutation.rates) <- .codon.name
    diag(codon.mutation.rates) <- 0
    diag(codon.mutation.rates) <- -rowSums(codon.mutation.rates)
    return(codon.mutation.rates)
}


CreateCodonMutationMatrixYN98 <- function(x, codon.freqs, numcode) {
    omega.par = x[1]
    kappa.par <- x[2]
    #The last value is arbitrarily set to 0 per Yang and Nielsen (2008):
    #codon.sets <- CreateCodonSets()
    n.codons <- dim(.codon.sets)[1]
    codon.mutation.rates <- matrix(data=0, nrow=n.codons, ncol=n.codons)
    rownames(codon.mutation.rates) <- rep("",n.codons)
    colnames(codon.mutation.rates) <- rep("",n.codons)
    #codon.set.translate <- apply(.codon.sets, 2, n2s)
    #codon.name <- apply(.codon.set.translate, 1, paste, collapse="")
    #We add this in because the stop codons are not included in Grantham's distance calculation:
    aa.translations <- .aa.translation[[numcode]][.codon.name]
    for (i in sequence(n.codons)) {
        for (j in sequence(n.codons)) {
            if(aa.translations[i] == aa.translations[j]){ #synonymous -- set distance to zero.
                if(sum(.codon.sets[i,] == .codon.sets[j,])==2) { #means that two of the bases match
                    mismatch.position <- which(.codon.sets[i,] != .codon.sets[j,])
                    matched.position <- which(.codon.sets[i,] == .codon.sets[j,])
                    if(.codon.sets[i, mismatch.position] == 0 & .codon.sets[j,mismatch.position] == 2 | .codon.sets[i, mismatch.position] == 2 & .codon.sets[j,mismatch.position] == 0 | .codon.sets[i, mismatch.position] == 1 & .codon.sets[j,mismatch.position] == 3 | .codon.sets[i, mismatch.position] == 3 & .codon.sets[j,mismatch.position] == 1){
                        codon.mutation.rates[i,j] <- kappa.par * codon.freqs[j]
                    }else{
                        codon.mutation.rates[i,j] <- codon.freqs[j]
                    }
                }
            }else{ #nonsynonymous -- so we need to know Grantham's distance.
                if(sum(.codon.sets[i,] == .codon.sets[j,])==2) { #means that two of the bases match
                    mismatch.position <- which(.codon.sets[i,] != .codon.sets[j,])
                    matched.position <- which(.codon.sets[i,] == .codon.sets[j,])
                    if(.codon.sets[i, mismatch.position] == 0 & .codon.sets[j,mismatch.position] == 2 | .codon.sets[i, mismatch.position] == 2 & .codon.sets[j,mismatch.position] == 0 | .codon.sets[i, mismatch.position] == 1 & .codon.sets[j,mismatch.position] == 3 | .codon.sets[i, mismatch.position] == 3 & .codon.sets[j,mismatch.position] == 1){
                        codon.mutation.rates[i,j] <- kappa.par * codon.freqs[j] * omega.par
                    }else{
                        codon.mutation.rates[i,j] <- codon.freqs[j] * omega.par
                    }
                }
            }
        }
    }
    #Remove stop codon rates -- they should be removed already, but just in case...
    codon.mutation.rates[which(aa.translations == "*"),] = codon.mutation.rates[,which(aa.translations == "*")] = 0
    #Now let us finish up the matrix:
    rownames(codon.mutation.rates) <- colnames(codon.mutation.rates) <- .codon.name
    diag(codon.mutation.rates) <- 0
    diag(codon.mutation.rates) <- -rowSums(codon.mutation.rates)
    return(codon.mutation.rates)
}



CreateCodonMutationMatrixGY94 <- function(x, aa.distances, codon.freqs, numcode) {
    v.par <- x[1]
    kappa.par = x[2]
    #The last value is arbitrarily set to 0 per Yang and Nielsen (2008):
    #codon.sets <- CreateCodonSets()
    n.codons <- dim(.codon.sets)[1]
    codon.mutation.rates <- matrix(data=0, nrow=n.codons, ncol=n.codons)
    rownames(codon.mutation.rates) <- rep("",n.codons)
    colnames(codon.mutation.rates) <- rep("",n.codons)
    #codon.set.translate <- apply(.codon.sets, 2, n2s)
    #codon.name <- apply(.codon.set.translate, 1, paste, collapse="")
    #We add this in because the stop codons are not included in Grantham's distance calculation:
    aa.distances <- rbind(aa.distances, "*"=0, deparse.level=2)
    aa.distances <- cbind(aa.distances, "*"=0, deparse.level=2)
    aa.translations <- .aa.translation[[numcode]][.codon.name]
    for (i in sequence(n.codons)) {
        for (j in sequence(n.codons)) {
            if(aa.translations[i] == aa.translations[j]){ #synonymous -- set distance to zero.
                if(sum(.codon.sets[i,] == .codon.sets[j,])==2) { #means that two of the bases match
                    mismatch.position <- which(.codon.sets[i,] != .codon.sets[j,])
                    matched.position <- which(.codon.sets[i,] == .codon.sets[j,])
                    if(.codon.sets[i, mismatch.position] == 0 & .codon.sets[j,mismatch.position] == 2 | .codon.sets[i, mismatch.position] == 2 & .codon.sets[j,mismatch.position] == 0 | .codon.sets[i, mismatch.position] == 1 & .codon.sets[j,mismatch.position] == 3 | .codon.sets[i, mismatch.position] == 3 & .codon.sets[j,mismatch.position] == 1){
                        codon.mutation.rates[i,j] <- kappa.par * codon.freqs[j] * exp(-0/v.par)
                    }else{
                        codon.mutation.rates[i,j] <- codon.freqs[j] * exp(-0/v.par)
                    }
                }
            }else{ #nonsynonymous -- so we need to know Grantham's distance.
                if(sum(.codon.sets[i,] == .codon.sets[j,])==2) { #means that two of the bases match
                    mismatch.position <- which(.codon.sets[i,] != .codon.sets[j,])
                    matched.position <- which(.codon.sets[i,] == .codon.sets[j,])
                    if(.codon.sets[i, mismatch.position] == 0 & .codon.sets[j,mismatch.position] == 2 | .codon.sets[i, mismatch.position] == 2 & .codon.sets[j,mismatch.position] == 0 | .codon.sets[i, mismatch.position] == 1 & .codon.sets[j,mismatch.position] == 3 | .codon.sets[i, mismatch.position] == 3 & .codon.sets[j,mismatch.position] == 1){
                        codon.mutation.rates[i,j] <- kappa.par * codon.freqs[j] * exp(-aa.distances[aa.translations[i], aa.translations[j]]/v.par)
                    }else{
                        codon.mutation.rates[i,j] <- codon.freqs[j] * exp(-aa.distances[aa.translations[i], aa.translations[j]]/v.par)
                    }
                }
            }
        }
    }
    #Remove stop codon rates -- they should be removed already, but just in case...
    codon.mutation.rates[which(aa.translations == "*"),] = codon.mutation.rates[,which(aa.translations == "*")] = 0
    #Now let us finish up the matrix:
    rownames(codon.mutation.rates) <- colnames(codon.mutation.rates) <- .codon.name
    diag(codon.mutation.rates) <- 0
    diag(codon.mutation.rates) <- -rowSums(codon.mutation.rates)
    return(codon.mutation.rates)
}



CodonNumericToString <- function(x) { #remember that codon numbers start at 1
    return(n2s(x, levels=words(length=3), base4=FALSE))
}


CodonStringToNumeric <- function(x) { #remember that codon numbers start at 1
    triplet <- which(words(length=3)==x)
    if(length(triplet) == 0){
        triplet <- NA
    }
    return(triplet)
}


CodonStringToCharacter <- function(x) { #remember that codon numbers start at 1
    triplet <- x
    if(length(triplet) == 0){
        triplet <- NA
    }
    return(triplet)
}


NucleotideStringToNumeric <- function(x) { #remember that codon numbers start at 1
    singlet <- which(words(length=1)==x)
    if(length(singlet) == 0){
        singlet <- NA
    }
    return(singlet)
}


NucleotideStringToCharacter <- function(x) { #remember that codon numbers start at 1
    singlet <- x
    if(length(singlet) == 0){
        singlet <- NA
    }
    return(singlet)
}
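
# Illustrative conversions (not run); codon numbers start at 1 and follow seqinr's words()
# ordering, which matches .codon.name above:
if (FALSE) {
    CodonStringToNumeric("atg")     # 15
    CodonNumericToString(15)        # "atg"
    NucleotideStringToNumeric("g")  # 3
}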


ConvertCodonNumericDataToAAData <- function(codon.data, numcode=1) {
    aa.data <- codon.data
    for (row.index in sequence(dim(codon.data)[1])) {
        for (col.index in 2:(dim(codon.data)[2])) {
            #if(is.na(codon.data[row.index, col.index])){
            if(codon.data[row.index, col.index] == 65){
                aa.data[row.index, col.index] = "NA"
            }else{
                aa.data[row.index, col.index] <- TranslateCodon(CodonNumericToString(codon.data[row.index, col.index]), numcode=numcode)
            }
        }
    }
    return(aa.data)
}


#Problem with this is that it requires an assumption about frequencies. Flat
#  amino acid frequencies != flat codon frequencies != frequencies of codons at
#  equilibrium given the nucleotide model, for many models. So, let's just go
#  directly to the codon model.
#Create an amino acid mutation model, rows=from, cols=to
#
#CreateAAMutationMatrix <- function(codon.mutation.rates, codon.freqs = rep(1, 64), numcode=1, include.stop.codon=FALSE) {
#	TranslateCodon <- function(codon, numcode=1) {
#		return(translate(s2c(codon), numcode=numcode))
#	}
#	aa.by.codon <- sapply(rownames(codon.mutation.rates), TranslateCodon, numcode=numcode)
#	codon.mutation.rates.scaled <- codon.mutation.rates*codon.freqs
#	aa.ordered <- unique(aa.by.codon)
#	n.aa <- length(aa.ordered)
#	n.codon <- dim(codon.mutation.rates)[1]
#	aa.mutation.rates <- matrix(data=0, nrow=n.aa, ncol=n.aa)
#	rownames(aa.mutation.rates) <- aa.ordered
#	colnames(aa.mutation.rates) <- aa.ordered
#	for (i in sequence(n.codon)) {
#     for (j in sequence(n.codon)) {
#      	if (i != j) {
#          from.aa <- aa.by.codon[i]
#          to.aa <- aa.by.codon[j]
#          from.location <- which(aa.ordered==from.aa)
#          to.location <- which(aa.ordered==to.aa)
#          aa.mutation.rates[from.location, to.location] <- aa.mutation.rates[from.location, to.location] + codon.mutation.rates[i, j]
#        }
#      }
#    }
#    if(!include.stop.codon) {
#    	aa.mutation.rates<-aa.mutation.rates[which(rownames(aa.mutation.rates)!="*"),]
#    	aa.mutation.rates<-aa.mutation.rates[,which(colnames(aa.mutation.rates)!="*")]
#    }
#    diag(aa.mutation.rates) <- 0
#    diag(aa.mutation.rates) <- -rowSums(aa.mutation.rates)
#    return(aa.mutation.rates)
#}


CompareVectors <- function(cd1,cd2){
    cmp = (cd1 != cd2)
    num = sum(cmp)
    pos = which(cmp==TRUE)
    return(list(num=num,pos=pos))
}
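
# Tiny example (not run): count and locate mismatches between two equal-length vectors.
if (FALSE) {
    CompareVectors(c("A", "T", "G"), c("A", "C", "G"))  # num = 1, pos = 2
}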


GetPairwiseProteinFixationProbabilityArbitraryLength <- function(protein1, protein2, protein_op, aa.distances, nsites, C=4, Phi=0.5, q=4e-7, Ne=5e6){
    d1 <- GetProteinProteinDistance(protein1,protein_op,aa.distances)
    d2 <- GetProteinProteinDistance(protein2,protein_op,aa.distances)
    if(length(d1) != length(d2)){ #throw error if the two proteins are not the same length
        stop("error: 2 proteins are of different lengths!")
    }
    if(length(d1)==1){ #only one amino acid
        return(GetPairwiseProteinFixationProbabilitySingleSite(d1, d2, nsites=nsites, C=C, Phi=Phi, q=q, Ne=Ne))
    }
    else{
        if((length(d1)!=1)) #if s is given as a scalar, then treat it to be the same across all sites
        l = length(d1)
        cmp = CompareVectors(d1,d2)
        if(cmp$num > 1) return(0) #more than 1 position differ
        else if((cmp$num ==0)) return(1/(2*Ne)) #same fitness/functionality
        else{ #exactly 1 position differs
            pos = cmp$pos
            return(GetPairwiseProteinFixationProbabilitySingleSite(d1[pos], d2[pos], nsites=nsites, C=C, Phi=Phi, q=q, Ne=Ne))
        }
    }
}


GetFitness <- function(focal.protein, optimal.protein, aa.distances, nsites, C=4, Phi=0.5, q=4e-7) {
    focal.d <- GetProteinProteinDistance(focal.protein, optimal.protein, aa.distances)
    optimal.d <- GetProteinProteinDistance(optimal.protein, optimal.protein, aa.distances)
    return(exp(-(C+(C/nsites))*Phi*q*(focal.d-optimal.d)))
}
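
# A minimal sketch (not run): because the optimal amino acid is at distance zero from itself,
# fitness reduces to exp(-(C + C/nsites) * Phi * q * d(focal, optimal)), so an amino acid close
# to the optimum has fitness just below 1. The nsites value here is an arbitrary illustration.
if (FALSE) {
    aa.d <- CreateAADistanceMatrix()
    GetFitness(focal.protein = "S", optimal.protein = "P", aa.distances = aa.d, nsites = 100)
}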


GetPairwiseProteinFixationProbabilitySingleSite <- function(d1, d2, nsites, C=4, Phi=0.5, q=4e-7, Ne=5e6, diploid=TRUE){
    if(diploid==TRUE){
        b = 1
    }else{
        b = 2
    }
    if(d1==d2){ #When the fitnesses are the same, neutral case, pure drift
        return(1/(2*Ne))
    }else{
        fit_ratio <- exp(-(C+(C/nsites))*Phi*q*(d1-d2)) #f1/f2
        if(fit_ratio==Inf){ #1 is much better than 2 (the mutant)
            return(0)
        }
        else{
            if(fit_ratio==1){
                return(1/(2*Ne))
            }
            else{
                return((1-fit_ratio^b)/(1-fit_ratio^(2*Ne)))
            }
        }
    }
}
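
# A small sketch (not run): equal distances give the neutral fixation probability 1/(2*Ne),
# while a mutant closer to the optimum (d2 < d1) fixes with higher probability. The d and
# nsites values are arbitrary illustrations.
if (FALSE) {
    GetPairwiseProteinFixationProbabilitySingleSite(d1 = 5, d2 = 5, nsites = 100, Ne = 5e6)   # 1e-07
    GetPairwiseProteinFixationProbabilitySingleSite(d1 = 10, d2 = 5, nsites = 100, Ne = 5e6)  # > 1e-07
}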


GetProteinProteinDistance <- function(protein1, protein2, aa.distances){
    if(length(protein1) != length(protein2)){ #throw error if the two proteins are not the same length
        stop("error: 2 proteins are of different lengths!")
    }
    site_d <- function(k){
        if(protein1[k]=="*" || protein2[k]=="*") {
            warning("You have a stop codon in your sequence. This was treated as having a very large difference from other amino acids, but you probably want to exclude such sites. It may also be that your numcode is not appropriate for your data, and perhaps you want one that works for invertebrate mitochondria, chloroplasts, etc.")
            return(100*max(aa.distances))
        }
        if((is.na(protein1[k])) & (!is.na(protein2[k]))){
            return(mean(aa.distances[,protein2[k]]))
        }
        else if((is.na(protein2[k])) & (!is.na(protein1[k]))){
            return(mean(aa.distances[protein1[k],]))
        }
        else if((is.na(protein1[k])) & (is.na(protein2[k]))){
            return(mean(aa.distances))
        }
        else
        return(aa.distances[protein1[k], protein2[k]])
    }
    #d <- sapply(c(1:length(protein1)),site_d,simplify=TRUE)
    if(length(protein1) == 1){
        d <- site_d(1)
    }else{
        d <- sapply(1:length(protein1), site_d, simplify=TRUE)
    }
    return(d)
}


#FastCreateAllCodonFixationProbabilityMatrices <- function(aa.distances=CreateAADistanceMatrix(), nsites, C=2, Phi=0.5, q=4e-7, Ne=5e6, include.stop.codon=TRUE, numcode=1, diploid=TRUE, flee.stop.codon.rate=0.9999999) {
##	#codon.sets <- CreateCodonSets()
#	.codon.sets <- expand.grid(0:3, 0:3, 0:3)
#	.codon.sets <- data.frame(first=.codon.sets[,3], second=.codon.sets[,2], third=.codon.sets[,1]) #reordering to group similar codons
#	n.codons <- dim(.codon.sets)[1]
#	codon.names <- rep("", n.codons)
#	for (i in sequence(n.codons)) {
#		codon.names[i] <- paste(n2s(as.numeric(.codon.sets[i,])), collapse="")
#	}
#	codon.aa <- sapply(codon.names, TranslateCodon, numcode=numcode)
#	unique.aa <- unique(codon.aa)
#	codon.fixation.probs <- array(data=0, dim=c(n.codons, n.codons, length(unique.aa)), dimnames=list(codon.names, codon.names, unique.aa))
#	for (i in sequence(n.codons)) {
#		for (j in sequence(n.codons)) {
#			if(sum(.codon.sets[i,] == .codon.sets[j,])>=2) { #match at two or three sites of three
#				for (k in sequence(length(unique.aa))) {
#					aa1 <- codon.aa[i]
#					aa2 <- codon.aa[j]
#					if(aa1!="*" && aa2!="*" && unique.aa[k]!="*") { #says we cannot mutate to stop codons and stop codons can never be optimal
#						d1 <- GetProteinProteinDistance(protein1=aa1, protein2=unique.aa[k], aa.distances=aa.distances)
#						d2 <- GetProteinProteinDistance(protein1=aa2, protein2=unique.aa[k], aa.distances=aa.distances)
#						codon.fixation.probs[i,j, k] <- GetPairwiseProteinFixationProbabilitySingleSite(d1, d2, nsites=nsites, C=C, Phi=Phi, q=q, Ne=Ne, diploid=diploid)
#					}else {
##						We have dropped s from the model as it is now explained through grantham like distances:
##						if(s==0) { #handles stop codon case where neutral, so could possibly go into and out of stop codons
##							codon.fixation.probs[i,j, k] <- 0
##						}else {
#							if(aa2!="*" && unique.aa[k]!="*") {
#								codon.fixation.probs[i,j, k] <- 0 #Old = if we are somehow in a stop codon, have a very high rate of moving away from this; New = make is zero because in theory our model should use selection to kill these but infinite selection is rather harsh.
##							}
#						}
#					}
#				}
#			}
#		}
#	}
#	codon.fixation.probs[,,"*"] = 0
#	return(codon.fixation.probs)
#}


CreateAAFixationMatrixForEverything <- function(aa.distances=CreateAADistanceMatrix(), nsites, C=4, Phi=0.5, q=4e-7, Ne=5e6, include.stop.codon=TRUE, numcode=1, diploid=TRUE) {
    states <- c("A", "C", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y")
    fixation.array <- array(data=0, dim=rep(length(states)+include.stop.codon,3)) #adding the boolean to leave space for a stop codon if needed
    for (row.index in sequence(length(states))) {
        for (col.index in sequence(length(states))) {
            for (optimal.index in sequence(length(states))) {
                fixation.array[row.index, col.index, optimal.index] <- GetPairwiseProteinFixationProbabilitySingleSite(GetProteinProteinDistance(protein1=states[row.index], protein2=states[optimal.index], aa.distances=aa.distances),  GetProteinProteinDistance(protein1=states[col.index], protein2=states[optimal.index], aa.distances=aa.distances), nsites=nsites, C=C, Phi=Phi, q=q, Ne=Ne, diploid=diploid)

            }
        }
    }
    if(include.stop.codon) {
        states <- c(states, "*")
    }
    dimnames(fixation.array) <- list(states, states, states)
    return(fixation.array)
}
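
# Illustrative indexing (not run): the array is [current aa, proposed aa, optimal aa]; entries
# involving "*" are left at zero. The nsites value is an arbitrary illustration.
if (FALSE) {
    fix.arr <- CreateAAFixationMatrixForEverything(nsites = 100)
    dim(fix.arr)            # 21 21 21 (20 amino acids plus "*" when include.stop.codon = TRUE)
    fix.arr["A", "G", "G"]  # fixation probability of an A -> G replacement when G is optimal
}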


FastCreateAllCodonFixationProbabilityMatrices <- function(aa.distances=CreateAADistanceMatrix(), nsites, C=4, Phi=0.5, q=4e-7, Ne=5e6, include.stop.codon=TRUE, numcode=1, diploid=TRUE, flee.stop.codon.rate=0.9999999) {
    #codon.sets <- CreateCodonSets()
    #codon.sets <- expand.grid(0:3, 0:3, 0:3)
    #codon.sets[,c(3,2,1)] <- .codon.sets[,c(1,2,3)] #re-ordering as in the original one
    colnames(.codon.sets) <- c("first", "second", "third")
    n.codons <- dim(.codon.sets)[1]
    codon.names <- rep("", n.codons)
    aa.fixation.probs <- CreateAAFixationMatrixForEverything(aa.distances=aa.distances, nsites, C, Phi, q, Ne, include.stop.codon, numcode, diploid)
    for (i in sequence(n.codons)) {
        codon.names[i] <- paste(n2s(as.numeric(.codon.sets[i,])), collapse="")
    }
    codon.aa <- sapply(codon.names, TranslateCodon, numcode=numcode)
    #unique.aa <- unique(codon.aa)

    codon.fixation.probs <- array(data=0, dim=c(n.codons, n.codons, length(.unique.aa)), dimnames=list(codon.names, codon.names, .unique.aa))
    for (i in sequence(n.codons)) {
        for (j in sequence(n.codons)) {
            if(sum(.codon.sets[i,] == .codon.sets[j,])>1 ) { #match at two or more sites
                for (k in sequence(length(.unique.aa))) {
                    aa1 <- codon.aa[i]
                    aa2 <- codon.aa[j]
                    codon.fixation.probs[i,j, k] <- aa.fixation.probs[aa1, aa2, .unique.aa[k]]
                }
            }
        }
    }
    codon.fixation.probs[,,"*"] = 0
    return(codon.fixation.probs)
}
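
# Illustrative usage (not run): one 64 x 64 fixation-probability matrix per possible optimal
# amino acid (21 slices; the "*" slice is zeroed). The nsites value is an arbitrary illustration.
if (FALSE) {
    codon.fix <- FastCreateAllCodonFixationProbabilityMatrices(nsites = 100)
    dim(codon.fix)                 # 64 64 21
    codon.fix["atg", "ata", "M"]   # fixation probability for atg -> ata when Met is optimal
}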


FastCreateOptAATransitionMatrices <- function(aa.distances=CreateAADistanceMatrix(), C, Phi, q, Ne, diploid, numcode=1, importance = 1) { #Cedric: added importance

    if(diploid == TRUE) {
        Ne <- 2*Ne
    } #Cedric: pay attention to diploid flag

    aa.dist.names <- colnames(aa.distances)
    aa.distances <- cbind(aa.distances, 0)
    aa.distances <- rbind(aa.distances, 0)
    colnames(aa.distances) <- c(aa.dist.names, "*")
    rownames(aa.distances) <- c(aa.dist.names, "*")

    numcode.idx <- .numcode.translation.idx[numcode]
    aa.names <- .aa.translation[[numcode.idx]]

    #aa.trans.mat <- (1.0/(aa.distances[.unique.aa, .unique.aa])^importance)/Ne #Cedric: adjusting for importance parameter and using 1/d instead of d
    aa.trans.mat <- (exp(-importance*aa.distances[.unique.aa, .unique.aa]))/Ne
    diag(aa.trans.mat) <- 0 # because R CAN divide by 0, some real Chuck Norris stuff here
    aa.trans.mat[,colnames(aa.trans.mat) == "*"] <- 0 # find better solution
    aa.trans.mat[colnames(aa.trans.mat) == "*",] <- 0

    # normalize each row so its transition weights sum to one
    norm.const <- rowSums(aa.trans.mat, na.rm = TRUE)
    for(i in seq_len(nrow(aa.trans.mat))){
        if(norm.const[i] > 0){ #skip the all-zero stop codon row to avoid dividing by zero
            aa.trans.mat[i,] <- aa.trans.mat[i,] / norm.const[i]
        }
    }


    aa.trans.matrices <- vector("list", 21)
    for(j in 1:21) {
        trans.matrix <- matrix(0, ncol=1344, nrow=64)
        for(i in 1:21) {
            index.vec.diag <- (1+(i-1)*64):(64+(i-1)*64)
            trans.matrix[, index.vec.diag] <- diag(aa.trans.mat[.unique.aa[j], .unique.aa[i]], ncol=64, nrow=64)
        }
        aa.trans.matrices[[j]] <- trans.matrix
    }
    names(aa.trans.matrices) <- .unique.aa
    return(aa.trans.matrices)
}


FastCreateEvolveAACodonFixationProbabilityMatrix <- function(aa.distances = CreateAADistanceMatrix(), nsites, C = 4, Phi = 0.5, q = 4e-7, Ne = 5e6, include.stop.codon = TRUE, numcode = 1, diploid = TRUE, flee.stop.codon.rate = 0.9999999, importance = 1) { #Cedric: Added the importance parameter
    codon.fixation.probs <- FastCreateAllCodonFixationProbabilityMatrices(aa.distances, nsites, C, Phi, q, Ne, include.stop.codon, numcode, diploid, flee.stop.codon.rate)
    opt.aa.transition.rate <- FastCreateOptAATransitionMatrices(aa.distances=aa.distances, C=C, Phi=Phi, q=q, Ne=Ne, diploid=diploid, numcode=numcode, importance) #Cedric: passing importance through

    n.codons <- dim(.codon.sets)[1]
    mat.dim <- 21*n.codons
    evolv.codon.fixation.probs <- matrix(data=0, nrow=mat.dim, ncol=mat.dim)

    for(i in 1:21) {
        index.vec.diag <- (1+(i-1)*64):(64+(i-1)*64)
        evolv.codon.fixation.probs[index.vec.diag, ] <- opt.aa.transition.rate[[i]] # fill in a complete column of matrices; this has to be done before the matrix on the diagonal
        evolv.codon.fixation.probs[index.vec.diag, index.vec.diag] <- codon.fixation.probs[,,.unique.aa[i]] # fill in matrix on the diagonal
    }

    rownames(evolv.codon.fixation.probs) <- paste(rep(.codon.name, times=21), rep(.unique.aa, each=64), sep="")
    colnames(evolv.codon.fixation.probs) <- paste(rep(.codon.name, times=21), rep(.unique.aa, each=64), sep="")

    return(evolv.codon.fixation.probs)
}


## Work in progress ##
#cppFunction('NumericMatrix CreateCodonFixationProbabilityMatrixGivenOptimalAA(NumericMatrix codon_sets, StringVector aa, NumericMatrix aa_distances, int nsites, double C, double Phi, double q, double Ne, bool include_stop_codon,  int numcode, bool diploid, Function GetProteinProteinDistance, Function GetPairwiseProteinFixationProbabilitySingleSite, int optimalaa_offset0index) {
#	NumericMatrix codon_fixation_probs_aa(codon_sets.nrow(), codon_sets.nrow());
#//	Rcpp::Rcout << "ok";
#//	Rcout << aa;
#	for (int i=0; i<codon_sets.nrow(); ++i) {
#		for(int j=0; j<codon_sets.nrow(); ++j) {
#			int mismatches=0;
#			for(int pos=0; pos<3; ++pos) {
#				if(codon_sets(i, pos)==codon_sets(j, pos)) {
#					mismatches++;
#				}
#			}
#			if(mismatches<2) {
#				StringVector proteinI(1);
#				StringVector proteinJ(1);
#				StringVector proteinOptimal(1);
#				proteinI(0) = aa(i);
#				proteinJ(0) = aa(j);
#				proteinOptimal(0) = aa(optimalaa_offset0index);
#				codon_fixation_probs_aa(i,j) = as<double>(GetPairwiseProteinFixationProbabilitySingleSite(GetProteinProteinDistance(_["protein1"]=proteinI, _["protein2"]=proteinOptimal, _["aa.distances"]=aa_distances),GetProteinProteinDistance(_["protein1"]=proteinJ, _["protein2"]=proteinOptimal, _["aa.distances"]=aa_distances), _["nsites"]=nsites, _["C"]=C, _["Phi"]=Phi, _["q"]=q, _["Ne"]=Ne, _["diploid"]=diploid));
#				//codon_fixation_probs_aa(i,j) = as<double>(GetPairwiseProteinFixationProbabilitySingleSite(GetProteinProteinDistance(_["protein1"]=aa(i), _["protein2"]=aa(optimalaa_offset0index), _["aa.distances"]=aa_distances),  GetProteinProteinDistance(_["protein1"]=aa(j), _["protein2"]=aa(optimalaa_offset0index), _["aa.distances"]=aa_distances), _["nsites"]=nsites, _["C"]=C, _["Phi"]=Phi, _["q"]=q, _["Ne"]=Ne, _["diploid"]=diploid));
#			}
#		}
#	}
#	return(codon_fixation_probs_aa);
#}')
#result <- CreateCodonFixationProbabilityMatrixGivenOptimalAA(.codon.sets, unname(codon.aa),  aa.distances, nsites, C, Phi, q, Ne, include.stop.codon,  numcode, diploid, GetProteinProteinDistance, GetPairwiseProteinFixationProbabilitySingleSite, k-1)

## Work in progress ##
#CCQuestionablyFastCreateAllCodonFixationProbabilityMatrices <- function(aa.distances=CreateAADistanceMatrix(), nsites, C=4.0, Phi=0.5, q=4e-7, Ne=5e6, include.stop.codon=TRUE, numcode=1, diploid=TRUE, flee.stop.codon.rate=0.9999999) {
#	.codon.sets <- CreateCodonSets()
#	.codon.sets <- expand.grid(0:3, 0:3, 0:3)
#	.codon.sets[,c(3,2,1)] <- .codon.sets[,c(1,2,3)] #re-ordering as in the original one
#	colnames(.codon.sets) <- c("first", "second", "third")
#	n.codons <- dim(.codon.sets)[1]
#	codon.names <- rep("", n.codons)
#	aa.fixation.probs <- CreateAAFixationMatrixForEverything(aa.distances=aa.distances, nsites, C, Phi, q, Ne, include.stop.codon, numcode, diploid)
#	for (i in sequence(n.codons)) {
#		codon.names[i] <- paste(n2s(as.numeric(.codon.sets[i,])), collapse="")
#	}
#	codon.aa <- sapply(codon.names, TranslateCodon, numcode=numcode)
#	unique.aa <- unique(codon.aa)

#	codon.fixation.probs <- array(data=0, dim=c(n.codons, n.codons, length(unique.aa)), dimnames=list(codon.names, codon.names, unique.aa))
#	for (k in sequence(length(unique.aa))) {
#		codon.fixation.probs[,,k] <- CreateCodonFixationProbabilityMatrixGivenOptimalAA(.codon.sets, unname(codon.aa),  aa.distances, nsites, C, Phi, q, Ne, include.stop.codon,  numcode, diploid, GetProteinProteinDistance, GetPairwiseProteinFixationProbabilitySingleSite, k-1) #k-1 due to C++ counting from zero
#	}
#	return(codon.fixation.probs)
#}


DiagArray <- function (dim){
    n <- dim[2]
    d <- seq(1, n*n, by=n+1)
    as.vector(outer(d, seq(0, by=n*n, length=prod(dim[-1:-2])), "+"))
}
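
# A small check (not run) of what DiagArray returns: the linear indices of the diagonal of every
# matrix slice in an array, so all slice diagonals can be set in a single assignment.
if (FALSE) {
    arr <- array(0, dim = c(4, 4, 3))
    arr[DiagArray(dim(arr))] <- 1
    all(apply(arr, 3, diag) == 1)  # TRUE
}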


FastCreateAllCodonFixationProbabilityMatricesSetToOne <- function(numcode=1) {
    #   codon.sets <- CreateCodonSets()
    #	.codon.sets <- expand.grid(0:3, 0:3, 0:3)
    #	.codon.sets <- data.frame(first=.codon.sets[,3], second=.codon.sets[,2], third=.codon.sets[,1]) #reordering to group similar codons
    n.codons <- dim(.codon.sets)[1]
    codon.names <- rep("", n.codons)
    for (i in sequence(n.codons)) {
        codon.names[i] <- paste(n2s(as.numeric(.codon.sets[i,])), collapse="")
    }
    codon.aa <- sapply(codon.names, TranslateCodon, numcode=numcode)
    #unique.aa <- unique(codon.aa)
    codon.fixation.rates <- array(data=1, dim=c(n.codons, n.codons, length(.unique.aa)), dimnames=list(codon.names, codon.names, .unique.aa))
    return(codon.fixation.rates)
}


CreateCodonFixationProbabilityMatrix <- function(aa_op, s, aa.distances, nsites, C=4, Phi=0.5, q=4e-7, Ne=5e6, include.stop.codon=TRUE, numcode=1){
    #   codon.sets <- CreateCodonSets()
    #    .codon.sets <- expand.grid(0:3, 0:3, 0:3)
    #    .codon.sets <- data.frame(first=.codon.sets[,3], second=.codon.sets[,2], third=.codon.sets[,1]) #reordering to group similar codons
    n.codons <- dim(.codon.sets)[1]
    codon.fixation.rates <- matrix(data=0, nrow=n.codons, ncol=n.codons)
    codon.names <- rep("", n.codons)
    for (i in sequence(n.codons)) {
        codon.names[i] <- paste(n2s(as.numeric(.codon.sets[i,])), collapse="")
    }
    rownames(codon.fixation.rates) <- codon.names
    colnames(codon.fixation.rates) <- codon.names
    codon.aa <- sapply(codon.names, TranslateCodon, numcode=numcode)

    for (i in sequence(n.codons)) {
        for (j in sequence(n.codons)) {
            if(sum(.codon.sets[i,] == .codon.sets[j,])>=2) { #match at two or three sites of three
                aa1 <- TranslateCodon(paste(n2s(as.numeric(.codon.sets[i,])), collapse=""), numcode=numcode)
                aa2 <- TranslateCodon(paste(n2s(as.numeric(.codon.sets[j,])), collapse=""), numcode=numcode)
                if(aa1!="*" && aa2!="*") { #says we cannot mutate to stop codons
                    d1 <- GetProteinProteinDistance(protein1=aa1, protein2=aa_op, aa.distances=aa.distances)
                    d2 <- GetProteinProteinDistance(protein1=aa2, protein2=aa_op, aa.distances=aa.distances)
                    codon.fixation.rates[i,j] <- GetPairwiseProteinFixationProbabilitySingleSite(d1, d2, nsites=nsites, C=C, Phi=Phi, q=q, Ne=Ne)
                } else {
                    if(s==0) { #handles stop codon case where neutral, so could possibly go into and out of stop codons
                        codon.fixation.rates[i,j] <- 1/(2*Ne)
                    }
                }
            }
        }
    }
    if(!include.stop.codon) {
        codon.fixation.rates<-codon.fixation.rates[which(codon.aa!="*"),]
        codon.fixation.rates<-codon.fixation.rates[,which(codon.aa!="*")]
    }
    diag(codon.fixation.rates) <- 0 #b/c we don't want these included when calculating the diagonal
    diag(codon.fixation.rates) <- -rowSums(codon.fixation.rates)
    return(codon.fixation.rates)
}


CreateAAFixationMatrix <- function(aa_op,s,aa.distances,C=4, Phi=0.5, q=4e-7, Ne=5e6){
    m = 20
    mat <- matrix(0,nrow=m,ncol=m)#set diagonal entries to be 0 at first
    for(i in 1:(m-1)){
        for(j in (i+1):m){
            mat[i,j] <- GetPairwiseProteinFixationProbabilityArbitraryLength(i,j,aa_op,s,aa.distances,C,Phi,q,Ne) #fixation prob -> transition rate
            mat[j,i] <- GetPairwiseProteinFixationProbabilityArbitraryLength(j,i,aa_op,s,aa.distances,C,Phi,q,Ne) #symmetric entry (not the same rate!)
        }#end for j
    }#end for i
    return(mat)
}


CreateCodonSets <- function() {
    codon.sets <- expand.grid(0:3, 0:3, 0:3)
    codon.sets[,c(3,2,1)] <- codon.sets[,c(1,2,3)] #re-ordering as in the original one
    colnames(codon.sets) <- c("first", "second", "third")
    codon.sets <- as.matrix(codon.sets)
    return(codon.sets)
}


GetLikelihoodSAC_AAForSingleCharGivenOptimum <- function(aa.data, phy, Q_aa, charnum=1, root.p=NULL, return.all=FALSE) {
    #result <- rayDISC(phy=phy, data=aa.data, ntraits=1, charnum=charnum, p=Q_aa, root.p=root.p, node.states="marginal")
    #Makes dev.rayDISC available:
    #dev.raydisc <- corHMM:::dev.raydisc
    nb.tip<-length(phy$tip.label)
    nb.node <- phy$Nnode
    nl <- nrow(Q_aa)
    #Now we need to build the matrix of likelihoods to pass to dev.raydisc:
    liks <- matrix(0, nb.tip + nb.node, nl)
    #Now loop through the tips.
    for(i in 1:nb.tip){
        #If the amino acid at a site for a species is not NA, then just put a 1 in the appropriate column.
        #Note: We add charnum+1, because the first column in the data is the species labels:
        if(!is.na(aa.data[i,charnum+1])){
            liks[i,aa.data[i,charnum+1]] <- 1
        }else{
            #If here, then the site has no data, so we treat it as ambiguous for all possible states. Things might well be more complicated, but this can be modified later:
            liks[i,] <- 1
        }
    }
    diag(Q_aa) = 0
    diag(Q_aa) = -rowSums(Q_aa)
    #The result here is just the likelihood:
    #result <- -dev.raydisc(p=NULL, phy=phy, liks=liks, Q=Q_aa, rate=NULL, root.p=root.p)
    result <- -Inf
    if(return.all) stop("return all not currently implemented")
    return(result)
}


GetLikelihoodSAC_CodonForSingleCharGivenOptimumHMMScoring <- function(charnum=1, codon.data, phy, Q_codon_array, expQt.codon, root.p=NULL, scale.factor, anc.indices, return.all=FALSE) {
    nb.tip <- length(phy$tip.label)
    nb.node <- phy$Nnode

    nl <- 64
    # #Now we need to build the matrix of likelihoods to store node and tip state:
    # if(all(codon.data[,charnum+1] < 65)){
    #   #no need to subset
    #   liks <- Matrix::sparseMatrix(i=1:nb.tip,j=codon.data[,charnum+1],x=1,dims=c(nb.tip + nb.node, nl))
    # } else {
    #   key<-codon.data[,charnum+1] < 65
    #   liks <- Matrix::sparseMatrix(i=which(key),j=codon.data[which(key),charnum+1],x=1,dims=c(nb.tip + nb.node, nl))
    #   liks[which(!key),-c(49, 51, 57)] <- 1
    # }
    # ## Now HMM this matrix by pasting these together:
    # liks.HMM <- cbind(liks,liks,liks,liks,
    #                   liks,liks,liks,liks,
    #                   liks,liks,liks,liks,
    #                   liks,liks,liks,liks,
    #                   Matrix::Matrix(0, nb.tip + nb.node, nl),
    #                   liks,liks,liks,liks)
    liks.HMM <- Matrix::Matrix(0, nb.node, 21* nl)
    TIPS <- 1:nb.tip
    comp <- numeric(nb.tip + nb.node)

    if(any(root.p < 0) | any(is.na(root.p))){
        return(1000000)
    }
    #Obtain an object of all the unique ancestors
    for (focal in anc.indices) {
        #the ancestral node at row i is called focal
        #focal <- anc[i]
        #Get descendant information of focal
        desRows<-which(phy$edge[,1]==focal)
        # desNodes<-phy$edge[desRows,2]
        v <- 1
        for (rowIndex in desRows){
            desNode= phy$edge[rowIndex,2]
            if(desNode <= nb.tip){
                if(codon.data[ desNode,charnum+1] < 65){
                    v <- v * expQt.codon[[codon.data[ desNode,charnum+1] ]][[desNode]]
                    # v <- v * internal_expAtv(A=Q_codon_array ,t=phy$edge.length[rowIndex], v=liks.HMM[phy$edge[rowIndex,2],])
                }
            }else{
                v <- v * internal_expAtv(A=Q_codon_array ,t=phy$edge.length[rowIndex], v=liks.HMM[desNode-nb.tip,])
            }
        }
        comp[focal] <- sum(v)
        liks.HMM[focal-nb.tip,] <- v/comp[focal]
    }
    #Specifies the root:
    # root <- nb.tip + 1L
    #If any of the logs have NAs restart search:
    if(is.nan(sum(log(comp[-TIPS]))) || is.na(sum(log(comp[-TIPS])))){
        return(1000000)
    }else{
        loglik<- (sum(log(comp[-TIPS])) + log(sum(root.p * liks.HMM[1L,])))
        if(is.infinite(loglik)){return(1000000)}
    }
    return(loglik)


    #The result here is just the likelihood:
    # result <- -TreeTraversalODE(phy=phy, Q_codon_array_vectored=Q_codon_array_vectored, liks.HMM=liks.HMM, bad.likelihood=-100000, root.p=root.p)
    # ifelse(return.all, stop("return all not currently implemented"), return(result))
}
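
## A minimal sketch (not run) of the per-node pruning step in the HMM scoring function
## above: each descendant's partial-likelihood vector is propagated along its branch and
## the results are multiplied elementwise; the node vector is then rescaled by its sum,
## which is accumulated in comp. expm::expm stands in here for the package's own
## internal_expAtv; the 2-state Q is a toy example.
# Q <- matrix(c(-1, 1, 1, -1), 2, 2)
# v.left  <- as.vector(expm::expm(Q * 0.1) %*% c(1, 0))
# v.right <- as.vector(expm::expm(Q * 0.2) %*% c(0, 1))
# v <- v.left * v.right
# comp.focal <- sum(v)
# v <- v / comp.focal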


GetLikelihoodSAC_CodonForSingleCharGivenOptimum <- function(charnum=1, codon.data, phy, Q_codon, root.p=NULL, scale.factor, anc.indices, return.all=FALSE) {
    nb.tip <- length(phy$tip.label)
    nb.node <- phy$Nnode

    nl <- nrow(Q_codon[[1]])
    #Now we need to build the matrix of tip likelihoods to pass to FinishLikelihoodCalculation:
    liks <- matrix(0, nb.tip + nb.node, nl)
    if(all(codon.data[,charnum+1] < 65)){
        #no need to subset
        liks[cbind(1:nb.tip,codon.data[,charnum+1])] <- 1
    } else {
        key<-codon.data[,charnum+1] < 65
        liks[cbind(which(key),codon.data[which(key),charnum+1])] <- 1
        liks[which(!key),] <- 1
        if(nl > 4){
            liks[which(!key),c(49, 51, 57)] <- 0
        }
    }
    #The result here is just the likelihood:
    result <- -FinishLikelihoodCalculation(phy=phy, liks=liks, Q=Q_codon, root.p=root.p, anc=anc.indices)
    if(return.all) stop("return all not currently implemented");
    return(result)
}
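
## A minimal sketch (not run) of the tip-likelihood encoding built above: an observed
## codon gets a single 1 in its column, while missing data (coded >= 65) is treated as
## ambiguous across all states except the three stop codons (columns 49, 51, and 57).
# nl <- 64
# tip.states <- c(1, 5, 65) #third tip coded as missing
# liks <- matrix(0, length(tip.states), nl)
# key <- tip.states < 65
# liks[cbind(which(key), tip.states[key])] <- 1
# liks[which(!key), ] <- 1
# liks[which(!key), c(49, 51, 57)] <- 0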


GetLikelihoodSAC_CodonForManyCharGivenFixedOptimumAndQAndRoot <- function(codon.data, phy, Q_codon, root.p=NULL, return.all=FALSE) {
    return(sum(sapply(seq(from=1, to=dim(codon.data)[2]-1, by=1), GetLikelihoodSAC_CodonForSingleCharGivenOptimum, codon.data=codon.data, phy=phy, Q_codon=Q_codon, root.p=root.p, return.all=return.all)))
}


GetLikelihoodSAC_CodonForManyCharVaryingBySiteEvolvingAA <- function(codon.data, phy, Q_codon_array, codon.freq.by.aa=NULL, codon.freq.by.gene=NULL, aa.optim_array, codon_mutation_matrix, Ne, rates, numcode, diploid, n.cores.by.gene.by.site=1, verbose=FALSE){

    nsites.unique <- dim(codon.data$unique.site.patterns)[2]-1
    final.likelihood.vector <- rep(NA, nsites.unique)

    #We rescale the codon matrix only:
    diag(codon_mutation_matrix) = 0
    diag(codon_mutation_matrix) = -rowSums(codon_mutation_matrix)
    scale.factor <- -sum(diag(codon_mutation_matrix) * codon.freq.by.gene, na.rm=TRUE)
    codon_mutation_matrix_scaled = codon_mutation_matrix * (1/scale.factor)
    #Finish the Q_array codon mutation matrix multiplication here:
    if(diploid == TRUE){
        Q_codon_array = (2 * Ne) * codon_mutation_matrix_scaled * Q_codon_array
    }else{
        Q_codon_array = Ne * codon_mutation_matrix_scaled * Q_codon_array
    }
    diag(Q_codon_array) = 0
    diag(Q_codon_array) = -rowSums(Q_codon_array)
    Q_codon_array <- Matrix::Matrix(Q_codon_array)
    #Put the na.rm=TRUE bit here just in case -- when the amino acid is a stop codon, there is a bunch of NaNs. Should be fixed now.
    #scale.factor <- -sum(Q_codon_array[DiagArray(dim(Q_codon_array))] * equilibrium.codon.freq, na.rm=TRUE)

    #Generate matrix of root frequencies for each optimal AA:
    root.p_array <- codon.freq.by.gene
    #root.p_array <- t(root.p_array)
    #root.p_array <- root.p_array / rowSums(root.p_array)
    #rownames(root.p_array) <- .unique.aa
    phy.sort <- reorder(phy, "pruningwise")
    # Q_codon_array_vectored <- c(t(Q_codon_array)) # has to be transposed
    # Q_codon_array_vectored <- Q_codon_array_vectored[.non_zero_pos]

    ## This evaluates exp(Qt)*liks for all of the tip edges for all 64 codon arrangements.
    ## Evaluating multiple tips at once is cheaper than separate evaluation.
    ## In the future, perhaps reduce the number of evaluations for unused codons
    nb.tip <- length(phy.sort$tip.label)
    edge.length.tip <- numeric(nb.tip)
    tip.edge=which(phy.sort$edge[,2]<=nb.tip)
    edge.length.tip[phy.sort$edge[tip.edge,2]] <- phy.sort$edge.length[tip.edge]

    #Future: find all tips which contain a codon
    tip.codons <- lapply(seq_len(nb.tip), function(j) unique(codon.data$unique.site.patterns[j,-1]) )
    codon.tips <- lapply(1:64,function(i) sapply(tip.codons,function(j) i%in% j ) )
    if(!all(sapply(codon.tips,any))) {
        TipLikelihood_codon <- function(codon_number){
            # relevant lines:
            # rowIndex <- which(phy$edge[,1]==focal)
            # v <- v * internal_expAtv(A=Q_codon_array ,t=phy$edge.length[rowIndex], v=liks.HMM[phy$edge[rowIndex,2],])
            if(!any(codon.tips[[codon_number]])) return(list())
            exp_A_tvec_codon(A = Q_codon_array,codon = codon_number,tvec = edge.length.tip)
        }
    }else {
        TipLikelihood_codon <- function(codon_number){
            # relevant lines:
            # rowIndex <- which(phy$edge[,1]==focal)
            # v <- v * internal_expAtv(A=Q_codon_array ,t=phy$edge.length[rowIndex], v=liks.HMM[phy$edge[rowIndex,2],])
            exp_A_tvec_codon(A = Q_codon_array,codon = codon_number,tvec = edge.length.tip)
        }
    }
    # set this up so that:
    # internal_expAtv(A=Q_codon_array ,t=phy$edge.length[rowIndex], v=liks.HMM[phy$edge[rowIndex,2],])
    # can be replaced by:
    # expQt.codon[[ codon.data[ phy$edge[rowIndex,2], charnum+1 ] ]][[ phy$edge[rowIndex,2] ]]
    expQt.codon <- list(list())
    if(n.cores.by.gene.by.site == 1    ){
        expQt.codon <- lapply(1:64, TipLikelihood_codon)
    } else if(64 > n.cores.by.gene.by.site &&  sum(sapply(codon.tips,any)) %/% n.cores.by.gene.by.site < 10 ) {
        expQt.codon <- mclapply(1:64, TipLikelihood_codon, mc.cores=n.cores.by.gene.by.site, mc.preschedule=FALSE)
    } else {
        expQt.codon <- mclapply(1:64, TipLikelihood_codon, mc.cores=n.cores.by.gene.by.site, mc.preschedule=T)
    }
    anc.indices <- unique(phy.sort$edge[,1])
    if(verbose){
        MultiCoreLikelihoodBySite <- function(nsite.index){
            tmp <- GetLikelihoodSAC_CodonForSingleCharGivenOptimumHMMScoring(charnum=nsite.index, codon.data=codon.data$unique.site.patterns,
            phy=phy.sort, Q_codon_array=Q_codon_array,
            expQt.codon=expQt.codon,
            root.p=root.p_array, scale.factor=scale.factor,
            anc.indices=anc.indices, return.all=FALSE)
            cat(".")
            return(tmp)
        }

    } else {
        MultiCoreLikelihoodBySite <- function(nsite.index){
            tmp <- GetLikelihoodSAC_CodonForSingleCharGivenOptimumHMMScoring(charnum=nsite.index, codon.data=codon.data$unique.site.patterns, phy=phy.sort,
            Q_codon_array=Q_codon_array,
            expQt.codon=expQt.codon,
            root.p=root.p_array, scale.factor=scale.factor,
            anc.indices=anc.indices, return.all=FALSE)
            return(tmp)
        }
    }
    if(n.cores.by.gene.by.site == 1    ){
        final.likelihood.vector <- unlist(lapply(1:nsites.unique, MultiCoreLikelihoodBySite))
    } else if(nsites.unique > n.cores.by.gene.by.site &&  nsites.unique %/% n.cores.by.gene.by.site < 10 ) {
        final.likelihood.vector <- unlist(mclapply(1:nsites.unique, MultiCoreLikelihoodBySite, mc.cores=n.cores.by.gene.by.site, mc.preschedule=FALSE))
    } else {
        final.likelihood.vector <- unlist(mclapply(1:nsites.unique, MultiCoreLikelihoodBySite, mc.cores=n.cores.by.gene.by.site, mc.preschedule=T))
    }
    if(verbose) cat("|\n")
    return(final.likelihood.vector)
}
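
## A minimal sketch (not run) of the mutation-matrix rescaling done at the top of the
## function above (and of its non-HMM counterpart below): the matrix is scaled so that
## the expected substitution flux at the supplied frequencies is one event per unit
## branch length. Toy 3-state numbers only.
# mut <- matrix(c(0, 2, 1,
#                 2, 0, 4,
#                 1, 4, 0), nrow=3, byrow=TRUE)
# freqs <- c(0.5, 0.3, 0.2)
# diag(mut) <- -rowSums(mut)
# scale.factor <- -sum(diag(mut) * freqs)
# mut.scaled <- mut * (1/scale.factor)
# -sum(diag(mut.scaled) * freqs) #equals 1 after rescaling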


GetLikelihoodSAC_CodonForManyCharVaryingBySite <- function(codon.data, phy, Q_codon_array, codon.freq.by.aa=NULL, codon.freq.by.gene=NULL, aa.optim_array, codon_mutation_matrix, Ne, rates, numcode, diploid, n.cores.by.gene.by.site=1) {

    nsites.unique <- dim(codon.data$unique.site.patterns)[2]-1
    final.likelihood.vector <- rep(NA, nsites.unique)
    #unique.aa <- GetMatrixAANames(numcode)

    #We rescale the codon matrix only:
    diag(codon_mutation_matrix) = 0
    diag(codon_mutation_matrix) = -rowSums(codon_mutation_matrix)
    scale.factor <- -sum(diag(codon_mutation_matrix) * codon.freq.by.gene, na.rm=TRUE)
    codon_mutation_matrix_scaled = codon_mutation_matrix * (1/scale.factor)
    #Finish the Q_array codon mutation matrix multiplication here:
    for(k in 1:21){
        if(diploid == TRUE){
            Q_codon_array[,,.unique.aa[k]] = (2 * Ne) * codon_mutation_matrix_scaled * Q_codon_array[,,.unique.aa[k]]
        }else{
            Q_codon_array[,,.unique.aa[k]] = Ne * codon_mutation_matrix_scaled * Q_codon_array[,,.unique.aa[k]]
        }
        diag(Q_codon_array[,,.unique.aa[k]]) = 0
        diag(Q_codon_array[,,.unique.aa[k]]) = -rowSums(Q_codon_array[,,.unique.aa[k]])
    }

    #Put the na.rm=TRUE bit here just in case -- when the amino acid is a stop codon, there is a bunch of NaNs. Should be fixed now.
    #scale.factor <- -sum(Q_codon_array[DiagArray(dim(Q_codon_array))] * equilibrium.codon.freq, na.rm=TRUE)
    phy <- reorder(phy, "pruningwise")

    ## This is obviously not very elegant, but not sure how else to code it to store this stuff in this way -- WORK IN PROGRESS:
    tempGetAAExpQt <- local({
        p0=phy
        Qca=Q_codon_array
        r0=rates
        function(aa) {
            GetExpQt(phy=p0, Q=Qca[,,aa], scale.factor=NULL, rates=r0)
        } })
    aa.names <- c("K", "N", "T", "R", "S",
                  "I", "M", "Q", "H", "P",
                  "L", "E", "D", "A", "G",
                  "V", "Y", "C", "W", "F")
    expQt <- mclapply(aa.names, FUN=tempGetAAExpQt, mc.cores=n.cores.by.gene.by.site)
    names(expQt) <- aa.names
    # expQt$K <- GetExpQt(phy=phy, Q=Q_codon_array[,,"K"], scale.factor=NULL, rates=rates)
    # expQt$N <- GetExpQt(phy=phy, Q=Q_codon_array[,,"N"], scale.factor=NULL, rates=rates)
    # expQt$T <- GetExpQt(phy=phy, Q=Q_codon_array[,,"T"], scale.factor=NULL, rates=rates)
    # expQt$R <- GetExpQt(phy=phy, Q=Q_codon_array[,,"R"], scale.factor=NULL, rates=rates)
    # expQt$S <- GetExpQt(phy=phy, Q=Q_codon_array[,,"S"], scale.factor=NULL, rates=rates)
    # expQt$I <- GetExpQt(phy=phy, Q=Q_codon_array[,,"I"], scale.factor=NULL, rates=rates)
    # expQt$M <- GetExpQt(phy=phy, Q=Q_codon_array[,,"M"], scale.factor=NULL, rates=rates)
    # expQt$Q <- GetExpQt(phy=phy, Q=Q_codon_array[,,"Q"], scale.factor=NULL, rates=rates)
    # expQt$H <- GetExpQt(phy=phy, Q=Q_codon_array[,,"H"], scale.factor=NULL, rates=rates)
    # expQt$P <- GetExpQt(phy=phy, Q=Q_codon_array[,,"P"], scale.factor=NULL, rates=rates)
    # expQt$L <- GetExpQt(phy=phy, Q=Q_codon_array[,,"L"], scale.factor=NULL, rates=rates)
    # expQt$E <- GetExpQt(phy=phy, Q=Q_codon_array[,,"E"], scale.factor=NULL, rates=rates)
    # expQt$D <- GetExpQt(phy=phy, Q=Q_codon_array[,,"D"], scale.factor=NULL, rates=rates)
    # expQt$A <- GetExpQt(phy=phy, Q=Q_codon_array[,,"A"], scale.factor=NULL, rates=rates)
    # expQt$G <- GetExpQt(phy=phy, Q=Q_codon_array[,,"G"], scale.factor=NULL, rates=rates)
    # expQt$V <- GetExpQt(phy=phy, Q=Q_codon_array[,,"V"], scale.factor=NULL, rates=rates)
    # expQt$Y <- GetExpQt(phy=phy, Q=Q_codon_array[,,"Y"], scale.factor=NULL, rates=rates)
    # expQt$C <- GetExpQt(phy=phy, Q=Q_codon_array[,,"C"], scale.factor=NULL, rates=rates)
    # expQt$W <- GetExpQt(phy=phy, Q=Q_codon_array[,,"W"], scale.factor=NULL, rates=rates)
    # expQt$F <- GetExpQt(phy=phy, Q=Q_codon_array[,,"F"], scale.factor=NULL, rates=rates)

    #Generate matrix of root frequencies for each optimal AA:
    root.p_array <- matrix(codon.freq.by.aa, nrow=dim(Q_codon_array)[2], ncol=21)
    root.p_array <- t(root.p_array)
    root.p_array <- root.p_array / rowSums(root.p_array)
    rownames(root.p_array) <- .unique.aa

    phy.sort <- reorder(phy, "pruningwise")
    anc.indices <- unique(phy.sort$edge[,1])
    MultiCoreLikelihoodBySite <- function(i){
        tmp <- GetLikelihoodSAC_CodonForSingleCharGivenOptimum(charnum=i, codon.data=codon.data$unique.site.patterns, phy=phy.sort, Q_codon=expQt[[aa.optim_array[i]]], root.p=root.p_array[aa.optim_array[i],], scale.factor=scale.factor, anc.indices=anc.indices, return.all=FALSE)
        return(tmp)
    }
    final.likelihood.vector.mc <- unlist(mclapply(1:nsites.unique, MultiCoreLikelihoodBySite, mc.cores=n.cores.by.gene.by.site))
    #    final.likelihood.vector <- rep(NA, nsites)
    #for (i in sequence(nsites)) {
    #    final.likelihood.vector[i] <- GetLikelihoodSAC_CodonForSingleCharGivenOptimum(charnum=i, codon.data=codon.data$unique.site.patterns, phy=phy.sort, Q_codon=expQt[[aa.optim_array[i]]], root.p=root.p_array[aa.optim_array[i],], scale.factor=scale.factor, anc.indices=anc.indices, return.all=FALSE)
    #}
    #print(cbind(final.likelihood.vector, final.likelihood.vector.mc))
    return(final.likelihood.vector.mc)
}


GetLikelihoodMutSel_CodonForManyCharVaryingBySite <- function(codon.data, phy, root.p_array=NULL, Q_codon, numcode, n.cores.by.gene.by.site=1) {
    nsites.unique <- dim(codon.data$unique.site.patterns)[2] - 1
    final.likelihood.vector <- rep(NA, nsites.unique)

    diag(Q_codon) = 0
    diag(Q_codon) = -rowSums(Q_codon)
    scale.factor <- -sum(diag(Q_codon) * root.p_array, na.rm=TRUE)
    expQt <- GetExpQt(phy=phy, Q=Q_codon, scale.factor=scale.factor, rates=NULL)

    phy.sort <- reorder(phy, "pruningwise")
    anc.indices <- unique(phy.sort$edge[,1])

    MultiCoreLikelihoodBySite <- function(nsite.index){
        tmp <- GetLikelihoodSAC_CodonForSingleCharGivenOptimum(charnum=nsite.index, codon.data=codon.data$unique.site.patterns, phy=phy.sort, Q_codon=expQt, root.p=root.p_array, scale.factor=scale.factor, anc.indices=anc.indices, return.all=FALSE)
        return(tmp)
    }
    final.likelihood.vector <- unlist(mclapply(1:nsites.unique, MultiCoreLikelihoodBySite, mc.cores=n.cores.by.gene.by.site))
    return(final.likelihood.vector)
}


GetLikelihoodNucleotideForManyCharVaryingBySite <- function(nuc.data, phy, nuc.mutation.rates, include.gamma=FALSE, rates.k=NULL, ncats=NULL, root.p_array=NULL, n.cores.by.gene.by.site=1) {
    nsites.unique <- dim(nuc.data$unique.site.patterns)[2]-1
    final.likelihood.vector <- rep(NA, nsites.unique)
    if(is.null(root.p_array)) {
        #Generate matrix of equal frequencies for each site:
        root.p_array <- rep(0.25, 4)
    }
    #Rescaling the Q matrix so that a branch length of 1 corresponds to one expected nucleotide change per site:
    diag(nuc.mutation.rates) = 0
    nuc.mutation.rates = t(nuc.mutation.rates * root.p_array)
    diag(nuc.mutation.rates) = -rowSums(nuc.mutation.rates)
    scale.factor <- -sum(diag(nuc.mutation.rates) * root.p_array)
    expQt <- GetExpQt(phy=phy, Q=nuc.mutation.rates, scale.factor=scale.factor, rates=rates.k)
    phy.sort <- reorder(phy, "pruningwise")
    anc.indices <- unique(phy.sort$edge[,1])

    MultiCoreLikelihoodBySite <- function(nsite.index){
        tmp <- GetLikelihoodSAC_CodonForSingleCharGivenOptimum(charnum=nsite.index, codon.data=nuc.data$unique.site.patterns, phy=phy, Q_codon=expQt, root.p=root.p_array, scale.factor=scale.factor, anc.indices=anc.indices, return.all=FALSE)
        return(tmp)
    }
    final.likelihood.vector <- unlist(mclapply(1:nsites.unique, MultiCoreLikelihoodBySite, mc.cores=n.cores.by.gene.by.site))
    return(final.likelihood.vector)
}


GetLikelihoodSAC_CodonForManyCharGivenAllParamsEvolvingAA <- function(x, codon.data, phy, codon.freq.by.aa=NULL, codon.freq.by.gene=NULL, numcode=1, diploid=TRUE, aa.properties=NULL, volume.fixed.value=0.0003990333, nuc.model, codon.index.matrix, include.gamma, gamma.type, ncats, k.levels=0, logspace=FALSE, verbose=TRUE, neglnl=FALSE, n.cores.by.gene.by.site=1, estimate.importance=FALSE) {

    if(logspace) {
        x = exp(x)
    }

    if(estimate.importance == TRUE){
        importance.of.aa.dist.in.selective.environment.change = x[length(x)]
        x = x[-length(x)]
    }else{
        importance.of.aa.dist.in.selective.environment.change = 1
    }

    rate.for.selective.environment.change = x[length(x)]
    x = x[-length(x)]

    if(include.gamma == TRUE){
        shape = x[length(x)]
        x = x[-length(x)]
    }

    C.Phi.q.Ne <- x[1]
    C <- 4
    q <- 4e-7
    Ne <- 5e6
    Phi.q.Ne <- C.Phi.q.Ne / C
    Phi.Ne <- Phi.q.Ne / q
    Phi <- Phi.Ne / Ne
    alpha <- x[2]
    beta <- x[3]
    gamma <- volume.fixed.value

    if(k.levels > 0){
        if(nuc.model == "JC") {
            base.freqs=c(x[4:6], 1-sum(x[4:6]))
            #During the early stages of the optimization process it will try weird values for the base frequencies.
            if(any(base.freqs < 0)){
                return(1000000)
            }
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(1, model=nuc.model, base.freqs=base.freqs)
            poly.params <- x[7:8]
        }
        if(nuc.model == "GTR") {
            base.freqs=c(x[4:6], 1-sum(x[4:6]))
            #During the early stages of the optimization process it will try weird values for the base frequencies.
            if(any(base.freqs < 0)){
                return(1000000)
            }
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(x[9:length(x)], model=nuc.model, base.freqs=base.freqs)
            poly.params <- x[7:8]
        }
        if(nuc.model == "UNREST") {
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(x[6:length(x)], model=nuc.model, base.freqs=NULL)
            poly.params <- x[4:5]
        }
    }else{
        if(nuc.model == "JC") {
            base.freqs=c(x[4:6], 1-sum(x[4:6]))
            #During the early stages of the optimization process it will try weird values for the base frequencies.
            if(any(base.freqs < 0)){
                return(1000000)
            }
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(1, model=nuc.model, base.freqs=base.freqs)
        }
        if(nuc.model == "GTR") {
            base.freqs=c(x[4:6], 1-sum(x[4:6]))
            #During the early stages of the optimization process it will try weird values for the base frequencies.
            if(any(base.freqs < 0)){
                return(1000000)
            }
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(x[7:length(x)], model=nuc.model, base.freqs=base.freqs)
        }
        if(nuc.model == "UNREST") {
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(x[4:length(x)], model=nuc.model, base.freqs=NULL)
        }
    }

    nuc.mutation.rates.vector <- c(nuc.mutation.rates, rate.for.selective.environment.change)
    codon_mutation_matrix <- matrix(nuc.mutation.rates.vector[codon.index.matrix], dim(codon.index.matrix))
    codon_mutation_matrix[is.na(codon_mutation_matrix)]=0
    nsites.unique <- dim(codon.data$unique.site.patterns)[2]-1
    nsites <- sum(codon.data$site.pattern.counts)

    if(include.gamma==TRUE){
        if(gamma.type == "median"){
            rates.k <- DiscreteGamma(shape=shape, ncats=ncats)
            weights.k <- rep(1/ncats, ncats)
        }
        if(gamma.type == "quadrature"){
            rates.and.weights <- LaguerreQuad(shape=shape, ncats=ncats)
            rates.k <- rates.and.weights[1:ncats]
            weights.k <- rates.and.weights[(ncats+1):(ncats*2)]
        }
        if(gamma.type == "lognormal"){
            rates.and.weights <- LogNormalQuad(shape=shape, ncats=ncats)
            rates.k <- rates.and.weights[1:ncats]
            weights.k <- rates.and.weights[(ncats+1):(ncats*2)]
        }
        #ttmmpp <- c(nuc.mutation.rates.vector, nsites.unique, nsites, C, Phi, rates.k, q, Ne, shape, importance.of.aa.dist.in.selective.environment.change)
        #writeLines(text = paste(ttmmpp), con = "~/Desktop/selac_parameter.txt", sep = "\t")
        final.likelihood.mat = matrix(0, nrow=ncats, ncol=nsites.unique)
        for(k.cat in sequence(ncats)){
            if(k.levels > 0){
                aa.distances <- CreateAADistanceMatrix(alpha=alpha, beta=beta, gamma=gamma, aa.properties=aa.properties, normalize=FALSE, poly.params=poly.params, k=k.levels)
            }else{
                aa.distances <- CreateAADistanceMatrix(alpha=alpha, beta=beta, gamma=gamma, aa.properties=aa.properties, normalize=FALSE, poly.params=NULL, k=k.levels)
            }
            Q_codon_array <- FastCreateEvolveAACodonFixationProbabilityMatrix(aa.distances=aa.distances, nsites=nsites, C=C, Phi=Phi*rates.k[k.cat], q=q, Ne=Ne, include.stop.codon=TRUE, numcode=numcode, diploid=diploid, flee.stop.codon.rate=0.9999999, importance.of.aa.dist.in.selective.environment.change) #Cedric: added importance
            final.likelihood.mat[k.cat,] = GetLikelihoodSAC_CodonForManyCharVaryingBySiteEvolvingAA(codon.data, phy, Q_codon_array, codon.freq.by.aa=codon.freq.by.aa, codon.freq.by.gene=codon.freq.by.gene, codon_mutation_matrix=codon_mutation_matrix, Ne=Ne, rates=NULL, numcode=numcode, diploid=diploid, n.cores.by.gene.by.site=n.cores.by.gene.by.site, verbose=verbose)
        }
        likelihood <- sum(log(colSums(exp(final.likelihood.mat)*weights.k)) * codon.data$site.pattern.counts)
    }else{
        if(k.levels > 0){
            aa.distances <- CreateAADistanceMatrix(alpha=alpha, beta=beta, gamma=gamma, aa.properties=aa.properties, normalize=FALSE, poly.params=poly.params, k=k.levels)
        }else{
            aa.distances <- CreateAADistanceMatrix(alpha=alpha, beta=beta, gamma=gamma, aa.properties=aa.properties, normalize=FALSE, poly.params=NULL, k=k.levels)
        }
        Q_codon_array <- FastCreateEvolveAACodonFixationProbabilityMatrix(aa.distances=aa.distances, nsites=nsites, C=C, Phi=Phi, q=q, Ne=Ne, include.stop.codon=TRUE, numcode=numcode, diploid=diploid, flee.stop.codon.rate=0.9999999, importance.of.aa.dist.in.selective.environment.change) #Cedric: added importance
        final.likelihood = GetLikelihoodSAC_CodonForManyCharVaryingBySiteEvolvingAA(codon.data, phy, Q_codon_array, codon.freq.by.aa=codon.freq.by.aa, codon.freq.by.gene=codon.freq.by.gene, codon_mutation_matrix=codon_mutation_matrix, Ne=Ne, rates=NULL, numcode=numcode, diploid=diploid, n.cores.by.gene.by.site=n.cores.by.gene.by.site, verbose=verbose)
        likelihood <- sum(final.likelihood * codon.data$site.pattern.counts)
    }

    if(neglnl) {
        likelihood <- -1 * likelihood
    }
    if(verbose > 1) {
        results.vector <- c(likelihood, C*Phi*q*Ne, alpha, beta, gamma, Ne, ape::write.tree(phy))
        names(results.vector) <- c("likelihood", "C.Phi.q.Ne", "alpha", "beta", "gamma", "Ne", "phy")
        print(results.vector)
    }else if(verbose){
        results.vector <- c(likelihood, alpha, beta, gamma)
        names(results.vector) <- c("likelihood", "alpha", "beta", "gamma")
        print(results.vector)

    }
    if(is.na(likelihood) || is.nan(likelihood)){
        return(1000000)
    }else{
        return(likelihood)
    }
}
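
## A minimal sketch (not run) of how the per-category site log-likelihoods are combined
## into the gamma-mixture likelihood above: rows are rate categories, columns are unique
## site patterns, and each site is a weighted average over categories before the site
## pattern counts are applied. Toy numbers only.
# final.likelihood.mat <- rbind(c(-3.2, -5.1), c(-2.9, -4.8)) #2 categories, 2 site patterns
# weights.k <- c(0.5, 0.5)
# site.pattern.counts <- c(10, 3)
# sum(log(colSums(exp(final.likelihood.mat) * weights.k)) * site.pattern.counts)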


GetLikelihoodSAC_CodonForManyCharGivenAllParams <- function(x, codon.data, phy, aa.optim_array=NULL, codon.freq.by.aa=NULL, codon.freq.by.gene=NULL, numcode=1, diploid=TRUE, aa.properties=NULL, volume.fixed.value=0.0003990333, nuc.model, codon.index.matrix, include.gamma, gamma.type, ncats, k.levels=0, logspace=FALSE, verbose=TRUE, neglnl=FALSE, n.cores.by.gene.by.site=1) {
    if(logspace) {
        x = exp(x)
    }
    if(include.gamma == TRUE){
        shape = x[length(x)]
        x = x[-length(x)]
    }

    C.Phi.q.Ne <- x[1]
    C <- 4
    q <- 4e-7
    Ne <- 5e6
    Phi.q.Ne <- C.Phi.q.Ne / C
    Phi.Ne <- Phi.q.Ne / q
    Phi <- Phi.Ne / Ne
    alpha <- x[2]
    beta <- x[3]
    gamma <- volume.fixed.value

    if(k.levels > 0){
        if(nuc.model == "JC") {
            base.freqs=c(x[4:6], 1-sum(x[4:6]))
            #During the early stages of the optimization process it will try weird values for the base frequencies.
            if(any(base.freqs < 0)){
                return(1000000)
            }
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(1, model=nuc.model, base.freqs=base.freqs)
            poly.params <- x[7:8]
        }
        if(nuc.model == "GTR") {
            base.freqs=c(x[4:6], 1-sum(x[4:6]))
            #During the early stages of the optimization process it will try weird values for the base frequencies.
            if(any(base.freqs < 0)){
                return(1000000)
            }
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(x[9:length(x)], model=nuc.model, base.freqs=base.freqs)
            poly.params <- x[7:8]
        }
        if(nuc.model == "UNREST") {
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(x[6:length(x)], model=nuc.model, base.freqs=NULL)
            poly.params <- x[4:5]
        }
    }else{
        if(nuc.model == "JC") {
            base.freqs=c(x[4:6], 1-sum(x[4:6]))
            #During the early stages of the optimization process it will try weird values for the base frequencies.
            if(any(base.freqs < 0)){
                return(1000000)
            }
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(1, model=nuc.model, base.freqs=base.freqs)
        }
        if(nuc.model == "GTR") {
            base.freqs=c(x[4:6], 1-sum(x[4:6]))
            #During the early stages of the optimization process it will try weird values for the base frequencies.
            if(any(base.freqs < 0)){
                return(1000000)
            }
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(x[7:length(x)], model=nuc.model, base.freqs=base.freqs)
        }
        if(nuc.model == "UNREST") {
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(x[4:length(x)], model=nuc.model, base.freqs=NULL)
        }
    }

    #codon_mutation_matrix = CreateCodonMutationMatrix(nuc.mutation.rates) #We now make an index matrix first then just place the nucleotide rates into it:
    #codon_mutation_matrix = c(as.vector(nuc.mutation.rates), 0)[codon.index.matrix]
    codon_mutation_matrix <- matrix(nuc.mutation.rates[codon.index.matrix], dim(codon.index.matrix))
    codon_mutation_matrix[is.na(codon_mutation_matrix)]=0
    nsites.unique <- dim(codon.data$unique.site.patterns)[2]-1
    nsites <- sum(codon.data$site.pattern.counts)

    if(include.gamma==TRUE){
        if(gamma.type == "median"){
            rates.k <- DiscreteGamma(shape=shape, ncats=ncats)
            weights.k <- rep(1/ncats, ncats)
        }
        if(gamma.type == "quadrature"){
            rates.and.weights <- LaguerreQuad(shape=shape, ncats=ncats)
            rates.k <- rates.and.weights[1:ncats]
            weights.k <- rates.and.weights[(ncats+1):(ncats*2)]
        }
        if(gamma.type == "lognormal"){
            rates.and.weights <- LogNormalQuad(shape=shape, ncats=ncats)
            rates.k <- rates.and.weights[1:ncats]
            weights.k <- rates.and.weights[(ncats+1):(ncats*2)]
        }
        final.likelihood.mat = matrix(0, nrow=ncats, ncol=nsites.unique)
        for(k.cat in sequence(ncats)){
            if(k.levels > 0){
                aa.distances <- CreateAADistanceMatrix(alpha=alpha, beta=beta, gamma=gamma, aa.properties=aa.properties, normalize=FALSE, poly.params=poly.params, k=k.levels)
            }else{
                aa.distances <- CreateAADistanceMatrix(alpha=alpha, beta=beta, gamma=gamma, aa.properties=aa.properties, normalize=FALSE, poly.params=NULL, k=k.levels)
            }
            Q_codon_array <- FastCreateAllCodonFixationProbabilityMatrices(aa.distances=aa.distances, nsites=nsites, C=C, Phi=Phi*rates.k[k.cat], q=q, Ne=Ne, include.stop.codon=TRUE, numcode=numcode, diploid=diploid, flee.stop.codon.rate=0.9999999)
            final.likelihood.mat[k.cat,] = GetLikelihoodSAC_CodonForManyCharVaryingBySite(codon.data, phy, Q_codon_array, codon.freq.by.aa=codon.freq.by.aa, codon.freq.by.gene=codon.freq.by.gene, aa.optim_array=aa.optim_array, codon_mutation_matrix=codon_mutation_matrix, Ne=Ne, rates=NULL, numcode=numcode, diploid=diploid, n.cores.by.gene.by.site=n.cores.by.gene.by.site)
        }
        likelihood <- sum(log(colSums(exp(final.likelihood.mat)*weights.k)) * codon.data$site.pattern.counts)
    }else{
        if(k.levels > 0){
            aa.distances <- CreateAADistanceMatrix(alpha=alpha, beta=beta, gamma=gamma, aa.properties=aa.properties, normalize=FALSE, poly.params=poly.params, k=k.levels)
        }else{
            aa.distances <- CreateAADistanceMatrix(alpha=alpha, beta=beta, gamma=gamma, aa.properties=aa.properties, normalize=FALSE, poly.params=NULL, k=k.levels)
        }
        Q_codon_array <- FastCreateAllCodonFixationProbabilityMatrices(aa.distances=aa.distances, nsites=nsites, C=C, Phi=Phi, q=q, Ne=Ne, include.stop.codon=TRUE, numcode=numcode, diploid=diploid, flee.stop.codon.rate=0.9999999)
        final.likelihood = GetLikelihoodSAC_CodonForManyCharVaryingBySite(codon.data, phy, Q_codon_array, codon.freq.by.aa=codon.freq.by.aa, codon.freq.by.gene=codon.freq.by.gene, aa.optim_array=aa.optim_array, codon_mutation_matrix=codon_mutation_matrix, Ne=Ne, rates=NULL, numcode=numcode, diploid=diploid, n.cores.by.gene.by.site=n.cores.by.gene.by.site)
        likelihood <- sum(final.likelihood * codon.data$site.pattern.counts)
    }

    if(neglnl) {
        likelihood <- -1 * likelihood
    }
    if(verbose > 1) {
        results.vector <- c(likelihood, C*Phi*q*Ne, alpha, beta, gamma, Ne, ape::write.tree(phy))
        names(results.vector) <- c("likelihood", "C.Phi.q.Ne", "alpha", "beta", "gamma", "Ne", "phy")
        print(results.vector)
    }else if(verbose){
        results.vector <- c(likelihood, alpha, beta, gamma)
        names(results.vector) <- c("likelihood", "alpha", "beta", "gamma")
        print(results.vector)

    }
    if(is.na(likelihood) || is.nan(likelihood)){
        return(1000000)
    }else{
        return(likelihood)
    }
}
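
## A minimal sketch (not run) of the composite-parameter back-transformation used in the
## functions above: x[1] is the product C*Phi*q*Ne, and with C, q, and Ne held at the
## fixed constants shown there, Phi is recovered by division.
# C.Phi.q.Ne <- 0.4
# C <- 4; q <- 4e-7; Ne <- 5e6
# Phi <- C.Phi.q.Ne / (C * q * Ne)
# C * Phi * q * Ne #recovers 0.4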


GetLikelihoodMutSel_CodonForManyCharGivenAllParams <- function(x, codon.data, phy, root.p_array=NULL, numcode, nuc.model, logspace=FALSE, verbose=TRUE, neglnl=FALSE, n.cores.by.gene.by.site=1) {
    if(logspace) {
        x = exp(x)
    }
    if(nuc.model == "JC") {
        base.freqs <- c(x[2:4], 1-sum(x[2:4]))
        nuc.mutation.rates <- CreateNucleotideMutationMatrix(1, model=nuc.model, base.freqs=base.freqs)
        x = x[-c(2:4)]
    }
    if(nuc.model == "GTR") {
        base.freqs <- c(x[2:4], 1-sum(x[2:4]))
        nuc.mutation.rates <- CreateNucleotideMutationMatrix(x[5:9], model=nuc.model, base.freqs=base.freqs)
        x = x[-c(2:9)]
    }
    if(nuc.model == "UNREST") {
        nuc.mutation.rates <- CreateNucleotideMutationMatrix(x[2:12], model=nuc.model, base.freqs=NULL)
        x = x[-c(2:12)]
    }

    #During the early stages of the optimization process it will try weird values for the base frequencies (base.freqs is only defined for the JC and GTR models):
    if(nuc.model != "UNREST" && any(base.freqs < 0)){
        return(1000000)
    }
    if(!is.null(root.p_array[1])){
        codon.eq.freq <- root.p_array
    }else{
        #.codon.sets <- .codon.sets
        n.codons <- dim(.codon.sets)[1]
        codon.eq.freq <- numeric(n.codons)
        fitness.pars <- c(x[-1],0)
        fitness.pars.ordered <- numeric(n.codons)
        if(length(fitness.pars)>21){
            fitness.pars.ordered = c(fitness.pars[1:48], 0, fitness.pars[49], 0, fitness.pars[50:54], 0, fitness.pars[55:61])
        }else{
            fitness.pars.ordered <- numeric(n.codons)
            #codon.set.translate <- apply(.codon.sets, 2, n2s)
            #codon.name <- apply(.codon.set.translate, 1, paste, collapse="")
            aa.translations <- .aa.translation[[numcode]][.codon.name]
            unique.aa.nostop = .unique.aa[-which(.unique.aa=="*")]
            for(par.index in 1:length(unique.aa.nostop)){
                fitness.pars.ordered[which(aa.translations == unique.aa.nostop[par.index])] <- fitness.pars[par.index]
            }
        }
        for(codon.index in 1:n.codons){
            #In the canonical model stop codons are ignored. We do the same here.
            if(codon.index == 49 | codon.index == 51 | codon.index == 57){
                codon.eq.freq[codon.index] = 0
            }else{
                codon.eq.freq[codon.index] <- base.freqs[unname(.codon.sets[codon.index,1])+1] * base.freqs[unname(.codon.sets[codon.index,2])+1] * base.freqs[unname(.codon.sets[codon.index,3])+1] * exp(fitness.pars.ordered[codon.index])
            }
        }
        codon.eq.freq <- codon.eq.freq[1:64]/sum(codon.eq.freq[1:64])
    }

    Q_codon = CreateCodonMutationMatrixMutSel(omega.par=x[1], fitness.pars=fitness.pars.ordered, nuc.mutation.rates=nuc.mutation.rates, numcode=numcode)
    final.likelihood <- GetLikelihoodMutSel_CodonForManyCharVaryingBySite(codon.data, phy, root.p_array=codon.eq.freq, Q_codon=Q_codon, numcode=numcode, n.cores.by.gene.by.site=n.cores.by.gene.by.site)
    likelihood <- sum(final.likelihood * codon.data$site.pattern.counts)

    if(neglnl) {
        likelihood <- -1 * likelihood
    }
    if(verbose > 1) {
        results.vector <- c(likelihood, x, ape::write.tree(phy))
        names(results.vector) <- c("likelihood", paste0("param", sequence(length(x))), "phy")
        print(results.vector)
    } else if(verbose){
        results.vector <- c(likelihood, x)
        names(results.vector) <- c("likelihood", paste0("param", sequence(length(x))))
        print(results.vector)

    }
    return(likelihood)
}
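
## A minimal sketch (not run) of the mutation-selection equilibrium frequency for one
## codon, following the loop above: the product of its three positional base frequencies
## times exp(fitness), later normalized over the sense codons. The nucleotide ordering of
## base.freqs is assumed here purely for illustration.
# base.freqs <- c(0.3, 0.2, 0.25, 0.25)
# fitness <- 0.1
# base.freqs[1] * base.freqs[3] * base.freqs[4] * exp(fitness) #unnormalized frequency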


GetLikelihoodGY94_YN98_CodonForManyCharGivenAllParams <- function(x, codon.data, phy, root.p_array=NULL, model.type="GY94", numcode, logspace=FALSE, verbose=TRUE, neglnl=FALSE, n.cores.by.gene.by.site=1) {
    if(logspace) {
        x = exp(x)
    }
    if(!is.null(root.p_array)){
        codon.freqs <- root.p_array
    }else{
        codon.freqs.tabled <- table(as.matrix(codon.data$unique.site.patterns[,2:dim(codon.data$unique.site.patterns)[2]]))
        codon.freqs <- numeric(64)
        for(codon.index in 1:length(codon.freqs.tabled)){
            codon.freqs[as.numeric(names(codon.freqs.tabled))[codon.index]] <- codon.freqs.tabled[codon.index]
        }
        codon.freqs <- codon.freqs[1:64]/sum(codon.freqs[1:64])
    }
    if(model.type == "GY94"){
        aa.distances <- CreateAADistanceMatrix()
        Q_codon = CreateCodonMutationMatrixGY94(x=x, aa.distances=aa.distances, codon.freqs=codon.freqs, numcode=numcode)
    }else{
        Q_codon = CreateCodonMutationMatrixYN98(x=x, codon.freqs=codon.freqs, numcode=numcode)
    }
    final.likelihood <- GetLikelihoodMutSel_CodonForManyCharVaryingBySite(codon.data, phy, root.p_array=codon.freqs, Q_codon=Q_codon, numcode=numcode, n.cores.by.gene.by.site=n.cores.by.gene.by.site)
    likelihood <- sum(final.likelihood * codon.data$site.pattern.counts)

    if(neglnl) {
        likelihood <- -1 * likelihood
    }
    if(verbose > 1) {
        results.vector <- c(likelihood, x, ape::write.tree(phy))
        names(results.vector) <- c("likelihood", paste0("param", sequence(length(x))), "phy")
        print(results.vector)
    }else if(verbose){
        results.vector <- c(likelihood, x)
        names(results.vector) <- c("likelihood", paste0("param",seq_along(x)))
        print(results.vector)

    }
    return(likelihood)
}
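
## A minimal sketch (not run) of the empirical codon-frequency calculation above: tabulate
## the numeric codon states observed across the alignment, drop them into the 64 bins, and
## normalize; states coded >= 65 (missing) fall outside the bins kept by the [1:64] step.
# states <- c(1, 1, 5, 12, 5, 63) #toy numeric codon states
# freqs.tabled <- table(states)
# codon.freqs <- numeric(64)
# codon.freqs[as.numeric(names(freqs.tabled))] <- freqs.tabled
# codon.freqs <- codon.freqs / sum(codon.freqs)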


GetLikelihoodNucleotideForManyCharGivenAllParams <- function(x, nuc.data, phy, root.p_array=NULL, numcode=1, nuc.model, include.gamma=FALSE, rates.k=NULL, gamma.type="quadrature", ncats=NULL, logspace=FALSE, verbose=TRUE, neglnl=FALSE, n.cores.by.gene.by.site=1) {
    if(logspace) {
        x = exp(x)
    }
    if(include.gamma == TRUE){
        shape = x[1]
        x = x[-1]
    }
    if(length(x)==0){
        transition.rates <- 1
    }else{
        transition.rates <- x[1:length(x)]
    }
    nsites.unique <- dim(nuc.data$unique.site.patterns)[2]-1
    nuc.mutation.rates <- CreateNucleotideMutationMatrix(transition.rates, model=nuc.model)
    if(include.gamma==TRUE){
        if(gamma.type == "median"){
            rates.k <- DiscreteGamma(shape=shape, ncats=ncats)
            weights.k <- rep(1/ncats, ncats)
        }
        if(gamma.type == "quadrature"){
            rates.and.weights <- LaguerreQuad(shape=shape, ncats=ncats)
            rates.k <- rates.and.weights[1:ncats]
            weights.k <- rates.and.weights[(ncats+1):(ncats*2)]
        }
        if(gamma.type == "lognormal"){
            rates.and.weights <- LogNormalQuad(shape=shape, ncats=ncats)
            rates.k <- rates.and.weights[1:ncats]
            weights.k <- rates.and.weights[(ncats+1):(ncats*2)]
        }
        final.likelihood.mat = matrix(0, nrow=ncats, ncol=nsites.unique)
        for(k in sequence(ncats)){
            final.likelihood.mat[k,] = GetLikelihoodNucleotideForManyCharVaryingBySite(nuc.data=nuc.data, phy=phy, nuc.mutation.rates=nuc.mutation.rates, rates.k=rates.k[k], root.p_array=root.p_array, n.cores.by.gene.by.site=n.cores.by.gene.by.site)
        }
        likelihood <- sum(log(colSums(exp(final.likelihood.mat)*weights.k)) * nuc.data$site.pattern.counts)
    }else{
        final.likelihood = GetLikelihoodNucleotideForManyCharVaryingBySite(nuc.data=nuc.data, phy=phy, nuc.mutation.rates=nuc.mutation.rates, rates.k=NULL, root.p_array=root.p_array, n.cores.by.gene.by.site=n.cores.by.gene.by.site)
        likelihood <- sum(final.likelihood * nuc.data$site.pattern.counts)
    }

    if(neglnl) {
        likelihood <- -1 * likelihood
    }
    if(is.na(likelihood)){
        return(1000000)
    }
    if(verbose > 1) {
        results.vector <- c(likelihood, x, ape::write.tree(phy))
        names(results.vector) <- c("likelihood", paste0("param", sequence(length(x))), "phy")
        print(results.vector)
    }else if(verbose){
        results.vector <- c(likelihood, x)
        names(results.vector) <- c("likelihood", paste0("param", seq_along(x)))
        print(results.vector)

    }
    return(likelihood)
}


GetOptimalAAPerSite <- function(x, codon.data, phy, aa.optim_array=NULL, codon.freq.by.aa=NULL, codon.freq.by.gene=NULL, numcode=1, diploid=TRUE, aa.properties=NULL, volume.fixed.value=0.0003990333, nuc.model, codon.index.matrix, include.gamma=FALSE, gamma.type="quadrature", ncats=4, k.levels=0, logspace=FALSE, verbose=TRUE, neglnl=FALSE, n.cores.by.gene.by.site=1) {
    if(logspace) {
        x = exp(x)
    }
    if(include.gamma == TRUE){
        shape = x[length(x)]
        x = x[-length(x)]
    }

    C.Phi.q.Ne <- x[1]
    C <- 4
    q <- 4e-7
    Ne <- 5e6
    Phi.q.Ne <- C.Phi.q.Ne / C
    Phi.Ne <- Phi.q.Ne / q
    Phi <- Phi.Ne / Ne
    alpha <- x[2]
    beta <- x[3]
    gamma <- volume.fixed.value

    if(k.levels > 0){
        if(nuc.model == "JC") {
            base.freqs=c(x[4:6], 1-sum(x[4:6]))
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(1, model=nuc.model, base.freqs=base.freqs)
            poly.params <- x[7:8]
        }
        if(nuc.model == "GTR") {
            base.freqs=c(x[4:6], 1-sum(x[4:6]))
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(x[9:length(x)], model=nuc.model, base.freqs=base.freqs)
            poly.params <- x[7:8]
        }
        if(nuc.model == "UNREST") {
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(x[6:length(x)], model=nuc.model, base.freqs=NULL)
            poly.params <- x[4:5]
        }
    }else{
        if(nuc.model == "JC") {
            base.freqs=c(x[4:6], 1-sum(x[4:6]))
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(1, model=nuc.model, base.freqs=base.freqs)
        }
        if(nuc.model == "GTR") {
            base.freqs=c(x[4:6], 1-sum(x[4:6]))
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(x[7:length(x)], model=nuc.model, base.freqs=base.freqs)
        }
        if(nuc.model == "UNREST") {
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(x[4:length(x)], model=nuc.model, base.freqs=NULL)
        }
    }

    if(!is.null(codon.data$unique.site.patterns)){
        codon.data.list <- codon.data
        nsites.unique <- dim(codon.data$unique.site.patterns)[2]-1
    }else{
        nsites.unique <- dim(codon.data)[2]-1
        codon.data.list <- NULL
        codon.data.list$unique.site.patterns <- codon.data
        codon.data.list$site.pattern.counts <- rep(1, nsites.unique)
    }
    nsites <- sum(codon.data.list$site.pattern.counts)

    #codon_mutation_matrix = c(as.vector(nuc.mutation.rates), 0)[codon.index.matrix]
    codon_mutation_matrix <- matrix(nuc.mutation.rates[codon.index.matrix], dim(codon.index.matrix))
    codon_mutation_matrix[is.na(codon_mutation_matrix)]=0

    optimal.vector.by.site <- rep(NA, nsites.unique)
    #unique.aa <- GetMatrixAANames(numcode)
    optimal.aa.likelihood.mat <- matrix(0, nrow=length(.unique.aa), ncol=nsites.unique)

    for(i in 1:length(.unique.aa)){
        if(.unique.aa[i]=="*"){
            optimal.aa.likelihood.mat[i,] <- rep(-1000000, nsites.unique)
        }else{
            aa.optim_array = rep(.unique.aa[i], nsites.unique)
            if(include.gamma==TRUE){
                if(gamma.type == "median"){
                    rates.k <- DiscreteGamma(shape=shape, ncats=ncats)
                    weights.k <- rep(1/ncats, ncats)
                }
                if(gamma.type == "quadrature"){
                    rates.and.weights <- LaguerreQuad(shape=shape, ncats=ncats)
                    rates.k <- rates.and.weights[1:ncats]
                    weights.k <- rates.and.weights[(ncats+1):(ncats*2)]
                }
                if(gamma.type == "lognormal"){
                    rates.and.weights <- LogNormalQuad(shape=shape, ncats=ncats)
                    rates.k <- rates.and.weights[1:ncats]
                    weights.k <- rates.and.weights[(ncats+1):(ncats*2)]
                }
                final.likelihood.mat = matrix(0, nrow=ncats, ncol=nsites.unique)
                for(k.cat in sequence(ncats)){
                    if(k.levels > 0){
                        aa.distances <- CreateAADistanceMatrix(alpha=alpha, beta=beta, gamma=gamma, aa.properties=aa.properties, normalize=FALSE, poly.params=poly.params, k=k.levels)
                    }else{
                        aa.distances <- CreateAADistanceMatrix(alpha=alpha, beta=beta, gamma=gamma, aa.properties=aa.properties, normalize=FALSE, poly.params=NULL, k=k.levels)
                    }
                    Q_codon_array <- FastCreateAllCodonFixationProbabilityMatrices(aa.distances=aa.distances, nsites=nsites, C=C, Phi=Phi*rates.k[k.cat], q=q, Ne=Ne, include.stop.codon=TRUE, numcode=numcode, diploid=diploid, flee.stop.codon.rate=0.9999)
                    tmp = GetLikelihoodSAC_CodonForManyCharVaryingBySite(codon.data.list, phy, Q_codon_array, codon.freq.by.aa=codon.freq.by.aa, codon.freq.by.gene=codon.freq.by.gene, aa.optim_array=aa.optim_array, codon_mutation_matrix=codon_mutation_matrix, Ne=Ne, rates=NULL, numcode=numcode, diploid=diploid, n.cores.by.gene.by.site=n.cores.by.gene.by.site)
                    tmp[is.na(tmp)] = -1000000
                    final.likelihood.mat[k.cat,] = tmp
                }
                optimal.aa.likelihood.mat[i,] <- log(colSums(exp(final.likelihood.mat)*weights.k))
            }else{
                if(k.levels > 0){
                    aa.distances <- CreateAADistanceMatrix(alpha=alpha, beta=beta, gamma=gamma, aa.properties=aa.properties, normalize=FALSE, poly.params=poly.params, k=k.levels)
                }else{
                    aa.distances <- CreateAADistanceMatrix(alpha=alpha, beta=beta, gamma=gamma, aa.properties=aa.properties, normalize=FALSE, poly.params=NULL, k=k.levels)
                }
                Q_codon_array <- FastCreateAllCodonFixationProbabilityMatrices(aa.distances=aa.distances, nsites=nsites, C=C, Phi=Phi, q=q, Ne=Ne, include.stop.codon=TRUE, numcode=numcode, diploid=diploid, flee.stop.codon.rate=0.9999)
                tmp = GetLikelihoodSAC_CodonForManyCharVaryingBySite(codon.data.list, phy, Q_codon_array, codon.freq.by.aa=codon.freq.by.aa, codon.freq.by.gene=codon.freq.by.gene, aa.optim_array=aa.optim_array, codon_mutation_matrix=codon_mutation_matrix, Ne=Ne, rates=NULL, numcode=numcode, diploid=diploid, n.cores.by.gene.by.site=n.cores.by.gene.by.site)
                tmp[is.na(tmp)] = -1000000
                final.likelihood = tmp
                optimal.aa.likelihood.mat[i,] <- final.likelihood
            }
        }
    }
    for(j in 1:nsites.unique){
        optimal.vector.by.site[j] <- .unique.aa[which.is.max(optimal.aa.likelihood.mat[,j])]
    }
    return(optimal.vector.by.site)
}
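
## A minimal sketch (not run) of the per-site choice made above: each column holds the
## log-likelihood of every candidate optimal amino acid for one site, and the row with
## the highest value is selected (the function above uses which.is.max from nnet; base
## which.max is used here for the sketch).
# optimal.aa.likelihood.mat <- rbind(K=c(-10, -4), N=c(-8, -6)) #2 candidate AAs, 2 sites
# rownames(optimal.aa.likelihood.mat)[apply(optimal.aa.likelihood.mat, 2, which.max)]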



GetAveAAPerSite <- function(x, codon.data, phy, aa.optim_array=NULL, codon.freq.by.aa=NULL, codon.freq.by.gene=NULL, numcode=1, diploid=TRUE, aa.properties=NULL, volume.fixed.value=0.0003990333, nuc.model, codon.index.matrix, include.gamma=FALSE, gamma.type="quadrature", ncats=4, k.levels=0, logspace=FALSE, verbose=TRUE, neglnl=FALSE, n.cores.by.gene.by.site=1) {
    if(logspace) {
        x = exp(x)
    }
    if(include.gamma == TRUE){
        shape = x[length(x)]
        x = x[-length(x)]
    }

    C.Phi.q.Ne <- x[1]
    C <- 4
    q <- 4e-7
    Ne <- 5e6
    Phi.q.Ne <- C.Phi.q.Ne / C
    Phi.Ne <- Phi.q.Ne / q
    Phi <- Phi.Ne / Ne
    alpha <- x[2]
    beta <- x[3]
    gamma <- volume.fixed.value

    if(k.levels > 0){
        if(nuc.model == "JC") {
            base.freqs=c(x[4:6], 1-sum(x[4:6]))
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(1, model=nuc.model, base.freqs=base.freqs)
            poly.params <- x[7:8]
        }
        if(nuc.model == "GTR") {
            base.freqs=c(x[4:6], 1-sum(x[4:6]))
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(x[9:length(x)], model=nuc.model, base.freqs=base.freqs)
            poly.params <- x[7:8]
        }
        if(nuc.model == "UNREST") {
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(x[6:length(x)], model=nuc.model, base.freqs=NULL)
            poly.params <- x[4:5]
        }
    }else{
        if(nuc.model == "JC") {
            base.freqs=c(x[4:6], 1-sum(x[4:6]))
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(1, model=nuc.model, base.freqs=base.freqs)
        }
        if(nuc.model == "GTR") {
            base.freqs=c(x[4:6], 1-sum(x[4:6]))
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(x[7:length(x)], model=nuc.model, base.freqs=base.freqs)
        }
        if(nuc.model == "UNREST") {
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(x[4:length(x)], model=nuc.model, base.freqs=NULL)
        }
    }

    if(!is.null(codon.data$unique.site.patterns)){
        codon.data.list <- codon.data
        nsites.unique <- dim(codon.data$unique.site.patterns)[2]-1
    }else{
        nsites.unique <- dim(codon.data)[2]-1
        codon.data.list <- NULL
        codon.data.list$unique.site.patterns <- codon.data
        codon.data.list$site.pattern.counts <- rep(1, nsites.unique)
    }
    nsites <- sum(codon.data.list$site.pattern.counts)

    #codon_mutation_matrix = c(as.vector(nuc.mutation.rates), 0)[codon.index.matrix]
    codon_mutation_matrix <- matrix(nuc.mutation.rates[codon.index.matrix], dim(codon.index.matrix))
    codon_mutation_matrix[is.na(codon_mutation_matrix)]=0

    likelihood.by.site <- rep(NA, nsites.unique)
    #unique.aa <- GetMatrixAANames(numcode)
    optimal.aa.likelihood.mat <- matrix(0, nrow=length(.unique.aa), ncol=nsites.unique)

    for(i in 1:length(.unique.aa)){
        if(.unique.aa[i]=="*"){
            optimal.aa.likelihood.mat[i,] <- rep(-1000000, nsites.unique)
        }else{
            aa.optim_array = rep(.unique.aa[i], nsites.unique)
            if(include.gamma==TRUE){
                if(gamma.type == "median"){
                    rates.k <- DiscreteGamma(shape=shape, ncats=ncats)
                    weights.k <- rep(1/ncats, ncats)
                }
                if(gamma.type == "quadrature"){
                    rates.and.weights <- LaguerreQuad(shape=shape, ncats=ncats)
                    rates.k <- rates.and.weights[1:ncats]
                    weights.k <- rates.and.weights[(ncats+1):(ncats*2)]
                }
                if(gamma.type == "lognormal"){
                    rates.and.weights <- LogNormalQuad(shape=shape, ncats=ncats)
                    rates.k <- rates.and.weights[1:ncats]
                    weights.k <- rates.and.weights[(ncats+1):(ncats*2)]
                }
                final.likelihood.mat = matrix(0, nrow=ncats, ncol=nsites.unique)
                for(k.cat in sequence(ncats)){
                    if(k.levels > 0){
                        aa.distances <- CreateAADistanceMatrix(alpha=alpha, beta=beta, gamma=gamma, aa.properties=aa.properties, normalize=FALSE, poly.params=poly.params, k=k.levels)
                    }else{
                        aa.distances <- CreateAADistanceMatrix(alpha=alpha, beta=beta, gamma=gamma, aa.properties=aa.properties, normalize=FALSE, poly.params=NULL, k=k.levels)
                    }
                    Q_codon_array <- FastCreateAllCodonFixationProbabilityMatrices(aa.distances=aa.distances, nsites=nsites, C=C, Phi=Phi*rates.k[k.cat], q=q, Ne=Ne, include.stop.codon=TRUE, numcode=numcode, diploid=diploid, flee.stop.codon.rate=0.9999)
                    tmp = GetLikelihoodSAC_CodonForManyCharVaryingBySite(codon.data.list, phy, Q_codon_array, codon.freq.by.aa=codon.freq.by.aa, codon.freq.by.gene=codon.freq.by.gene, aa.optim_array=aa.optim_array, codon_mutation_matrix=codon_mutation_matrix, Ne=Ne, rates=NULL, numcode=numcode, diploid=diploid, n.cores.by.gene.by.site=n.cores.by.gene.by.site)
                    tmp[is.na(tmp)] = -1000000
                    final.likelihood.mat[k.cat,] = tmp
                }
                optimal.aa.likelihood.mat[i,] <- log(colSums(exp(final.likelihood.mat)*weights.k))
            }else{
                if(k.levels > 0){
                    aa.distances <- CreateAADistanceMatrix(alpha=alpha, beta=beta, gamma=gamma, aa.properties=aa.properties, normalize=FALSE, poly.params=poly.params, k=k.levels)
                }else{
                    aa.distances <- CreateAADistanceMatrix(alpha=alpha, beta=beta, gamma=gamma, aa.properties=aa.properties, normalize=FALSE, poly.params=NULL, k=k.levels)
                }
                Q_codon_array <- FastCreateAllCodonFixationProbabilityMatrices(aa.distances=aa.distances, nsites=nsites, C=C, Phi=Phi, q=q, Ne=Ne, include.stop.codon=TRUE, numcode=numcode, diploid=diploid, flee.stop.codon.rate=0.9999)
                tmp = GetLikelihoodSAC_CodonForManyCharVaryingBySite(codon.data.list, phy, Q_codon_array, codon.freq.by.aa=codon.freq.by.aa, codon.freq.by.gene=codon.freq.by.gene, aa.optim_array=aa.optim_array, codon_mutation_matrix=codon_mutation_matrix, Ne=Ne, rates=NULL, numcode=numcode, diploid=diploid, n.cores.by.gene.by.site=n.cores.by.gene.by.site)
                tmp[is.na(tmp)] = -1000000
                final.likelihood = tmp
                optimal.aa.likelihood.mat[i,] <- final.likelihood
            }
        }
    }
    #Take average
    for(j in 1:nsites.unique){
        #Exclude stop codons, otherwise the likelihood is artificially inflated due to our arbitrary setting of -1000000. If stop codons are to be included, the following line would have to be modified.
        likelihood.by.site[j] <- log(mean(exp(optimal.aa.likelihood.mat[which(!.unique.aa=="*"),j])))
    }
    final.likelihood <- sum(likelihood.by.site * codon.data.list$site.pattern.counts)

    if(neglnl) {
        final.likelihood <- -1 * final.likelihood
    }

    return(final.likelihood)
}
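
## A minimal sketch (not run) of the per-site averaging above: the site likelihood is the
## mean of the per-amino-acid likelihoods on the natural scale, computed from log values.
# per.aa.loglik <- c(-4.1, -3.7, -5.0)
# log(mean(exp(per.aa.loglik))) #averaged site log-likelihood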


#OptimizeModelPars <- function(x, codon.site.data, codon.site.counts, data.type, codon.model, n.partitions, nsites.vector, index.matrix, phy, aa.optim_array=NULL, codon.freq.by.aa=NULL, codon.freq.by.gene=NULL, numcode=1, diploid=TRUE, aa.properties=NULL, volume.fixed.value=0.0003990333, nuc.model, codon.index.matrix=NULL, edge.length="optimize", include.gamma=FALSE, gamma.type, ncats, k.levels, logspace=FALSE, verbose=TRUE, parallel.type="by.gene", n.cores=NULL, neglnl=FALSE) {
#    if(logspace) {
#        x <- exp(x)
#    }
#    par.mat <- index.matrix
#    par.mat[] <- c(x, 0)[index.matrix]
#    if(is.null(aa.optim_array)){
#        if(data.type == "nucleotide"){
#            if(nuc.model == "JC"){
#                max.par = 0
#            }
#            if(nuc.model == "GTR"){
#                max.par = 5
#            }
#            if(nuc.model == "UNREST"){
#                max.par = 11
#            }
#            if(include.gamma == TRUE){
#                max.par = max.par + 1
#            }
#            if(is.null(n.cores)){
#                likelihood.vector <- c()
#                for(partition.index in sequence(n.partitions)){
#                    nuc.data = NULL
#                    nuc.data$unique.site.patterns = codon.site.data[[partition.index]]
#                    nuc.data$site.pattern.counts = codon.site.counts[[partition.index]]
#                    likelihood.vector = c(likelihood.vector, GetLikelihoodNucleotideForManyCharGivenAllParams(x=log(par.mat[partition.index,1:max.par]), nuc.data=nuc.data, phy=phy, root.p_array=codon.freq.by.gene[[partition.index]], numcode=numcode, nuc.model=nuc.model, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, logspace=logspace, verbose=verbose, neglnl=neglnl, parallel.type=parallel.type, n.cores=NULL))
#                }
#                likelihood = sum(likelihood.vector)
#            }else{
#                if(parallel.type=="by.gene"){
#                    MultiCoreLikelihood <- function(partition.index){
#                        nuc.data = NULL
#                        nuc.data$unique.site.patterns = codon.site.data[[partition.index]]
#                        nuc.data$site.pattern.counts = codon.site.counts[[partition.index]]
#                        likelihood.tmp = GetLikelihoodNucleotideForManyCharGivenAllParams(x=log(par.mat[partition.index,1:max.par]), nuc.data=nuc.data, phy=phy, root.p_array=codon.freq.by.gene[[partition.index]], numcode=numcode, nuc.model=nuc.model, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, logspace=logspace, verbose=verbose, neglnl=neglnl, parallel.type=parallel.type, n.cores=NULL)
#                        return(likelihood.tmp)
#                    }
#                    #This orders the nsites per partition in decreasing order (to increase efficiency):
#                    partition.order <- 1:n.partitions
#                    likelihood <- sum(unlist(mclapply(partition.order[order(nsites.vector, decreasing=TRUE)], MultiCoreLikelihood, mc.cores=n.cores)))
#                }
#                if(parallel.type == "by.site"){
#                    likelihood.vector <- c()
#                    for(partition.index in sequence(n.partitions)){
#                        nuc.data = NULL
#                        nuc.data$unique.site.patterns = codon.site.data[[partition.index]]
#                        nuc.data$site.pattern.counts = codon.site.counts[[partition.index]]
#                        likelihood.vector = c(likelihood.vector, GetLikelihoodNucleotideForManyCharGivenAllParams(x=log(par.mat[partition.index,1:max.par]), nuc.data=nuc.data, phy=phy, root.p_array=codon.freq.by.gene[[partition.index]], numcode=numcode, nuc.model=nuc.model, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, logspace=logspace, verbose=verbose, neglnl=neglnl, parallel.type=parallel.type, n.cores=n.cores))
#                    }
#                    likelihood = sum(likelihood.vector)
#                }
#            }
#        }else{
#            if(codon.model == "GY94"){
#                max.par = 2
#                if(is.null(n.cores)){
#                    likelihood.vector <- c()
#                    for(partition.index in sequence(n.partitions)){
#                        codon.data = NULL
#                        codon.data$unique.site.patterns = codon.site.data[[partition.index]]
#                        codon.data$site.pattern.counts = codon.site.counts[[partition.index]]
#                        likelihood.vector = c(likelihood.vector, GetLikelihoodGY94_CodonForManyCharGivenAllParams(x=log(par.mat[partition.index,1:max.par]), codon.data=codon.data, phy=phy, root.p_array=NULL, numcode=numcode, logspace=logspace, verbose=verbose, neglnl=neglnl, parallel.type=parallel.type, n.cores=NULL))
#                    }
#                    likelihood = sum(likelihood.vector)
#                }else{
#                    if(parallel.type=="by.gene"){
#                        MultiCoreLikelihood <- function(partition.index){
#                            codon.data = NULL
#                            codon.data$unique.site.patterns = codon.site.data[[partition.index]]
#                            codon.data$site.pattern.counts = codon.site.counts[[partition.index]]
#                            likelihood.tmp = GetLikelihoodGY94_CodonForManyCharGivenAllParams(x=log(par.mat[partition.index,1:max.par]), codon.data=codon.data, phy=phy, root.p_array=NULL, numcode=numcode, logspace=logspace, verbose=verbose, neglnl=neglnl, parallel.type=parallel.type, n.cores=NULL)
#                            return(likelihood.tmp)
#                        }
#                        #This orders the nsites per partition in decreasing order (to increase efficiency):
#                        partition.order <- 1:n.partitions
#                        likelihood <- sum(unlist(mclapply(partition.order[order(nsites.vector, decreasing=TRUE)], MultiCoreLikelihood, mc.cores=n.cores)))
#                    }
#                    if(parallel.type == "by.site"){
#                        likelihood.vector <- c()
#                        for(partition.index in sequence(n.partitions)){
#                            codon.data = NULL
#                            codon.data$unique.site.patterns = codon.site.data[[partition.index]]
#                            codon.data$site.pattern.counts = codon.site.counts[[partition.index]]
#                            likelihood.vector = c(likelihood.vector, GetLikelihoodGY94_CodonForManyCharGivenAllParams(x=log(par.mat[partition.index,1:max.par]), codon.data=codon.data, phy=phy, root.p_array=NULL, numcode=numcode, logspace=logspace, verbose=verbose, neglnl=neglnl, parallel.type=parallel.type, n.cores=n.cores))
#                        }
#                        likelihood = sum(likelihood.vector)
#                    }
#                }
#            }else{
#                #To do: figure out way to allow for the crazy 60 fitness par model.
#                if(nuc.model == "JC"){
#                    #base.freq + nuc.rates + omega + fitness.pars
#                    max.par = 3 + 0 + 1 + 19
#                }
#                if(nuc.model == "GTR"){
#                    max.par = 3 + 5 + 1 + 19
#                }
#                if(nuc.model == "UNREST"){
#                    max.par = 0 + 11 + 1 + 19
#                }
#                if(is.null(n.cores)){
#                    likelihood.vector <- c()
#                    for(partition.index in sequence(n.partitions)){
#                        codon.data = NULL
#                        codon.data$unique.site.patterns = codon.site.data[[partition.index]]
#                        codon.data$site.pattern.counts = codon.site.counts[[partition.index]]
#                        likelihood.vector = c(likelihood.vector, GetLikelihoodMutSel_CodonForManyCharGivenAllParams(x=log(par.mat[partition.index,1:max.par]), codon.data=codon.data, phy=phy, root.p_array=codon.freq.by.gene[[partition.index]], numcode=numcode, nuc.model=nuc.model, logspace=logspace, verbose=verbose, neglnl=neglnl, parallel.type=parallel.type, n.cores=NULL))
#                    }
#                    likelihood = sum(likelihood.vector)
#                }else{
#                    if(parallel.type=="by.gene"){
#                        MultiCoreLikelihood <- function(partition.index){
#                            codon.data = NULL
#                            codon.data$unique.site.patterns = codon.site.data[[partition.index]]
#                            codon.data$site.pattern.counts = codon.site.counts[[partition.index]]
#                            likelihood.tmp = GetLikelihoodMutSel_CodonForManyCharGivenAllParams(x=log(par.mat[partition.index,1:max.par]), codon.data=codon.data, phy=phy, root.p_array=codon.freq.by.gene[[partition.index]], numcode=numcode, nuc.model=nuc.model, logspace=logspace, verbose=verbose, neglnl=neglnl, parallel.type=parallel.type, n.cores=NULL)
#                            return(likelihood.tmp)
#                        }
#                        #This orders the nsites per partition in decreasing order (to increase efficiency):
#                        partition.order <- 1:n.partitions
#                        likelihood <- sum(unlist(mclapply(partition.order[order(nsites.vector, decreasing=TRUE)], MultiCoreLikelihood, mc.cores=n.cores)))
#                    }
#                    if(parallel.type == "by.site"){
#                        likelihood.vector <- c()
#                        for(partition.index in sequence(n.partitions)){
#                            codon.data = NULL
#                            codon.data$unique.site.patterns = codon.site.data[[partition.index]]
#                            codon.data$site.pattern.counts = codon.site.counts[[partition.index]]
#                            likelihood.vector = c(likelihood.vector, GetLikelihoodMutSel_CodonForManyCharGivenAllParams(x=log(par.mat[partition.index,1:max.par]), codon.data=codon.data, phy=phy, root.p_array=codon.freq.by.gene[[partition.index]], numcode=numcode, nuc.model=nuc.model, logspace=logspace, verbose=verbose, neglnl=neglnl, parallel.type=parallel.type, n.cores=n.cores))
#                        }
#                        likelihood = sum(likelihood.vector)
#                    }
#                }
#            }
#        }
#    }else{
#        if(nuc.model == "JC"){
#            max.par = 6
#        }
#        if(nuc.model == "GTR"){
#            max.par = 6 + 5
#        }
#        if(nuc.model == "UNREST"){
#            max.par = 3 + 11
#        }
#        if(include.gamma == TRUE){
#            max.par = max.par + 1
#        }
#        if(k.levels > 0){
#            max.par = max.par + 2
#        }
#        if(is.null(n.cores)){
#            likelihood.vector <- c()
#            for(partition.index in sequence(n.partitions)){
#                codon.data = NULL
#                codon.data$unique.site.patterns = codon.site.data[[partition.index]]
#                codon.data$site.pattern.counts = codon.site.counts[[partition.index]]
#                likelihood.vector = c(likelihood.vector, GetLikelihoodSAC_CodonForManyCharGivenAllParams(x=log(par.mat[partition.index,1:max.par]), codon.data=codon.data, phy=phy, aa.optim_array=aa.optim_array[[partition.index]], codon.freq.by.aa=codon.freq.by.aa[[partition.index]], codon.freq.by.gene=codon.freq.by.gene[[partition.index]], numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=volume.fixed.value, nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=logspace, verbose=verbose, neglnl=neglnl, parallel.type=parallel.type, n.cores=NULL))
#            }
#            likelihood = sum(likelihood.vector)
#        }else{
#            if(parallel.type == "by.gene"){
#                MultiCoreLikelihood <- function(partition.index){
#                    codon.data = NULL
#                    codon.data$unique.site.patterns = codon.site.data[[partition.index]]
#                    codon.data$site.pattern.counts = codon.site.counts[[partition.index]]
#                    likelihood.tmp = GetLikelihoodSAC_CodonForManyCharGivenAllParams(x=log(par.mat[partition.index,1:max.par]), codon.data=codon.data, phy=phy, aa.optim_array=aa.optim_array[[partition.index]], codon.freq.by.aa=codon.freq.by.aa[[partition.index]], codon.freq.by.gene=codon.freq.by.gene[[partition.index]], numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=volume.fixed.value, nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=logspace, verbose=verbose, neglnl=neglnl, parallel.type=parallel.type, n.cores=NULL)
#                    return(likelihood.tmp)
#                }
#                #This orders the nsites per partition in decreasing order (to increase efficiency):
#                partition.order <- 1:n.partitions
#                likelihood <- sum(unlist(mclapply(partition.order[order(nsites.vector, decreasing=TRUE)], MultiCoreLikelihood, mc.cores=n.cores)))
#            }
#            if(parallel.type == "by.site"){
#                likelihood.vector <- c()
#                for(partition.index in sequence(n.partitions)){
#                    codon.data = NULL
#                    codon.data$unique.site.patterns = codon.site.data[[partition.index]]
#                    codon.data$site.pattern.counts = codon.site.counts[[partition.index]]
#                    likelihood.vector = c(likelihood.vector, GetLikelihoodSAC_CodonForManyCharGivenAllParams(x=log(par.mat[partition.index,1:max.par]), codon.data=codon.data, phy=phy, aa.optim_array=aa.optim_array[[partition.index]], codon.freq.by.aa=codon.freq.by.aa[[partition.index]], codon.freq.by.gene=codon.freq.by.gene[[partition.index]], numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=volume.fixed.value, nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=logspace, verbose=verbose, neglnl=neglnl, parallel.type=parallel.type, n.cores=n.cores))
#                }
#                likelihood = sum(likelihood.vector)
#            }
#        }
#    }
#    return(likelihood)
#}


##Largely redundant with the code above. This is a work in progress and will likely change quite a bit in the future to speed things up.
OptimizeEdgeLengths <- function(x, par.mat, codon.site.data, codon.site.counts, data.type, codon.model, n.partitions, nsites.vector, index.matrix, phy, aa.optim_array=NULL, root.p_array=NULL, codon.freq.by.aa=NULL, codon.freq.by.gene=NULL, numcode=1, diploid=TRUE, aa.properties=NULL, volume.fixed.value=0.0003990333, nuc.model, codon.index.matrix=NULL, edge.length="optimize", include.gamma=FALSE, gamma.type, ncats, k.levels, logspace=FALSE, verbose=TRUE, n.cores.by.gene, n.cores.by.gene.by.site=1, estimate.importance=FALSE, neglnl=FALSE, HMM=FALSE) {

    if(logspace) {
        x <- exp(x)
    }
    phy$edge.length = x
    if(is.null(aa.optim_array)){
        if(HMM == TRUE) {
            max.par <- dim(par.mat)[2]
            MultiCoreLikelihood <- function(partition.index){
                codon.data = NULL
                codon.data$unique.site.patterns = codon.site.data[[partition.index]]
                codon.data$site.pattern.counts = codon.site.counts[[partition.index]]
                likelihood.tmp = GetLikelihoodSAC_CodonForManyCharGivenAllParamsEvolvingAA(x=log(par.mat[partition.index,1:max.par]), codon.data=codon.data, phy=phy, codon.freq.by.aa=NULL, codon.freq.by.gene=codon.freq.by.gene[[partition.index]], numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=volume.fixed.value, nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=logspace, verbose=verbose, neglnl=neglnl, n.cores.by.gene.by.site=n.cores.by.gene.by.site, estimate.importance=estimate.importance)
                return(likelihood.tmp)
            }
            #This orders the nsites per partition in decreasing order (to increase efficiency):
            partition.order <- 1:n.partitions
            likelihood <- sum(unlist(mclapply(partition.order[order(nsites.vector, decreasing=TRUE)], MultiCoreLikelihood, mc.cores=n.cores.by.gene)))
        }else{
            if(data.type == "nucleotide"){
                if(nuc.model == "JC"){
                    max.par = 0
                }
                if(nuc.model == "GTR"){
                    max.par = 5
                }
                if(nuc.model == "UNREST"){
                    max.par = 11
                }
                if(include.gamma == TRUE){
                    max.par = max.par + 1
                }
                MultiCoreLikelihood <- function(partition.index){
                    nuc.data = NULL
                    nuc.data$unique.site.patterns = codon.site.data[[partition.index]]
                    nuc.data$site.pattern.counts = codon.site.counts[[partition.index]]
                    likelihood.tmp = GetLikelihoodNucleotideForManyCharGivenAllParams(x=log(par.mat[partition.index,1:max.par]), nuc.data=nuc.data, phy=phy, root.p_array=root.p_array[[partition.index]], numcode=numcode, nuc.model=nuc.model, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, logspace=logspace, verbose=verbose, neglnl=neglnl, n.cores.by.gene.by.site=n.cores.by.gene.by.site)
                    return(likelihood.tmp)
                }
                #This orders the nsites per partition in decreasing order (to increase efficiency):
                partition.order <- 1:n.partitions
                likelihood <- sum(unlist(mclapply(partition.order[order(nsites.vector, decreasing=TRUE)], MultiCoreLikelihood, mc.cores=n.cores.by.gene)))
                #Codon model options handled below: "GY94", "YN98", "FMutSel0"/"FMutSel", and "selac".
            }else{
                if(codon.model == "GY94"){
                    max.par = 2
                    MultiCoreLikelihood <- function(partition.index){
                        codon.data = NULL
                        codon.data$unique.site.patterns = codon.site.data[[partition.index]]
                        codon.data$site.pattern.counts = codon.site.counts[[partition.index]]
                        likelihood.tmp = GetLikelihoodGY94_YN98_CodonForManyCharGivenAllParams(x=log(par.mat[partition.index,1:max.par]), codon.data=codon.data, phy=phy, root.p_array=NULL, model.type=codon.model, numcode=numcode, logspace=logspace, verbose=verbose, neglnl=neglnl, n.cores.by.gene.by.site=n.cores.by.gene.by.site)
                        return(likelihood.tmp)
                    }
                    #This orders the nsites per partition in decreasing order (to increase efficiency):
                    partition.order <- 1:n.partitions
                    likelihood <- sum(unlist(mclapply(partition.order[order(nsites.vector, decreasing=TRUE)], MultiCoreLikelihood, mc.cores=n.cores.by.gene)))
                }
                if(codon.model == "YN98"){
                    max.par = 2
                    MultiCoreLikelihood <- function(partition.index){
                        codon.data = NULL
                        codon.data$unique.site.patterns = codon.site.data[[partition.index]]
                        codon.data$site.pattern.counts = codon.site.counts[[partition.index]]
                        likelihood.tmp = GetLikelihoodGY94_YN98_CodonForManyCharGivenAllParams(x=log(par.mat[partition.index,1:max.par]), codon.data=codon.data, phy=phy, root.p_array=NULL, model.type=codon.model, numcode=numcode, logspace=logspace, verbose=verbose, neglnl=neglnl, n.cores.by.gene.by.site=n.cores.by.gene.by.site)
                        return(likelihood.tmp)
                    }
                    #This orders the nsites per partition in decreasing order (to increase efficiency):
                    partition.order <- 1:n.partitions
                    likelihood <- sum(unlist(mclapply(partition.order[order(nsites.vector, decreasing=TRUE)], MultiCoreLikelihood, mc.cores=n.cores.by.gene)))
                }
                if(codon.model == "FMutSel0" | codon.model == "FMutSel"){
                    if(codon.model == "FMutSel0"){
                        if(nuc.model == "JC"){
                            #base.freq + nuc.rates + omega + fitness.pars
                            max.par = 3 + 0 + 1 + 19
                        }
                        if(nuc.model == "GTR"){
                            max.par = 3 + 5 + 1 + 19
                        }
                        if(nuc.model == "UNREST"){
                            max.par = 0 + 11 + 1 + 19
                        }
                    }else{
                        if(nuc.model == "JC"){
                            #base.freq + nuc.rates + omega + fitness.pars
                            max.par = 3 + 0 + 1 + 60
                        }
                        if(nuc.model == "GTR"){
                            max.par = 3 + 5 + 1 + 60
                        }
                        if(nuc.model == "UNREST"){
                            max.par = 0 + 11 + 1 + 60
                        }
                    }
                    MultiCoreLikelihood <- function(partition.index){
                        codon.data = NULL
                        codon.data$unique.site.patterns = codon.site.data[[partition.index]]
                        codon.data$site.pattern.counts = codon.site.counts[[partition.index]]
                        likelihood.tmp = GetLikelihoodMutSel_CodonForManyCharGivenAllParams(x=log(par.mat[partition.index,1:max.par]), codon.data=codon.data, phy=phy, root.p_array=NULL, numcode=numcode, nuc.model=nuc.model, logspace=logspace, verbose=verbose, neglnl=neglnl,  n.cores.by.gene.by.site=n.cores.by.gene.by.site)
                        return(likelihood.tmp)
                    }
                    #This orders the nsites per partition in decreasing order (to increase efficiency):
                    partition.order <- 1:n.partitions
                    likelihood <- sum(unlist(mclapply(partition.order[order(nsites.vector, decreasing=TRUE)], MultiCoreLikelihood, mc.cores=n.cores.by.gene)))
                }
                if(codon.model == "selac"){
                    #Triggers the averaging-over-amino-acids method (GetAveAAPerSite)#
                    if(nuc.model == "JC"){
                        max.par = 6
                    }
                    if(nuc.model == "GTR"){
                        max.par = 6 + 5
                    }
                    if(nuc.model == "UNREST"){
                        max.par = 3 + 11
                    }
                    if(include.gamma == TRUE){
                        max.par = max.par + 1
                    }
                    if(k.levels > 0){
                        max.par = max.par + 2
                    }
                    MultiCoreLikelihood <- function(partition.index){
                        codon.data = NULL
                        codon.data$unique.site.patterns = codon.site.data[[partition.index]]
                        codon.data$site.pattern.counts = codon.site.counts[[partition.index]]
                        likelihood.tmp = GetAveAAPerSite(x=log(par.mat[partition.index,1:max.par]), codon.data=codon.data, phy=phy, aa.optim_array=aa.optim_array[[partition.index]], codon.freq.by.aa=codon.freq.by.aa[[partition.index]], codon.freq.by.gene=codon.freq.by.gene[[partition.index]], numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=volume.fixed.value, nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=logspace, verbose=verbose, neglnl=neglnl, n.cores.by.gene.by.site=n.cores.by.gene.by.site)
                        return(likelihood.tmp)
                    }
                    #This orders the nsites per partition in decreasing order (to increase efficiency):
                    partition.order <- 1:n.partitions
                    likelihood <- sum(unlist(mclapply(partition.order[order(nsites.vector, decreasing=TRUE)], MultiCoreLikelihood, mc.cores=n.cores.by.gene)))
                }

            }
        }
    }else{
        if(nuc.model == "JC"){
            max.par = 6
        }
        if(nuc.model == "GTR"){
            max.par = 6 + 5
        }
        if(nuc.model == "UNREST"){
            max.par = 3 + 11
        }
        if(include.gamma == TRUE){
            max.par = max.par + 1
        }
        if(k.levels > 0){
            max.par = max.par + 2
        }
        MultiCoreLikelihood <- function(partition.index){
            codon.data = NULL
            codon.data$unique.site.patterns = codon.site.data[[partition.index]]
            codon.data$site.pattern.counts = codon.site.counts[[partition.index]]
            likelihood.tmp = GetLikelihoodSAC_CodonForManyCharGivenAllParams(x=log(par.mat[partition.index,1:max.par]), codon.data=codon.data, phy=phy, aa.optim_array=aa.optim_array[[partition.index]], codon.freq.by.aa=codon.freq.by.aa[[partition.index]], codon.freq.by.gene=codon.freq.by.gene[[partition.index]], numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=volume.fixed.value, nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=logspace, verbose=verbose, neglnl=neglnl, n.cores.by.gene.by.site=n.cores.by.gene.by.site)
            return(likelihood.tmp)
        }
        #This orders the nsites per partition in decreasing order (to increase efficiency):
        partition.order <- 1:n.partitions
        likelihood <- sum(unlist(mclapply(partition.order[order(nsites.vector, decreasing=TRUE)], MultiCoreLikelihood, mc.cores=n.cores.by.gene)))
    }
    return(likelihood)
}


#OptimizeModelParsAlphaBetaFixed <- function(x, alpha.beta, codon.site.data, codon.site.counts, data.type, codon.model, n.partitions, nsites.vector, index.matrix, phy, aa.optim_array=NULL, codon.freq.by.aa=NULL, codon.freq.by.gene=NULL, numcode=1, diploid=TRUE, aa.properties=NULL, volume.fixed.value=0.0003990333, nuc.model, codon.index.matrix=NULL, edge.length="optimize", include.gamma=FALSE, gamma.type, ncats, k.levels, logspace=FALSE, verbose=TRUE, n.cores=NULL, neglnl=FALSE) {
#    if(logspace) {
#        x <- exp(x)
#    }

#    par.mat.tmp <- index.matrix
#    par.mat.tmp[] <- c(x, 0)[index.matrix]
#if(include.gamma == TRUE){
#par.mat <- matrix(c(par.mat.tmp[1], alpha.beta[1], alpha.beta[2], par.mat.tmp[2:length(par.mat.tmp)], alpha.beta[3]),1, length(par.mat.tmp)+3)
#}else{
#    par.mat <- matrix(c(par.mat.tmp[1], alpha.beta[1], alpha.beta[2], par.mat.tmp[2:length(par.mat.tmp)]),1, length(par.mat.tmp)+2)
#}

#    if(nuc.model == "JC"){
#        max.par = 6
#    }
#    if(nuc.model == "GTR"){
#        max.par = 6 + 5
#    }
#    if(nuc.model == "UNREST"){
#        max.par = 3 + 11
#    }
#    if(include.gamma == TRUE){
#        max.par = max.par + 1
#    }
#    if(k.levels > 0){
#        max.par = max.par + 2
#    }
#    codon.data = NULL
#    codon.data$unique.site.patterns = codon.site.data
#    codon.data$site.pattern.counts = codon.site.counts
#    likelihood.vector = sum(GetLikelihoodSAC_CodonForManyCharGivenAllParams(x=log(par.mat), codon.data=codon.data, phy=phy, aa.optim_array=aa.optim_array, codon.freq.by.aa=codon.freq.by.aa, codon.freq.by.gene=codon.freq.by.gene, numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=volume.fixed.value, nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=logspace, verbose=verbose, neglnl=neglnl, parallel.type=parallel.type, n.cores=NULL))
#    likelihood = sum(likelihood.vector)

#    return(likelihood)
#}


#OptimizeAlphaBetaOnly <- function(x, par.mat, codon.site.data, codon.site.counts, data.type, codon.model, n.partitions, nsites.vector, index.matrix, phy, aa.optim_array=NULL, codon.freq.by.aa=NULL, codon.freq.by.gene=NULL, numcode=1, diploid=TRUE, aa.properties=NULL, volume.fixed.value=0.0003990333, nuc.model, codon.index.matrix=NULL, edge.length="optimize", include.gamma=FALSE, gamma.type, ncats, k.levels, logspace=FALSE, verbose=TRUE, parallel.type="by.gene", n.cores=NULL, neglnl=FALSE) {
#    if(logspace) {
#        x <- exp(x)
#    }
#    par.mat.tmp <- par.mat
#if(include.gamma == TRUE){
#   par.mat <- cbind(par.mat.tmp[,1], x[1], x[2], par.mat.tmp[,2:dim(par.mat.tmp)[2]], x[3])
#}else{
#    par.mat <- cbind(par.mat.tmp[,1], x[1], x[2], par.mat.tmp[,2:dim(par.mat.tmp)[2]])
#}
#    if(nuc.model == "JC"){
#        max.par = 6
#    }
#    if(nuc.model == "GTR"){
#        max.par = 6 + 5
#    }
#    if(nuc.model == "UNREST"){
#        max.par = 3 + 11
#    }
#    if(include.gamma == TRUE){
#        max.par = max.par + 1
#    }
#    if(k.levels > 0){
#        max.par = max.par + 2
#    }
#    if(parallel.type == "by.gene"){
#        MultiCoreLikelihood <- function(partition.index){
#            codon.data = NULL
#            codon.data$unique.site.patterns = codon.site.data[[partition.index]]
#            codon.data$site.pattern.counts = codon.site.counts[[partition.index]]
#           likelihood.tmp = GetLikelihoodSAC_CodonForManyCharGivenAllParams(x=log(par.mat[partition.index,1:max.par]), codon.data=codon.data, phy=phy, aa.optim_array=aa.optim_array[[partition.index]], codon.freq.by.aa=codon.freq.by.aa[[partition.index]], codon.freq.by.gene=codon.freq.by.gene[[partition.index]], numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=volume.fixed.value, nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=logspace, verbose=verbose, neglnl=neglnl, parallel.type=parallel.type, n.cores=NULL)
#            return(likelihood.tmp)
#        }
#This orders the nsites per partition in decreasing order (to increase efficiency):
#        partition.order <- 1:n.partitions
#        likelihood <- sum(unlist(mclapply(partition.order[order(nsites.vector, decreasing=TRUE)], MultiCoreLikelihood, mc.cores=n.cores)))
#    }
#    if(parallel.type == "by.site"){
#        likelihood.vector <- c()
#        for(partition.index in sequence(n.partitions)){
#            codon.data = NULL
#            codon.data$unique.site.patterns = codon.site.data[[partition.index]]
#            codon.data$site.pattern.counts = codon.site.counts[[partition.index]]
#            likelihood.vector = c(likelihood.vector, GetLikelihoodSAC_CodonForManyCharGivenAllParams(x=log(par.mat[partition.index,1:max.par]), codon.data=codon.data, phy=phy, aa.optim_array=aa.optim_array[[partition.index]], codon.freq.by.aa=codon.freq.by.aa[[partition.index]], codon.freq.by.gene=codon.freq.by.gene[[partition.index]], numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=volume.fixed.value, nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=logspace, verbose=verbose, neglnl=neglnl, parallel.type=parallel.type, n.cores=n.cores))
#        }
#        likelihood = sum(likelihood.vector)
#    }
#    return(likelihood)
#}


OptimizeModelParsAlphaBetaGtrFixed <- function(x, alpha.beta.gtr, codon.site.data, codon.site.counts, data.type, codon.model, n.partitions, nsites.vector, index.matrix, phy, aa.optim_array=NULL, codon.freq.by.aa=NULL, codon.freq.by.gene=NULL, numcode=1, diploid=TRUE, aa.properties=NULL, volume.fixed.value=0.0003990333, nuc.model, codon.index.matrix=NULL, edge.length="optimize", include.gamma=FALSE, gamma.type, ncats, k.levels, logspace=FALSE, verbose=TRUE, n.cores.by.gene=n.cores.by.gene, n.cores.by.gene.by.site=1, estimate.importance=FALSE, neglnl=FALSE, HMM=FALSE) {
    if(logspace) {
        x <- exp(x)
    }

    if(HMM == TRUE) {
        if(estimate.importance == TRUE){
            max.par <- length(c(x[1], alpha.beta.gtr, x[2], x[3]))
            par.mat <- matrix(c(x[1], alpha.beta.gtr, x[2], x[3]), 1, max.par)
        }else{
            max.par <- length(c(x[1], alpha.beta.gtr, x[2]))
            par.mat <- matrix(c(x[1], alpha.beta.gtr, x[2]), 1, max.par)
        }
        codon.data = NULL
        codon.data$unique.site.patterns = codon.site.data
        codon.data$site.pattern.counts = codon.site.counts
        likelihood.vector = sum(GetLikelihoodSAC_CodonForManyCharGivenAllParamsEvolvingAA(x=log(par.mat), codon.data=codon.data, phy=phy, codon.freq.by.aa=codon.freq.by.aa, codon.freq.by.gene=codon.freq.by.gene, numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=volume.fixed.value, nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=logspace, verbose=verbose, neglnl=neglnl, n.cores.by.gene.by.site=n.cores.by.gene.by.site, estimate.importance=estimate.importance))
        likelihood = sum(likelihood.vector)
    }else{
        if(is.null(aa.optim_array)){
            if(nuc.model == "JC"){
                max.par = 6
            }
            if(nuc.model == "GTR"){
                max.par = 6 + 5
            }
            if(nuc.model == "UNREST"){
                max.par = 3 + 11
            }
            if(include.gamma == TRUE){
                max.par = max.par + 1
            }
            if(k.levels > 0){
                max.par = max.par + 2
            }

            #THIS ASSUMES A SEPARATE GAMMA PER GENE
            #if(include.gamma == TRUE){
            #    par.mat <- matrix(c(x[1], alpha.beta.gtr, x[2]), 1, max.par)
            #}else{
            #    par.mat <- matrix(c(x[1], alpha.beta.gtr), 1, max.par)
            #}

            par.mat <- matrix(c(x[1], alpha.beta.gtr), 1, max.par)

            codon.data = NULL
            codon.data$unique.site.patterns = codon.site.data
            codon.data$site.pattern.counts = codon.site.counts
            likelihood.vector = sum(GetAveAAPerSite(x=log(par.mat), codon.data=codon.data, phy=phy, aa.optim_array=aa.optim_array, codon.freq.by.aa=codon.freq.by.aa, codon.freq.by.gene=codon.freq.by.gene, numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=volume.fixed.value, nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=logspace, verbose=verbose, neglnl=neglnl, n.cores.by.gene.by.site=n.cores.by.gene.by.site))
            likelihood = sum(likelihood.vector)
        }else{
            if(nuc.model == "JC"){
                max.par = 6
            }
            if(nuc.model == "GTR"){
                max.par = 6 + 5
            }
            if(nuc.model == "UNREST"){
                max.par = 3 + 11
            }
            if(include.gamma == TRUE){
                max.par = max.par + 1
            }
            if(k.levels > 0){
                max.par = max.par + 2
            }

            #THIS ASSUMES A SEPARATE GAMMA PER GENE
            #if(include.gamma == TRUE){
            #    par.mat <- matrix(c(x[1], alpha.beta.gtr, x[2]), 1, max.par)
            #}else{
            #    par.mat <- matrix(c(x[1], alpha.beta.gtr), 1, max.par)
            #}

            par.mat <- matrix(c(x[1], alpha.beta.gtr), 1, max.par)

            codon.data = NULL
            codon.data$unique.site.patterns = codon.site.data
            codon.data$site.pattern.counts = codon.site.counts
            likelihood.vector = sum(GetLikelihoodSAC_CodonForManyCharGivenAllParams(x=log(par.mat), codon.data=codon.data, phy=phy, aa.optim_array=aa.optim_array, codon.freq.by.aa=codon.freq.by.aa, codon.freq.by.gene=codon.freq.by.gene, numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=volume.fixed.value, nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=logspace, verbose=verbose, neglnl=neglnl, n.cores.by.gene.by.site=n.cores.by.gene.by.site))
            likelihood = sum(likelihood.vector)
        }
    } ## end else for HMM == TRUE
    return(likelihood)
}


OptimizeAlphaBetaGtrOnly <- function(x, fixed.pars, codon.site.data, codon.site.counts, data.type, codon.model, n.partitions, nsites.vector, index.matrix, phy, aa.optim_array=NULL, codon.freq.by.aa=NULL, codon.freq.by.gene=NULL, numcode=1, diploid=TRUE, aa.properties=NULL, volume.fixed.value=0.0003990333, nuc.model, codon.index.matrix=NULL, edge.length="optimize", include.gamma=FALSE, gamma.type=gamma.type, ncats, k.levels, logspace=FALSE, verbose=TRUE, n.cores.by.gene=n.cores.by.gene, n.cores.by.gene.by.site=1, neglnl=FALSE, HMM=FALSE, estimate.importance=FALSE) {
    if(logspace) {
        x <- exp(x)
    }
    if(HMM == TRUE) {
        if(estimate.importance == TRUE){
            par.mat <- c()
            for(row.index in 1:dim(fixed.pars)[1]){
                par.mat <- rbind(par.mat, c(fixed.pars[row.index,1], x, fixed.pars[row.index,2], fixed.pars[row.index,3]))
            }
        }else{
            par.mat <- c()
            for(row.index in 1:dim(fixed.pars)[1]){
                par.mat <- rbind(par.mat, c(fixed.pars[row.index,1], x, fixed.pars[row.index,2]))
            }
        }
        max.par <- dim(par.mat)[2]
        MultiCoreLikelihood <- function(partition.index){
            codon.data = NULL
            codon.data$unique.site.patterns = codon.site.data[[partition.index]]
            codon.data$site.pattern.counts = codon.site.counts[[partition.index]]
            likelihood.tmp = GetLikelihoodSAC_CodonForManyCharGivenAllParamsEvolvingAA(x=log(par.mat[partition.index,1:max.par]), codon.data=codon.data, phy=phy, codon.freq.by.aa=NULL, codon.freq.by.gene=codon.freq.by.gene[[partition.index]], numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=volume.fixed.value, nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=logspace, verbose=verbose, neglnl=neglnl, n.cores.by.gene.by.site=n.cores.by.gene.by.site, estimate.importance=estimate.importance)
            return(likelihood.tmp)
        }

        #This orders the nsites per partition in decreasing order (to increase efficiency):
        partition.order <- 1:n.partitions
        likelihood <- sum(unlist(mclapply(partition.order[order(nsites.vector, decreasing=TRUE)], MultiCoreLikelihood, mc.cores=n.cores.by.gene)))

    }else{
        if(is.null(aa.optim_array)){
            if(nuc.model == "JC"){
                max.par = 6
            }
            if(nuc.model == "GTR"){
                max.par = 6 + 5
            }
            if(nuc.model == "UNREST"){
                max.par = 3 + 11
            }
            if(include.gamma == TRUE){
                max.par = max.par + 1
            }
            if(k.levels > 0){
                max.par = max.par + 2
            }

            #THIS ASSUMES SEPARATE GAMMA PER GENE:
            #    if(include.gamma == TRUE){
            #    par.mat <- c()
            #    for(row.index in 1:dim(fixed.pars)[1]){
            #        par.mat <- rbind(par.mat, c(fixed.pars[row.index,1], x, fixed.pars[row.index,2]))
            #    }
            #}else{
            #    par.mat <- c()
            #    for(row.index in 1:dim(fixed.pars)[1]){
            #        par.mat <- rbind(par.mat, c(fixed.pars[row.index,1], x))
            #    }
            #}

            par.mat <- c()
            for(row.index in 1:dim(fixed.pars)[1]){
                par.mat <- rbind(par.mat, c(fixed.pars[row.index,1], x))
            }

            MultiCoreLikelihood <- function(partition.index){
                codon.data = NULL
                codon.data$unique.site.patterns = codon.site.data[[partition.index]]
                codon.data$site.pattern.counts = codon.site.counts[[partition.index]]
                likelihood.tmp = GetAveAAPerSite(x=log(par.mat[partition.index,1:max.par]), codon.data=codon.data, phy=phy, aa.optim_array=aa.optim_array[[partition.index]], codon.freq.by.aa=codon.freq.by.aa[[partition.index]], codon.freq.by.gene=codon.freq.by.gene[[partition.index]], numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=volume.fixed.value, nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=logspace, verbose=verbose, neglnl=neglnl, n.cores.by.gene.by.site=n.cores.by.gene.by.site)
                return(likelihood.tmp)
            }
            #This orders the nsites per partition in decreasing order (to increase efficiency):
            partition.order <- 1:n.partitions
            likelihood <- sum(unlist(mclapply(partition.order[order(nsites.vector, decreasing=TRUE)], MultiCoreLikelihood, mc.cores=n.cores.by.gene)))
        }else{
            if(nuc.model == "JC"){
                max.par = 6
            }
            if(nuc.model == "GTR"){
                max.par = 6 + 5
            }
            if(nuc.model == "UNREST"){
                max.par = 3 + 11
            }
            if(include.gamma == TRUE){
                max.par = max.par + 1
            }
            if(k.levels > 0){
                max.par = max.par + 2
            }

            #THIS ASSUMES SEPARATE GAMMA PER GENE:
            #    if(include.gamma == TRUE){
            #    par.mat <- c()
            #    for(row.index in 1:dim(fixed.pars)[1]){
            #        par.mat <- rbind(par.mat, c(fixed.pars[row.index,1], x, fixed.pars[row.index,2]))
            #    }
            #}else{
            #    par.mat <- c()
            #    for(row.index in 1:dim(fixed.pars)[1]){
            #        par.mat <- rbind(par.mat, c(fixed.pars[row.index,1], x))
            #    }
            #}

            par.mat <- c()
            for(row.index in 1:dim(fixed.pars)[1]){
                par.mat <- rbind(par.mat, c(fixed.pars[row.index,1], x))
            }

            MultiCoreLikelihood <- function(partition.index){
                codon.data = NULL
                codon.data$unique.site.patterns = codon.site.data[[partition.index]]
                codon.data$site.pattern.counts = codon.site.counts[[partition.index]]
                likelihood.tmp = GetLikelihoodSAC_CodonForManyCharGivenAllParams(x=log(par.mat[partition.index,1:max.par]), codon.data=codon.data, phy=phy, aa.optim_array=aa.optim_array[[partition.index]], codon.freq.by.aa=codon.freq.by.aa[[partition.index]], codon.freq.by.gene=codon.freq.by.gene[[partition.index]], numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=volume.fixed.value, nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=logspace, verbose=verbose, neglnl=neglnl, n.cores.by.gene.by.site=n.cores.by.gene.by.site)
                return(likelihood.tmp)
            }
            #This orders the nsites per partition in decreasing order (to increase efficiency):
            partition.order <- 1:n.partitions
            likelihood <- sum(unlist(mclapply(partition.order[order(nsites.vector, decreasing=TRUE)], MultiCoreLikelihood, mc.cores=n.cores.by.gene)))
        }
    }
    return(likelihood)
}


OptimizeModelParsLarge <- function(x, codon.site.data, codon.site.counts, data.type, codon.model, n.partitions, nsites.vector, index.matrix, phy, aa.optim_array=NULL, root.p_array=NULL, numcode=1, diploid=TRUE, aa.properties=NULL, volume.fixed.value=0.0003990333, nuc.model, codon.index.matrix=NULL, edge.length="optimize", include.gamma=FALSE, gamma.type, ncats, k.levels, logspace=FALSE, verbose=TRUE, n.cores.by.gene=n.cores.by.gene, n.cores.by.gene.by.site=1, neglnl=FALSE) {

    poo <- x #retain the incoming (possibly log-space) parameter vector for the checkpoint file saved below
    if(logspace) {
        x <- exp(x)
    }

    if(is.numeric(index.matrix) && !is.matrix(index.matrix)){
        index.matrix <- matrix(index.matrix, 1, length(index.matrix))
    }
    par.mat <- index.matrix
    par.mat[] <- c(x, 0)[index.matrix]

    if(data.type == "nucleotide"){
        if(nuc.model == "JC"){
            max.par = 0
        }
        if(nuc.model == "GTR"){
            max.par = 5
        }
        if(nuc.model == "UNREST"){
            max.par = 11
        }
        if(include.gamma == TRUE){
            max.par = max.par + 1
        }
        likelihood.vector <- c()
        for(partition.index in sequence(n.partitions)){
            nuc.data = NULL
            nuc.data$unique.site.patterns = codon.site.data
            nuc.data$site.pattern.counts = codon.site.counts
            likelihood.vector = c(likelihood.vector, GetLikelihoodNucleotideForManyCharGivenAllParams(x=log(par.mat[partition.index,1:max.par]), nuc.data=nuc.data, phy=phy, root.p_array=root.p_array, numcode=numcode, nuc.model=nuc.model, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, logspace=logspace, verbose=verbose, neglnl=neglnl, n.cores.by.gene.by.site=n.cores.by.gene.by.site))
        }
        likelihood = sum(likelihood.vector)
    }else{
        if(codon.model == "GY94" | codon.model == "YN98"){
            max.par = 2
            MultiCoreLikelihood <- function(partition.index){
                codon.data = NULL
                codon.data$unique.site.patterns = codon.site.data[[partition.index]]
                codon.data$site.pattern.counts = codon.site.counts[[partition.index]]
                likelihood.tmp <- c()
                try(likelihood.tmp <- GetLikelihoodGY94_YN98_CodonForManyCharGivenAllParams(x=log(par.mat[partition.index,1:max.par]), codon.data=codon.data, phy=phy, root.p_array=NULL, model.type=codon.model, numcode=numcode, logspace=logspace, verbose=verbose, neglnl=neglnl, n.cores.by.gene.by.site=n.cores.by.gene.by.site))
                if(length(likelihood.tmp)==0){
                    return(10000000)
                }else{
                    return(likelihood.tmp)
                }
            }
            #This orders the nsites per partition in decreasing order (to increase efficiency):
            partition.order <- 1:n.partitions
            likelihood <- sum(unlist(mclapply(partition.order[order(nsites.vector, decreasing=TRUE)], MultiCoreLikelihood, mc.cores=n.cores.by.gene)))
        }
        if(codon.model == "FMutSel0" | codon.model == "FMutSel") {
            if(codon.model == "FMutSel0"){
                #To do: figure out a way to allow for the crazy 60-fitness-parameter model.
                if(nuc.model == "JC"){
                    #base.freq + nuc.rates + omega + fitness.pars
                    max.par = 3 + 0 + 1 + 19
                }
                if(nuc.model == "GTR"){
                    max.par = 3 + 5 + 1 + 19
                }
                if(nuc.model == "UNREST"){
                    max.par = 0 + 11 + 1 + 19
                }
            }else{
                #To do: figure out a way to allow for the crazy 60-fitness-parameter model.
                if(nuc.model == "JC"){
                    #base.freq + nuc.rates + omega + fitness.pars
                    max.par = 3 + 0 + 1 + 60
                }
                if(nuc.model == "GTR"){
                    max.par = 3 + 5 + 1 + 60
                }
                if(nuc.model == "UNREST"){
                    max.par = 0 + 11 + 1 + 60
                }
            }
            save(poo, phy, index.matrix, codon.site.data, codon.site.counts, file="checkpoint.fmutsel.Rsave")
            MultiCoreLikelihood <- function(partition.index){
                codon.data = NULL
                codon.data$unique.site.patterns = codon.site.data[[partition.index]]
                codon.data$site.pattern.counts = codon.site.counts[[partition.index]]
                likelihood.tmp <- c()
                try(likelihood.tmp <- GetLikelihoodMutSel_CodonForManyCharGivenAllParams(x=log(par.mat[partition.index,1:max.par]), codon.data=codon.data, phy=phy, root.p_array=NULL, numcode=numcode, nuc.model=nuc.model, logspace=logspace, verbose=verbose, neglnl=neglnl,  n.cores.by.gene.by.site=n.cores.by.gene.by.site))
                if(length(likelihood.tmp)==0){
                    return(10000000)
                }else{
                    return(likelihood.tmp)
                }
            }
            #This orders the nsites per partition in decreasing order (to increase efficiency):
            partition.order <- 1:n.partitions
            likelihood <- sum(unlist(mclapply(partition.order[order(nsites.vector, decreasing=TRUE)], MultiCoreLikelihood, mc.cores=n.cores.by.gene)))
        }
    }
    return(likelihood)
}


ComputeStartingBranchLengths <- function(phy, data, data.type="codon", recalculate.starting.brlen){
    if(recalculate.starting.brlen || is.null(phy$edge.length)) {
        if(is.null(phy$edge.length)){
            phy$edge.length = rep(1, length(phy$edge[,1]))
        }
        data.mat <- DNAbinToNucleotideCharacter(data)
        new.tip<-list(edge=matrix(c(2L,1L),1,2),tip.label="FAKEY_MCFAKERSON", edge.length=1, Nnode=1L)
        class(new.tip) <- "phylo"
        phy.with.outgroup <- bind.tree(phy, new.tip,where="root")
        new.tip.data <- matrix(c("FAKEY_MCFAKERSON", rep("-", dim(data.mat)[2]-1)), dim(data.mat)[2], 1)
        new.tip.df <- as.data.frame(t(new.tip.data))
        rownames(new.tip.df) <- "FAKEY_MCFAKERSON"
        colnames(new.tip.df) <- colnames(data.mat)
        data.with.outgroup <- rbind(data.mat, new.tip.df)
        dats.mat <- as.matrix(data.with.outgroup[,-1])
        if(data.type=="codon"){
            third.position <- seq(3,dim(dats.mat)[2], by=3)
            dat <- phyDat(dats.mat[,third.position], type="DNA")
            mpr.tre <- acctran(phy.with.outgroup, dat)
            mpr.tre$edge.length <- mpr.tre$edge.length/dim(dats.mat[,third.position])[2]
        }else{
            dat <- phyDat(dats.mat, type="DNA")
            #mpr.tre <- phangorn::nnls.phylo(phy.with.outgroup, dist.ml(dat))
            fitJC <- pml(phy.with.outgroup, dat)
            fitJC <- optim.pml(fitJC, optBf=TRUE)
            mpr.tre <- fitJC$tree
        }
        mpr.tre.pruned <- drop.tip(mpr.tre, "FAKEY_MCFAKERSON")
        mpr.tre.pruned$edge.length[mpr.tre.pruned$edge.length == 0] <- 1e-7
    } else {
        mpr.tre.pruned <- phy
    }
    return(mpr.tre.pruned)
}
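
## Illustrative usage sketch for ComputeStartingBranchLengths (assumes 'phy' is
## an ape "phylo" tree and 'dna' is a DNAbin alignment whose rows match
## phy$tip.label; hypothetical objects, not run):
# start.tree <- ComputeStartingBranchLengths(phy, dna, data.type="codon", recalculate.starting.brlen=TRUE)
# summary(start.tree$edge.length)  #parsimony-based starting branch lengths; zero-length edges are bumped to 1e-7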


GetFitnessStartingValues <- function(codon.freqs, n.pars = 21){
    initial.vals <- c()
    for(i in 1:n.pars){
        #This is taken directly from PAML code:
        initial.vals <- c(initial.vals, (codon.freqs[i]+.001)/(codon.freqs[n.pars]+.002*runif(1)))
    }
    if(n.pars == 64){
        initial.vals = initial.vals[-c(49,51,57,64)]
    }
    initial.vals[initial.vals < 0.0001] <- 0.001
    return(initial.vals)
}


GetCAI <- function(codon.data, aa.optim, numcode=1, w){
    ref.codon.freqs <- GetCodonFreqsByAA(codon.data[1,-1], aa.optim, numcode=numcode)
    w.array <- matrix(ref.codon.freqs, nrow=64, ncol=21)
    w.array <- t(w.array)
    w.array <- w.array / apply(w.array, 1, max)
    w.array[17,] <- 0
    #unique.aa <- GetMatrixAANames(numcode=numcode)
    rownames(w.array) <- .unique.aa
    wi <- apply(w.array, 2, max)
    wi[wi < 1e-4] <- 0.01
    wi = w #NOTE: the reference-based adaptiveness values computed above are overridden by the user-supplied w
    cai <- exp((1/(dim(codon.data)[2]-1)) * sum(log(wi[as.numeric(codon.data[1,])][-1])))
    return(cai)
}
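
## GetCAI returns the codon adaptation index: the geometric mean of the
## relative adaptiveness values over the codons of the focal sequence,
## CAI = exp( (1/L) * sum_l log(w[codon_l]) ). Minimal sketch of the same
## calculation (assumes 'w' is a length-64 vector indexed by codon number and
## 'codon.numeric' is one row of numeric codon data with a leading Taxa column;
## hypothetical objects, not run):
# cai.example <- exp(mean(log(w[as.numeric(codon.numeric[-1])])))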


#DiscreteGamma <- function (shape, ncats){
#  quantiles <- qgamma((1:(ncats - 1))/ncats, shape = shape, rate = shape)
#  return(diff(c(0, pgamma(quantiles * shape, shape + 1, rate=shape), 1)) * ncats)
#}

DiscreteGamma <- function(shape, ncats) {
    if (ncats == 1) return(1)
    quants <- qgamma( (1:(ncats - 1)) / ncats, shape = shape, rate = shape)
    return(diff(c(0, pgamma(quants * shape, shape + 1), 1)) * ncats)
}
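
## DiscreteGamma implements the standard discrete-gamma approximation of
## among-site rate variation: ncats equal-probability categories whose rates
## are the category means, so the rates average to 1. Quick illustrative check
## (not run):
# rates <- DiscreteGamma(shape=0.5, ncats=4)
# length(rates)  #4
# mean(rates)    #1 by construction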


LogNormalQuad <- function(shape, ncats){
    s = shape
    m = -(s^2)/2
    pp <- gauss.quad.prob(ncats, dist="normal", mu=m, sigma=s)
    return(c(exp(pp$nodes/m), pp$weights))
}


LaguerreQuad <- function(shape, ncats) {
    # Determine rate categories based on the gamma shape (alpha) and the number of bins:
    # the rates are the roots of the generalized Laguerre quadrature, rescaled by the caller's shape.
    # The first ncats elements of the return value are rates with mean 1;
    # the second ncats elements are probabilities that sum to 1.
    roots <- findRoots(shape - 1, ncats)
    weights <- numeric(ncats)
    f <- prod(1 + (shape - 1)/(1:ncats))

    for (i in 1:ncats) {
        weights[i] <- f*roots[i]/((ncats + 1)^2*Laguerre(roots[i], shape - 1, ncats + 1)^2)
    }
    roots <- roots/shape
    return(c(roots, weights))
}
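
## LaguerreQuad returns a single vector: the first ncats entries are rate
## categories (quadrature nodes rescaled so their probability-weighted mean is 1)
## and the last ncats entries are their probabilities (which sum to 1). Quick
## illustrative check (not run):
# out <- LaguerreQuad(shape=0.5, ncats=4)
# rates <- out[1:4]; probs <- out[5:8]
# sum(probs)         #~1
# sum(rates * probs) #~1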


findRoots <- function(shape, ncats) {
    # Determine the quadrature nodes based on the gamma shape (alpha) and the number of bins:
    # the roots of the generalized Laguerre polynomial (GLP); the caller rescales them to mean 1.
    coeff  <- integer(ncats + 1)
    for (i in 0:ncats) {
        coeff[i + 1] <- (-1)^i*nChooseK(ncats + shape, ncats - i)/factorial(i)
    }
    return(sort(Re(polyroot(coeff))))
}


Laguerre <- function(x, shape, degree) {
    y <- 0
    for (i in 0:degree) {
        y <- y + (-1)^i*choose(degree + shape, degree - i)*x^i/factorial(i)
    }
    return(y)
}


#Took this from R.basic -- the C version did not work when LaguerreQuad was called internally. Adding this function fixed this issue (JMB 9-29-2016).
nChooseK <- function(n, k, log=FALSE) {
    nChooseK0 <- function(n, k) {
        if((n == k) || (k==0))
        return(1);
        m <- min(k, n-k);
        prod(seq(from=n, to=(n-m+1), by=-1)/(seq(from=m, to=1, by=-1)));
    }
    # Process the arguments
    if (is.logical(log)) {
        if (log == TRUE)
        log <- exp(1)
        else
        log <- NULL;
    }
    # Repeat n or k to make them of equal length.
    nn <- length(n);
    nk <- length(k);
    if (nn > nk) {
        k <- rep(k, length.out=nn);
        nk <- nn;
    } else if (nn < nk) {
        n <- rep(n, length.out=nk);
        nn <- nk;
    }
    if (is.null(log)) {
        gamma(n+1) / (gamma(n-k+1) * gamma(k+1));
    } else {
        (lgamma(n+1) - (lgamma(n-k+1) + lgamma(k+1))) / log(log);
    }
}
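
## Quick illustrative check that nChooseK matches base R's choose() for the
## simple cases used here (not run):
# nChooseK(5, 2)            #10, same as choose(5, 2)
# nChooseK(5, 2, log=TRUE)  #log(10)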



PlotBubbleMatrix <- function(x, main="", special=Inf, cex=1){
    diag(x) <- 0
    x<-x/max(x)
    plot(x=range(.5,.5+dim(x)[2]),y=-range(.5, .5+dim(x)[1]), xlab="", ylab="", type="n", sub=main,xaxt='n',yaxt='n', asp=1,bty="n")
    axis(side=2, at=-sequence(dim(x)[1]), labels=rownames(x), las=2, cex.axis=cex)
    axis(side=3, at=sequence(dim(x)[2]), labels=colnames(x), las=2, cex.axis=cex)

    #abline(h=-1:(-dim(x)[2]), v=1:(dim(x)[1]), col="gray", lty=3)
    abline(h=-range(special)[1],v=range(special)[1], lty=2)
    abline(h=-range(special)[2],v=range(special)[2], lty=2)
    for (i in sequence(dim(x)[2])) {
        for (j in sequence(dim(x)[1])) {
            bg="gray"
            if(i %in% special || j %in% special) {
                if(i %in% special){
                    bg="green4"
                }else{
                    bg="magenta3"
                }
            }
            if(x[j,i]>0) {
                symbols(x=i, y=-j, circles=sqrt(x[j,i])/(2.1*sqrt(max(x))), inches=FALSE, add=TRUE, fg=bg, bg=bg)
            }
        }
    }
}


GetGainLossRatios <- function(x) {
    diag(x) <- 0
    x<-x/max(x)
    ratio.matrix <- x*0
    for (i in sequence(dim(x)[2])) {
        for (j in sequence(dim(x)[1])) {
            if(i>j) {
                gain.rate <- x[j, i]
                loss.rate <- x[i, j]
                ratio<-gain.rate/(loss.rate+gain.rate)-0.5
                if(is.na(ratio)) {
                    ratio <- 0
                }
                ratio.matrix[j,i]<-ratio
            }
        }
    }
    return(ratio.matrix)
}
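
## GetGainLossRatios fills the upper triangle with gain/(gain+loss) - 0.5, so
## values range from -0.5 (pure loss) to +0.5 (pure gain). Small worked example
## on a hypothetical 2-state rate matrix (not run):
# m <- matrix(c(0, 2, 1, 0), 2, 2, byrow=TRUE)
# GetGainLossRatios(m)  #entry [1,2] is 2/(2+1) - 0.5 = ~0.167, i.e. gains outpace losses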


#blue is proportional increase (largest circle means gain rate is positive, loss rate is zero)
#red is proportional decrease (circle of 0.5 means loss rate is twice that of gain rate)
PlotBubbleRatio <- function(x, main="", cex=1){
    ratio.matrix<-GetGainLossRatios(x)
    plot(x=range(.5,.5+dim(x)[2]),y=-range(.5, .5+dim(x)[1]), xlab="", ylab="", type="n", main=main,xaxt='n',yaxt='n', asp=1,bty="n")
    axis(side=2, at=-sequence(dim(x)[1]), labels=rownames(x), las=2, cex=cex)
    axis(side=3, at=sequence(dim(x)[2]), labels=colnames(x), las=2, cex=cex)
    for (i in sequence(dim(x)[2])) {
        for (j in sequence(dim(x)[1])) {
            if(ratio.matrix[j,i]!=0) {
                gain.rate <- x[j, i]
                loss.rate <- x[i, j]
                ratio<-ratio.matrix[j,i]
                bg="blue"
                if (ratio < 0) {
                    bg="red"
                }
                symbols(x=i, y=-j, circles=0.5*sqrt(abs(ratio))/sqrt(max(abs(ratio.matrix))), inches=FALSE, add=TRUE, fg=bg, bg=bg)
            }
        }
    }
}


PlotTransitionNetwork <- function(x, main="") {
    diag(x) <- 0
    x<-x/max(x)
    g <- igraph::graph.adjacency(x, weighted=TRUE, mode="directed")
    g.layout <- igraph::layout.fruchterman.reingold(g)
    plot(g, layout=g.layout, edge.width=10*igraph::get.edge.attribute(g, "weight"), edge.curved=TRUE)
}


DNAbinToCodonNumeric <- function(x, frame=0, corHMM.format=TRUE) {
    bound.characters <- sapply(as.character(x), paste, collapse="")
    #following fn is derived from code for uco in seqinr
    SplitToCodons <- function(seq.string, frame) {
        seq.string<-strsplit(seq.string, split="")[[1]]
        if (any(seq.string %in% LETTERS)) {
            seq.string <- tolower(seq.string)
        }
        return(sapply(splitseq(seq = seq.string, frame = frame, word = 3), CodonStringToNumeric))
    }
    split.characters <- t(sapply(bound.characters, SplitToCodons, frame=frame))
    colnames(split.characters) <- sequence(dim(split.characters)[2])
    if(corHMM.format) {
        split.characters<-cbind(data.frame(Taxa=rownames(split.characters)), data.frame(split.characters))
    }
    split.characters[is.na(split.characters)] = 65
    return(split.characters)
}
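
## Illustrative usage sketch (assumes 'dna' is a DNAbin alignment whose length
## is a multiple of 3; hypothetical object, not run):
# codon.numeric <- DNAbinToCodonNumeric(dna)
# codon.numeric[1:2, 1:4]  #Taxa column followed by codon indices; missing data coded as 65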


DNAbinToCodonCharacter <- function(x, frame=0, corHMM.format=TRUE) {
    bound.characters <- sapply(as.character(x), paste, collapse="")
    #following fn is derived from code for uco in seqinr
    SplitToCodons <- function(seq.string, frame) {
        seq.string<-strsplit(seq.string, split="")[[1]]
        if (any(seq.string %in% LETTERS)) {
            seq.string <- tolower(seq.string)
        }
        return(sapply(splitseq(seq = seq.string, frame = frame, word = 3), CodonStringToCharacter))
    }
    split.characters <- t(sapply(bound.characters, SplitToCodons, frame=frame))
    colnames(split.characters) <- sequence(dim(split.characters)[2])
    if(corHMM.format) {
        split.characters <- cbind(data.frame(Taxa=rownames(split.characters)), data.frame(split.characters))
    }
    split.characters[is.na(split.characters)] = 65
    return(split.characters)
}


DNAbinToNucleotideNumeric <- function(x, frame=0, corHMM.format=TRUE) {
    bound.characters <- sapply(as.character(x), paste, collapse="")
    #following fn is derived from code for uco in seqinr
    SplitToCodons <- function(seq.string, frame) {
        seq.string<-strsplit(seq.string, split="")[[1]]
        if (any(seq.string %in% LETTERS)) {
            seq.string <- tolower(seq.string)
        }
        return(sapply(splitseq(seq = seq.string, frame = frame, word = 1), NucleotideStringToNumeric))
    }
    split.characters <- t(sapply(bound.characters, SplitToCodons, frame=frame))
    colnames(split.characters) <- sequence(dim(split.characters)[2])
    if(corHMM.format) {
        split.characters<-cbind(data.frame(Taxa=rownames(split.characters)), data.frame(split.characters))
    }
    split.characters[is.na(split.characters)] = 65
    return(split.characters)
}


DNAbinToNucleotideCharacter <- function(x, frame=0, corHMM.format=TRUE) {
    bound.characters <- sapply(as.character(x), paste, collapse="")
    #following fn is derived from code for uco in seqinr
    SplitToCodons <- function(seq.string, frame) {
        seq.string<-strsplit(seq.string, split="")[[1]]
        if (any(seq.string %in% LETTERS)) {
            seq.string <- tolower(seq.string)
        }
        return(sapply(splitseq(seq = seq.string, frame = frame, word = 1), NucleotideStringToCharacter))
    }
    split.characters <- t(sapply(bound.characters, SplitToCodons, frame=frame))
    colnames(split.characters) <- sequence(dim(split.characters)[2])
    if(corHMM.format) {
        split.characters<-cbind(data.frame(Taxa=rownames(split.characters)), data.frame(split.characters))
    }
    split.characters[is.na(split.characters)] = 65
    return(split.characters)
}


SitePattern <- function(codon.data, corHMM.format=TRUE, includes.optimal.aa=FALSE){
    if(includes.optimal.aa == TRUE){
        char.strings <- sapply(codon.data[,-1], paste, collapse="_")
        tabled.strings <- table(char.strings)
        site.pattern.totals <- c()
        reduced.codon.data <- c()
        reduced.optimal.aa <- c()
        for(i in 1:length(tabled.strings)){
            site.pattern.totals <- c(site.pattern.totals, tabled.strings[i])
            reduced.codon.data.tmp <- unlist(strsplit(names(tabled.strings)[i], "_"))
            reduced.codon.data <- cbind(reduced.codon.data, as.numeric(reduced.codon.data.tmp[1:(length(reduced.codon.data.tmp)-1)]))
            reduced.optimal.aa <- c(reduced.optimal.aa, reduced.codon.data.tmp[length(reduced.codon.data.tmp)])
        }
        if(corHMM.format) {
            names.for.rows <- rownames(codon.data)[-dim(codon.data)[1]]
            split.characters<-cbind(data.frame(Taxa=names.for.rows), data.frame(reduced.codon.data))
            rownames(split.characters) <- names.for.rows
        }
        names(site.pattern.totals) <- NULL
        obj <- NULL
        obj$unique.site.patterns <- split.characters
        obj$site.pattern.counts <- site.pattern.totals
        obj$optimal.aa <- reduced.optimal.aa
    }else{
        char.strings <- sapply(codon.data[,-1], paste, collapse="_")
        tabled.strings <- table(char.strings)
        site.pattern.totals <- c()
        reduced.codon.data <- c()
        for(i in 1:length(tabled.strings)){
            site.pattern.totals <- c(site.pattern.totals, tabled.strings[i])
            reduced.codon.data <- cbind(reduced.codon.data, as.numeric(unlist(strsplit(names(tabled.strings)[i], "_"))))
        }
        if(corHMM.format) {
            split.characters<-cbind(data.frame(Taxa=rownames(codon.data)), data.frame(reduced.codon.data))
            rownames(split.characters) <- rownames(codon.data)
        }
        names(site.pattern.totals) <- NULL
        obj <- NULL
        obj$unique.site.patterns <- split.characters
        obj$site.pattern.counts <- site.pattern.totals
    }
    return(obj)
}


GetMatrixAANames <-function(numcode){
    #codon.sets <- CreateCodonSets()
    #codon.set.translate <- apply(.codon.sets, 2, n2s)
    #codon.name <- apply(.codon.set.translate, 1, paste, collapse="")
    codon.aa <- sapply(.codon.name,TranslateCodon, numcode=numcode)
    names(codon.aa ) = NULL
    #unique.aa <- unique(codon.aa)
    return(.unique.aa)
}


GetCodonFreqsByAA <- function(codon.data, aa.opt.vector, numcode){
    #codon.sets <- CreateCodonSets()
    #codon.set.translate <- apply(.codon.sets, 2, n2s)
    #codon.name <- apply(.codon.set.translate, 1, paste, collapse="")
    aa.translations <- .aa.translation[[numcode]][codon.data[,1]]
    names(aa.translations) = NULL
    #unique.aa <- unique(aa.translation)
    codon.freqs <- c()
    for(aa.id.index in sequence(21)) {
        cols <- which(aa.opt.vector == .unique.aa[aa.id.index])
        codon.freqs.tmp <- rep(0, 64)
        for(col.index in sequence(length(cols))) {
            for(row.index in sequence(dim(codon.data)[1])) {
                if(codon.data[row.index, cols[col.index]]<65){
                    codon.freqs.tmp[codon.data[row.index, cols[col.index]]] <- codon.freqs.tmp[codon.data[row.index, cols[col.index]]] + 1
                }
            }
        }
        codon.freqs <- c(codon.freqs, codon.freqs.tmp)
    }
    return(codon.freqs)
}


GetCodonFreqsByGene <- function(codon.data){
    codon.freqs.tabled <- table(as.matrix(codon.data[,2:dim(codon.data)[2]]))
    codon.freqs <- numeric(64)
    for(codon.index in seq_along(codon.freqs.tabled)){
        codon.freqs[as.numeric(names(codon.freqs.tabled))[codon.index]] <- codon.freqs.tabled[codon.index]
    }
    codon.freqs <- codon.freqs[1:64]/sum(codon.freqs[1:64])
    return(codon.freqs)
}


GetCodonFreqsByGeneHMM <- function(codon.data){
    codon.freqs.tabled <- table(as.matrix(codon.data[,2:dim(codon.data)[2]]))
    codon.freqs <- numeric(64)
    for(codon.index in seq_along(codon.freqs.tabled)){
        codon.freqs[as.numeric(names(codon.freqs.tabled))[codon.index]] <- codon.freqs.tabled[codon.index]
    }
    codon.freqs <- codon.freqs[1:64]/sum(codon.freqs[1:64])
    codon.freqs.full <- rep(codon.freqs, 21)
    codon.freqs.full <- codon.freqs.full/sum(codon.freqs.full)
    return(codon.freqs.full)
}


GetAAFreqsByGene <- function(codon.data, aa.opt.vector, numcode){
    #codon.sets <- CreateCodonSets()
    #codon.set.translate <- apply(.codon.sets, 2, n2s)
    #codon.name <- apply(.codon.set.translate, 1, paste, collapse="")
    aa.translations <- .aa.translation[[numcode]][.codon.name]
    names(aa.translations) = NULL
    #unique.aa <- unique(aa.translation)
    eq.freqs <- c()
    for(aa.id.index in sequence(21)) {
        cols <- which(aa.opt.vector == .unique.aa[aa.id.index])
        eq.freqs.tmp <- rep(0, 64)
        for(col.index in sequence(length(cols))) {
            for(row.index in sequence(dim(codon.data)[1])) {
                if(codon.data[row.index, cols[col.index]]<65){
                    eq.freqs.tmp[codon.data[row.index, cols[col.index]]] <- eq.freqs.tmp[codon.data[row.index, cols[col.index]]] + 1
                }
            }
        }
        eq.freqs <- c(eq.freqs, eq.freqs.tmp)
    }
    root.p_array <- matrix(eq.freqs, nrow=64, ncol=21)
    root.p_array <- t(root.p_array)
    eq.freqs <- rowSums(root.p_array)
    eq.freqs <- as.vector(eq.freqs / sum(eq.freqs))

    return(eq.freqs)
}


GetMaxName <- function(x) {
    x.tmp <- x
    x.tmp <- x[-which(x == "NA")]
    if(length(x.tmp)==0){
        return(names(table(x))[(which.is.max(table(x)))])
    }else{
        return(names(table(x.tmp))[(which.is.max(table(x.tmp)))])
    }
}



######################################################################################################################################
######################################################################################################################################
### Likelihood calculator -- Two step process
######################################################################################################################################
######################################################################################################################################

#Step 1: We perform exponentiation as few times as possible; in the case of selac, for example, we do it for each of the possible optimal amino acids and store the matrices.
#code <- "
#Eigen::MatrixXd m = Rcpp::as<Eigen::MatrixXd>(a);
#return(Rcpp::wrap(m.exp()));"
#eigenExpM <- cxxfunction(signature(a="numeric"), code, plugin="RcppEigen")

#Use specialized expm, copied from package expm
internal_expm <- function (x) {
    stopifnot(is.numeric(x) || (isM <- inherits(x, "dMatrix")) ||
    inherits(x, "mpfrMatrix"))
    if (length(d <- dim(x)) != 2)
    stop("argument is not a matrix")
    if (d[1] != d[2])
    stop("matrix not square")
    method <- "Higham08"
    preconditioning = "2bal"
    A<-x
    n <- d[1]
    if (n <= 1)
    return(exp(A))
    # if (balancing) {
    baP <- expm::balance(A, "P")
    baS <- expm::balance(baP$z, "S")
    A <- baS$z
    # }
    nA <- Matrix::norm(A, "1")
    I <- if (is(A, "Matrix"))
    Matrix::Diagonal(n)
    else diag(n)
    if (nA <= 2.1) {
        t <- c(0.015, 0.25, 0.95, 2.1)
        l <- which.max(nA <= t)
        C <- rbind(c(120, 60, 12, 1, 0, 0, 0, 0, 0, 0),
        c(30240, 15120, 3360, 420, 30, 1, 0, 0, 0, 0),
        c(17297280, 8648640, 1995840, 277200, 25200, 1512, 56, 1, 0,  0),
        c(17643225600, 8821612800, 2075673600, 302702400, 30270240, 2162160, 110880, 3960, 90, 1))
        A2 <- A %*% A
        P <- I
        U <- C[l, 2] * I
        V <- C[l, 1] * I
        for (k in 1:l) {
            P <- P %*% A2
            U <- U + C[l, (2 * k) + 2] * P
            V <- V + C[l, (2 * k) + 1] * P
        }
        U <- A %*% U
        X <- solve(V - U, V + U)
    }
    else {
        s <- log2(nA/5.4)
        B <- A
        if (s > 0) {
            s <- ceiling(s)
            B <- B/(2^s)
        }
        c. <- c(64764752532480000, 32382376266240000, 7771770303897600,
        1187353796428800, 129060195264000, 10559470521600,
        670442572800, 33522128640, 1323241920, 40840800,
        960960, 16380, 182, 1)
        B2 <- B %*% B
        B4 <- B2 %*% B2
        B6 <- B2 %*% B4
        U <- B %*% (B6 %*% (c.[14] * B6 + c.[12] * B4 + c.[10] *  B2) +
        c.[8] * B6 + c.[6] * B4 + c.[4] * B2 + c.[2] * I)
        V <- B6 %*% (c.[13] * B6 + c.[11] * B4 + c.[9] * B2) +
        c.[7] * B6 + c.[5] * B4 + c.[3] * B2 + c.[1] * I
        X <- solve(V - U, V + U)
        if (s > 0)
        for (t in 1:s) X <- X %*% X
    }
    # if (balancing) {
    d <- baS$scale
    X <- X * (d * rep(1/d, each = n))
    pp <- as.integer(baP$scale)
    if (baP$i1 > 1) {
        for (i in (baP$i1 - 1):1) {
            tt <- X[, i]
            X[, i] <- X[, pp[i]]
            X[, pp[i]] <- tt
            tt <- X[i, ]
            X[i, ] <- X[pp[i], ]
            X[pp[i], ] <- tt
        }
    }
    if (baP$i2 < n) {
        for (i in (baP$i2 + 1):n) {
            tt <- X[, i]
            X[, i] <- X[, pp[i]]
            X[, pp[i]] <- tt
            tt <- X[i, ]
            X[i, ] <- X[pp[i], ]
            X[pp[i], ] <- tt
        }
    }
    # }
    return(X)
}


internal_expmt <- function (A, t_vec) {
    stopifnot(is.numeric(A) || (isM <- inherits(A, "dMatrix")) ||
    inherits(A, "mpfrMatrix"))
    if (length(d <- dim(A)) != 2)
    stop("argument is not a matrix")
    if (d[1] != d[2])
    stop("matrix not square")
    method <- "Higham08"
    preconditioning = "2bal"
    n <- d[1]
    if (n <= 1)
    return(as.list(exp(A*t_vec)))  # force a list format return
    # if (balancing) {
    baP <- expm::balance(A, "P")
    baS <- expm::balance(baP$z, "S")
    A <- baS$z
    # }
    nA <- Matrix::norm(A, "1")
    I <- if (is(A, "Matrix"))
    Matrix::Diagonal(n)
    else diag(n)
    res <- as.list(numeric(length(t_vec)))

    C <- rbind(c(120, 60, 12, 1, 0, 0, 0, 0, 0, 0),
    c(30240, 15120, 3360, 420, 30, 1, 0, 0, 0, 0),
    c(17297280, 8648640, 1995840, 277200, 25200, 1512, 56, 1, 0,  0),
    c(17643225600, 8821612800, 2075673600, 302702400, 30270240, 2162160, 110880, 3960, 90, 1))

    c. <- c(64764752532480000, 32382376266240000, 7771770303897600,
    1187353796428800, 129060195264000, 10559470521600,
    670442572800, 33522128640, 1323241920, 40840800,
    960960, 16380, 182, 1)
    t <- c(0.015, 0.25, 0.95, 2.1)
    sA <- log2(nA/5.4)
    A2_base <- A %*% A
    if(any(nA * abs(as.numeric(t_vec)) > 2.1)){
        A4_base <- A2_base %*% A2_base
        A6_base <- A2_base %*% A4_base
    }
    for(res_i in seq_len(length(t_vec))){
        t_i=t_vec[res_i]
        if (nA * abs(t_i) <= 2.1) {
            l <- which.max(nA * abs(t_i)  <= t)
            A2 <- A2_base * t_i * t_i
            P <- I
            U <- C[l, 2] * I
            V <- C[l, 1] * I
            for (k in 1:l) {
                P <- P %*% A2
                U <- U + C[l, (2 * k) + 2] * P
                V <- V + C[l, (2 * k) + 1] * P
            }
            U <- A %*% U * t_i
            X <- solve(V - U, V + U)
        }
        else {
            s <- sA + log2(abs(t_i))
            B <- A * t_i
            if (s > 0) {
                s <- ceiling(s)
                B <- B/(2^s)
                B2 <- A2_base * t_i * t_i / (4^s)
                B4 <- A4_base * t_i ^ 4 / (16^s)
                B6 <- A6_base * t_i ^ 6 / (64^s)
            } else {
                B2 <- A2_base * t_i * t_i
                B4 <- A4_base * t_i ^ 4
                B6 <- A6_base * t_i ^ 6
            }
            U <- B %*% (B6 %*% (c.[14] * B6 + c.[12] * B4 + c.[10] *  B2) +
            c.[8] * B6 + c.[6] * B4 + c.[4] * B2 + c.[2] * I)
            V <- B6 %*% (c.[13] * B6 + c.[11] * B4 + c.[9] * B2) +
            c.[7] * B6 + c.[5] * B4 + c.[3] * B2 + c.[1] * I
            X <- solve(V - U, V + U)
            if (s > 0)
            for (sq in 1:s) X <- X %*% X
        }
        # if (balancing) {
        d <- baS$scale
        X <- X * (d * rep(1/d, each = n))
        pp <- as.integer(baP$scale)
        if (baP$i1 > 1) {
            for (i in (baP$i1 - 1):1) {
                tt <- X[, i]
                X[, i] <- X[, pp[i]]
                X[, pp[i]] <- tt
                tt <- X[i, ]
                X[i, ] <- X[pp[i], ]
                X[pp[i], ] <- tt
            }
        }
        if (baP$i2 < n) {
            for (i in (baP$i2 + 1):n) {
                tt <- X[, i]
                X[, i] <- X[, pp[i]]
                X[, pp[i]] <- tt
                tt <- X[i, ]
                X[i, ] <- X[pp[i], ]
                X[pp[i], ] <- tt
            }
        }
        # }
        res[[res_i]]<-X
    }
    return(res)
}
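
# A minimal check sketch, kept as a comment so it does not run at package load:
# internal_expmt(Q, t_vec) should match expm::expm(Q * t) applied one branch
# length at a time. The 3x3 generator below is purely hypothetical.
# Q <- matrix(c(-2, 1, 1,
#                1, -2, 1,
#                1, 1, -2), 3, 3, byrow=TRUE)
# t_vec <- c(0.1, 0.5, 2)
# fast <- internal_expmt(Q, t_vec)
# slow <- lapply(t_vec, function(t) expm::expm(Q * t))
# max(mapply(function(a, b) max(abs(a - b)), fast, slow))  # should be near machine precision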

## This is an edited copy from the expm package; will be replaced with HMM variants for fixed values
internal_expAtv <- function(A, v, t=1)
{
    #Hardcoded arguments
    tol=1e-7; btol = 1e-7; m.max = 30; mxrej = 10 #constant
    ## R translation:  Ravi Varadhan, Johns Hopkins University
    ##		   "cosmetic", apply to sparse A: Martin Maechler, ETH Zurich
    d <- dim(A)
    # HMM constant: m <- c(1344,1344)
    n <- d[1]
    # HMM constant: n <- 1344
    if(n <= 1) {
        if(n == 1) return(exp(A*t)*v)
        stop("nrow(A) must be >= 1")
    }
    m <- min(n, m.max)
    # HMM constant: m <- 30
    gamma <- 0.9        # constant
    delta <- 1.2        # constant
    nA <- Matrix::norm(A, "I")  # varies with Q
    # Next line varies with Q and phy
    if(nA <  1e-6) { ## rescaling, by MMaechler, needed for small norms
        A <- A/nA
        t <- t*nA
        nA <- 1
    }
    rndoff <- nA * .Machine$double.eps # Varies with Q

    t_1 <- abs(t) # Varies with phy
    sgn <- sign(t) # Varies with phy
    t_now <- 0
    s_error <- 0
    k1 <- 2
    mb <- m    # HMM constant: mb <- 30
    xm <- 1/m  # HMM constant: xm <- 1/30
    # Next line is constant for all tips in HMM: beta <- 2*sqrt(5)
    beta <- sqrt(sum(v*v))  # = norm(v) = ||v||
    if(beta == 0) ## border case: v is all 0, and the result is too
    return(v)
    # Next line is constant for all tips in HMM: fact <- 6.74967950018045e+33
    fact <- (((m+1)/exp(1))^(m+1))*sqrt(2*pi*(m+1))
    myRound <- function(tt) {
        s <- 10^(floor(log10(tt)) - 1)
        ceiling(tt/s)*s
    }
    t_new <- myRound( (1/nA)*(fact*tol/(4*beta*nA))^xm )
    # alt for HMM, varies with Q, phy and site
    # t_new <- myRound( (1/nA)*(nA*beta)^(-1/30)*7.48584399202831 )
    # alt constant for HMM tips, varies with Q
    # t_new <- myRound( (nA)^(-31/30)*7.12126158103164 )

    V <- matrix(0, n, m+1)    #HMM init: V <- matrix(0,1344,31)
    H <- matrix(0, m+2, m+2)  #HMM init: H <- matrix(0,32,32)
    # use  Matrix(V[,j],nrow =n, ncol=1 ) later on?
    # nstep <- n.rej <- 0L      #irrelevant
    w <- v
    # updated in loop:
    # t_now, t_new, V, H, w, beta
    # updated on loop break:
    # mb
    while (t_now < t_1) {
        # nstep <- nstep + 1L
        t_step <- min(t_1 - t_now, t_new)
        # if(verbose) cat(sprintf("while(t_now = %g < ..): nstep=%d, t_step=%g\n",
        #                         t_now, nstep, t_step))
        V[,1] <- (1/beta)*w
        for (j in 1:m) {
            p <- as.vector(A %*% V[,j])  ## as of commit ab3e84e, this %*% is ~82% of all work!
            for (i in 1:j) {
                H[i,j] <- s <- sum(V[,i] *  p)
                p <- p - s * V[,i]
            }
            s <- sqrt(sum(p*p))
            if (s < btol) {
                k1 <- 0
                mb <- j
                t_step <- t_1 - t_now
                break
            }
            H[j+1, j] <- s
            V[, j+1] <- p / s
        } ## j-loop complete
        if (k1 != 0) {
            H[m+2, m+1] <- 1
            av <- A %*% V[, m+1]  ## as of commit ab3e84e, this %*% is just ~2.7% of all work
            avnorm <- sqrt(sum(av * av))
        }
        i.rej <- 0L
        while (i.rej <= mxrej) {
            mx <- mb + k1; imx <- seq_len(mx) # = 1:mx
            # if(verbose) cat(sprintf("	inner while: k1=%d -> mx=%d\n",
            #                         k1, mx))
            F <- internal_expm(sgn * t_step * H[imx,imx, drop=FALSE])
            if (k1 == 0) {
                err_loc <- btol
                break
            } else {
                phi1 <- abs(beta * F[m+1,1])
                phi2 <- abs(beta * F[m+2,1] * avnorm)
                if(is.nan(phi1) || is.nan(phi2))
                stop("NaN phi values; probably overflow in expm()")
                if (phi1 > 10*phi2) {
                    err_loc <- phi2
                    xm <- 1/m
                } else if (phi1 > phi2) {
                    err_loc <- (phi1 * phi2)/(phi1 - phi2)
                    xm <- 1/m
                } else {
                    err_loc <- phi1
                    xm <- 1/(m-1)
                }
            }
            if (err_loc <= delta * t_step*tol) break
            else {
                if (i.rej == mxrej)
                stop(gettextf('The requested tolerance (tol=%g) is too small for mxrej=%d.',
                tol, mxrej))
                t_step <- gamma * t_step * (t_step * tol / err_loc)^xm
                s <- 10^(floor(log10(t_step))-1)
                t_step <- s * ceiling(t_step / s)
                i.rej <- i.rej + 1L
            }
        }## end{ while (i.rej < mx..) }
        # n.rej <- n.rej + i.rej
        mx <- mb + max(0, k1-1); imx <- seq_len(mx) # = 1:mx
        w <- as.vector(V[, imx] %*% (beta*F[imx,1, drop=FALSE]))
        beta <- sqrt(sum(w*w))
        t_now <- t_now + t_step
        t_new <- myRound(gamma * t_step * (t_step*tol/err_loc)^xm)
        # err_loc <- max(err_loc, rndoff)
        # s_error <- s_error + err_loc
    }# end{ while }
    return(w)
}
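
# A minimal check sketch, kept as a comment so it does not run at package load:
# internal_expAtv(A, v, t) approximates expm::expm(A * t) %*% v via a Krylov
# projection without forming the full matrix exponential. Toy inputs only.
# set.seed(1)
# A <- matrix(rnorm(25), 5, 5)
# v <- runif(5)
# krylov <- internal_expAtv(A, v, t=0.3)
# dense  <- as.vector(expm::expm(A * 0.3) %*% v)
# max(abs(krylov - dense))  # expected to be well within the hard-coded 1e-7 tolerance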

## HMM variant of expAtv for evaluating multiple t for fixed v
exp_A_tvec_codon <- function(A, codon, tvec=1, v=NULL, subset=NULL )
{
    #Hardcoded arguments
    tol=1e-7; btol = 1e-7; m.max = 30; mxrej = 10 #constant

    ## R translation:  Ravi Varadhan, Johns Hopkins University
    ##		   "cosmetic", apply to sparse A: Martin Maechler, ETH Zurich
    d <- dim(A)
    # HMM constant: m <- c(1344,1344)
    n <- d[1]
    # HMM constant: n <- 1344
    if(!missing(codon)) {
        stopifnot(n == 1344)#, "If using codon notation, A must be 1344x1344.")
        stopifnot(length(codon)==1)#, "Only one codon may be processed at a time.")
        stopifnot(codon < 65 && codon > 0)#, "If using codon notation, codon must be in 1:64.")
        v=Matrix::sparseVector(x=rep(1,20),i=(c(0:15,17:20)*64+codon),length = 1344)
        # Next line is constant for all tips in HMM: beta <- 2*sqrt(5)
        beta <- 2*sqrt(5)
    } else {
        stopifnot(!is.null(v))#, "If not using codon notation, v must be provided.")
        stopifnot(length(v)==n)#, "v must be the same size as a side of A.")
        beta <- sqrt(sum(v*v))  # = norm(v) = ||v||

    }
    stopifnot(is.null(subset))#, "Subset notation not yet implemented.")

    if(n <= 1) {
        if(n == 1) return(as.list(exp(A*tvec)*v))
        stop("nrow(A) must be >= 1")
    }

    stopifnot(all(tvec>=0) || all(tvec<=0))# , "Mixed sign notation not yet implemented.")
    if( length(tvec)==0) return(list())
    if( length(tvec)==1) return(list(internal_expAtv(A=A,v=as.numeric(v),t=tvec)))
    res = rep(list(v),length(tvec))

    m <- min(n, m.max)
    # HMM constant: m <- 30
    gamma <- 0.9        # constant
    delta <- 1.2        # constant
    nA <- Matrix::norm(A, "I")  # varies with Q
    # Next line varies with Q and phy
    if(nA <  1e-6) { ## rescaling, by MMaechler, needed for small norms
        A <- A/nA
        tvec <- tvec*nA
        nA <- 1
    }
    rndoff <- nA * .Machine$double.eps # Varies with Q
    if(all(tvec>=0)){
        t_1 = max(tvec)
        sgn=1
        t_1_vec=tvec
    } else if(all(tvec<=0)){
        t_1 = min(tvec)
        sgn=-1
        t_1_vec=-tvec

    } else stop("This line should be impossible to reach.")

    t_now <- 0
    s_error <- 0
    k1 <- 2
    mb <- m    # HMM constant: mb <- 30
    xm <- 1/m  # HMM constant: xm <- 1/30
    mx1=m+2
    mx2=m+1
    imx1=seq_len(mx1)
    imx2=seq_len(mx2)
    if(beta == 0) ## border case: v is all 0, and the result is too
    return(res)
    # Next line is constant for all tips in HMM: fact <- 6.74967950018045e+33
    fact <- (((m+1)/exp(1))^(m+1))*sqrt(2*pi*(m+1))

    myRound <- function(tt) {
        s <- 10^(floor(log10(tt)) - 1)
        ceiling(tt/s)*s
    }
    t_new <- myRound( (1/nA)*(fact*tol/(4*beta*nA))^xm )
    # alt for HMM, varies with Q, phy and site
    # t_new <- myRound( (1/nA)*(nA*beta)^(-1/30)*7.48584399202831 )
    # alt constant for HMM tips, varies with Q
    # t_new <- myRound( (nA)^(-31/30)*7.12126158103164 )

    V <- matrix(0, n, m+1)    #HMM init: V <- matrix(0,1344,31)
    H <- matrix(0, m+2, m+2)  #HMM init: H <- matrix(0,32,32)
    w <- as.numeric(v)
    # updated in loop:
    # t_now, t_new, V, H, w, beta
    # updated on loop break:
    # mb
    while (t_now < t_1) {
        # nstep <- nstep + 1L
        subset = t_1_vec > t_now
        t_step <- min(t_1_vec[subset] - t_now, t_new)  # interval length to evaluate over
        V[,1] <- (1/beta)*w
        for (j in 1:m) {
            p <- as.vector(A %*% V[,j])
            for (i in 1:j) {
                H[i,j] <- s <- sum(V[,i] *  p)
                p <- p - s * V[,i]
            }
            s <- sqrt(sum(p*p))
            if (s < btol) {
                k1 <- 0
                mb <- j
                t_step <- t_1 - t_now
                break
            }
            H[j+1, j] <- s
            V[, j+1] <- p / s
        } ## j-loop complete
        if (k1 != 0) {
            H[m+2, m+1] <- 1
            av <- A %*% V[, m+1]  ## as of commit ab3e84e, this %*% is just ~2.7% of all work
            avnorm <- sqrt(sum(av * av))
        } else {  # cash out, all remaining evaluations can be performed at once!
            mx <- mb; imx = seq_len(mx)
            ivec = which(t_1_vec > t_now)
            F_list <- internal_expmt(A = H[imx,imx,drop=F],t_vec = tvec[ivec])
            res[ivec] <- lapply(F_list, function(F_mat) (beta * V[,imx] %*% F_mat[imx,1]) )
            return(res)
        }
        i.rej <- 0L
        while (i.rej <= mxrej) {
            #mx <- mb + k1; imx <- seq_len(mx) # = 1:mx
            F_edge <- internal_expm(sgn * t_step * H[imx1,imx1, drop=FALSE])

            # Check for adjustment due to errors
            phi1 <- abs(beta * F_edge[m+1,1])
            phi2 <- abs(beta * F_edge[m+2,1] * avnorm)
            if(is.nan(phi1) || is.nan(phi2))
            stop("NaN phi values; probably overflow in expm()")
            if (phi1 > 10*phi2) {
                err_loc <- phi2
                xm <- 1/m
            } else if (phi1 > phi2) {
                err_loc <- (phi1 * phi2)/(phi1 - phi2)
                xm <- 1/m
            } else {
                err_loc <- phi1
                xm <- 1/(m-1)
            }
            # Check if error is within tolerance
            if (err_loc <= delta * t_step*tol) break
            else {
                if (i.rej == mxrej)
                stop(gettextf('The requested tolerance (tol=%g) is too small for mxrej=%d.',
                tol, mxrej))
                # reduce evaluation interval and re-check error estimates
                t_step <- gamma * t_step * (t_step * tol / err_loc)^xm
                s <- 10^(floor(log10(t_step))-1)
                t_step <- s * ceiling(t_step / s)
                i.rej <- i.rej + 1L
            }
        }## end{ while (i.rej < mx..) }
        # n.rej <- n.rej + i.rej
        #mx <- mb + max(0, k1-1); imx <- seq_len(mx) # = 1:mx
        # Check to see if tvec falls in this interval
        if(any(t_1_vec < t_now+t_step & t_1_vec > t_now)){
            # Due to errors in estimates, code rewritten.
            # It should never enter this block any more
            stop("Interval should never surround any tvec point, tvec should only include endpoints.")
            ivec = which(t_1_vec < t_now+t_step & t_1_vec > t_now)
            F_list <- internal_expmt(A = H[imx1,imx1,drop=F],t_vec = tvec[ivec])
            res[ivec] <- lapply(F_list, function(F_mat) as.vector(V[,imx2] %*% (beta * F_mat[imx2,1,drop=F])) )
        }

        # Move on to next interval
        w <- as.vector(V[, imx2] %*% (beta*F_edge[imx2,1, drop=FALSE]))
        beta <- sqrt(sum(w*w))
        t_now <- t_now + t_step
        t_new <- myRound(gamma * t_step * (t_step*tol/err_loc)^xm)
        # Check to see if tvec is on this edge
        if(any(key <- t_1_vec == t_now))
        res[key] <- rep(list(w),sum(key))
    }# end{ while }
    return(res)
}
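
# A minimal usage sketch, kept as a comment so it does not run at package load.
# Q.hmm and phy are hypothetical placeholders: Q.hmm must be the 1344x1344 HMM
# generator and codon an integer in 1:64, per the stopifnot() checks above.
# probs <- exp_A_tvec_codon(A=Q.hmm, codon=5, tvec=phy$edge.length)
# length(probs)       # one entry per branch length in tvec
# length(probs[[1]])  # 1344 state probabilities for that branch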


GetExpQt <- function(phy, Q, scale.factor, rates=NULL){
    if(!is.null(scale.factor)){
        Q.scaled = Q * (1/scale.factor)
    }else{
        Q.scaled = Q
    }
    if(!is.null(rates)){
        Q.scaled = Q.scaled * rates
    }
    nb.tip <- length(phy$tip.label)
    nb.node <- phy$Nnode
    expQt <- as.list(numeric(nb.tip + nb.node))
    TIPS <- 1:nb.tip
    comp <- numeric(nb.tip + nb.node)
    #phy <- reorder(phy, "pruningwise")
    #Obtain an object of all the unique ancestors
    anc <- unique(phy$edge[,1])
    desRows <- do.call(c,lapply(anc,
    function(focal){
        which(phy$edge[,1]==focal)
    }))

    desNodes <- phy$edge[desRows,2]
    expQt[desNodes] <-  internal_expmt(Q.scaled,phy$edge.length[desRows])

    return(expQt)
}
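
# A minimal sketch of the "Step 1" precomputation, kept as a comment so it does
# not run at package load; the 4-state generator below is purely hypothetical.
# The returned list is indexed by descendant node, so expQt[[node]] holds the
# transition probability matrix for the edge leading to that node.
# phy <- reorder(ape::rtree(4), "pruningwise")
# Q <- matrix(1, 4, 4); diag(Q) <- -3
# expQt <- GetExpQt(phy, Q, scale.factor=3)
# dim(expQt[[1]])  # 4 x 4 matrix for the edge above tip 1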


#GetExpQtParallel <- function(phy, Q, scale.factor, rates=NULL, ncores = 1){
#
#    if(!is.null(scale.factor)){
#        Q.scaled = Q * (1/scale.factor)
#    }else{
#        Q.scaled = Q
#    }

#    if(!is.null(rates)){
#        Q.scaled = Q.scaled * rates
#    }
#    nb.tip <- length(phy$tip.label)
#   nb.node <- phy$Nnode
#    expQt <- as.list(numeric(nb.tip + nb.node))
#    TIPS <- 1:nb.tip
#    comp <- numeric(nb.tip + nb.node)
#phy <- reorder(phy, "pruningwise")
#Obtain an object of all the unique ancestors
#    anc <- unique(phy$edge[,1])
#    edge.idx <- rep(NA, length(phy$edge[,1]))

#    k <- 1
#    for (i  in seq(from = 1, length.out = nb.node)) {
#the ancestral node at row i is called focal
#        focal <- anc[i]
#Get descendant information of focal
#        desRows <- which(phy$edge[,1]==focal)
#        desNodes <- phy$edge[desRows,2]
#        for (desIndex in sequence(length(desRows))){
#            edge.idx[k] <- desNodes[desIndex]
#            k <- k + 1
#        }
#    }
#    expQt <- mclapply(X = edge.idx, FUN = function(X){return(eigenExpM(Q.scaled * phy$edge.length[desRows[desIndex]]))}, mc.cores = ncores)
#    return(expQt)
#}


#Step 2: Finish the likelihood calculation using the already exponentiated Q matrices: re-traverse the tree and multiply in the observed tip likelihoods.
FinishLikelihoodCalculation <- function(phy, liks, Q, root.p, anc){

    nb.tip <- length(phy$tip.label)
    nb.node <- phy$Nnode
    TIPS <- 1:nb.tip
    comp <- numeric(nb.tip + nb.node)
    if(any(root.p < 0) | any(is.na(root.p))){
        return(1000000)
    }
    #Obtain an object of all the unique ancestors
    for (i  in seq(from = 1, length.out = nb.node)) {
        #the ancestral node at row i is called focal
        focal <- anc[i]
        #Get descendant information of focal
        desRows <- which(phy$edge[,1]==focal)
        desNodes <- phy$edge[desRows,2]
        v <- 1
        descendant.count <- 0
        for (desIndex in desNodes){
            if(desIndex <= nb.tip){
                if(sum(liks[desIndex,]) < 2){
                    v <- v * (Q[[desIndex]] %*% liks[desIndex,])
                    descendant.count <- descendant.count + 1
                }
            }else{
                v <- v * (Q[[desIndex]] %*% liks[desIndex,])
                descendant.count <- descendant.count + 1
            }
        }
        if(descendant.count>1){
            comp[focal] <- sum(v)
            liks[focal,] <- v/comp[focal]
        }else{
            comp[focal] <- 1
            liks[focal,] <- v
        }
    }
    #Specifies the root:
    root <- nb.tip + 1L
    #If any of the logs have NAs restart search:
    if(is.nan(sum(log(comp[-TIPS]))) || is.na(sum(log(comp[-TIPS])))){
        return(1000000)
    }
    else{
        loglik <- -(sum(log(comp[-TIPS])) + log(sum(root.p * liks[root,])))
        if(is.infinite(loglik)){return(1000000)}
    }
    loglik
}
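
# A minimal sketch of the full two-step calculation, kept as a comment so it
# does not run at package load; the 4-state toy model, random tip states, and
# flat root frequencies are hypothetical. The function returns the negative
# log-likelihood used by the optimizers.
# phy <- reorder(ape::rtree(4), "pruningwise")
# Q <- matrix(1, 4, 4); diag(Q) <- -3
# expQt <- GetExpQt(phy, Q, scale.factor=NULL)
# nb.tip <- length(phy$tip.label); nb.node <- phy$Nnode
# liks <- matrix(0, nb.tip + nb.node, 4)
# liks[cbind(1:nb.tip, sample(1:4, nb.tip, replace=TRUE))] <- 1  # one observed state per tip
# anc <- unique(phy$edge[,1])
# FinishLikelihoodCalculation(phy, liks, Q=expQt, root.p=rep(0.25, 4), anc=anc)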


FinishLikelihoodCalculationHMM <- function(phy, liks, Q, root.p, anc){

    nb.tip <- length(phy$tip.label)
    nb.node <- phy$Nnode
    TIPS <- 1:nb.tip
    comp <- numeric(nb.tip + nb.node)

    if(any(root.p < 0) | any(is.na(root.p))){
        return(1000000)
    }
    #Obtain an object of all the unique ancestors
    for (i  in seq(from = 1, length.out = nb.node)) {
        #the ancestral node at row i is called focal
        focal <- anc[i]
        #Get descendant information of focal
        desRows<-which(phy$edge[,1]==focal)
        desNodes<-phy$edge[desRows,2]
        v <- 1
        for (desIndex in desNodes){
            if(desIndex <= nb.tip){
                if(sum(liks[desIndex,]) < 65){
                    v <- v * (Q[[desIndex]] %*% liks[desIndex,])
                }
            }else{
                v <- v * (Q[[desIndex]] %*% liks[desIndex,])
            }
        }
        comp[focal] <- sum(v)
        liks[focal,] <- v/comp[focal]
    }
    #Specifies the root:
    root <- nb.tip + 1L
    #If any of the logs have NAs restart search:
    if(is.nan(sum(log(comp[-TIPS]))) || is.na(sum(log(comp[-TIPS])))){
        return(1000000)
    }
    else{
        loglik<- -(sum(log(comp[-TIPS])) + log(sum(root.p * liks[root,])))
        if(is.infinite(loglik)){return(1000000)}
    }
    loglik
}


#Step 2: Finish likelihood by taking our already exponentiated Q down the tree and simply re-traverse the tree and multiply by the observed likelihood.
#This version was rewritten by Cedric Landerer but is not any faster than the version I wrote.
#code <- '
#arma::mat m = Rcpp::as<arma::mat>(a);
#arma::vec v = Rcpp::as<arma::vec>(e);
#arma::vec out = Rcpp::as<arma::vec>(q);
#return Rcpp::wrap( out % (m * v) );
#'
#rcppArma <- cxxfunction(signature(a="numeric",e="numeric", q="numeric"), code, plugin="RcppArmadillo")

#FinishLikelihoodCalculationOLD <- function(phy, liks, Q, root.p, anc){
#   root <- length(phy$tip.label) + 1
#   nb.node <- phy$Nnode
#   nb.tip <- Ntip(phy)
#   comp <- numeric(nb.node)
#
#   if(any(root.p < 0) | any(is.na(root.p))){
#       return(10000000000)
#   }
#
#   #Obtain an object of all the unique ancestors
#   for (i in 1:nb.node) {
#       #the ancestral node at row i is called focal
#       focal <- anc[i]
#       #Get descendant information of focal
#       desRows <- which(phy$edge[,1]==focal)
#       desNodes <- phy$edge[desRows,2]
#       #desNodes <- desNodes[desNodes <= nb.tip]
#       v <- rep(1, 64)
#       for (desIndex in desNodes){
#           v <- rcppArma(Q[[desIndex]], liks[desIndex,], v)
#       }
#       comp[i] <- sum(v)
#       liks[focal,] <- v/comp[i]
#   }
#   #If any of the logs have NAs restart search:
#   log.comp <- log(comp)
#   if(any(!is.finite(log.comp))){
#       loglik <- 10000000000
#   }else{
#       loglik <- -(sum(log.comp) + log(sum(root.p * liks[root,])))
#       if(!is.finite(loglik)){
#           loglik <- 10000000000
#       }
#   }
#   return(loglik)
#}


######################################################################################################################################
######################################################################################################################################
### Likelihood calculator -- ODE solver
######################################################################################################################################

TreeTraversalODE <- function(phy, Q_codon_array_vectored, liks.HMM, bad.likelihood=-100000, root.p) {

    ##start with first method and move to next if problems encountered
    ## when solving ode, such as negative pr values < neg.pr.threshold
    ode.method.vec <- c("ode45", "lsoda")
    num.ode.method <- length(ode.method.vec)

    rtol = 1e-7 #default 1e-6 returns a negative value under long branch testing conditions
    atol = 1e-6 #default 1e-6

    neg.pr.threshold <- -10*atol


    nb.tip <- length(phy$tip.label)
    nb.node <- phy$Nnode

    anc <- unique(phy$edge[,1])
    TIPS <- 1:nb.tip

    comp <- numeric(nb.tip + nb.node)

    for (i in seq(from = 1, length.out = nb.node)) {
        focal <- anc[i]
        desRows <- which(phy$edge[,1]==focal) ##des = descendant
        desNodes <- phy$edge[desRows,2]
        state.pr.vector = rep(1, dim(liks.HMM)[2]) ##

        for (desIndex in sequence(length(desRows))){
            yini <- liks.HMM[desNodes[desIndex],]
            times=c(0, phy$edge.length[desRows[desIndex]])

            ode.not.solved <- TRUE
            ode.solver.attempt <- 0

            while(ode.not.solved && ode.solver.attempt < num.ode.method){
                ode.solver.attempt <- ode.solver.attempt+1
                ode.method <-  ode.method.vec[ode.solver.attempt]

                subtree.pr.ode.obj <- ode(
                y=yini, times=times, func = "selacHMM",
                parms=Q_codon_array_vectored, initfunc="initmod_selacHMM",
                dllname = "selac",
                method=ode.method, rtol=rtol, atol=atol
                )

                ## CHECK TO ENSURE THAT THE INTEGRATION WAS SUCCESSFUL ###########
                ## $istate should be = 0 [documentation in doc/deSolve.Rnw indicates
                ## it should be 2]
                ## Values < 0 indicate problems
                ## TODO: take advantage of while() around ode solving created
                ## for when we hit negative values
                istate <- attributes(subtree.pr.ode.obj)$istate[1]

                if(istate < 0){
                    ## For \code{lsoda, lsodar, lsode, lsodes, vode, rk, rk4, euler} these are
                    error.text <- switch(as.character(istate),
                    "-1"="excess work done",
                    "-2"="excess accuracy requested",
                    "-3"="illegal input detected",
                    "-4"="repeated error test failures",
                    "-5"="repeated convergence failures",
                    "-6"="error weight became zero",
                    paste("unknown error. ode() istate value: ", as.character(istate))
                    )

                    warning(print(paste("selac.R: Integration of descendent index", desIndex, ": ode solver returned state = ",  istate, " : ", error.text)))

                    if(ode.solver.attempt < num.ode.method){
                        warning.message <- paste("\tTrying ode method ", ode.method.vec[ode.solver.attempt+1])
                        warning(warning.message)
                    }else{
                        warning.message <- paste(warning.message, "No additional ode methods available. Returning bad.likelihood: ", bad.likelihood)
                        warning(warning.message)
                        return(bad.likelihood)
                    }
                    #return(bad.likelihood)
                }else{
                    ##no integration issues,
                    ## object consists of pr values at start and end time
                    ## extract final state variable, dropping time entry
                    subtree.pr.vector <- subtree.pr.ode.obj[dim(subtree.pr.ode.obj)[[1]],-1]


                    ## test for negative entries
                    ## if encountered and less than neg.pr.threshold
                    ## replace the negative values to 0
                    ## if there are values less than neg.pr.threshold, then
                    ## resolve equations using more robust method on the list
                    ## Alternative: use 'event' option in deSolve as described at
                    ## http://stackoverflow.com/questions/34424716/using-events-in-desolve-to-prevent-negative-state-variables-r
                    neg.vector.pos <- which(subtree.pr.vector < 0, arr.ind=TRUE)
                    num.neg.vector.pos <- length(neg.vector.pos)

                    if(num.neg.vector.pos > 0){
                        min.vector.val <- min(subtree.pr.vector[neg.vector.pos])
                        neg.vector.pos.as.string <- toString(neg.vector.pos)

                        warning.message <- paste("WARNING: subtree.pr.vector solved with ode method ", ode.method, " contains ", num.neg.vector.pos, " negative values at positions ", neg.vector.pos.as.string ,  "of a ", length(subtree.pr.vector), " vector." )


                        if(min.vector.val > neg.pr.threshold){
                            warning.message <- paste(warning.message, "\nMinimum value ", min.vector.val, " >  ", neg.pr.threshold, " the neg.pr.threshold.\nSetting all negative values to 0.")
                            warning(warning.message)
                            subtree.pr.vector[neg.vector.pos] <- 0

                        }else{
                            warning.message <- paste(warning.message, "selac.R: minimum value ", min.vector.val, " <  ", neg.pr.threshold, " the neg.pr.threshold.")

                            if(ode.solver.attempt < num.ode.method){
                                warning.message <- paste(warning.message, " Trying ode method ", ode.method.vec[ode.solver.attempt+1])
                                warning(warning.message)

                            }else{
                                warning.message <- paste(warning.message, "No additional ode methods available. Returning bad.likelihood: ", bad.likelihood)
                                warning(warning.message)
                                return(bad.likelihood)
                            }
                        }
                    }else{
                        ## no negative values in pr.vs.time.matrix
                        ode.not.solved <- FALSE
                    }
                } ## end else to istate < 0
            } ##end while() for ode solver


            state.pr.vector <- state.pr.vector * subtree.pr.vector
        }
        comp[focal] <- sum(state.pr.vector)
        liks.HMM[focal,] <- state.pr.vector/comp[focal]
    }
    root.node <- nb.tip + 1L

    ##Check for negative transition rates
    ##mikeg:  For now, just issue warning


    neg.nodes <- which(liks.HMM[root.node,] <0)
    if(length(neg.nodes)>0){
        warning(paste("selac.R: encountered " , length(neg.nodes), " negatives values in liks.HMM[", root.node, ", ", neg.nodes, " ] =  ",  liks.HMM[root.node, neg.nodes], " at position ", i, " , desIndex ", desIndex))
    }



    loglik <- -(sum(log(comp[-TIPS])) + log(sum(root.p * liks.HMM[root.node,])))

    ##return bad.likelihood if loglik is bad
    if(!is.finite(loglik)) return(bad.likelihood)

    return(loglik)
}
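
# A minimal sketch of the solver-fallback pattern used above, kept as a comment
# so it does not run at package load: try each deSolve method in turn and keep
# the first run whose istate diagnostic is non-negative. The exponential-decay
# derivative function here is purely illustrative, not the selac HMM system.
# decay <- function(t, y, parms) list(-parms * y)
# for (ode.method in c("ode45", "lsoda")) {
#     out <- deSolve::ode(y=c(x=1), times=c(0, 1), func=decay, parms=0.5, method=ode.method)
#     istate <- attributes(out)$istate[1]
#     if (!is.null(istate) && istate >= 0) break  # accept the first successful solve
# }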



#' @title Efficient optimization of the SELAC model
#' @aliases selac
#'
#' @description
#' Efficient optimization of model parameters under the SELAC model
#'
#' @param codon.data.path Provides the path to the directory containing the gene-specific fasta files of coding data. Files must have a ".fasta" extension.
#' @param n.partitions The number of partitions to analyze. The order is based on the Unix order of the fasta files in the directory.
#' @param phy The phylogenetic tree to optimize the model parameters.
#' @param data.type The data type being tested. Options are "codon" or "nucleotide".
#' @param codon.model The type of codon model to use. There are six options: "none", "GY94", "YN98", "FMutSel0", "FMutSel", or "selac".
#' @param edge.length Indicates whether or not edge lengths should be optimized. By default it is set to "optimize"; the other option is "fixed", which uses the user-supplied branch lengths.
#' @param edge.linked A logical indicating whether or not edge lengths should be optimized separately for each gene. By default, a single set of edge lengths is optimized for all genes.
#' @param optimal.aa Indicates what type of optimal.aa should be used. There are five options: "none", "majrule", "averaged", "optimize", or "user".
#' @param nuc.model Indicates what type of nucleotide model to use. There are three options: "JC", "GTR", or "UNREST".
#' @param include.gamma A logical indicating whether or not to include a discrete gamma model.
#' @param gamma.type Indicates what type of gamma distribution to use. Options are "quadrature" after the Laguerre quadrature approach of Felsenstein (2001), "median" after the median approach of Yang (1994), or "lognormal" after a lognormal quadrature approach.
#' @param ncats The number of discrete categories.
#' @param numcode The ncbi genetic code number for translation. By default the standard (numcode=1) genetic code is used.
#' @param diploid A logical indicating whether or not the organism is diploid.
#' @param k.levels Provides how many levels in the polynomial. By default we assume a single level (i.e., linear).
#' @param aa.properties User-supplied amino acid distance properties. By default we assume Grantham (1974) properties.
#' @param verbose Logical indicating whether each iteration should be printed to the screen.
#' @param n.cores.by.gene The number of cores to dedicate to parallelizing analyses across genes.
#' @param n.cores.by.gene.by.site The number of cores to dedicate to parallelizing analyses by site WITHIN a gene. Note that n.cores.by.gene*n.cores.by.gene.by.site is the total number of cores dedicated to the analysis.
#' @param max.tol Supplies the relative optimization tolerance.
#' @param max.tol.edges Supplies the relative optimization tolerance for branch lengths only. Default is the same as max.tol.
#' @param max.evals Supplies the max number of iterations tried during optimization.
#' @param max.restarts Supplies the number of random restarts.
#' @param user.optimal.aa If optimal.aa is set to "user", this option allows for the user-input optimal amino acids. Must be a list. To get the proper order of the partitions see "GetPartitionOrder" documentation.
#' @param fasta.rows.to.keep Indicates which rows to keep in the input fasta files.
#' @param recalculate.starting.brlen Whether to use given branch lengths in the starting tree or recalculate them.
#' @param output.by.restart Logical indicating whether or not each restart is saved to a file. Default is TRUE.
#' @param output.restart.filename Designates the file name for each random restart.
#' @param user.supplied.starting.param.vals Designates user-supplied starting values for C.q.phi.Ne, Grantham alpha, and Grantham beta. Default is NULL.
#' @param tol.step If > 1, makes for coarser tolerance at earlier iterations of the optimizer
#' @param optimizer.algorithm The optimizer used by nloptr.
#' @param start.from.mle If TRUE, will start optimization from the MLE. Default is FALSE.
#' @param mle.matrix The user-supplied matrix of parameter values for when start.from.mle is set to TRUE.
#' @param partition.order Allows for a specialized order of the partitions to be gathered from the working directory.
#' @param max.iterations Sets the number of cycles to optimize the different parts of the model.
#' @param dt.threads Indicates how many threads data.table is allowed to use. Default is one.
#'
#' @details
#' Here we optimize parameters for each gene separately while keeping the shared parameters (alpha, beta, edge lengths, and nucleotide substitution parameters) constant across genes. We then optimize alpha, beta, the GTR parameters, and the edge lengths while keeping the remaining gene-specific parameters fixed. This approach is potentially more efficient than simply optimizing all parameters simultaneously, especially when fitting models across hundreds of genes.
#'
#' @examples
#' \dontrun{
#' phy <- ape::read.tree(file=system.file("extdata", "rokasYeast.tre", package="selac"))
#' result <- SelacOptimize(codon.data.path = paste0(find.package("selac"), '/extdata/'),
#' n.partitions=1, phy=phy, max.evals=10)
#' print(result)
#' }
#' @export
SelacOptimize <- function(codon.data.path, n.partitions=NULL, phy, data.type="codon", codon.model="selac", edge.length="optimize", edge.linked=TRUE, optimal.aa="optimize", nuc.model="GTR", include.gamma=FALSE, gamma.type="quadrature", ncats=4, numcode=1, diploid=TRUE, k.levels=0, aa.properties=NULL, verbose=FALSE, n.cores.by.gene=1, n.cores.by.gene.by.site=1, max.tol=1e-3, max.tol.edges=1e-3, max.evals=1000000, max.restarts=3, user.optimal.aa=NULL, fasta.rows.to.keep=NULL, recalculate.starting.brlen=TRUE, output.by.restart=TRUE, output.restart.filename="restartResult", user.supplied.starting.param.vals=NULL, tol.step=1, optimizer.algorithm="NLOPT_LN_SBPLX", start.from.mle=FALSE, mle.matrix=NULL, partition.order=NULL, max.iterations=6, dt.threads=1) {

    if(!data.type == "codon" & !data.type == "nucleotide"){
        stop("Check that your data type input is correct. Options are codon or nucleotide", call.=FALSE)
    }
    if(!codon.model == "none" & !codon.model == "GY94" & !codon.model == "YN98" & !codon.model == "FMutSel0" & !codon.model == "FMutSel" & !codon.model == "selac"){
        stop("Check that your codon model is correct. Options are GY94, FMutSel0, or selac", call.=FALSE)
    }
    if(!edge.length == "optimize" & !edge.length == "fixed"){
        stop("Check that you have a supported edge length option. Options are optimize or fixed.", call.=FALSE)
    }
    if(!optimal.aa == "optimize" & !optimal.aa == "majrule" & !optimal.aa == "averaged" & !optimal.aa == "none" & !optimal.aa == "user"){
        stop("Check that you have a supported optimal amino acid option. Options are optimize, majrule, none, or user", call.=FALSE)
    }
    if(!nuc.model == "JC" & !nuc.model == "GTR" & !nuc.model == "UNREST"){
        stop("Check that you have a supported nucleotide substitution model. Options are JC, GTR, or UNREST.", call.=FALSE)
    }
    if(!gamma.type == "quadrature" & !gamma.type == "median" & !gamma.type == "lognormal"){
        stop("Check that you have a supported gamma type. Options are quadrature after Felsenstein 2001 or median after Yang 1994 or lognormal.", call.=FALSE)
    }

    if(!is.null(user.optimal.aa)){
        if(is.list(user.optimal.aa) == FALSE){
            stop("User-supplied optimal amino acids must be input as a list.", call.=FALSE)
        }
    }

    if(start.from.mle == TRUE){
        partitions <- partition.order
    }else{
        partitions <- system(paste("ls -1 ", codon.data.path, "*.fasta", sep=""), intern=TRUE)
    }
    if(is.null(n.partitions)){
        n.partitions <- length(partitions)
    }

    if(n.partitions<n.cores.by.gene) {
        warning(paste0("You have ", n.partitions, " partition (set with the n.partitions argument) but are asking to run across ", n.cores.by.gene, " cores, so ", n.cores.by.gene - n.partitions, " cores will not be used"))
    }


    #fix user-supplied branch lengths that are shorter than the prespecified minimum.
    if(recalculate.starting.brlen==FALSE){
        phy$edge.length[phy$edge.length <= 1e-8]<-(1e-8)*1.01
    }


    #check that the taxon names are correctly formatted in comparison to the tree (DE)
    #{fastas=list.files(codon.data.path,pattern="*.fasta")
    #  if (identical(sort(unlist(names(read.FASTA(fastas[1])))) , sort(unlist(phy[[4]])))
    #  ) {
    #    print("DATA CHECK: Taxa in first alignment identical to taxa in tree. Good.")
    #    errorStatus<-"Safe"
    #  } else {
    #    print("Error: Taxa in alignment are not identical to taxa in tree. Check your input files. Exiting...")
    #    errorStatus<-"exit"
    #  }
    #  stopifnot(errorStatus!="exit")

    #check that the taxon names are correctly formated within all alignment files (DE)
    #  if(
    #  all(unlist(foreach(j = 1:length(fastas))%do%{                               ##makes pairwise checks of all the alignments to see if the taxon names are identical. If they are ALL identical then no error is thrown.
    #    foreach(k = length(fastas):1)%do%{
    #      identical(
    #        sort(names(read.FASTA(fastas[j]))), sort(names(read.FASTA(fastas[k])))
    #      )
    #    }
    #  }))
    #){
    #  print("DATA CHECK: Taxa in alignments identical to each other. Good.")
    #  errorStatus<-"Safe"
    #} else {
    #  print("Error: Taxa in alignments are not identical each other. Check your input files. Exiting...")
    #  errorStatus<-"exit"
    #}
    #stopifnot(errorStatus!="exit")
    #}


    #checks that the options being used make sense (DE)
    #  if(codon.model=="none"&optimal.aa=="none"&data.type=="codon"){
    #print("You have turned off amino-acid optimization and you're not using a codon model. Please set your data-type to 'nucleotide'. Exiting...")
    #errorStatus <- "exit"
    #}
    #stopifnot(errorStatus!="exit")

    ##########


    cat(paste("Using", n.cores.by.gene * n.cores.by.gene.by.site, "total processors", sep=" "), "\n")
    setDTthreads(threads=dt.threads)
    cat(paste("Allowing data.table to use", dt.threads, "threads", sep=" "), "\n")

    cat("Initializing data and model parameters...", "\n")

    site.pattern.data.list <- as.list(numeric(n.partitions))
    site.pattern.count.list <- as.list(numeric(n.partitions))
    nsites.vector <- c()
    if(optimal.aa == "none"){
        if(data.type == "nucleotide"){
            empirical.base.freq.list <- as.list(numeric(n.partitions))
            starting.branch.lengths <- matrix(0, n.partitions, length(phy$edge[,1]))
            for (partition.index in sequence(n.partitions)) {
                gene.tmp <- read.dna(partitions[partition.index], format='fasta')
                if(!is.null(fasta.rows.to.keep)){
                    gene.tmp <- as.list(as.matrix(cbind(gene.tmp))[fasta.rows.to.keep,])
                }else{
                    gene.tmp <- as.list(as.matrix(cbind(gene.tmp)))
                }
                starting.branch.lengths[partition.index,] <- ComputeStartingBranchLengths(phy, gene.tmp, data.type=data.type, recalculate.starting.brlen=recalculate.starting.brlen)$edge.length
                nucleotide.data <- DNAbinToNucleotideNumeric(gene.tmp)
                nucleotide.data <- nucleotide.data[phy$tip.label,]
                nsites.vector = c(nsites.vector, dim(nucleotide.data)[2] - 1)
                empirical.base.freq <- as.matrix(nucleotide.data[,-1])
                empirical.base.freq <- table(empirical.base.freq, deparse.level = 0)/sum(table(empirical.base.freq, deparse.level = 0))
                empirical.base.freq.list[[partition.index]] <- as.vector(empirical.base.freq[1:4])
                nucleotide.data <- SitePattern(nucleotide.data, includes.optimal.aa=FALSE)
                site.pattern.data.list[[partition.index]] = nucleotide.data$unique.site.patterns
                site.pattern.count.list[[partition.index]] = nucleotide.data$site.pattern.counts
            }
        }else{
            codon.freq.by.gene.list <- as.list(numeric(n.partitions))
            empirical.aa.freq.list <- as.list(numeric(n.partitions))
            starting.branch.lengths <- matrix(0, n.partitions, length(phy$edge[,1]))
            for (partition.index in sequence(n.partitions)) {
                gene.tmp <- read.dna(partitions[partition.index], format='fasta')
                if(!is.null(fasta.rows.to.keep)){
                    gene.tmp <- as.list(as.matrix(cbind(gene.tmp))[fasta.rows.to.keep,])
                }else{
                    gene.tmp <- as.list(as.matrix(cbind(gene.tmp)))
                }
                starting.branch.lengths[partition.index,] <- ComputeStartingBranchLengths(phy, gene.tmp, data.type=data.type, recalculate.starting.brlen=recalculate.starting.brlen)$edge.length
                codon.data <- DNAbinToCodonNumeric(gene.tmp)
                codon.data <- codon.data[phy$tip.label,]
                nsites.vector = c(nsites.vector, dim(codon.data)[2] - 1)
                aa.data <- ConvertCodonNumericDataToAAData(codon.data, numcode=numcode)
                aa.optim <- apply(aa.data[, -1], 2, GetMaxName) #starting values for all, final values for majrule
                empirical.aa.freq.list[[partition.index]] <- GetAAFreqsByGene(codon.data[,-1], aa.optim, numcode=numcode)
                codon.freq.by.gene.list[[partition.index]] <- GetCodonFreqsByGene(codon.data[,-1])
                codon.data <- SitePattern(codon.data, includes.optimal.aa=FALSE)
                site.pattern.data.list[[partition.index]] = codon.data$unique.site.patterns
                site.pattern.count.list[[partition.index]] = codon.data$site.pattern.counts
            }
        }
    }else{
        codon.freq.by.aa.list <- as.list(numeric(n.partitions))
        codon.freq.by.gene.list <- as.list(numeric(n.partitions))
        starting.branch.lengths <- matrix(0, n.partitions, length(phy$edge[,1]))
        aa.optim.list <- as.list(numeric(n.partitions))
        aa.optim.full.list <- as.list(numeric(n.partitions))
        for (partition.index in sequence(n.partitions)) {
            gene.tmp <- read.dna(partitions[partition.index], format='fasta')
            if(!is.null(fasta.rows.to.keep)){
                gene.tmp <- as.list(as.matrix(cbind(gene.tmp))[fasta.rows.to.keep,])
            }else{
                gene.tmp <- as.list(as.matrix(cbind(gene.tmp)))
            }
            starting.branch.lengths[partition.index,] <- ComputeStartingBranchLengths(phy, gene.tmp, data.type=data.type, recalculate.starting.brlen=recalculate.starting.brlen)$edge.length
            codon.data <- DNAbinToCodonNumeric(gene.tmp)
            codon.data <- codon.data[phy$tip.label,]
            nsites.vector = c(nsites.vector, dim(codon.data)[2] - 1)
            aa.data <- ConvertCodonNumericDataToAAData(codon.data, numcode=numcode)
            if(optimal.aa == "user"){
                aa.optim <- user.optimal.aa[[partition.index]]
                aa.optim.full.list[[partition.index]] <- aa.optim
            }else{
                aa.optim <- apply(aa.data[, -1], 2, GetMaxName) #starting values for all, final values for majrule
                aa.optim.full.list[[partition.index]] <- aa.optim
            }
            codon.freq.by.aa.list[[partition.index]] <- GetCodonFreqsByAA(codon.data[,-1], aa.optim, numcode=numcode)
            codon.freq.by.gene.list[[partition.index]] <- GetCodonFreqsByGene(codon.data[,-1])
            aa.optim.frame.to.add <- matrix(c("optimal", aa.optim), 1, dim(codon.data)[2])
            colnames(aa.optim.frame.to.add) <- colnames(codon.data)
            codon.data <- rbind(codon.data, aa.optim.frame.to.add)
            codon.data <- SitePattern(codon.data, includes.optimal.aa=TRUE)
            site.pattern.data.list[[partition.index]] = codon.data$unique.site.patterns
            site.pattern.count.list[[partition.index]] = codon.data$site.pattern.counts
            aa.optim.list[[partition.index]] = codon.data$optimal.aa
        }
    }

    opts <- list("algorithm" = optimizer.algorithm, "maxeval" = max.evals, "ftol_rel" = max.tol)
    opts.edge <- list("algorithm" = optimizer.algorithm, "maxeval" = max.evals, "ftol_rel" = max.tol.edges)


    results.final <- c()
    if(nuc.model == "JC"){
        nuc.ip = NULL
        max.par.model.count = 0
        parameter.column.names <- c()
    }
    if(nuc.model == "GTR"){
        nuc.ip = rep(1, 5)
        max.par.model.count = 5
        parameter.column.names <- c("C_A", "G_A", "T_A", "G_C", "T_C")
    }
    if(nuc.model == "UNREST"){
        nuc.ip = rep(1, 11)
        max.par.model.count = 11
        parameter.column.names <- c("C_A", "G_A", "T_A", "A_C", "G_C", "T_C", "A_G", "C_G", "A_T", "C_T", "G_T")
    }
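    # The nucleotide models differ only in how many free exchangeability/rate
    # parameters they add: none for JC, 5 for GTR, and 11 for UNREST. Judging
    # from the column names, the G-T rate (T_G for UNREST) is the one held fixed
    # as the reference, so the listed rates are estimated relative to it.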

    if(optimal.aa=="none") {
        if(data.type == "nucleotide"){
            codon.index.matrix = NA
            if(include.gamma == TRUE){
                ip = c(1,nuc.ip)
                upper = c(5, rep(21, length(ip)-1))
                lower = rep(-21, length(ip))
                #lower[1] <- log(0.01)
                max.par.model.count = max.par.model.count + 1
                parameter.column.names <- c("shape.gamma", parameter.column.names)
            }else{
                ip = nuc.ip
                upper = rep(21, length(ip))
                lower = rep(-21, length(ip))
            }
            index.matrix = matrix(0, n.partitions, length(ip))
            index.matrix[1,] = 1:ncol(index.matrix)
            ip.vector = ip
            upper.vector = upper
            lower.vector = lower
            if(n.partitions > 1){
                for(partition.index in 2:n.partitions){
                    ip.vector = c(ip.vector, ip)
                    upper.vector = c(upper.vector, upper)
                    lower.vector = c(lower.vector, lower)
                    index.matrix.tmp = numeric(max.par.model.count)
                    index.matrix.tmp[index.matrix.tmp==0] = seq(max(index.matrix)+1, length.out=length(index.matrix.tmp[index.matrix.tmp==0]))
                    index.matrix[partition.index,] <- index.matrix.tmp
                }
            }
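            # index.matrix maps the flat parameter vector (ip.vector) onto a
            # per-partition parameter matrix; here every partition gets its own
            # block of indices. Illustrative sketch for GTR (5 parameters) and
            # two partitions:
            #   index.matrix
            #        [,1] [,2] [,3] [,4] [,5]
            #   [1,]    1    2    3    4    5
            #   [2,]    6    7    8    9   10
            #   mle.pars.mat[] <- c(ip.vector, 0)[index.matrix]  # expand back into rows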
            number.of.current.restarts <- 1
            best.lik <- 1000000
            while(number.of.current.restarts < (max.restarts+1)){
                cat(paste("Finished. Performing analysis...", sep=""), "\n")
                mle.pars.mat <- index.matrix
                mle.pars.mat[] <- c(ip.vector, 0)[index.matrix]
                if(edge.length == "optimize"){
                    cat("       Optimizing edge lengths", "\n")
                    phy$edge.length <- apply(starting.branch.lengths, 2, weighted.mean, nsites.vector)
                    phy$edge.length[phy$edge.length < 1e-08] <- 1e-08
                    results.edge.final <- OptimizeEdgeLengthsGTRNew(phy=phy, pars.mat=mle.pars.mat, site.pattern.data.list=site.pattern.data.list, site.pattern.count.list=site.pattern.count.list, empirical.base.freq.list=empirical.base.freq.list, nuc.model=nuc.model, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, nsites.vector=nsites.vector, logspace=TRUE, n.cores=n.cores.by.gene, neglnl=TRUE)
                    print(results.edge.final$final.likelihood)
                    phy <- results.edge.final$phy
                }
                cat("       Optimizing model parameters", "\n")
                ParallelizedOptimizedByGene <- function(n.partition){
                    optim.by.gene <- nloptr(x0=log(mle.pars.mat[n.partition,]), eval_f = OptimizeModelParsLarge, ub=upper.vector[1:dim(mle.pars.mat)[2]], lb=lower.vector[1:dim(mle.pars.mat)[2]], opts=opts, codon.site.data=site.pattern.data.list[[n.partition]], codon.site.counts=site.pattern.count.list[[n.partition]], data.type=data.type, codon.model=codon.model, n.partitions=1, nsites.vector=nsites.vector[n.partition], index.matrix=index.matrix, phy=phy, aa.optim_array=NULL, root.p_array=empirical.base.freq.list[[n.partition]], numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=NULL, nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, edge.length=edge.length, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=TRUE, verbose=verbose, n.cores.by.gene=n.cores.by.gene, n.cores.by.gene.by.site=n.cores.by.gene.by.site, neglnl=TRUE)
                    tmp.pars <- c(optim.by.gene$objective, optim.by.gene$solution)
                    return(tmp.pars)
                }
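                # Each worker returns c(negative log-likelihood, log-scale parameter
                # estimates) for its gene; the lines below stitch these back into a
                # single objective and a flat solution vector, roughly:
                #   mat <- t(matrix(unlist(results.set), ncol(index.matrix) + 1, n.partitions))
                #   total.neglnl <- sum(mat[, 1]); log.pars <- c(t(mat[, -1]))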
                results.set <- mclapply(1:n.partitions, ParallelizedOptimizedByGene, mc.cores=n.cores.by.gene)
                parallelized.parameters <- t(matrix(unlist(results.set),dim(index.matrix)[2]+1,n.partitions))
                results.final <- NULL
                results.final$objective <- sum(parallelized.parameters[,1])
                results.final$solution <- c(t(parallelized.parameters[,-1]))
                mle.pars.mat <- index.matrix
                mle.pars.mat[] <- c(exp(results.final$solution), 0)[index.matrix]
                print(results.final$objective)
                print(mle.pars.mat)

                current.likelihood <- results.final$objective
                cat(paste("Current likelihood", current.likelihood, sep=" "), "\n")
                are_we_there_yet <- 1
                iteration.number <- 1
                while(are_we_there_yet > max.tol && iteration.number<=max.iterations){
                    cat(paste("Finished. Iterating search -- Round", iteration.number, sep=" "), "\n")
                    if(edge.length == "optimize"){
                        cat("       Optimizing edge lengths", "\n")
                        phy$edge.length[phy$edge.length < 1e-08] <- 1e-08
                        results.edge.final <- OptimizeEdgeLengthsGTRNew(phy=phy, pars.mat=mle.pars.mat, site.pattern.data.list=site.pattern.data.list, site.pattern.count.list=site.pattern.count.list, empirical.base.freq.list=empirical.base.freq.list, nuc.model=nuc.model, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, nsites.vector=nsites.vector, logspace=TRUE, n.cores=n.cores.by.gene, neglnl=TRUE)
                        print(results.edge.final$final.likelihood)
                        phy <- results.edge.final$phy
                    }
                    cat("       Optimizing model parameters", "\n")
                    opts.params <- opts
                    opts.params$ftol_rel <- opts$ftol_rel * (max(1,tol.step^((max.iterations+1)-iteration.number)))

                    ParallelizedOptimizedByGene <- function(n.partition){
                        optim.by.gene <- nloptr(x0=log(mle.pars.mat[n.partition,]), eval_f = OptimizeModelParsLarge, ub=upper.vector[1:dim(mle.pars.mat)[2]], lb=lower.vector[1:dim(mle.pars.mat)[2]], opts=opts.params, codon.site.data=site.pattern.data.list[[n.partition]], codon.site.counts=site.pattern.count.list[[n.partition]], data.type=data.type, codon.model=codon.model, n.partitions=1, nsites.vector=nsites.vector[n.partition], index.matrix=index.matrix, phy=phy, aa.optim_array=NULL, root.p_array=empirical.base.freq.list[[n.partition]], numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=NULL, nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, edge.length=edge.length, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=TRUE, verbose=verbose, n.cores.by.gene=n.cores.by.gene, n.cores.by.gene.by.site=n.cores.by.gene.by.site, neglnl=TRUE)
                        tmp.pars <- c(optim.by.gene$objective, optim.by.gene$solution)
                        return(tmp.pars)
                    }
                    results.set <- mclapply(1:n.partitions, ParallelizedOptimizedByGene, mc.cores=n.cores.by.gene)
                    parallelized.parameters <- t(matrix(unlist(results.set),dim(index.matrix)[2]+1,n.partitions))
                    results.final <- NULL
                    results.final$objective <- sum(parallelized.parameters[,1])
                    results.final$solution <- c(t(parallelized.parameters[,-1]))
                    mle.pars.mat <- index.matrix
                    mle.pars.mat[] <- c(exp(results.final$solution), 0)[index.matrix]
                    print(results.final$objective)
                    print(mle.pars.mat)

                    are_we_there_yet <- (current.likelihood - results.final$objective ) / results.final$objective
                    current.likelihood <- results.final$objective
                    cat(paste("Current likelihood", current.likelihood, sep=" "), paste("difference from previous round", are_we_there_yet, sep=" "), "\n")
                    iteration.number <- iteration.number + 1
                }
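                # Convergence of this inner loop is judged on the relative improvement
                # in the negative log-likelihood: it stops once
                # (previous - current) / current drops to max.tol or below, or once
                # max.iterations rounds have been run.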
                #Output for use in sims#
                if(output.by.restart == TRUE){
                    obj.tmp = list(np=max(index.matrix) + length(phy$edge.length) + sum(nsites.vector), loglik = -results.final$objective, AIC = -2*(-results.final$objective)+2*(max(index.matrix) + length(phy$edge.length) + sum(nsites.vector)), mle.pars=mle.pars.mat, index.matrix=index.matrix, partitions=partitions[1:n.partitions], opts=opts, phy=phy, nsites=nsites.vector, data.type=data.type, codon.model=codon.model, aa.optim=NULL, aa.optim.type=optimal.aa, nuc.model=nuc.model, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=NULL, empirical.base.freqs=empirical.base.freq.list, max.tol=max.tol, max.evals=max.evals, selac.starting.vals=ip.vector)
                    class(obj.tmp) = "selac"
                    save(obj.tmp,file=paste(paste(codon.data.path, output.restart.filename, sep=""), number.of.current.restarts, "Rsave", sep="."))
                }
                ########################
                if(results.final$objective < best.lik){
                    best.ip <- ip.vector
                    best.lik <- results.final$objective
                    best.solution <- mle.pars.mat
                    best.edge.lengths <- phy$edge.length
                }
                number.of.current.restarts <- number.of.current.restarts + 1
            }

            loglik <- -(best.lik) #to go from neglnl to lnl
            mle.pars.mat <- best.solution
            if(edge.length == "optimize"){
                phy$edge.length <- best.edge.lengths
            }
            cat("Finished. Summarizing results...", "\n")
            colnames(mle.pars.mat) <- parameter.column.names

            if(edge.length == "optimize"){
                np <- max(index.matrix) + length(phy$edge.length)
            }else{
                np <- max(index.matrix)
            }
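            # np counts only the free model parameters (plus branch lengths when they
            # were optimized); AIC below is the usual -2*lnL + 2*np.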
            obj = list(np=np, loglik = loglik, AIC = -2*loglik+2*np, AICc = NULL, mle.pars=mle.pars.mat, index.matrix=index.matrix, partitions=partitions[1:n.partitions], opts=opts, phy=phy, data.type=data.type, codon.model=codon.model, nsites=nsites.vector, aa.optim=NULL, aa.optim.type=optimal.aa, nuc.model=nuc.model, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, numcode=numcode, diploid=diploid, aa.properties=aa.properties, empirical.base.freqs=empirical.base.freq.list, max.tol=max.tol, max.evals=max.evals)
            class(obj) = "selac"
        }else{
            if(codon.model == "GY94"){
                max.par.model.count <- 2
                ip = c(1,1)
                parameter.column.names <- c("V", "kappa")
                upper = rep(log(99), length(ip))
                lower = rep(-21, length(ip))

                codon.index.matrix = NA

                index.matrix = matrix(0, n.partitions, length(ip))
                index.matrix[1,] = 1:ncol(index.matrix)
                ip.vector = ip
                upper.vector = upper
                lower.vector = lower
                if(n.partitions > 1){
                    for(partition.index in 2:n.partitions){
                        #ip.vector = c(ip.vector, 1)
                        #upper.vector = c(upper.vector, log(99))
                        #lower.vector = c(lower.vector, -10)
                        #index.matrix.tmp = numeric(max.par.model.count)
                        #index.matrix.tmp[2] = 2
                        #index.matrix.tmp[index.matrix.tmp==0] = seq(max(index.matrix)+1, length.out=length(index.matrix.tmp[index.matrix.tmp==0]))
                        #index.matrix[partition.index,] <- index.matrix.tmp
                        index.matrix[partition.index,] <- 1:ncol(index.matrix)
                    }
                }
            }
            if(codon.model == "YN98"){
                max.par.model.count <- 2
                ip = c(1,1)
                parameter.column.names <- c("omega", "kappa")
                upper = rep(log(99), length(ip))
                lower = rep(-21, length(ip))

                codon.index.matrix = NA

                index.matrix = matrix(0, n.partitions, length(ip))
                index.matrix[1,] = 1:ncol(index.matrix)
                ip.vector = ip
                upper.vector = upper
                lower.vector = lower
                if(n.partitions > 1){
                    for(partition.index in 2:n.partitions){
                        #ip.vector = c(ip.vector, 1)
                        #upper.vector = c(upper.vector, log(99))
                        #lower.vector = c(lower.vector, -10)
                        #index.matrix.tmp = numeric(max.par.model.count)
                        #index.matrix.tmp[2] = 2
                        #index.matrix.tmp[index.matrix.tmp==0] = seq(max(index.matrix)+1, length.out=length(index.matrix.tmp[index.matrix.tmp==0]))
                        #index.matrix[partition.index,] <- index.matrix.tmp
                        index.matrix[partition.index,] <- 1:ncol(index.matrix)
                    }
                }
            }
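            # GY94 and YN98 are both two-parameter codon models here: a selection-related
            # term (labelled "V" or "omega") plus the transition:transversion ratio kappa.
            # With multiple partitions every row of index.matrix reuses indices 1:2, so the
            # two parameters are shared across genes; the commented-out code above is the
            # per-gene alternative.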
            if(codon.model == "FMutSel0"){
                empirical.aa.freq.unlist <- matrix(unlist(empirical.aa.freq.list), ncol = 21, byrow = TRUE)
                empirical.aa.freq <- colSums(empirical.aa.freq.unlist)/ sum(colSums(empirical.aa.freq.unlist))
                fitness.pars <- GetFitnessStartingValues(codon.freqs=empirical.aa.freq)[-c(17,21)]
                aa.ordered <- .unique.aa
                aa.ordered <- aa.ordered[-c(17,21)]
                if(nuc.model == "UNREST"){
                    max.par.model.count <- max.par.model.count + 1 + 19
                    ip = c(0.4, nuc.ip, fitness.pars)
                    parameter.column.names <- c("omega", parameter.column.names, paste("fitness", aa.ordered, sep="_"))
                    upper = rep(log(99), length(ip))
                    lower = rep(-10, length(ip))
                }else{
                    max.par.model.count <- max.par.model.count + 3 + 1 + 19
                    ip = c(0.4, .25, .25, .25, nuc.ip, fitness.pars)
                    parameter.column.names <- c("omega", "freqA", "freqC", "freqG", parameter.column.names, paste("fitness", aa.ordered, sep="_"))
                    upper = c(log(99), 0, 0, 0, rep(log(99), length(ip)-4))
                    lower = rep(-10, length(ip))
                }

                codon.index.matrix = NA

                index.matrix = matrix(0, n.partitions, length(ip))
                index.matrix[1,] = 1:ncol(index.matrix)
                ip.vector = ip
                upper.vector = upper
                lower.vector = lower
                if(n.partitions > 1){
                    for(partition.index in 2:n.partitions){
                        #ip.vector = c(ip.vector, 0.4)
                        #upper.vector = c(upper.vector, log(99))
                        #lower.vector = c(lower.vector, -10)
                        #index.matrix.tmp = numeric(max.par.model.count)
                        #index.matrix.tmp[2:max.par.model.count] = 2:max.par.model.count
                        #index.matrix.tmp[index.matrix.tmp==0] = seq(max(index.matrix)+1, length.out=length(index.matrix.tmp[index.matrix.tmp==0]))
                        #index.matrix[partition.index,] <- index.matrix.tmp
                        index.matrix[partition.index,] <- 1:ncol(index.matrix)
                    }
                }
            }
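            # FMutSel0 works at the amino-acid level: two of the 21 entries of .unique.aa
            # are dropped (indices 17 and 21), leaving 19 free fitness parameters that sit
            # alongside omega, the nucleotide exchangeabilities, and (for JC/GTR) three
            # free base frequencies.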
            if(codon.model == "FMutSel"){
                empirical.codon.freq.unlist <- matrix(unlist(codon.freq.by.gene.list), ncol = 64, byrow = TRUE)
                empirical.codon.freq <- colSums(empirical.codon.freq.unlist)/ sum(colSums(empirical.codon.freq.unlist))
                fitness.pars <- GetFitnessStartingValues(codon.freqs=empirical.codon.freq, n.pars=64)
                codon.ordered <- .codon.name
                codon.ordered <- codon.ordered[-c(49,51,57,64)]
                if(nuc.model == "UNREST"){
                    max.par.model.count <- max.par.model.count + 1 + 60
                    ip = c(0.4, nuc.ip, fitness.pars)
                    parameter.column.names <- c("omega", parameter.column.names, paste("fitness", codon.ordered, sep="_"))
                    upper = rep(log(99), length(ip)) # bounds must match the length of ip
                    lower = rep(-10, length(ip))
                }else{
                    max.par.model.count <- max.par.model.count + 3 + 1 + 60
                    ip = c(0.4, .25, .25, .25, nuc.ip, fitness.pars)
                    parameter.column.names <- c("omega", "freqA", "freqC", "freqG", parameter.column.names, paste("fitness", codon.ordered, sep="_"))
                    upper = c(log(99), 0, 0, 0, rep(log(99), length(ip)-4))
                    lower = rep(-10, length(ip))
                }

                codon.index.matrix = NA

                index.matrix = matrix(0, n.partitions, length(ip))
                index.matrix[1,] = 1:ncol(index.matrix)
                ip.vector = ip
                upper.vector = upper
                lower.vector = lower
                if(n.partitions > 1){
                    for(partition.index in 2:n.partitions){
                        #ip.vector = c(ip.vector, 0.4)
                        #upper.vector = c(upper.vector, log(99))
                        #lower.vector = c(lower.vector, -10)
                        #index.matrix.tmp = numeric(max.par.model.count)
                        #index.matrix.tmp[2:max.par.model.count] = 2:max.par.model.count
                        #index.matrix.tmp[index.matrix.tmp==0] = seq(max(index.matrix)+1, length.out=length(index.matrix.tmp[index.matrix.tmp==0]))
                        #index.matrix[partition.index,] <- index.matrix.tmp
                        index.matrix[partition.index,] <- 1:ncol(index.matrix)
                    }
                }
            }
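            # FMutSel is the codon-level analogue: fitness labels are taken from
            # .codon.name with four entries dropped (indices 49, 51, 57, 64), leaving 60
            # codon fitness names, and the starting fitnesses come from the codon
            # frequencies pooled across genes.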

            number.of.current.restarts <- 1
            best.lik <- 10000000
            while(number.of.current.restarts < (max.restarts+1)){
                cat(paste("Finished. Performing analysis...", sep=""), "\n")
                mle.pars.mat <- index.matrix
                mle.pars.mat[] <- c(ip.vector, 0)[index.matrix]
                print(mle.pars.mat)
                if(edge.length == "optimize"){
                    cat("       Optimizing edge lengths", "\n")
                    phy$edge.length <- colMeans(starting.branch.lengths)
                    #opts.edge <- opts
                    upper.edge <- rep(log(10), length(phy$edge.length))
                    lower.edge <- rep(log(1e-8), length(phy$edge.length))
                    results.edge.final <- nloptr(x0=log(phy$edge.length), eval_f = OptimizeEdgeLengths, ub=upper.edge, lb=lower.edge, opts=opts.edge, par.mat=mle.pars.mat, codon.site.data=site.pattern.data.list, codon.site.counts=site.pattern.count.list, data.type=data.type, codon.model=codon.model, n.partitions=n.partitions, nsites.vector=nsites.vector, index.matrix=index.matrix, phy=phy, aa.optim_array=NULL, root.p_array=NULL, codon.freq.by.aa=NULL, codon.freq.by.gene=NULL, numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=NULL, nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, edge.length=edge.length, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=TRUE, verbose=verbose, n.cores.by.gene=n.cores.by.gene, n.cores.by.gene.by.site=n.cores.by.gene.by.site, estimate.importance=FALSE, neglnl=TRUE, HMM=FALSE)
                    print(results.edge.final$objective)
                    print(exp(results.edge.final$solution))
                    phy$edge.length <- exp(results.edge.final$solution)
                }
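                # Edge lengths are searched on the log scale (hence the exp() when the
                # solution is read back), bounded between 1e-8 and 10.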
                cat("       Optimizing model parameters", "\n")
                #ParallelizedOptimizedByGene <- function(n.partition){
                optim.by.gene <- nloptr(x0=log(ip.vector), eval_f = OptimizeModelParsLarge, ub=upper.vector, lb=lower.vector, opts=opts, codon.site.data=site.pattern.data.list, codon.site.counts=site.pattern.count.list, data.type=data.type, codon.model=codon.model, n.partitions=n.partitions, nsites.vector=nsites.vector, index.matrix=index.matrix, phy=phy, aa.optim_array=NULL, root.p_array=NULL, numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=NULL, nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, edge.length=edge.length, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=TRUE, verbose=verbose, n.cores.by.gene=n.cores.by.gene, n.cores.by.gene.by.site=n.cores.by.gene.by.site, neglnl=TRUE)
                #tmp.pars <- c(optim.by.gene$objective, optim.by.gene$solution)
                #return(tmp.pars)
                #}
                #results.set <- mclapply(1:n.partitions, ParallelizedOptimizedByGene, mc.cores=n.cores.by.gene)
                #results.set <- lapply(1:n.partitions, ParallelizedOptimizedByGene)
                #parallelized.parameters <- t(matrix(unlist(results.set),dim(index.matrix)[2]+1,n.partitions))
                #results.final <- NULL
                #results.final$objective <- sum(parallelized.parameters[,1])
                #results.final$solution <- c(t(parallelized.parameters[,-1]))
                results.final$objective <- optim.by.gene$objective
                results.final$solution <- optim.by.gene$solution
                mle.pars.mat <- index.matrix
                mle.pars.mat[] <- c(exp(results.final$solution), 0)[index.matrix]
                print(results.final$objective)
                print(mle.pars.mat)

                current.likelihood <- results.final$objective
                cat(paste("Current likelihood", current.likelihood, sep=" "), "\n")
                lik.diff <- 10
                iteration.number <- 1
                while(lik.diff != 0 & iteration.number<=max.iterations){
                    cat(paste("Finished. Iterating search -- Round", iteration.number, sep=" "), "\n")
                    if(edge.length == "optimize"){
                        cat("       Optimizing edge lengths", "\n")
                        #opts.edge <- opts
                        opts.edge$ftol_rel <- opts$ftol_rel * (max(1,tol.step^((max.iterations+1)-iteration.number)))

                        results.edge.final <- nloptr(x0=log(phy$edge.length), eval_f = OptimizeEdgeLengths, ub=upper.edge, lb=lower.edge, opts=opts.edge, par.mat=mle.pars.mat, codon.site.data=site.pattern.data.list, codon.site.counts=site.pattern.count.list, data.type=data.type, codon.model=codon.model, n.partitions=n.partitions, nsites.vector=nsites.vector, index.matrix=index.matrix, phy=phy, aa.optim_array=NULL, root.p_array=NULL, codon.freq.by.aa=NULL, codon.freq.by.gene=NULL, numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=NULL, nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, edge.length=edge.length, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=TRUE, verbose=verbose, n.cores.by.gene=n.cores.by.gene, n.cores.by.gene.by.site=n.cores.by.gene.by.site, estimate.importance=FALSE, neglnl=TRUE, HMM=FALSE)
                        print(results.edge.final$objective)
                        print(exp(results.edge.final$solution))
                        phy$edge.length <- exp(results.edge.final$solution)
                    }
                    cat("       Optimizing model parameters", "\n")
                    opts.params <- opts
                    opts.params$ftol_rel <- opts$ftol_rel * (max(1,tol.step^((max.iterations+1)-iteration.number)))
                    print(length(results.final$solution))
                    print(results.final$solution)
                    #ParallelizedOptimizedByGene <- function(n.partition){
                    # Use opts.params here so the relaxed tolerance schedule set above actually applies.
                    optim.by.gene <- nloptr(x0=results.final$solution, eval_f = OptimizeModelParsLarge, ub=upper.vector, lb=lower.vector, opts=opts.params, codon.site.data=site.pattern.data.list, codon.site.counts=site.pattern.count.list, data.type=data.type, codon.model=codon.model, n.partitions=n.partitions, nsites.vector=nsites.vector, index.matrix=index.matrix, phy=phy, aa.optim_array=NULL, root.p_array=NULL, numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=NULL, nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, edge.length=edge.length, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=TRUE, verbose=verbose, n.cores.by.gene=n.cores.by.gene, n.cores.by.gene.by.site=n.cores.by.gene.by.site, neglnl=TRUE)
                    #tmp.pars <- c(optim.by.gene$objective, optim.by.gene$solution)
                    # return(tmp.pars)
                    #}
                    #results.set <- mclapply(1:n.partitions, ParallelizedOptimizedByGene, mc.cores=n.cores.by.gene)
                    #results.set <- lapply(1:n.partitions, ParallelizedOptimizedByGene)
                    #parallelized.parameters <- t(matrix(unlist(results.set),dim(index.matrix)[2]+1,n.partitions))
                    #results.final <- NULL
                    #results.final$objective <- sum(parallelized.parameters[,1])
                    #results.final$solution <- c(t(parallelized.parameters[,-1]))
                    results.final$objective <- optim.by.gene$objective
                    results.final$solution <- optim.by.gene$solution
                    mle.pars.mat <- index.matrix
                    mle.pars.mat[] <- c(exp(results.final$solution), 0)[index.matrix]
                    print(results.final$objective)
                    print(mle.pars.mat)

                    lik.diff <- round(abs(current.likelihood-results.final$objective), 8)
                    current.likelihood <- results.final$objective
                    cat(paste("Current likelihood", current.likelihood, sep=" "), paste("difference from previous round", lik.diff, sep=" "), "\n")
                    iteration.number <- iteration.number + 1
                }
                #Output for use in sims#
                if(output.by.restart == TRUE){
                    obj.tmp = list(np=max(index.matrix) + length(phy$edge.length) + sum(nsites.vector), loglik = -results.final$objective, AIC = -2*(-results.final$objective)+2*(max(index.matrix) + length(phy$edge.length) + sum(nsites.vector)), mle.pars=mle.pars.mat, index.matrix=index.matrix, partitions=partitions[1:n.partitions], opts=opts, phy=phy, nsites=nsites.vector, data.type=data.type, codon.model=codon.model, aa.optim=NULL, aa.optim.type=optimal.aa, nuc.model=nuc.model, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=NULL, empirical.aa.freqs=empirical.aa.freq.list, max.tol=max.tol, max.evals=max.evals, selac.starting.vals=ip.vector)
                    class(obj.tmp) = "selac"
                    save(obj.tmp,file=paste(paste(codon.data.path, output.restart.filename, sep=""), number.of.current.restarts, "Rsave", sep="."))
                }
                ########################
                if(results.final$objective < best.lik){
                    best.ip <- ip.vector
                    best.lik <- results.final$objective
                    best.solution <- mle.pars.mat
                    best.edge.lengths <- phy$edge.length
                }
                number.of.current.restarts <- number.of.current.restarts + 1
            }
            loglik <- -(best.lik) #to go from neglnl to lnl
            mle.pars.mat <- best.solution
            if(edge.length == "optimize"){
                phy$edge.length <- best.edge.lengths
            }
            cat("Finished. Summarizing results...", "\n")
            colnames(mle.pars.mat) <- parameter.column.names

            if(edge.length == "optimize"){
                np <- max(index.matrix) + length(phy$edge.length)
            }else{
                np <- max(index.matrix)
            }
            obj = list(np=np, loglik = loglik, AIC = -2*loglik+2*np, AICc = NULL, mle.pars=mle.pars.mat, index.matrix=index.matrix, partitions=partitions[1:n.partitions], opts=opts, phy=phy, data.type=data.type, codon.model=codon.model, nsites=nsites.vector, aa.optim=NULL, aa.optim.type=optimal.aa, nuc.model=nuc.model, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, numcode=numcode, diploid=diploid, aa.properties=aa.properties, empirical.aa.freqs=empirical.aa.freq.list, max.tol=max.tol, max.evals=max.evals)
            class(obj) = "selac"
        }
    }
    if(optimal.aa=="majrule" | optimal.aa=="optimize" | optimal.aa=="averaged" | optimal.aa=="user") {
        codon.index.matrix = CreateCodonMutationMatrixIndex()
        cpv.starting.parameters <- GetAADistanceStartingParameters(aa.properties=aa.properties)
        if(max.restarts > 1){
            selac.starting.vals <- matrix(0, max.restarts+1, 3)
            selac.starting.vals[,1] <- runif(n = max.restarts+1, min = (10^-10)*5e6, max = (10^-5)*5e6)
            selac.starting.vals[,2] <- runif(n = max.restarts+1, min = 0.01, max = 3)
            selac.starting.vals[,3] <- runif(n = max.restarts+1, min = 0.01, max = 1)
        }else{
            if(is.null(user.supplied.starting.param.vals)){
                selac.starting.vals <- matrix(c(2, 1.8292716544, 0.1017990371), 1, 3)
                selac.starting.vals <- rbind(selac.starting.vals, c(2, 1.8292716544, 0.1017990371))
            }else{
                selac.starting.vals <- matrix(c(user.supplied.starting.param.vals[1], user.supplied.starting.param.vals[2], user.supplied.starting.param.vals[3]), 1, 3)
                selac.starting.vals <- rbind(selac.starting.vals, c(user.supplied.starting.param.vals[1], user.supplied.starting.param.vals[2], user.supplied.starting.param.vals[3]))
            }
        }
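        # The three columns of selac.starting.vals hold starting values for the gene
        # scale term (C.q.phi.Ne), alpha, and beta. With multiple restarts they are
        # drawn uniformly from broad ranges; with a single restart the hard-coded
        # defaults (or the user-supplied values) are used, duplicated into a second
        # row so that indexing by restart number stays in bounds.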
        if(include.gamma == TRUE){
            #Gamma variation is turned ON:
            if(nuc.model == "JC"){
                if(k.levels == 0){
                    if(start.from.mle == TRUE){
                        ip = mle.matrix[1,]
                    }else{
                        ip = c(selac.starting.vals[1,1:3], 0.25, 0.25, 0.25, 1)
                    }
                    parameter.column.names <- c("C.q.phi.Ne", "alpha", "beta", "freqA", "freqC", "freqG", "shape.gamma")
                    upper = c(log(50),  21, 21, 0, 0, 0, 5)
                    lower = rep(-21, length(ip))
                    max.par.model.count = 6 + 0 + 1
                }else{
                    if(start.from.mle == TRUE){
                        ip = mle.matrix[1,]
                    }else{
                        ip = c(selac.starting.vals[1,1:3], 0.25, 0.25, 0.25, 1, 1, 1)
                    }
                    parameter.column.names <- c("C.q.phi.Ne", "alpha", "beta", "freqA", "freqC", "freqG", "a0", "a1", "shape.gamma")
                    upper = c(log(50), 21, 21, 0, 0, 0, 10, 10, 5)
                    lower = rep(-21, length(ip))
                    max.par.model.count = 6 + 0 + 2 + 1
                }
            }
            if(nuc.model == "GTR") {
                if(k.levels == 0){
                    if(start.from.mle == TRUE){
                        ip = mle.matrix[1,]
                    }else{
                        ip = c(selac.starting.vals[1,1:3], 0.25, 0.25, 0.25, nuc.ip, 1)
                    }
                    parameter.column.names <- c("C.q.phi.Ne", "alpha", "beta", "freqA", "freqC", "freqG", "C_A", "G_A", "T_A", "G_C", "T_C", "shape.gamma")
                    upper = c(log(50), 21, 21, 0, 0, 0, rep(21, length(nuc.ip)), 5)
                    lower = rep(-21, length(ip))
                    max.par.model.count = 6 + 5 + 1
                }else{
                    if(start.from.mle == TRUE){
                        ip = mle.matrix[1,]
                    }else{
                        ip = c(selac.starting.vals[1,1:3], 0.25, 0.25, 0.25, 1, 1, nuc.ip, 1)
                    }
                    parameter.column.names <- c("C.q.phi.Ne", "alpha", "beta", "freqA", "freqC", "freqG", "a0", "a1", "C_A", "G_A", "T_A", "G_C", "T_C", "shape.gamma")
                    upper = c(log(50), 21, 21, 0, 0, 0, 10, 10, rep(21, length(nuc.ip)), 5)
                    lower = rep(-21, length(ip))
                    max.par.model.count = 6 + 5 + 2 + 1
                }
            }
            if(nuc.model == "UNREST") {
                if(k.levels == 0){
                    if(start.from.mle == TRUE){
                        ip = mle.matrix[1,]
                    }else{
                        ip = c(selac.starting.vals[1,1:3], nuc.ip, 1)
                    }
                    parameter.column.names <- c("C.q.phi.Ne", "alpha", "beta", "C_A", "G_A", "T_A", "A_C", "G_C", "T_C", "A_G", "C_G", "A_T", "C_T", "G_T", "shape.gamma")
                    upper = c(log(50), 21, 21, rep(21, length(nuc.ip)), 5)
                    lower = rep(-21, length(ip))
                    max.par.model.count = 3 + 11 + 1
                }else{
                    if(start.from.mle == TRUE){
                        ip = mle.matrix[1,]
                    }else{
                        ip = c(selac.starting.vals[1,1:3], 1, 1, nuc.ip, 1)
                    }
                    parameter.column.names <- c("C.q.phi.Ne", "alpha", "beta", "a0", "a1", "C_A", "G_A", "T_A", "A_C", "G_C", "T_C", "A_G", "C_G", "A_T", "C_T", "G_T", "shape.gamma")
                    upper = c(log(50), 21, 21, 10, 10, rep(21, length(nuc.ip)), 5)
                    lower = rep(-21, length(ip))
                    max.par.model.count = 3 + 11 + 2 + 1
                }
            }
            index.matrix = matrix(0, n.partitions, max.par.model.count)
            index.matrix[1,] = 1:ncol(index.matrix)
            ip.vector = ip
            if(n.partitions > 1){
                # Gamma variation is turned ON:
                for(partition.index in 2:n.partitions){
                    if(nuc.model == "JC"){
                        #ip.vector = c(ip.vector, ip[1], ip[8])
                        if(start.from.mle == TRUE){
                            ip.vector = c(ip.vector, mle.matrix[partition.index,1])
                        }else{
                            ip.vector = c(ip.vector, ip[1])
                        }
                    }else{
                        if(nuc.model == "GTR"){
                            index.matrix.tmp = numeric(max.par.model.count)
                            if(k.levels == 0){
                                #index.matrix.tmp[c(2:11)] = c(2:11)
                                #ip.vector = c(ip.vector, ip[1], ip[12])
                                index.matrix.tmp[c(2:12)] = c(2:12)
                                if(start.from.mle == TRUE){
                                    ip.vector = c(ip.vector, mle.matrix[partition.index,1])
                                }else{
                                    ip.vector = c(ip.vector, ip[1])
                                }
                            }else{
                                #index.matrix.tmp[c(2:13)] = c(2:13)
                                #ip.vector = c(ip.vector, ip[1], ip[14])
                                index.matrix.tmp[c(2:14)] = c(2:14)
                                if(start.from.mle == TRUE){
                                    ip.vector = c(ip.vector, mle.matrix[partition.index,1])
                                }else{
                                    ip.vector = c(ip.vector, ip[1])
                                }
                            }
                        }else{
                            index.matrix.tmp = numeric(max.par.model.count)
                            if(k.levels == 0){
                                #index.matrix.tmp[c(2:14)] = c(2:14)
                                #ip.vector = c(ip.vector, ip[1], ip[15])
                                index.matrix.tmp[c(2:15)] = c(2:15)
                                if(start.from.mle == TRUE){
                                    ip.vector = c(ip.vector, mle.matrix[partition.index,1])
                                }else{
                                    ip.vector = c(ip.vector, ip[1])
                                }
                            }else{
                                #index.matrix.tmp[c(2:16)] = c(2:16)
                                #ip.vector = c(ip.vector, ip[1], ip[17])
                                index.matrix.tmp[c(2:17)] = c(2:17)
                                if(start.from.mle == TRUE){
                                    ip.vector = c(ip.vector, mle.matrix[partition.index,1])
                                }else{
                                    ip.vector = c(ip.vector, ip[1])
                                }
                            }
                        }
                    }
                    index.matrix.tmp[index.matrix.tmp==0] = seq(max(index.matrix)+1, length.out=length(index.matrix.tmp[index.matrix.tmp==0]))
                    index.matrix[partition.index,] <- index.matrix.tmp
                }
            }
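            # For GTR and UNREST, partitions after the first share every parameter with
            # partition one except the leading scale term: index.matrix.tmp keeps indices
            # 2..max.par.model.count pointing at the first partition's entries and only
            # position 1 is given a new index.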
        }else{
            # Gamma variation is turned OFF:
            if(nuc.model == "JC"){
                if(k.levels == 0){
                    if(start.from.mle == TRUE){
                        ip = mle.matrix[1,]
                    }else{
                        ip = c(selac.starting.vals[1,1:3], 0.25, 0.25, 0.25)
                    }
                    parameter.column.names <- c("C.q.phi.Ne",  "alpha", "beta", "freqA", "freqC", "freqG")
                    upper = c(log(50), 21, 21, 0, 0, 0)
                    lower = rep(-21, length(ip))
                    max.par.model.count = 6 + 0 + 0
                }else{
                    if(start.from.mle == TRUE){
                        ip = mle.matrix[1,]
                    }else{
                        ip = c(selac.starting.vals[1,1:3], 0.25, 0.25, 0.25, 1, 1)
                    }
                    parameter.column.names <- c("C.q.phi.Ne", "alpha", "beta", "freqA", "freqC", "freqG", "a0", "a1")
                    upper = c(log(50), 21, 21, 0, 0, 0, 10, 10)
                    lower = rep(-21, length(ip))
                    max.par.model.count = 6 + 0 + 0 + 2
                }
            }
            if(nuc.model == "GTR") {
                if(k.levels == 0){
                    if(start.from.mle == TRUE){
                        ip = mle.matrix[1,]
                    }else{
                        ip = c(selac.starting.vals[1,1:3], 0.25, 0.25, 0.25, nuc.ip)
                    }
                    parameter.column.names <- c("C.q.phi.Ne", "alpha", "beta", "freqA", "freqC", "freqG", "C_A", "G_A", "T_A", "G_C", "T_C")
                    upper = c(log(50), 21, 21, 0, 0, 0, rep(21, length(nuc.ip)))
                    lower = rep(-21, length(ip))
                    max.par.model.count = 6 + 5 + 0
                }else{
                    if(start.from.mle == TRUE){
                        ip = mle.matrix[1,]
                    }else{
                        ip = c(selac.starting.vals[1,1:3], 0.25, 0.25, 0.25, 1, 1, nuc.ip)
                    }
                    parameter.column.names <- c("C.q.phi.Ne", "alpha", "beta", "freqA", "freqC", "freqG", "a0", "a1", "C_A", "G_A", "T_A", "G_C", "T_C")
                    upper = c(log(50), 21, 21, 0, 0, 0, 10, 10, rep(21, length(nuc.ip)))
                    lower = rep(-21, length(ip))
                    max.par.model.count = 6 + 5 + 0 + 2
                }
            }
            if(nuc.model == "UNREST") {
                if(k.levels == 0){
                    if(start.from.mle == TRUE){
                        ip = mle.matrix[1,]
                    }else{
                        ip = c(selac.starting.vals[1,1:3], nuc.ip)
                    }
                    parameter.column.names <- c("C.q.phi.Ne", "alpha", "beta", "C_A", "G_A", "T_A", "A_C", "G_C", "T_C", "A_G", "C_G", "A_T", "C_T", "G_T")
                    upper = c(log(50), 21, 21, rep(21, length(nuc.ip)))
                    lower = rep(-21, length(ip))
                    max.par.model.count = 3 + 11 + 0
                }else{
                    if(start.from.mle == TRUE){
                        ip = mle.matrix[1,]
                    }else{
                        ip = c(selac.starting.vals[1,1:3], 1, 1, nuc.ip)
                    }
                    parameter.column.names <- c("C.q.phi.Ne", "alpha", "beta", "a0", "a1", "C_A", "G_A", "T_A", "A_C", "G_C", "T_C", "A_G", "C_G", "A_T", "C_T", "G_T")
                    upper = c(log(50), 21, 21, 10, 10, rep(21, length(nuc.ip)))
                    lower = rep(-21, length(ip))
                    max.par.model.count = 3 + 11 + 0 + 2
                }
            }
            index.matrix = matrix(0, n.partitions, max.par.model.count)
            index.matrix[1,] = 1:ncol(index.matrix)
            ip.vector = ip
            if(n.partitions > 1){
                for(partition.index in 2:n.partitions){
                    if(nuc.model == "JC"){
                        if(start.from.mle == TRUE){
                            ip.vector = c(ip.vector, mle.matrix[partition.index,1])
                        }else{
                            ip.vector = c(ip.vector, ip[1])
                        }
                    }else{
                        if(nuc.model == "GTR"){
                            if(start.from.mle == TRUE){
                                ip.vector = c(ip.vector, mle.matrix[partition.index,1])
                            }else{
                                ip.vector = c(ip.vector, ip[1])
                            }
                            index.matrix.tmp = numeric(max.par.model.count)
                            if(k.levels == 0){
                                index.matrix.tmp[c(2:11)] = c(2:11)
                            }else{
                                index.matrix.tmp[c(2:13)] = c(2:13)
                            }
                        }else{
                            if(start.from.mle == TRUE){
                                ip.vector = c(ip.vector, mle.matrix[partition.index,1])
                            }else{
                                ip.vector = c(ip.vector, ip[1])
                            }
                            index.matrix.tmp = numeric(max.par.model.count)
                            if(k.levels == 0){
                                index.matrix.tmp[c(2:14)] = c(2:14)
                            }else{
                                index.matrix.tmp[c(2:16)] = c(2:16)
                            }
                        }
                    }
                    index.matrix.tmp[index.matrix.tmp==0] = seq(max(index.matrix)+1, length.out=length(index.matrix.tmp[index.matrix.tmp==0]))
                    index.matrix[partition.index,] <- index.matrix.tmp
                }
            }
        }

        #THIS IS FOR WHEN THERE IS A SEPARATE GAMMA PER GENE:
        #if(include.gamma == TRUE){
        #    index.matrix.red <- t(matrix(1:(n.partitions*2), 2, n.partitions))
        #}else{
        #    index.matrix.red <- t(matrix(1:n.partitions, 1, n.partitions))
        #}

        #This is so we can break out alpha, beta, GTR, and gamma which are shared among ALL genes:
        index.matrix.red <- t(matrix(1:n.partitions, 1, n.partitions))
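        # index.matrix.red has a single column because only the gene-specific scale term
        # (C.q.phi.Ne) varies by partition; alpha, beta, the nucleotide rates, and any
        # gamma shape are shared across genes and re-optimized jointly via
        # OptimizeAlphaBetaGtrOnly.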

        if(optimal.aa == "optimize"){
            number.of.current.restarts <- 1
            aa.optim.original <- aa.optim.list
            best.lik <- 1000000
            while(number.of.current.restarts < (max.restarts+1)){
                cat(paste("Finished. Performing random restart ", number.of.current.restarts,"...", sep=""), "\n")
                aa.optim.list <- aa.optim.original
                cat("       Doing first pass using majority-rule optimal amino acid...", "\n")
                mle.pars.mat <- index.matrix
                mle.pars.mat[] <- c(ip.vector, 0)[index.matrix]
                print(mle.pars.mat)
                if(edge.length == "optimize"){
                    cat("              Optimizing edge lengths", "\n")
                    phy$edge.length <- colMeans(starting.branch.lengths)
                    #phy$edge.length <- colMeans(starting.branch.lengths) / (1/selac.starting.vals[number.of.current.restarts, 2])
                    #opts.edge <- opts
                    upper.edge <- rep(log(50), length(phy$edge.length))
                    lower.edge <- rep(log(1e-8), length(phy$edge.length))
                    results.edge.final <- nloptr(x0=log(phy$edge.length), eval_f = OptimizeEdgeLengths, ub=upper.edge, lb=lower.edge, opts=opts.edge, par.mat=mle.pars.mat, codon.site.data=site.pattern.data.list, codon.site.counts=site.pattern.count.list, data.type=data.type, codon.model=codon.model, n.partitions=n.partitions, nsites.vector=nsites.vector, index.matrix=index.matrix, phy=phy, aa.optim_array=aa.optim.list, root.p_array=NULL, codon.freq.by.aa=codon.freq.by.aa.list, codon.freq.by.gene=codon.freq.by.gene.list, numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=cpv.starting.parameters[3], nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, edge.length=edge.length, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=TRUE, verbose=verbose, n.cores.by.gene=n.cores.by.gene, n.cores.by.gene.by.site=n.cores.by.gene.by.site, estimate.importance=FALSE, neglnl=TRUE, HMM=FALSE)
                    print(results.edge.final$objective)
                    print(exp(results.edge.final$solution))
                    phy$edge.length <- exp(results.edge.final$solution)
                }
                cat("              Optimizing model parameters", "\n")

                #if(include.gamma == TRUE){
                #    alpha.beta.gtr <- mle.pars.mat[1,c(2:(max.par.model.count-1))]
                #    upper.bounds.shared <- upper[c(2:(max.par.model.count-1))]
                #    lower.bounds.shared <- lower[c(2:(max.par.model.count-1))]
                #}else{
                alpha.beta.gtr <- mle.pars.mat[1,c(2:max.par.model.count)]
                upper.bounds.shared <- upper[c(2:max.par.model.count)]
                lower.bounds.shared <- lower[c(2:max.par.model.count)]
                #}

                ParallelizedOptimizedByGene <- function(n.partition){
                    #if(include.gamma == TRUE){
                    #    tmp.par.mat <- mle.pars.mat[,c(1, max.par.model.count)]
                    #    upper.bounds.gene <- upper[c(1,max.par.model.count)]
                    #    lower.bounds.gene <- lower[c(1,max.par.model.count)]
                    #}else{
                    tmp.par.mat <- as.matrix(mle.pars.mat[,1])
                    upper.bounds.gene <- upper[1]
                    lower.bounds.gene <- lower[1]
                    #}
                    optim.by.gene <- nloptr(x0=log(tmp.par.mat[n.partition,]), eval_f = OptimizeModelParsAlphaBetaGtrFixed, ub=upper.bounds.gene, lb=lower.bounds.gene, opts=opts, alpha.beta.gtr=alpha.beta.gtr, codon.site.data=site.pattern.data.list[[n.partition]], codon.site.counts=site.pattern.count.list[[n.partition]], data.type=data.type, codon.model=codon.model, n.partitions=1, nsites.vector=nsites.vector[n.partition], index.matrix=index.matrix.red[1,], phy=phy, aa.optim_array=aa.optim.list[[n.partition]], codon.freq.by.aa=codon.freq.by.aa.list[[n.partition]], codon.freq.by.gene=codon.freq.by.gene.list[[n.partition]], numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=cpv.starting.parameters[3], nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, edge.length=edge.length, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=TRUE, verbose=verbose, n.cores.by.gene=n.cores.by.gene, n.cores.by.gene.by.site=n.cores.by.gene.by.site, estimate.importance=FALSE, neglnl=TRUE, HMM=FALSE)
                    tmp.pars <- c(optim.by.gene$objective, optim.by.gene$solution)
                    return(tmp.pars)
                }
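                # Stage 1 of each pass: hold the shared parameters (alpha.beta.gtr) fixed
                # and optimize only the per-gene scale term for each partition in parallel.
                # Stage 2 below then holds the per-gene terms fixed and re-optimizes the
                # shared parameters across all genes at once.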
                results.set <- mclapply(1:n.partitions, ParallelizedOptimizedByGene, mc.cores=n.cores.by.gene)
                #if(include.gamma == TRUE){
                #The number of columns is 3: [1] log-likelihood, [2] C.q.phi, [3] phi gamma:
                #parallelized.parameters <- t(matrix(unlist(results.set), 3, n.partitions))
                #}else{
                #The number of columns is 2: [1] log-likelihood, [2] C.q.phi:
                parallelized.parameters <- t(matrix(unlist(results.set), 2, n.partitions))
                #}
                results.final <- NULL
                results.final$objective <- sum(parallelized.parameters[,1])
                results.final$solution <- c(t(parallelized.parameters[,-1]))
                mle.pars.mat.red <- index.matrix.red
                mle.pars.mat.red[] <- c(exp(results.final$solution), 0)[index.matrix.red]
                print(mle.pars.mat.red)
                optim.alpha.beta.gtr.all.genes <- nloptr(x0=log(alpha.beta.gtr), eval_f = OptimizeAlphaBetaGtrOnly, ub=upper.bounds.shared, lb=lower.bounds.shared, opts=opts, fixed.pars=mle.pars.mat.red, codon.site.data=site.pattern.data.list, codon.site.counts=site.pattern.count.list, data.type=data.type, codon.model=codon.model, n.partitions=n.partitions, nsites.vector=nsites.vector, index.matrix=index.matrix.red, phy=phy, aa.optim_array=aa.optim.list, codon.freq.by.aa=codon.freq.by.aa.list, codon.freq.by.gene=codon.freq.by.gene.list, numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=cpv.starting.parameters[3], nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, edge.length=edge.length, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=TRUE, verbose=verbose, n.cores.by.gene=n.cores.by.gene, n.cores.by.gene.by.site=n.cores.by.gene.by.site, estimate.importance=FALSE, neglnl=TRUE, HMM=FALSE)
                results.final$objective <- optim.alpha.beta.gtr.all.genes$objective
                alpha.beta.gtr <- exp(optim.alpha.beta.gtr.all.genes$solution)
                #if(include.gamma == TRUE){
                #    mle.pars.mat <- c()
                #    for(row.index in 1:dim(mle.pars.mat.red)[1]){
                #        mle.pars.mat <- rbind(mle.pars.mat, c(mle.pars.mat.red[row.index,1], alpha.beta.gtr, mle.pars.mat.red[row.index,2]))
                #    }
                #}else{
                mle.pars.mat <- c()
                for(row.index in 1:dim(mle.pars.mat.red)[1]){
                    mle.pars.mat <- rbind(mle.pars.mat, c(mle.pars.mat.red[row.index,1], alpha.beta.gtr))
                }
                #}
                print(results.final$objective)
                print(mle.pars.mat)

                current.likelihood <- results.final$objective
                cat(paste("       Current likelihood", current.likelihood, sep=" "), "\n")
                lik.diff <- 10
                iteration.number <- 1
                while(lik.diff != 0 & iteration.number <= max.iterations){
                    cat(paste("       Finished. Iterating search -- Round", iteration.number, sep=" "), "\n")
                    cat("              Optimizing amino acids", "\n")
                    aa.optim.list <- as.list(numeric(n.partitions))
                    ParallelizedOptimizeAAByGene <- function(n.partition){
                        gene.tmp <- read.dna(partitions[n.partition], format='fasta')
                        if(!is.null(fasta.rows.to.keep)){
                            gene.tmp <- as.list(as.matrix(cbind(gene.tmp))[fasta.rows.to.keep,])
                        }else{
                            gene.tmp <- as.list(as.matrix(cbind(gene.tmp)))
                        }
                        codon.data <- DNAbinToCodonNumeric(gene.tmp)
                        codon.data <- codon.data[phy$tip.label,]
                        tmp.aa.optim.full <- GetOptimalAAPerSite(x=log(mle.pars.mat[n.partition,]), codon.data=codon.data, phy=phy, aa.optim_array=aa.optim.list[[n.partition]], codon.freq.by.aa=codon.freq.by.aa.list[[n.partition]], codon.freq.by.gene=codon.freq.by.gene.list[[n.partition]], numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=cpv.starting.parameters[3], nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=TRUE, verbose=verbose, neglnl=TRUE, n.cores.by.gene.by.site=n.cores.by.gene.by.site)
                        return(tmp.aa.optim.full)
                    }
                    aa.optim.full.list <- mclapply(1:n.partitions, ParallelizedOptimizeAAByGene, mc.cores=n.cores.by.gene)
                    for(partition.index in sequence(n.partitions)) {
                        gene.tmp <- read.dna(partitions[partition.index], format='fasta')
                        if(!is.null(fasta.rows.to.keep)){
                            gene.tmp <- as.list(as.matrix(cbind(gene.tmp))[fasta.rows.to.keep,])
                        }else{
                            gene.tmp <- as.list(as.matrix(cbind(gene.tmp)))
                        }
                        codon.data <- DNAbinToCodonNumeric(gene.tmp)
                        codon.data <- codon.data[phy$tip.label,]
                        codon.freq.by.aa.list[[partition.index]] <- GetCodonFreqsByAA(codon.data[,-1], aa.optim.full.list[[partition.index]], numcode=numcode)
                        aa.optim.frame.to.add <- matrix(c("optimal", aa.optim.full.list[[partition.index]]), 1, dim(codon.data)[2])
                        colnames(aa.optim.frame.to.add) <- colnames(codon.data)
                        codon.data <- rbind(codon.data, aa.optim.frame.to.add)
                        codon.data <- SitePattern(codon.data, includes.optimal.aa=TRUE)
                        site.pattern.data.list[[partition.index]] = codon.data$unique.site.patterns
                        site.pattern.count.list[[partition.index]] = codon.data$site.pattern.counts
                        aa.optim.list[[partition.index]] = codon.data$optimal.aa
                    }
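                    # After GetOptimalAAPerSite updates the optimal amino acid per site,
                    # the codon-by-amino-acid frequencies and the site-pattern compression
                    # must be rebuilt, because the optimal amino acid row is part of the
                    # pattern definition (includes.optimal.aa=TRUE).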
                    if(edge.length == "optimize"){
                        cat("              Optimizing edge lengths", "\n")
                        #opts.edge <- opts
                        opts.edge$ftol_rel <- opts$ftol_rel * (max(1,tol.step^((max.iterations+1)-iteration.number)))

                        results.edge.final <- nloptr(x0=log(phy$edge.length), eval_f = OptimizeEdgeLengths, ub=upper.edge, lb=lower.edge, opts=opts.edge, par.mat=mle.pars.mat, codon.site.data=site.pattern.data.list, codon.site.counts=site.pattern.count.list, data.type=data.type, codon.model=codon.model, n.partitions=n.partitions, nsites.vector=nsites.vector, index.matrix=index.matrix, phy=phy, aa.optim_array=aa.optim.list, root.p_array=NULL, codon.freq.by.aa=codon.freq.by.aa.list, codon.freq.by.gene=codon.freq.by.gene.list, numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=cpv.starting.parameters[3], nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, edge.length=edge.length, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=TRUE, verbose=verbose, n.cores.by.gene=n.cores.by.gene, n.cores.by.gene.by.site=n.cores.by.gene.by.site, estimate.importance=FALSE, neglnl=TRUE, HMM=FALSE)
                        print(results.edge.final$objective)
                        print(exp(results.edge.final$solution))
                        phy$edge.length <- exp(results.edge.final$solution)
                    }
                    cat("              Optimizing model parameters", "\n")

                    ParallelizedOptimizedByGene <- function(n.partition){
                        #if(include.gamma == TRUE){
                        #    tmp.par.mat <- mle.pars.mat[,c(1, max.par.model.count)]
                        #    upper.bounds.gene <- upper[c(1,max.par.model.count)]
                        #    lower.bounds.gene <- lower[c(1,max.par.model.count)]
                        #}else{
                        tmp.par.mat <- as.matrix(mle.pars.mat[,1])
                        upper.bounds.gene <- upper[1]
                        lower.bounds.gene <- lower[1]
                        #}
                        opts.params <- opts
                        opts.params$ftol_rel <- opts$ftol_rel * (max(1,tol.step^((max.iterations+1)-iteration.number)))
                        optim.by.gene <- nloptr(x0=log(tmp.par.mat[n.partition,]), eval_f=OptimizeModelParsAlphaBetaGtrFixed, ub=upper.bounds.gene, lb=lower.bounds.gene, opts=opts.params, alpha.beta.gtr=alpha.beta.gtr, codon.site.data=site.pattern.data.list[[n.partition]], codon.site.counts=site.pattern.count.list[[n.partition]], data.type=data.type, codon.model=codon.model, n.partitions=1, nsites.vector=nsites.vector[n.partition], index.matrix=index.matrix.red[1,], phy=phy, aa.optim_array=aa.optim.list[[n.partition]], codon.freq.by.aa=codon.freq.by.aa.list[[n.partition]], codon.freq.by.gene=codon.freq.by.gene.list[[n.partition]], numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=cpv.starting.parameters[3], nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, edge.length=edge.length, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=TRUE, verbose=verbose, n.cores.by.gene=n.cores.by.gene, n.cores.by.gene.by.site=n.cores.by.gene.by.site, estimate.importance=FALSE, neglnl=TRUE, HMM=FALSE)
                        tmp.pars <- c(optim.by.gene$objective, optim.by.gene$solution)
                        return(tmp.pars)
                    }
                    results.set <- mclapply(1:n.partitions, ParallelizedOptimizedByGene, mc.cores=n.cores.by.gene)
                    #if(include.gamma == TRUE){
                    #The number of columns is 3: [1] log-likelihood, [2] C.q.phi.Ne, [3] phi gamma:
                    #parallelized.parameters <- t(matrix(unlist(results.set), 3, n.partitions))
                    #}else{
                    #The number of columns is 2: [1] log-likelihood, [2] C.q.phi.Ne:
                    parallelized.parameters <- t(matrix(unlist(results.set), 2, n.partitions))
                    #}
                    results.final <- NULL
                    results.final$objective <- sum(parallelized.parameters[,1])
                    results.final$solution <- c(t(parallelized.parameters[,-1]))
                    mle.pars.mat.red <- index.matrix.red
                    mle.pars.mat.red[] <- c(exp(results.final$solution), 0)[index.matrix.red]

                    optim.alpha.beta.gtr.all.genes <- nloptr(x0=log(alpha.beta.gtr), eval_f = OptimizeAlphaBetaGtrOnly, ub=upper.bounds.shared, lb=lower.bounds.shared, opts=opts, fixed.pars=mle.pars.mat.red, codon.site.data=site.pattern.data.list, codon.site.counts=site.pattern.count.list, data.type=data.type, codon.model=codon.model, n.partitions=n.partitions, nsites.vector=nsites.vector, index.matrix=index.matrix.red, phy=phy, aa.optim_array=aa.optim.list, codon.freq.by.aa=codon.freq.by.aa.list, codon.freq.by.gene=codon.freq.by.gene.list, numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=cpv.starting.parameters[3], nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, edge.length=edge.length, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=TRUE, verbose=verbose, n.cores.by.gene=n.cores.by.gene, n.cores.by.gene.by.site=n.cores.by.gene.by.site, estimate.importance=FALSE, neglnl=TRUE, HMM=FALSE)
                    results.final$objective <- optim.alpha.beta.gtr.all.genes$objective
                    alpha.beta.gtr <- exp(optim.alpha.beta.gtr.all.genes$solution)
                    #if(include.gamma == TRUE){
                    #    mle.pars.mat <- c()
                    #    for(row.index in 1:dim(mle.pars.mat.red)[1]){
                    #        mle.pars.mat <- rbind(mle.pars.mat, c(mle.pars.mat.red[row.index,1], alpha.beta.gtr, mle.pars.mat.red[row.index,2]))
                    #    }
                    #}else{
                    mle.pars.mat <- c()
                    for(row.index in 1:dim(mle.pars.mat.red)[1]){
                        mle.pars.mat <- rbind(mle.pars.mat, c(mle.pars.mat.red[row.index,1], alpha.beta.gtr))
                    }
                    #}
                    print(results.final$objective)
                    print(mle.pars.mat)
                    lik.diff <- round(abs(current.likelihood-results.final$objective), 8)
                    current.likelihood <- results.final$objective
                    cat(paste("       Current likelihood", current.likelihood, sep=" "), paste("difference from previous round", lik.diff, sep=" "), "\n")
                    iteration.number <- iteration.number + 1
                }
                #Output for use in sims#
                if(output.by.restart == TRUE){
                    obj.tmp = list(np=max(index.matrix) + length(phy$edge.length) + sum(nsites.vector), loglik = -results.final$objective, AIC = -2*(-results.final$objective)+2*(max(index.matrix) + length(phy$edge.length) + sum(nsites.vector)), mle.pars=mle.pars.mat, index.matrix=index.matrix, partitions=partitions[1:n.partitions], opts=opts, phy=phy, nsites=nsites.vector, data.type=data.type, codon.model=codon.model, aa.optim=aa.optim.full.list, aa.optim.type=optimal.aa, nuc.model=nuc.model, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=cpv.starting.parameters[3], codon.freq.by.aa=codon.freq.by.aa.list, codon.freq.by.gene=codon.freq.by.gene.list, max.tol=max.tol, max.evals=max.evals, selac.starting.vals=ip.vector)
                    class(obj.tmp) = "selac"
                    save(obj.tmp, file=paste(paste(codon.data.path, output.restart.filename, sep=""), number.of.current.restarts, "Rsave", sep="."))
                }
                ########################
                if(results.final$objective < best.lik){
                    best.ip <- ip.vector
                    best.lik <- results.final$objective
                    best.solution <- mle.pars.mat
                    best.edge.lengths <- phy$edge.length
                    best.aa.optim.list <- aa.optim.full.list
                    best.codon.freq.by.aa <- codon.freq.by.aa.list
                    best.codon.freq.by.gene <- codon.freq.by.gene.list
                }
                number.of.current.restarts <- number.of.current.restarts + 1
                ip.vector[c(index.matrix[,1])] <- selac.starting.vals[number.of.current.restarts, 1]
                ip.vector[2:3] <- selac.starting.vals[number.of.current.restarts, 2:3]
                aa.optim.list <- aa.optim.original
            }
            selac.starting.vals <- best.ip
            loglik <- -(best.lik) #to go from neglnl to lnl
            mle.pars.mat <- best.solution
            aa.optim.full.list <- best.aa.optim.list
            codon.freq.by.aa.list <- best.codon.freq.by.aa
            codon.freq.by.gene.list <- best.codon.freq.by.gene

            if(edge.length == "optimize"){
                phy$edge.length <- best.edge.lengths
            }
        }else{
            if(optimal.aa == "averaged"){
                aa.optim.list = NULL
            }
            number.of.current.restarts <- 1
            best.lik <- 1000000
            while(number.of.current.restarts < (max.restarts+1)){
                if(optimal.aa == "user"){
                    cat(paste("Finished. Performing random restart ", number.of.current.restarts," using user-supplied optimal amino acids...", sep=""), "\n")
                }else{
                    cat(paste("Finished. Performing random restart ", number.of.current.restarts," using majority-rule optimal amino acids...", sep=""), "\n")
                }
                mle.pars.mat <- index.matrix
                mle.pars.mat[] <- c(ip.vector, 0)[index.matrix]
                cat("       Doing first pass...", "\n")
                print(mle.pars.mat)
                if(edge.length == "optimize"){
                    cat("              Optimizing edge lengths", "\n")
                    phy$edge.length <- colMeans(starting.branch.lengths)
                    #phy$edge.length <- colMeans(starting.branch.lengths) / (1/selac.starting.vals[number.of.current.restarts, 2])
                    #opts.edge <- opts
                    upper.edge <- rep(log(50), length(phy$edge.length))
                    lower.edge <- rep(log(1e-8), length(phy$edge.length))
                    results.edge.final <- nloptr(x0=log(phy$edge.length), eval_f = OptimizeEdgeLengths, ub=upper.edge, lb=lower.edge, opts=opts.edge, par.mat=mle.pars.mat, codon.site.data=site.pattern.data.list, codon.site.counts=site.pattern.count.list, data.type=data.type, codon.model=codon.model, n.partitions=n.partitions, nsites.vector=nsites.vector, index.matrix=index.matrix, phy=phy, aa.optim_array=aa.optim.list, root.p_array=NULL, codon.freq.by.aa=codon.freq.by.aa.list, codon.freq.by.gene=codon.freq.by.gene.list, numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=cpv.starting.parameters[3], nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, edge.length=edge.length, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=TRUE, verbose=verbose, n.cores.by.gene=n.cores.by.gene, n.cores.by.gene.by.site=n.cores.by.gene.by.site, estimate.importance=FALSE, neglnl=TRUE, HMM=FALSE)
                    print(results.edge.final$objective)
                    print(exp(results.edge.final$solution))
                    phy$edge.length <- exp(results.edge.final$solution)
                }
                cat("              Optimizing model parameters", "\n")

                #if(include.gamma == TRUE){
                #    alpha.beta.gtr <- mle.pars.mat[1,c(2:(max.par.model.count-1))]
                #    upper.bounds.shared <- upper[c(2:(max.par.model.count-1))]
                #    lower.bounds.shared <- lower[c(2:(max.par.model.count-1))]
                #}else{
                alpha.beta.gtr <- mle.pars.mat[1,c(2:max.par.model.count)]
                upper.bounds.shared <- upper[c(2:max.par.model.count)]
                lower.bounds.shared <- lower[c(2:max.par.model.count)]
                #}

                ParallelizedOptimizedByGene <- function(n.partition){
                    #if(include.gamma == TRUE){
                    #    tmp.par.mat <- mle.pars.mat[,c(1, max.par.model.count)]
                    #    upper.bounds.gene <- upper[c(1,max.par.model.count)]
                    #    lower.bounds.gene <- lower[c(1,max.par.model.count)]
                    #}else{
                    tmp.par.mat <- as.matrix(mle.pars.mat[,1])
                    upper.bounds.gene <- upper[1]
                    lower.bounds.gene <- lower[1]
                    #}
                    optim.by.gene <- nloptr(x0=log(tmp.par.mat[n.partition,]), eval_f = OptimizeModelParsAlphaBetaGtrFixed, ub=upper.bounds.gene, lb=lower.bounds.gene, opts=opts, alpha.beta.gtr=alpha.beta.gtr, codon.site.data=site.pattern.data.list[[n.partition]], codon.site.counts=site.pattern.count.list[[n.partition]], data.type=data.type, codon.model=codon.model, n.partitions=1, nsites.vector=nsites.vector[n.partition], index.matrix=index.matrix.red[1,], phy=phy, aa.optim_array=aa.optim.list[[n.partition]], codon.freq.by.aa=codon.freq.by.aa.list[[n.partition]], codon.freq.by.gene=codon.freq.by.gene.list[[n.partition]], numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=cpv.starting.parameters[3], nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, edge.length=edge.length, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=TRUE, verbose=verbose, n.cores.by.gene=n.cores.by.gene, n.cores.by.gene.by.site=n.cores.by.gene.by.site, estimate.importance=FALSE, neglnl=TRUE, HMM=FALSE)
                    tmp.pars <- c(optim.by.gene$objective, optim.by.gene$solution)
                    return(tmp.pars)
                }
                results.set <- mclapply(1:n.partitions, ParallelizedOptimizedByGene, mc.cores=n.cores.by.gene)
                #if(include.gamma == TRUE){
                #The number of columns is 3: [1] log-likelihood, [2] C.q.phi.Ne, [3] phi gamma:
                #    parallelized.parameters <- t(matrix(unlist(results.set), 3, n.partitions))
                #}else{
                #The number of columns is 2: [1] log-likelihood, [2] C.q.phi.Ne:
                parallelized.parameters <- t(matrix(unlist(results.set), 2, n.partitions))
                #}
                results.final <- NULL
                results.final$objective <- sum(parallelized.parameters[,1])
                results.final$solution <- c(t(parallelized.parameters[,-1]))
                mle.pars.mat.red <- index.matrix.red
                mle.pars.mat.red[] <- c(exp(results.final$solution), 0)[index.matrix.red]
                print(mle.pars.mat.red)
                optim.alpha.beta.gtr.all.genes <- nloptr(x0=log(alpha.beta.gtr), eval_f = OptimizeAlphaBetaGtrOnly, ub=upper.bounds.shared, lb=lower.bounds.shared, opts=opts, fixed.pars=mle.pars.mat.red, codon.site.data=site.pattern.data.list, codon.site.counts=site.pattern.count.list, data.type=data.type, codon.model=codon.model, n.partitions=n.partitions, nsites.vector=nsites.vector, index.matrix=index.matrix.red, phy=phy, aa.optim_array=aa.optim.list, codon.freq.by.aa=codon.freq.by.aa.list, codon.freq.by.gene=codon.freq.by.gene.list, numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=cpv.starting.parameters[3], nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, edge.length=edge.length, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=TRUE, verbose=verbose, n.cores.by.gene=n.cores.by.gene, n.cores.by.gene.by.site=n.cores.by.gene.by.site, estimate.importance=FALSE, neglnl=TRUE, HMM=FALSE)
                results.final$objective <- optim.alpha.beta.gtr.all.genes$objective
                alpha.beta.gtr <- exp(optim.alpha.beta.gtr.all.genes$solution)
                #if(include.gamma == TRUE){
                #    mle.pars.mat <- c()
                #    for(row.index in 1:dim(mle.pars.mat.red)[1]){
                #        mle.pars.mat <- rbind(mle.pars.mat, c(mle.pars.mat.red[row.index,1], alpha.beta.gtr, mle.pars.mat.red[row.index,2]))
                #    }
                #}else{
                mle.pars.mat <- c()
                for(row.index in 1:dim(mle.pars.mat.red)[1]){
                    mle.pars.mat <- rbind(mle.pars.mat, c(mle.pars.mat.red[row.index,1], alpha.beta.gtr))
                }
                #}
                print(results.final$objective)
                print(mle.pars.mat)

                current.likelihood <- results.final$objective
                cat(paste("       Current likelihood", current.likelihood, sep=" "), "\n")
                lik.diff <- 10
                iteration.number <- 1
                while(lik.diff != 0 & iteration.number <= max.iterations){
                    cat(paste("       Finished. Iterating search -- Round", iteration.number, sep=" "), "\n")
                    if(edge.length == "optimize"){
                        cat("              Optimizing edge lengths", "\n")
                        #opts.edge <- opts
                        opts.edge$ftol_rel <- opts$ftol_rel * (max(1,tol.step^((max.iterations+1)-iteration.number)))

                        results.edge.final <- nloptr(x0=log(phy$edge.length), eval_f = OptimizeEdgeLengths, ub=upper.edge, lb=lower.edge, opts=opts.edge, par.mat=mle.pars.mat, codon.site.data=site.pattern.data.list, codon.site.counts=site.pattern.count.list, data.type=data.type, codon.model=codon.model, n.partitions=n.partitions, nsites.vector=nsites.vector, index.matrix=index.matrix, phy=phy, aa.optim_array=aa.optim.list, root.p_array=NULL, codon.freq.by.aa=codon.freq.by.aa.list, codon.freq.by.gene=codon.freq.by.gene.list, numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=cpv.starting.parameters[3], nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, edge.length=edge.length, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=TRUE, verbose=verbose, n.cores.by.gene=n.cores.by.gene, n.cores.by.gene.by.site=n.cores.by.gene.by.site, estimate.importance=FALSE, neglnl=TRUE, HMM=FALSE)
                        print(results.edge.final$objective)
                        print(exp(results.edge.final$solution))
                        phy$edge.length <- exp(results.edge.final$solution)
                    }
                    cat("              Optimizing model parameters", "\n")

                    ParallelizedOptimizedByGene <- function(n.partition){
                        #if(include.gamma == TRUE){
                        #    tmp.par.mat <- mle.pars.mat[,c(1, max.par.model.count)]
                        #    upper.bounds.gene <- upper[c(1,max.par.model.count)]
                        #    lower.bounds.gene <- lower[c(1,max.par.model.count)]
                        #}else{
                        tmp.par.mat <- as.matrix(mle.pars.mat[,1])
                        upper.bounds.gene <- upper[1]
                        lower.bounds.gene <- lower[1]
                        #}
                        opts.params <- opts
                        opts.params$ftol_rel <- opts$ftol_rel * (max(1,tol.step^((max.iterations+1)-iteration.number)))
                        optim.by.gene <- nloptr(x0=log(tmp.par.mat[n.partition,]), eval_f=OptimizeModelParsAlphaBetaGtrFixed, ub=upper.bounds.gene, lb=lower.bounds.gene, opts=opts.params, alpha.beta.gtr=alpha.beta.gtr, codon.site.data=site.pattern.data.list[[n.partition]], codon.site.counts=site.pattern.count.list[[n.partition]], data.type=data.type, codon.model=codon.model, n.partitions=1, nsites.vector=nsites.vector[n.partition], index.matrix=index.matrix.red[1,], phy=phy, aa.optim_array=aa.optim.list[[n.partition]], codon.freq.by.aa=codon.freq.by.aa.list[[n.partition]], codon.freq.by.gene=codon.freq.by.gene.list[[n.partition]], numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=cpv.starting.parameters[3], nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, edge.length=edge.length, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=TRUE, verbose=verbose, n.cores.by.gene=n.cores.by.gene, n.cores.by.gene.by.site=n.cores.by.gene.by.site, estimate.importance=FALSE, neglnl=TRUE, HMM=FALSE)
                        tmp.pars <- c(optim.by.gene$objective, optim.by.gene$solution)
                        return(tmp.pars)
                    }
                    results.set <- mclapply(1:n.partitions, ParallelizedOptimizedByGene, mc.cores=n.cores.by.gene)
                    #if(include.gamma == TRUE){
                    #The number of columns is 3: [1] log-likelihood, [2] C.q.phi, [3] phi gamma:
                    #    parallelized.parameters <- t(matrix(unlist(results.set), 3, n.partitions))
                    #}else{
                    #The number of columns is 2: [1] log-likelihood, [2] C.q.phi:
                    parallelized.parameters <- t(matrix(unlist(results.set), 2, n.partitions))
                    #}
                    results.final <- NULL
                    results.final$objective <- sum(parallelized.parameters[,1])
                    results.final$solution <- c(t(parallelized.parameters[,-1]))
                    mle.pars.mat.red <- index.matrix.red
                    mle.pars.mat.red[] <- c(exp(results.final$solution), 0)[index.matrix.red]

                    optim.alpha.beta.gtr.all.genes <- nloptr(x0=log(alpha.beta.gtr), eval_f = OptimizeAlphaBetaGtrOnly, ub=upper.bounds.shared, lb=lower.bounds.shared, opts=opts, fixed.pars=mle.pars.mat.red, codon.site.data=site.pattern.data.list, codon.site.counts=site.pattern.count.list, data.type=data.type, codon.model=codon.model, n.partitions=n.partitions, nsites.vector=nsites.vector, index.matrix=index.matrix.red, phy=phy, aa.optim_array=aa.optim.list, codon.freq.by.aa=codon.freq.by.aa.list, codon.freq.by.gene=codon.freq.by.gene.list, numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=cpv.starting.parameters[3], nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, edge.length=edge.length, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=TRUE, verbose=verbose, n.cores.by.gene=n.cores.by.gene, n.cores.by.gene.by.site=n.cores.by.gene.by.site, estimate.importance=FALSE, neglnl=TRUE, HMM=FALSE)
                    results.final$objective <- optim.alpha.beta.gtr.all.genes$objective
                    alpha.beta.gtr <- exp(optim.alpha.beta.gtr.all.genes$solution)
                    #if(include.gamma == TRUE){
                    #   mle.pars.mat <- c()
                    #   for(row.index in 1:dim(mle.pars.mat.red)[1]){
                    #        mle.pars.mat <- rbind(mle.pars.mat, c(mle.pars.mat.red[row.index,1], alpha.beta.gtr, mle.pars.mat.red[row.index,2]))
                    #    }
                    #}else{
                    mle.pars.mat <- c()
                    for(row.index in 1:dim(mle.pars.mat.red)[1]){
                        mle.pars.mat <- rbind(mle.pars.mat, c(mle.pars.mat.red[row.index,1], alpha.beta.gtr))
                    }
                    #}
                    print(results.final$objective)
                    print(mle.pars.mat)
                    lik.diff <- round(abs(current.likelihood-results.final$objective), 8)
                    current.likelihood <- results.final$objective
                    cat(paste("       Current likelihood", current.likelihood, sep=" "), paste("difference from previous round", lik.diff, sep=" "), "\n")
                    iteration.number <- iteration.number + 1
                }
                #Output for use in sims#
                if(output.by.restart == TRUE){
                    obj.tmp = list(np=max(index.matrix) + length(phy$edge.length) + sum(nsites.vector), loglik = -results.final$objective, AIC = -2*(-results.final$objective)+2*(max(index.matrix) + length(phy$edge.length) + sum(nsites.vector)), mle.pars=mle.pars.mat, index.matrix=index.matrix, partitions=partitions[1:n.partitions], opts=opts, phy=phy, nsites=nsites.vector, data.type=data.type, codon.model=codon.model, aa.optim=aa.optim.full.list, aa.optim.type=optimal.aa, nuc.model=nuc.model, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=cpv.starting.parameters[3], codon.freq.by.aa=codon.freq.by.aa.list, codon.freq.by.gene=codon.freq.by.gene.list, max.tol=max.tol, max.evals=max.evals, selac.starting.vals=ip.vector)
                    class(obj.tmp) = "selac"
                    save(obj.tmp, file=paste(paste(codon.data.path, output.restart.filename, sep=""), number.of.current.restarts, "Rsave", sep="."))
                }
                ########################
                if(results.final$objective < best.lik){
                    best.ip <- ip.vector
                    best.lik <- results.final$objective
                    best.solution <- mle.pars.mat
                    best.edge.lengths <- phy$edge.length
                    best.codon.freq.by.aa <- codon.freq.by.aa.list
                    best.codon.freq.by.gene <- codon.freq.by.gene.list
                }
                number.of.current.restarts <- number.of.current.restarts + 1
                print(ip.vector)
                ip.vector[c(index.matrix[,1])] <- selac.starting.vals[number.of.current.restarts, 1]
                ip.vector[2:3] <- selac.starting.vals[number.of.current.restarts, 2:3]
                print(ip.vector)
            }
            selac.starting.vals <- best.ip
            loglik <- -(best.lik) #to go from neglnl to lnl
            mle.pars.mat <- best.solution
            codon.freq.by.aa.list <- best.codon.freq.by.aa
            codon.freq.by.gene.list <- best.codon.freq.by.gene

            if(edge.length == "optimize"){
                phy$edge.length <- best.edge.lengths
            }
        }
        cat("Finished. Summarizing results...", "\n")
        colnames(mle.pars.mat) <- parameter.column.names

        if(edge.length == "optimize"){
            if(optimal.aa == "user" | optimal.aa == "majrule" | optimal.aa == "averaged"){
                np <- max(index.matrix) + length(phy$edge.length)
            }else{
                np <- max(index.matrix) + length(phy$edge.length) + sum(nsites.vector)
            }
        }else{
            if(optimal.aa == "user" | optimal.aa == "majrule" | optimal.aa == "averaged"){
                np <- max(index.matrix)
            }else{
                np <- max(index.matrix) + sum(nsites.vector)
            }
        }

        #Parameter count: the per-site optimal amino acids (nsites) are included only when they are estimated rather than user-supplied, majority-rule, or averaged.
        obj = list(np=np, loglik = loglik, AIC = -2*loglik+2*np, mle.pars=mle.pars.mat, index.matrix=index.matrix, partitions=partitions[1:n.partitions], opts=opts, phy=phy, nsites=nsites.vector, data.type=data.type, codon.model=codon.model, aa.optim=aa.optim.full.list, aa.optim.type=optimal.aa, nuc.model=nuc.model, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=cpv.starting.parameters[3], codon.freq.by.aa=codon.freq.by.aa.list, codon.freq.by.gene=codon.freq.by.gene.list, max.tol=max.tol, max.evals=max.evals, selac.starting.vals=selac.starting.vals)
        class(obj) = "selac"
    }
    return(obj)
}


#' @title Efficient optimization of a Hidden Markov SELAC model
#'
#' @description
#' Efficient optimization of model parameters under a HMM SELAC model
#'
#' @param codon.data.path Provides the path to the directory containing the gene-specific fasta files of coding data. Each file must have a ".fasta" file ending.
#' @param n.partitions The number of partitions to analyze. The order is based on the Unix order of the fasta files in the directory.
#' @param phy The phylogenetic tree to optimize the model parameters.
#' @param data.type The data type being tested. Options are "codon" or "nucleotide".
#' @param codon.model The type of codon model to use. There are four options: "none", "GY94", "FMutSel0", "selac".
#' @param edge.length Indicates whether or not edge lengths should be optimized. By default it is set to "optimize"; the other option is "fixed", which uses the user-supplied branch lengths.
#' @param edge.linked A logical indicating whether or not edge lengths should be optimized separately for each gene. By default, a single set of edge lengths is optimized for all genes.
#' @param optimal.aa Indicates what type of optimal.aa should be used. There are four options: "none", "majrule", "optimize", or "user".
#' @param nuc.model Indicates what type of nucleotide model to use. There are three options: "JC", "GTR", or "UNREST".
#' @param estimate.aa.importance Indicates whether a gene-specific importance-of-distance parameter is to be estimated.
#' @param include.gamma A logical indicating whether or not to include a discrete gamma model.
#' @param gamma.type Indicates what type of gamma distribution to use. Options are "quadrature", after the Laguerre quadrature approach of Felsenstein 2001, or "median", after the approach of Yang 1994.
#' @param ncats The number of discrete categories.
#' @param numcode The NCBI genetic code number for translation. By default the standard (numcode=1) genetic code is used.
#' @param diploid A logical indicating whether or not the organism is diploid.
#' @param k.levels Provides the number of levels in the polynomial. By default we assume a single level (i.e., linear).
#' @param aa.properties User-supplied amino acid distance properties. By default we assume Grantham (1974) properties.
#' @param verbose Logical indicating whether each iteration should be printed to the screen.
#' @param n.cores.by.gene The number of cores to dedicate to parallelizing analyses across genes.
#' @param n.cores.by.gene.by.site The number of cores to dedicate to parallelizing analyses by site WITHIN a gene. Note n.cores.by.gene*n.cores.by.gene.by.site is the total number of cores dedicated to the analysis.
#' @param max.tol Supplies the relative optimization tolerance.
#' @param max.tol.edges Supplies the relative optimization tolerance for branch lengths only. Default is the same as max.tol.
#' @param max.evals Supplies the max number of iterations tried during optimization.
#' @param max.restarts Supplies the number of random restarts.
#' @param user.optimal.aa If optimal.aa is set to "user", this option allows for the user-input optimal amino acids. Must be a list. To get the proper order of the partitions see "GetPartitionOrder" documentation.
#' @param fasta.rows.to.keep Indicates which rows of the input fasta files to keep; all other rows are removed.
#' @param recalculate.starting.brlen Whether to use given branch lengths in the starting tree or recalculate them.
#' @param output.by.restart Logical indicating whether or not each restart is saved to a file. Default is TRUE.
#' @param output.restart.filename Designates the file name for each random restart.
#' @param user.supplied.starting.param.vals Designates user-supplied starting values for C.q.phi.Ne, Grantham alpha, and Grantham beta. Default is NULL.
#' @param tol.step If > 1, uses a coarser tolerance at earlier iterations of the optimizer.
#' @param optimizer.algorithm The optimizer used by nloptr.
#' @param max.iterations Sets the number of cycles to optimize the different parts of the model.
#'
#' @details
#' A hidden Markov model which no longer optimizes the optimal amino acids, but instead allows the optimal sequence to vary along branches, clades, taxa, etc. As in the original function, we optimize parameters across each gene separately while keeping the shared parameters (alpha, beta, edge lengths, and nucleotide substitution parameters) constant across genes. We then optimize alpha, beta, the GTR parameters, and the edge lengths while keeping the rest of the parameters for each gene fixed. This approach is potentially more efficient than simply optimizing all parameters simultaneously, especially when fitting models across hundreds of genes.
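#'
#' A minimal usage sketch is shown below (not run); the "genes/" directory and "tree.phy" file are hypothetical placeholders.
#' @examples
#' \dontrun{
#' # Sketch only: assumes a directory of gene-specific *.fasta alignments and a tree
#' # whose tip labels match the fasta sequence names.
#' phy <- ape::read.tree("tree.phy")
#' hmm.result <- SelacHMMOptimize(codon.data.path="genes/", phy=phy, nuc.model="GTR",
#'     include.gamma=FALSE, edge.length="optimize", max.restarts=1, n.cores.by.gene=1)
#' print(hmm.result)
#' }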
SelacHMMOptimize <- function(codon.data.path, n.partitions=NULL, phy, data.type="codon", codon.model="selac", edge.length="optimize", edge.linked=TRUE, nuc.model="GTR", estimate.aa.importance=FALSE, include.gamma=FALSE, gamma.type="quadrature", ncats=4, numcode=1, diploid=TRUE, k.levels=0, aa.properties=NULL, verbose=FALSE, n.cores.by.gene=1, n.cores.by.gene.by.site=1, max.tol=1e-3, max.tol.edges=1e-3, max.evals=1000000, max.restarts=3, user.optimal.aa=NULL, fasta.rows.to.keep=NULL, recalculate.starting.brlen=TRUE, output.by.restart=TRUE, output.restart.filename="restartResult", user.supplied.starting.param.vals=NULL, tol.step=1, optimizer.algorithm="NLOPT_LN_SBPLX", max.iterations=6) {

    if(!data.type == "codon"){
        stop("Check that your data type input is correct. Options currently are codon only", call.=FALSE)
    }
    if(!codon.model == "selac"){
        stop("Check that your codon model is correct. Options are currently selac only", call.=FALSE)
    }
    if(!edge.length == "optimize" & !edge.length == "fixed"){
        stop("Check that you have a supported edge length option. Options are optimize or fixed.", call.=FALSE)
    }
    if(!nuc.model == "JC" & !nuc.model == "GTR" & !nuc.model == "UNREST"){
        stop("Check that you have a supported nucleotide substitution model. Options are JC, GTR, or UNREST.", call.=FALSE)
    }
    if(!gamma.type == "quadrature" & !gamma.type == "median"){
        stop("Check that you have a supported gamma type. Options are quadrature after Felsenstein 2001 or median after Yang 1994.", call.=FALSE)
    }

    cat(paste("Using", n.cores.by.gene * n.cores.by.gene.by.site, "total processors", sep=" "), "\n")

    cat("Initializing data and model parameters...", "\n")

    partitions <- system(paste("ls -1 ", codon.data.path, "*.fasta", sep=""), intern=TRUE)

    estimate.importance <- estimate.aa.importance

    if(is.null(n.partitions)){
        n.partitions <- length(partitions)
    }

    site.pattern.data.list <- as.list(numeric(n.partitions))
    site.pattern.count.list <- as.list(numeric(n.partitions))
    nsites.vector <- c()
    codon.freq.by.aa.list <- as.list(numeric(n.partitions))
    codon.freq.by.gene.list <- as.list(numeric(n.partitions))
    starting.branch.lengths <- matrix(0, n.partitions, length(phy$edge[,1]))
    for (partition.index in sequence(n.partitions)) {
        gene.tmp <- read.dna(partitions[partition.index], format='fasta')
        if(!is.null(fasta.rows.to.keep)){
            gene.tmp <- as.list(as.matrix(cbind(gene.tmp))[fasta.rows.to.keep,])
        }else{
            gene.tmp <- as.list(as.matrix(cbind(gene.tmp)))
        }
        starting.branch.lengths[partition.index,] <- ComputeStartingBranchLengths(phy, gene.tmp, data.type=data.type, recalculate.starting.brlen=recalculate.starting.brlen)$edge.length
        codon.data <- DNAbinToCodonNumeric(gene.tmp)
        codon.data <- codon.data[phy$tip.label,]
        nsites.vector = c(nsites.vector, dim(codon.data)[2] - 1)
        codon.freq.by.aa.list[[partition.index]] <- NULL
        codon.freq.by.gene.list[[partition.index]] <- GetCodonFreqsByGeneHMM(codon.data[,-1])
        codon.data <- SitePattern(codon.data, includes.optimal.aa=FALSE)
        site.pattern.data.list[[partition.index]] = codon.data$unique.site.patterns
        site.pattern.count.list[[partition.index]] = codon.data$site.pattern.counts
    }

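    # Two nloptr option sets: model parameters use max.tol, edge lengths use max.tol.edges
    # (which defaults to the same value).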
    opts <- list("algorithm" = optimizer.algorithm, "maxeval" = max.evals, "ftol_rel" = max.tol)
    opts.edge <- list("algorithm" = optimizer.algorithm, "maxeval" = max.evals, "ftol_rel" = max.tol.edges)

    results.final <- c()
    if(nuc.model == "JC"){
        nuc.ip = NULL
        max.par.model.count = 0
        parameter.column.names <- c("sel.reg")
    }
    if(nuc.model == "GTR"){
        nuc.ip = rep(1, 5)
        max.par.model.count = 5
        parameter.column.names <- c("C_A", "G_A", "T_A", "G_C", "T_C", "sel.reg")
    }
    if(nuc.model == "UNREST"){
        nuc.ip = rep(1, 11)
        max.par.model.count = 11
        parameter.column.names <- c("C_A", "G_A", "T_A", "A_C", "G_C", "T_C", "A_G", "C_G", "A_T", "C_T", "G_T", "sel.reg")
    }

    codon.index.matrix <- CreateCodonMutationMatrixIndexEvolveAA()
    cpv.starting.parameters <- GetAADistanceStartingParameters(aa.properties=aa.properties)
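    # Columns of selac.starting.vals: [1] C.q.phi.Ne, [2] Grantham alpha, [3] Grantham beta
    # (same ordering as user.supplied.starting.param.vals).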
    if(max.restarts > 1){
        selac.starting.vals <- matrix(0, max.restarts+1, 3)
        selac.starting.vals[,1] <- runif(n = max.restarts+1, min = (10^-10)*5e6, max = (10^-5)*5e6)
        selac.starting.vals[,2] <- runif(n = max.restarts+1, min = 0.01, max = 3)
        selac.starting.vals[,3] <- runif(n = max.restarts+1, min = 0.01, max = 1)
    }else{
        if(is.null(user.supplied.starting.param.vals)){
            selac.starting.vals <- matrix(c(2, 1.8292716544, 0.1017990371), 1, 3)
            selac.starting.vals <- rbind(selac.starting.vals, c(2, 1.8292716544, 0.1017990371))
        }else{
            selac.starting.vals <- matrix(c(user.supplied.starting.param.vals[1], user.supplied.starting.param.vals[2], user.supplied.starting.param.vals[3]), 1, 3)
            selac.starting.vals <- rbind(selac.starting.vals, c(user.supplied.starting.param.vals[1], user.supplied.starting.param.vals[2], user.supplied.starting.param.vals[3]))
        }
    }
    if(include.gamma == TRUE){
        #Gamma variation is turned ON:
        if(nuc.model == "JC"){
            if(k.levels == 0){
                if(estimate.importance == TRUE){
                    ip = c(selac.starting.vals[1,1:3], 0.25, 0.25, 0.25, 1, 0.01, 1)
                    parameter.column.names <- c("C.q.phi.Ne", "alpha", "beta", "freqA", "freqC", "freqG", "shape.gamma", "sel.reg", "importance")
                    upper = c(log(50), log(20), log(20), 0, 0, 0, 5, 21, 4)
                    lower = c(rep(-21, length(ip)-1), -4)
                    max.par.model.count = 6 + 0 + 1 + 1 + 1
                }else{
                    ip = c(selac.starting.vals[1,1:3], 0.25, 0.25, 0.25, 1, 0.01)
                    parameter.column.names <- c("C.q.phi.Ne", "alpha", "beta", "freqA", "freqC", "freqG", "shape.gamma", "sel.reg")
                    upper = c(log(50), log(20), log(20), 0, 0, 0, 5, 21)
                    lower = rep(-21, length(ip))
                    max.par.model.count = 6 + 0 + 1 + 1
                }
            }else{
                if(estimate.importance == TRUE){
                    ip = c(selac.starting.vals[1,1:3], 0.25, 0.25, 0.25, 1, 1, 1, 0.01, 1)
                    parameter.column.names <- c("C.q.phi.Ne", "alpha", "beta", "freqA", "freqC", "freqG", "a0", "a1", "shape.gamma", "sel.reg", "importance")
                    upper = c(log(50), log(20), log(20), 0, 0, 0, 21, 21, 5, 21, 4)
                    lower = c(rep(-21, length(ip)-1), -4)
                    max.par.model.count = 6 + 0 + 2 + 1 + 1 + 1
                }else{
                    ip = c(selac.starting.vals[1,1:3], 0.25, 0.25, 0.25, 1, 1, 1, 0.01)
                    parameter.column.names <- c("C.q.phi.Ne", "alpha", "beta", "freqA", "freqC", "freqG", "a0", "a1", "shape.gamma", "sel.reg")
                    upper = c(log(50), log(20), log(20), 0, 0, 0, 21, 21, 5, 21)
                    lower = rep(-21, length(ip))
                    max.par.model.count = 6 + 0 + 2 + 1 + 1
                }
            }
        }
        if(nuc.model == "GTR") {
            if(k.levels == 0){
                if(estimate.importance == TRUE){
                    ip = c(selac.starting.vals[1,1:3], 0.25, 0.25, 0.25, nuc.ip, 1, 0.01, 1)
                    parameter.column.names <- c("C.q.phi.Ne", "alpha", "beta", "freqA", "freqC", "freqG", "C_A", "G_A", "T_A", "G_C", "T_C", "shape.gamma", "sel.reg", "importance")
                    upper = c(log(50), log(20), log(20), 0, 0, 0, rep(21, length(nuc.ip)), 5, 21, 4)
                    lower = c(rep(-21, length(ip)-1), -4)
                    max.par.model.count = 6 + 5 + 1 + 1 + 1
                }else{
                    ip = c(selac.starting.vals[1,1:3], 0.25, 0.25, 0.25, nuc.ip, 1, 0.01)
                    parameter.column.names <- c("C.q.phi.Ne", "alpha", "beta", "freqA", "freqC", "freqG", "C_A", "G_A", "T_A", "G_C", "T_C", "shape.gamma", "sel.reg")
                    upper = c(log(50), log(20), log(20), 0, 0, 0, rep(21, length(nuc.ip)), 5, 21)
                    lower = rep(-21, length(ip))
                    max.par.model.count = 6 + 5 + 1 + 1
                }
            }else{
                if(estimate.importance == TRUE){
                    ip = c(selac.starting.vals[1,1:3], 0.25, 0.25, 0.25, 1, 1, nuc.ip, 1, 0.01, 1)
                    parameter.column.names <- c("C.q.phi.Ne", "alpha", "beta", "freqA", "freqC", "freqG", "a0", "a1", "C_A", "G_A", "T_A", "G_C", "T_C", "shape.gamma", "sel.reg", "importance")
                    upper = c(log(50), log(20), log(20), 0, 0, 0, 21, 21, rep(21, length(nuc.ip)), 5, 21, 4)
                    lower = c(rep(-21, length(ip)-1), -4)
                    max.par.model.count = 6 + 5 + 2	+ 1 + 1 + 1
                }else{
                    ip = c(selac.starting.vals[1,1:3], 0.25, 0.25, 0.25, 1, 1, nuc.ip, 1, 0.01)
                    parameter.column.names <- c("C.q.phi.Ne", "alpha", "beta", "freqA", "freqC", "freqG", "a0", "a1", "C_A", "G_A", "T_A", "G_C", "T_C", "shape.gamma", "sel.reg")
                    upper = c(log(50), log(20), log(20), 0, 0, 0, 21, 21, rep(21, length(nuc.ip)), 5, 21)
                    lower = rep(-21, length(ip))
                    max.par.model.count = 6 + 5 + 2	+ 1 + 1
                }
            }
        }
        if(nuc.model == "UNREST") {
            if(k.levels == 0){
                if(estimate.importance == TRUE){
                    ip = c(selac.starting.vals[1,1:3], nuc.ip, 1, 0.01, 1)
                    parameter.column.names <- c("C.q.phi.Ne", "alpha", "beta", "C_A", "G_A", "T_A", "A_C", "G_C", "T_C", "A_G", "C_G", "A_T", "C_T", "G_T", "shape.gamma", "sel.reg", "importance")
                    upper = c(log(50), log(20), log(20), rep(21, length(nuc.ip)), 5, 21, 4)
                    lower = c(rep(-21, length(ip)-1), -4)
                    max.par.model.count = 3 + 11 + 1 + 1 + 1
                }else{
                    ip = c(selac.starting.vals[1,1:3], nuc.ip, 1, 0.01)
                    parameter.column.names <- c("C.q.phi.Ne", "alpha", "beta", "C_A", "G_A", "T_A", "A_C", "G_C", "T_C", "A_G", "C_G", "A_T", "C_T", "G_T", "shape.gamma", "sel.reg")
                    upper = c(log(50), log(20), log(20), rep(21, length(nuc.ip)), 5, 21)
                    lower = rep(-21, length(ip))
                    max.par.model.count = 3 + 11 + 1 + 1
                }
            }else{
                if(estimate.importance == TRUE){
                    ip = c(selac.starting.vals[1,1:3], 1, 1, nuc.ip, 1, 0.01, 1)
                    parameter.column.names <- c("C.q.phi.Ne", "alpha", "beta", "a0", "a1", "C_A", "G_A", "T_A", "A_C", "G_C", "T_C", "A_G", "C_G", "A_T", "C_T", "G_T", "shape.gamma", "sel.reg", "importance")
                    upper = c(log(50), log(20), log(20), 21, 21, rep(21, length(nuc.ip)), 5, 21, 4)
                    lower = c(rep(-21, length(ip)-1), -4)
                    max.par.model.count = 3 + 11 + 2 + 1 + 1 + 1
                }else{
                    ip = c(selac.starting.vals[1,1:3], 1, 1, nuc.ip, 1, 0.01)
                    parameter.column.names <- c("C.q.phi.Ne", "alpha", "beta", "a0", "a1", "C_A", "G_A", "T_A", "A_C", "G_C", "T_C", "A_G", "C_G", "A_T", "C_T", "G_T", "shape.gamma", "sel.reg")
                    upper = c(log(50), log(20), log(20), 21, 21, rep(21, length(nuc.ip)), 5, 21)
                    lower = rep(-21, length(ip))
                    max.par.model.count = 3 + 11 + 2 + 1 + 1
                }
            }
        }
        index.matrix = matrix(0, n.partitions, max.par.model.count)
        index.matrix[1,] = 1:ncol(index.matrix)
        ip.vector = ip
        if(n.partitions > 1){
            # Gamma variation is turned ON:
            for(partition.index in 2:n.partitions){
                if(nuc.model == "JC"){
                    if(estimate.importance == TRUE){
                        ip.vector = c(ip.vector, ip[1], ip[8], ip[9])
                    }else{
                        ip.vector = c(ip.vector, ip[1], ip[8])
                    }
                }else{
                    if(nuc.model == "GTR"){
                        index.matrix.tmp = numeric(max.par.model.count)
                        if(k.levels == 0){
                            if(estimate.importance == TRUE){
                                index.matrix.tmp[c(2:12)] = c(2:12)
                                ip.vector = c(ip.vector, ip[1], ip[13], ip[14])
                            }else{
                                index.matrix.tmp[c(2:12)] = c(2:12)
                                ip.vector = c(ip.vector, ip[1], ip[13])
                            }
                        }else{
                            if(estimate.importance == TRUE){
                                index.matrix.tmp[c(2:14)] = c(2:14)
                                ip.vector = c(ip.vector, ip[1], ip[15], ip[16])
                            }else{
                                index.matrix.tmp[c(2:14)] = c(2:14)
                                ip.vector = c(ip.vector, ip[1], ip[15])
                            }
                        }
                    }else{
                        index.matrix.tmp = numeric(max.par.model.count)
                        if(k.levels == 0){
                            if(estimate.importance == TRUE){
                                index.matrix.tmp[c(2:15)] = c(2:15)
                                ip.vector = c(ip.vector, ip[1], ip[16], ip[17])
                            }else{
                                index.matrix.tmp[c(2:15)] = c(2:15)
                                ip.vector = c(ip.vector, ip[1], ip[16])
                            }
                        }else{
                            if(estimate.importance == TRUE){
                                index.matrix.tmp[c(2:17)] = c(2:17)
                                ip.vector = c(ip.vector, ip[1], ip[18], ip[19])
                            }else{
                                index.matrix.tmp[c(2:17)] = c(2:17)
                                ip.vector = c(ip.vector, ip[1], ip[18])
                            }
                        }
                    }
                }
                index.matrix.tmp[index.matrix.tmp==0] = seq(max(index.matrix)+1, length.out=length(index.matrix.tmp[index.matrix.tmp==0]))
                index.matrix[partition.index,] <- index.matrix.tmp
            }
        }
    }else{
        # Gamma variation is turned OFF:
        if(nuc.model == "JC"){
            if(k.levels == 0){
                if(estimate.importance == TRUE){
                    ip = c(selac.starting.vals[1,1:3], 0.25, 0.25, 0.25, 0.01, 1)
                    parameter.column.names <- c("C.q.phi.Ne", "alpha", "beta", "freqA", "freqC", "freqG", "sel.reg", "importance")
                    upper = c(log(50), log(20), log(20), 0, 0, 0, 21, 4)
                    lower = c(rep(-21, length(ip)-1), -4)
                    max.par.model.count = 6 + 0 + 0 + 1 + 1
                }else{
                    ip = c(selac.starting.vals[1,1:3], 0.25, 0.25, 0.25, 0.01)
                    parameter.column.names <- c("C.q.phi.Ne", "alpha", "beta", "freqA", "freqC", "freqG", "sel.reg")
                    upper = c(log(50), log(20), log(20), 0, 0, 0, 21)
                    lower = rep(-21, length(ip))
                    max.par.model.count = 6 + 0 + 0 + 1
                }
            }else{
                if(estimate.importance == TRUE){
                    ip = c(selac.starting.vals[1,1:3], 0.25, 0.25, 0.25, 1, 1, 0.01, 1)
                    parameter.column.names <- c("C.q.phi.Ne", "alpha", "beta", "freqA", "freqC", "freqG", "a0", "a1", "sel.reg", "importance")
                    upper = c(log(50), log(20), log(20), 0, 0, 0, 21, 21, 21, 4)
                    lower = c(rep(-21, length(ip)-1), -4)
                    max.par.model.count = 6 + 0 + 0 + 2 + 1 + 1
                }else{
                    ip = c(selac.starting.vals[1,1:3], 0.25, 0.25, 0.25, 1, 1, 0.01)
                    parameter.column.names <- c("C.q.phi.Ne", "alpha", "beta", "freqA", "freqC", "freqG", "a0", "a1", "sel.reg")
                    upper = c(log(50), log(20), log(20), 0, 0, 0, 21, 21, 21)
                    lower = rep(-21, length(ip))
                    max.par.model.count = 6 + 0 + 0 + 2 + 1
                }
            }
        }
        if(nuc.model == "GTR") {
            if(k.levels == 0){
                if(estimate.importance == TRUE){
                    ip = c(selac.starting.vals[1,1:3], 0.25, 0.25, 0.25, nuc.ip, 0.01, 1)
                    parameter.column.names <- c("C.q.phi.Ne", "alpha", "beta", "freqA", "freqC", "freqG", "C_A", "G_A", "T_A", "G_C", "T_C", "sel.reg", "importance")
                    upper = c(log(50), log(20), log(20), 0, 0, 0, rep(21, length(nuc.ip)), 21, 4)
                    lower = c(rep(-21, length(ip)-1), -4)
                    max.par.model.count = 6 + 5 + 0 + 1 + 1
                }else{
                    ip = c(selac.starting.vals[1,1:3], 0.25, 0.25, 0.25, nuc.ip, 0.01)
                    parameter.column.names <- c("C.q.phi.Ne", "alpha", "beta", "freqA", "freqC", "freqG", "C_A", "G_A", "T_A", "G_C", "T_C", "sel.reg")
                    upper = c(log(50), log(20), log(20), 0, 0, 0, rep(21, length(nuc.ip)), 21)
                    lower = rep(-21, length(ip))
                    max.par.model.count = 6 + 5 + 0 + 1
                }
            }else{
                if(estimate.importance == TRUE){
                    ip = c(selac.starting.vals[1,1:3], 0.25, 0.25, 0.25, 1, 1, nuc.ip, 0.01, 1)
                    parameter.column.names <- c("C.q.phi.Ne", "alpha", "beta", "freqA", "freqC", "freqG", "a0", "a1", "C_A", "G_A", "T_A", "G_C", "T_C", "sel.reg", "importance")
                    upper = c(log(50), log(20), log(20), 0, 0, 0, 21, 21, rep(21, length(nuc.ip)), 21, 4)
                    lower = c(rep(-21, length(ip)-1), -4)
                    max.par.model.count = 6 + 5 + 0 + 2 + 1 + 1
                }else{
                    ip = c(selac.starting.vals[1,1:3], 0.25, 0.25, 0.25, 1, 1, nuc.ip, 0.01)
                    parameter.column.names <- c("C.q.phi.Ne", "alpha", "beta", "freqA", "freqC", "freqG", "a0", "a1", "C_A", "G_A", "T_A", "G_C", "T_C", "sel.reg")
                    upper = c(log(50), log(20), log(20), 0, 0, 0, 21, 21, rep(21, length(nuc.ip)), 21)
                    lower = rep(-21, length(ip))
                    max.par.model.count = 6 + 5 + 0 + 2 + 1
                }
            }
        }
        if(nuc.model == "UNREST") {
            if(k.levels == 0){
                if(estimate.importance == TRUE){
                    ip = c(selac.starting.vals[1,1:3], nuc.ip, 0.01, 1)
                    parameter.column.names <- c("C.q.phi.Ne", "alpha", "beta", "C_A", "G_A", "T_A", "A_C", "G_C", "T_C", "A_G", "C_G", "A_T", "C_T", "G_T", "sel.reg", "importance")
                    upper = c(log(50), log(20), log(20), rep(21, length(nuc.ip)), 21, 4)
                    lower = c(rep(-21, length(ip)-1), -4)
                    max.par.model.count = 3 + 11 + 0 + 1 + 1
                }else{
                    ip = c(selac.starting.vals[1,1:3], nuc.ip, 0.01)
                    parameter.column.names <- c("C.q.phi.Ne", "alpha", "beta", "C_A", "G_A", "T_A", "A_C", "G_C", "T_C", "A_G", "C_G", "A_T", "C_T", "G_T", "sel.reg")
                    upper = c(log(50), log(20), log(20), rep(21, length(nuc.ip)), 21)
                    lower = rep(-21, length(ip))
                    max.par.model.count = 3 + 11 + 0 + 1
                }
            }else{
                if(estimate.importance == TRUE){
                    ip = c(selac.starting.vals[1,1:3], 1, 1, nuc.ip, 0.01, 1)
                    parameter.column.names <- c("C.q.phi.Ne", "alpha", "beta", "a0", "a1", "C_A", "G_A", "T_A", "A_C", "G_C", "T_C", "A_G", "C_G", "A_T", "C_T", "G_T", "sel.reg", "importance")
                    upper = c(log(50), log(20), log(20), 21, 21, rep(21, length(nuc.ip)), 21, 4)
                    lower = c(rep(-21, length(ip)-1), -4)
                    max.par.model.count = 3 + 11 + 0 + 2 + 1 + 1
                }else{
                    ip = c(selac.starting.vals[1,1:3], 1, 1, nuc.ip, 0.01)
                    parameter.column.names <- c("C.q.phi.Ne", "alpha", "beta", "a0", "a1", "C_A", "G_A", "T_A", "A_C", "G_C", "T_C", "A_G", "C_G", "A_T", "C_T", "G_T", "sel.reg")
                    upper = c(log(50), log(20), log(20), 21, 21, rep(21, length(nuc.ip)), 21)
                    lower = rep(-21, length(ip))
                    max.par.model.count = 3 + 11 + 0 + 2 + 1
                }
            }
        }
        index.matrix = matrix(0, n.partitions, max.par.model.count)
        index.matrix[1,] = 1:ncol(index.matrix)
        ip.vector = ip
        if(n.partitions > 1){
            for(partition.index in 2:n.partitions){
                if(nuc.model == "JC"){
                    if(estimate.importance == TRUE){
                        ip.vector = c(ip.vector, ip[1], ip[7], ip[8])
                    }else{
                        ip.vector = c(ip.vector, ip[1], ip[7])
                    }
                }else{
                    if(nuc.model == "GTR"){
                        index.matrix.tmp = numeric(max.par.model.count)
                        if(k.levels == 0){
                            if(estimate.importance == TRUE){
                                index.matrix.tmp[c(2:11)] = c(2:11)
                                ip.vector = c(ip.vector, ip[1], ip[12], ip[13])
                            }else{
                                index.matrix.tmp[c(2:11)] = c(2:11)
                                ip.vector = c(ip.vector, ip[1], ip[12])
                            }
                        }else{
                            if(estimate.importance == TRUE){
                                index.matrix.tmp[c(2:13)] = c(2:13)
                                ip.vector = c(ip.vector, ip[1], ip[14], ip[15])
                            }else{
                                index.matrix.tmp[c(2:13)] = c(2:13)
                                ip.vector = c(ip.vector, ip[1], ip[14])
                            }
                        }
                    }else{
                        index.matrix.tmp = numeric(max.par.model.count)
                        if(k.levels == 0){
                            if(estimate.importance == TRUE){
                                index.matrix.tmp[c(2:14)] = c(2:14)
                                ip.vector = c(ip.vector, ip[1], ip[15], ip[16])
                            }else{
                                index.matrix.tmp[c(2:14)] = c(2:14)
                                ip.vector = c(ip.vector, ip[1], ip[15])
                            }
                        }else{
                            if(estimate.importance == TRUE){
                                index.matrix.tmp[c(2:16)] = c(2:16)
                                ip.vector = c(ip.vector, ip[1], ip[17], ip[18])
                            }else{
                                index.matrix.tmp[c(2:16)] = c(2:16)
                                ip.vector = c(ip.vector, ip[1], ip[17])
                            }
                        }
                    }
                }
                index.matrix.tmp[index.matrix.tmp==0] = seq(max(index.matrix)+1, length.out=length(index.matrix.tmp[index.matrix.tmp==0]))
                index.matrix[partition.index,] <- index.matrix.tmp
            }
        }
    }

    #Reduced index matrix for the parameters estimated separately per gene (C.q.phi.Ne, sel.reg, and importance when estimated):
    if(estimate.importance == TRUE){
        index.matrix.red <- t(matrix(1:(n.partitions*3), 3, n.partitions))
    }else{
        index.matrix.red <- t(matrix(1:(n.partitions*2), 2, n.partitions))
    }
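    # For example (a sketch, assuming n.partitions = 3 and estimate.importance = FALSE), index.matrix.red is
    # rbind(c(1, 2), c(3, 4), c(5, 6)): column 1 indexes each gene's C.q.phi.Ne, column 2 its sel.reg rate.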
    #This is so we can break out alpha, beta, GTR, and gamma which are shared among ALL genes:
    #index.matrix.red <- t(matrix(1:n.partitions, 1, n.partitions))

    number.of.current.restarts <- 1
    best.lik <- 1000000
    while(number.of.current.restarts < (max.restarts+1)){
        cat(paste("Finished. Performing random restart ", number.of.current.restarts," ...", sep=""), "\n")
        mle.pars.mat <- index.matrix
        mle.pars.mat[] <- c(ip.vector, 0)[index.matrix]
        cat("       Doing first pass...", "\n")
        print(mle.pars.mat)
        if(edge.length == "optimize"){
            cat("              Optimizing edge lengths", "\n")
            phy$edge.length <- colMeans(starting.branch.lengths)
            #phy$edge.length <- colMeans(starting.branch.lengths) / (1/selac.starting.vals[number.of.current.restarts, 2])
            #opts.edge <- opts
            upper.edge <- rep(log(10), length(phy$edge.length))
            lower.edge <- rep(log(1e-8), length(phy$edge.length))
            results.edge.final <- nloptr(x0=log(phy$edge.length), eval_f = OptimizeEdgeLengths, ub=upper.edge, lb=lower.edge, opts=opts.edge, par.mat=mle.pars.mat, codon.site.data=site.pattern.data.list, codon.site.counts=site.pattern.count.list, data.type=data.type, codon.model=codon.model, n.partitions=n.partitions, nsites.vector=nsites.vector, index.matrix=index.matrix, phy=phy, aa.optim_array=NULL, root.p_array=NULL, codon.freq.by.aa=NULL, codon.freq.by.gene=codon.freq.by.gene.list, numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=cpv.starting.parameters[3], nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, edge.length=edge.length, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=TRUE, verbose=verbose, n.cores.by.gene=n.cores.by.gene, n.cores.by.gene.by.site=n.cores.by.gene.by.site, estimate.importance=estimate.importance, neglnl=TRUE, HMM=TRUE)
            print("here!!")
            print(results.edge.final$objective)
            print(exp(results.edge.final$solution))
            phy$edge.length <- exp(results.edge.final$solution)
        }
        cat("              Optimizing model parameters", "\n")

        if(estimate.importance == TRUE){
            alpha.beta.gtr <- mle.pars.mat[1,c(2:(max.par.model.count-2))]
            upper.bounds.shared <- upper[c(2:(max.par.model.count-2))]
            lower.bounds.shared <- lower[c(2:(max.par.model.count-2))]
        }else{
            alpha.beta.gtr <- mle.pars.mat[1,c(2:(max.par.model.count-1))]
            upper.bounds.shared <- upper[c(2:(max.par.model.count-1))]
            lower.bounds.shared <- lower[c(2:(max.par.model.count-1))]
        }
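        # alpha.beta.gtr holds the parameters shared across all genes (alpha, beta, base frequencies
        # and/or exchange rates, and the gamma shape when included); the gene-level C.q.phi.Ne, sel.reg,
        # and importance parameters are excluded and optimized per gene in ParallelizedOptimizedByGene below.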

        ParallelizedOptimizedByGene <- function(n.partition){
            if(estimate.importance == TRUE){
                # make sure this is always a vector no matter the number of partitions
                tmp.par.mat <- mle.pars.mat[n.partition,c(1, max.par.model.count-1, max.par.model.count)]
                upper.bounds.gene <- upper[c(1, max.par.model.count-1, max.par.model.count)]
                lower.bounds.gene <- lower[c(1, max.par.model.count-1, max.par.model.count)]
            }else{
                tmp.par.mat <- mle.pars.mat[n.partition, c(1, max.par.model.count)]
                upper.bounds.gene <- upper[c(1, max.par.model.count)]
                lower.bounds.gene <- lower[c(1, max.par.model.count)]
            }
            optim.by.gene <- nloptr(x0=log(tmp.par.mat), eval_f = OptimizeModelParsAlphaBetaGtrFixed, ub=upper.bounds.gene, lb=lower.bounds.gene, opts=opts, alpha.beta.gtr=alpha.beta.gtr, codon.site.data=site.pattern.data.list[[n.partition]], codon.site.counts=site.pattern.count.list[[n.partition]], data.type=data.type, codon.model=codon.model, n.partitions=1, nsites.vector=nsites.vector[n.partition], index.matrix=index.matrix.red[1,], phy=phy, aa.optim_array=NULL, codon.freq.by.aa=NULL, codon.freq.by.gene=codon.freq.by.gene.list[[n.partition]], numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=cpv.starting.parameters[3], nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, edge.length=edge.length, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=TRUE, verbose=verbose, n.cores.by.gene=n.cores.by.gene, n.cores.by.gene.by.site=n.cores.by.gene.by.site, estimate.importance=estimate.importance, neglnl=TRUE, HMM=TRUE)
            tmp.pars <- c(optim.by.gene$objective, optim.by.gene$solution)
            return(tmp.pars)
        }
        results.set <- mclapply(1:n.partitions, ParallelizedOptimizedByGene, mc.cores=n.cores.by.gene)
        if(estimate.importance == TRUE){
            #The number of columns is 4: [1] log-likelihood, [2] C.q.phi.Ne, [3] aa transition, [4] importance:
            parallelized.parameters <- t(matrix(unlist(results.set), 4, n.partitions))
        }else{
            #The number of columns is 3: [1] log-likelihood, [2] C.q.phi.Ne, [3] aa transition:
            parallelized.parameters <- t(matrix(unlist(results.set), 3, n.partitions))
        }

        results.final <- NULL
        results.final$objective <- sum(parallelized.parameters[,1])
        results.final$solution <- c(t(parallelized.parameters[,-1]))
        mle.pars.mat.red <- index.matrix.red
        mle.pars.mat.red[] <- c(exp(results.final$solution), 0)[index.matrix.red]
        print(mle.pars.mat.red)
        optim.alpha.beta.gtr.all.genes <- nloptr(x0=log(alpha.beta.gtr), eval_f = OptimizeAlphaBetaGtrOnly, ub=upper.bounds.shared, lb=lower.bounds.shared, opts=opts, fixed.pars=mle.pars.mat.red, codon.site.data=site.pattern.data.list, codon.site.counts=site.pattern.count.list, data.type=data.type, codon.model=codon.model, n.partitions=n.partitions, nsites.vector=nsites.vector, index.matrix=index.matrix.red, phy=phy, aa.optim_array=NULL, codon.freq.by.aa=NULL, codon.freq.by.gene=codon.freq.by.gene.list, numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=cpv.starting.parameters[3], nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, edge.length=edge.length, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=TRUE, verbose=verbose, n.cores.by.gene=n.cores.by.gene, n.cores.by.gene.by.site=n.cores.by.gene.by.site, estimate.importance=estimate.importance, neglnl=TRUE, HMM=TRUE)
        results.final$objective <- optim.alpha.beta.gtr.all.genes$objective
        alpha.beta.gtr <- exp(optim.alpha.beta.gtr.all.genes$solution)
        if(estimate.importance == TRUE){
            mle.pars.mat <- c()
            for(row.index in 1:dim(mle.pars.mat.red)[1]){
                mle.pars.mat <- rbind(mle.pars.mat, c(mle.pars.mat.red[row.index,1], alpha.beta.gtr, mle.pars.mat.red[row.index,2], mle.pars.mat.red[row.index,3]))
            }
        }else{
            mle.pars.mat <- c()
            for(row.index in 1:dim(mle.pars.mat.red)[1]){
                mle.pars.mat <- rbind(mle.pars.mat, c(mle.pars.mat.red[row.index,1], alpha.beta.gtr, mle.pars.mat.red[row.index,2]))
            }
        }
        print(results.final$objective)
        print(mle.pars.mat)

        current.likelihood <- results.final$objective
        cat(paste("       Current likelihood", current.likelihood, sep=" "), "\n")
        lik.diff <- 10
        iteration.number <- 1
        while(lik.diff != 0 & iteration.number <= max.iterations){
            cat(paste("       Finished. Iterating search -- Round", iteration.number, sep=" "), "\n")
            if(edge.length == "optimize"){
                cat("              Optimizing edge lengths", "\n")
                #opts.edge <- opts
                opts.edge$ftol_rel <- opts$ftol_rel * (max(1,tol.step^((max.iterations+1)-iteration.number)))
                results.edge.final <- nloptr(x0=log(phy$edge.length), eval_f = OptimizeEdgeLengths, ub=upper.edge, lb=lower.edge, opts=opts.edge, par.mat=mle.pars.mat, codon.site.data=site.pattern.data.list, codon.site.counts=site.pattern.count.list, data.type=data.type, codon.model=codon.model, n.partitions=n.partitions, nsites.vector=nsites.vector, index.matrix=index.matrix, phy=phy, aa.optim_array=NULL, root.p_array=NULL, codon.freq.by.aa=NULL, codon.freq.by.gene=codon.freq.by.gene.list, numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=cpv.starting.parameters[3], nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, edge.length=edge.length, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=TRUE, verbose=verbose, n.cores.by.gene=n.cores.by.gene, n.cores.by.gene.by.site=n.cores.by.gene.by.site, estimate.importance=estimate.importance, neglnl=TRUE, HMM=TRUE)
                print(results.edge.final$objective)
                print(exp(results.edge.final$solution))
                phy$edge.length <- exp(results.edge.final$solution)
            }
            cat("              Optimizing model parameters", "\n")

            ParallelizedOptimizedByGene <- function(n.partition){
                if(estimate.importance == TRUE){
                    tmp.par.mat <- mle.pars.mat[n.partition,c(1, max.par.model.count-1, max.par.model.count)]
                    upper.bounds.gene <- upper[c(1, max.par.model.count-1, max.par.model.count)]
                    lower.bounds.gene <- lower[c(1, max.par.model.count-1, max.par.model.count)]
                }else{
                    tmp.par.mat <- mle.pars.mat[n.partition, c(1, max.par.model.count)]
                    upper.bounds.gene <- upper[c(1, max.par.model.count)]
                    lower.bounds.gene <- lower[c(1, max.par.model.count)]
                }
                optim.by.gene <- nloptr(x0=log(tmp.par.mat), eval_f = OptimizeModelParsAlphaBetaGtrFixed, ub=upper.bounds.gene, lb=lower.bounds.gene, opts=opts, alpha.beta.gtr=alpha.beta.gtr, codon.site.data=site.pattern.data.list[[n.partition]], codon.site.counts=site.pattern.count.list[[n.partition]], data.type=data.type, codon.model=codon.model, n.partitions=1, nsites.vector=nsites.vector[n.partition], index.matrix=index.matrix.red[1,], phy=phy, aa.optim_array=NULL, codon.freq.by.aa=NULL, codon.freq.by.gene=codon.freq.by.gene.list[[n.partition]], numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=cpv.starting.parameters[3], nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, edge.length=edge.length, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=TRUE, verbose=verbose, n.cores.by.gene=n.cores.by.gene, n.cores.by.gene.by.site=n.cores.by.gene.by.site, estimate.importance=estimate.importance, neglnl=TRUE, HMM=TRUE)
                tmp.pars <- c(optim.by.gene$objective, optim.by.gene$solution)
                return(tmp.pars)
            }
            results.set <- mclapply(1:n.partitions, ParallelizedOptimizedByGene, mc.cores=n.cores.by.gene)
            if(estimate.importance == TRUE){
                #The number of columns is 4: [1] log-likelihood, [2] C.q.phi.Ne, [3] aa transition, [4] importance:
                parallelized.parameters <- t(matrix(unlist(results.set), 4, n.partitions))
            }else{
                #The number of columns is 3: [1] log-likelihood, [2] C.q.phi.Ne, [3] aa transition:
                parallelized.parameters <- t(matrix(unlist(results.set), 3, n.partitions))
            }

            results.final <- NULL
            results.final$objective <- sum(parallelized.parameters[,1])
            results.final$solution <- c(t(parallelized.parameters[,-1]))
            mle.pars.mat.red <- index.matrix.red
            mle.pars.mat.red[] <- c(exp(results.final$solution), 0)[index.matrix.red]

            print(mle.pars.mat.red)
            optim.alpha.beta.gtr.all.genes <- nloptr(x0=log(alpha.beta.gtr), eval_f = OptimizeAlphaBetaGtrOnly, ub=upper.bounds.shared, lb=lower.bounds.shared, opts=opts, fixed.pars=mle.pars.mat.red, codon.site.data=site.pattern.data.list, codon.site.counts=site.pattern.count.list, data.type=data.type, codon.model=codon.model, n.partitions=n.partitions, nsites.vector=nsites.vector, index.matrix=index.matrix.red, phy=phy, aa.optim_array=NULL, codon.freq.by.aa=NULL, codon.freq.by.gene=codon.freq.by.gene.list, numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=cpv.starting.parameters[3], nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, edge.length=edge.length, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=TRUE, verbose=verbose, n.cores.by.gene=n.cores.by.gene, n.cores.by.gene.by.site=n.cores.by.gene.by.site, estimate.importance=estimate.importance, neglnl=TRUE, HMM=TRUE)
            results.final$objective <- optim.alpha.beta.gtr.all.genes$objective
            alpha.beta.gtr <- exp(optim.alpha.beta.gtr.all.genes$solution)
            if(estimate.importance == TRUE){
                mle.pars.mat <- c()
                for(row.index in 1:dim(mle.pars.mat.red)[1]){
                    mle.pars.mat <- rbind(mle.pars.mat, c(mle.pars.mat.red[row.index,1], alpha.beta.gtr, mle.pars.mat.red[row.index,2], mle.pars.mat.red[row.index,3]))
                }
            }else{
                mle.pars.mat <- c()
                for(row.index in 1:dim(mle.pars.mat.red)[1]){
                    mle.pars.mat <- rbind(mle.pars.mat, c(mle.pars.mat.red[row.index,1], alpha.beta.gtr, mle.pars.mat.red[row.index,2]))
                }
            }
            print(results.final$objective)
            print(mle.pars.mat)
            lik.diff <- round(abs(current.likelihood-results.final$objective), 8)
            current.likelihood <- results.final$objective
            cat(paste("       Current likelihood", current.likelihood, sep=" "), paste("difference from previous round", lik.diff, sep=" "), "\n")
            iteration.number <- iteration.number + 1
        }
        #Output for use in sims#
        if(output.by.restart == TRUE){
            obj.tmp = list(np=max(index.matrix) + length(phy$edge.length) + sum(nsites.vector), loglik = -results.final$objective, AIC = -2*(-results.final$objective)+2*(max(index.matrix) + length(phy$edge.length) + sum(nsites.vector)), mle.pars=mle.pars.mat, index.matrix=index.matrix, partitions=partitions[1:n.partitions], opts=opts, phy=phy, nsites=nsites.vector, data.type=data.type, codon.model=codon.model, aa.optim=NULL, aa.optim.type=NULL, nuc.model=nuc.model, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=cpv.starting.parameters[3], codon.freq.by.aa=NULL, codon.freq.by.gene=codon.freq.by.gene.list, max.tol=max.tol, max.evals=max.evals, selac.starting.vals=ip.vector)
            class(obj.tmp) = "selac"
            save(obj.tmp, file=paste(paste(codon.data.path, output.restart.filename, sep=""), number.of.current.restarts, "Rsave", sep="."))
        }
        ########################
        if(results.final$objective < best.lik){
            best.ip <- ip.vector
            best.lik <- results.final$objective
            best.solution <- mle.pars.mat
            best.edge.lengths <- phy$edge.length
            best.codon.freq.by.aa <- codon.freq.by.aa.list
            best.codon.freq.by.gene <- codon.freq.by.gene.list
        }
        number.of.current.restarts <- number.of.current.restarts + 1
        print(ip.vector)
        ip.vector[c(index.matrix[,1])] <- selac.starting.vals[number.of.current.restarts, 1]
        ip.vector[2:3] <- selac.starting.vals[number.of.current.restarts, 2:3]
        print(ip.vector)
    }
    selac.starting.vals <- best.ip
    loglik <- -(best.lik) #to go from neglnl to lnl
    mle.pars.mat <- best.solution
    codon.freq.by.aa.list <- best.codon.freq.by.aa
    codon.freq.by.gene.list <- best.codon.freq.by.gene

    if(edge.length == "optimize"){
        phy$edge.length <- best.edge.lengths
    }

    cat("Finished. Summarizing results...", "\n")
    colnames(mle.pars.mat) <- parameter.column.names

    if(edge.length == "optimize"){
        np <- max(index.matrix) + length(phy$edge.length)
    }else{
        np <- max(index.matrix)
    }

    #Counting parameters: unlike the per-restart output above, nsites are not added to np here.
    obj = list(np=np, loglik = loglik, AIC = -2*loglik+2*np, mle.pars=mle.pars.mat, index.matrix=index.matrix, partitions=partitions[1:n.partitions], opts=opts, phy=phy, nsites=nsites.vector, data.type=data.type, codon.model=codon.model, aa.optim=NULL, aa.optim.type=NULL, nuc.model=nuc.model, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=cpv.starting.parameters[3], codon.freq.by.aa=NULL, codon.freq.by.gene=codon.freq.by.gene.list, max.tol=max.tol, max.evals=max.evals, selac.starting.vals=selac.starting.vals)
    class(obj) = "selac"
    return(obj)
}



######################################################################################################################################
######################################################################################################################################
### Utility functions for getting raw likelihoods not just across genes, but across sites. Also allows different optimal amino acids to be tried.
######################################################################################################################################
######################################################################################################################################

#' @title Get data partition order
#'
#' @description
#'  Provides the order of the partitions after the data is read into SELAC.
#'
#' @param codon.data.path Provides the path to the directory containing the gene specific fasta files of coding data. Files must have a ".fasta" ending.
#'
#' @details
#' Provides the order of the partitions when the data is read into SELAC. This function is mainly useful when users want to supply their own optimal amino acid list to SELAC.
GetPartitionOrder <- function(codon.data.path){
    partitions <- system(paste("ls -1 ", codon.data.path, "*.fasta", sep=""), intern=TRUE)
    return(partitions)
}
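
#A minimal usage sketch ("yeastData/" is a placeholder directory). The returned order
#matches the order in which the genes are read in, so a user-supplied aa.optim.input
#list should follow it.
#gene.order <- GetPartitionOrder(codon.data.path="yeastData/")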


#' @title Calculate functionality
#'
#' @description
#' Calculates the functionality of a single gene
#'
#' @param gene.length Indicates the length of the gene used to calculate functionality.
#' @param aa.data A matrix of amino acids
#' @param optimal.aa A vector of inferred optimal amino acids.
#' @param alpha The inferred Grantham composition parameter
#' @param beta The inferred Grantham polarity parameter
#' @param gamma The inferred Grantham molecular volume parameter
#' @param gp A vector of gamma rates for calculating among-site heterogeneity in functionality.
#' @param aa.properties User-supplied amino acid distance properties. By default we assume Grantham (1974) properties.
#'
#' @details
#' The purpose of this function is to provide the functionality of a gene based on the inferred parameters from SelAC. The functionality is often used to scale phi.
GetFunctionality <- function(gene.length, aa.data, optimal.aa, alpha, beta, gamma, gp=NULL, aa.properties=NULL){
    if(is.null(aa.properties)) {
        #     aa.properties <- structure(c(0, 2.75, 1.38, 0.92, 0, 0.74, 0.58, 0, 0.33, 0, 0,
        # 1.33, 0.39, 0.89, 0.65, 1.42, 0.71, 0, 0.13, 0.2, 8.1, 5.5, 13,
        # 12.3, 5.2, 9, 10.4, 5.2, 11.3, 4.9, 5.7, 11.6, 8, 10.5, 10.5,
        # 9.2, 8.6, 5.9, 5.4, 6.2, 31, 55, 54, 83, 132, 3, 96, 111, 119,
        # 111, 105, 56, 32.5, 85, 124, 32, 61, 84, 170, 136), .Dim = c(20L,
        # 3L), .Dimnames = list(c("Ala", "Cys", "Asp", "Glu", "Phe", "Gly",
        # "His", "Ile", "Lys", "Leu", "Met", "Asn", "Pro", "Gln", "Arg",
        # "Ser", "Thr", "Val", "Trp", "Tyr"), c("c", "p", "v"))) #properties from Grantham paper
        aa.properties <- structure(c(0, 2.75, 1.38, 0.92, 0, 0.74, 0.58, 0, 0.33, 0, 0,
        1.33, 0.39, 0.89, 0.65, 1.42, 0.71, 0, 0.13, 0.2, 8.1, 5.5, 13,
        12.3, 5.2, 9, 10.4, 5.2, 11.3, 4.9, 5.7, 11.6, 8, 10.5, 10.5,
        9.2, 8.6, 5.9, 5.4, 6.2, 31, 55, 54, 83, 132, 3, 96, 111, 119,
        111, 105, 56, 32.5, 85, 124, 32, 61, 84, 170, 136), .Dim = c(20L,
        3L), .Dimnames = list(c("A", "C", "D", "E", "F", "G",
        "H", "I", "K", "L", "M", "N", "P", "Q", "R",
        "S", "T", "V", "W", "Y"), c("c", "p", "v"))) #properties from Grantham paper
    }
    if(is.null(gp)){
        gp <- rep(1, gene.length)
    }
    aa.distances <- c()
    #Note: using only the second row, because we are comparing empirical S. cerevisiae rates:
    for(site.index in 1:gene.length){
        if(aa.data[,site.index]!="NA"){
            #broke this up to make debugging easier:
            distance <- ((alpha*(aa.properties[aa.data[,site.index],1] - aa.properties[optimal.aa[site.index],1])^2 + beta*(aa.properties[aa.data[,site.index],2]-aa.properties[optimal.aa[site.index],2])^2+gamma*(aa.properties[aa.data[,site.index],3]-aa.properties[optimal.aa[site.index],3])^2)^(1/2))
            aa.distances <- c(aa.distances, (1+gp[site.index]*distance))
        }else{
            aa.distances <- c(aa.distances, 0)
            gene.length <- gene.length - 1
        }
    }
    functionality = 1/((1/gene.length) * sum(aa.distances))
    return(functionality)
}
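
#A hypothetical sketch of calling GetFunctionality() on a single three-site sequence
#scored against assumed optimal amino acids. The alpha and beta values are arbitrary
#illustrative weights; 0.0003990333 is the fixed volume value used elsewhere in this
#file. A sequence that matches its optimal amino acids exactly returns 1.
#aa.data <- matrix(c("A", "G", "T"), nrow=1)
#optimal.aa <- c("A", "S", "T")
#GetFunctionality(gene.length=3, aa.data=aa.data, optimal.aa=optimal.aa, alpha=1.83, beta=0.10, gamma=0.0003990333)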


#' @title Calculate site likelihoods under SelAC
#'
#' @description
#' Calculates the likelihoods across sites and across genes under SELAC
#'
#' @param selac.obj An object of class SELAC.
#' @param codon.data.path Provides the path to the directory containing the gene specific fasta files of coding data.
#' @param aa.optim.input A list of optimal amino acids with each list element designating a character vector for each gene. The optimal amino acids can be the MLE from a selac run (default) or a user-defined list of optimal amino acids.
#' @param fasta.rows.to.keep Indicates which rows to keep in the input fasta files.
#'
#' @details
#' The purpose of this function is to provide the site likelihoods across genes. It is also flexible in that it allows different hypotheses about optimal amino acids across genes and/or sites. The output is a list object, with each list entry designating 1) the tot.likelihood for that gene, and 2) the site likelihoods for that gene.
GetSelacSiteLikelihoods <- function(selac.obj, codon.data.path, aa.optim.input=NULL, fasta.rows.to.keep=NULL) {

    codon.index.matrix = CreateCodonMutationMatrixIndex()
    phy <- selac.obj$phy
    partitions <- selac.obj$partitions
    include.gamma <- selac.obj$include.gamma
    aa.properties <- selac.obj$aa.properties
    diploid <- selac.obj$diploid
    gamma.type <- selac.obj$gamma.type
    ncats <- selac.obj$ncats
    numcode <- selac.obj$numcode
    gamma <- selac.obj$volume.fixed.value
    nuc.model <- selac.obj$nuc.model
    k.levels <- selac.obj$k.levels
    n.cores <- NULL
    obj.final <- as.list(1:length(partitions))
    n.cores.by.gene.by.site <- selac.obj$n.cores.by.gene.by.site

    for(partition.index in 1:length(partitions)){
        x <- c(selac.obj$mle.pars[partition.index,])
        gene.tmp <- read.dna(partitions[partition.index], format='fasta')
        if(!is.null(fasta.rows.to.keep)){
            gene.tmp <- as.list(as.matrix(cbind(gene.tmp))[fasta.rows.to.keep,])
        }else{
            gene.tmp <- as.list(as.matrix(cbind(gene.tmp)))
        }
        codon.data.tmp <- DNAbinToCodonNumeric(gene.tmp)
        codon.data.tmp <- codon.data.tmp[phy$tip.label,]
        codon.data = NULL
        codon.data$unique.site.patterns = codon.data.tmp
        nsites <- dim(codon.data$unique.site.patterns)[2]-1
        codon.data$site.pattern.counts = rep(1, nsites)

        codon.freq.by.aa=selac.obj$codon.freq.by.aa[[partition.index]]
        codon.freq.by.gene=selac.obj$codon.freq.by.gene[[partition.index]]

        if(is.null(aa.optim.input)){
            aa.optim_array <- selac.obj$aa.optim[[partition.index]]
        }else{
            aa.optim_array <- aa.optim.input[[partition.index]]
        }

        if(include.gamma == TRUE){
            shape = x[length(x)]
            x = x[-length(x)]
        }

        C.Phi.q.Ne <- x[1]
        C <- 4
        q <- 4e-7
        Ne <- 5e6
        Phi.q.Ne <- C.Phi.q.Ne / C
        Phi.Ne <- Phi.q.Ne / q
        Phi <- Phi.Ne / Ne
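        #For example, with the fixed constants above, C.Phi.q.Ne = 2 unpacks to
        #Phi = 2 / (4 * 4e-7 * 5e6) = 0.25.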
        alpha <- x[2]
        beta <- x[3]

        if(k.levels > 0){
            if(nuc.model == "JC") {
                base.freqs=c(x[4:6], 1-sum(x[4:6]))
                nuc.mutation.rates <- CreateNucleotideMutationMatrix(1, model=nuc.model, base.freqs=base.freqs)
            }
            if(nuc.model == "GTR") {
                base.freqs=c(x[4:6], 1-sum(x[4:6]))
                nuc.mutation.rates <- CreateNucleotideMutationMatrix(x[9:length(x)], model=nuc.model, base.freqs=base.freqs)
            }
            if(nuc.model == "UNREST") {
                nuc.mutation.rates <- CreateNucleotideMutationMatrix(x[6:length(x)], model=nuc.model)
            }
        }else{
            if(nuc.model == "JC") {
                base.freqs=c(x[4:6], 1-sum(x[4:6]))
                nuc.mutation.rates <- CreateNucleotideMutationMatrix(1, model=nuc.model, base.freqs=base.freqs)
            }
            if(nuc.model == "GTR") {
                base.freqs=c(x[4:6], 1-sum(x[4:6]))
                nuc.mutation.rates <- CreateNucleotideMutationMatrix(x[7:length(x)], model=nuc.model, base.freqs=base.freqs)
            }
            if(nuc.model == "UNREST") {
                nuc.mutation.rates <- CreateNucleotideMutationMatrix(x[4:length(x)], model=nuc.model)
            }
        }

        codon_mutation_matrix <- matrix(nuc.mutation.rates[codon.index.matrix], dim(codon.index.matrix))
        codon_mutation_matrix[is.na(codon_mutation_matrix)]=0

        if(include.gamma==TRUE){
            if(gamma.type == "median"){
                rates.k <- DiscreteGamma(shape=shape, ncats=ncats)
                weights.k <- rep(1/ncats, ncats)
            }
            if(gamma.type == "quadrature"){
                rates.and.weights <- LaguerreQuad(shape=shape, ncats=ncats)
                rates.k <- rates.and.weights[1:ncats]
                weights.k <- rates.and.weights[(ncats+1):(ncats*2)]
            }
            if(gamma.type == "lognormal"){
                rates.and.weights <- LogNormalQuad(shape=shape, ncats=ncats)
                rates.k <- rates.and.weights[1:ncats]
                weights.k <- rates.and.weights[(ncats+1):(ncats*2)]
            }
            final.likelihood.mat = matrix(0, nrow=ncats, ncol=nsites)
            for(k in sequence(ncats)){
                if(k.levels > 0){
                    aa.distances <- CreateAADistanceMatrix(alpha=alpha, beta=beta, gamma=gamma, aa.properties=aa.properties, normalize=FALSE, poly.params=x[7:8], k=k.levels)
                }else{
                    aa.distances <- CreateAADistanceMatrix(alpha=alpha, beta=beta, gamma=gamma, aa.properties=aa.properties, normalize=FALSE, poly.params=NULL, k=k.levels)
                }
                Q_codon_array <- FastCreateAllCodonFixationProbabilityMatrices(aa.distances=aa.distances, nsites=nsites, C=C, Phi=Phi*rates.k[k], q=q, Ne=Ne, include.stop.codon=TRUE, numcode=numcode, diploid=diploid, flee.stop.codon.rate=0.9999999)
                final.likelihood.mat[k,] = GetLikelihoodSAC_CodonForManyCharVaryingBySite(codon.data, phy, Q_codon_array, codon.freq.by.aa=codon.freq.by.aa, codon.freq.by.gene=codon.freq.by.gene, aa.optim_array=aa.optim_array, codon_mutation_matrix=codon_mutation_matrix, Ne=Ne, rates=NULL, numcode=numcode, diploid=diploid, n.cores.by.gene.by.site=n.cores.by.gene.by.site)
            }
            likelihood <- sum(log(colSums(exp(final.likelihood.mat)*weights.k)) * codon.data$site.pattern.counts)
        }else{
            if(k.levels > 0){
                aa.distances <- CreateAADistanceMatrix(alpha=alpha, beta=beta, gamma=gamma, aa.properties=aa.properties, normalize=FALSE, poly.params=x[7:8], k=k.levels)
            }else{
                aa.distances <- CreateAADistanceMatrix(alpha=alpha, beta=beta, gamma=gamma, aa.properties=aa.properties, normalize=FALSE, poly.params=NULL, k=k.levels)
            }
            Q_codon_array <- FastCreateAllCodonFixationProbabilityMatrices(aa.distances=aa.distances, nsites=nsites, C=C, Phi=Phi, q=q, Ne=Ne, include.stop.codon=TRUE, numcode=numcode, diploid=diploid, flee.stop.codon.rate=0.9999999)
            final.likelihood = GetLikelihoodSAC_CodonForManyCharVaryingBySite(codon.data, phy, Q_codon_array, codon.freq.by.aa=codon.freq.by.aa, codon.freq.by.gene=codon.freq.by.gene, aa.optim_array=aa.optim_array, codon_mutation_matrix=codon_mutation_matrix, Ne=Ne, rates=NULL, numcode=numcode, diploid=diploid, n.cores.by.gene.by.site=n.cores.by.gene.by.site)
            likelihood <- sum(final.likelihood * codon.data$site.pattern.counts)
        }
        obj.gene <- NULL
        obj.gene$tot.likelihood <- likelihood
        obj.gene$partial.likelihoods <- final.likelihood
        obj.final[[partition.index]] <- obj.gene
    }
    return(obj.final)
}
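
#A usage sketch, assuming a saved SelAC fit; the file and directory names below are
#placeholders. Each list element holds the total likelihood and the per-site
#likelihoods for one gene.
#load("yeastSelacFit.Rdata") #provides, say, an object named result
#site.liks <- GetSelacSiteLikelihoods(result, codon.data.path="yeastData/")
#site.liks[[1]]$tot.likelihood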


#' @title Phi rate category information under SELAC+gamma
#'
#' @description
#' Provides likelihood information and best rates across sites and across genes under SELAC+gamma
#'
#' @param selac.obj An object of class SELAC.
#' @param codon.data.path Provides the path to the directory containing the gene specific fasta files of coding data.
#' @param aa.optim.input A list of optimal amino acids with each list element designating a character vector for each gene. The optimal amino acids can be the MLE from a selac run (default) or a user-defined list of optimal amino acids.
#' @param fasta.rows.to.keep Indicates which rows to keep in the input fasta files.
#' @param n.cores.by.gene.by.site The number of cores to dedicate to parallelizing analyses by site WITHIN a gene. Note n.cores.by.gene*n.cores.by.gene.by.site is the total number of cores dedicated to the analysis.
#'
#' @details
#' The purpose of this function is to determine which rate category best fits each site across genes. The output is a list object, with each list entry designating the optimal rate category across sites for that gene.
GetSelacPhiCat <- function(selac.obj, codon.data.path, aa.optim.input=NULL, fasta.rows.to.keep=NULL, n.cores.by.gene.by.site=1) {

    codon.index.matrix = CreateCodonMutationMatrixIndex()
    phy <- selac.obj$phy
    partitions <- selac.obj$partitions
    include.gamma <- selac.obj$include.gamma
    aa.properties <- selac.obj$aa.properties
    diploid <- selac.obj$diploid
    gamma.type <- selac.obj$gamma.type
    ncats <- selac.obj$ncats
    numcode <- selac.obj$numcode
    gamma <- selac.obj$volume.fixed.value
    nuc.model <- selac.obj$nuc.model
    k.levels <- selac.obj$k.levels
    n.cores.by.gene.by.site <- selac.obj$n.cores.by.gene.by.site
    volume.fixed.value <- selac.obj$volume.fixed.value
    n.cores <- NULL
    obj.final <- as.list(1:length(partitions))

    for(partition.index in 1:length(partitions)){
        x <- c(selac.obj$mle.pars[partition.index,])
        if(include.gamma == TRUE){
            shape = x[length(x)]
        }
        if(include.gamma==TRUE){
            if(gamma.type == "median"){
                rates.k <- DiscreteGamma(shape=shape, ncats=ncats)
                weights.k <- rep(1/ncats, ncats)
            }
            if(gamma.type == "quadrature"){
                rates.and.weights <- LaguerreQuad(shape=shape, ncats=ncats)
                rates.k <- rates.and.weights[1:ncats]
                weights.k <- rates.and.weights[(ncats+1):(ncats*2)]
            }
            if(gamma.type == "lognormal"){
                rates.and.weights <- LogNormalQuad(shape=shape, ncats=ncats)
                rates.k <- rates.and.weights[1:ncats]
                weights.k <- rates.and.weights[(ncats+1):(ncats*2)]
            }
        }
        gene.tmp <- read.dna(partitions[partition.index], format='fasta')
        if(!is.null(fasta.rows.to.keep)){
            gene.tmp <- as.list(as.matrix(cbind(gene.tmp))[fasta.rows.to.keep,])
        }else{
            gene.tmp <- as.list(as.matrix(cbind(gene.tmp)))
        }
        codon.data.tmp <- DNAbinToCodonNumeric(gene.tmp)
        codon.data.tmp <- codon.data.tmp[phy$tip.label,]
        codon.data = NULL
        codon.data$unique.site.patterns = codon.data.tmp
        nsites <- dim(codon.data$unique.site.patterns)[2]-1
        codon.data$site.pattern.counts = rep(1, nsites)

        codon.freq.by.aa=selac.obj$codon.freq.by.aa[[partition.index]]
        codon.freq.by.gene=selac.obj$codon.freq.by.gene[[partition.index]]

        if(is.null(aa.optim.input)){
            aa.optim_array <- selac.obj$aa.optim[[partition.index]]
        }else{
            aa.optim_array <- aa.optim.input[[partition.index]]
        }
        phi.likelihoods.per.site <- GetPhiLikelihoodPerSite(x, codon.data=codon.data, phy=phy, aa.optim_array=aa.optim_array, codon.freq.by.aa=codon.freq.by.aa, codon.freq.by.gene=codon.freq.by.gene, numcode=numcode, diploid=diploid, aa.properties=aa.properties, volume.fixed.value=volume.fixed.value, nuc.model=nuc.model, codon.index.matrix=codon.index.matrix, include.gamma=include.gamma, gamma.type=gamma.type, ncats=ncats, k.levels=k.levels, logspace=FALSE, verbose=FALSE, neglnl=FALSE, n.cores.by.gene.by.site=1)
        C.Phi.q.Ne <- x[1]
        C <- 4
        q <- 4e-7
        Ne <- 5e6
        Phi.q.Ne <- C.Phi.q.Ne / C
        Phi.Ne <- Phi.q.Ne / q
        Phi <- Phi.Ne / Ne
        obj <- NULL
        obj$likelihood <- phi.likelihoods.per.site
        rate.vector <- c()
        indicator.raw.vector <- c()
        indicator.weighted.vector <- c()
        model.ave.phi.vector <- c()
        model.weight.ave.phi.vector <- c()
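        #Each site is assigned a phi rate class two ways: (i) from the raw per-class
        #log-likelihoods and (ii) after rescaling by the rate-category weights. In both
        #cases phi is model-averaged with Akaike weights; e.g., per-class
        #log-likelihoods of (-10, -11, -13) give dAIC = (0, 2, 6) and weights of
        #roughly (0.71, 0.26, 0.04).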
        for(i in 1:dim(phi.likelihoods.per.site)[2]){
            #model-average based solely on the raw likelihoods:
            tmp.rate.class <- which.max(phi.likelihoods.per.site[,i])
            rate.vector <- c(rate.vector, rates.k[tmp.rate.class])
            indicator.raw.vector <- c(indicator.raw.vector, tmp.rate.class)
            aic <- -2 * phi.likelihoods.per.site[,i]
            daic <- aic - aic[tmp.rate.class]
            w.aic <- exp(-.5 * daic) / sum(exp(-.5 * daic))
            model.ave.phi.vector <- c(model.ave.phi.vector, sum(Phi * rates.k * w.aic))
            #model-average based on rescaling of the likelihoods according to the rate-category weights:
            tmp.rate.class <- which.max(log(exp(phi.likelihoods.per.site[,i]) * weights.k))
            indicator.weighted.vector <- c(indicator.weighted.vector, tmp.rate.class)
            aic <- -2 * log(exp(phi.likelihoods.per.site[,i]) * weights.k)
            daic <- aic - aic[tmp.rate.class]
            w.aic <- exp(-.5 * daic) / sum(exp(-.5 * daic))
            model.weight.ave.phi.vector <- c(model.weight.ave.phi.vector, sum(Phi * rates.k * w.aic))
        }
        obj$best.rate.by.site <- rate.vector
        obj$model.ave.rawLik.phi <- model.ave.phi.vector
        obj$model.ave.weightedLik.phi <- model.weight.ave.phi.vector
        obj$indicator.by.site.rawLik <- indicator.raw.vector
        obj$indicator.by.site.weightedLik <- indicator.weighted.vector
        obj.final[[partition.index]] <- obj
    }
    return(obj.final)
}
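
#A usage sketch, assuming the same placeholder fit as above. Each list element gives,
#per site, the best phi rate class and the model-averaged phi under both weighting
#schemes.
#phi.by.site <- GetSelacPhiCat(result, codon.data.path="yeastData/")
#phi.by.site[[1]]$best.rate.by.site
#phi.by.site[[1]]$model.ave.rawLik.phi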



GetAALikelihoodPerSite <- function(x, codon.data, phy, aa.optim_array=NULL, codon.freq.by.aa=NULL, codon.freq.by.gene=NULL, numcode=1, diploid=TRUE, aa.properties=NULL, volume.fixed.value=0.0003990333, nuc.model, codon.index.matrix, include.gamma=FALSE, gamma.type="quadrature", ncats=4, k.levels=0, logspace=FALSE, verbose=TRUE, neglnl=FALSE, n.cores.by.gene.by.site=n.cores.by.gene.by.site) {
    if(logspace) {
        x = exp(x)
    }
    if(include.gamma == TRUE){
        shape = x[length(x)]
        x = x[-length(x)]
    }

    C.Phi.q.Ne <- x[1]
    C <- 4
    q <- 4e-7
    Ne <- 5e6
    Phi.q.Ne <- C.Phi.q.Ne / C
    Phi.Ne <- Phi.q.Ne / q
    Phi <- Phi.Ne / Ne
    alpha <- x[2]
    beta <- x[3]
    gamma <- volume.fixed.value

    if(k.levels > 0){
        if(nuc.model == "JC") {
            base.freqs=c(x[4:6], 1-sum(x[4:6]))
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(1, model=nuc.model, base.freqs=base.freqs)
            poly.params <- x[7:8]
        }
        if(nuc.model == "GTR") {
            base.freqs=c(x[4:6], 1-sum(x[4:6]))
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(x[9:length(x)], model=nuc.model, base.freqs=base.freqs)
            poly.params <- x[7:8]
        }
        if(nuc.model == "UNREST") {
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(x[6:length(x)], model=nuc.model, base.freqs=NULL)
            poly.params <- x[4:5]
        }
    }else{
        if(nuc.model == "JC") {
            base.freqs=c(x[4:6], 1-sum(x[4:6]))
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(1, model=nuc.model, base.freqs=base.freqs)
        }
        if(nuc.model == "GTR") {
            base.freqs=c(x[4:6], 1-sum(x[4:6]))
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(x[7:length(x)], model=nuc.model, base.freqs=base.freqs)
        }
        if(nuc.model == "UNREST") {
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(x[4:length(x)], model=nuc.model, base.freqs=NULL)
        }
    }

    if(!is.null(codon.data$unique.site.patterns)){
        codon.data.list <- codon.data
        nsites.unique <- dim(codon.data$unique.site.patterns)[2]-1
    }else{
        nsites.unique <- dim(codon.data)[2]-1
        codon.data.list <- NULL
        codon.data.list$unique.site.patterns <- codon.data
        codon.data.list$site.pattern.counts <- rep(1, nsites.unique)
    }
    nsites <- sum(codon.data$site.pattern.counts)

    #codon_mutation_matrix = c(as.vector(nuc.mutation.rates), 0)[codon.index.matrix]
    codon_mutation_matrix <- matrix(nuc.mutation.rates[codon.index.matrix], dim(codon.index.matrix))
    codon_mutation_matrix[is.na(codon_mutation_matrix)]=0

    optimal.vector.by.site <- rep(NA, nsites.unique)
    #unique.aa <- GetMatrixAANames(numcode)
    optimal.aa.likelihood.mat <- matrix(0, nrow=length(.unique.aa), ncol=nsites.unique)

    for(i in 1:length(.unique.aa)){
        if(.unique.aa[i]=="*"){
            optimal.aa.likelihood.mat[i,] <- rep(-1000000, nsites.unique)
        }else{
            aa.optim_array = rep(.unique.aa[i], nsites.unique)
            if(include.gamma==TRUE){
                gene_site_array <- array(1, dim=c(21, nsites.unique, ncats=ncats))
                if(gamma.type == "median"){
                    rates.k <- DiscreteGamma(shape=shape, ncats=ncats)
                    weights.k <- rep(1/ncats, ncats)
                }
                if(gamma.type == "quadrature"){
                    rates.and.weights <- LaguerreQuad(shape=shape, ncats=ncats)
                    rates.k <- rates.and.weights[1:ncats]
                    weights.k <- rates.and.weights[(ncats+1):(ncats*2)]
                }
                if(gamma.type == "lognormal"){
                    rates.and.weights <- LogNormalQuad(shape=shape, ncats=ncats)
                    rates.k <- rates.and.weights[1:ncats]
                    weights.k <- rates.and.weights[(ncats+1):(ncats*2)]
                }
                final.likelihood.mat = matrix(0, nrow=ncats, ncol=nsites.unique)
                for(k.cat in sequence(ncats)){
                    if(k.levels > 0){
                        aa.distances <- CreateAADistanceMatrix(alpha=alpha, beta=beta, gamma=gamma, aa.properties=aa.properties, normalize=FALSE, poly.params=poly.params, k=k.levels)
                    }else{
                        aa.distances <- CreateAADistanceMatrix(alpha=alpha, beta=beta, gamma=gamma, aa.properties=aa.properties, normalize=FALSE, poly.params=NULL, k=k.levels)
                    }
                    Q_codon_array <- FastCreateAllCodonFixationProbabilityMatrices(aa.distances=aa.distances, nsites=nsites, C=C, Phi=Phi*rates.k[k.cat], q=q, Ne=Ne, include.stop.codon=TRUE, numcode=numcode, diploid=diploid, flee.stop.codon.rate=0.9999)
                    tmp = GetLikelihoodSAC_CodonForManyCharVaryingBySite(codon.data.list, phy, Q_codon_array, codon.freq.by.aa=codon.freq.by.aa, codon.freq.by.gene=codon.freq.by.gene, aa.optim_array=aa.optim_array, codon_mutation_matrix=codon_mutation_matrix, Ne=Ne, rates=NULL, numcode=numcode, diploid=diploid, n.cores.by.gene.by.site=n.cores.by.gene.by.site)
                    tmp[is.na(tmp)] = -1000000
                    gene_site_array[,,k.cat] = tmp
                }
            }else{
                gene_site_array <- c()
                if(k.levels > 0){
                    aa.distances <- CreateAADistanceMatrix(alpha=alpha, beta=beta, gamma=gamma, aa.properties=aa.properties, normalize=FALSE, poly.params=poly.params, k=k.levels)
                }else{
                    aa.distances <- CreateAADistanceMatrix(alpha=alpha, beta=beta, gamma=gamma, aa.properties=aa.properties, normalize=FALSE, poly.params=NULL, k=k.levels)
                }
                Q_codon_array <- FastCreateAllCodonFixationProbabilityMatrices(aa.distances=aa.distances, nsites=nsites, C=C, Phi=Phi, q=q, Ne=Ne, include.stop.codon=TRUE, numcode=numcode, diploid=diploid, flee.stop.codon.rate=0.9999)
                tmp = GetLikelihoodSAC_CodonForManyCharVaryingBySite(codon.data.list, phy, Q_codon_array, codon.freq.by.aa=codon.freq.by.aa, codon.freq.by.gene=codon.freq.by.gene, aa.optim_array=aa.optim_array, codon_mutation_matrix=codon_mutation_matrix, Ne=Ne, rates=NULL, numcode=numcode, n.cores.by.gene.by.site=n.cores.by.gene.by.site)
                tmp[is.na(tmp)] = -1000000
                gene_site_array = tmp
            }
        }
    }
    return(gene_site_array)
}


GetPhiLikelihoodPerSite <- function(x, codon.data, phy, aa.optim_array=NULL, codon.freq.by.aa=NULL, codon.freq.by.gene=NULL, numcode=1, diploid=TRUE, aa.properties=NULL, volume.fixed.value=0.0003990333, nuc.model, codon.index.matrix, include.gamma, gamma.type, ncats, k.levels=0, logspace=FALSE, verbose=TRUE, neglnl=FALSE, n.cores.by.gene.by.site=n.cores.by.gene.by.site) {
    if(logspace) {
        x = exp(x)
    }
    if(include.gamma == TRUE){
        shape = x[length(x)]
        x = x[-length(x)]
    }

    C.Phi.q.Ne <- x[1]
    C <- 4
    q <- 4e-7
    Ne <- 5e6
    Phi.q.Ne <- C.Phi.q.Ne / C
    Phi.Ne <- Phi.q.Ne / q
    Phi <- Phi.Ne / Ne
    alpha <- x[2]
    beta <- x[3]
    gamma <- volume.fixed.value

    if(k.levels > 0){
        if(nuc.model == "JC") {
            base.freqs=c(x[4:6], 1-sum(x[4:6]))
            #During the early stages of the optimization process it will try weird values for the base frequencies.
            if(any(base.freqs < 0)){
                return(1000000)
            }
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(1, model=nuc.model, base.freqs=base.freqs)
            poly.params <- x[7:8]
        }
        if(nuc.model == "GTR") {
            base.freqs=c(x[4:6], 1-sum(x[4:6]))
            #During the early stages of the optimization process it will try weird values for the base frequencies.
            if(any(base.freqs < 0)){
                return(1000000)
            }
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(x[9:length(x)], model=nuc.model, base.freqs=base.freqs)
            poly.params <- x[7:8]
        }
        if(nuc.model == "UNREST") {
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(x[6:length(x)], model=nuc.model, base.freqs=NULL)
            poly.params <- x[4:5]
        }
    }else{
        if(nuc.model == "JC") {
            base.freqs=c(x[4:6], 1-sum(x[4:6]))
            #During the early stages of the optimization process it will try weird values for the base frequencies.
            if(any(base.freqs < 0)){
                return(1000000)
            }
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(1, model=nuc.model, base.freqs=base.freqs)
        }
        if(nuc.model == "GTR") {
            base.freqs=c(x[4:6], 1-sum(x[4:6]))
            #During the early stages of the optimization process it will try weird values for the base frequencies.
            if(any(base.freqs < 0)){
                return(1000000)
            }
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(x[7:length(x)], model=nuc.model, base.freqs=base.freqs)
        }
        if(nuc.model == "UNREST") {
            nuc.mutation.rates <- CreateNucleotideMutationMatrix(x[4:length(x)], model=nuc.model, base.freqs=NULL)
        }
    }
    #codon_mutation_matrix = CreateCodonMutationMatrix(nuc.mutation.rates) #We now make an index matrix first then just place the nucleotide rates into it:
    #codon_mutation_matrix = c(as.vector(nuc.mutation.rates), 0)[codon.index.matrix]
    codon_mutation_matrix <- matrix(nuc.mutation.rates[codon.index.matrix], dim(codon.index.matrix))
    codon_mutation_matrix[is.na(codon_mutation_matrix)]=0
    nsites.unique <- dim(codon.data$unique.site.patterns)[2]-1
    nsites <- sum(codon.data$site.pattern.counts)

    if(include.gamma==TRUE){
        if(gamma.type == "median"){
            rates.k <- DiscreteGamma(shape=shape, ncats=ncats)
            weights.k <- rep(1/ncats, ncats)
        }
        if(gamma.type == "quadrature"){
            rates.and.weights <- LaguerreQuad(shape=shape, ncats=ncats)
            rates.k <- rates.and.weights[1:ncats]
            weights.k <- rates.and.weights[(ncats+1):(ncats*2)]
        }
        if(gamma.type == "lognormal"){
            rates.and.weights <- LogNormalQuad(shape=shape, ncats=ncats)
            rates.k <- rates.and.weights[1:ncats]
            weights.k <- rates.and.weights[(ncats+1):(ncats*2)]
        }
        final.likelihood = matrix(0, nrow=ncats, ncol=nsites.unique)
        for(k.cat in sequence(ncats)){
            if(k.levels > 0){
                aa.distances <- CreateAADistanceMatrix(alpha=alpha, beta=beta, gamma=gamma, aa.properties=aa.properties, normalize=FALSE, poly.params=poly.params, k=k.levels)
            }else{
                aa.distances <- CreateAADistanceMatrix(alpha=alpha, beta=beta, gamma=gamma, aa.properties=aa.properties, normalize=FALSE, poly.params=NULL, k=k.levels)
            }
            Q_codon_array <- FastCreateAllCodonFixationProbabilityMatrices(aa.distances=aa.distances, nsites=nsites, C=C, Phi=Phi*rates.k[k.cat], q=q, Ne=Ne, include.stop.codon=TRUE, numcode=numcode, diploid=diploid, flee.stop.codon.rate=0.9999999)
            final.likelihood[k.cat,] = GetLikelihoodSAC_CodonForManyCharVaryingBySite(codon.data, phy, Q_codon_array, codon.freq.by.aa=codon.freq.by.aa, codon.freq.by.gene=codon.freq.by.gene, aa.optim_array=aa.optim_array, codon_mutation_matrix=codon_mutation_matrix, Ne=Ne, rates=NULL, numcode=numcode, diploid=diploid, n.cores.by.gene.by.site=n.cores.by.gene.by.site)
        }
        #likelihood <- sum(log(colSums(exp(final.likelihood.mat)*weights.k)) * codon.data$site.pattern.counts)
    }else{
        if(k.levels > 0){
            aa.distances <- CreateAADistanceMatrix(alpha=alpha, beta=beta, gamma=gamma, aa.properties=aa.properties, normalize=FALSE, poly.params=poly.params, k=k.levels)
        }else{
            aa.distances <- CreateAADistanceMatrix(alpha=alpha, beta=beta, gamma=gamma, aa.properties=aa.properties, normalize=FALSE, poly.params=NULL, k=k.levels)
        }
        Q_codon_array <- FastCreateAllCodonFixationProbabilityMatrices(aa.distances=aa.distances, nsites=nsites, C=C, Phi=Phi, q=q, Ne=Ne, include.stop.codon=TRUE, numcode=numcode, diploid=diploid, flee.stop.codon.rate=0.9999999)
        final.likelihood = GetLikelihoodSAC_CodonForManyCharVaryingBySite(codon.data, phy, Q_codon_array, codon.freq.by.aa=codon.freq.by.aa, codon.freq.by.gene=codon.freq.by.gene, aa.optim_array=aa.optim_array, codon_mutation_matrix=codon_mutation_matrix, Ne=Ne, rates=NULL, numcode=numcode, diploid=diploid, n.cores.by.gene.by.site=n.cores.by.gene.by.site)
        #likelihood <- sum(final.likelihood * codon.data$site.pattern.counts)
    }

    return(final.likelihood)
}


GetGeneSiteInfo <- function(selac.obj, partition.number=1){
    phy <- selac.obj$phy
    yeast.gene <- read.dna(selac.obj$partitions[partition.number], format="fasta")
    yeast.gene <- as.list(as.matrix(cbind(yeast.gene))[1:7,])
    chars <- DNAbinToCodonNumeric(yeast.gene)
    codon.data <- chars[phy$tip.label,]
    aa.data <- ConvertCodonNumericDataToAAData(codon.data, numcode=1)
    aa.optim <- selac.obj$aa.optim[[1]] #starting values for all, final values for majrule
    aa.optim.full.list <- aa.optim
    codon.freq.by.aa <- GetCodonFreqsByAA(codon.data[,-1], aa.optim, numcode=1)
    codon.freq.by.gene <- GetCodonFreqsByGene(codon.data[,-1])
    aa.optim.frame.to.add <- matrix(c("optimal", aa.optim), 1, dim(codon.data)[2])
    colnames(aa.optim.frame.to.add) <- colnames(codon.data)
    codon.data <- rbind(codon.data, aa.optim.frame.to.add)
    codon.data <- SitePattern(codon.data, includes.optimal.aa=TRUE)
    aa.optim <- codon.data$optimal.aa
    codon.index.matrix <- CreateCodonMutationMatrixIndex()
    volume.fixed.value <- 0.0003990333
    n.cores.by.gene.by.site <- selac.obj$n.cores.by.gene.by.site
    pars.to.do <- selac.obj$mle.pars[partition.number,]
    selac.all.sites.amino.acid <- GetAALikelihoodPerSite(x=log(pars.to.do), codon.data, phy, aa.optim_array=aa.optim, codon.freq.by.aa=codon.freq.by.aa, codon.freq.by.gene=codon.freq.by.gene, numcode=1, diploid=TRUE, aa.properties=NULL, volume.fixed.value=0.0003990333, nuc.model="UNREST", codon.index.matrix, include.gamma=TRUE, gamma.type="quadrature", ncats=4, k.levels=0, logspace=TRUE, verbose=FALSE, n.cores.by.gene.by.site=n.cores.by.gene.by.site)

    selac.all.sites.phi <- GetPhiLikelihoodPerSite(x=log(pars.to.do), codon.data, phy, aa.optim_array=aa.optim, codon.freq.by.aa=codon.freq.by.aa, codon.freq.by.gene=codon.freq.by.gene, numcode=1, diploid=TRUE, aa.properties=NULL, volume.fixed.value=0.0003990333, nuc.model="UNREST", codon.index.matrix, include.gamma=TRUE, gamma.type="quadrature", ncats=4, k.levels=0, logspace=TRUE, verbose=FALSE, n.cores.by.gene.by.site=n.cores.by.gene.by.site)

    obj <- NULL
    quadl <- LaguerreQuad(selac.obj$mle.pars[1,length(selac.obj$mle.pars[1,])],4)
    obj$phi <- selac.obj$mle.pars[partition.number,1] * quadl[1:4]
    obj$phi.weights <- quadl[5:8]
    obj$site.aa.information <- selac.all.sites.amino.acid
    obj$site.phi.information <- selac.all.sites.phi
    return(obj)
}

#library(selac)
#load("yeastRokasSelacUNRESTgamma.Rdata")
#pp <- GetGeneSiteInfo(result, partition.number=1)


######################################################################################################################################
######################################################################################################################################
### Print function for the selac class:
######################################################################################################################################
######################################################################################################################################

print.selac <- function(x,...){
    ntips=Ntip(x$phy)
    output<-data.frame(x$loglik,x$AIC,ntips,sum(x$nsites), x$k.levels, row.names="")
    names(output)<-c("lnL","AIC", "ntax", "nsites", "k.levels")
    cat("\nFit\n")
    print(output)
    cat("\n")
    cat("\nModel options\n")
    output.part.deux <- data.frame(x$nuc.model, x$data.type, x$aa.optim.type, x$include.gamma, x$ncats, row.names="")
    names(output.part.deux) <- c("model","data", "opt.aa?", "disc.gamma", "n.cats")
    print(output.part.deux)
    cat("\n")
    cpv.starting.parameters <- GetAADistanceStartingParameters(aa.properties=x$aa.properties)
    if(x$aa.optim.type=="majrule" | x$aa.optim.type=="optimize"){
        if(x$nuc.model == "JC"){
            cat("\nSELAC Parameters\n")
            #All branches produce the same summary of the Grantham weights:
            output<-data.frame(x$mle.pars[1,2], x$mle.pars[1,3], x$volume.fixed.value, row.names="")
            names(output)<-c("c","p","v")
            print(output)
            cat("\n")
        }
        if(x$nuc.model == "GTR"){
            cat("\nSELAC parameters\n")
            #if(x$include.gamma==TRUE){
            #    if(x$k.levels > 1){
            #        output <- data.frame(x$mle.pars[1,2], x$mle.pars[1,3], x$volume.fixed.value, x$mle.pars[1,14], row.names="")
            #    }else{
            #        output <- data.frame(x$mle.pars[1,2], x$mle.pars[1,3], x$volume.fixed.value, x$mle.pars[1,12], row.names="")
            #    }
            #    names(output)<-c("c","p","v","disc.gamma")
            #}else{
            output<-data.frame(x$mle.pars[1,2], x$mle.pars[1,3], x$volume.fixed.value, row.names="")
            names(output)<-c("c","p","v")
            #}
            print(output)
            cat("\n")

        }
    }
}
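
#print() on a fitted object of class "selac" dispatches to this method; e.g.,
#print(result), where "result" is a placeholder name for a SelacOptimize() fit.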
