impostar: ImPoStAR: Implement Population Structure Analyses in R

#' Run AMOVA
#'
#' This function performs an analysis of molecular variance (AMOVA) test for genetic structure, implementing the appropriate resampling strategies to model the total sampling error associated with either Sanger sequencing (population sampling error) or next-generation sequencing (population + sequencer sampling error). The current implementation works with pooled data, no missing data and one level of population structure.
#' @usage runAMOVA(designFile, dataFile, outputFile=NULL, NresamplesToStop=1000, maxPermutations=10000, multi.core = TRUE, do.bootstrap = FALSE, save.distributions = FALSE)
#' @param designFile 	a data.frame or character string indicating the path to the design file. Required.
#' @param dataFile a data.frame or character string indicating the path to the data file. Required.
#' @param outputFile a character string indicating the name of the optional .rds output file written to working directory. If NULL (default), output is only retained as an R object. Note that writing large .rds files will increase time to completion.
#' @param NresamplesToStop an integer indicating the number of iterations to complete, after which resampling will stop if resampled F >= observed F. Default = 1000.
#' @param maxPermutations an integer indicating the maximum number of iterations to complete. Default = 10000
#' @param multi.core a logical or integer indicating the number of cores to use in parallel processing. If FALSE will run on one core. If TRUE (default) will detect OS and use the number of available cores minus 1. If integer will run on specified number of cores. Note that parallel processing not supported by Windows.
#' @param NGSdata a logical indicating whether the resampling strategy for next-generation sequencing data (TRUE, Default) or Sanger sequencing data (FALSE) should be implemented.
#' @param save.distributions a logical indicating if the distributions of the F values during bootstrapping be returned. Note that this might require a very large amount of space.
#' @return The runAMOVA function returns a list and optional .rds file written to the working directory. Each list element contains the AMOVA table for one SNP, in the same order as the input file.
#' @author  Scott A. King, Christopher E. Bird, Rebecca M. Hamner, Jason D. Selwyn, Evan Krell
#' @references Hamner, R.M., J.D. Selwyn, E. Krell, S.A. King, and C.E. Bird. In review. Modeling next-generation sequencer sampling error in pooled population samples dramatically reduces false positives in genetic structure tests.
#' @seealso \code{\link{simulate_data}}, \code{\link{runLogRegTest}}
#' @examples
#' # create design file
#' design <- data.frame(n=rep(20,3), Sample=c(1,2,3))
#' # simulate data file
#' simdata <- simulate_data(rep(50, 3), rep(100, 3), rep(0.5, 3), 5, file_name=T)
#' # run AMOVA
#' AMOVAresults <- runAMOVA(designFile=design, dataFile=simdata, outputFile="AMOVAoutput", NresamplesToStop=10, maxPermutations=100, multi.core = T, do.bootstrap = T)
#' @import Rcpp VariantAnnotation parallel
#' @export
runAMOVA <- function(
  designFile,                 # required user input - a data.frame or character string indicating the path to the design file
  dataFile,                   # required user input - a data.frame or character string indicating the path to the data file
  outputFile=NULL,            # a character string indicating the name of the optional .rds output file written to working directory. If NULL (default), output is only retained as an R object. Note that writing large .rds files will increase time to completion.
  NresamplesToStop=1000,      # optional user input, an integer indicating the number of iterations to complete, after which resampling will stop if resampled F >= observed F. Default = 1000.
  maxPermutations=10000,      # an integer indicating the maximum number of iterations to complete. Default = 10000
  multi.core = TRUE,          # a logical or integer indicating the number of cores to use in parallel processing. If FALSE will run on one core. If TRUE will detect OS and use the number of available cores minus 1. If integer will run on specified number of cores. Note that parallel processing not supported by Windows.
  NGSdata = TRUE,        # a logical indicating whether the resampling strategy for next-generation sequencing data (TRUE, Default) or Sanger sequencing data (FALSE) should be implemented.
  save.distributions = FALSE  # a logical indicating if the distributions of the F values during bootstrapping be returned. Note that this might require a very large amount of space.
)
{
  sfw.version <- " 1.04 - 3 June 2017"
  do.bootstrap <- NGSdata
  # force evaluation so won't have issues when sending to other nodes in a cluser!
  force(NresamplesToStop)
  force(maxPermutations)
  force(multi.core)
  force(do.bootstrap)
  force(save.distributions)
  do.bootstrapReadsOld <- FALSE;
  do.bootstrapReads <- FALSE;
  do.bootstrapAlleles <- FALSE;  # First bootstrap method of bootstrapping alleles.
  do.bootstrapReads <- do.bootstrap;   # New method
  force(do.bootstrapAlleles)
  force(do.bootstrapReads)
  library(Rcpp)

  #JASON CHANGE STUFF
  permutationMethod <- "exact" #Set here to force exact. Need to remove other methods # "exact" "rexact", "freely", "bird", "alleles"
  preshuffle <- FALSE               # use a preshuffle, so all SNPs have the same shuffle

  cat( "NresamplesToStop:", NresamplesToStop, " maxPermutations:", maxPermutations, " permutationMethod:", permutationMethod, " multi.core:", multi.core, " preshuffle:", preshuffle,
       " do.bootstrap:", do.bootstrap,
       " do.bootstrapReads:", do.bootstrapReads,
       " do.bootstrapAlleles:", do.bootstrapAlleles,
       " save.distributions:", save.distributions)

  # List of variables and their meanings

  #  These variables don't change for each SNP

  # source1.alleles.indices  - indices for the alleles, grouped by source1
  #   srcX.in.srcY[[1]][[2]]
  # source2.alleles.indices  - indices for the alleles, grouped by source2
  #   srcX.in.srcY[[1]][[3]]
  # source2.source1.indices  - indices for source1, grouped by source2
  #   srcX.in.srcY[[2]][[3]]

  # srcX.in.srcY <- list(list(srcX.in.srcY[[1]][[2]], srcX.in.srcY[[1]][[2]]), list(NULL, srcX.in.srcY[[1]][[3]]))

  # source1.numalleles        - number of alleles in each source1
  # num.alleles.in.src[[2]]
  # source2.numalleleXs        - number of alleles in each source2
  # num.alleles.in.src[[3]]

  # source1.of.alleles       - index of the source1 for each allele
  #   srcX.in.srcY[[2]][[1]]
  # source2.of.alleles       - index of the source2 for each allele
  #   srcX.in.srcY[[3]][[1]]
  # source2.of.sourceX1       - index of the source2 for each source1
  #   srcX.in.srcY[[3]][[2]]

  # source1.names            - name of the source1s   - probably just an int id
  #  source.names[[2]]
  # source2.names            - name of the source2s   - probably just an int id
  #  source.names[[3]]
  # n.source1                - number of source1s
  #  num.source[2]
  # n.source2                - number of source2s
  #  num.source[3]
  # n.total.alleles          - number of alleles
  #  num.source[1]

  # These variables change per SNP - and during shuffling

  # alleles.ref.N            - number of reference alleles in each source1
  # num.reference.alleles[[2]]
  # alleles.alt.N            - number of alternate alleles in each source1
  # num.alternate.alleles[[2]]
  # alleles.state            - state of each allele,  0 - ref,  1 - alternate

  # alts.source2             - number of alternate alleles in each source2
  # num.alternate.alleles[[3]]

  # source1.alt.frequencies  - frequencies of alternate alleles in each source1 (alts/(alts+refs))
  # alt.frequencies[[2]]
  # source2.alt.frequencies  - frequencies of alternate alleles in each source2 (alts/(alts+refs))
  # alt.frequencies[[3]]

  # If not defined in this scope, can't be seen.

  SSW <- vector();           # Sum of Squares - Within

  cat("SAK Amova - version ", sfw.version, "\n")

  if(class(dataFile)=='character' & class(designFile)=='character'){
    print(paste("processing", designFile, dataFile, "with permutation method", permutationMethod))
  }

  if(class(dataFile)=='character'){
    fileExtension <- tail(unlist(strsplit(dataFile,"[.]")),1)

    if (fileExtension == "vcf"){
      library(VariantAnnotation)  # To read vcf files
      print("Reading VCF file\n")
      vcf <- readVcf(dataFile, "foo")
    } else {
      #print("Reading CSV Selwyn format\n")
      experimentalData <- read.csv(dataFile)
    }
  } else if (class(dataFile)=='data.frame'){
    experimentalData<-dataFile
    fileExtension <- 'no_file'
  } else {
    stop('dataFile must either be a data.frame or character string')
  }


  if(class(designFile)=='character'){
    expDesign <- read.csv(designFile)
  } else if (class(designFile)=='data.frame'){
    expDesign <- designFile
  } else {
    stop('designFile must either be a data.frame or character string')
  }


  num.sources.of.variation <-dim(expDesign)[2]                      #calculate from experimental design file
  cat("There are ", num.sources.of.variation-1, " sources of variation: ", names(expDesign)[2:num.sources.of.variation], "\n")


  ploidy <- 2
  num.alleles.in.src <- sapply(1:(num.sources.of.variation+1), function(x) list())

  if (fileExtension == "vcf"){
    reference.reads.N <- matrix(as.numeric(unlist(geno(vcf)$RO)), nrow=nrow(geno(vcf)$RO))
    alternate.reads.N <- matrix(as.numeric(unlist(geno(vcf)$AO)), nrow=nrow(geno(vcf)$AO))
    num.alleles.in.src[[2]] <- read.csv("King/individuals.per.pool.csv")[,3]*ploidy
  } else {
    if (length(grep("RD[0-9]+",colnames(experimentalData))) > 0) {
      reference.reads.N <-matrix(as.numeric(unlist(experimentalData[,grep("RD[0-9]+",colnames(experimentalData))])), nrow=dim(experimentalData)[1])
      alternate.reads.N <-matrix(as.numeric(unlist(experimentalData[,grep("AD[0-9]+",colnames(experimentalData))])), nrow=dim(experimentalData)[1])
    } else if (length(grep("RO[0-9]+",colnames(experimentalData))) > 0) {
      reference.reads.N <-matrix(as.numeric(unlist(experimentalData[,grep("RO[0-9]+",colnames(experimentalData))])), nrow=dim(experimentalData)[1])
      alternate.reads.N <-matrix(as.numeric(unlist(experimentalData[,grep("AO[0-9]+",colnames(experimentalData))])), nrow=dim(experimentalData)[1])
    } else {
      print("Can't find the allele read counts.  Expecting column header pairs of either RO#/AO# or RD#/AD#")
    }
    num.alleles.in.src[[2]] <- expDesign[,1]*ploidy
    numSNPs = dim(experimentalData)[1]
  }
  num.alleles.in.src[[1]] <- 1;
  total.reads <- reference.reads.N + alternate.reads.N
  num.source <- vector()
  num.source[2] <- length(num.alleles.in.src[[2]])
  all.source1s <- 1:num.source[2]   # this vector used a lot so store it

  # index 1 is for alleles
  # index 2 is for src1/individuals with comes from the N column of the expDesign.

  # The expdesign file basically tells you where each src1 is stored, or the srcx index of each src 1:

  num.source[1] = sum(num.alleles.in.src[[2]])  # Total number of individuals

  # The following ranges of indices are used many times, so instead of calculating them each time, we will store them.
  one.to.num.sources.of.variation <- 1:num.sources.of.variation
  if (num.sources.of.variation < 3) {
    variationSourcesList <- NULL
  } else {
    variationSourcesList <- 2:(num.sources.of.variation-1)
  }

  if (num.sources.of.variation < 2) {
    variationSourcesPlusAllelesList <- NULL
  } else {
    variationSourcesPlusAllelesList <- 1:(num.sources.of.variation-1)
  }


  # The srcX.in.srcY list of lists  is used multiple ways.   When X is < Y is telling membership, when X is > Y it is telling you the list of members.
  # And since R starts at 1, the alleles have to be in here,  so when looking at the experiment, its source 1, (whether that be individuals or pools)  will be stored
  # with an index of 2.
  #
  srcX.in.srcY <- sapply(one.to.num.sources.of.variation, function(x) list())
  srcX.in.srcY[[2]][[1]] <- rep(1:length(num.alleles.in.src[[2]]), num.alleles.in.src[[2]])      # src1 in alleles
  if (num.sources.of.variation > 2)
    for (x in variationSourcesList) {
      srcX.in.srcY[[x+1]][[x]] <- as.numeric(unlist(unique(expDesign[,c(x, x+1)])[2]))
      num.source[[x]]=length(srcX.in.srcY[[x+1]][[x]])
      if (x > 2)
        for (k in 1:(x-2)) {
          srcX.in.srcY[[x+1]][[x-k]] <- srcX.in.srcY[[x+1]][[x]][srcX.in.srcY[[x]][[x-k]]]
        }
    }
  # num.source  list stores how many things are members of each source of variation.  Remember that index 1 is for alleles.
  num.source[[num.sources.of.variation]] <- length(unique(expDesign[,num.sources.of.variation]))

  if (num.sources.of.variation > 2)
    for (x in 3:(num.sources.of.variation)) {
      for (y in 2:(x-1)) {
        srcX.in.srcY[[y]][[x]] <- lapply(1:num.source[x], function(a) which(srcX.in.srcY[[x]][[y]] == a))
      }
      # to get the number of alleles at a level, just sum up the alleles for that group at the earlier level.
      num.alleles.in.src[[x]]<-vapply(srcX.in.srcY[[x-1]][[x]], function(a) sum(num.alleles.in.src[[x-1]][a]),0)
      srcX.in.srcY[[x]][[1]] <- rep(1:length(num.alleles.in.src[[x]]), num.alleles.in.src[[x]])         #
    }
  num.alleles.in.src[[num.sources.of.variation+1]]<- sum(num.alleles.in.src[[num.sources.of.variation]])

  if (num.sources.of.variation > 1)
    for (x in 2:num.sources.of.variation) {
      srcX.in.srcY[[1]][[x]] <- lapply(1:length(num.alleles.in.src[[x]]), function(a) which(srcX.in.srcY[[x]][[1]]==a))
    }
  srcX.in.srcY[[1]][[num.sources.of.variation+1]] <- list()
  srcX.in.srcY[[1]][[num.sources.of.variation+1]][[1]] <- 1:num.alleles.in.src[[num.sources.of.variation+1]][1]


  resetSG <- sapply(one.to.num.sources.of.variation, function(x) vector())   ##
  resetN <- sapply(one.to.num.sources.of.variation, function(x) list(c(1)))  # list of vectors, and set the first element to 1 for all of them.    ##
  #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

  #  i in 2:3
  #  j in 2:2  or 2:3

  #          ---
  #          \             num.alleles.in.src[[j]]^2
  #sg[i][j] = >  --------------------------------------------------
  #          /   num.alleles.in.src[[i+1]][srcX.in.srcY[[i+1]][[j]]]
  #          ---
  #          ---
  #          \             num.alleles.in.src[[2]]^2
  #sg[2][2] = >  --------------------------------------------------
  #          /   num.alleles.in.src[[3]][srcX.in.srcY[[3]][[2]]]                 800
  #          ---
  #          ---
  #          \             num.alleles.in.src[[2]]^2
  #sg[3][2] = >  --------------------------------------------------
  #          /   num.alleles.in.src[[4]][srcX.in.srcY[[4]][[2]]]                 800
  #          ---
  #          ---
  #          \             num.alleles.in.src[[3]]^2
  #sg[3][3] = >  --------------------------------------------------
  #          /   num.alleles.in.src[[4]][srcX.in.srcY[[4]][[3]]]                 16
  #          ---


  # Note if all of the num.alleles.in.src[[x]] are the same, only need this once!!!
  # for symmetric experiments, this is the case, but maybe not with pools
  same.sizes <- all(sapply(num.alleles.in.src, function(x) min(x) == max(x)))

  # regardless  srcX.in.srcY  does not change.
  num.source[num.sources.of.variation+1] <- 1
  DF <<- sapply(one.to.num.sources.of.variation, function (i) num.source[i] - num.source[i+1])
  DF.total <<- num.source[1] - 1                                   # I'm invariant
  #JASON HERE

  # DF <<- sapply(one.to.num.sources.of.variation, function (i) prod(num.source[(i+1):length(num.source)])*(num.source[i]-1))
  # DF.total <<- prod(num.source) - 1

  calcSG <- function(srcNs) {
    sg <- resetSG
    for (i in variationSourcesList) {  # skip src1 - alleles/error
      for (j in 2:i) {  # skip src1 - alleles/error
        sg[[i]][j] <- sum( (srcNs[[j]]^2)/ srcNs[[i+1]][srcX.in.srcY[[i+1]][[j]]])
      }
    }
    for (j in 2:num.sources.of.variation) {  # skip src1 - alleles/error
      sg[[num.sources.of.variation]][j] <- sum((srcNs[[j]]^2) / num.source[1])
    }
    return(sg)
  }
  sg.noshuffle <- calcSG(num.alleles.in.src)
  tsg <- sg.noshuffle    ## init
  #browser()

  calcN <- function(sg) {
    n <- resetN
    #for (i in 2:num.sources.of.variation) {
    #for (j in 2:i) {
    #if (i == j) n[[i]][j] <- (num.source[1] - sg[[i]][j])/DF[i]        ##  7%    #3
    #else        n[[i]][j] <- (sg[[i-1]][j] - sg[[i]][j])/DF[i]
    #}
    #}
    for (i in 2:num.sources.of.variation)
      n[[i]][i] <- (num.source[1] - sg[[i]][i])/DF[i]

    if (num.sources.of.variation > 2)  # SAK fix me
      for (i in 3:num.sources.of.variation) {
        for (j in 2:(i-1)) {
          n[[i]][j] <- (sg[[i-1]][j] - sg[[i]][j])/DF[i]
        }
      }
    return(n)
  }
  n.noshuffle <- calcN(sg.noshuffle)
  tn <- n.noshuffle    ## init
  if (same.sizes) {
    print("Experiment is symettric so optimizing")
    calcSG <- function(srcNs) {return(sg.noshuffle)}
    calcN <- function(sg) {return(n.noshuffle)}
  }

  #---------------------------------------------------------------------------------------------------------
  # amova

  # SSW - Sum of Squares  - within    == sum (Alts*Refs/(Alts+Refs))    So as these approch each other (50/50) it is maximized
  # DF  - Degrees of freedom          ==
  # ssa - Sum of Square   - among
  # MS  - Mean Square = ssa/DF
  #
  # Values are dependant upon
  # SSW which is calculated outside since it doesn't change very often
  # srcNs - number of alleles in each grouping
  # sg  - well, this is calculated outside and doesn't appear to be needed anymore, n depends on it, but cacluated outside again
  # n   - cacluated outside and it is used to calcuate variance (sigma squared)
  # DF, calculated outside and won't change

  #amova<-function(srcNs, sg, n, fval.only = FALSE)
  amova<-function(srcNs, n, fval.only = FALSE)
  {
    ssa <- vector()
    MS  <- vector()
    f <- vector()
    ssa <- SSW[one.to.num.sources.of.variation+1] - SSW[one.to.num.sources.of.variation] # all        # 3% #7
    #cat(paste("ssa is"), ssa,"\n")
    MS <- ssa / DF                            # src2indicies     # 4% #6
    #cat(paste("MS is"), MS,"\n")

    sig.sq <- vector()           # .12s
    sig.sq[1] <- MS[1]           # .08s
    for (i in 2:num.sources.of.variation) {
      #tsum <- MS[i]
      tsum <- 0
      for (j in 2:i) {
        tsum <- tsum+(n[[i]][j-1]*sig.sq[j-1])
      }
      sig.sq[i] <- (MS[i] - tsum) / n[[i]][i]
    }
    sig.sq[num.sources.of.variation+1] <- sum(sig.sq[one.to.num.sources.of.variation])


    numerator <- sig.sq[1]
    for (i in 2:num.sources.of.variation) {
      numerator <- numerator + sig.sq[i]
      f[i] <- sig.sq[i]/numerator
    }
    f[1] <- (numerator - sig.sq[1])/numerator

    #if (is.nan(f[2])) browser()

    if (fval.only) {
      return(f)
    } else {
      result = matrix(NA, num.sources.of.variation+1, 7)
      #result[1, ] <- c(DF[3], ssa[3],    MS[3],     sig.sq[3],  f[3],  NA, NA)
      #result[2, ] <- c(DF[2], ssa[2],    MS[2],     sig.sq[2],  f[2],  NA, NA)
      #result[3, ] <- c(DF[1], ssa[1],    MS[1],       sig.sq[1],    f[1], NA, NA)
      #result[4, ] <- c(DF.total, SSW[num.sources.of.variation+1], MS.total, sig.sq[num.sources.of.variation+1], NA, NA, NA);
      #   colnames(result) <- c("df","SS","MS","sigma squared","f","p","#permutations")
      for (i in num.sources.of.variation:1) {
        result[num.sources.of.variation-i+1, ] <- c(DF[i], ssa[i],    MS[i],     sig.sq[i],  f[i],  NA, NA)
        #  4-4+1 = 1 --s4     4-3+1 = 2 --s3   4-2+1 = 3 --s2    4-1+1 = 4 --s1  (error)
      }
      result[num.sources.of.variation+1, ] <- c(DF.total, SSW[num.sources.of.variation+1], MS.total, sig.sq[num.sources.of.variation+1], NA, NA, NA);

      #   rownames(result) <- c("source2", "source1", "error", "total")

      return(result)
    }

    ## Instead of return the matrix,  should just return the array..
    #return(c((DF, ssa,    MS,     sig.sq,  f,  NA, NA)
  }
  #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  #AMOVA FUNCTION END
  #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  #---------------------------------------------------------------------------------------------------------


  ##  Here are the different ways to do shuffling.  We set a variable to the correct one to call, so the rest of the code
  #   doesn't care which one is being called.  They all return either the number of alts in each entitity being
  #   shuffled, or the indices of the entities being shuffled.

  # We shuffle the alleles via alleles.state
  # Then add each pair together, and we store the odd and even indices for a speed increase
  # This version optimized for individuals, that is 2 src 0 per src 1.
  shuffleSource1FreelyIndividuals <- function() {
    m<-sample(alleles.state)
    return(m[oddi] + m[eveni])   # speed hack for individuals!
  }

  shuffleSource1FreelyIndividualsPre <- function(p) {
    m <- alleles.state[preshuf[[1]][[p]]]
    return(m[oddi] + m[eveni])   # speed hack for individuals!
  }

  # shuffle the alleles by resample the alleles.state variable
  # then sum up alternate alleles by counting 1s in each src 1 grouping.
  # This version is when we have an unknown number of src 0s per src 1
  shuffleSource1FreelyAll <- function() {
    m<-sample(alleles.state)
    return(vapply(srcX.in.srcY[[1]][[2]], function(x) sum(m[x]),0))
  }

  shuffleSource1FreelyAllPre <- function(p) {
    m<-alleles.state[preshuf[[1]][[p]]]
    return(vapply(srcX.in.srcY[[1]][[2]], function(x) sum(m[x]),0))
  }

  #
  # If we know the number of source 0's that we are going to add for each src 1, and they are all the same
  # and in the case of individuals it is two, that means every pair get added together so lets store the indicies
  # for these pairs so we don't have to determine more than once.
  data.is.individuals <- FALSE
  if (max(num.alleles.in.src[[2]]) == 2 && min(num.alleles.in.src[[2]]) == 2) { # we have individuals
    print("Data is individuals so optimizing for that case.")
    data.is.individuals <- TRUE
    oddi <- seq(1,num.alleles.in.src[[num.sources.of.variation+1]], 2)
    eveni <- seq(2,num.alleles.in.src[[num.sources.of.variation+1]], 2)
    #    shuffleSource1Freely <- shuffleSource1FreelyIndividuals
    #    if (preshuffle) {
    #      print("i1 Using shuffleSource1FreelyIndividualsPre")
    #      shuffleSource1Freely <- function() {shuffleSource1FreelyIndividualsPre(curPermutationi) }
    #    }
  }
  #    else {
  #    shuffleSource1Freely <- shuffleSource1FreelyAll
  #  }



  #shuffleSource1.6b <- function()
  # this was version 1.6b, one attempt to be as fast as possible.  It is still unclear which is the best
  # version, althought the optimization for individuals is hard to beat.
  # The next one is about the same speed, but simpler, so we'll use it.
  shuffleSource1old <- function() {
    c <- 0;
    refs<-vector(length=num.source[2])
    for(i in 1:num.source[3]) {
      m<-sample(srcX.in.srcY[[1]][[3]][[i]], replace=FALSE)
      for (s1 in srcX.in.srcY[[2]][[3]][[i]]) {
        refs[s1] <- sum(alleles.state[m[srcX.in.srcY[[1]][[2]][[s1]]-c]])
      }
      c <- c + length(m)
    }
    return(refs)
  }


  cppFunction('
              NumericVector cshuf2(List l, NumericVector state, NumericVector Shuf) {
              int n = l.size();
              NumericVector results(n);
              for(int i=0; i<n ; ++i) {
              NumericVector y(l[i]);
              int m=y.size();
              results[i] = 0;
              for (int j=0; j<m; j++) {
              results[i] += Shuf[y[j]-1];
              }
              }
              return results;
              }
              ')

  # Shuffle source1 basic, but sum using c++
  #  shuffled.alleles <- unlist(sapply(1:num.source[3], function(i) sample(alleles.state[srcX.in.srcY[[1]][[3]][[i]]],replace=FALSE)))
  shuffleSource1withc <- function(p) {
    return(cshuf2(srcX.in.srcY[[1]][[2]], alleles.state, sample(alleles.state)))
  }

  # Shuffle source1 basic
  shuffleSource1 <- function(p) {
    shuffled.alleles <- unlist(sapply(1:num.source[3], function(i) sample(alleles.state[srcX.in.srcY[[1]][[3]][[i]]],replace=FALSE)))
    return(vapply(srcX.in.srcY[[1]][[2]], function(x) sum(shuffled.alleles[x]),0))
  }

  cppFunction('
              NumericVector cshuf(List l, NumericVector state, NumericVector pre) {
              int n = l.size();
              NumericVector results(n);
              for(int i=0; i<n ; ++i) {
              NumericVector y(l[i]);
              int m=y.size();
              results[i] = 0;
              for (int j=0; j<m; j++) {
              results[i] += state[pre[y[j]-1]-1];
              }
              }
              return results;
              }
              ')

  shuffleSource1Pre <- function(p) {
    return(cshuf(srcX.in.srcY[[1]][[2]], alleles.state, preshuf[[1]][[p]]))
  }

  shuffleSource1Preold <- function(p) {
    #shuffled.alleles <- preshuf[[1]][[p]]
    shuffled.alleles <- alleles.state[preshuf[[1]][[p]]]
    return(vapply(srcX.in.srcY[[1]][[2]], function(x) sum(shuffled.alleles[x]),0))
  }

  # Same as version 1.6b, but not just for src 1.
  #
  # This will shuffle the alleles for src i, by constraining withing src i+1, and return the alternate counts
  # for src 1.

  preshuf <- vector(mode="list", length= (num.sources.of.variation-1))
  for (i in variationSourcesPlusAllelesList) {
    preshuf[[i]] <- vector(mode="list", length=maxPermutations)
  }

  preshuffleSourcei.alleles <- function(i, N) {
    for (j in 1:N) {
      preshuf[[i]][[j]] <<- unlist(lapply(srcX.in.srcY[[1]][[i+2]], function(x) sample(x)))
    }
  }

  # will only call for source 1
  preshuffleSourcei.freely <- function(i, N) {
    for (j in 1:N) {
      #preshuf[[i]][[j]] <<- sample(alleles.state)
      preshuf[[i]][[j]] <<- sample(1:num.source[i], replace=FALSE)
    }
  }

  preshuffleSourcei <- function(i, N) {

    for (j in 1:N) {
      #preshuf[[i]][[j]] <<- unlist(lapply(srcX.in.srcY[[1]][[i+2]], function(x) sample(alleles.state[x])))
      #preshuf[[i]][[j]] <<- unlist(lapply(srcX.in.srcY[[1]][[i+2]], function(x) sample(x)))
      if (i == (num.sources.of.variation-1))
        preshuf[[i]][[j]] <<- sample(1:num.source[i], replace=FALSE)
      else
        preshuf[[i]][[j]] <<- unlist(lapply(srcX.in.srcY[[i]][[i+2]], function(x) sample(x)))
    }
    #return(shuffle <- unlist(lapply(srcX.in.srcY[[level-1]][[level+1]], sample)))
  }


  getSourceiShuffleNoPre <- function(src) {
    if (src == num.sources.of.variation-1)
      return(sample(1:num.source[src], replace=FALSE))
    else
      return(shuffle <- unlist(lapply(srcX.in.srcY[[src]][[src+2]], sample)))
  }

  getSourceiShufflePre <- function(i,p) {
    return(preshuf[[i]][[p]])
  }

  if (preshuffle) {
    print("g1 Using getSourceiShufflePre")
    getSourceiShuffle <- function(i) {getSourceiShufflePre(i, curPermutationi) }
  } else {
    getSourceiShuffle <- getSourceiShuffleNoPre
  }


  shuffleSourceiAllelesIndividualsPre <- function(i,p) {
    #shuf <- preshuf[[i]][[p]]
    shuf <- alleles.state[preshuf[[i]][[p]]]
    return(shuf[oddi]+shuf[eveni])
  }

  shuffleSourceiAllelesIndividuals <- function(i) {
    shuf <- unlist(lapply(srcX.in.srcY[[1]][[i+2]], function(x) sample(alleles.state[x])))
    return(shuf[oddi]+shuf[eveni])
  }

  shuffleSourceiAllelesPre <- function(i,p) {
    #shuf <- preshuf[[i]][[p]]
    shuf <- alleles.state[preshuf[[i]][[p]]]
    #return(vapply(1:num.source[[2]], function(x) sum(shuf[srcX.in.srcY[[1]][[2]][[x]]]),0)) #
    return(vapply(srcX.in.srcY[[1]][[2]], function(x) sum(shuf[x]),0))
  }

  shuffleSourceiAlleles <- function(i) {
    shuf <- unlist(lapply(srcX.in.srcY[[1]][[i+2]], function(x) sample(alleles.state[x])))  #
    #return(vapply(fred, function(x) sum(shuf[x]),0)) #
    return(vapply(srcX.in.srcY[[1]][[2]], function(x) sum(shuf[x]),0)) #
    #return(vapply(1:num.source[[2]], function(x) sum(shuf[srcX.in.srcY[[1]][[2]][[x]]]),0)) #
    #return(sapply(1:num.source[[2]], function(x) sum(shuf[srcX.in.srcY[[1]][[2]][[x]]])))
  }

  # when we have individuals, this is very fast.
  shuffleSource1Individuals <- function() {
    shuf <- unlist(sapply(srcX.in.srcY[[1]][[3]], function(x) sample(alleles.state[x])))              #
    return(shuf[oddi]+shuf[eveni])
  }

  shuffleSource1IndividualsPre <- function(p) {
    #shuf <- preshuf[[1]][[p]]
    shuf <- alleles.state[preshuf[[1]][[p]]]
    return(shuf[oddi]+shuf[eveni])
  }

  num.reference.alleles <- sapply(one.to.num.sources.of.variation, function(x) list())
  num.alternate.alleles <- sapply(one.to.num.sources.of.variation, function(x) list())
  alt.frequencies <- sapply(one.to.num.sources.of.variation, function(x) list())

  shuffleSource1wreplacement <- function() {
    rbinom(num.source[2], num.alleles.in.src[[2]], alt.frequencies[[2]])
  }


  ## Lots of different ways to sum up the number of alt/ref allels at each level.  Each one is efficient for different situations.
  #  This section of code will try to use the most effecient method.  This can have a huge impact on running time.
  #

  #browser()
  if (permutationMethod == "rexact") {
    shuffleSource1Function <- shuffleSource1wreplacement
  } else if (permutationMethod == "freely") {
    if (max(num.alleles.in.src[[2]]) == 2 && min(num.alleles.in.src[[2]]) == 2) { # we have individuals
      if (preshuffle) {
        print("Using f1 shuffleSource1FreelyIndividualsPre")
        shuffleSource1Function <- function() {shuffleSource1FreelyIndividualsPre(curPermutationi) }
      } else {
        print("Using f2 shuffleSource1FreelyIndividuals")
        shuffleSource1Function <- shuffleSource1FreelyIndividuals
      }
    } else {
      if (preshuffle) {
        print("Using f3 shuffleSource1FreelyAllPre")
        shuffleSource1Function <- function() {shuffleSource1FreelyAllPre(curPermutationi) }
      } else {
        print("Using f4 shuffleSource1FreelyAll")
        shuffleSource1Function <- shuffleSource1FreelyAll
      }
    }
  } else if (permutationMethod == "alleles") {
    if (max(num.alleles.in.src[[2]]) == 2) { # we have individuals
      print("a1 using shuffleSource1/iIndividuals");
      shuffleSource1Function <- shuffleSource1Individuals
      shuffleSourceiFunction <- shuffleSourceiAllelesIndividuals
      if (preshuffle) {
        print("a2 Using shuffleSource1IndividualsPre() and shuffleSourceiAllelesIndividualsPre()")
        shuffleSource1Function <- function() {shuffleSource1IndividualsPre(curPermutationi) }
        shuffleSourceiFunction <- function(i) {shuffleSourceiAllelesIndividualsPre(i, curPermutationi) }
      }
    } else  {
      print("a3 using shuffleSource1/i")
      shuffleSource1Function <- shuffleSource1
      shuffleSourceiFunction <- shuffleSourceiAlleles
      if (preshuffle) {
        print("a4 using shuffleSource1Pre() and shuffleSourceiAllelesPre() ")
        shuffleSource1Function <- function() {shuffleSource1Pre(curPermutationi) }
        shuffleSourceiFunction <- function(i) {shuffleSourceiAllelesPre(i, curPermutationi) }
      }
    }
  } else { # "exact" and "bird"
    if (max(num.alleles.in.src[[2]]) == 2) { # we have individuals
      print("e1 using shuffleSource1/iIndividuals");
      shuffleSource1Function <- shuffleSource1Individuals
      #shuffleSourceiFunction <- shuffleSourceiIndividuals
      if (preshuffle) {
        print("e2 Using shuffleSource1IndividualsPre() and shuffleSourceiIndividualsPre()")
        shuffleSource1Function <- function() {shuffleSource1IndividualsPre(curPermutationi) }
        #shuffleSourceiFunction <- function(i) {shuffleSourceiIndividualsPre(i, curPermutationi) }
      }
    } else  {
      print("e3 using shuffleSource1/i")
      shuffleSource1Function <- shuffleSource1
      #shuffleSourceiFunction <- shuffleSourcei
      if (preshuffle) {
        print("e4 using shuffleSource1Pre() and shuffleSourceiPre() ")
        shuffleSource1Function <- function() {shuffleSource1Pre(curPermutationi) }
        #shuffleSourceiFunction <- function(i) {shuffleSourceiPre(i, curPermutationi) }
      }
    }
  }
  #num.source[num.sources.of.variation+1] <- 1
  num.alternate.alleles[[1]] <- 0;
  num.reference.alleles[[1]] <- 1;
  #num.alleles.in.src[[1]] <- 1;   # these are not correct, but will give us the correct values here.

  MS.total <<- SSW[num.sources.of.variation+1] / DF.total                                   # I'm invariant

  # if (max(num.alleles.in.src[[2]]) == 2 && min(num.alleles.in.src[[2]]) == 2) { # we have individuals

  #  getCutoffs <- function(numReads) {
  #    cutoffs <- rep(0,numReads+1)
  #    heterorange <- round(.2*numReads):round(.8*numReads)
  #    cutoffs[(round(.8*numReads)+1):numReads] <- 2
  #    cutoffs[heterorange+1] <- 1
  #    cutoffs[1] <- 0    # At least one homozygote
  #    cutoffs[numReads+1] <- 2    # At least one homozygote
  #    return(cutoffs)
  #  }
  getNumrefs <- function(r, N) {
    c1 <- round(.2*N)   # This could be stored?
    if (r <c1 || r == 0) return(0)  # Homo if no reads or < the 20% cutoff
    c2 <- round(.8*N)
    if (r >c2 || r == N) return(0)  # Homo if all reads or > the 80% cutoff
    return(1)    # otherwise it is a heterozygote
  }

  getNumrefs2 <- function(r, N) {
    results <- allhetero
    results[r < round(.2*N)] <- 0
    results[ r == 0] <- 0
    results[r > round(.8*N)] <- 2
    results[r == N] <- 2
    #browser()
    return(results)
  }

  processSNP <- function(row, maxPermutations) {
    #if (row%%100 == 0) print(paste("processing", row, "/", numSNPs))
    print(paste("processing", row, "/", numSNPs))
    #browser()

    if (save.distributions) {
      NullDistributionsTmp <- lapply(variationSourcesPlusAllelesList, function(x) vector(length = maxPermutations))
      NullDistributions <- list()
    }

    # Used for bootstrapping of reads
    # these only need to be calculated once so do them here.

    alt.reads <- sapply(alternate.reads.N[row,], function(x) if (x==0) x+1 else x)   # Get the vector of alternate reads for this SNP, no zeros
    ref.reads <- sapply(reference.reads.N[row,], function(x) if (x==0) x+1 else x)   # Get the vector of reference reads for this SNP, no zeros
    reads <- alt.reads+ref.reads                                                     # Get new total reads to calcuate percentages
    ##ReadsForSNP <- total.reads[row,]
    ReadsForSNP <- reads

    tube.ref.frequencies <- ref.reads/reads


    # per SNP
    #ps <- reference.reads.N[row,]/total.reads[row,]
    if (data.is.individuals) {
      allhetero <<- rep(1, num.source[2])
      #reference.allleles.in.tube <- getNumrefs2(new.ref.reads,reads)
      #num.reference.alleles[[2]] <<- sapply(all.source1s, function(x) getNumrefs2(reference.reads.N[row,x], total.reads[row,x]))
      num.reference.alleles[[2]] <<- getNumrefs2(reference.reads.N[row,], total.reads[row,])
    } else {
      num.reference.alleles[[2]] <<- round((reference.reads.N[row,]/total.reads[row,])*as.vector(num.alleles.in.src[[2]]))
    }

    num.alternate.alleles[[2]] <<- num.alleles.in.src[[2]]-num.reference.alleles[[2]]
    alleles.state <<- rep(rep(c(0,1), length(num.alleles.in.src[[2]])), as.vector(rbind(num.reference.alleles[[2]], num.alternate.alleles[[2]])))
    #distance.matrix <- abs(outer(alleles.state, alleles.state, FUN="-"))
    if (preshuffle && row == 1) {
      print("Setting up preshuffle")
      if (permutationMethod == "freely") {
        preshuffleSourcei.freely(1, maxPermutations)         # only need source 1 preshuffled
      } else {
        #for (i in variationSourcesPlusAllelesList )
        for (i in 1:(num.sources.of.variation-1) ) {
          print(paste("preshuffle for src",i))
          if (permutationMethod == "alleles")
            preshuffleSourcei.alleles(i, maxPermutations) # always shuffle alleles
          else
            preshuffleSourcei(i, maxPermutations)         # shuffles srci-1 within src i
        }
      }
    }

    #num.alleles.in.src[[num.sources.of.variation+1]] <- length(alleles.state)

    ## Look into this for rexact
    if (num.sources.of.variation > 2) {
      # These are used by shuffleSource1wreplacement()   used by the rexact method only
      alt.frequencies[[3]] <<- sapply(1:num.source[3], function(x) sum(num.alternate.alleles[[2]][srcX.in.srcY[[3]][[2]]==x])/
                                        (sum(num.alternate.alleles[[2]][srcX.in.srcY[[3]][[2]]==x])+sum(num.reference.alleles[[2]][srcX.in.srcY[[3]][[2]]==x])))
      alt.frequencies[[2]] <<- alt.frequencies[[3]][srcX.in.srcY[[3]][[2]]] # This is really just the source2 frequencies repeated
    }

    # this one is slower, but the number of alleles often doesn't change, so we only count of the alternate
    # alleles, and then call this one instead of either summing up or subtracting to get ref counts.
    calculateSSW <- function(levels) {
      vapply(levels, function(i) sum(num.alternate.alleles[[i]]*(num.alleles.in.src[[i]]-num.alternate.alleles[[i]])/num.alleles.in.src[[i]]),0)
    }

    #print("num.alternate.alleles"); print(num.alternate.alleles)
    #print("num.alleles.in.src"); print(num.alleles.in.src)

    # Call this after changing alternate counts, so can't call this unless have recalculate ref counts as well.
    calculateSSW2 <- function(levels) {
      vapply(levels, function(i) sum(num.alternate.alleles[[i]]*num.reference.alleles[[i]]/num.alleles.in.src[[i]]),0)
    }

    #
    # I'm broken.  So should either set the global num.alternate.alleles in here
    # or return a list the correct size to set outside of the function.
    # But then a little tricky when one list versus multiple lists
    #
    calculateAlternateCounts <- function(levels) {
      ac <- vector(mode="list", length=length(levels))
      i <- levels[1]-1
      ac[[i]]<-num.alternate.alleles[[i]]
      for (i in levels) {
        ac[[i]] <- vapply(srcX.in.srcY[[i-1]][[i]], function(x) sum(ac[[i-1]][x]),0)  #3% #10
      }
      #ac[[i]] <- vapply(srcX.in.srcY[[i-1]][[i]], function(x) sum(num.alternate.alleles[[i-1]][x]),0)  #3% #10
      #browser()
      return(ac[levels])
    }


    ##    SSW[1] <<- 0   # for alleles no SSW
    #    for (i in 2:num.sources.of.variation) {
    #      if (i > 2) {
    #        num.alternate.alleles[[i]]<-vapply(srcX.in.srcY[[i-1]][[i]], function(x) sum(num.alternate.alleles[[i-1]][x]),0)
    #        num.reference.alleles[[i]]<-vapply(srcX.in.srcY[[i-1]][[i]], function(x) sum(num.reference.alleles[[i-1]][x]),0)
    #      }
    ##      SSW[i]<<-sum(num.alternate.alleles[[i]]*(num.alleles.in.src[[i]]-num.alternate.alleles[[i]])/num.alleles.in.src[[i]])
    #    }

    calculateAlternateAlleles <- function() {
      if (num.sources.of.variation > 2)
        for (i in 3:num.sources.of.variation) {
          num.alternate.alleles[[i]]<<-vapply(srcX.in.srcY[[i-1]][[i]], function(x) sum(num.alternate.alleles[[i-1]][x]),0)
        }
      num.alternate.alleles[[num.sources.of.variation+1]]<<- sum(num.alternate.alleles[[num.sources.of.variation]])
    }

    calculateReferenceAlleles <- function() {
      if (num.sources.of.variation > 2)
        for (i in 3:num.sources.of.variation) {
          num.reference.alleles[[i]]<<-vapply(srcX.in.srcY[[i-1]][[i]], function(x) sum(num.reference.alleles[[i-1]][x]),0)
        }
      num.reference.alleles[[num.sources.of.variation+1]]<<- sum(num.reference.alleles[[num.sources.of.variation]])
    }

    #browser()
    #num.alternate.alleles <- calculateAlternateAlleles()
    #num.reference.alleles <- calculateReferenceAlleles()
    calculateAlternateAlleles()
    calculateReferenceAlleles()
    SSW <<- calculateSSW(1:(num.sources.of.variation+1))
    #-print("SSW is")
    #-print(SSW)

    ## This function bootstraps the alleles
    bootstrapAlleles <- function(alts) {
      freq = 1-alts/num.alleles.in.src[[2]]
      ref.reads<-rbinom(num.source[2], total.reads[1,], freq)
      num.reference.alleles <- round((ref.reads/total.reads[1,])*as.vector(num.alleles.in.src[[2]]))
      num.alternate.alleles <- num.alleles.in.src[[2]]-num.reference.alleles
      #alleles.state <<- rep(rep(c(0,1), length(num.alleles.in.src[[2]])), as.vector(rbind(num.reference.alleles[[2]], num.alternate.alleles[[2]])))
      return(num.alternate.alleles)
    }
    ## This function bootstraps the alleles
    ## alts is the new number of alts in each source 1
    bootstrapReads <- function(alts) {
      freq = 1-alts/num.alleles.in.src[[2]]
      #  cat("alts in:", alts)
      #  cat("freq:", freq)
      #ref.reads<-rbinom(num.source[2], total.reads[1,], freq)
      ref.reads<-rbinom(num.source[2], ReadsForSNP, freq)
      #  cat("reads: ", ref.reads)
      #num.reference.alleles <- round((ref.reads/total.reads[1,])*as.vector(num.alleles.in.src[[2]]))
      num.reference.alleles <- round((ref.reads/ReadsForSNP)*as.vector(num.alleles.in.src[[2]]))
      num.alternate.alleles <- num.alleles.in.src[[2]]-num.reference.alleles
      #  cat("alts out:", num.alternate.alleles)
      #  cat("alts diff:", alts-num.alternate.alleles, "\n")
      #  browser()
      #alleles.state <<- rep(rep(c(0,1), length(num.alleles.in.src[[2]])), as.vector(rbind(num.reference.alleles[[2]], num.alternate.alleles[[2]])))
      return(num.alternate.alleles)
    }

    # This function will bootstrap the number of reads.
    #
    bootstrapReadsOld <- function(alts) {
      # Be wary, these really shouldn't be calculated every time since they won't change

      # Now resample the tube using the original number of reads using the percentage of the original reads

      #new.ref.reads <- rbinom(num.source[2], total.reads[row,], tube.ref.frequencies)
      #new.alt.reads <- total.reads[row,]-new.ref.reads
      ref.reads <- sapply(reference.reads.N[row,], function(x) if (x==0) x+1 else x)   # Get the vector of reference reads for this SNP, no zeros
      reads <- alt.reads+ref.reads                                                     # Get new total reads to calcuate percentages
      tube.ref.frequencies <- ref.reads/reads

      new.ref.reads <- rbinom(num.source[2], reads, tube.ref.frequencies)
      new.alt.reads <- reads-new.ref.reads

      if (data.is.individuals)
        #reference.allleles.in.tube <- sapply(all.source1s, function(x) getNumrefs(new.ref.reads[x],total.reads[row, x]))
        reference.allleles.in.tube <- getNumrefs2(new.ref.reads,reads)
      else
        reference.allleles.in.tube <- round((new.ref.reads/total.reads[row, ])*as.vector(num.alleles.in.src[[2]]))

      #browser()
      if (identical(reference.allleles.in.tube, num.reference.alleles[[2]])) {
        #browser()
        return()  # do nothing else
      } else { # number of alleles has changed so recalculate everything!
        num.reference.alleles[[2]] <<- round((ref.reads/reads)*as.vector(num.alleles.in.src[[2]]))
        num.alternate.alleles[[2]] <<- num.alleles.in.src[[2]]-num.reference.alleles[[2]]
        # The alleles.state have changed so recaculte!
        #browser()
        alleles.state <<- rep(rep(c(0,1), length(num.alleles.in.src[[2]])), as.vector(rbind(num.reference.alleles[[2]], num.alternate.alleles[[2]])))
        # Do it this way for now, but maybe should do this elsewhere so don't need to do <<
        #num.alternate.alleles <<- calculateAlternateAlleles()
        #num.reference.alleles <<- calculateReferenceAlleles()
        calculateAlternateAlleles()
        calculateReferenceAlleles()
        SSW <<- calculateSSW(1:(num.sources.of.variation+1))

        #print(cat("bootstrapping SSW is", SSW))

        #browser()
        #  A typical call to amova amova(new.srcNs, tsg, tn, TRUE)
        #  A typical call to amova amova(new.srcNs, tn, TRUE)    # no longer pass in sg, only need n's
        # So we have to update those variables.
      }
      return()
    }


    #browser()

    #
    #  First we get the Amova Table, this will have everything except the p-values and the counts for the p-values

    #amovaTable <- amova(num.alleles.in.src, sg.noshuffle, n.noshuffle)
    amovaTable <- amova(num.alleles.in.src, n.noshuffle)


    #
    # Now we need to get the p-values.  This involves shuffling samples around via a shuffle method.

    if (maxPermutations < 1)  return(amovaTable)    # just return table and be done!

    #browser()

    # ---------------------------------------------------------------------------------------------------------------------
    #
    #   Bird  Permutation
    #
    # For the "bird" permutation method, we start by shuffling source 1, and that shuffle stays when shuffling the next level.
    #
    if (permutationMethod == "bird") {  # here we shuffle a level, calculat f, suffle next level, ... and keep all shuffles
      #print(amovaTable)
      cnt <- rep(0, num.sources.of.variation-1)   # set all the cnts to 0
      pcnt <- rep(0, num.sources.of.variation-1)  # set all the cnts to 0
      #print("SSW is")
      #print(SSW)
      SSW.old <- SSW
      #print("SSW.old is")
      #print(SSW.old)
      old.alternate.alleles <- num.alternate.alleles  # Since shuffles are moved around, we have to restore levels 2 through N each time through
      for(permutationi in 1:maxPermutations) {
        curPermutationi <<- permutationi   # don't know how to get it to function otherwise....
        if (min(cnt) >= NresamplesToStop) break  # done with shuffling all, so lets be done!


        # note the number of alleles doesn't change when bootstrap just the distrubtion of alts and refs.
        new.srcNs <- num.alleles.in.src  # restore the old  srcNs since doing a new set of shuffles

        # First bootstrap the reads in the tube
        if (do.bootstrapReadsOld) {  # Bootstrap reads
          #browser()
          bootstrapReadsOld(Oldalts.shuffled.N)
        } else {
          SSW <<- SSW.old           # restore SSW
          num.alternate.alleles <- old.alternate.alleles
        }
        #print("SSW is now (old)")
        #print(SSW)
        #print("num.alternate.alleles is now (old)")
        #print(num.alternate.alleles)

        for (index.of.src.being.shuffled in variationSourcesPlusAllelesList) {
          # not sure if we should do this or not, for now lets not
          # if (cnt[index.of.src.being.shuffled] >= NresamplesToStop) break  # done with shuffling this level so don't bother
          # will need to fix when the count gets added to table and make sure p correct.
          pcnt[index.of.src.being.shuffled] <- pcnt[index.of.src.being.shuffled] +1

          level <- index.of.src.being.shuffled+1;
          #print(paste("Shuffling src", index.of.src.being.shuffled, "level", level))

          if (index.of.src.being.shuffled == 1) {   #shuffle src1
            alts.shuffled.N <-shuffleSource1Function()   # Get shuffled # of alternate alleles for each s1
            #if (preshuffle) browser()
            #browser()
            #cat(paste("alts.shuffled.N are now"), alts.shuffled.N,"\n")
            SSW[level]<<-sum(alts.shuffled.N*(num.alleles.in.src[[level]]-alts.shuffled.N)/num.alleles.in.src[[level]])
            #print(paste("setting SSW[", level, "] to", SSW[level]))

            if (do.bootstrapAlleles) {
              num.alternate.alleles[[2]]<-bootstrapAlleles(alts.shuffled.N)
            } else if (do.bootstrapReads) {
              num.alternate.alleles[[2]]<-bootstrapReads(alts.shuffled.N)
            } else {
              num.alternate.alleles[[2]]<-alts.shuffled.N
            }
          }
          else if (index.of.src.being.shuffled >= 2) {

            shuffledsrcs <- getSourceiShuffle(index.of.src.being.shuffled)
            new.srcNs[[index.of.src.being.shuffled]] <- num.alleles.in.src[[index.of.src.being.shuffled]][shuffledsrcs]
            num.alternate.alleles[[level]]<-vapply(srcX.in.srcY[[index.of.src.being.shuffled]][[level]],
                                                   function(x) sum(num.alternate.alleles[[index.of.src.being.shuffled]][shuffledsrcs[x]]),0)

            ####	    if (level == num.sources.of.variation)
            ####	      shuffledsrcs <- sample(1:num.source[level-1], replace=FALSE)  # just shuffle all of them.
            ####	    else
            ####	      shuffledsrcs <- unlist(lapply(srcX.in.srcY[[level-1]][[level+1]], sample))
            #### #cat(paste("shuffledsrcs are now"), shuffledsrcs, "\n")
            ####	    new.srcNs[[index.of.src.being.shuffled]] <- num.alleles.in.src[[index.of.src.being.shuffled]][shuffledsrcs]
            #####-cat(paste("new.srcNs[[", index.of.src.being.shuffled,"]] are now"), new.srcNs[[index.of.src.being.shuffled]], "\n")
            ####	    ## alleles move around so numbers change, but just totals  SSWs for all above can change
            ####	    num.alternate.alleles[[index.of.src.being.shuffled+1]]<-vapply(srcX.in.srcY[[index.of.src.being.shuffled]][[index.of.src.being.shuffled+1]],
            ####									   ### function(x) sum(num.alternate.alleles[[index.of.src.being.shuffled]][shuffledsrcs][x]),0)
            ####									   function(x) sum(num.alternate.alleles[[index.of.src.being.shuffled]][shuffledsrcs[x]]),0)
            ####
            #####cat(paste("num.alternate.allles[[", index.of.src.being.shuffled,"]] are now"), num.alternate.alleles[[index.of.src.being.shuffled]], "\n")
            #####cat(paste("num.alternate.allles[[", index.of.src.being.shuffled+1,"]] are now"), num.alternate.alleles[[index.of.src.being.shuffled+1]], "\n")

            SSW[level] <<- sum(num.alternate.alleles[[level]]*(new.srcNs[[level]]-num.alternate.alleles[[level]])/new.srcNs[[level]])
            #print(paste("setting SSW[", index.of.src.being.shuffled+1, "] to", SSW[index.of.src.being.shuffled+1]))
          }

          tsg <- calcSG(new.srcNs)
          tn <- calcN(tsg)
          #foo <- amova(new.srcNs, tsg, tn, TRUE)
          foo <- amova(new.srcNs, tn, TRUE)
          #if (index.of.src.being.shuffled == 2) browser()

          #print(foo)
          #fval <- foo[num.sources.of.variation-index.of.src.being.shuffled, 5]
          fval <- foo[index.of.src.being.shuffled+1]
          if (is.na(fval) || is.nan(fval)) break
          if (save.distributions) {
            NullDistributionsTmp[[index.of.src.being.shuffled]][permutationi] <- fval
          }
          #browser()
          if (fval >= amovaTable[num.sources.of.variation-index.of.src.being.shuffled,5]) {
            cnt[index.of.src.being.shuffled] <- cnt[index.of.src.being.shuffled] +1
          }
        }  # end  for indexed.being.shuffled

      }  # end for 1..maxPermutations
      ## Now lets fill in the p-values and # permutations columns
      for (which.src in variationSourcesPlusAllelesList) {
        if (save.distributions) {
          NullDistributions[[which.src]] <- NullDistributionsTmp[[which.src]][1:pcnt[which.src]]  # remove non used slots
        }
        amovaTable[num.sources.of.variation-which.src,6] <- cnt[which.src]/pcnt[which.src]
        #if (is.na(amovaTable[num.sources.of.variation-which.src,6])) browser()
        amovaTable[num.sources.of.variation-which.src,7] <- pcnt[which.src]
      }
    } #end if (permutationMethod == "bird")

    # ---------------------------------------------------------------------------------------------------------------------
    #
    #   exact  Permutation
    else if (permutationMethod == "exact") {
      #browser()

      cnt <- rep(0, num.sources.of.variation-1)   # set all the cnts to 0
      pcnt <- rep(0, num.sources.of.variation-1)  # set all the cnts to 0
      SSW.old <- SSW
      old.alternate.alleles <- num.alternate.alleles  # Since shuffles are moved around, we have to restore levels 2 through N each time through

      for(permutationi in 1:maxPermutations) {
        curPermutationi <<- permutationi   # don't know how to get it to function otherwise....

        if (min(cnt) >= NresamplesToStop) break  # done with shuffling all, so lets be done!
        # note the number of alleles doesn't change when bootstrap just the distrubtion of alts and refs.
        new.srcNs <- num.alleles.in.src  # restore the old  srcNs since doing a new set of shuffles

        # First bootstrap the reads in the tube
        if (do.bootstrapReadsOld) {  # Bootstrap reads
          bootstrapReadsOld(alts.shuffled.N)
        } else {
          SSW <<- SSW.old           # restore SSW
          num.alternate.alleles <- old.alternate.alleles
        }


        for (index.of.src.being.shuffled in variationSourcesPlusAllelesList) {
          pcnt[index.of.src.being.shuffled] <- pcnt[index.of.src.being.shuffled] +1
          level <- index.of.src.being.shuffled+1;

          ## There was no bootstrap going on here.  Now there should be!
          ##	    if (do.bootstrapAlleles) {
          ##	      num.alternate.alleles[[2]]<-bootstrapAlleles(alts.shuffled.N)
          ##	    } else if (do.bootstrapReads) {
          ##	      num.alternate.alleles[[2]]<-bootstrapReads(alts.shuffled.N)
          ##	    } else {
          ##	      num.alternate.alleles[[2]]<-alts.shuffled.N
          ##	    }


          # When we shuffle src1, the alleles   the number of alleles
          if (index.of.src.being.shuffled == 1) {   #shuffle src1  or alleles
            alts.shuffled.N <-shuffleSource1Function()   # Get shuffled # of alternate alleles for each s1
            #cat("After shuffle alts:", alts.shuffled.N);
            if (do.bootstrapReads) {
              ## It doesn't appear that we need to set this, can just use the alts.shuffled.N
              ## num.alternate.alleles[[2]]<-bootstrapReads(alts.shuffled.N)
              alts.shuffled.N<-bootstrapReads(alts.shuffled.N)
              #cat("\nAfter bootstrap alts:", alts.shuffled.N, " ");
              #cat("num.alternate.alleles: ")
              #print(num.alternate.alleles)
              num.alternate.alleles[[2]] <- alts.shuffled.N
              calculateAlternateAlleles()
              calculateReferenceAlleles()
              SSW <<- calculateSSW(1:(num.sources.of.variation+1))
              #cat("after bootstrap num.alternate.alleles: \n")
              #print(num.alternate.alleles)
            }
            SSW[level]<<-sum(alts.shuffled.N*(num.alleles.in.src[[level]]-alts.shuffled.N)/num.alleles.in.src[[level]])
            #browser()


          } else if (index.of.src.being.shuffled >= 2) {
            # restore the previous SSW - only need to restore the one that has changed!
            # Bird method doesn't do this.  Is that really the only difference????
            SSW[index.of.src.being.shuffled]<<-sum(num.alternate.alleles[[index.of.src.being.shuffled]]*(num.alleles.in.src[[index.of.src.being.shuffled]]-num.alternate.alleles[[index.of.src.being.shuffled]])/num.alleles.in.src[[index.of.src.being.shuffled]])
            shuffledsrcs <- getSourceiShuffle(index.of.src.being.shuffled)
            new.srcNs[[index.of.src.being.shuffled]] <- num.alleles.in.src[[index.of.src.being.shuffled]][shuffledsrcs]
            num.alternate.alleles[[level]]<-vapply(srcX.in.srcY[[index.of.src.being.shuffled]][[level]],
                                                   function(x) sum(num.alternate.alleles[[index.of.src.being.shuffled]][shuffledsrcs[x]]),0)
            SSW[level] <<- sum(num.alternate.alleles[[level]]*(new.srcNs[[level]]-num.alternate.alleles[[level]])/new.srcNs[[level]])

            if (do.bootstrapReads) {
              alts.shuffled.N<-bootstrapReads(num.alternate.alleles[[1]])
              SSW[2]<<-sum(alts.shuffled.N*(num.alleles.in.src[[level]]-alts.shuffled.N)/num.alleles.in.src[[level]])
            }
          }


          if (index.of.src.being.shuffled >= 2) {   # srcNs change
            oldtsg <- tsg
            tsg <- calcSG(new.srcNs)     ##
            oldtn <- tn
            if (!identical(tsg, oldtsg)) {
              tn <- calcN(tsg)             ##
            } else {
              tn <- oldtn
            }
            foo <- amova(new.srcNs, tn, TRUE) #
            #browser()
          } else {
            # if we shuffled the alleles within src1, then the numbers cannot change.
            foo <- amova(num.alleles.in.src, n.noshuffle, TRUE)  #  Ns don't change
            #browser()
          }
          #fval <- foo[num.sources.of.variation-index.of.src.being.shuffled, 5]
          # 1 ==>  4-1 = foo[3,5]
          # 2 ==>  4-2 = foo[2,5]
          # 3 ==>  4-3 = foo[1,5]
          fval <- foo[index.of.src.being.shuffled+1]
          #cat("fval:", fval, "\n")
          if (is.na(fval) || is.nan(fval)) break
          if (save.distributions) {
            NullDistributionsTmp[[index.of.src.being.shuffled]][permutationi] <- fval
          }
          #browser()
          if (fval >= amovaTable[num.sources.of.variation-index.of.src.being.shuffled,5]) {
            cnt[index.of.src.being.shuffled] <- cnt[index.of.src.being.shuffled] +1
          }
        }  # end  for indexed.being.shuffled
        #print(foo)

      }  # end for 1..maxPermutations
      ## Now lets fill in the p-values and # permutations columns
      for (which.src in variationSourcesPlusAllelesList) {
        if (save.distributions) {
          NullDistributions[[which.src]] <- NullDistributionsTmp[[which.src]][1:pcnt[which.src]]  # remove non used slots
        }
        amovaTable[num.sources.of.variation-which.src,6] <- cnt[which.src]/pcnt[which.src]
        #cat("cnt", cnt, "pcnt", pcnt, "\n")
        #if (is.na(amovaTable[num.sources.of.variation-which.src,6])) browser()
        amovaTable[num.sources.of.variation-which.src,7] <- pcnt[which.src]
      }
      #print(amovaTable)
    } #end if (permutationMethod == "exact")


    # ---------------------------------------------------------------------------------------------------------------------
    #
    #  freely   Purmutation
    #
    ##
    ##  The first shuffle methods is the "freely" method
    #
    #   For the freely method, the alleles are freely shuffled among all groups of the current source level being tested, that is
    #   there is no restriction on the shuffle.  Only one shuffle needs to be done for all sources, and it is reused.
    #
    else if (permutationMethod == "freely") {
      cnt <- rep(0, num.sources.of.variation-1)    # set all the cnts to 0
      for(permutationi in 1:maxPermutations) {
        curPermutationi <<- permutationi           # don't know how to get it to function otherwise....

        # First bootstrap the reads in the tube
        if (do.bootstrapReadsOld) {
          num.alternate.alleles[[2]]<-bootstrapReadsOld(alts.shuffled.N)
        }

        # Next shuffle source 1

        alts.shuffled.N <-shuffleSource1Function()   # Get shuffled # of alternate alleles for each s1
        #browser()
        #cat(paste("shuffle #", permutationi, "alts.shuffled.N are now"), alts.shuffled.N,"\n")
        # update SSW for source 1
        SSW[2]<<-sum(alts.shuffled.N*(num.alleles.in.src[[2]]-alts.shuffled.N)/num.alleles.in.src[[2]])
        #print(paste("setting SSW[", 2, "] to", SSW[2]))
        # now update all the others
        if (do.bootstrapAlleles) {
          num.alternate.alleles[[2]]<-bootstrapAlleles(alts.shuffled.N)
        }
        else if (do.bootstrapReads) {
          num.alternate.alleles[[2]]<-bootstrapReadsOld(alts.shuffled.N)
        }
        else {
          num.alternate.alleles[[2]]<-alts.shuffled.N
        }

        # Calculate the number of alternate alleles at each level and calulate SSW
        for (index.of.src.being.shuffled in variationSourcesList ) {
          if (is.nan(amovaTable[num.sources.of.variation-index.of.src.being.shuffled,5])) {print("NaN freely f AT");  break }
          level <- index.of.src.being.shuffled+1;
          num.alternate.alleles[[level]]<-vapply(srcX.in.srcY[[level-1]][[level]], function(x) sum(num.alternate.alleles[[level-1]][x]),0)
          #-cat(paste("num.alternate.allles[[", level-1,"]] are now"), num.alternate.alleles[[level-1]], "\n")
          #cat(paste("num.alternate.allles[[", level,"]] are now"), num.alternate.alleles[[level]], "\n")
          SSW[level]<<-sum(num.alternate.alleles[[level]]*(num.alleles.in.src[[level]]-num.alternate.alleles[[level]])/num.alleles.in.src[[level]])
          #print(paste("setting SSW[", level, "] to", SSW[level]))
        }
        #print("SSW:")
        #print(SSW)
        #foo <- amova(num.alleles.in.src, sg.noshuffle, n.noshuffle, TRUE)  # group counts don't change at all, use original
        foo <- amova(num.alleles.in.src, n.noshuffle, TRUE)  # group counts don't change at all, use original
        #browser()
        #print(foo)

        for (index.of.src.being.shuffled in variationSourcesPlusAllelesList) {
          if (is.nan(amovaTable[num.sources.of.variation-index.of.src.being.shuffled,5])) {print("NaN freely f foo");  break }
          level <- index.of.src.being.shuffled+1;
          #fval <- foo[num.sources.of.variation-index.of.src.being.shuffled, 5]
          fval <- foo[index.of.src.being.shuffled+1]
          if (is.na(fval) || is.nan(fval)) break
          if (save.distributions) {
            NullDistributionsTmp[[index.of.src.being.shuffled]][permutationi] <- fval
          }
          if (fval >= amovaTable[num.sources.of.variation-index.of.src.being.shuffled,5]) {
            cnt[index.of.src.being.shuffled] <- cnt[index.of.src.being.shuffled] +1
          }
        }
        if (min(cnt) >= NresamplesToStop) break  # done with shuffling since all ps have enough observations >= f
      }  # end of foreach permutation

      # Put p-vals into amovaTable  and store Null Distributions
      for (index.of.src.being.shuffled in variationSourcesPlusAllelesList) {
        amovaTable[num.sources.of.variation-index.of.src.being.shuffled,6] <- cnt[index.of.src.being.shuffled]/permutationi
        amovaTable[num.sources.of.variation-index.of.src.being.shuffled,7] <- permutationi
        if (save.distributions) {
          NullDistributions[[index.of.src.being.shuffled]] <- NullDistributionsTmp[[index.of.src.being.shuffled]][1:permutationi]  # remove non used slots
        }
      }
    }  # end of if permutationMethod == "freely"

    #
    #  Deal with the purmutation method "alleles"
    #
    #  For this method, only alleles get shuffled.
    #  When testing source i, the alleles are moved around within the confines of src i+1

    #  So for each permutation, the sizes (Ns) of the groupings do not change, but the alt/ref allele counts do.
    #  So that means the SSW from src 1 to src i+1 will change.

    # ---------------------------------------------------------------------------------------------------------------------
    #
    #  alleles   Purmutation
    #
    else if (permutationMethod == "alleles") {
      for (index.of.src.being.shuffled in variationSourcesPlusAllelesList) {
        level <- index.of.src.being.shuffled+1;
        #-print(paste("Shuffling src", index.of.src.being.shuffled, "level", level))
        #if (is.nan(amovaTable[num.sources.of.variation-index.of.src.being.shuffled,5]) ) {  #Can we have Nans?
        #}else
        cnt <- 0;

        for(permutationi in 1:maxPermutations) {
          curPermutationi <<- permutationi   # don't know how to get it to function otherwise....
          # Level one can actually use the individual speed up, so lets do that.

          #num.alternate.alleles[[2]] <- shuffleSourceiFunction(index.of.src.being.shuffled)
          alts.shuffled.N <- shuffleSourceiFunction(index.of.src.being.shuffled)
          #browser()
          if (do.bootstrapAlleles) {
            num.alternate.alleles[[2]]<-bootstrapAlleles(alts.shuffled.N)
          } else if (do.bootstrapReads) {
            num.alternate.alleles[[2]]<-bootstrapReads(alts.shuffled.N)
          } else {
            num.alternate.alleles[[2]]<-alts.shuffled.N
          }

          #-print(paste("shuffle for src", index.of.src.being.shuffled, "is:"))
          #-print(num.alternate.alleles[[2]])

          #browser()
          if (level > 2) {
            num.alternate.alleles[3:level] <- calculateAlternateCounts(3:level)
            #-print(paste("now alternates counts for others are 3:",level, "is:"))
            #-print(num.alternate.alleles[3:level])
          }
          SSW[2:(level+1)] <<- calculateSSW(2:(level+1))

          #-print("SSW is:")
          #-print(SSW)

          #foo <- amova(num.alleles.in.src, sg.noshuffle, n.noshuffle, TRUE)  #  Ns don't change
          foo <- amova(num.alleles.in.src, n.noshuffle, TRUE)  #  Ns don't change
          #-print(foo)
          #browser()


          #print(paste("comparing if (",foo[num.sources.of.variation-index.of.src.being.shuffled,5],">=",amovaTable[num.sources.of.variation-index.of.src.being.shuffled,5]))
          #fval <- foo[num.sources.of.variation-index.of.src.being.shuffled, 5]
          fval <- foo[index.of.src.being.shuffled+1]
          if (is.na(fval) || is.nan(fval)) break
          if (save.distributions) {
            NullDistributionsTmp[[index.of.src.being.shuffled]][permutationi] <- fval
          }
          #print(paste("fval ", fval, " vs ", amovaTable[num.sources.of.variation-index.of.src.being.shuffled,5]))
          if (fval >= amovaTable[num.sources.of.variation-index.of.src.being.shuffled,5]) {
            if ((cnt<-cnt+1) == NresamplesToStop) {break}     # if we have 100 above or equal, no point in continuing
          }
        }
        if (save.distributions) {
          NullDistributions[[index.of.src.being.shuffled]] <- NullDistributionsTmp[[index.of.src.being.shuffled]][1:permutationi]  # remove non used slots
        }

        amovaTable[num.sources.of.variation-index.of.src.being.shuffled,6] <- cnt/permutationi
        amovaTable[num.sources.of.variation-index.of.src.being.shuffled,7] <- permutationi
      }

    } # end if (permutationMethod == "alleles")

    # ---------------------------------------------------------------------------------------------------------------------
    #
    #  Rexact and exact   Purmutation
    #
    else {#  not freely, bird, or alleles or exact!!.      So rexact
      cat("here 3\n")
      for (index.of.src.being.shuffled in variationSourcesPlusAllelesList) {
        level <- index.of.src.being.shuffled+1;
        #-print(paste("Shuffling src", index.of.src.being.shuffled, "level", level))
        #if (is.nan(amovaTable[num.sources.of.variation-index.of.src.being.shuffled,5]) ) {  #Can we have Nans?
        #}else
        cnt <- 0;
        if (index.of.src.being.shuffled >= 2) {
          # restore the previous SSW - only need to restore the one that has changed!
          SSW[index.of.src.being.shuffled]<<-sum(num.alternate.alleles[[index.of.src.being.shuffled]]*(num.alleles.in.src[[index.of.src.being.shuffled]]-num.alternate.alleles[[index.of.src.being.shuffled]])/num.alleles.in.src[[index.of.src.being.shuffled]])
          #print(paste("setting SSW[", index.of.src.being.shuffled, "] to", SSW[index.of.src.being.shuffled]))
        }
        new.srcNs <- num.alleles.in.src  # restore the old  srcNs

        for(permutationi in 1:maxPermutations) {
          curPermutationi <<- permutationi   # don't know how to get it to function otherwise....

          if (index.of.src.being.shuffled == 1) {   #shuffle src1  or alleles
            alts.shuffled.N <-shuffleSource1Function()   # Get shuffled # of alternate alleles for each s1      ##

            #browser()
            #-cat(paste("alts.shuffled.N are now"), alts.shuffled.N,"\n")
            SSW[level]<<-sum(alts.shuffled.N*(num.alleles.in.src[[level]]-alts.shuffled.N)/num.alleles.in.src[[level]])
            #print(paste("setting SSW[", level, "] to", SSW[level]))
          } else if (index.of.src.being.shuffled >= 2) {   #shuffle src2
            # Shuffle the src1 groups, across the source2s

            ### SAK need to deal with preshuffle for other levels here!!!

            shuffledsrcs <- getSourceiShuffle(index.of.src.being.shuffled)

            #	    if (level == num.sources.of.variation)
            #	      shuffledsrcs <- sample(1:num.source[level-1], replace=FALSE)  # just shuffle all of them.
            #	    else
            #	      shuffledsrcs <- unlist(lapply(srcX.in.srcY[[level-1]][[level+1]], sample))
            #-cat(paste("shuffledsrcs are now"), shuffledsrcs, "\n")

            new.srcNs[[index.of.src.being.shuffled]] <- num.alleles.in.src[[index.of.src.being.shuffled]][shuffledsrcs]
            #-cat(paste("new.srcNs[[", index.of.src.being.shuffled,"]] are now"), new.srcNs[[index.of.src.being.shuffled]], "\n")
            ## alleles move around so numbers change, but just totals  SSWs for all above can change
            num.alternate.alleles[[level]]<-vapply(srcX.in.srcY[[index.of.src.being.shuffled]][[level]],
                                                   function(x) sum(num.alternate.alleles[[index.of.src.being.shuffled]][shuffledsrcs[x]]),0)

            #-cat(paste("num.alternate.allles[[", index.of.src.being.shuffled,"]] are now"), num.alternate.alleles[[index.of.src.being.shuffled]], "\n")
            #-cat(paste("num.alternate.allles[[", index.of.src.being.shuffled+1,"]] are now"), num.alternate.alleles[[index.of.src.being.shuffled+1]], "\n")
            SSW[level] <<- sum(num.alternate.alleles[[level]]*(new.srcNs[[level]]-num.alternate.alleles[[level]])/new.srcNs[[level]])
            #print(paste("setting SSW[", index.of.src.being.shuffled+1, "] to", SSW[index.of.src.being.shuffled+1]))
          }


          # bootstrap
          # take shuffled alleles, and then bootstrap the reads
          ### This must be fixed or verfied!!!
          ### SAK SAK
          if (do.bootstrapAlleles) {
            alts.shuffled.N<-bootstrapAlleles(alts.shuffled.N)
          }
          if (do.bootstrapReads) {
            alts.shuffled.N<-bootstrapReads(alts.shuffled.N)
          }


          if (index.of.src.being.shuffled >= 2) {   # srcNs change
            oldtsg <- tsg
            tsg <- calcSG(new.srcNs)     ##
            oldtn <- tn
            if (!identical(tsg, oldtsg)) {
              tn <- calcN(tsg)             ##
            } else {
              tn <- oldtn
            }
            #foo <- amova(new.srcNs, tsg, tn, TRUE) #
            foo <- amova(new.srcNs, tn, TRUE) #
          } else {
            #foo <- amova(num.alleles.in.src, sg.noshuffle, n.noshuffle, TRUE)  #  Ns don't change
            foo <- amova(num.alleles.in.src, n.noshuffle, TRUE)  #  Ns don't change
          }
          #-print(foo)
          #fval <- foo[num.sources.of.variation-index.of.src.being.shuffled, 5]
          # 1 ==>  4-1 = foo[3,5]
          # 2 ==>  4-2 = foo[2,5]
          # 3 ==>  4-3 = foo[1,5]
          fval <- foo[index.of.src.being.shuffled+1]
          if (is.na(fval) || is.nan(fval)) break
          if (save.distributions) {
            NullDistributionsTmp[[index.of.src.being.shuffled]][permutationi] <- fval
          }
          #browser()
          if (fval >= amovaTable[num.sources.of.variation-index.of.src.being.shuffled,5]) {
            #cnt[index.of.src.being.shuffled] <- cnt[index.of.src.being.shuffled] +1
            if ((cnt<-cnt+1) == NresamplesToStop) {break}     # if we have 100 above or equal, no point in continuing
          }
        }
        if (save.distributions) {
          NullDistributions[[index.of.src.being.shuffled]] <- NullDistributionsTmp[[index.of.src.being.shuffled]][1:permutationi]  # remove non used slots
        }

        amovaTable[num.sources.of.variation-index.of.src.being.shuffled,6] <- cnt/permutationi
        cat( "cnt:", cnt, " permutationi:", permutationi,"\n")
        amovaTable[num.sources.of.variation-index.of.src.being.shuffled,7] <- permutationi
      }  # end of for loop for source to permute
    } # End of if permutation method, elseif...

    #browser()

    colnames(amovaTable)<-c("df","SS","MS","sigma squared","f","p","#permutations")
    row.names(amovaTable)<-c(names(expDesign)[num.sources.of.variation:2],'Within','Total')


    if (save.distributions)
      return (list(amovaTable, NullDistributions))
    else
      return(amovaTable)
  }  # end  ProcessSNP


  if(multi.core==F){
    AllAmovaTables = lapply(1:(numSNPs), function(s) processSNP(s, maxPermutations=maxPermutations))
  } else if(multi.core==T | is.numeric(multi.core)){
    library(parallel)
    num.cores <- ifelse(multi.core==T,detectCores()-1,multi.core)

    if(Sys.info()['sysname']=='Windows'){
      my.cluster <- makeCluster(num.cores, type="PSOCK")
      vars <- c("DF", "DF.total","MS.total")
      clusterExport(my.cluster, vars)
    } else {
      my.cluster <- makeCluster(num.cores, type="FORK")
    }

    AllAmovaTables = parLapply(my.cluster, 1:(numSNPs), function(s) processSNP(s, maxPermutations=maxPermutations))
    stopCluster(my.cluster)

  } else {
    stop('multi.core argument must be logical or numeric')
  }


  #Print summary results
  pvals <- list()
  for(s in (num.sources.of.variation-1):1){
    pvals[[s]] <- sapply(1:numSNPs, function(snp) AllAmovaTables[[snp]][num.sources.of.variation-s,6])
    print(paste("There were ", length(pvals[[s]][pvals[[s]]<=.05]),"/", length(pvals[[s]]), " values <= .05 for source", names(expDesign)[2:num.sources.of.variation][s]))
  }

  if(!is.null(outputFile)){
    saveRDS(AllAmovaTables, file=paste(outputFile,"rds",sep="."))
  }

  return(AllAmovaTables)


}

    © 2018 GitHub, Inc.
    Terms
    Privacy
    Security
    Status
    Help

    Contact GitHub
    Pricing
    API
    Training
    Blog
    About

Press h to open a hovercard with more details.
cbirdlab/impostar documentation built on June 1, 2019, 7:08 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
cbirdlab/impostar
ImPoStAR: Implement Population Structure Analyses in R

R/old/runAMOVA.R
In cbirdlab/impostar: ImPoStAR: Implement Population Structure Analyses in R

R Package Documentation

Browse R Packages

We want your feedback!

cbirdlab/impostar ImPoStAR: Implement Population Structure Analyses in R

R/old/runAMOVA.R In cbirdlab/impostar: ImPoStAR: Implement Population Structure Analyses in R

R Package Documentation

Browse R Packages

We want your feedback!

cbirdlab/impostar
ImPoStAR: Implement Population Structure Analyses in R

R/old/runAMOVA.R
In cbirdlab/impostar: ImPoStAR: Implement Population Structure Analyses in R