#' Coal.Theta.Sim.File.G: function to simulate a vector (k.vec) of mutation counts given a vector of locus lengths (l.vec), vector of sample numbers (n.vec) with gamma rate variation
#'
#' This function returns k.vec, l.vec, n.vec from simulating under the coalescent model, with the results written to 'out.file'
#' @param theta Population size parameter theta
#' @param n.list List containing the number of samples for each locus
#' @param l.list List containing the number of samples for each locus
#' @param num.loci Number of loci to simulate
#' @param alpha.param Shape parameter of the gamma distribution for among-locus rate variation
#' @param K Number of discrete classes to approximate the gamma distribution
#' @param out.file Outfile to write in alleles format
#' @keywords population genetics, coalescent models
#' @export
#' @examples
#'
#'
#####################################################################################
# Simulation under the coalescent mode with gamma rate variation #
#####################################################################################
Coal.Theta.Sim.File.G <- function(theta, n.vec, l.vec, num.loci, out.file, alpha.param, K){
nucleotide.set <- c("A", "C", "G", "T") # Set of nucleotides
if (file.exists(out.file)) file.remove(out.file) # Clear output
sim.k.vec <- rep(NA, num.loci = num.loci) # Initialize k vector
sim.l.vec <- rep(NA, num.loci = num.loci) # Initialize k vector
sim.n.vec <- rep(NA, num.loci = num.loci) # Initialize k vector
i <- 1 # Initialize increment counter
locus.counter <- 0 # Initialize 'good' locus counter, number of mutations less than length of locus (infinite sites model)
n.sample <- sample(x = n.vec, size = num.loci, replace = T) # Random sample for the number of alleles per population
l.sample <- sample(x = l.vec, size = num.loci, replace = T) # Random sample for the length of the loci
r.sample <- sample(x = discrete.gamma(alpha = alpha.param, k = K), size = num.loci, replace = T)# get rate vector
while (i <= num.loci){ # For each simulated locus
mutation.count <- 0 # Init mutation counter for locus
n <- n.sample[i] # Get number of alleles to sample
l <- l.sample[i] # Get length of locus
k <- n # Convert to k
r <- r.sample[i] # Get locus rate
while (k > 1) { # While 2 or more lineages to coalesce
p.mut = (l*r*theta)/(k-1+(l*r*theta)) # Prob. of a mutation
p.coal = (k-1)/(k-1+(l*r*theta)) # Prob of coalescene
event = sample(c("M", "C"), size = 1, prob = c(p.mut, p.coal)) # Sample ranodom event
if (event == "C"){ # If event is coalescence
k = k-1 # Reduce number of lineages by 1
}else{mutation.count = mutation.count + 1}
}
if (mutation.count <= l){
i = i + 1 # increase locus counter
locus.counter = locus.counter + 1 # Increase 'good' locus counter (k < locus.length; infinite sites model)
sim.k.vec[locus.counter] <- mutation.count # Append count of mutations
sim.l.vec[locus.counter] <- l # Append locus length
sim.n.vec[locus.counter] <- n # Append number of sampled alleles
null.string <- sample(x = nucleotide.set, size = l, replace = T) # Sample ancestral sequence without mutations
for (j in 1:n){ # For allele sample
string1 <- gsub(pattern = 'XXX', replacement = j, x = ">SimLocus_YYY_Sample_XXX ") # Replace
string2 <- gsub(pattern = 'YYY', replacement = locus.counter, x = string1) # Replace
sequence.string <- null.string # Assign ancestral sequence
if (j == 1 && mutation.count > 0){ # If at the first sample
for (sim.k in 1:mutation.count){ # For each simulated mutation
ancestral.allele <- sequence.string[sim.k] # Get ancestral allele
derived.nuc.set <- nucleotide.set[nucleotide.set != ancestral.allele] # Reduced set for mutation
derived.allele <- sample(x = derived.nuc.set, size = 1, replace = T) # Sample mutated allele
sequence.string[sim.k] <- derived.allele # Assign derived allele
}
}
string3 <- gsub(pattern = 'ZZZ', replacement = string2, x = "ZZZ WWW")
string4 <- gsub(pattern = 'WWW', replacement = toString(paste(sequence.string, sep = '', collapse = '')), x = string3)
write(x = string4, file = out.file, append = T) # Write results to outfile
}
print(locus.counter)
write(x = "// |1", file = out.file, append = T)
}
}
return(list(sim.k.vec = sim.k.vec, sim.n.vec = sim.n.vec, sim.l.vec = sim.l.vec)) # Return the number of mutations simulated
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.