Nothing
#' A function to simulate QTL and/or SNP data.
#'
#' Simulates a chromosome of arbitrary length with arbitrary numbers, types, and spacings of genetic loci over arbitrary pedigrees.
#'
#' @param pedigree A pedigree
#' @param founders A vector of indicator variables denoting founder status (1=founder, 0=non-founder)
#' @param positions Genome locations in cM for markers
#' @param initHe Initial levels of expected heterozygosity
#' @param mutationType A vector of locus types - see details
#' @param mutationRate A vector of mutation rates
#' @param founderHaplotypes A matrix or dataframe containing founder haplotypes
#' @param phenotyped A vector of IDs of those individuals for which to return phenotypic data
#' @param genotyped A vector of IDs of those individuals for which to return genotypic data
#' @param returnG If 'y' then genotypic data for all loci (including \code{cIAM} loci) will be returned.
#' @param initFreqs A list of allele frequencies for all loci. If \code{initFreqs} is specified, it will override information from \code{initHe}. \code{extractA} from package \code{MasterBayes} can be used to obtain obtain \code{initFreqs} form a sample of genotypes. For \code{cIAM} loci, allele names in \code{initFreqs} should be allelic substitution effects.
#'
#' @details
#' Valid mutation types are `Micro', `Dom', `dIAM' and `cIAM', for microsatellite, dominant (AFLP), discrete infinite alleles mutation model loci (SNPs), and continuous infinite alleles mutation model loci (polymorphisms effecting phenotypic variation). cIAM loci have mutational allelic substitution effects taken drawn from a normal distribution with mean 0 and variance 1.
#'
#' @return
#' \item{Phenotypes}{A vector of phenotypes. Calculated as the sum of all allelic effects. Scaling is currently left to be done post-hoc.}
#' \item{MarkerData}{A vector of marker genotypes, i.e. alleles at all loci except those designated `cIAM'}
#'
#' @seealso \code{\link{phen_sim}}, \code{\link{micro_sim}}
#'
#' @examples
#' testData <- as.data.frame(matrix(
#' c(
#' 1, NA, NA, 1, 1, 1, 2, 2,
#' 2, NA, NA, 1, 1, 1, 2, 2,
#' 3, NA, NA, 1, 1, 1, 2, 2,
#' 4, NA, NA, 1, 0, 1, 2, 2,
#' 5, NA, NA, 1, 0, 1, 2, 2,
#' 6, 1, 4, 0, -1, 2, 3, 3,
#' 7, 1, 4, 0, -1, 2, 3, 3,
#' 8, 1, 4, 0, -1, 2, 3, 3,
#' 9, 1, 4, 0, -1, 2, 3, 3,
#' 10, 2, 5, 0, -1, 2, 3, 3,
#' 11, 2, 5, 0, -1, 2, 3, 3,
#' 12, 2, 5, 0, -1, 2, 3, 3,
#' 13, 2, 5, 0, -1, 2, 3, 3,
#' 14, 3, 5, 0, -1, 2, 3, 3,
#' 15, 3, 5, 0, -1, 2, 3, 3,
#' 16, 3, 5, 0, -1, 2, 3, 3,
#' 17, 3, 5, 0, -1, 2, 3, 3
#' ),
#' 17, 8,
#' byrow = TRUE
#' ))
#'
#' names(testData) <- c(
#' "id", "dam", "sire", "founder", "sex",
#' "cohort", "first", "last"
#' )
#' pedigree <- as.data.frame(cbind(
#' testData$id, testData$dam,
#' testData$sire
#' ))
#' for (x in 1:3) pedigree[, x] <- as.factor(pedigree[, x])
#' names(pedigree) <- c("id", "dam", "sire")
#' pedigree
#'
#' ## make up some microsatellite and gene allele frquencies:
#' sampleGenotypes <- as.data.frame(matrix(c(
#' 1, 2, -1.32, 0.21, 2, 1, 0.21, 0.21
#' ), 2, 4, byrow = TRUE))
#' testFreqs <- extractA(sampleGenotypes)
#'
#' ## note that alleles at the gene locus are given as their
#' ## allelic substitution effects:
#' testFreqs
#'
#' ## simulate data for these indivdiuals based on a single QTL
#' ## with two equally alleles with balanced frequencies in the
#' ## founders, linked (2 cM) to a highly polymorphic microsatellite:
#' genome_sim(
#' pedigree = pedigree, founders = testData$founder, positions = c(0, 2),
#' mutationType = c("Micro", "cIAM"), mutationRate = c(0, 0),
#' initFreqs = testFreqs, returnG = "y"
#' )
#' ## since we specified returnG='y', we can check that
#' ## the phenotypes add up to the
#' ## allelic substitution effects for the second locus.
#'
#' @keywords simulation
#'
#' @export
#'
genome_sim <-
function(pedigree, founders = NULL, positions = NULL, initHe = NULL,
mutationType = NULL, mutationRate = NULL, phenotyped = NULL,
founderHaplotypes = NULL, genotyped = NULL, returnG = "n", initFreqs = NULL) {
original.order <- order(positions)
positions <- positions[original.order]
initHe <- initHe[original.order]
mutationType <- mutationType[original.order]
mutationRate <- mutationRate[original.order]
founderHaplotypes <- founderHaplotypes[, original.order]
if (all.equal(original.order, 1:length(original.order)) == FALSE & is.null(initFreqs) == FALSE) {
stop("ordering of initFreqs will fail")
}
for (x in 1:3) pedigree[, x] <- as.character(pedigree[, x])
if (any(match(pedigree[, 2], pedigree[, 1]) > match(pedigree[, 1], pedigree[, 1]), na.rm = T)) {
stop("Dams appearing after their offspring: use fixPedigree")
}
if (any(match(pedigree[, 3], pedigree[, 1]) > match(pedigree[, 1], pedigree[, 1]), na.rm = T)) {
stop("Sires appearing after their offspring: use fixPedigree")
}
if (is.null(phenotyped) == FALSE && is.numeric(phenotyped) && length(phenotyped) > length(pedigree[, 1])) {
stop("Length of phenotype availability indicator vector incompatible with length of pedigree")
}
## assumes that 'true pedigree has been supplied' and that conditions
## of fixPedigreep()
## haplotypes can be supplied for founders, in which case they will be assigned in the order provided
## i.e., at least twice as many haplotypes as founders are required.
founderHap <- 1
## mutationType can be 'cIAM','dIAM','Micro','Dom'
mutation <- function(currentAllele, mutationType, mu) {
mutAllele <- currentAllele
if (rbinom(1, 1, mu) == 1) {
if (mutationType == "cIAM" | mutationType == "dIAM") mutAllele <- rnorm(1, 0, 1)
if (mutationType == "Micro") {
if (rbinom(1, 1, 0.5) == 1) {
mutAllele <- currentAllele - 1
} else {
mutAllele <- currentAllele + 1
}
}
if (mutationType == "dom") mutAllele <- abs(currentAllele - 1)
}
mutAllele
}
## while the continuous IAM model will work well for mutations, it will cause
## A = N.founders if applied directly. So I'll make a matrix of loci times
## times 1000, fill and fill it with founder alleles are appropriate frequencies
if (is.null(initFreqs) & is.null(founderHaplotypes)) {
founderIAMalleles <- matrix(NA, length(positions), 1000)
for (x in 1:length(positions)) {
A <- as.integer(1 / (1 - initHe[x]) + 0.9999)
A.index <- as.integer(runif(1000, 0, 1 / (1 - initHe[x]))) + 1
A.continuous.values <- rnorm(A, 0, 1)
founderIAMalleles[x, ] <- A.continuous.values[A.index]
}
}
## mutationType can be 'cIAM','dIAM','Micro','Dom'
rndAllele <- function(loc) {
allele <- NULL
if (is.null(initFreqs)) {
if (mutationType[loc] == "cIAM" | mutationType[loc] == "dIAM") allele <- founderIAMalleles[loc, as.integer(runif(1, 0, 1000)) + 1]
if (mutationType[loc] == "Micro") {
effectiveAlleles <- 1 / (1 - initHe[loc])
allele <- as.integer(runif(1, 0, effectiveAlleles)) + 200
}
if (mutationType[loc] == "Dom") {
effectiveAlleles <- 1 / (1 - initHe[loc])
allele <- as.integer(runif(1, 0, effectiveAlleles))
}
} else {
i <- runif(1, 0, 1)
a <- 0
while (i > 0) {
a <- a + 1
i <- i - initFreqs[[loc]][a]
}
allele <- as.numeric(names(initFreqs[[loc]][a]))
if (mutationType[loc] == "cIAM") allele <- as.numeric(names(initFreqs[[loc]][a]))
}
allele
}
## to get the right bit go to (individual-1)*2+chromosome
genomes <- matrix(NA, length(pedigree[, 1]) * 2, length(positions))
Kosambi.c <- function(m) {
f <- function(m, c) {
m - 0.25 * log((1 + 2 * c) / (1 - 2 * c))
}
c <- m
if (m > 0) c <- uniroot(f, c(0, 0.5), m = m)$root
c
}
segregate <- function(parent) {
# map distances between adjascent loci
map.intervals <- positions[2:length(positions)] -
positions[1:(length(positions) - 1)]
# Haldane-based expected crossover freq
# obs.cross.frac<-((1-exp(-2*map.intervals/100))/2)
# Kosambi's mapping function
obs.cross.frac <- apply(X = as.matrix(map.intervals) / 100, MARGIN = 1, FUN = Kosambi.c)
# observed recombinations
recomb <- rbinom(length(obs.cross.frac), 1, obs.cross.frac)
p <- genomes[((parent - 1) * 2 + 1):((parent - 1) * 2 + 2), ]
offspringHaplotype <- array(dim = length(positions))
chromosome <- sample(1:2, 1)
offspringHaplotype[1] <- p[chromosome, 1]
for (x in 1:length(map.intervals)) {
if (recomb[x] == 1 & chromosome == 1) {
chromosome <- 2
} else {
if (recomb[x] == 1 & chromosome == 2) chromosome <- 1
}
offspringHaplotype[x + 1] <- p[chromosome, x + 1]
}
offspringHaplotype
}
message(paste("Processing pedigree...", "\n"))
flush.console()
message(paste("0% 50% 100%", "\n"))
flush.console()
message(paste("| | |", "\n"))
flush.console()
mark.at <- (length(pedigree[, 1]) - 1) / 50
count <- 0
## now to go through all individuals
for (x in 1:length(pedigree[, 1])) {
count <- count + 1
if (count > mark.at) {
count <- 0
message("-")
flush.console()
}
## assign HW-assumed genotypes if founder
# if(founders[x]==1){
# genomes[(x-1)*2+1,]<-mapply(rndAllele,loc=1:length(positions))
# genomes[(x-1)*2+2,]<-mapply(rndAllele,loc=1:length(positions))
# } else {
## ... otherwise base genotypes on parents
if (is.na(pedigree$dam[x])) {
if (is.null(founderHaplotypes)) {
genomes[(x - 1) * 2 + 1, ] <- mapply(rndAllele, loc = 1:length(positions))
} else {
genomes[(x - 1) * 2 + 1, ] <- founderHaplotypes[founderHap, ]
founderHap <- founderHap + 1
}
} else {
mum <- which(as.character(pedigree$id) == as.character(pedigree$dam[x]), arr.ind = TRUE)
## segregation
genomes[(x - 1) * 2 + 1, ] <- segregate(mum)
## mutation
genomes[(x - 1) * 2 + 1, ] <- mapply(mutation, currentAllele = genomes[(x - 1) * 2 + 1, ], mutationType = mutationType, mu = mutationRate)
}
if (is.na(pedigree$sire[x])) {
if (is.null(founderHaplotypes)) {
genomes[(x - 1) * 2 + 2, ] <- mapply(rndAllele, loc = 1:length(positions))
} else {
genomes[(x - 1) * 2 + 2, ] <- founderHaplotypes[founderHap, ]
founderHap <- founderHap + 1
}
} else {
dad <- which(as.character(pedigree$id) == as.character(pedigree$sire[x]), arr.ind = TRUE)
## segregation
genomes[(x - 1) * 2 + 2, ] <- segregate(dad)
## mutation
genomes[(x - 1) * 2 + 2, ] <- mapply(mutation, currentAllele = genomes[(x - 1) * 2 + 2, ], mutationType = mutationType, mu = mutationRate)
}
}
message(paste("\n", "...done.", "\n"))
flush.console()
message(paste("\n", "Calculating phenotypes..."))
flush.console()
allelicEffect <- 1
genes <- genomes[subset(1:length(genomes[, 1]), (1:length(genomes[, 1])) %% 2 == 0), subset(1:length(positions), mutationType == "cIAM")] +
genomes[subset(1:length(genomes[, 1]), (1:length(genomes[, 1])) %% 2 == 1), subset(1:length(positions), mutationType == "cIAM")]
if (is.null(dim(genes)[2]) == FALSE && dim(genes)[2] > 1) {
phenotypes <- as.data.frame(cbind(pedigree$id, rowSums(genes) * (1 * allelicEffect)))
} else {
phenotypes <- as.data.frame(cbind(pedigree$id, genes * (1 * allelicEffect)))
}
if (is.character(phenotyped)) phenotypes <- phenotypes[which(phenotypes[, 1] %in% phenotyped), ]
if (is.numeric(phenotyped)) phenotypes <- subset(phenotypes, phenotyped > 0)
message(paste("done.", "\n"))
flush.console()
markerData <- NULL
if (length(subset(mutationType, mutationType != "cIAM")) > 0) {
message(paste("\n", "Tabulating marker genotypes..."))
flush.console()
for (x in 1:length(positions)) {
if (mutationType[x] == "dIAM") genomes[, x] <- as.numeric(as.factor(genomes[, x]))
}
markers <- genomes[, subset(1:length(positions), mutationType != "cIAM")]
doubleids <- as.vector(as.matrix(as.data.frame(rbind(pedigree$id, pedigree$id))))
markers <- as.data.frame(cbind(doubleids, markers))
markers <- markers[which(markers[, 1] %in% genotyped), ]
markerData <- markers
idsformarkers <- markers[, 1]
markers <- as.matrix(markers[, 2:length(markers[1, ])])
message(paste("done.", "\n"))
flush.console()
mt <- subset(mutationType, mutationType != "cIAM")
for (x in 1:length(mt)) {
if (length(table(markers[, x])) > 2) {
mt[x] <- "M"
} else {
mt[x] <- "S"
}
}
}
# if(FALSE){
# write(length(markers[,1])/2, "~/Desktop/textgenome_sim.out", append=TRUE)
# write(length(markers[1,]), "~/Desktop/textgenome_sim.out", append=TRUE)
# markerPositions<-subset(positions, mutationType!='cIAM')
# p<-array(dim=length(markerPositions)+1); p[2:length(p)]<-markerPositions; p[1]<-"P";
# write(p, "~/Desktop/textgenome_sim.out", ncolumns = length(p), append=TRUE)
# write(mt, "~/Desktop/textgenome_sim.out", ncolumns = length(mt), append=TRUE, sep="")
# for(x in 1:(length(markers[,1])/2)){
# write(idsformarkers[(x-1)*2+1], "~/Desktop/textgenome_sim.out", append=TRUE)
# write(markers[(x-1)*2+1,], "~/Desktop/textgenome_sim.out", ncolumns = length(markers[,1]), append=TRUE)
# write(markers[(x-1)*2+2,], "~/Desktop/textgenome_sim.out", ncolumns = length(markers[,1]), append=TRUE)
# }
# }
## calculate pairwise similarities a la Hayes and Goddard
# S<-matrix(NA,(length(genes[,1])/2),(length(genes[,1])/2))
# for(x in 1:(length(S[1,])-1)){
# message(paste(x,'\n')); flush.console();
# for(y in x:length(S[,1])){
# sim=0;
# for(g in 1:length(genes[1,])){
# obs<-length(table(c(genes[((x-1)*2+1):((x-1)*2+2),g],genes[((y-1)*2+1):((y-1)*2+2),g])))
# if(obs<4) sim<-sim+1/(2^(obs-1))
# }
# S[x,y]=sim/length(genes[1,])
# }
# }
# S<-(S-min(S))/1-min(S)
# output<-list(Phenotypes=phenotypes,MarkerData=markerData,MarkerRelatedness=S)
output <- list(Phenotypes = phenotypes, MarkerData = markerData)
if (returnG == "y") output <- list(Phenotypes = phenotypes, MarkerData = markerData, genomes = as.data.frame(cbind(doubleids, genomes)))
output
}
#' @rdname pedantics-deprecated
#' @section \code{genomesim}: the function has just been renamed with no other changes for the moment
#' @export
genomesim <- function(pedigree, founders = NULL, positions = NULL, initHe = NULL,
mutationType = NULL, mutationRate = NULL, phenotyped = NULL,
founderHaplotypes = NULL, genotyped = NULL, returnG = "n", initFreqs = NULL) {
.Deprecated(genome_sim,
msg = "this function from pedantics is deprecated, please use the new 'genome_sim()' instead",
)
genome_sim(
pedigree, founders, positions, initHe,
mutationType, mutationRate, phenotyped,
founderHaplotypes, genotyped, returnG, initFreqs
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.