This document requires the rutilstimflutre package to be installed, which itself may require other packages, depending on the functions:
suppressPackageStartupMessages(library(rutilstimflutre)) packageVersion("rutilstimflutre") library(rrBLUP) packageVersion("rrBLUP")
This R chunk is used to assess how much time it takes to execute the R code in this document until the end:
t0 <- proc.time()
To make all this reproducible, the seed of the pseudo-random number generator is set:
set.seed(1859)
nb.inds <- 500 nb.chrs <- 10 Ne <- 10^4 L <- 5*10^5 mu <- 10^(-8) theta <- 4 * Ne * mu * L c <- 10^(-8) rho <- 4 * Ne * c * L genomes <- simulCoalescent(nb.inds=nb.inds, nb.reps=nb.chrs, chrom.len=L, pop.mut.rate=theta, pop.recomb.rate=rho) dim(X <- genomes$genos) X[1:3,1:4] afs <- estimSnpAf(X) summary(afs) # mean should be 0.5 hist(afs, breaks="FD", xlim=c(0,1), las=1, col="grey", border="white", main=paste0("AFs of ", ncol(X), " SNPs"), xlab="Allele frequency", ylab="Number of SNPs") mafs <- estimSnpMaf(X) sum(mafs < 0.01) plotHistMinAllelFreq(mafs=mafs)
Check that the linkage disequilibrium is as expected:
chr <- "chr1" min.maf <- 0.2 min.pos <- 0 max.pos <- L (length(snps.tokeep <- rownames(genomes$snp.coords[mafs >= min.maf & genomes$snp.coords$chr == chr & genomes$snp.coords$pos >= min.pos & genomes$snp.coords$pos <= max.pos,]))) (nrow(ld <- estimLd(X=genomes$genos[,snps.tokeep], snp.coords=genomes$snp.coords[snps.tokeep,], use.ldcorsv=FALSE))) summary(ld$cor2) snp.dist <- distSnpPairs(snp.pairs=ld[, c("loc1","loc2")], snp.coords=genomes$snp.coords[snps.tokeep,]) plotLd(snp.dist, ## ld$cor2, estim="r2", sqrt(ld$cor2), estim="r", main=paste0(length(snps.tokeep), " SNPs with MAF >= ", min.maf, " on ", chr), use.density=TRUE, span=1/20, sample.size=2 * nb.inds, Ne=Ne, c=c) abline(v=500, lty=2)
Check that the matrix of additive genetic relationships is as expected:
A <- estimGenRel(X=X, relationships="additive", method="vanraden1") summary(diag(A)) # mean should be 1 summary(A[upper.tri(A)]) # mean should be 0
For the rest, discard all SNPs with a minor allele frequency below 1%:
thresh <- 0.01 dim(X <- X[, estimSnpMaf(X) >= thresh]) summary(afs <- estimSnpAf(X)) summary(mafs <- estimSnpMaf(X)) A <- estimGenRel(X=X, relationships="additive", method="vanraden1") summary(diag(A)) summary(A[upper.tri(A)])
model <- simulAnimalModel(T=1, Q=3, A=A, V.G.A=15, V.E=5) names(model) c(model$C) model$V.G.A model$V.E mean(model$dat$response1) var(model$dat$response1) summary(model$G.A[,1]) hist(model$G.A[,1], breaks="FD", main="True breeding values", las=1, col="grey", border="white")
Path to the package:
p2pkg <- "~/src/rgs3"
Phenotypes:
if(! is.null(p2pkg)){ phenos.file <- paste0(p2pkg, "/inst/extdata/phenos_df.txt.gz") write.table(x=model$dat, file=gzfile(phenos.file), quote=FALSE, sep="\t") print(tools::md5sum(path.expand(phenos.file))) }
Genotypes:
if(! is.null(p2pkg)){ genos.file <- paste0(p2pkg, "/inst/extdata/genos_mat.txt.gz") write.table(x=X, file=gzfile(genos.file), quote=FALSE, sep="\t") print(tools::md5sum(path.expand(genos.file))) }
Genotypic values:
if(! is.null(p2pkg)){ genovals.file <- paste0(p2pkg, "/inst/extdata/genovals.txt.gz") write.table(x=model$G.A[,1], file=gzfile(genovals.file), quote=FALSE, sep="\t", col.names=FALSE) print(tools::md5sum(path.expand(genovals.file))) }
Give the additive genetic relationship matrix to the rrBLUP package:
fit.rrBLUP.A <- mixed.solve(y=model$Y[,1], Z=model$Z, K=A, X=model$W, method="REML", SE=TRUE, return.Hinv=TRUE) cbind(truth=model$C, estim=fit.rrBLUP.A$beta) c(truth=model$V.E, estim=fit.rrBLUP.A$Ve) c(truth=model$V.G.A, estim=fit.rrBLUP.A$Vu) summary(fit.rrBLUP.A$u) hist(fit.rrBLUP.A$u, breaks="FD", main="Predicted breeding values (via A)", las=1, col="grey", border="white") regplot(x=model$G.A, y=fit.rrBLUP.A$u, asp=1, legend.x="bottomright", las=1, xlab="True breeding values", ylab="Predicted breeding values (via A)") abline(h=0, v=0, lty=2)
Give the SNP genotypes matrix to the rrBLUP package:
fit.rrBLUP.X <- mixed.solve(y=model$Y[,1], Z=model$Z %*% (X - 1), K=NULL, X=model$W, method="REML", SE=TRUE, return.Hinv=TRUE) cbind(truth=model$C, estim=fit.rrBLUP.X$beta) c(truth=model$V.E, estim=fit.rrBLUP.X$Ve) fit.rrBLUP.X$Vu summary(fit.rrBLUP.X$u) hist(fit.rrBLUP.X$u, breaks="FD", main="Estimated additive SNP effects", las=1, col="grey", border="white") genovals.hat <- ((X - 1) %*% fit.rrBLUP.X$u)[,1] summary(genovals.hat) hist(genovals.hat, breaks="FD", main="Predicted breeding values (via X)", las=1, col="grey", border="white") regplot(x=model$G.A, y=genovals.hat, asp=1, legend.x="bottomright", las=1, xlab="True breeding values", ylab="Predicted breeding values (via X)") abline(h=0, v=0, lty=2)
t1 <- proc.time(); t1 - t0 print(sessionInfo(), locale=FALSE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.