Preamble

This document requires the rutilstimflutre package to be installed, which itself may require other packages, depending on the functions:

suppressPackageStartupMessages(library(rutilstimflutre))
packageVersion("rutilstimflutre")
library(rrBLUP)
packageVersion("rrBLUP")

This R chunk is used to assess how much time it takes to execute the R code in this document until the end:

t0 <- proc.time()

To make all this reproducible, the seed of the pseudo-random number generator is set:

set.seed(1859)

Simulate SNP genotypes

nb.inds <- 500
nb.chrs <- 10
Ne <- 10^4
L <- 5*10^5
mu <- 10^(-8)
theta <- 4 * Ne * mu * L
c <- 10^(-8)
rho <- 4 * Ne * c * L
genomes <- simulCoalescent(nb.inds=nb.inds, nb.reps=nb.chrs, chrom.len=L,
                           pop.mut.rate=theta, pop.recomb.rate=rho)
dim(X <- genomes$genos)
X[1:3,1:4]
afs <- estimSnpAf(X)
summary(afs) # mean should be 0.5
hist(afs, breaks="FD", xlim=c(0,1), las=1, col="grey", border="white",
     main=paste0("AFs of ", ncol(X), " SNPs"),
     xlab="Allele frequency", ylab="Number of SNPs")
mafs <- estimSnpMaf(X)
sum(mafs < 0.01)
plotHistMinAllelFreq(mafs=mafs)

Check that the linkage disequilibrium is as expected:

chr <- "chr1"
min.maf <- 0.2
min.pos <- 0
max.pos <- L
(length(snps.tokeep <-
          rownames(genomes$snp.coords[mafs >= min.maf &
                                      genomes$snp.coords$chr == chr &
                                      genomes$snp.coords$pos >= min.pos &
                                      genomes$snp.coords$pos <= max.pos,])))
(nrow(ld <- estimLd(X=genomes$genos[,snps.tokeep],
                    snp.coords=genomes$snp.coords[snps.tokeep,],
                    use.ldcorsv=FALSE)))
summary(ld$cor2)
snp.dist <- distSnpPairs(snp.pairs=ld[, c("loc1","loc2")],
                         snp.coords=genomes$snp.coords[snps.tokeep,])
plotLd(snp.dist,
       ## ld$cor2, estim="r2",
       sqrt(ld$cor2), estim="r",
       main=paste0(length(snps.tokeep), " SNPs with MAF >= ", min.maf,
                   " on ", chr),
       use.density=TRUE,
       span=1/20,
       sample.size=2 * nb.inds,
       Ne=Ne, c=c)
abline(v=500, lty=2)

Check that the matrix of additive genetic relationships is as expected:

A <- estimGenRel(X=X, relationships="additive", method="vanraden1")
summary(diag(A)) # mean should be 1
summary(A[upper.tri(A)]) # mean should be 0

For the rest, discard all SNPs with a minor allele frequency below 1%:

thresh <- 0.01
dim(X <- X[, estimSnpMaf(X) >= thresh])
summary(afs <- estimSnpAf(X))
summary(mafs <- estimSnpMaf(X))
A <- estimGenRel(X=X, relationships="additive", method="vanraden1")
summary(diag(A))
summary(A[upper.tri(A)])

Simulate phenotypes

model <- simulAnimalModel(T=1, Q=3, A=A, V.G.A=15, V.E=5)
names(model)
c(model$C)
model$V.G.A
model$V.E
mean(model$dat$response1)
var(model$dat$response1)
summary(model$G.A[,1])
hist(model$G.A[,1], breaks="FD", main="True breeding values",
     las=1, col="grey", border="white")

Save data

Path to the package:

p2pkg <- "~/src/rgs3"

Phenotypes:

if(! is.null(p2pkg)){
  phenos.file <- paste0(p2pkg, "/inst/extdata/phenos_df.txt.gz")
  write.table(x=model$dat, file=gzfile(phenos.file), quote=FALSE, sep="\t")
  print(tools::md5sum(path.expand(phenos.file)))
}

Genotypes:

if(! is.null(p2pkg)){
  genos.file <- paste0(p2pkg, "/inst/extdata/genos_mat.txt.gz")
  write.table(x=X, file=gzfile(genos.file), quote=FALSE, sep="\t")
  print(tools::md5sum(path.expand(genos.file)))
}

Genotypic values:

if(! is.null(p2pkg)){
  genovals.file <- paste0(p2pkg, "/inst/extdata/genovals.txt.gz")
  write.table(x=model$G.A[,1], file=gzfile(genovals.file), quote=FALSE,
              sep="\t", col.names=FALSE)
  print(tools::md5sum(path.expand(genovals.file)))
}

Quick check

Use A

Give the additive genetic relationship matrix to the rrBLUP package:

fit.rrBLUP.A <- mixed.solve(y=model$Y[,1], Z=model$Z, K=A, X=model$W,
                            method="REML", SE=TRUE, return.Hinv=TRUE)
cbind(truth=model$C, estim=fit.rrBLUP.A$beta)
c(truth=model$V.E, estim=fit.rrBLUP.A$Ve)
c(truth=model$V.G.A, estim=fit.rrBLUP.A$Vu)
summary(fit.rrBLUP.A$u)
hist(fit.rrBLUP.A$u, breaks="FD", main="Predicted breeding values (via A)",
      las=1, col="grey", border="white")
regplot(x=model$G.A, y=fit.rrBLUP.A$u, asp=1, legend.x="bottomright", las=1,
        xlab="True breeding values", ylab="Predicted breeding values (via A)")
abline(h=0, v=0, lty=2)

Use X

Give the SNP genotypes matrix to the rrBLUP package:

fit.rrBLUP.X <- mixed.solve(y=model$Y[,1], Z=model$Z %*% (X - 1), K=NULL, X=model$W,
                            method="REML", SE=TRUE, return.Hinv=TRUE)
cbind(truth=model$C, estim=fit.rrBLUP.X$beta)
c(truth=model$V.E, estim=fit.rrBLUP.X$Ve)
fit.rrBLUP.X$Vu
summary(fit.rrBLUP.X$u)
hist(fit.rrBLUP.X$u, breaks="FD", main="Estimated additive SNP effects",
     las=1, col="grey", border="white")
genovals.hat <- ((X - 1) %*% fit.rrBLUP.X$u)[,1]
summary(genovals.hat)
hist(genovals.hat, breaks="FD", main="Predicted breeding values (via X)",
     las=1, col="grey", border="white")
regplot(x=model$G.A, y=genovals.hat, asp=1, legend.x="bottomright", las=1,
        xlab="True breeding values", ylab="Predicted breeding values (via X)")
abline(h=0, v=0, lty=2)

Appendix

t1 <- proc.time(); t1 - t0
print(sessionInfo(), locale=FALSE)


INRA/rgs3 documentation built on May 20, 2019, 5:24 p.m.