simulBvsr: BVSR

View source: R/quantgen.R

simulBvsrR Documentation

BVSR

Description

Simulate phenotypes according to the following model: Y = W c + Z X_A a + + Z X_D d + epsilon where Y is N x 1; W is N x Q; c is Q x 1; Z is N x I; X_A/D is I x P and epsilon is N x 1 with epsilon ~ Normal_N(0, sigma^2 Id) and c ~ Normal(mean_a, sd_a) so that sd_a is large ("fixed effect"). For SNP p, gamma_p indicates if it is causal, i.e. non-zero additive and/or dominant effect, where Prob(gamma_p=1) is named pi. For the case where pi is small, see Guan & Stephens (2011), Carbonetto & Stephens (2012), Peltola et al (2012), Verzelen (2012). Causal SNP p can have an additive effect, a_p | gamma_p=1 ~ Normal_1(0, sigma_a^2), a dominant effect, d_p | gamma_p=1 ~ Normal_1(0, sigma_d^2), or both.

Usage

simulBvsr(
  Q = 3,
  mu = 50,
  mean.c = 5,
  sd.c = 2,
  X,
  m1 = TRUE,
  ctr = TRUE,
  std = FALSE,
  pi = 1,
  pve = 0.7,
  sigma.a2 = 1,
  sigma.d2 = 0,
  min.maf = 0,
  perc.NA = 0,
  err.df = Inf,
  seed = NULL,
  verbose = 1
)

Arguments

Q

number of fixed effects, i.e. the intercept plus the number of years during which genotypes are phenotyped (starting in 2010)

mu

overall mean

mean.c

mean of the prior on c[2:Q]

sd.c

std dev of the prior on c[2:Q]

X

matrix of bi-allelic SNP genotypes encoded in allele doses in [0,2], with genotypes in rows and SNPs in columns (SNPs with missing values or low MAF should be discarded beforehand)

m1

if TRUE, 1 will be subtracted from each entry of X to make X_A (before optional centering and scaling)

ctr

if TRUE, the columns of the X matrix will be centered to make X_A and X_D

std

if TRUE, the columns of the X matrix will also be standardized to make X_A and X_D (note that Habier et al (2007) don't standardize the genotypes, but Janss et al (2012) do)

pi

proportion of marker effects (a) that are non-zero; setting pi at 1 means simulating from the additive infinitesimal model (equivalent to ridge regression)

pve

proportion of phenotypic variance explained by SNPs with non-zero effects ("heritability"); PVE = V[g] / V[y] where y = g + e and g = g_a + g_d (no epistasis); the magnitude of g_a (resp. g_d) depends on whether or not sigma.a2 (resp. sigma.d2) is set to zero; a value for sigma^2 is then chosen

sigma.a2

prior variance of the non-zero additive effects

sigma.d2

prior variance of the non-zero dominant effects (if non-null, a reasonable choice is half of sigma.a2, as in Servin & Stephens (2007) with their prior D2)

min.maf

minimum minor allele frequency below which SNPs can't have any effect (neither additive nor dominant)

perc.NA

percentage of missing phenotypes, at random

err.df

degrees of freedom of errors' Student's t-distribution

seed

seed for the pseudo-random number generator

verbose

verbosity level (0/1)

Value

list

Author(s)

Timothee Flutre

Examples

## Not run: ## simulate genotypes
set.seed(1859)
I <- 200
P <- 2000
X <- simulGenosDose(nb.genos=I, nb.snps=P)

## estimate genetic relationships
A <- estimGenRel(X=X, relationships="additive", method="vanraden1")
D <- estimGenRel(X=X, relationships="dominant", method="vitezica")

## additive sparse genetic architecture
## choose pi so that sum(gamma * (1 + log(P / sum(gamma)))) < I
Q <- 3
modelA <- simulBvsr(Q=Q, X=X, pi=0.01, pve=0.7, sigma.a2=1)
modelA$sigma2

library(lme4)
fit1 <- lmer(formula=response1 ~ year + (1|geno), data=modelA$dat)
cbind(modelA$c, blues <- fixef(fit1))
blups <- ranef(fit1, drop=TRUE)$geno[rownames(X)]
cor(modelA$g.A, blups, method="pearson")
cor(modelA$g.A, blups, method="spearman")
(vc1 <- as.data.frame(VarCorr(fit1)))
(pve1.hat <- vc1$vcov[1] / (vc1$vcov[1] + vc1$vcov[2]))

fit1A <- lmerAM(formula=response1 ~ year + (1|geno), data=modelA$dat,
                relmat=list(geno=A))
## maybe necessary to use A2 <- nearPD(A)$mat
cbind(modelA$c, blues <- fixef(fit1A$merMod))
blupsA <- ranef(fit1A$merMod, drop=TRUE)$geno[rownames(X)]
cor(modelA$g.A, blupsA, method="pearson")
cor(modelA$g.A, blupsA, method="spearman")
(vc1A <- as.data.frame(VarCorr(fit1A$merMod)))
(pve1A.hat <- vc1A$vcov[1] / (vc1A$vcov[1] + vc1A$vcov[2]))

plot(x=modelA$g.A, y=blups, col="blue", asp=1, las=1)
points(x=modelA$g.A, y=blupsA, col="red")
segments(x0=modelA$g.A, y0=blups, x1=modelA$g.A, y1=blupsA)
abline(h=0, v=0, a=0, b=1, lty=2)
legend("bottomright", legend=c("without A", "with A"), col=c("blue", "red"),
       pch=1, bty="n")

library(varbvs)
fit2 <- varbvs(X=modelA$X.A, Z=NULL, y=blups, verbose=FALSE)
print(fit2s <- summary(fit2))
names(sort(modelA$a[modelA$gamma == 1], decreasing=TRUE))

(pi.hat <- 10^(fit2s$logodds$x0) / (1 + 10^(fit2s$logodds$x0)))
(pi.hat.low <- 10^(fit2s$logodds$a) / (1 + 10^(fit2s$logodds$a)))
(pi.hat.high <- 10^(fit2s$logodds$b) / (1 + 10^(fit2s$logodds$b)))

w <- c(normalizelogweights(fit2$logw))
pips <- c(fit2$alpha %*% w)
cols <- rep("black", P)
cols[modelA$gamma != 0] <- "red"
plot(x=1:P, y=pips, col=cols, las=1, xlab="SNPs", ylab="PIP",
     main="Posterior inclusion probabilities")

y.pred <- predict(fit2, X=modelA$X.A, Z=NULL)
cor(blups, y.pred)

## dominant sparse genetic architecture
Q <- 3
modelAD <- simulBvsr(Q=Q, X=X, pi=0.01, pve=0.7, sigma.a2=0, sigma.d2=1)
modelAD$sigma2

library(lme4)
dat <- modelAD$dat
colnames(dat)[colnames(dat) == "geno"] <- "geno.add"
dat$geno.dom <- dat$geno.add
fit3A <- lmerAM(formula=response1 ~ year + (1|geno.add), data=dat,
                relmat=list(geno.add=A))
(vc3A <- as.data.frame(VarCorr(fit3A$merMod)))
(pve3A.hat <- vc3A$vcov[1] / (vc3A$vcov[1] + vc3A$vcov[2]))

fit3D <- lmerAM(formula=response1 ~ year + (1|geno.dom), data=dat,
                relmat=list(geno.dom=D))
(vc3D <- as.data.frame(VarCorr(fit3D$merMod)))
(pve3D.hat <- vc3D$vcov[1] / (vc3D$vcov[1] + vc3D$vcov[2]))

fit3AD <- lmerAM(formula=response1 ~ year + (1|geno.add) + (1|geno.dom),
                 data=dat, relmat=list(geno.add=A, geno.dom=D))
(vc3AD <- as.data.frame(VarCorr(fit3AD$merMod)))
(pve3AD.hat <- (vc3AD$vcov[1] + vc3AD$vcov[2]) /
                 (vc3AD$vcov[1] + vc3AD$vcov[2] + vc3AD$vcov[3]))

## additive infinitesimal genetic architecture
Q <- 3
modelA <- simulBvsr(Q=Q, X=X, pi=1, pve=0.7, sigma.a2=1)
modelA$sigma2

library(rrBLUP)
fit.RR <- mixed.solve(y=modelA$Y[,1], Z=modelA$Z %*% modelA$X.A,
                      K=NULL, X=modelA$W)
fit.RR$Ve
fit.RR$Vu
fit.AM <- mixed.solve(y=modelA$Y[,1], Z=modelA$Z, K=A, X=modelA$W)
fit.AM$Ve
fit.AM$Vu
afs <- estimSnpAf(X=X)
fit.AM$Vu / (2 * sum(afs * (1 - afs))) # see Habier et al (2007)
fit.AM$Vu / (fit.AM$Vu + fit.AM$Ve) # same as the specified pve

## End(Not run)

timflutre/rutilstimflutre documentation built on Feb. 7, 2024, 8:17 a.m.