simulBvsr | R Documentation |
Simulate phenotypes according to the following model: Y = W c + Z X_A a + + Z X_D d + epsilon where Y is N x 1; W is N x Q; c is Q x 1; Z is N x I; X_A/D is I x P and epsilon is N x 1 with epsilon ~ Normal_N(0, sigma^2 Id) and c ~ Normal(mean_a, sd_a) so that sd_a is large ("fixed effect"). For SNP p, gamma_p indicates if it is causal, i.e. non-zero additive and/or dominant effect, where Prob(gamma_p=1) is named pi. For the case where pi is small, see Guan & Stephens (2011), Carbonetto & Stephens (2012), Peltola et al (2012), Verzelen (2012). Causal SNP p can have an additive effect, a_p | gamma_p=1 ~ Normal_1(0, sigma_a^2), a dominant effect, d_p | gamma_p=1 ~ Normal_1(0, sigma_d^2), or both.
simulBvsr(
Q = 3,
mu = 50,
mean.c = 5,
sd.c = 2,
X,
m1 = TRUE,
ctr = TRUE,
std = FALSE,
pi = 1,
pve = 0.7,
sigma.a2 = 1,
sigma.d2 = 0,
min.maf = 0,
perc.NA = 0,
err.df = Inf,
seed = NULL,
verbose = 1
)
Q |
number of fixed effects, i.e. the intercept plus the number of years during which genotypes are phenotyped (starting in 2010) |
mu |
overall mean |
mean.c |
mean of the prior on c[2:Q] |
sd.c |
std dev of the prior on c[2:Q] |
X |
matrix of bi-allelic SNP genotypes encoded in allele doses in [0,2], with genotypes in rows and SNPs in columns (SNPs with missing values or low MAF should be discarded beforehand) |
m1 |
if TRUE, 1 will be subtracted from each entry of X to make X_A (before optional centering and scaling) |
ctr |
if TRUE, the columns of the X matrix will be centered to make X_A and X_D |
std |
if TRUE, the columns of the X matrix will also be standardized to make X_A and X_D (note that Habier et al (2007) don't standardize the genotypes, but Janss et al (2012) do) |
pi |
proportion of marker effects (a) that are non-zero; setting pi at 1 means simulating from the additive infinitesimal model (equivalent to ridge regression) |
pve |
proportion of phenotypic variance explained by SNPs with non-zero effects ("heritability"); PVE = V[g] / V[y] where y = g + e and g = g_a + g_d (no epistasis); the magnitude of g_a (resp. g_d) depends on whether or not |
sigma.a2 |
prior variance of the non-zero additive effects |
sigma.d2 |
prior variance of the non-zero dominant effects (if non-null, a reasonable choice is half of |
min.maf |
minimum minor allele frequency below which SNPs can't have any effect (neither additive nor dominant) |
perc.NA |
percentage of missing phenotypes, at random |
err.df |
degrees of freedom of errors' Student's t-distribution |
seed |
seed for the pseudo-random number generator |
verbose |
verbosity level (0/1) |
list
Timothee Flutre
## Not run: ## simulate genotypes
set.seed(1859)
I <- 200
P <- 2000
X <- simulGenosDose(nb.genos=I, nb.snps=P)
## estimate genetic relationships
A <- estimGenRel(X=X, relationships="additive", method="vanraden1")
D <- estimGenRel(X=X, relationships="dominant", method="vitezica")
## additive sparse genetic architecture
## choose pi so that sum(gamma * (1 + log(P / sum(gamma)))) < I
Q <- 3
modelA <- simulBvsr(Q=Q, X=X, pi=0.01, pve=0.7, sigma.a2=1)
modelA$sigma2
library(lme4)
fit1 <- lmer(formula=response1 ~ year + (1|geno), data=modelA$dat)
cbind(modelA$c, blues <- fixef(fit1))
blups <- ranef(fit1, drop=TRUE)$geno[rownames(X)]
cor(modelA$g.A, blups, method="pearson")
cor(modelA$g.A, blups, method="spearman")
(vc1 <- as.data.frame(VarCorr(fit1)))
(pve1.hat <- vc1$vcov[1] / (vc1$vcov[1] + vc1$vcov[2]))
fit1A <- lmerAM(formula=response1 ~ year + (1|geno), data=modelA$dat,
relmat=list(geno=A))
## maybe necessary to use A2 <- nearPD(A)$mat
cbind(modelA$c, blues <- fixef(fit1A$merMod))
blupsA <- ranef(fit1A$merMod, drop=TRUE)$geno[rownames(X)]
cor(modelA$g.A, blupsA, method="pearson")
cor(modelA$g.A, blupsA, method="spearman")
(vc1A <- as.data.frame(VarCorr(fit1A$merMod)))
(pve1A.hat <- vc1A$vcov[1] / (vc1A$vcov[1] + vc1A$vcov[2]))
plot(x=modelA$g.A, y=blups, col="blue", asp=1, las=1)
points(x=modelA$g.A, y=blupsA, col="red")
segments(x0=modelA$g.A, y0=blups, x1=modelA$g.A, y1=blupsA)
abline(h=0, v=0, a=0, b=1, lty=2)
legend("bottomright", legend=c("without A", "with A"), col=c("blue", "red"),
pch=1, bty="n")
library(varbvs)
fit2 <- varbvs(X=modelA$X.A, Z=NULL, y=blups, verbose=FALSE)
print(fit2s <- summary(fit2))
names(sort(modelA$a[modelA$gamma == 1], decreasing=TRUE))
(pi.hat <- 10^(fit2s$logodds$x0) / (1 + 10^(fit2s$logodds$x0)))
(pi.hat.low <- 10^(fit2s$logodds$a) / (1 + 10^(fit2s$logodds$a)))
(pi.hat.high <- 10^(fit2s$logodds$b) / (1 + 10^(fit2s$logodds$b)))
w <- c(normalizelogweights(fit2$logw))
pips <- c(fit2$alpha %*% w)
cols <- rep("black", P)
cols[modelA$gamma != 0] <- "red"
plot(x=1:P, y=pips, col=cols, las=1, xlab="SNPs", ylab="PIP",
main="Posterior inclusion probabilities")
y.pred <- predict(fit2, X=modelA$X.A, Z=NULL)
cor(blups, y.pred)
## dominant sparse genetic architecture
Q <- 3
modelAD <- simulBvsr(Q=Q, X=X, pi=0.01, pve=0.7, sigma.a2=0, sigma.d2=1)
modelAD$sigma2
library(lme4)
dat <- modelAD$dat
colnames(dat)[colnames(dat) == "geno"] <- "geno.add"
dat$geno.dom <- dat$geno.add
fit3A <- lmerAM(formula=response1 ~ year + (1|geno.add), data=dat,
relmat=list(geno.add=A))
(vc3A <- as.data.frame(VarCorr(fit3A$merMod)))
(pve3A.hat <- vc3A$vcov[1] / (vc3A$vcov[1] + vc3A$vcov[2]))
fit3D <- lmerAM(formula=response1 ~ year + (1|geno.dom), data=dat,
relmat=list(geno.dom=D))
(vc3D <- as.data.frame(VarCorr(fit3D$merMod)))
(pve3D.hat <- vc3D$vcov[1] / (vc3D$vcov[1] + vc3D$vcov[2]))
fit3AD <- lmerAM(formula=response1 ~ year + (1|geno.add) + (1|geno.dom),
data=dat, relmat=list(geno.add=A, geno.dom=D))
(vc3AD <- as.data.frame(VarCorr(fit3AD$merMod)))
(pve3AD.hat <- (vc3AD$vcov[1] + vc3AD$vcov[2]) /
(vc3AD$vcov[1] + vc3AD$vcov[2] + vc3AD$vcov[3]))
## additive infinitesimal genetic architecture
Q <- 3
modelA <- simulBvsr(Q=Q, X=X, pi=1, pve=0.7, sigma.a2=1)
modelA$sigma2
library(rrBLUP)
fit.RR <- mixed.solve(y=modelA$Y[,1], Z=modelA$Z %*% modelA$X.A,
K=NULL, X=modelA$W)
fit.RR$Ve
fit.RR$Vu
fit.AM <- mixed.solve(y=modelA$Y[,1], Z=modelA$Z, K=A, X=modelA$W)
fit.AM$Ve
fit.AM$Vu
afs <- estimSnpAf(X=X)
fit.AM$Vu / (2 * sum(afs * (1 - afs))) # see Habier et al (2007)
fit.AM$Vu / (fit.AM$Vu + fit.AM$Ve) # same as the specified pve
## End(Not run)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.