gemma | R Documentation |
See Zhou & Stephens (Nature Genetics, 2012), and Zhou et al (PLoS Genetics, 2013).
gemma(
model = "ulmm",
y,
X,
snp.coords,
recode.genos = TRUE,
alleles = NULL,
maf = 0.01,
K.c = NULL,
W = NULL,
weights = NULL,
exe = Sys.which("gemma"),
out.dir = getwd(),
task.id = "gemma",
verbose = 1,
clean = "none",
seed = 1859,
burnin = 1000,
nb.iters = 7000,
thin = 10
)
model |
name of the model to fit (ulmm/bslmm) |
y |
vector of phenotypes with genotype names |
X |
matrix of bi-allelic SNP genotypes encoded, for each SNP, in number of copies of its second allele, i.e. as allele doses in 0,1,2, with genotypes in rows and SNPs in columns; the "second" allele is arbitrary, it corresponds to the second column of |
snp.coords |
data.frame with SNPs as row names and two columns named "coord" and "chr" |
recode.genos |
if TRUE, SNP genotypes in X will be recoded so that the minor allele is counted |
alleles |
data.frame with SNPs in rows (names as row names) and alleles in columns (exactly 2 columns are required); the second column should correspond to the allele which number of copies is counted at each SNP in |
maf |
SNPs with minor allele frequency strictly below this threshold will be discarded |
K.c |
kinship matrix; if NULL, will be estimated using X via |
W |
matrix of covariates with genotypes in rows (names as row names), a first column of 1 for the intercept and, if needed, a second column of covariates values; if NULL, a column of 1 will be used for the intercept |
weights |
vector of positive weights with genotype names |
exe |
path to the executable of GEMMA |
out.dir |
directory in which the output files will be saved |
task.id |
identifier of the task (used in temporary and output file names) |
verbose |
verbosity level (0/1) |
clean |
remove files: none, some (temporary only), all (temporary and results) |
seed |
seed for the pseudo-random number generator for GEMMA |
burnin |
number of iterations to discard as burn-in (if model="bslmm") |
nb.iters |
number of iterations (if model="bslmm") |
thin |
thining (if model="bslmm") |
list with the following components: recoded, cmd, log, global mean, pve, tests (and hyperparams and params for BSLMM)
P. Carbonetto showed how the PVE of a single SNP can be obtained, assuming no covariate, x is centered and Cov(x,beta) is zero: pve <- var(x) * (beta^2 + se^2)/var(y)
Timothee Flutre [aut,cre], Dalel Ahmed [ctb]
gemmaUlmmPerChr
## Not run: ## simulate genotypes
set.seed(1859)
I <- 200
P <- 2000
X <- simulGenosDose(nb.genos=I, nb.snps=P)
## make fake SNP coordinates and alleles
nb.snps.per.chr <- ncol(X) / 2
snp.coords <- data.frame(coord=rep(1:nb.snps.per.chr, 2),
chr=rep(c("chr1","chr2"), each=nb.snps.per.chr),
row.names=colnames(X),
stringsAsFactors=FALSE)
alleles <- data.frame(major=rep("A", ncol(X)),
minor="a",
row.names=colnames(X),
stringsAsFactors=FALSE)
## simulate phenotypes (only additive effects)
modelA <- simulBvsr(Q=1, X=X, pi=0.01, pve=0.7, sigma.a2=1)
summary(abs(modelA$a[modelA$gamma == 1]))
## test SNPs one by one with the univariate LMM
fit.u <- gemma(model="ulmm", y=modelA$Y[,1], X=X, snp.coords=snp.coords,
alleles=alleles, W=modelA$W, out.dir=tempdir(), clean="all")
fit.u$global.mean
fit.u$pve
cor(modelA$a[modelA$gamma == 1], fit.u$tests$beta[modelA$gamma == 1])
cols <- rep("black",ncol(X)); cols[modelA$gamma==1] <- "red"
pvadj.AA <- qqplotPval(fit.u$tests$p_wald, col=cols, ctl.fdr.bh=TRUE,
plot.signif=TRUE)
t(binaryClassif(known.nulls=modelA$gamma == 0,
called.nulls=pvadj.AA$pv.bh > 0.05))
qtl <- names(sort(modelA$a[modelA$gamma == 1], decreasing=TRUE))[1]
boxplotCandidateQtl(y=modelA$Y[,1], X=X, snp=qtl, show.points=TRUE, main=qtl)
slope <- fit.u$tests[qtl,"beta"] *
ifelse(fit.u$recode[qtl], -1, 1)
abline(a=fit.u$global.mean["beta.hat"] - mean(X[,qtl]) * slope,
b=slope, col="red")
## compute the PVE of a single SNP (assuming no covariate)
(pve.qtl <- var(X[,qtl]) * (fit.u$tests[qtl,"beta"]^2 + fit.u$tests[qtl,"se"]^2) / var(modelA$Y[,1]))
## same but per chrom
## fit.u2 <- gemmaUlmmPerChr(y=modelA$Y[,1], X=X, snp.coords=snp.coords,
## alleles=alleles, W=modelA$W,
## out.dir=tempdir(), clean="all")
## fit all SNPs jointly with the BSLMM
burnin <- 10^3
nb.iters <- 10^4
thin <- 10^2
fit.bs <- gemma(model="bslmm", y=modelA$Y[,1], X=X, snp.coords=snp.coords,
alleles=alleles, W=modelA$W, out.dir=tempdir(), clean="all",
burnin=burnin, nb.iters=nb.iters, thin=thin)
posterior.samples <- coda::mcmc(data=fit.bs$hyperparams, start=burnin + 1,
end=burnin + nb.iters, thin=thin)
summary(posterior.samples)
## simulate phenotypes (only dominant effects)
set.seed(1859)
modelD <- simulBvsr(Q=1, X=X, pi=0.01, pve=0.7, sigma.a2=0, sigma.d2=1)
## test SNPs as "additive" one by one with the univariate LMM
A.z <- estimGenRel(X=X, relationships="additive", method="zhou")
fit.u.DA <- gemma(model="ulmm", y=modelD$Y[,1], X=X,
snp.coords, alleles,
K.c=A.z, W=modelD$W, out.dir=tempdir(), clean="all")
cols <- rep("black",ncol(X)); cols[modelD$gamma==1] <- "red"
pvadj.DA <- qqplotPval(fit.u.DA$tests$p_wald, col=cols, ctl.fdr.bh=TRUE,
plot.signif=TRUE)
t(binaryClassif(known.nulls=modelD$gamma == 0,
called.nulls=pvadj.DA$pv.bh > 0.05))
## test SNPs as "dominant" one by one with the univariate LMM
X.D <- recodeIntoDominant(X=X)
fit.u.DD <- gemma(model="ulmm", y=modelD$Y[,1], X=X.D,
snp.coords, alleles,
K.c=A.z, W=modelD$W, out.dir=tempdir(), clean="all")
cols <- rep("black",ncol(X)); cols[modelD$gamma==1] <- "red"
pvadj.DD <- qqplotPval(fit.u.DD$tests$p_wald, col=cols, ctl.fdr.bh=TRUE,
plot.signif=TRUE)
t(binaryClassif(known.nulls=modelD$gamma == 0,
called.nulls=pvadj.DD$pv.bh > 0.05))
## End(Not run)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.