> GWAS
function (pheno, geno, fixed = NULL, K = NULL, n.PC = 0, min.MAF = 0.05,
n.core = 1, P3D = TRUE, plot = TRUE)
{
qvalue <- function(p) {
smooth.df = 3
if (min(p) < 0 || max(p) > 1) {
print("ERROR: p-values not in valid range.")
return(0)
}
lambda = seq(0, 0.9, 0.05)
m <- length(p)
pi0 <- rep(0, length(lambda))
for (i in 1:length(lambda)) {
pi0[i] <- mean(p >= lambda[i])/(1 - lambda[i])
}
spi0 <- smooth.spline(lambda, pi0, df = smooth.df)
pi0 <- predict(spi0, x = max(lambda))$y
pi0 <- min(pi0, 1)
if (pi0 <= 0) {
print("ERROR: The estimated pi0 <= 0. Check that you have valid p-values.")
return(0)
}
u <- order(p)
qvalue.rank <- function(x) {
idx <- sort.list(x)
fc <- factor(x)
nl <- length(levels(fc))
bin <- as.integer(fc)
tbl <- tabulate(bin)
cs <- cumsum(tbl)
tbl <- rep(cs, tbl)
tbl[idx] <- tbl
return(tbl)
}
v <- qvalue.rank(p)
qvalue <- pi0 * m * p/v
qvalue[u[m]] <- min(qvalue[u[m]], 1)
for (i in (m - 1):1) {
qvalue[u[i]] <- min(qvalue[u[i]], qvalue[u[i + 1]],
1)
}
return(qvalue)
}
manhattan <- function(input, fdr.level = 0.05) {
input <- input[order(input[, 2], input[, 3]), ]
chroms <- unique(input[, 2])
n.chrom <- length(chroms)
chrom.start <- rep(0, n.chrom)
chrom.mid <- rep(0, n.chrom)
if (n.chrom > 1) {
for (i in 1:(n.chrom - 1)) {
chrom.start[i + 1] <- chrom.start[i] + max(input[which(input[,
2] == chroms[i]), 3]) + 1
}
}
x.max <- chrom.start[n.chrom] + max(input[which(input[,
2] == chroms[n.chrom]), 3])
plot(0, 0, type = "n", xlim = c(0, x.max), ylim = c(0,
max(input[, 4]) + 1), ylab = "-log(p)", xlab = "Chromosome",
xaxt = "n")
for (i in seq(1, n.chrom, by = 2)) {
ix <- which(input[, 2] == chroms[i])
chrom.mid[i] <- median(chrom.start[i] + input[ix,
3])
points(chrom.start[i] + input[ix, 3], input[ix, 4],
col = "dark blue", pch = 16)
}
if (n.chrom > 1) {
for (i in seq(2, n.chrom, by = 2)) {
ix <- which(input[, 2] == chroms[i])
chrom.mid[i] <- median(chrom.start[i] + input[ix,
3])
points(chrom.start[i] + input[ix, 3], input[ix,
4], col = "cornflowerblue", pch = 16)
}
}
q.ans <- qvalue(10^-input[, 4])
temp <- cbind(q.ans, input[, 4])
temp <- temp[order(temp[, 1]), ]
if (temp[1, 1] < fdr.level) {
temp2 <- tapply(temp[, 2], temp[, 1], mean)
qvals <- as.numeric(rownames(temp2))
x <- which.min(abs(qvals - fdr.level))
first <- max(1, x - 2)
last <- min(x + 2, length(qvals))
if ((last - first) < 4) {
last <- first + 3
}
splin <- smooth.spline(x = qvals[first:last], y = temp2[first:last],
df = 3)
lines(x = c(0, x.max), y = rep(predict(splin, x = fdr.level)$y,
2), lty = 2)
}
axis(side = 1, at = chrom.mid, labels = chroms)
}
qq <- function(scores) {
remove <- which(scores == 0)
if (length(remove) > 0) {
x <- sort(scores[-remove], decreasing = TRUE)
}
else {
x <- sort(scores, decreasing = TRUE)
}
n <- length(x)
unif.p <- -log10(ppoints(n))
plot(unif.p, x, pch = 16, xlab = "Expected -log(p)",
ylab = "Observed -log(p)")
lines(c(0, max(unif.p)), c(0, max(unif.p)), lty = 2)
}
score.calc <- function(M) {
m <- ncol(M)
scores <- array(0, m)
for (i in 1:m) {
Mi <- M[, i]
freq <- mean(Mi + 1, na.rm = TRUE)/2
MAF <- min(freq, 1 - freq)
if (MAF >= min.MAF) {
not.NA.gid <- which(!is.na(Mi))
temp <- rep(1, length(Mi))
temp[not.NA.gid] <- 0
not.NA.obs <- which(Z %*% temp != 1)
n2 <- length(not.NA.obs)
y2 <- matrix(y[not.NA.obs], n2, 1)
Z2 <- Z[not.NA.obs, not.NA.gid]
X3 <- cbind(X2[not.NA.obs, ], Z2 %*% Mi[not.NA.gid])
p <- ncol(X3)
v1 <- 1
v2 <- n2 - p
if (!P3D) {
H2inv <- mixed.solve(y = y2, X = X3, Z = Z2,
K = K2[not.NA.gid, not.NA.gid], return.Hinv = TRUE)$Hinv
}
else {
H2inv <- Hinv[not.NA.obs, not.NA.obs]
}
W <- crossprod(X3, H2inv %*% X3)
Winv <- try(solve(W), silent = TRUE)
if (class(Winv) != "try-error") {
beta <- Winv %*% crossprod(X3, H2inv %*% y2)
resid <- y2 - X3 %*% beta
s2 <- as.double(crossprod(resid, H2inv %*%
resid))/v2
CovBeta <- s2 * Winv
Fstat <- beta[p]^2/CovBeta[p, p]
x <- v2/(v2 + v1 * Fstat)
scores[i] <- -log10(pbeta(x, v2/2, v1/2))
}
}
}
return(scores)
}
make.full <- function(X) {
svd.X <- svd(X)
r <- max(which(svd.X$d > 1e-08))
return(as.matrix(svd.X$u[, 1:r]))
}
n <- nrow(pheno)
pheno.ix <- 2:ncol(pheno)
names <- colnames(pheno)
X <- matrix(1, n, 1)
if (!is.null(fixed)) {
p <- length(fixed)
for (i in 1:p) {
xpos <- match(fixed[i], names)
pheno.ix <- setdiff(pheno.ix, xpos)
xx <- factor(pheno[, xpos])
if (length(unique(xx)) > 1) {
X <- cbind(X, model.matrix(~x - 1, data.frame(x = xx)))
}
}
}
geno <- geno[order(geno[, 2], geno[, 3]), ]
M <- t(geno[, -c(1:3)])
map <- geno[, 1:3]
m <- ncol(M)
geno.gid <- colnames(geno)[-c(1:3)]
rownames(M) <- geno.gid
if (is.null(K)) {
K <- A.mat(M, shrink = FALSE)
}
if (n.PC > 0) {
eig.vec <- eigen(K)$vectors
}
if (length(which(rownames(K) != geno.gid)) > 0) {
stop("Line names in K and genotype file do not match.")
}
n.phenos <- length(pheno.ix)
all.scores <- matrix(0, m, n.phenos)
trait.names <- colnames(pheno)[pheno.ix]
colnames(all.scores) <- trait.names
if (plot) {
p1 <- floor(sqrt(n.phenos))
p2 <- ceiling(n.phenos/p1)
par(mfrow = c(p1, p2))
}
if (n.phenos == 0) {
stop("No phenotypes.")
}
for (i in 1:n.phenos) {
print(paste("GWAS for trait:", trait.names[i]))
y <- pheno[, pheno.ix[i]]
not.miss <- which(!is.na(y))
y <- y[not.miss]
n <- length(y)
pheno.gid <- unique(pheno[not.miss, 1])
n.gid <- length(pheno.gid)
ix.pheno <- match(pheno.gid, geno.gid)
miss.pheno.gid <- which(is.na(ix.pheno))
if (length(miss.pheno.gid) > 0) {
stop(paste("The following lines have phenotypes but no genotypes:",
paste(unique(pheno.gid[miss.pheno.gid]), collapse = " ")))
}
Z <- matrix(0, n, length(pheno.gid))
Z[cbind(1:n, match(pheno[not.miss, 1], pheno.gid))] <- 1
K2 <- K[ix.pheno, ix.pheno]
if (n.PC > 0) {
X2 <- make.full(cbind(X[not.miss, ], Z %*% eig.vec[ix.pheno,
1:n.PC]))
}
else {
X2 <- make.full(X[not.miss, ])
}
if (P3D) {
Hinv <- mixed.solve(y, X = X2, Z = Z, K = K2, return.Hinv = TRUE)$Hinv
print("Variance components estimated. Testing markers.")
}
if ((n.core > 1) & requireNamespace("parallel", quietly = TRUE)) {
it <- split(1:m, factor(cut(1:m, n.core, labels = FALSE)))
scores <- unlist(parallel::mclapply(it, function(markers) {
score.calc(M[ix.pheno, markers])
}, mc.cores = n.core))
}
else {
scores <- score.calc(M[ix.pheno, ])
}
if (plot) {
qq(scores)
title(main = trait.names[i])
}
all.scores[, i] <- scores
}
if (plot) {
if (length(grep("RStudio", names(dev.cur()))) == 0) {
if (dev.cur() == dev.next()) {
dev.new()
}
else {
dev.set(dev.next())
}
}
par(mfrow = c(p1, p2))
for (j in 1:n.phenos) {
manhattan(cbind(map, all.scores[, j]))
title(main = trait.names[j])
}
}
return(data.frame(map, all.scores))
}
<environment: namespace:rrBLUP>
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.