docs/GWAS.R

> GWAS
function (pheno, geno, fixed = NULL, K = NULL, n.PC = 0, min.MAF = 0.05, 
    n.core = 1, P3D = TRUE, plot = TRUE) 
{
    qvalue <- function(p) {
        smooth.df = 3
        if (min(p) < 0 || max(p) > 1) {
            print("ERROR: p-values not in valid range.")
            return(0)
        }
        lambda = seq(0, 0.9, 0.05)
        m <- length(p)
        pi0 <- rep(0, length(lambda))
        for (i in 1:length(lambda)) {
            pi0[i] <- mean(p >= lambda[i])/(1 - lambda[i])
        }
        spi0 <- smooth.spline(lambda, pi0, df = smooth.df)
        pi0 <- predict(spi0, x = max(lambda))$y
        pi0 <- min(pi0, 1)
        if (pi0 <= 0) {
            print("ERROR: The estimated pi0 <= 0. Check that you have valid p-values.")
            return(0)
        }
        u <- order(p)
        qvalue.rank <- function(x) {
            idx <- sort.list(x)
            fc <- factor(x)
            nl <- length(levels(fc))
            bin <- as.integer(fc)
            tbl <- tabulate(bin)
            cs <- cumsum(tbl)
            tbl <- rep(cs, tbl)
            tbl[idx] <- tbl
            return(tbl)
        }
        v <- qvalue.rank(p)
        qvalue <- pi0 * m * p/v
        qvalue[u[m]] <- min(qvalue[u[m]], 1)
        for (i in (m - 1):1) {
            qvalue[u[i]] <- min(qvalue[u[i]], qvalue[u[i + 1]], 
                1)
        }
        return(qvalue)
    }
    manhattan <- function(input, fdr.level = 0.05) {
        input <- input[order(input[, 2], input[, 3]), ]
        chroms <- unique(input[, 2])
        n.chrom <- length(chroms)
        chrom.start <- rep(0, n.chrom)
        chrom.mid <- rep(0, n.chrom)
        if (n.chrom > 1) {
            for (i in 1:(n.chrom - 1)) {
                chrom.start[i + 1] <- chrom.start[i] + max(input[which(input[, 
                  2] == chroms[i]), 3]) + 1
            }
        }
        x.max <- chrom.start[n.chrom] + max(input[which(input[, 
            2] == chroms[n.chrom]), 3])
        plot(0, 0, type = "n", xlim = c(0, x.max), ylim = c(0, 
            max(input[, 4]) + 1), ylab = "-log(p)", xlab = "Chromosome", 
            xaxt = "n")
        for (i in seq(1, n.chrom, by = 2)) {
            ix <- which(input[, 2] == chroms[i])
            chrom.mid[i] <- median(chrom.start[i] + input[ix, 
                3])
            points(chrom.start[i] + input[ix, 3], input[ix, 4], 
                col = "dark blue", pch = 16)
        }
        if (n.chrom > 1) {
            for (i in seq(2, n.chrom, by = 2)) {
                ix <- which(input[, 2] == chroms[i])
                chrom.mid[i] <- median(chrom.start[i] + input[ix, 
                  3])
                points(chrom.start[i] + input[ix, 3], input[ix, 
                  4], col = "cornflowerblue", pch = 16)
            }
        }
        q.ans <- qvalue(10^-input[, 4])
        temp <- cbind(q.ans, input[, 4])
        temp <- temp[order(temp[, 1]), ]
        if (temp[1, 1] < fdr.level) {
            temp2 <- tapply(temp[, 2], temp[, 1], mean)
            qvals <- as.numeric(rownames(temp2))
            x <- which.min(abs(qvals - fdr.level))
            first <- max(1, x - 2)
            last <- min(x + 2, length(qvals))
            if ((last - first) < 4) {
                last <- first + 3
            }
            splin <- smooth.spline(x = qvals[first:last], y = temp2[first:last], 
                df = 3)
            lines(x = c(0, x.max), y = rep(predict(splin, x = fdr.level)$y, 
                2), lty = 2)
        }
        axis(side = 1, at = chrom.mid, labels = chroms)
    }
    qq <- function(scores) {
        remove <- which(scores == 0)
        if (length(remove) > 0) {
            x <- sort(scores[-remove], decreasing = TRUE)
        }
        else {
            x <- sort(scores, decreasing = TRUE)
        }
        n <- length(x)
        unif.p <- -log10(ppoints(n))
        plot(unif.p, x, pch = 16, xlab = "Expected -log(p)", 
            ylab = "Observed -log(p)")
        lines(c(0, max(unif.p)), c(0, max(unif.p)), lty = 2)
    }
    score.calc <- function(M) {
        m <- ncol(M)
        scores <- array(0, m)
        for (i in 1:m) {
            Mi <- M[, i]
            freq <- mean(Mi + 1, na.rm = TRUE)/2
            MAF <- min(freq, 1 - freq)
            if (MAF >= min.MAF) {
                not.NA.gid <- which(!is.na(Mi))
                temp <- rep(1, length(Mi))
                temp[not.NA.gid] <- 0
                not.NA.obs <- which(Z %*% temp != 1)
                n2 <- length(not.NA.obs)
                y2 <- matrix(y[not.NA.obs], n2, 1)
                Z2 <- Z[not.NA.obs, not.NA.gid]
                X3 <- cbind(X2[not.NA.obs, ], Z2 %*% Mi[not.NA.gid])
                p <- ncol(X3)
                v1 <- 1
                v2 <- n2 - p
                if (!P3D) {
                  H2inv <- mixed.solve(y = y2, X = X3, Z = Z2, 
                    K = K2[not.NA.gid, not.NA.gid], return.Hinv = TRUE)$Hinv
                }
                else {
                  H2inv <- Hinv[not.NA.obs, not.NA.obs]
                }
                W <- crossprod(X3, H2inv %*% X3)
                Winv <- try(solve(W), silent = TRUE)
                if (class(Winv) != "try-error") {
                  beta <- Winv %*% crossprod(X3, H2inv %*% y2)
                  resid <- y2 - X3 %*% beta
                  s2 <- as.double(crossprod(resid, H2inv %*% 
                    resid))/v2
                  CovBeta <- s2 * Winv
                  Fstat <- beta[p]^2/CovBeta[p, p]
                  x <- v2/(v2 + v1 * Fstat)
                  scores[i] <- -log10(pbeta(x, v2/2, v1/2))
                }
            }
        }
        return(scores)
    }
    make.full <- function(X) {
        svd.X <- svd(X)
        r <- max(which(svd.X$d > 1e-08))
        return(as.matrix(svd.X$u[, 1:r]))
    }
    n <- nrow(pheno)
    pheno.ix <- 2:ncol(pheno)
    names <- colnames(pheno)
    X <- matrix(1, n, 1)
    if (!is.null(fixed)) {
        p <- length(fixed)
        for (i in 1:p) {
            xpos <- match(fixed[i], names)
            pheno.ix <- setdiff(pheno.ix, xpos)
            xx <- factor(pheno[, xpos])
            if (length(unique(xx)) > 1) {
                X <- cbind(X, model.matrix(~x - 1, data.frame(x = xx)))
            }
        }
    }
    geno <- geno[order(geno[, 2], geno[, 3]), ]
    M <- t(geno[, -c(1:3)])
    map <- geno[, 1:3]
    m <- ncol(M)
    geno.gid <- colnames(geno)[-c(1:3)]
    rownames(M) <- geno.gid
    if (is.null(K)) {
        K <- A.mat(M, shrink = FALSE)
    }
    if (n.PC > 0) {
        eig.vec <- eigen(K)$vectors
    }
    if (length(which(rownames(K) != geno.gid)) > 0) {
        stop("Line names in K and genotype file do not match.")
    }
    n.phenos <- length(pheno.ix)
    all.scores <- matrix(0, m, n.phenos)
    trait.names <- colnames(pheno)[pheno.ix]
    colnames(all.scores) <- trait.names
    if (plot) {
        p1 <- floor(sqrt(n.phenos))
        p2 <- ceiling(n.phenos/p1)
        par(mfrow = c(p1, p2))
    }
    if (n.phenos == 0) {
        stop("No phenotypes.")
    }
    for (i in 1:n.phenos) {
        print(paste("GWAS for trait:", trait.names[i]))
        y <- pheno[, pheno.ix[i]]
        not.miss <- which(!is.na(y))
        y <- y[not.miss]
        n <- length(y)
        pheno.gid <- unique(pheno[not.miss, 1])
        n.gid <- length(pheno.gid)
        ix.pheno <- match(pheno.gid, geno.gid)
        miss.pheno.gid <- which(is.na(ix.pheno))
        if (length(miss.pheno.gid) > 0) {
            stop(paste("The following lines have phenotypes but no genotypes:", 
                paste(unique(pheno.gid[miss.pheno.gid]), collapse = " ")))
        }
        Z <- matrix(0, n, length(pheno.gid))
        Z[cbind(1:n, match(pheno[not.miss, 1], pheno.gid))] <- 1
        K2 <- K[ix.pheno, ix.pheno]
        if (n.PC > 0) {
            X2 <- make.full(cbind(X[not.miss, ], Z %*% eig.vec[ix.pheno, 
                1:n.PC]))
        }
        else {
            X2 <- make.full(X[not.miss, ])
        }
        if (P3D) {
            Hinv <- mixed.solve(y, X = X2, Z = Z, K = K2, return.Hinv = TRUE)$Hinv
            print("Variance components estimated. Testing markers.")
        }
        if ((n.core > 1) & requireNamespace("parallel", quietly = TRUE)) {
            it <- split(1:m, factor(cut(1:m, n.core, labels = FALSE)))
            scores <- unlist(parallel::mclapply(it, function(markers) {
                score.calc(M[ix.pheno, markers])
            }, mc.cores = n.core))
        }
        else {
            scores <- score.calc(M[ix.pheno, ])
        }
        if (plot) {
            qq(scores)
            title(main = trait.names[i])
        }
        all.scores[, i] <- scores
    }
    if (plot) {
        if (length(grep("RStudio", names(dev.cur()))) == 0) {
            if (dev.cur() == dev.next()) {
                dev.new()
            }
            else {
                dev.set(dev.next())
            }
        }
        par(mfrow = c(p1, p2))
        for (j in 1:n.phenos) {
            manhattan(cbind(map, all.scores[, j]))
            title(main = trait.names[j])
        }
    }
    return(data.frame(map, all.scores))
}
<environment: namespace:rrBLUP>
variani/athaliana documentation built on May 3, 2019, 4:34 p.m.