Nothing
#' Exact p-values for genome-wide association analysis in inbred populations
#'
#' The function imports GenABEL (gwaa.data class) data format
#' and calculates the exact dataset-specific p-values for each variant
#' or a given effect size and allele frequency.
#'
#' @param gwaa.object An (optional) object of \code{\link{gwaa.data-class}}.
#' @param n An (optional) integer gives the sample size, only used when \code{gwaa.object = NULL}.
#' @param maf An (optional) vector gives minor allele frequencies across the genome,
#' only used when \code{gwaa.object = NULL}.
#' @param beta An vector gives the effect sizes to be tested, and \code{beta.maf} must
#' also be given if not testing for the whole genome.
#' @param beta.maf An (optional) vector gives the minor allele frequencies in accordance with
#' \code{beta}, must be used together with \code{beta}. If \code{beta.maf}
#' is missing, exact p-values will be calculated for the whole genome.
#' @param low.maf A numeric value that givens the cut-off of the lowest minor allele frequency
#' allowed in the analysis.
#' @param type A string tells the statistical test type, can be \code{'one-sided'} or \code{'two-sided'}.
#'
#' @note None.
#'
#' @return The function returns a data frame of exact p-values (\code{$p.exact}) and corresponding effect
#' sizes \code{$beta} and MAFs \code{$MAF}.
#'
#' @author Xia Shen
#'
#' @references
#' Xia Shen (2015). Flaw or discovery? Calculating exact p-values for
#' genome-wide association studies in inbred populations. \emph{Submitted}.
#'
#' @seealso
#' \code{\link{qtscore}}, \code{\link{t.test}}
#'
#' @examples
#' \dontrun{
#' ## loading example gwaa.data of data from Atwell et al. (2010) Nature
#' data(arab)
#'
#' ## running a regular GWA analysis for long-day flowering time
#' y <- qnorm((rank(phdata(arab)$X1_LD, na.last = "keep") - 0.5)/sum(!is.na(phdata(arab)$X1_LD)))
#' qt1 <- qtscore(y, data = arab)
#'
#' ## calculating all the exact p-values for the whole genome markers
#' exact <- p.exact.gaussian(arab, beta = qt1[,'effB'])
#' }
#' @aliases p.exact.gaussian
#' @keywords p.exact, exact p-value, genome-wide false discovery rate
#'
`p.exact.gaussian` <- function(gwaa.object = NULL, n = NULL, maf = NULL, beta, beta.maf = NULL, low.maf = .05, type = 'two-sided') {
if (all(c(is.null(gwaa.object), is.null(n), is.null(maf)))) {
stop('insufficient input!')
} else if (is.null(gwaa.object) & any(is.null(c(n, maf)))) {
stop('insufficient input!')
} else if (all(c(!is.null(gwaa.object), !is.null(n), !is.null(maf)))) {
cat('over-sufficient input provided, only n & maf in use.\n')
}
if (!is.null(gwaa.object)) {
cat('processing gwaa.object ...\n')
Z <- as.double(gwaa.object)
n <- nrow(Z)
n2 <- colSums(Z == 2)
n0 <- colSums(Z == 0)
minor <- as.numeric(n2 < n0)
allfreq <- minor*n2/n + (1 - minor)*n0/n
}
if (is.null(gwaa.object) & !is.null(n) & !is.null(maf)) {
allfreq <- maf
}
k <- table(allfreq)
freq <- as.numeric(names(k))
cat('MAF distribution obtained:\n')
cat('lowest MAF =', freq[which(freq >= low.maf)[1]], 'count =', k[which(freq >= low.maf)[1]], '\n')
cat('highest MAF =', freq[length(k)], 'count =', k[length(k)], '\n')
if (!is.null(beta) & is.null(beta.maf) & length(beta) == length(allfreq)) {
beta.maf <- allfreq
}
if (is.null(gwaa.object) & (is.null(beta) | is.null(beta.maf))) {
stop('either beta or beta.maf is missing!')
}
if (is.null(gwaa.object) & length(beta) != length(beta.maf)) {
stop('beta and beta.maf have different lengths!')
}
if (!is.null(beta) & !is.null(beta.maf) & length(beta) == length(beta.maf)) {
cat('calculating exact p-values ...\n')
p <- q <- rep(NA, length(beta))
for (i in 1:length(beta)) {
if (beta.maf[i] >= low.maf) {
b <- abs(beta[i])
freqidx <- which(freq >= beta.maf[i] - 1e-8)
freqi <- freq[freqidx]
if (type == 'one-sided') {
logpj <- log(pnorm(b, 0, sqrt(1/n/freqi + 1/n/(1 - freqi))))
} else if (type == 'two-sided') {
logpj <- log(pchisq(b**2/(1/n/freqi + 1/n/(1 - freqi)), 1))
} else {
stop('wrong type of test specified!')
}
q[i] <- exp(sum(logpj*k[freqidx]))
p[i] <- 1 - q[i]
} else {
q[i] <- 0
p[i] <- 1
}
progress(i/length(beta)*100)
}
}
res <- data.frame(beta = beta, MAF = beta.maf, p.exact = p)
if (nrow(res) == length(allfreq) & !is.null(gwaa.object)) {
rownames(res) <- gtdata(gwaa.object)@snpnames
}
return(res)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.