R/estlambda.R
In GenABEL: genome-wide SNP association analysis

#' Estimate the inflation factor for a distribution of P-values
#'
#' Estimate the inflation factor for a distribution of P-values or 1df
#' chi-square test. The major use of this procedure is the Genomic Control, but
#' can also be used to visualise the distribution of P-values coming from other
#' tests. Methods implemented include 'median' (median(chi2)/0.455...),
#' regression (of observed onto expected) and 'KS' (optimizing the
#' chi2.1df distribution fit by use of Kolmogorov-Smirnov test)
#'
#'
#' @param data A vector of reals. If all are <=1, it is assumed that this is a
#'   vector of P-values, else it is treated as a vector of chi-squares
#' @param plot Whether the plot should be shown or not (default).
#' @param proportion The proportion of lowest P (or
#'   \eqn{\chi^2}{chi^2) values to be used when estimating the inflation
#'   factor \eqn{\lambda}{lambda}.
#' @param method "regression" (default), "median", or "KS": method to
#'   be used for \eqn{\lambda}{lambda} estimation.
#' @param filter if the test statistics with 0-value of
#'   \eqn{\chi^2}{chi^2} should be excluded prior to estimation of
#'   \eqn{\lambda}{lambda}.
#' @param df Number of degrees of freedom.
#' @param ... arguments passed to the \code{\link{plot}} function.
#' @return A list with elements \item{estimate}{Estimate of \eqn{\lambda}{lambda}}
#'   \item{se}{Standard error of the estimate}
#' @author Yurii Aulchenko
#' @seealso \code{\link{ccfast}}, \code{\link{qtscore}}
#' @keywords htest
#' @examples
#'
#' data(srdta)
#' pex <- summary(gtdata(srdta))[,"Pexact"]
#' estlambda(pex, plot=TRUE)
#' estlambda(pex, method="regression", proportion = 0.95)
#' estlambda(pex, method="median")
#' estlambda(pex, method="KS")
#' a <- qtscore(bt,srdta)
#' lambda(a)
#'
"estlambda" <- function(data, plot=FALSE, proportion=1.0,
                        method="regression", filter=TRUE, df=1,... ) {
        data <- data[which(!is.na(data))]
        if (proportion>1.0 || proportion<=0)
                stop("proportion argument should be greater then zero and less than or equal to one")

        ntp <- round( proportion * length(data) )
        if ( ntp<1 ) stop("no valid measurements")
        if ( ntp==1 ) {
                warning(paste("One measurement, lambda = 1 returned"))
                return(list(estimate=1.0, se=999.99))
        }
        if ( ntp<10 ) warning(paste("number of points is too small:", ntp))
        if ( min(data)<0 ) stop("data argument has values <0")
        if ( max(data)<=1 ) {
#		lt16 <- (data < 1.e-16)
#		if (any(lt16)) {
#			warning(paste("Some probabilities < 1.e-16; set to 1.e-16"))
#			data[lt16] <- 1.e-16
#		}
                data <- qchisq(data, 1, lower.tail=FALSE)
        }
        if (filter)
        {
                data[which(abs(data)<1e-8)] <- NA
        }
        data <- sort(data)
        ppoi <- ppoints(data)
        ppoi <- sort(qchisq(ppoi, df=df, lower.tail=FALSE))
        data <- data[1:ntp]
        ppoi <- ppoi[1:ntp]
#	s <- summary(lm(data~offset(ppoi)))$coeff
#       bug fix thanks to Franz Quehenberger

        out <- list()
        if (method=="regression") {
                s <- summary( lm(data~0+ppoi) )$coeff
                out$estimate <- s[1,1]
                out$se <- s[1,2]
        } else if (method=="median") {
                out$estimate <- median(data, na.rm=TRUE)/qchisq(0.5, df)
                out$se <- NA
        } else if (method=="KS") {
                limits <- c(0.5, 100)
                out$estimate <- estLambdaKS(data, limits=limits, df=df)
                if ( abs(out$estimate-limits[1])<1e-4 || abs(out$estimate-limits[2])<1e-4 )
                        warning("using method='KS' lambda too close to limits, use other method")
                out$se <- NA
        } else {
                stop("'method' should be either 'regression' or 'median'!")
        }

        if (plot) {
                lim <- c(0, max(data, ppoi,na.rm=TRUE))
#		plot(ppoi,data,xlim=lim,ylim=lim,xlab="Expected",ylab="Observed", ...)
                oldmargins <- par()$mar
                par(mar=oldmargins + 0.2)
                plot(ppoi, data,
                     xlab=expression("Expected " ~ chi^2),
                     ylab=expression("Observed " ~ chi^2),
                     ...)
                abline(a=0, b=1)
                abline(a=0, b=out$estimate, col="red")
                par(mar=oldmargins)
        }

        out
}