Nothing
#' Log QQ p-value plot
#'
#' Fast function for generating a log quantile-quantile (QQ) p-value plot
#'
#' @param pval A vector of p-values
#' @param fdr An optional vector of FDR values to save time if previously
#' computed. If not supplied, these will be calculated using [p.adjust()]
#' using the Benjamini-Hochberg method.
#' @param fdr_cutoff Cutoff for FDR significance
#' @param scheme Vector of 2 colours for plotting non-significant and
#' significant SNPs
#' @param npoints Limits the number of non-significant points being plotted to
#' speed up plotting. See details. Set to `NULL` to plot all points.
#' @param show_plot Logical whether to produce a plot via base graphics or just
#' return dataframe ready for plotting.
#' @param verbose Whether to show messages
#' @param ... Optional plotting arguments passed to [qqplot2()]
#' @details
#' Produces a fast QQ plot. Particularly useful for analyses with very large
#' numbers of p-values (such as eQTL analysis) which can be slow to plot. The
#' function looks first for all comparisons which reached FDR at the designated
#' cut-off and ensures all of these points are plotted. Additional points which
#' typically overlap substantially near the origin are thinned by random
#' sampling. In this way the plot can be reduced from millions of points to
#' 500,000 points with a plot which is indistinguishable from one with all
#' points plotted. For comparison, set `npoints` to `NULL` to plot all points as
#' usual.
#'
#' Calling [qqplot()] will result in a base graphics plot. The plotting
#' dataframe is returned invisibly, so users can save time when refining plots
#' by saving the dataframe produced by [qqplot()] and then invoking [qqplot2()]
#' to simply plot the points. Users who prefer ggplot2 can also pass the
#' dataframe generated by [qqplot()] to [gg_qqplot()].
#' @return Generates a plot using base graphics. Also returns a dataframe
#' invisibly which can be used for downstream plotting via either [qqplot2()]
#' or [gg_qqplot()].
#' @seealso [qqplot2()] [gg_qqplot()]
#' @export
#'
qqplot <- function(pval, fdr = NULL, fdr_cutoff = 0.05,
scheme = c("darkgrey", "royalblue"),
npoints = 5e5, show_plot = TRUE,
verbose = TRUE, ...) {
start <- Sys.time()
if (verbose) message("Sorting p-values")
ord <- order(pval, decreasing = TRUE)
pval <- pval[ord]
if (!is.null(fdr)) {
fdr <- fdr[ord]
} else fdr <- p.adjust(pval, method = "BH")
if (verbose) message("Processing")
fdr <- fdr < fdr_cutoff
n <- length(pval)
x <- rev(seq_len(n) / n - 1 / (2 * n))
x <- -log10(x)
if (!is.null(npoints)) {
# thin non-significant points
rem <- npoints - sum(fdr)
if (rem < 1) rem <- 1e5
cutpoint <- n - sum(fdr)
sam <- sample(cutpoint, rem)
ind <- fdr
ind[sam] <- TRUE
pval <- pval[ind]; fdr <- fdr[ind]; x <- x[ind]
}
df <- data.frame(x, pval, fdr)
df$logp <- -log10(df$pval)
df$fdr <- as.factor(df$fdr)
dur <- as.numeric(Sys.time() - start, units = "secs")
if (verbose) message("Completed processing (", format(dur, digits = 3), " secs)")
if (show_plot) {
qqplot2(df, scheme = scheme, ...)
}
invisible(df)
}
#' Log QQ p-value plot (2nd stage)
#'
#' Second stage plotting function which accepts dataframe generated by
#' [qqplot()]. This can be used to avoid repeating computation of the QQ plot
#' values.
#'
#' @param df A dataframe generated by [qqplot()]
#' @param scheme Vector of 2 colours for plotting non-significant and
#' significant SNPs
#' @param ... Optional plotting arguments passed to [plot()]
#' @return No return value. Produces a base graphics plot.
#' @importFrom graphics abline
#' @export
#'
qqplot2 <- function(df, scheme = c("darkgrey", "royalblue"), ...) {
new.args <- list(...)
abl <- quote(abline(0, 1, col = "red"))
plot.args <- list(x = df$x, y = df$logp, col = scheme[df$fdr],
pch = 20, las = 1,
xlab = expression(-log[10]("expected p-value")),
ylab = expression(-log[10]("observed p-value")),
bty = "l", mgp = c(1.8, 0.5, 0), tcl = -0.3,
panel.first = abl)
if (length(new.args)) plot.args[names(new.args)] <- new.args
do.call("plot", plot.args)
}
#' Log QQ p-value plot (ggplot2)
#'
#' Produces a QQ plot via ggplot2. Requires a dataframe generated by
#' [qqplot()].
#'
#' @param df A dataframe generated by [qqplot()]
#' @param scheme Vector of 2 colours for plotting non-significant and
#' significant SNPs
#' @return A ggplot2 graphics plot object
#' @importFrom ggplot2 ggplot aes geom_point theme_classic theme element_text
#' scale_color_discrete xlab ylab geom_abline
#' @importFrom rlang .data
#' @export
#'
gg_qqplot <- function(df, scheme = c("darkgrey", "royalblue")) {
ggplot(df, aes(x = .data$x, y = .data$logp, color = .data$fdr)) +
geom_abline(intercept = 0, slope = 1, color = "red") +
geom_point() +
scale_color_discrete(type = scheme) +
xlab(expression(-log[10]("expected p-value"))) +
ylab(expression(-log[10]("observed p-value"))) +
theme_classic() +
theme(axis.text = element_text(colour = "black"),
legend.position = "none")
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.