#
# Chirag J Patel
# Nam Pho
#
# post.R - collection of functions providing post-processing and visualization of xwas results
#
#' xwas.to.df
#'
#' Takes the output of the xwas function that returns a list of regressions and parses the results into a simple data.frame. This is useful for downstream plotting within the xwas library as well as custom implementations.
#'
#' @param data is a list of data.frame objects that represent the regression tests, usually this is the output from the xwas function.
#' @param ... are additional parameters that are passed on to the p.adjust function.
#'
#' @return a data.frame with the p-value, estimate effect size, and adjusted p-value.
#'
#' @examples
#' \dontrun{
#' xwas.to.df(data)
#' }
#'
#' @export
xwas.to.df <- function(data, ...) {
if (!is.list(data)) warning("expecting data argument to be of type list.")
data.p <- NULL
var <- NULL
effect <- NULL
prob <- NULL
# dependent variable is first row in coxph output
if ("coef" %in% colnames(data[[1]])) {
var <- 1
effect <- "exp(coef)"
prob <- "Pr(>|z|)"
}
if ("Estimate" %in% colnames(data[[1]])) {
var <- 2
effect <- "Estimate"
# linear regression
if ("t value" %in% colnames(data)) {
prob <- "Pr(>|t|)"
}
# logistic regression
if ("z value" %in% colnames(data)) {
prob <- "Pr(>|z|)"
}
}
if (is.null(var)) {
stop("unrecognized xwas output")
}
data.p <- unlist(lapply(data, function(x) {return(x[var, prob])} ))
data.x <- c()
for (depvar in names(data.p)) {
data.x <- c(data.x, data[[depvar]][var, effect])
}
names(data.x) <- names(data.p)
#data.or <- unlist(lapply(data.all.p, function(x) {return((x/(1-x))^2)}))
data.q <- p.adjust(data.p, ...)
all <- as.data.frame(cbind(p=data.p, x=data.x, q=data.q))
return(all)
}
#' manhattan
#'
#' Takes output of the xwas function to display as a manhattan plot.
#'
#' @param data is the vector containing a list of p-values to display in a manhattan plot.
#' @param qthreshold is the cutoff of significance (the q-value).
#' @param nlabels is the number of top hits to label.
#' @param ylab is a default y-axis label passed on to the plot function.
#' @param frame.plot is the option passed on to the plot function.
#' @param pch is default to 20 setting the point type to a small bullet in the plot function.
#' @param legend is default to FALSE but will put a legend in the upper right corner if TRUE.
#' @param group.by is a data.frame object with n rows and 2 columns. Ideally one of the columns is named "var" but some logic is built in to try and decipher the appropriate column by percentage of matches.
#' @param group.label is a character vector of labels for each of the groups.
#' @param group.col is a color palette of for each group. It's an optional argument and defaults to the rainbow palette. If provided, it should be the same length at group.label as it is a 1:1 match.
#' @param ... are additional arguments passed onto the plot function.
#'
#' @return none
#'
#' @examples
#' \dontrun{
#' manhattan()
#' manhattan(glucose, main="XWAS of LBXGLU")
#' }
#'
#' @export
manhattan <- function(data, qthreshold=0.05, nlabels=0, ylab=expression(-log[10]*'(p-value)'), frame.plot=FALSE, pch=20,
legend=FALSE, group.by=NULL, group.label=NULL, group.col=NULL, ...) {
all <- NULL
data.merged <- NULL
data.log <- NULL
if (!is.list(data)) {
# it's p-values, just plot and exit
data.log <- -log10(data)
names(data.log) <- names(data)
} else {
# this is xwas output
all <- xwas.to.df(data)
if (!is.null(group.by)) {
if (ncol(group.by) != 2) {
stop("expecting only 2 columns for group.by parameter.")
}
if ( table(rownames(all) %in% group.by[, 2])[["TRUE"]] > ceiling(nrow(all)/2) ) {
# if the 2nd column of group.by has at least a 50% match to variable names then we use this to match categories
data.merged <- merge(data.frame(all, var=rownames(all)), group.by, all.x=TRUE, by.x="var", by.y=colnames(group.by)[2])
if (!is.null(group.col) & length(group.col) != length(table(group.by[,2]))) {
warning("non-equal lengths of group.by and group.col provided.")
}
} else {
data.merged <- merge(data.frame(all, var=rownames(all)), group.by, all.x=TRUE, by.x="var", by.y=colnames(group.by)[1])
if (!is.null(group.col) & length(group.col) != length(table(group.by[,1]))) {
warning("non-equal lengths of group.by and group.col provided.")
}
}
colnames(data.merged) <- c("var", colnames(all), "category") # explicitly define columns
data.merged <- data.merged[order(data.merged$category), ] # order the columns for plotting
data.log <- -log10(data.merged$p)
names(data.log) <- data.merged$var
} else {
data.log <- -log10(all$p)
names(data.log) <- rownames(all)
}
}
# meta-data for top hits
top <- sort(data.log, decreasing=TRUE)
top <- top[is.finite(top)]
i <- match(names(head(top, n=nlabels)), names(data.log))
# skeleton of the main plot
plot(data.log, ylab=ylab, frame.plot=frame.plot, pch=pch, col="white", ...)
if (!is.null(group.by)) {
if (is.null(data.merged)) {
stop("currently no support for just providing p-values as a vector in the data argument and with a group.by color option.")
} else {
if (is.null(group.label)) {
group.label <- names(table(data.merged$category))
}
if (is.null(group.col)) {
# if no color set is provided default to rainbow
group.col <- rainbow(length(group.label))
}
# do all the heavy lifting of point plot with color coordination by groups
for (j in 1:nrow(data.merged)) {
points(j, -log10(data.merged$p[j]), col=group.col[match(data.merged$category[j], group.label)], pch=pch)
}
}
} else {
#if (is.null(data.merged)) {
# no merged data means you were given p-values in a vector or you derived it from the xwas object and later set it
# to the same variable names, so just plot
points(data.log, col="black", pch=pch)
#}
points(i, data.log[i], col="red", pch=pch) # makes the top points red if no groups provided
}
if (legend & !is.null(group.label) & !is.null(group.col)) {
legend("topright", group.label, text.col=group.col, bty="n", cex=0.75)
}
cutoff <- -log10(qthreshold)
lines(c(0, length(data.log)), c(cutoff, cutoff), col="red")
if (nlabels > 0) {
text(i, data.log[i], names(data.log[i]), cex=0.5, col="red", pos=4)
}
}
#' volcano
#'
#' Takes a list of linear regressions and displays a volcano plot.
#'
#' @param data is the vector containing a list of p-values to display in a manhattan plot.
#' @param qthreshold is the cutoff of significance (the q-value).
#' @param xlab is a default x-axis label passed on to the plot function.
#' @param ylab is a default y-axis label passed on to the plot function.
#' @param pch is default to 20 which is a small bullet passed onto the plot function.
#' @param ... are additional arguments passed onto the plot function.
#'
#' @return none
#'
#' @examples
#' \dontrun{
#' volcano()
#' volcano(data)
#' }
#'
#' @export
volcano <- function(data, qthreshold=0.05, xlab="Effect", ylab=expression(-log[10]*'(p-value)'), pch=20, ...) {
all <- NULL
if (is.list(data)) {
all <- xwas.to.df(data)
} else {
all <- data
}
sig <- subset(all, q < 0.05)
plot(all$x, -log10(all$p), xlab=xlab, ylab=ylab, pch=pch, ...)
points(sig$x, -log10(sig$p), col="red", pch=pch)
}
#' qqplot
#'
#' Takes a data object that is either the output of the xwas function or a vector of p-values.
#'
#' @param data is an object either a numeric character vector of p-values or the output of the xwas function from which that information can be derived.
#' @param xlab is a default label for the x-axis passed on to the plot function.
#' @param ylab is a default label for the y-axis passed on to the plot function.
#' @param pch is set to 20 by default which is a small bullet point passed on to the plot function.
#' @param ... are additional arguments passed on to the plot function.
#'
#' @return none
#'
#' @examples
#' \dontrun{
#' qqplot()
#' qqplot(data)
#' }
#'
#' @export
qqplot <- function(data, xlab=expression(-log[10]*'(p-expected)'), ylab=expression(-log[10]*'(p-empirical)'), pch=20, ...) {
all <- NULL
if (is.list(data)) {
all <- xwas.to.df(data)
pvalues <- all$p
names(pvalues) <- rownames(all)
} else if (is.numeric(data)) {
pvalues <- data
} else {
stop("unexpected data type for argument data, should be a numeric character or xwas output.")
}
p.empirical <- sort(pvalues)
p.expected <- punif(1:length(pvalues)/length(pvalues))
range <- c(min(-log10(p.empirical), -log10(p.expected)),
max(-log10(p.empirical), -log10(p.expected)))
plot(-log10(p.expected), -log10(p.empirical), xlab=xlab, ylab=ylab, pch=pch, ...)
lines(c(range[1], range[2]), c(range[1], range[2]), col="red")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.