Nothing
#' Regional association plot
#'
#' @param locus Data frame with columns c("CHR", "POS", "NAME", "PVAL", "RSQR") containing association results.
#' @param map Genetic map, i.e, c("POS","THETA","DIST").
#' @param genes Gene annotation with columns c("START", "STOP", "STRAND", "GENE").
#' @param flanking Flanking length.
#' @param best.pval Best p value for the locus of interest.
#' @param sf scale factors for p values and recombination rates, smaller values are necessary for gene dense regions.
#' @param logpmax Maximum value for -log10(p).
#' @param pch Plotting character for the SNPs to be highlighted, e.g., 21 and 23 refer to circle and diamond.
#'
#' @details
#' This function obtains regional association plot for a particular locus, based on
#' the information about recombinatino rates, linkage disequilibria between the
#' SNP of interest and neighbouring ones, and single-point association tests p values.
#'
#' Note that the best p value is not necessarily within locus in the original design.
#'
#' @export
#' @references
#' \insertRef{saxena07}{gap}
#'
#' @examples
#' \dontrun{
#' require(gap.datasets)
#' asplot(CDKNlocus, CDKNmap, CDKNgenes)
#' title("CDKN2A/CDKN2B Region")
#' asplot(CDKNlocus, CDKNmap, CDKNgenes, best.pval=5.4e-8, sf=c(3,6))
#'
#' ## NCBI2R
#'
#' options(stringsAsFactors=FALSE)
#' p <- with(CDKNlocus,data.frame(SNP=NAME,PVAL))
#' hit <- subset(p,PVAL==min(PVAL,na.rm=TRUE))$SNP
#'
#' library(NCBI2R)
#' # LD under build 36
#' chr_pos <- GetSNPInfo(with(p,SNP))[c("chr","chrpos")]
#' l <- with(chr_pos,min(as.numeric(chrpos),na.rm=TRUE))
#' u <- with(chr_pos,max(as.numeric(chrpos),na.rm=TRUE))
#' LD <- with(chr_pos,GetLDInfo(unique(chr),l,u))
#' # We have complaints; a possibility is to get around with
#' # https://ftp.ncbi.nlm.nih.gov/hapmap/
#' hit_LD <- subset(LD,SNPA==hit)
#' hit_LD <- within(hit_LD,{RSQR=r2})
#' info <- GetSNPInfo(p$SNP)
#' haldane <- function(x) 0.5*(1-exp(-2*x))
#' locus <- with(info, data.frame(CHR=chr,POS=chrpos,NAME=marker,
#' DIST=(chrpos-min(chrpos))/1000000,
#' THETA=haldane((chrpos-min(chrpos))/100000000)))
#' locus <- merge.data.frame(locus,hit_LD,by.x="NAME",by.y="SNPB",all=TRUE)
#' locus <- merge.data.frame(locus,p,by.x="NAME",by.y="SNP",all=TRUE)
#' locus <- subset(locus,!is.na(POS))
#' ann <- AnnotateSNPList(p$SNP)
#' genes <- with(ann,data.frame(ID=locusID,CLASS=fxn_class,PATH=pathways,
#' START=GeneLowPoint,STOP=GeneHighPoint,
#' STRAND=ori,GENE=genesymbol,BUILD=build,CYTO=cyto))
#' attach(genes)
#' ugenes <- unique(GENE)
#' ustart <- as.vector(as.table(by(START,GENE,min))[ugenes])
#' ustop <- as.vector(as.table(by(STOP,GENE,max))[ugenes])
#' ustrand <- as.vector(as.table(by(as.character(STRAND),GENE,max))[ugenes])
#' detach(genes)
#' genes <- data.frame(START=ustart,STOP=ustop,STRAND=ustrand,GENE=ugenes)
#' genes <- subset(genes,START!=0)
#' rm(l,u,ugenes,ustart,ustop,ustrand)
#' # Assume we have the latest map as in CDKNmap
#' asplot(locus,CDKNmap,genes)
#' }
#'
#' @author{Paul de Bakker, Jing Hua Zhao, Shengxu Li}
#' @keywords hplot
asplot <- function (locus, map, genes, flanking=1e3, best.pval=NULL, sf=c(4,4), logpmax=10, pch=21)
{
NAME <- locus$NAME
PVAL <- locus$PVAL
snp <- NAME[PVAL==min(PVAL)]
chr <- locus$CHR[1]
hit <- locus[NAME==snp, ]
if (is.null(best.pval)) best.pval <- hit$PVAL
lb <- min(locus$POS) - flanking
ub <- max(locus$POS) + flanking
lu <- ub - lb
center <- lb + lu / 2
center.100kb <- round(center / 1e5) * 1e5
offset.100kb <- round(lu / 5 / 1e5) * 1e5
offset <- logpmax / sf[1]
ylim <- logpmax + offset
ylim4 <- ylim / 4
yadj <- -offset + ylim / sf[2]
keep <- subset(map, map$POS > lb & map$POS < ub)
genes <- with(genes, subset(genes, (START > lb & START < ub) | (STOP > lb & STOP < ub)))
par(mar = c(4, 4, 3, 4))
xy <- xy.coords(keep$POS, yadj + (keep$THETA/60) * (3 * ylim4))
plot(xy$x, xy$y, type = "l", col = "lightblue", lwd = 1, ylim = c(-offset, logpmax), xlab = "", ylab = "", axes = F)
box()
if (offset.100kb!=0) center5 <- center.100kb + (-2:2) * offset.100kb
else
{
p1 <- min(xy$x)
p5 <- max(xy$x)
p3 <- (p1+p5)/2
p2 <- (p1+p3)/2
p4 <- (p3+p5)/2
center5 <- c(p1,p2,p3,p4,p5)
}
axis(1, at = center5, labels = round(center5 / 1e3, 2), las = 1)
mtext(paste("Chromosome", chr, "position (kb)", sep = " "), side = 1, line = 2.5)
axis(2, at = seq(0, logpmax, 2), labels = seq(0, logpmax, 2), las = 1)
mtext("-log10(Observed p)", side = 2, at = logpmax / 2, line = 2)
axis(4, at = yadj + (0:4) * ylim4, labels = paste((0:4)*20), las = 1, col.axis="blue")
mtext("Recombination rate (cM/Mb)", side = 4, at = -offset + 2 * ylim4, line = 2, col="blue")
lines(c(lb, ub), c(0, 0), lty = "dotted", lwd = 1, col = "black")
points(hit$POS, -log10(hit$PVAL), pch = pch, cex = 2.5, bg = "red")
text(hit$POS, -log10(hit$PVAL), labels = hit$NAME, pos = 3, offset = 1)
if (-log10(best.pval) < logpmax)
{
points(hit$POS, -log10(best.pval), pch = pch, cex = 2.5, bg = "blue")
text(hit$POS, -log10(best.pval), labels = c(paste("P=", best.pval, sep = "")), pos = 4, offset = 2)
} else {
points(hit$POS, logpmax, pch = pch, cex = 2.5, bg = "blue")
text(hit$POS, logpmax, labels = c(paste("P=", best.pval, sep = "")), pos = 4, offset = 1)
}
RSQR <- locus$RSQR
strong.ld <- subset(locus, NAME != snp & RSQR >= 0.8)
moderate.ld <- subset(locus, NAME != snp & RSQR >= 0.5 & RSQR < 0.8)
weak.ld <- subset(locus, NAME != snp & RSQR >= 0.2 & RSQR < 0.5)
not.in.ld <- subset(locus, NAME != snp & RSQR < 0.2)
na.ld <- subset(locus, NAME != snp & is.na(RSQR))
colors <- c("red","orange","yellow","grey","white")
points(strong.ld$POS, -log10(strong.ld$PVAL), pch = pch, cex = 1.25, bg = colors[1])
points(moderate.ld$POS, -log10(moderate.ld$PVAL), pch = pch, cex = 1.25, bg = colors[2])
points(weak.ld$POS, -log10(weak.ld$PVAL), pch = pch, cex = 1.25, bg = colors[3])
points(not.in.ld$POS, -log10(not.in.ld$PVAL), pch = pch, cex = 1, bg = colors[4])
points(na.ld$POS, -log10(na.ld$PVAL), pch = pch, cex = 1, bg = colors[5])
for (i in 1:nrow(genes))
{
gi <- genes[i, ]
GENE <- gi$GENE
cat("-",GENE,"\n")
start <- gi$START
finish <- gi$STOP
center <- (start + finish)/2
lb <- min(xy$x)
ub <- max(xy$x)
adj <- -offset+2*(i%%4)/3
if (!is.na(GENE))
{
if (start<lb) text(lb, adj + ylim4 / 10, labels = GENE, cex = 0.7)
else if (finish>ub) text(ub, adj + ylim4 / 10, labels = GENE, cex = 0.7)
else text(center, adj + ylim4 / 10, labels = GENE, cex = 0.7)
}
STRAND <- gi$STRAND
if (STRAND == "+") arrows(start, adj, finish, adj, length = 0.05, lwd = 2, code = 2, lty = "solid", col = "darkgreen")
else arrows(start, adj, finish, adj, length = 0.05, lwd = 2, code = 1, lty = "solid", col = "darkgreen")
}
ltext <- rbind("0.8-","0.5-","0.2-","0.0-","NA")
legend(min(xy$x),logpmax,legend=ltext,title=expression(r^2),fill=colors)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.