plotExonJunc <- function(fit, coef=ncol(fit), geneid, genecolname=NULL, FDR=0.05, annotation=NULL)
# Assuming the data for diffSplice analysis contains both exons and junctions,
# 'fit' is a MArrayLM object produced by diffSplice().
# To distinguish between exons and junctions, 'fit$genes$Length' are set to 1 for all the junctions.
# Since the diffSplice analysis is usually performed after filtering,
# the full annotation (e.g. the inbuilt annotation used by featureCounts)
# is required for producing the plot.
# Yunshun Chen and Gordon Smyth.
# Created 9 March 2018. Last modified 29 Oct 2018.
{
if(is.null(genecolname))
genecolname <- fit$genecolname
else
genecolname <- as.character(genecolname)
geneid <- as.character(geneid)
i <- fit$genes[, genecolname]==geneid
if(!any(i)) stop(paste0(geneid, " not found."))
# Subsetting
fdr <- p.adjust(fit$p.value[,coef], method="BH")
fdr <- fdr[i]
genes <- fit$genes[i,]
strand <- genes$Strand[1]
p.value <- fit$p.value[i,coef]
coefficients <- fit$coefficients[i,coef]
# Sorting exons and junctions by their start positions
o <- order(genes$Start, genes$End)
genes <- genes[o,]
p.value <- p.value[o]
coefficients <- coefficients[o]
# Which ones are exons? (Junctions are assigned length of 1 prior to the diffSplice analysis)
IsExon <- genes$Length > 1L
genes.e <- genes[IsExon, ]
# Check the format of the annotation file.
if(!is.null(annotation)){
if(is.null(annotation$GeneID)) stop("Annotation file must contain Entrez gene ids.")
if(is.null(annotation$Start) | is.null(annotation$End)) stop("Annotation file must contain start-end positions of exons.")
if(is.null(annotation$Length)) annotation$Length <- abs(annotation$End - annotation$Start) + 1L
# Retrieve annotation information for the exons that have been filtered prior to the diffSplice analysis
sel <- annotation$GeneID == genes$GeneID[1]
genes.e2 <- annotation[sel, ]
genes.e2 <- genes.e2[order(genes.e2$Start), ]
m <- match(genes.e$Start, genes.e2$Start)
genes.e <- genes.e2
}
# Get the start-end positions and the length for all the introns
Start.i <- genes.e$End[-nrow(genes.e)] + 1L
End.i <- genes.e$Start[-1] - 1L
genes.i <- genes.e[-1,]
genes.i$Start <- Start.i
genes.i$End <- End.i
genes.i$Length <- genes.i$End - genes.i$Start + 1L
# Get the start-end positions for all the junctions
if(any(!IsExon)){
genes.j <- genes[!IsExon, ]
# Extend the plotting range for the gene in case there are junctions outside of the gene range.
if(min(genes.j$Start) < min(genes.e$Start)){
intron <- genes.i[1,,drop=FALSE]
intron$Start <- min(genes.j$Start)
intron$End <- min(genes.e$Start) - 1L
intron$Length <- intron$End - intron$Start + 1L
genes.i <- rbind(intron, genes.i)
}
if(max(genes.j$End) > max(genes.e$End)){
intron <- genes.i[1,,drop=FALSE]
intron$Start <- max(genes.e$End) + 1L
intron$End <- max(genes.j$End)
intron$Length <- intron$End - intron$Start + 1L
genes.i <- rbind(genes.i, intron)
}
}
# Combine introns and exons for plotting
genes.ie <- rbind(cbind(genes.e, Flag="Exon"), cbind(genes.i, Flag="Intron"))
genes.ie <- genes.ie[order(genes.ie$Start), ]
# Scale the length of intron/exon segments for better visualization
pseudo.length <- (genes.ie$Length)^.5
pseudo.pos <- cumsum((genes.ie$Length)^.5)
pseudo.start <- c(0, pseudo.pos[-nrow(genes.ie)])
pseudo.end <- pseudo.pos
genes.ie <- cbind(genes.ie, pseudo.start=pseudo.start, pseudo.end=pseudo.end, pseudo.length=pseudo.length)
# Update start-end postions for junctions on the pseudo scale
if(any(!IsExon)){
genes.j <- cbind(genes.j, pseudo.start=0, pseudo.end=0)
for(j in 1:nrow(genes.j)){
k <- which(genes.j$Start[j] <= genes.ie$End)[1]
genes.j$pseudo.start[j] <- genes.ie$pseudo.end[k] - (genes.ie$End[k] - genes.j$Start[j]) / genes.ie$Length[k] * genes.ie$pseudo.length[k]
k <- which(genes.j$End[j] <= genes.ie$End)[1]
genes.j$pseudo.end[j] <- genes.ie$pseudo.end[k] - (genes.ie$End[k] - genes.j$End[j]) / genes.ie$Length[k] * genes.ie$pseudo.length[k]
}
}
# Setup the plot
GeneStart <- min(genes.ie$pseudo.start)
GeneEnd <- max(genes.ie$pseudo.end)
gene.length <- GeneEnd - GeneStart
plot.new()
plot.window(xlim=c(GeneStart, GeneEnd), ylim=c(-0.7, 0.7))
title(main=paste0(geneid, " (", genes$Strand[1], ")"))
# Plot gene range
rect(xleft=GeneStart, xright=GeneEnd, ybottom=-0.02, ytop=0.02, col="gray", border="gray")
if(strand=="+"){
tx.left <- "5'"
tx.right <- "3'"
} else {
tx.left <- "3'"
tx.right <- "5'"
}
text(x=-0.02*gene.length, y=0.1, labels=tx.left)
text(x=1.02*gene.length, y=0.1, labels=tx.right)
# Direction and significance of the diffSplice results
up <- coefficients > 0
down <- coefficients < 0
IsSig <- fdr < FDR
#IsSig <- p.adjust(p.value, method="holm") < FDR
IsSig.j <- IsSig[!IsExon]
down.j <- down[!IsExon]
# Colouring
col <- rep("black", sum(i))
col[up & IsSig] <- "red"
col[down & IsSig] <- "dodgerblue"
col.e <- col[IsExon]
col.j <- col[!IsExon]
# Filtered exons are coloured in grey
if(!is.null(annotation)){
col.e2 <- rep("grey", nrow(genes.e))
col.e2[m] <- col.e
col.e <- col.e2
}
# Plot exons
ex <- genes.ie$Flag=="Exon"
rect(xleft=genes.ie$pseudo.start[ex], xright=genes.ie$pseudo.end[ex], ybottom=-0.1,ytop=0.1, col=col.e, border=col.e)
# Plot junctions
if(any(!IsExon)){
MidPoint <- (genes.j$pseudo.start + genes.j$pseudo.end)/2
y0 <- rep(0.11, sum(!IsExon))
y1 <- rep(0.4, sum(!IsExon))
y1[IsSig.j] <- 0.6
y0[down.j] <- -y0[down.j]
y1[down.j] <- -y1[down.j]
segments(x0=genes.j$pseudo.start, x1=MidPoint, y0=y0, y1=y1, col=col.j, lwd=2)
segments(x0=MidPoint, x1=genes.j$pseudo.end, y0=y1, y1=y0, col=col.j, lwd=2)
}
# Label axis
if(genes$Strand[1]=="+")
labels <- paste0("Exon.", 1:length(col.e))
else
labels <- paste0("Exon.", length(col.e):1)
axis(side=1, at=(genes.ie$pseudo.start[ex]+genes.ie$pseudo.end[ex])/2, las=2, labels=labels)
invisible()
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.