limma: Linear Models for Microarray Data

Documented in plotExonJunc

plotExonJunc <- function(fit, coef=ncol(fit), geneid, genecolname=NULL, FDR=0.05, annotation=NULL)
#	Assuming the data for diffSplice analysis contains both exons and junctions,
#	'fit' is a MArrayLM object produced by diffSplice().
#	To distinguish between exons and junctions, 'fit$genes$Length' are set to 1 for all the junctions.
#	Since the diffSplice analysis is usually performed after filtering,
#	the full annotation (e.g. the inbuilt annotation used by featureCounts)
#	is required for producing the plot.
#	Yunshun Chen and Gordon Smyth.
#	Created 9 March 2018. Last modified 29 Oct 2018.
{
	if(is.null(genecolname)) 
		genecolname <- fit$genecolname
	else
		genecolname <- as.character(genecolname)

	geneid <- as.character(geneid)
	i <- fit$genes[, genecolname]==geneid
	if(!any(i)) stop(paste0(geneid, " not found."))

#	Subsetting
	fdr <- p.adjust(fit$p.value[,coef], method="BH")
	fdr <- fdr[i]
	genes <- fit$genes[i,]
	strand <- genes$Strand[1]
	p.value <- fit$p.value[i,coef]
	coefficients <- fit$coefficients[i,coef]

#	Sorting	exons and junctions by their start positions
	o <- order(genes$Start, genes$End)
	genes <- genes[o,]
	p.value <- p.value[o]
	coefficients <- coefficients[o]
	
#	Which ones are exons? (Junctions are assigned length of 1 prior to the diffSplice analysis)
	IsExon <- genes$Length > 1L
	genes.e <- genes[IsExon, ]

#	Check the format of the annotation file.
	if(!is.null(annotation)){
		if(is.null(annotation$GeneID)) stop("Annotation file must contain Entrez gene ids.")
		if(is.null(annotation$Start) | is.null(annotation$End)) stop("Annotation file must contain start-end positions of exons.")
		if(is.null(annotation$Length)) annotation$Length <- abs(annotation$End - annotation$Start) + 1L

#		Retrieve annotation information for the exons that have been filtered prior to the diffSplice analysis
		sel <- annotation$GeneID == genes$GeneID[1]
		genes.e2 <- annotation[sel, ]
		genes.e2 <- genes.e2[order(genes.e2$Start), ]
		m <- match(genes.e$Start, genes.e2$Start)
		genes.e <- genes.e2
	}

#	Get the start-end positions and the length for all the introns
	Start.i <- genes.e$End[-nrow(genes.e)] + 1L
	End.i <- genes.e$Start[-1] - 1L
	genes.i <- genes.e[-1,]
	genes.i$Start <- Start.i
	genes.i$End <- End.i
	genes.i$Length <- genes.i$End - genes.i$Start + 1L

#	Get the start-end positions for all the junctions
	if(any(!IsExon)){
		genes.j <- genes[!IsExon, ]
		
#		Extend the plotting range for the gene in case there are junctions outside of the gene range.
		if(min(genes.j$Start) < min(genes.e$Start)){
			intron <- genes.i[1,,drop=FALSE]
			intron$Start <- min(genes.j$Start)
			intron$End <- min(genes.e$Start) - 1L
			intron$Length <- intron$End - intron$Start + 1L
			genes.i <- rbind(intron, genes.i)
		}
		if(max(genes.j$End) > max(genes.e$End)){
			intron <- genes.i[1,,drop=FALSE]
			intron$Start <- max(genes.e$End) + 1L
			intron$End <- max(genes.j$End)
			intron$Length <- intron$End - intron$Start + 1L
			genes.i <- rbind(genes.i, intron)
		}
	}

#	Combine introns and exons for plotting
	genes.ie <- rbind(cbind(genes.e, Flag="Exon"), cbind(genes.i, Flag="Intron"))
	genes.ie <- genes.ie[order(genes.ie$Start), ]

#	Scale the length of intron/exon segments for better visualization
	pseudo.length <- (genes.ie$Length)^.5
	pseudo.pos <- cumsum((genes.ie$Length)^.5)
	pseudo.start <- c(0, pseudo.pos[-nrow(genes.ie)])
	pseudo.end <- pseudo.pos
	genes.ie <- cbind(genes.ie, pseudo.start=pseudo.start, pseudo.end=pseudo.end, pseudo.length=pseudo.length)

#	Update start-end postions for junctions on the pseudo scale
	if(any(!IsExon)){
		genes.j <- cbind(genes.j, pseudo.start=0, pseudo.end=0)
		for(j in 1:nrow(genes.j)){
			k <- which(genes.j$Start[j] <= genes.ie$End)[1]
			genes.j$pseudo.start[j] <- genes.ie$pseudo.end[k] - (genes.ie$End[k] - genes.j$Start[j]) / genes.ie$Length[k] * genes.ie$pseudo.length[k]
			k <- which(genes.j$End[j] <= genes.ie$End)[1]
			genes.j$pseudo.end[j] <- genes.ie$pseudo.end[k] - (genes.ie$End[k] - genes.j$End[j]) / genes.ie$Length[k] * genes.ie$pseudo.length[k]
		}
	}

#	Setup the plot
	GeneStart <- min(genes.ie$pseudo.start)
	GeneEnd <- max(genes.ie$pseudo.end)
	gene.length <- GeneEnd - GeneStart
	plot.new()
	plot.window(xlim=c(GeneStart, GeneEnd), ylim=c(-0.7, 0.7))
	title(main=paste0(geneid, " (", genes$Strand[1], ")"))

#	Plot gene range
	rect(xleft=GeneStart, xright=GeneEnd, ybottom=-0.02, ytop=0.02, col="gray", border="gray")
	if(strand=="+"){
		tx.left <- "5'"
		tx.right <- "3'"
	} else {
		tx.left <- "3'"
		tx.right <- "5'"
	}
	text(x=-0.02*gene.length, y=0.1, labels=tx.left)
	text(x=1.02*gene.length, y=0.1, labels=tx.right)
	
#	Direction and significance of the diffSplice results
	up <- coefficients > 0
	down <- coefficients < 0
	IsSig <- fdr < FDR
	#IsSig <- p.adjust(p.value, method="holm") < FDR
	IsSig.j <- IsSig[!IsExon]
	down.j <- down[!IsExon]
	
#	Colouring
	col <- rep("black", sum(i))
	col[up & IsSig] <- "red"
	col[down & IsSig] <- "dodgerblue"
	col.e <- col[IsExon]
	col.j <- col[!IsExon]
	
#	Filtered exons are coloured in grey
	if(!is.null(annotation)){
		col.e2 <- rep("grey", nrow(genes.e))
		col.e2[m] <- col.e
		col.e <- col.e2
	}

#	Plot exons
	ex <- genes.ie$Flag=="Exon"
	rect(xleft=genes.ie$pseudo.start[ex], xright=genes.ie$pseudo.end[ex], ybottom=-0.1,ytop=0.1, col=col.e, border=col.e)

#	Plot junctions
	if(any(!IsExon)){
		MidPoint <- (genes.j$pseudo.start + genes.j$pseudo.end)/2
		y0 <- rep(0.11, sum(!IsExon))
		y1 <- rep(0.4, sum(!IsExon))
		y1[IsSig.j] <- 0.6
		y0[down.j] <- -y0[down.j]
		y1[down.j] <- -y1[down.j]
		segments(x0=genes.j$pseudo.start, x1=MidPoint, y0=y0, y1=y1, col=col.j, lwd=2)
		segments(x0=MidPoint, x1=genes.j$pseudo.end, y0=y1, y1=y0, col=col.j, lwd=2)
	}

#	Label axis
	if(genes$Strand[1]=="+")
		labels <- paste0("Exon.", 1:length(col.e))
	else
		labels <- paste0("Exon.", length(col.e):1)
	axis(side=1, at=(genes.ie$pseudo.start[ex]+genes.ie$pseudo.end[ex])/2, las=2, labels=labels)

	invisible()
}