#' @title A plotting method for gene models
#'
#' @description
#' \code{plotGeneModel} Using an annotated vcf and gff file, plot the gene model with
#' mutations.
#'
#' @param gff A gene annotation in gff / gff3 format
#' @param snpEffVCF A simple vcf file that has been annotated by the SNPeff program
#' @param geneID The gff / vcf id of the gene of interest
#' @param upstreamBuffer How far upstream of the gene should be plotted / extracted?
#' @param downstreamBuffer How far downstream of the gene should be plotted / extracted?
#' @param features2plot What features (in the gff) should be plotted?
#' @param colors What colors should the features be?
#' @param mutations2annotate What types of mutations should be annoted in the plot?
#' These keywords must appear in the "INFO" column of the vcf file
#' @param pchMutation What shape should the annotations be?
#' @param colMutation What color should the annotations be?
#' @param orientation What is the orientation of the gene? If NULL, determined from the
#' gff.
#' @param windowSize What window size should be used in the mutation sliding window plot?
#' @param stepSize What step size should be used in the mutation sliding window plot?
#' @param scaleBar Should a scale bar be plotted?
#' @return A data.frame containing all mutations in the plot.
#'
#' @export
plotGeneModel<-function(gff, snpEffVCF, geneID, upstreamBuffer = 1000, downstreamBuffer = 500,
features2plot = c("exon","five_prime_UTR","three_prime_UTR"),
colors = c("steelblue3","lightsteelblue1","lightsteelblue1"),
mutations2annotate = c("missense","stop","start"),
pchMutation = c(2,8,8), colMutation = c("darkblue","darkred","green"),
orientation = NULL, windowSize = 100, stepSize=10,
scaleBar=.1, makePlot = TRUE, returnData = TRUE){
#1. Subset gff to the gene of interest
if(!ncol(gff)==9) stop("gff file must be in standard format")
colnames(gff)<-c("seqname","source","feature","start","end","score","strand","frame","attribute")
gff<-gff[grep(geneID,gff$attribute),]
if(nrow(gff)==0) {
warning("geneID not found in the gff file\n")
return(NULL)
}
#2. Grab orientation information out of gff (unless specified)
if(is.null(orientation)){
orientation <- ifelse(gff$strand[1]=="+","forward","reverse")
}
dir <- ifelse(orientation == "forward",2,1)
#3. calculate standardized positions relative to the transcription start site
if(orientation == "forward"){
xrange<-c(min(c(gff$start,gff$end))-upstreamBuffer,max(c(gff$start,gff$end))+downstreamBuffer)
}else{
xrange<-c(min(c(gff$start,gff$end))-downstreamBuffer,max(c(gff$start,gff$end))+upstreamBuffer)
}
#4. subset vcf to gene of interest and features of interest
vcf<-data.frame(snpEffVCF[snpEffVCF$POS>=xrange[1] & snpEffVCF$POS<=xrange[2],])
if(nrow(vcf)==0) {
warning("geneID not found in the vcf file\n")
return(NULL)
}
gff<-gff[gff$feature %in% features2plot,]
#5. make basic plot
if(makePlot){
par(mar=c(4, 6, 0, 2) + 0.1)
plot(1,type = "n", xlim=xrange, ylim=c(0,1.3), axes=F, bty="n", xlab = NA, ylab=NA)
arrows(x0=xrange[1], x1=xrange[2],y0=1,y1=1, length = .1, code = dir)
features2plot<-features2plot[features2plot %in% gff$feature]
for(j in 1:length(features2plot)){
gff2<-gff[gff$feature == features2plot[j],]
for(i in 1:nrow(gff2)){
with(gff2[i,], rect(xleft = min(start,end), xright=max(start, end), ytop = 1.05, ybottom=.95,
border = "dodgerblue4" , col = colors[j]))
}
}
}
#6. some code to make the intron pieces from exon data only
gff<-gff[order(gff$end),]
if(makePlot){
introns<-sapply(2:nrow(gff), function(i){
tmp<-gff[(i-1):i,]
if(tmp$end[1]<tmp$start[2]){
return(c(tmp$end[1],tmp$start[2]))
}
})
introns<-data.frame(do.call(rbind,introns[!unlist(lapply(introns, is.null))]))
colnames(introns)<-c("start","end")
introns$mids<-with(introns, (start+end)/2)
with(introns, segments(x0=end,x1=mids,y0=1.05,y1=1.1, col="dodgerblue4", lwd=1))
with(introns, segments(x0=mids,x1=start,y0=1.1,y1=1.05, col="dodgerblue4", lwd=1))
}
#7. process annotated VCF file to get the relevant snps
anns<-lapply(vcf$INFO, function(x) strsplit(x,",", fixed=T)[[1]])
annsC<-lapply(anns, function(x) {
y<-x[grep(geneID,x)]
if(length(y)>1){
y<-y[!grepl("intergenic_region",y)]
if(length(y)>1){
y<-y[1]
}
}
y
})
#8. make a column on missense mutation information
out<-data.frame(effect=unlist(annsC), t(sapply(annsC, function(x) strsplit(x, "|", fixed=T)[[1]][c(2,3,11)])))
names(out)[2:4]<-c("rel.pos", "effect","AA.change")
#9. parse amino acid change information
out$AA.change<-gsub("p.","",out$AA.change, fixed=F)
out$AA.change<-gsub("\\d", "", out$AA.change)
out$AA.change<-paste(substr(out$AA.change,1,3),substr(out$AA.change,4,6),sep="-")
out$AA.change[!grepl("missense",out$rel.pos)]<-NA
#10. add in other annotation information
out$plotAnn<-out$AA.change
out$color<-NA
out$pch<-NA
for(i in 1:length(mutations2annotate)){
mut<-mutations2annotate[i]
if(!grepl("missense",mut)){
out$plotAnn[grepl(mut, out$rel.pos)]<-mut
}
out$color[grepl(mut, out$rel.pos)]<-colMutation[i]
out$pch[grepl(mut, out$rel.pos)]<-pchMutation[i]
}
#11. combine information and prepare other columns to plot
tp<-cbind(vcf[,-which(colnames(vcf) == "INFO")], out)
tp<-tp[order(tp$POS),]
tp<-tp[tp$POS>=min(xrange) & tp$POS<=max(xrange),]
if(makePlot){
#12. add in segments for each SNP
for(i in 1:nrow(tp)){
with(tp, segments(x0=POS[i],x1=POS[i],y0=1.01,y1=.99, col="orange", lwd=1))
}
#13. Sliding window SNP density
wind = windowSize
step = stepSize
xs<-seq(from = xrange[1], to = xrange[2], by=step)
sw<-sapply(xs, function(x) sum(abs(tp$POS-x)<=(wind/2)))
sw<-((sw/max(sw))/5)
lines(xs,sw+.7)
#14. Plot points for each type of annotated snp
toan<-sapply(as.character(tp$rel.pos), function(x)
any(sapply(mutations2annotate, function(y) grepl(y,x))))
if(sum(toan)>0){
mtp<-tp[toan,]
with(mtp, points(x=POS, y=rep(.5, length(POS)),
pch = pch, col = color))
with(mtp, text(x=POS, y=rep(.3, length(POS)),
labels = plotAnn, col = color,
srt=90, cex=.6, adj = c(.5,.5)))
}
#15. add scale bar
if(!is.null(scaleBar)){
seg<-round(diff(xrange)*scaleBar,-2)
beg<-min(gff$start)
end<-beg+seg
segments(x0 = beg, x1 = end, y0=1.11, y1=1.11)
segments(x0=beg,x1=beg, y0 = 1.1, y1=1.12)
segments(x0=end,x1=end, y0 = 1.1, y1=1.12)
text(x = beg, y = 1.11, label = paste(seg,"bp"), adj = c(0,-.5))
}
text(x = max(xrange),
y = 1.11, adj = c(1,-.5),
labels = paste(geneID, " ",
paste0(gff$seqname[1], ":",
min(gff$start)
,"..",
max(gff$end))))
#16. add annotation track labels
axis(2, at = c(1,.7,.4), labels = c("gene model","mut. density", "mut. annot."), las=2,
line=-1, lwd=0)
}
if(returnData){
colnames(tp)[colnames(tp) == "rel.pos"]<-"variant.type"
return(tp[,-which(colnames(tp) %in% c("plotAnn", "color", "pch"))])
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.