##params
getDarkColor <- function(color) {
rgb(
max(0,col2rgb(color)["red",]-100),
max(0,col2rgb(color)["green",]-100),
max(0,col2rgb(color)["blue",]-100),
maxColorValue=255
)
}
parameters <- list(
minConfidenceForCircosPlot=list("minConfidenceForCircosPlot", "string", "medium"),
squishIntrons=list("squishIntrons", "bool", T),
printExonLabels=list("printExonLabels", "bool", T),
render3dEffect=list("render3dEffect", "bool", T),
pdfWidth=list("pdfWidth", "numeric", 11.692),
pdfHeight=list("pdfHeight", "numeric", 8.267),
color1=list("color1", "string", "#e5a5a5"),
color2=list("color2", "string", "#a7c4e5"),
mergeDomainsOverlappingBy=list("mergeDomainsOverlappingBy", "numeric", 0.9),
optimizeDomainColors=list("optimizeDomainColors", "bool", F),
fontSize=list("fontSize", "numeric", 1),
showIntergenicVicinity=list("showVicinity", "numeric", 0),
transcriptSelection=list("transcriptSelection", "string", "coverage")
)
color1 <- parameters$color1[[3]]
color2 <- parameters$color2[[3]]
darkColor1 <- getDarkColor(color1)
darkColor2 <- getDarkColor(color2)
showVicinity <- parameters$showIntergenicVicinity[[3]]
squishIntrons <- parameters$squishIntrons[[3]]
printExonLabels <- parameters$printExonLabels[[3]]
render3dEffect <- parameters$render3dEffect[[3]]
mergeDomainsOverlappingBy <- parameters$mergeDomainsOverlappingBy[[3]]
optimizeDomainColors <- parameters$optimizeDomainColors[[3]]
fontSize <- parameters$fontSize[[3]]
transcriptSelection <- parameters$transcriptSelection[[3]]
minConfidenceForCircosPlot <- parameters$minConfidenceForCircosPlot[[3]]
##drawing functions
drawVerticalGradient <- function(left, right, y, color, selection=NULL) {
# check if gradient should only be drawn in part of the region
if (!is.null(selection)) {
y <- y[selection]
left <- left[selection]
right <- right[selection]
}
# draw gradient
for (i in 1:length(y)) {
polygon(
c(left[1:i], right[1:i]),
c(y[1:i], y[i:1]),
border=NA,
col=rgb(col2rgb(color)["red",], col2rgb(color)["green",], col2rgb(color)["blue",], col2rgb(color, alpha=T)["alpha",]*(1/length(y)), max=255)
)
}
}
drawCurlyBrace <- function(left, right, top, bottom, tip) {
smoothness <- 20
x <- cumsum(exp(-seq(-2.5, 2.5, len=smoothness)^2))
x <- x/max(x)
y <- seq(top, bottom, len=smoothness)
lines(left+(tip-left)+x*(left-tip), y)
lines(tip+x*(right-tip), y)
}
drawIdeogram <- function(adjust, left, right, y, cytobands, contig, breakpoint) {
# define design of ideogram
bandColors <- setNames(rgb(100:0, 100:0, 100:0, maxColorValue=100), paste0("gpos", 0:100))
bandColors <- c(bandColors, gneg="#ffffff", acen="#ec4f4f", stalk="#0000ff")
cytobands$color <- bandColors[cytobands$giemsa]
arcSteps <- 30 # defines roundness of arc
curlyBraceHeight <- 0.03
ideogramHeight <- 0.04
ideogramWidth <- 0.4
# extract bands of given contig
bands <- cytobands[cytobands$contig==contig,]
if (nrow(bands) == 0) {
warning(paste("Ideogram of contig", contig, "cannot be drawn, because no Giemsa staining information is available."))
return(NULL)
}
# scale width of ideogram to fit inside given region
bands$left <- bands$start / max(cytobands$end) * ideogramWidth
bands$right <- bands$end / max(cytobands$end) * ideogramWidth
# left/right-align cytobands
offset <- ifelse(adjust=="left", left, right - max(bands$right))
bands$left <- bands$left + offset
bands$right <- bands$right + offset
# draw curly braces
tip <- min(bands$left) + (max(bands$right)-min(bands$left)) / (max(bands$end)-min(bands$start)) * breakpoint
drawCurlyBrace(left, right, y-0.05+curlyBraceHeight, y-0.05, tip)
# draw title of chromosome
text((max(bands$right)+min(bands$left))/2, y+0.07, paste("chromosome", contig), font=2, cex=fontSize, adj=c(0.5,0))
# draw name of band
bandName <- bands[which(between(breakpoint, bands$start, bands$end)), "name"]
text(tip, y+0.03, bandName, cex=fontSize, adj=c(0.5,0))
# draw start of chromosome
leftArcX <- bands[1,"left"] + (1+cos(seq(pi/2,1.5*pi,len=arcSteps))) * (bands[1,"right"]-bands[1,"left"])
leftArcY <- y + sin(seq(pi/2,1.5*pi,len=arcSteps)) * (ideogramHeight/2)
polygon(leftArcX, leftArcY, col=bands[1,"color"])
# draw bands
centromereStart <- NULL
centromereEnd <- NULL
for (band in 2:(nrow(bands)-1)) {
if (bands[band,"giemsa"] != "acen") {
rect(bands[band,"left"], y-ideogramHeight/2, bands[band,"right"], y+ideogramHeight/2, col=bands[band,"color"])
} else { # draw centromere
if (is.null(centromereStart)) {
polygon(c(bands[band,"left"], bands[band,"right"], bands[band,"left"]), c(y-ideogramHeight/2, y, y+ideogramHeight/2), col=bands[band,"color"])
centromereStart <- bands[band,"left"]
} else {
polygon(c(bands[band,"right"], bands[band,"left"], bands[band,"right"]), c(y-ideogramHeight/2, y, y+ideogramHeight/2), col=bands[band,"color"])
centromereEnd <- bands[band,"right"]
}
}
}
# draw end of chromosome
band <- nrow(bands)
rightArcX <- bands[band,"right"] - (1+cos(seq(1.5*pi,pi/2,len=arcSteps))) * (bands[band,"right"]-bands[band,"left"])
rightArcY <- y + sin(seq(pi/2,1.5*pi,len=arcSteps)) * ideogramHeight/2
polygon(rightArcX, rightArcY, col=bands[band,"color"])
# if there is no centromere, make an artificial one with length zero
if (is.null(centromereStart) || is.null(centromereEnd)) {
centromereStart <- bands[1,"right"]
centromereEnd <- bands[1,"right"]
}
# draw gradients for 3D effect
drawVerticalGradient(leftArcX, rep(centromereStart, arcSteps), leftArcY, rgb(0,0,0,0.8), 1:round(arcSteps*0.4)) # black from top on p-arm
drawVerticalGradient(leftArcX, rep(centromereStart, arcSteps), leftArcY, rgb(1,1,1,0.7), round(arcSteps*0.4):round(arcSteps*0.1)) # white to top on p-arm
drawVerticalGradient(leftArcX, rep(centromereStart, arcSteps), leftArcY, rgb(1,1,1,0.7), round(arcSteps*0.4):round(arcSteps*0.6)) # white to bottom on p-arm
drawVerticalGradient(leftArcX, rep(centromereStart, arcSteps), leftArcY, rgb(0,0,0,0.9), arcSteps:round(arcSteps*0.5)) # black from bottom on p-arm
drawVerticalGradient(rightArcX, rep(centromereEnd, arcSteps), rightArcY, rgb(0,0,0,0.8), 1:round(arcSteps*0.4)) # black from top on q-arm
drawVerticalGradient(rightArcX, rep(centromereEnd, arcSteps), rightArcY, rgb(1,1,1,0.7), round(arcSteps*0.4):round(arcSteps*0.1)) # white to top on q-arm
drawVerticalGradient(rightArcX, rep(centromereEnd, arcSteps), rightArcY, rgb(1,1,1,0.7), round(arcSteps*0.4):round(arcSteps*0.6)) # white to bottom on q-arm
drawVerticalGradient(rightArcX, rep(centromereEnd, arcSteps), rightArcY, rgb(0,0,0,0.9), arcSteps:round(arcSteps*0.5)) # black from bottom on q-arm
}
drawCoverage <- function(left, right, y, coverage, start, end, color) {
maxResolution <- 5000 # max number of data points to draw coverage
# draw coverage as bars
if (!is.null(coverage)) {
coverageData <- as.numeric(coverage[IRanges(sapply(start, max, min(start(coverage))), sapply(end, min, max(end(coverage))))])
# downsample to maxResolution, if there are too many data points
coverageData <- aggregate(coverageData, by=list(round(1:length(coverageData) * (right-left) * maxResolution/length(coverageData))), mean)$x
polygon(c(left, seq(left, right, length.out=length(coverageData)), right), c(y, y+coverageData*0.1, y), col=color, border=NA)
}
}
drawStrand <- function(left, right, y, color, strand) {
if (strand %in% c("+", "-")) {
# draw strand
lines(c(left+0.001, right-0.001), c(y, y), col=color, lwd=2)
lines(c(left+0.001, right-0.001), c(y, y), col=rgb(1,1,1,0.1), lwd=1)
# indicate orientation
if (right - left > 0.01)
for (i in seq(left+0.005, right-0.005, by=sign(right-left-2*0.005)*0.01)) {
arrows(i, y, i+0.001*ifelse(strand=="+", 1, -1), y, col=color, length=0.05, lwd=2, angle=60)
arrows(i, y, i+0.001*ifelse(strand=="+", 1, -1), y, col=rgb(1,1,1,0.1), length=0.05, lwd=1, angle=60)
}
}
}
drawExon <- function(left, right, y, color, title, type) {
gradientSteps <- 10 # defines smoothness of gradient
exonHeight <- 0.03
# if (type == "CDS") {
if (stringr::str_detect(type, "CDS")) {
# draw coding regions as thicker bars
rect(left, y+exonHeight, right, y+exonHeight/2-0.001, col=color, border=NA)
rect(left, y-exonHeight, right, y-exonHeight/2+0.001, col=color, border=NA)
# draw border
lines(c(left, left, right, right), c(y+exonHeight/2, y+exonHeight, y+exonHeight, y+exonHeight/2), col=getDarkColor(color), lend=2)
lines(c(left, left, right, right), c(y-exonHeight/2, y-exonHeight, y-exonHeight, y-exonHeight/2), col=getDarkColor(color), lend=2)
# draw gradients for 3D effect
drawVerticalGradient(rep(left, gradientSteps), rep(right, gradientSteps), seq(y+0.03, y+0.015, len=gradientSteps), rgb(0,0,0,0.2))
drawVerticalGradient(rep(left, gradientSteps), rep(right, gradientSteps), seq(y-0.03, y-0.015, len=gradientSteps), rgb(0,0,0,0.3))
} else if (stringr::str_detect(type, "exon")){ #if (type == "exon") {
rect(left, y+exonHeight/2, right, y-exonHeight/2, col=color, border=getDarkColor(color))
# draw gradients for 3D effect
drawVerticalGradient(rep(left, gradientSteps), rep(right, gradientSteps), seq(y, y+exonHeight/2, len=gradientSteps), rgb(1,1,1,0.6))
drawVerticalGradient(rep(left, gradientSteps), rep(right, gradientSteps), seq(y, y-exonHeight/2, len=gradientSteps), rgb(1,1,1,0.6))
# add exon label
text((left+right)/2, y, title, cex=0.9*fontSize)
}
}
drawCircos <- function(fusion, fusions, cytobands, minConfidenceForCircosPlot) {
# check if Giemsa staining information is available
for (contig in unlist(fusions[fusion,c("contig1", "contig2")])) {
if (!any(cytobands$contig==contig)) {
warning(paste0("Circos plot cannot be drawn, because no Giemsa staining information is available for contig ", contig, "."))
return(NULL)
}
}
# initialize with empty circos plot
circos.clear()
circos.initializeWithIdeogram(cytoband=cytobands, plotType=NULL)
# use gene names as labels or <contig>:<position> for intergenic breakpoints
geneLabels <- data.frame(
contig=c(fusions[fusion,"contig1"], fusions[fusion,"contig2"]),
start=c(fusions[fusion,"breakpoint1"], fusions[fusion,"breakpoint2"])
)
geneLabels$end <- geneLabels$start + 1
geneLabels$gene <- c(fusions[fusion,"gene1"], fusions[fusion,"gene2"])
geneLabels$gene <- ifelse(c(fusions[fusion,"site1"], fusions[fusion,"site2"]) == "intergenic", paste0(c(fusions[fusion,"display_contig1"], fusions[fusion,"display_contig2"]), ":", geneLabels$start), geneLabels$gene)
# draw gene labels
circos.genomicLabels(geneLabels, labels.column=4, side="outside", cex=fontSize)
# draw chromosome labels in connector plot
for (contig in unique(cytobands$contig)) {
set.current.cell(track.index=2, sector.index=contig) # draw in gene label connector track (track.index=2)
circos.text(CELL_META$xcenter, CELL_META$ycenter, contig, cex=0.85)
}
# draw ideograms
circos.genomicIdeogram(cytoband=cytobands)
# draw arcs
confidenceRank <- c(low=0, medium=1, high=2)
for (i in c(setdiff(1:nrow(fusions), fusion), fusion)) { # draw fusion of interest last, such that its arc is on top
f <- fusions[i,]
if (any(cytobands$contig==f$contig1) && any(cytobands$contig==f$contig2)) # ignore viral contigs, because we have no cytoband information for them
if (minConfidenceForCircosPlot != "none" && confidenceRank[f$confidence] >= confidenceRank[minConfidenceForCircosPlot] || i==fusion)
circos.link(
f$contig1, f$breakpoint1,
f$contig2, f$breakpoint2,
lwd=2, col=ifelse(i==fusion, rgb(1,0,0), rgb(1,0.7,0.7))
)
}
}
drawProteinDomains <- function(fusion, exons1, exons2, proteinDomains, color1, color2, mergeDomainsOverlappingBy, optimizeDomainColors) {
exonHeight <- 0.2
exonsY <- 0.5
geneNamesY <- exonsY - exonHeight/2 - 0.05
# find coding exons
codingExons1 <- exons1[exons1$type == "CDS" & fusion$site1 != "intergenic",]
codingExons2 <- exons2[exons2$type == "CDS" & fusion$site2 != "intergenic",]
# cut off coding regions beyond breakpoint
if (fusion$direction1 == "upstream") {
codingExons1 <- codingExons1[codingExons1$end >= fusion$breakpoint1,]
codingExons1$start <- ifelse(codingExons1$start < fusion$breakpoint1, fusion$breakpoint1, codingExons1$start)
} else {
codingExons1 <- codingExons1[codingExons1$start <= fusion$breakpoint1,]
codingExons1$end <- ifelse(codingExons1$end > fusion$breakpoint1, fusion$breakpoint1, codingExons1$end)
}
if (fusion$direction2 == "upstream") {
codingExons2 <- codingExons2[codingExons2$end >= fusion$breakpoint2,]
codingExons2$start <- ifelse(codingExons2$start < fusion$breakpoint2, fusion$breakpoint2, codingExons2$start)
} else {
codingExons2 <- codingExons2[codingExons2$start <= fusion$breakpoint2,]
codingExons2$end <- ifelse(codingExons2$end > fusion$breakpoint2, fusion$breakpoint2, codingExons2$end)
}
# find overlapping domains
exonsGRanges1 <- GRanges(codingExons1$contig, IRanges(codingExons1$start, codingExons1$end), strand=codingExons1$strand)
exonsGRanges2 <- GRanges(codingExons2$contig, IRanges(codingExons2$start, codingExons2$end), strand=codingExons2$strand)
domainsGRanges <- GRanges(proteinDomains$contig, IRanges(proteinDomains$start, proteinDomains$end), strand=proteinDomains$strand)
domainsGRanges$proteinDomainName <- proteinDomains$proteinDomainName
domainsGRanges$proteinDomainID <- proteinDomains$proteinDomainID
domainsGRanges$color <- proteinDomains$color
domainsGRanges <- domainsGRanges[suppressWarnings(unique(queryHits(findOverlaps(domainsGRanges, GenomicRanges::union(exonsGRanges1, exonsGRanges2)))))]
# group overlapping domains by domain ID
domainsGRangesList <- GRangesList(lapply(unique(domainsGRanges$proteinDomainID), function(x) { domainsGRanges[domainsGRanges$proteinDomainID == x] }))
# trim protein domains to exon boundaries
trimDomains <- function(domainsGRangesList, exonsGRanges) {
do.call(
"rbind",
lapply(
domainsGRangesList,
function(x) {
intersected <- as.data.frame(GenomicRanges::reduce(suppressWarnings(GenomicRanges::intersect(x, exonsGRanges))))
if (nrow(intersected) > 0) {
intersected$proteinDomainName <- head(x$proteinDomainName, 1)
intersected$proteinDomainID <- head(x$proteinDomainID, 1)
intersected$color <- head(x$color, 1)
} else {
intersected$proteinDomainName <- character()
intersected$proteinDomainID <- character()
intersected$color <- character()
}
return(intersected)
}
)
)
}
retainedDomains1 <- trimDomains(domainsGRangesList, exonsGRanges1)
retainedDomains2 <- trimDomains(domainsGRangesList, exonsGRanges2)
# calculate length of coding exons
codingExons1$length <- codingExons1$end - codingExons1$start + 1
codingExons2$length <- codingExons2$end - codingExons2$start + 1
# abort, if there are no coding regions
if (sum(exons1$type == "CDS") + sum(exons2$type == "CDS") == 0) {
text(0.5, 0.5, "Genes are not protein-coding.")
return(NULL)
}
codingLength1 <- sum(codingExons1$length)
codingLength2 <- sum(codingExons2$length)
if (codingLength1 + codingLength2 == 0) {
text(0.5, 0.5, "No coding regions retained in fusion transcript.")
return(NULL)
}
antisenseTranscription1 <- sub("/.*", "", fusion$strand1) != sub(".*/", "", fusion$strand1)
antisenseTranscription2 <- sub("/.*", "", fusion$strand2) != sub(".*/", "", fusion$strand2)
if ((codingLength1 == 0 || antisenseTranscription1) && (codingLength2 == 0 || antisenseTranscription2)) {
text(0.5, 0.5, "No coding regions due to antisense transcription.")
return(NULL)
}
# remove introns from protein domains
removeIntronsFromProteinDomains <- function(codingExons, retainedDomains) {
if (nrow(codingExons) == 0) return(NULL)
cumulativeIntronLength <- 0
previousExonEnd <- 0
for (exon in 1:nrow(codingExons)) {
if (codingExons[exon,"start"] > previousExonEnd)
cumulativeIntronLength <- cumulativeIntronLength + codingExons[exon,"start"] - previousExonEnd
domainsInExon <- which(between(retainedDomains$start, codingExons[exon,"start"], codingExons[exon,"end"]))
retainedDomains[domainsInExon,"start"] <- retainedDomains[domainsInExon,"start"] - cumulativeIntronLength
domainsInExon <- which(between(retainedDomains$end, codingExons[exon,"start"], codingExons[exon,"end"]))
retainedDomains[domainsInExon,"end"] <- retainedDomains[domainsInExon,"end"] - cumulativeIntronLength
previousExonEnd <- codingExons[exon,"end"]
}
# merge adjacent domains
retainedDomains <- do.call(
"rbind",
lapply(
unique(retainedDomains$proteinDomainID),
function(x) {
domain <- retainedDomains[retainedDomains$proteinDomainID == x,]
merged <- GenomicRanges::reduce(GRanges(domain$seqnames, IRanges(domain$start, domain$end), strand=domain$strand))
merged$proteinDomainName <- head(domain$proteinDomainName, 1)
merged$proteinDomainID <- head(domain$proteinDomainID, 1)
merged$color <- head(domain$color, 1)
return(as.data.frame(merged))
}
)
)
return(retainedDomains)
}
retainedDomains1 <- removeIntronsFromProteinDomains(codingExons1, retainedDomains1)
retainedDomains2 <- removeIntronsFromProteinDomains(codingExons2, retainedDomains2)
# abort, if no domains are retained
if (is.null(retainedDomains1) && is.null(retainedDomains2)) {
text(0.5, 0.5, "No protein domains retained in fusion.")
return(NULL)
}
# merge domains with similar coordinates
mergeSimilarDomains <- function(domains, mergeDomainsOverlappingBy) {
if (is.null(domains)) return(domains)
merged <- domains[F,] # create empty data frame
domains <- domains[order(domains$end - domains$start, decreasing=T),] # start with bigger domains => bigger domains are retained
for (domain in rownames(domains)) {
if (!any((abs(merged$start - domains[domain,"start"]) + abs(merged$end - domains[domain,"end"])) / (domains[domain,"end"] - domains[domain,"start"]) <= 1-mergeDomainsOverlappingBy))
merged <- rbind(merged, domains[domain,])
}
return(merged)
}
retainedDomains1 <- mergeSimilarDomains(retainedDomains1, mergeDomainsOverlappingBy)
retainedDomains2 <- mergeSimilarDomains(retainedDomains2, mergeDomainsOverlappingBy)
# if desired, reassign colors to protein domains to maximize contrast
if (optimizeDomainColors) {
uniqueDomains <- unique(c(retainedDomains1$proteinDomainID, retainedDomains2$proteinDomainID))
# make rainbow of pretty pastell colors
colors <- rainbow(length(uniqueDomains))
colors <- apply(col2rgb(colors), 2, function(x) { 0.3 + y/255 * 0.7 }) # make pastell colors
colors <- apply(colors, 2, function(x) {rgb(x["red"], x["green"], x["blue"])}) # convert back to rgb
# reassign colors
names(colors) <- uniqueDomains
retainedDomains1$color <- colors[retainedDomains1$proteinDomainID]
retainedDomains2$color <- colors[retainedDomains2$proteinDomainID]
}
# reverse exons and protein domains, if on the reverse strand
if (any(codingExons1$strand == "-")) {
codingExons1$length <- rev(codingExons1$length)
temp <- retainedDomains1$end
retainedDomains1$end <- codingLength1 - retainedDomains1$start
retainedDomains1$start <- codingLength1 - temp
}
if (any(codingExons2$strand == "-")) {
codingExons2$length <- rev(codingExons2$length)
temp <- retainedDomains2$end
retainedDomains2$end <- codingLength2 - retainedDomains2$start
retainedDomains2$start <- codingLength2 - temp
}
# normalize length to 1
codingExons1$length <- codingExons1$length / (codingLength1 + codingLength2)
codingExons2$length <- codingExons2$length / (codingLength1 + codingLength2)
retainedDomains1$start <- retainedDomains1$start / (codingLength1 + codingLength2)
retainedDomains1$end <- retainedDomains1$end / (codingLength1 + codingLength2)
retainedDomains2$start <- retainedDomains2$start / (codingLength1 + codingLength2)
retainedDomains2$end <- retainedDomains2$end / (codingLength1 + codingLength2)
# draw coding regions
rect(0, exonsY-exonHeight/2, sum(codingExons1$length), exonsY+exonHeight/2, col=color1, border=NA)
rect(sum(codingExons1$length), exonsY-exonHeight/2, sum(codingExons1$length) + sum(codingExons2$length), exonsY+exonHeight/2, col=color2, border=NA)
# indicate exon boundaries as dotted lines
exonBoundaries <- cumsum(c(codingExons1$length, codingExons2$length))
if (length(exonBoundaries) > 1) {
exonBoundaries <- exonBoundaries[1:(length(exonBoundaries)-1)]
for (exonBoundary in exonBoundaries)
lines(c(exonBoundary, exonBoundary), c(exonsY-exonHeight, exonsY+exonHeight), col="white", lty=3)
}
# find overlapping domains
# nest if one is contained in another
# stack if they overlap partially
nestDomains <- function(domains) {
if (length(unlist(domains)) == 0) return(domains)
domains <- domains[order(domains$end - domains$start, decreasing=T),]
rownames(domains) <- 1:nrow(domains)
# find nested domains and make tree structure
domains$parent <- 0
for (domain in rownames(domains))
domains[domains$start >= domains[domain,"start"] & domains$end <= domains[domain,"end"] & rownames(domains) != domain,"parent"] <- domain
# find partially overlapping domains
maxOverlappingDomains <- max(coverage(IRanges(domains$start*10e6, domains$end*10e6)))
padding <- 1 / maxOverlappingDomains * 0.4
domains$y <- 0
domains$height <- 0
adjustPositionAndHeight <- function(parentDomain, y, height, padding, e) {
for (domain in which(e$domains$parent == parentDomain)) {
overlappingDomains <- which((between(e$domains$start, e$domains[domain,"start"], e$domains[domain,"end"]) |
between(e$domains$end , e$domains[domain,"start"], e$domains[domain,"end"])) &
e$domains$parent == parentDomain)
e$domains[domain,"height"] <- height/length(overlappingDomains) - padding * (length(overlappingDomains)-1) / length(overlappingDomains)
e$domains[domain,"y"] <- y + (which(domain==overlappingDomains)-1) * (e$domains[domain,"height"] + padding)
adjustPositionAndHeight(domain, e$domains[domain,"y"]+padding, e$domains[domain,"height"]-2*padding, padding, e)
}
}
adjustPositionAndHeight(0, 0, 1, padding, environment())
domains <- domains[order(domains$height, decreasing=T),] # draw nested domains last
return(domains)
}
retainedDomains1 <- nestDomains(retainedDomains1)
retainedDomains2 <- nestDomains(retainedDomains2)
retainedDomains1$y <- exonsY - exonHeight/2 + 0.025 + (exonHeight-2*0.025) * retainedDomains1$y
retainedDomains2$y <- exonsY - exonHeight/2 + 0.025 + (exonHeight-2*0.025) * retainedDomains2$y
retainedDomains1$height <- retainedDomains1$height * (exonHeight-2*0.025)
retainedDomains2$height <- retainedDomains2$height * (exonHeight-2*0.025)
# draw domains
drawProteinDomainRect <- function(left, bottom, right, top, color) {
rect(left, bottom, right, top, col=color, border=getDarkColor(color))
# draw gradients for 3D effect
gradientSteps <- 20
drawVerticalGradient(rep(left, gradientSteps), rep(right, gradientSteps), seq(top, bottom, len=gradientSteps), rgb(1,1,1,0.7))
drawVerticalGradient(rep(left, gradientSteps), rep(right, gradientSteps), seq(bottom, bottom+(top-bottom)*0.4, len=gradientSteps), rgb(0,0,0,0.1))
}
if (length(unlist(retainedDomains1)) > 0)
for (domain in 1:nrow(retainedDomains1))
drawProteinDomainRect(retainedDomains1[domain,"start"], retainedDomains1[domain,"y"], retainedDomains1[domain,"end"], retainedDomains1[domain,"y"]+retainedDomains1[domain,"height"], retainedDomains1[domain,"color"])
if (length(unlist(retainedDomains2)) > 0)
for (domain in 1:nrow(retainedDomains2))
drawProteinDomainRect(sum(codingExons1$length)+retainedDomains2[domain,"start"], retainedDomains2[domain,"y"], sum(codingExons1$length)+retainedDomains2[domain,"end"], retainedDomains2[domain,"y"]+retainedDomains2[domain,"height"], retainedDomains2[domain,"color"])
# draw gene names, if there are coding exons
if (codingLength1 > 0)
text(sum(codingExons1$length)/2, geneNamesY, fusion$gene1, font=2, cex=fontSize)
if (codingLength2 > 0)
text(sum(codingExons1$length)+sum(codingExons2$length)/2, geneNamesY, fusion$gene2, font=2, cex=fontSize)
# calculate how many non-adjacent unique domains there are
# we need this info to know where to place labels vertically
countUniqueDomains <- function(domains) {
uniqueDomains <- 0
if (length(unlist(domains)) > 0) {
uniqueDomains <- 1
if (nrow(domains) > 1) {
previousDomain <- domains[1,"proteinDomainID"]
for (domain in 2:nrow(domains)) {
if (previousDomain != domains[domain,"proteinDomainID"])
uniqueDomains <- uniqueDomains + 1
previousDomain <- domains[domain,"proteinDomainID"]
}
}
}
return(uniqueDomains)
}
if (length(unlist(retainedDomains1)) > 0)
retainedDomains1 <- retainedDomains1[order(retainedDomains1$start),]
uniqueDomains1 <- countUniqueDomains(retainedDomains1)
if (length(unlist(retainedDomains2)) > 0)
retainedDomains2 <- retainedDomains2[order(retainedDomains2$end, decreasing=T),]
uniqueDomains2 <- countUniqueDomains(retainedDomains2)
# draw title of plot
titleY <- exonsY + exonHeight/2 + (uniqueDomains1 + 2) * 0.05
text(0.5, titleY+0.01, "RETAINED PROTEIN DOMAINS", adj=c(0.5, 0), font=2, cex=fontSize)
text(0.5, titleY, ifelse(fusion$reading_frame %in% c("in-frame", "out-of-frame"), paste(fusion$reading_frame, "fusion"), ifelse(fusion$reading_frame == "stop-codon", "stop codon before fusion junction", "reading frame unclear")), adj=c(0.5, 1), cex=fontSize)
# draw domain labels for gene1
if (length(unlist(retainedDomains1)) > 0) {
previousConnectorX <- -1
previousLabelX <- -1
labelY <- exonsY + exonHeight/2 + uniqueDomains1 * 0.05
for (domain in 1:nrow(retainedDomains1)) {
# if possible avoid overlapping lines of labels
connectorX <- min(retainedDomains1[domain,"start"] + 0.01, (retainedDomains1[domain,"start"] + retainedDomains1[domain,"end"])/2)
if (connectorX - previousConnectorX < 0.01 && retainedDomains1[domain,"end"] > previousConnectorX + 0.01)
connectorX <- previousConnectorX + 0.01
labelX <- max(connectorX, previousLabelX) + 0.02
# use a signle label for adjacent domains of same type
adjacentDomainsOfSameType <- domain + 1 <= nrow(retainedDomains1) && retainedDomains1[domain+1,"proteinDomainID"] == retainedDomains1[domain,"proteinDomainID"]
if (adjacentDomainsOfSameType) {
labelX <- retainedDomains1[domain+1,"start"] + 0.015
} else {
text(labelX, labelY, retainedDomains1[domain,"proteinDomainName"], adj=c(0,0.5), col=getDarkColor(retainedDomains1[domain,"color"]), cex=fontSize)
}
lines(c(labelX-0.005, connectorX, connectorX), c(labelY, labelY, retainedDomains1[domain,"y"]+retainedDomains1[domain,"height"]), col=getDarkColor(retainedDomains1[domain,"color"]))
if (!adjacentDomainsOfSameType)
labelY <- labelY - 0.05
previousConnectorX <- connectorX
previousLabelX <- labelX
}
}
# draw domain labels for gene2
if (length(unlist(retainedDomains2)) > 0) {
previousConnectorX <- 100
previousLabelX <- 100
labelY <- exonsY - exonHeight/2 - (uniqueDomains2+1) * 0.05
for (domain in 1:nrow(retainedDomains2)) {
# if possible avoid overlapping connector lines of labels
connectorX <- sum(codingExons1$length) + max(retainedDomains2[domain,"end"] - 0.01, (retainedDomains2[domain,"start"] + retainedDomains2[domain,"end"])/2)
if (previousConnectorX - connectorX < 0.01 && sum(codingExons1$length) + retainedDomains2[domain,"start"] < previousConnectorX - 0.01)
connectorX <- previousConnectorX - 0.01
labelX <- min(connectorX, previousLabelX) - 0.02
# use a signle label for adjacent domains of same type
adjacentDomainsOfSameType <- domain + 1 <= nrow(retainedDomains2) && retainedDomains2[domain+1,"proteinDomainID"] == retainedDomains2[domain,"proteinDomainID"]
if (adjacentDomainsOfSameType) {
labelX <- sum(codingExons1$length) + retainedDomains2[domain+1,"end"] - 0.015
} else {
text(labelX, labelY, retainedDomains2[domain,"proteinDomainName"], adj=c(1,0.5), col=getDarkColor(retainedDomains2[domain,"color"]), cex=fontSize)
}
lines(c(labelX+0.005, connectorX, connectorX), c(labelY, labelY, retainedDomains2[domain,"y"]), col=getDarkColor(retainedDomains2[domain,"color"]))
if (!adjacentDomainsOfSameType)
labelY <- labelY + 0.05
previousConnectorX <- connectorX
previousLabelX <- labelX
}
}
}
findExons <- function(exons, contig, gene, direction, breakpoint, coverage, transcriptId, transcriptSelection) {
# use the provided transcript if desired
if (transcriptSelection == "provided" && transcriptId != "." && transcriptId != "") {
candidateExons <- exons[exons$transcript == transcriptId,]
if (nrow(candidateExons) == 0) {
warning(paste0("Unknown transcript given in fusions file (", transcriptId, "), selecting a different one"))
} else {
return(candidateExons)
}
}
if (transcriptSelection == "canonical") {
candidateExons <- exons[exons$geneName == gene & exons$contig == contig,]
} else {
# look for exon with breakpoint as splice site
transcripts <- exons[exons$geneName == gene & exons$contig == contig & exons$type == "exon" & (direction == "downstream" & abs(exons$end - breakpoint) <= 2 | direction == "upstream" & abs(exons$start - breakpoint) <= 2),"transcript"]
candidateExons <- exons[exons$transcript %in% transcripts,]
# if none was found, use all exons of the gene closest to the breakpoint
if (nrow(candidateExons) == 0) {
candidateExons <- exons[exons$geneName == gene & exons$contig == contig,]
if (length(unique(candidateExons$geneID)) > 0) { # more than one gene found with the given name => use the closest one
distanceToBreakpoint <- aggregate(1:nrow(candidateExons), by=list(candidateExons$geneID), function(x) { min(abs(candidateExons[x,"start"]-breakpoint), abs(candidateExons[x,"end"]-breakpoint)) })
closestGene <- head(distanceToBreakpoint[distanceToBreakpoint[,2] == min(distanceToBreakpoint[,2]),1], 1)
candidateExons <- candidateExons[candidateExons$geneID == closestGene,]
}
}
# if we have coverage information, use the transcript with the highest coverage if there are multiple hits
if (!is.null(coverage)) {
highestCoverage <- -1
transcriptWithHighestCoverage <- NULL
lengthOfTranscriptWithHighestCoverage <- 0
for (transcript in unique(candidateExons$transcript)) {
exonsOfTranscript <- candidateExons[candidateExons$transcript==transcript,]
exonsOfTranscript$start <- sapply(exonsOfTranscript$start, max, min(start(coverage)))
exonsOfTranscript$end <- sapply(exonsOfTranscript$end, min, max(end(coverage)))
lengthOfTranscript <- sum(exonsOfTranscript$end - exonsOfTranscript$start + 1)
coverageSum <- sum(as.numeric(coverage[IRanges(exonsOfTranscript$start, exonsOfTranscript$end)]))
# we prefer shorter transcripts over longer ones, because otherwise there is a bias towards transcripts with long UTRs
# => a longer transcript must have substantially higher coverage to replace a shorter one
substantialDifference <- (1 - min(lengthOfTranscript, lengthOfTranscriptWithHighestCoverage) / max(lengthOfTranscript, lengthOfTranscriptWithHighestCoverage)) / 10
if (lengthOfTranscript > lengthOfTranscriptWithHighestCoverage && coverageSum * (1-substantialDifference) > highestCoverage ||
lengthOfTranscript < lengthOfTranscriptWithHighestCoverage && coverageSum > highestCoverage * (1-substantialDifference)) {
highestCoverage <- coverageSum
transcriptWithHighestCoverage <- transcript
lengthOfTranscriptWithHighestCoverage <- lengthOfTranscript
}
}
candidateExons <- candidateExons[candidateExons$transcript==transcriptWithHighestCoverage,]
}
# if the gene has multiple transcripts, search for transcripts which encompass the breakpoint
if (length(unique(candidateExons$transcript)) > 1) {
transcriptStart <- aggregate(candidateExons$start, by=list(candidateExons$transcript), min)
rownames(transcriptStart) <- transcriptStart[,1]
transcriptEnd <- aggregate(candidateExons$end, by=list(candidateExons$transcript), max)
rownames(transcriptEnd) <- transcriptEnd[,1]
candidateExons <- candidateExons[between(breakpoint, transcriptStart[candidateExons$transcript,2], transcriptEnd[candidateExons$transcript,2]),]
}
}
# find the consensus transcript, if there are multiple hits
if (length(unique(candidateExons$transcript)) > 1) {
consensusTranscript <-
ifelse(grepl("appris_principal_1", candidateExons$attributes), 12,
ifelse(grepl("appris_principal_2", candidateExons$attributes), 11,
ifelse(grepl("appris_principal_3", candidateExons$attributes), 10,
ifelse(grepl("appris_principal_4", candidateExons$attributes), 9,
ifelse(grepl("appris_principal_5", candidateExons$attributes), 8,
ifelse(grepl("appris_principal", candidateExons$attributes), 7,
ifelse(grepl("appris_candidate_longest", candidateExons$attributes), 6,
ifelse(grepl("appris_candidate", candidateExons$attributes), 5,
ifelse(grepl("appris_alternative_1", candidateExons$attributes), 4,
ifelse(grepl("appris_alternative_2", candidateExons$attributes), 3,
ifelse(grepl("appris_alternative", candidateExons$attributes), 2,
ifelse(grepl("CCDS", candidateExons$attributes), 1,
0
))))))))))))
candidateExons <- candidateExons[consensusTranscript == max(consensusTranscript),]
}
# use the transcript with the longest coding sequence, if there are still multiple hits
if (length(unique(candidateExons$transcript)) > 1) {
codingSequenceLength <- ifelse(candidateExons$type == "CDS", candidateExons$end - candidateExons$start, 0)
totalCodingSequenceLength <- aggregate(codingSequenceLength, by=list(candidateExons$transcript), sum)
rownames(totalCodingSequenceLength) <- totalCodingSequenceLength[,1]
candidateExons <- candidateExons[totalCodingSequenceLength[candidateExons$transcript,2] == max(totalCodingSequenceLength[,2]),]
}
# use the transcript with the longest overall sequence, if there are still multiple hits
if (length(unique(candidateExons$transcript)) > 1) {
exonLength <- candidateExons$end - candidateExons$start
totalExonLength <- aggregate(exonLength, by=list(candidateExons$transcript), sum)
rownames(totalExonLength) <- totalExonLength[,1]
candidateExons <- candidateExons[totalExonLength[candidateExons$transcript,2] == max(totalExonLength[,2]),]
}
# if there are still multiple hits, select the first one
candidateExons <- unique(candidateExons[candidateExons$transcript == head(unique(candidateExons$transcript), 1),])
return(candidateExons)
}
.FusionsPlot <- function(fusionTable ,exons, cytobands, alignmentsFile , proteinDomains ,gene1 ,gene2, savePng=FALSE){
##initialize
require(GenomicAlignments)
require(GenomicRanges)
##
if(all( c( missing(gene1), missing(gene2))) == TRUE){
fusions <- fusionTable
}else{
if(all( c( !missing(gene1), !missing(gene2))) == TRUE){
##estan los dos
fusions <- fusionTable[which(fusionTable$gene1 == gene1 & fusionTable$gene2 == gene2 ),,drop=FALSE]
}else{
if(missing(gene1)){
fusions <- fusionTable[which( fusionTable$gene2 == gene2 ),,drop=FALSE]
}else{
fusions <- fusionTable[which( fusionTable$gene1 == gene1 ),,drop=FALSE]
}
}
}
for (fusion in 1:nrow(fusions)) {
message(paste0("Drawing fusion #", fusion, ": ", fusions[fusion,"gene1"], ":", fusions[fusion,"gene2"]))
# compute coverage from alignments file
coverage1 <- NULL
coverage2 <- NULL
if (alignmentsFile != "") {
# determine range in which we need to compute the coverage
determineCoverageRange <- function(exons, gene, contig, breakpoint, showVicinity) {
# find gene with given name closest to the breakpoint
closestExons <- exons[exons$geneName == gene & exons$contig == contig,]
if (length(unique(closestExons$geneID)) > 0) { # more than one gene found with the given name => use the closest one
distanceToBreakpoint <- aggregate(1:nrow(closestExons), by=list(closestExons$geneID), function(x) { min(abs(closestExons[x,"start"]-breakpoint), abs(closestExons[x,"end"]-breakpoint)) })
closestGene <- head(distanceToBreakpoint[distanceToBreakpoint[,2] == min(distanceToBreakpoint[,2]),1], 1)
closestExons <- closestExons[closestExons$geneID == closestGene,]
}
return(IRanges(min(closestExons$start, breakpoint-showVicinity), max(closestExons$end, breakpoint+showVicinity)))
}
# showVicinity <- 0##AGREGADO
coverageRange1 <- determineCoverageRange(exons, fusions[fusion,"gene1"], fusions[fusion,"contig1"], fusions[fusion,"breakpoint1"], showVicinity)
coverageRange2 <- determineCoverageRange(exons, fusions[fusion,"gene2"], fusions[fusion,"contig2"], fusions[fusion,"breakpoint2"], showVicinity)
# function which reads alignments from BAM file with & without "chr" prefix
readCoverage <- function(alignmentsFile, contig, coverageRange) {
coverageData <- tryCatch(
{
alignments <- readGAlignments(alignmentsFile, param=ScanBamParam(which=GRanges(contig, coverageRange)))
coverage(alignments)[[contig]]
},
error=function(e) {
alignments <- readGAlignments(alignmentsFile, param=ScanBamParam(which=GRanges(addChr(contig), coverageRange)))
coverage(alignments)[[addChr(contig)]]
}
)
if (exists("alignments")) rm(alignments)
return(coverageData)
}
# get coverage track
coverage1 <- readCoverage(alignmentsFile, fusions[fusion,"contig1"], coverageRange1)
coverage2 <- readCoverage(alignmentsFile, fusions[fusion,"contig2"], coverageRange2)
}
# find all exons belonging to the fused genes
exons1 <- findExons(exons, fusions[fusion,"contig1"], fusions[fusion,"gene1"], fusions[fusion,"direction1"], fusions[fusion,"breakpoint1"], coverage1, fusions[fusion,"transcript_id1"], transcriptSelection)
if (nrow(exons1) == 0) {
par(mfrow=c(1,1))
plot(0, 0, type="l", xaxt="n", yaxt="n", xlab="", ylab="")
text(0, 0, paste0("Error: exon coordinates of ", fusions[fusion,"gene1"], " not found in\n", exonsFile))
next
}
exons2 <- findExons(exons, fusions[fusion,"contig2"], fusions[fusion,"gene2"], fusions[fusion,"direction2"], fusions[fusion,"breakpoint2"], coverage2, fusions[fusion,"transcript_id2"], transcriptSelection)
if (nrow(exons2) == 0) {
par(mfrow=c(1,1))
plot(0, 0, type="l", xaxt="n", yaxt="n", xlab="", ylab="")
text(0, 0, paste0("Error: exon coordinates of ", fusions[fusion,"gene2"], " not found in\n", exonsFile))
next
}
# in case of intergenic breakpoints, show the vicinity
if (showVicinity > 0) {
if (fusions[fusion,"site1"] == "intergenic")
for (gene in unique(exons[exons$contig == fusions[fusion,"contig1"] & exons$exonNumber != "intergenic" &
(between(exons$end, fusions[fusion,"breakpoint1"]-showVicinity, fusions[fusion,"breakpoint1"]+showVicinity) |
between(exons$start, fusions[fusion,"breakpoint1"]-showVicinity, fusions[fusion,"breakpoint1"]+showVicinity)),"geneName"]))
exons1 <- rbind(exons1, findExons(exons, fusions[fusion,"contig1"], gene, fusions[fusion,"direction1"], fusions[fusion,"breakpoint1"], coverage1))
if (fusions[fusion,"site2"] == "intergenic")
for (gene in unique(exons[exons$contig == fusions[fusion,"contig2"] & exons$exonNumber != "intergenic" &
(between(exons$end, fusions[fusion,"breakpoint2"]-showVicinity, fusions[fusion,"breakpoint2"]+showVicinity) |
between(exons$start, fusions[fusion,"breakpoint2"]-showVicinity, fusions[fusion,"breakpoint2"]+showVicinity)),"geneName"]))
exons2 <- rbind(exons2, findExons(exons, fusions[fusion,"contig2"], gene, fusions[fusion,"direction2"], fusions[fusion,"breakpoint2"], coverage2))
}
# normalize coverage
if (alignmentsFile != "") {
coverageNormalization <- ifelse(
squishIntrons, # => ignore intronic coverage
max(
as.numeric(coverage1[IRanges(sapply(exons1$start,max,min(start(coverage1))),sapply(exons1$end,min,max(end(coverage1))))]),
as.numeric(coverage2[IRanges(sapply(exons2$start,max,min(start(coverage2))),sapply(exons2$end,min,max(end(coverage2))))])),
max(max(coverage1), max(coverage2))
)
coverage1 <- coverage1/coverageNormalization
coverage2 <- coverage2/coverageNormalization
}
# sort coding exons last, such that they are drawn over the border of non-coding exons
exons1 <- exons1[order(exons1$start, -rank(exons1$type)),]
exons2 <- exons2[order(exons2$start, -rank(exons2$type)),]
exons1$source <- as.character(exons1$source)
exons1$type <- as.character(exons1$type)
exons2$source <- as.character(exons2$source)
exons2$type <- as.character(exons2$type)
# insert dummy exons, if breakpoints are outside the gene (e.g., in UTRs)
# this avoids plotting artifacts
breakpoint1 <- fusions[fusion,"breakpoint1"]
breakpoint2 <- fusions[fusion,"breakpoint2"]
if (breakpoint1 < min(exons1$start)) {
exons1 <- rbind(c(contig=exons1[1,"contig"], source="dummy",type="dummy", start=breakpoint1-1000,
end=breakpoint1-1000, strand=exons1[1,"strand"], geneID="", geneName=exons1[1,"geneID"],
transcript=exons1[1,"transcript"], exonNumber =""), exons1)
} else if (breakpoint1 > max(exons1$end)) {
aa <- matrix(c(contig=exons1[1,"contig"], source="dummy",type="dummy", start=breakpoint1-1000,
end=breakpoint1-1000, strand=exons1[1,"strand"], geneID="", geneName=exons1[1,"geneID"],
transcript=exons1[1,"transcript"], exonNumber =""),nrow = 1)
aa <- data.frame(aa)
colnames(aa) <- colnames(exons1)
exons1 <- rbind(exons1,aa)
# exons1$start <- as.numeric(exons1$start)
# exons1$end <- as.numeric(exons1$end)
# exons1 <- rbind(exons1, t(data.frame(
# c(exons1[1,"contig"], "dummy", breakpoint1+1000, breakpoint1+1000, exons1[1,"strand"], "", "dummy", exons1[1,"geneID"], exons1[1,"transcript"], ""))))
}
if (breakpoint2 < min(exons2$start)) {
exons2 <- rbind(c(contig=exons2[1,"contig"], source="dummy",type="dummy", start=breakpoint2-1000,
end=breakpoint2-1000, strand=exons2[1,"strand"],geneID= "", geneName=exons2[1,"geneID"],
transcript=exons2[1,"transcript"], exonNumber= ""), exons2)
} else if (breakpoint2 > max(exons2$end)) {
aa2 <- matrix(c(contig=exons2[1,"contig"], source="dummy",type="dummy", start=breakpoint2-1000,
end=breakpoint2-1000, strand=exons2[1,"strand"],geneID= "", geneName=exons2[1,"geneID"],
transcript=exons2[1,"transcript"], exonNumber= ""),nrow = 1)
aa2 <- data.frame(aa2)
colnames(aa2) <- colnames(exons2)
exons2 <- rbind(exons2, aa2)
# exons2$start <- as.numeric(exons2$start)
# exons2$end <- as.numeric(exons2$end)
}
exons1$start <- as.integer(exons1$start)
exons1$end <- as.integer(exons1$end)
exons2$start <- as.integer(exons2$start)
exons2$end <- as.integer(exons2$end)
exons1$left <- exons1$start
exons1$right <- exons1$end
exons2$left <- exons2$start
exons2$right <- exons2$end
squishedIntronSize <- 200
if (squishIntrons) {
# hide introns in gene1
cumulativeIntronLength <- 0
previousExonEnd <- -squishedIntronSize
for (exon in 1:nrow(exons1)) {
if (breakpoint1 > previousExonEnd+1 && breakpoint1 < exons1[exon,"left"])
breakpoint1 <- (breakpoint1-previousExonEnd) / (exons1[exon,"left"]-previousExonEnd) * squishedIntronSize + previousExonEnd - cumulativeIntronLength
if (exons1[exon,"left"] > previousExonEnd) {
cumulativeIntronLength <- cumulativeIntronLength + exons1[exon,"left"] - previousExonEnd - squishedIntronSize
previousExonEnd <- exons1[exon,"right"]
}
if (breakpoint1 >= exons1[exon,"left"] && breakpoint1 <= exons1[exon,"right"]+1)
breakpoint1 <- breakpoint1 - cumulativeIntronLength
exons1[exon,"left"] <- exons1[exon,"left"] - cumulativeIntronLength
exons1[exon,"right"] <- exons1[exon,"right"] - cumulativeIntronLength
}
# hide introns in gene2
cumulativeIntronLength <- 0
previousExonEnd <- -squishedIntronSize
for (exon in 1:nrow(exons2)) {
if (breakpoint2 > previousExonEnd+1 && breakpoint2 < exons2[exon,"left"])
breakpoint2 <- (breakpoint2-previousExonEnd) / (exons2[exon,"left"]-previousExonEnd) * squishedIntronSize + previousExonEnd - cumulativeIntronLength
if (exons2[exon,"left"] > previousExonEnd) {
cumulativeIntronLength <- cumulativeIntronLength + exons2[exon,"left"] - previousExonEnd - squishedIntronSize
previousExonEnd <- exons2[exon,"right"]
}
if (breakpoint2 >= exons2[exon,"left"] && breakpoint2 <= exons2[exon,"right"]+1)
breakpoint2 <- breakpoint2 - cumulativeIntronLength
exons2[exon,"left"] <- exons2[exon,"left"] - cumulativeIntronLength
exons2[exon,"right"] <- exons2[exon,"right"] - cumulativeIntronLength
}
} else { # don't squish introns
# shift exon coordinates to align the gene to the left border of the plot
exons1$right <- exons1$right - min(exons1$left)
breakpoint1 <- breakpoint1 - min(exons1$left)
exons1$left <- exons1$left - min(exons1$left)
exons2$right <- exons2$right - min(exons2$left)
breakpoint2 <- breakpoint2 - min(exons2$left)
exons2$left <- exons2$left - min(exons2$left)
}
# scale exon sizes to 1
scalingFactor <- max(exons1$right) + max(exons2$right)
exons1$left <- exons1$left / scalingFactor
exons1$right <- exons1$right / scalingFactor
exons2$left <- exons2$left / scalingFactor
exons2$right <- exons2$right / scalingFactor
breakpoint1 <- breakpoint1 / scalingFactor
breakpoint2 <- breakpoint2 / scalingFactor
# shift gene2 to the right of gene1 with a little bit of padding
gene2Offset <- max(exons1$right)+0.05
# center fusion horizontally
fusionOffset1 <- (max(exons1$right)+gene2Offset)/2 - ifelse(fusions[fusion,"direction1"] == "downstream", breakpoint1, max(exons1$right)-breakpoint1)
fusionOffset2 <- fusionOffset1 + ifelse(fusions[fusion,"direction1"] == "downstream", breakpoint1, max(exons1$right)-breakpoint1)
#### start plots ###
if(!missing(alignmentsFile) & savePng){
png(filename = file.path(dirname(alignmentsFile),paste0(fusions[fusion,"gene1"],":",fusions[fusion,"gene2"],
"(",fusions[fusion,"breakpoint1"],":",fusions[fusion,"breakpoint2"],")",".png")))
}
exons2 <- exons2[!is.na(exons2$type),]
# layout: fusion on top, circos plot on bottom left, protein domains on bottom center, statistics on bottom right
layout(matrix(c(1,1,1,2,3,4), 2, 3, byrow=TRUE), widths=c(0.9, 1.2, 0.9))
par(mar=c(0, 0, 0, 0))
plot(0, 0, type="l", xlim=c(-0.12, 1.12), ylim=c(0.4, 1.1), bty="n", xaxt="n", yaxt="n")
# vertical coordinates of layers
yIdeograms <- ifelse(alignmentsFile != "", 0.94, 0.84)
yBreakpointLabels <- ifelse(alignmentsFile != "", 0.86, 0.76)
yCoverage <- 0.72
yExons <- 0.67
yGeneNames <- 0.58
yFusion <- 0.5
yTranscript <- 0.45
yScale <- 0.407
yTrajectoryBreakpointLabels <- yBreakpointLabels - 0.035
yTrajectoryExonTop <- yExons + 0.03
yTrajectoryExonBottom <- yExons - 0.055
yTrajectoryFusion <- yFusion + 0.03
# draw ideograms
if (!is.null(cytobands)) {
drawIdeogram("left", min(exons1$left), max(exons1$right), yIdeograms, cytobands, fusions[fusion,"contig1"], fusions[fusion,"breakpoint1"])
drawIdeogram("right", gene2Offset, gene2Offset+max(exons2$right), yIdeograms, cytobands, fusions[fusion,"contig2"], fusions[fusion,"breakpoint2"])
}
# draw gene & transcript names
text(max(exons1$right)/2, yGeneNames, fusions[fusion,"gene1"], font=2, cex=fontSize, adj=c(0.5,0))
if (fusions[fusion,"site1"] != "intergenic")
text(max(exons1$right)/2, yGeneNames-0.01, head(exons1$transcript,1), cex=0.9*fontSize, adj=c(0.5,1))
text(gene2Offset+max(exons2$right)/2, yGeneNames, fusions[fusion,"gene2"], font=2, cex=fontSize, adj=c(0.5,0))
if (fusions[fusion,"site2"] != "intergenic")
text(gene2Offset+max(exons2$right)/2, yGeneNames-0.01, head(exons2$transcript,1), cex=0.9*fontSize, adj=c(0.5,1))
# if multiple genes in the vicinity are shown, label them
if (fusions[fusion,"site1"] == "intergenic")
for (gene in unique(exons1$geneName)) {
exonsOfGene <- exons1[exons1$geneName == gene & exons1$type != "dummy",]
if (any(exonsOfGene$type == "exon"))
text(mean(c(min(exonsOfGene$left), max(exonsOfGene$right))), yExons-0.04, gene, cex=0.9*fontSize, adj=c(0.5,1))
}
if (fusions[fusion,"site2"] == "intergenic")
for (gene in unique(exons2$geneName)) {
exonsOfGene <- exons2[exons2$geneName == gene & exons2$type != "dummy",]
if (any(exonsOfGene$type == "exon"))
text(gene2Offset+mean(c(min(exonsOfGene$left), max(exonsOfGene$right))), yExons-0.04, gene, cex=0.9*fontSize, adj=c(0.5,1))
}
# label breakpoints
text(breakpoint1+0.01, yBreakpointLabels-0.03, paste0("breakpoint\n", fusions[fusion,"display_contig1"], ":", fusions[fusion,"breakpoint1"]), adj=c(1,0), cex=fontSize)
text(gene2Offset+breakpoint2-0.01, yBreakpointLabels-0.03, paste0("breakpoint\n", fusions[fusion,"display_contig2"], ":", fusions[fusion,"breakpoint2"]), adj=c(0,0), cex=fontSize)
# draw coverage axis
if (alignmentsFile != "") {
lines(c(-0.02, -0.01, -0.01, -0.02), c(yCoverage, yCoverage, yCoverage+0.1, yCoverage+0.1))
text(-0.025, yCoverage, "0", adj=c(1,0.5), cex=0.9*fontSize)
text(-0.025, yCoverage+0.1, coverageNormalization, adj=c(1,0.5), cex=0.9*fontSize)
text(-0.05, yCoverage+0.08, "Coverage", srt=90, cex=0.9*fontSize, adj=c(1,0.5))
rect(min(exons1$left), yCoverage, max(exons1$right), yCoverage+0.1, col="#eeeeee", border=NA)
rect(gene2Offset+min(exons2$left), yCoverage, gene2Offset+max(exons2$right), yCoverage+0.1, col="#eeeeee", border=NA)
}
# plot coverage 1
if (squishIntrons) {
for (exon in 1:nrow(exons1))
if (exons1[exon,"type"] != "CDS") # don't draw coverage twice for coding regions
drawCoverage(exons1[exon,"left"], exons1[exon,"right"], yCoverage, coverage1, exons1[exon,"start"], exons1[exon,"end"], color1)
} else {
drawCoverage(min(exons1$left), max(exons1$right), yCoverage, coverage1, min(exons1$start), max(exons1$end), color1)
}
# plot coverage 2
if (squishIntrons) {
for (exon in 1:nrow(exons2))
er <- try((exons2[exon,"type"] != "CDS"))
if(inherits(er,"try-error")){
er <- er
}
if (exons2[exon,"type"] != "CDS") # don't draw coverage twice for coding regions
drawCoverage(gene2Offset+exons2[exon,"left"], gene2Offset+exons2[exon,"right"], yCoverage, coverage2, exons2[exon,"start"], exons2[exon,"end"], color2)
} else {
drawCoverage(gene2Offset+min(exons2$left), gene2Offset+max(exons2$right), yCoverage, coverage2, min(exons2$start), max(exons2$end), color2)
}
# plot gene 1
lines(c(min(exons1$left), max(exons1$right)), c(yExons, yExons), col=darkColor1)
for (gene in unique(exons1$geneName))
drawStrand(min(exons1[exons1$geneName == gene,"left"]), max(exons1[exons1$geneName == gene,"right"]), yExons, darkColor1, head(exons1[exons1$geneName == gene,"strand"],1))
for (exon in 1:nrow(exons1)){
drawExon(exons1[exon,"left"], exons1[exon,"right"], yExons, color1, exons1[exon,"exonNumber"], exons1[exon,"type"])
}
# drawExon(exons1[exon,"left"], exons1[exon,"right"], yExons, color1, exons1[exon,"exonNumber"], exons1[exon,"type"])
# plot gene 2
lines(c(gene2Offset, gene2Offset+max(exons2$right)), c(yExons, yExons), col=darkColor2)
for (gene in unique(exons2$geneName)){
drawStrand(gene2Offset+min(exons2[exons2$geneName == gene,"left"]), gene2Offset+max(exons2[exons2$geneName == gene,"right"]), yExons, darkColor2, head(exons2[exons2$geneName == gene,"strand"],1))
}
for (exon in 1:nrow(exons2)){
drawExon(gene2Offset+exons2[exon,"left"], gene2Offset+exons2[exon,"right"], yExons, color2, exons2[exon,"exonNumber"], exons2[exon,"type"])
}
# plot gene1 of fusion
if (fusions[fusion,"direction1"] == "downstream") {
# plot strands
lines(c(fusionOffset1, fusionOffset1+breakpoint1), c(yFusion, yFusion), col=darkColor1)
for (gene in unique(exons1$geneName)) {
exonsOfGene <- exons1[exons1$geneName == gene,]
if (min(exonsOfGene$start) <= fusions[fusion,"breakpoint1"])
drawStrand(fusionOffset1+min(exonsOfGene$left), fusionOffset1+min(breakpoint1, max(exonsOfGene$right)), yFusion, col=darkColor1, exonsOfGene$strand[1])
}
# plot exons
for (exon in 1:nrow(exons1))
if (exons1[exon,"start"] <= fusions[fusion,"breakpoint1"])
drawExon(fusionOffset1+exons1[exon,"left"], fusionOffset1+min(breakpoint1, exons1[exon,"right"]), yFusion, color1, exons1[exon,"exonNumber"], exons1[exon,"type"])
# plot trajectories
lines(c(0, 0, fusionOffset1), c(yTrajectoryExonTop, yTrajectoryExonBottom, yTrajectoryFusion), col="red", lty=2)
lines(c(breakpoint1, breakpoint1, fusionOffset1+breakpoint1), c(yTrajectoryBreakpointLabels, yTrajectoryExonBottom, yTrajectoryFusion), col="red", lty=2)
} else if (fusions[fusion,"direction1"] == "upstream") {
# plot strands
lines(c(fusionOffset1, fusionOffset2), c(yFusion, yFusion), col=darkColor1)
for (gene in unique(exons1$geneName)) {
exonsOfGene <- exons1[exons1$geneName == gene,]
if (max(exonsOfGene$end+1) >= fusions[fusion,"breakpoint1"])
drawStrand(fusionOffset2-max(exonsOfGene$right)+breakpoint1, min(fusionOffset2, fusionOffset2-min(exonsOfGene$left)+breakpoint1), yFusion, col=darkColor1, chartr("+-", "-+", exonsOfGene$strand[1]))
}
# plot exons
for (exon in 1:nrow(exons1))
if (exons1[exon,"end"]+1 >= fusions[fusion,"breakpoint1"])
drawExon(fusionOffset1+max(exons1$right)-exons1[exon,"right"], min(fusionOffset2, fusionOffset1+max(exons1$right)-exons1[exon,"left"]), yFusion, color1, exons1[exon,"exonNumber"], exons1[exon,"type"])
# plot trajectories
lines(c(max(exons1$right), max(exons1$right), fusionOffset1), c(yTrajectoryExonTop, yTrajectoryExonBottom, yTrajectoryFusion), col="red", lty=2)
lines(c(breakpoint1, breakpoint1, fusionOffset1+max(exons1$right)-breakpoint1), c(yTrajectoryBreakpointLabels, yTrajectoryExonBottom, yTrajectoryFusion), col="red", lty=2)
}
# plot gene2 of fusion
if (fusions[fusion,"direction2"] == "downstream") {
# plot strands
lines(c(fusionOffset2, fusionOffset2+breakpoint2), c(yFusion, yFusion), col=darkColor2)
for (gene in unique(exons2$geneName)) {
exonsOfGene <- exons2[exons2$geneName == gene,]
if (min(exonsOfGene$start) <= fusions[fusion,"breakpoint2"])
drawStrand(max(fusionOffset2, fusionOffset2+breakpoint2-max(exonsOfGene$right)), fusionOffset2+breakpoint2-min(exonsOfGene$left), yFusion, col=darkColor2, chartr("+-", "-+", exonsOfGene$strand[1]))
}
# plot exons
for (exon in 1:nrow(exons2))
if (exons2[exon,"start"] <= fusions[fusion,"breakpoint2"])
drawExon(max(fusionOffset2, fusionOffset2+breakpoint2-exons2[exon,"right"]), fusionOffset2+breakpoint2-exons2[exon,"left"], yFusion, color2, exons2[exon,"exonNumber"], exons2[exon,"type"])
# plot trajectories
lines(c(gene2Offset, gene2Offset, fusionOffset2+breakpoint2), c(yTrajectoryExonTop, yTrajectoryExonBottom, yTrajectoryFusion), col="red", lty=2)
lines(c(gene2Offset+breakpoint2, gene2Offset+breakpoint2, fusionOffset2), c(yTrajectoryBreakpointLabels, yTrajectoryExonBottom, yTrajectoryFusion), col="red", lty=2)
} else if (fusions[fusion,"direction2"] == "upstream") {
# plot strands
lines(c(fusionOffset2, fusionOffset2+max(exons2$right)-breakpoint2), c(yFusion, yFusion), col=darkColor2)
for (gene in unique(exons2$geneName)) {
exonsOfGene <- exons2[exons2$geneName == gene,]
if (max(exonsOfGene$end+1) >= fusions[fusion,"breakpoint2"])
drawStrand(max(fusionOffset2, fusionOffset2+min(exonsOfGene$left)-breakpoint2), fusionOffset2+max(exonsOfGene$right)-breakpoint2, yFusion, col=darkColor2, exonsOfGene$strand[1])
}
# plot exons
for (exon in 1:nrow(exons2))
if (exons2[exon,"end"]+1 >= fusions[fusion,"breakpoint2"])
drawExon(max(fusionOffset2, fusionOffset2+exons2[exon,"left"]-breakpoint2), fusionOffset2+exons2[exon,"right"]-breakpoint2, yFusion, color2, exons2[exon,"exonNumber"], exons2[exon,"type"])
# plot trajectories
lines(c(gene2Offset+max(exons2$right), gene2Offset+max(exons2$right), fusionOffset2+max(exons2$right)-breakpoint2), c(yTrajectoryExonTop, yTrajectoryExonBottom, yTrajectoryFusion), col="red", lty=2)
lines(c(gene2Offset+breakpoint2, gene2Offset+breakpoint2, fusionOffset2), c(yTrajectoryBreakpointLabels, yTrajectoryExonBottom, yTrajectoryFusion), col="red", lty=2)
}
if (fusions[fusion,"fusion_transcript"] != ".") {
# print fusion transcript colored by gene of origin
fusion_transcript1 <- gsub("\\|.*", "", fusions[fusion,"fusion_transcript"], perl=T)
fusion_transcript1 <- substr(fusion_transcript1, max(1, nchar(fusion_transcript1)-30), nchar(fusion_transcript1))
fusion_transcript2 <- gsub(".*\\|", "", fusions[fusion,"fusion_transcript"], perl=T)
fusion_transcript2 <- substr(fusion_transcript2, 1, min(nchar(fusion_transcript2), 30))
# check for non-template bases
non_template_bases <- gsub(".*\\|([^|]*)\\|.*", "\\1", fusions[fusion,"fusion_transcript"])
if (non_template_bases == fusions[fusion,"fusion_transcript"]) # no non-template bases found
non_template_bases <- ""
# divide non-template bases half-and-half for centered alignment
non_template_bases1 <- substr(non_template_bases, 1, floor(nchar(non_template_bases)/2))
non_template_bases2 <- substr(non_template_bases, ceiling(nchar(non_template_bases)/2), nchar(non_template_bases))
# transcript 1
text(fusionOffset2, yTranscript, bquote(.(fusion_transcript1) * phantom(.(non_template_bases1))), col=darkColor1, adj=c(1,0.5), cex=fontSize)
# transcript 2
text(fusionOffset2, yTranscript, bquote(phantom(.(non_template_bases2)) * .(fusion_transcript2)), col=darkColor2, adj=c(0,0.5), cex=fontSize)
# non-template bases
text(fusionOffset2, yTranscript, non_template_bases1, adj=c(1,0.5), cex=fontSize)
text(fusionOffset2, yTranscript, non_template_bases2, adj=c(0,0.5), cex=fontSize)
}
# draw scale
realScale <- max(exons1$end - exons1$start, exons2$end - exons2$start)
mapScale <- max(exons1$right - exons1$left, exons2$right - exons2$left)
# choose scale which is closest to desired scale length
desiredScaleSize <- 0.2
realScale <- desiredScaleSize / mapScale * realScale
mapScale <- desiredScaleSize
realScaleOptimalFit <- signif(realScale, 1) # round to most significant digit
mapScaleOptimalFit <- realScaleOptimalFit / realScale * mapScale
# draw scale line
lines(c(0, mapScaleOptimalFit), c(yScale, yScale)) # scale line
lines(c(0, 0), c(yScale-0.007, yScale+0.007)) # left whisker
lines(c(mapScaleOptimalFit, mapScaleOptimalFit), c(yScale-0.007, yScale+0.007)) # right whisker
# draw units above scale line
realScaleThousands <- max(0, min(3, floor(log10(realScaleOptimalFit)/3)))
scaleUnits <- c("bp", "kbp", "Mbp", "Gbp")
scaleLabel <- paste(realScaleOptimalFit/max(1,1000^realScaleThousands), scaleUnits[realScaleThousands+1])
text(mapScaleOptimalFit/2, yScale+0.005, scaleLabel, adj=c(0.5,0), cex=fontSize*0.9)
if (squishIntrons)
text(mapScaleOptimalFit, yScale, " introns not to scale", adj=c(0,0.5), cex=fontSize*0.9, font=3)
# draw circos plot
if (is.null(cytobands) || !("circlize" %in% names(sessionInfo()$otherPkgs)) || !("GenomicRanges" %in% names(sessionInfo()$otherPkgs))) {
plot(0, 0, type="l", xlim=c(0, 1), ylim=c(0, 1), bty="n", xaxt="n", yaxt="n")
} else {
par(mar=c(0, 4, 0, 0))
drawCircos(fusion, fusions, cytobands, minConfidenceForCircosPlot)
par(mar=c(0, 0, 0, 0))
}
# draw protein domains
plot(0, 0, type="l", xlim=c(-0.1, 1.1), ylim=c(0, 1), bty="n", xaxt="n", yaxt="n")
par(xpd=NA)
if (!is.null(proteinDomains))
drawProteinDomains(fusions[fusion,], exons1, exons2, proteinDomains, color1, color2, mergeDomainsOverlappingBy, optimizeDomainColors)
par(xpd=F)
# print statistics about supporting alignments
plot(0, 0, type="l", xlim=c(0, 1), ylim=c(0, 1), bty="n", xaxt="n", yaxt="n")
text(0, 0.575, "SUPPORTING READ COUNT", font=2, adj=c(0,0), cex=fontSize)
text(0, 0.525, paste0("Split reads = ", fusions[fusion,"split_reads"], "\n", "Discordant mates = ", fusions[fusion,"discordant_mates"]), adj=c(0,1), cex=fontSize)
if(!missing(alignmentsFile) & savePng){
dev.off()
}
}
}
#' FusionPlot
#' This function provides gene fusion visualization
#' @usage FusionPlot(sbjBamFile,gene1,gene2)
#' @param sbjBamFile string with full path of the processed and sorted bam file processed by RunSTAR and RunArriba
#' @param gene1 string with geneID as returned by \code{\link{RunArriba}}
#' _Fusion.xlsx table. Default missing (will plot all the fusions), otherwise it will only plot all the fusions where this gene is present
#' @param gene2 idem as gene1. If both gene1 and gene2 are set, only the fusions including those genes will be plotted
#' @param fusions if missing, it will upload the Fusions.xlsx file, else excel file with the desired fusions should be provided
#' @param savePlot (boolean) if plot should be saved
#' @seealso \code{\link{runArriba}}
#' @examples
#' \dontrun{
#' test.subject <- GetArribaRTest()
#' bam.subject <- RunSTAR(test.subject)
#' Fusions <- RunArriba(bam.subject)
#' bam.sorted.subject <- RunSortIndexBam(bam.subject)
#' FusionPlot(bam.sorted.subject)
#' }
#'
FusionPlot <- function(sbjBamFile,gene1,gene2, fusions, savePlot=TRUE){
darkColor1 <- getDarkColor(color1)
darkColor2 <- getDarkColor(color2)
software <- .OpenConfigFile()
if(!file.exists(paste0(sbjBamFile,".bai"))){
stop(paste0("\nERROR: it does not seems to be a sorted BAM file\nPlease verify\n"))
}
if(!all(file.exists(sbjBamFile,paste0(sbjBamFile,".bai")))){
stop(paste0("\nERROR: files\n",sbjBamFile,"\n",paste0(sbjBamFile,".bai"),"\nMUST EXIST"))
}
if(missing(fusions)==TRUE){
hd<-ArribaR:::.ReadBamHeader(sbjBamFile)
fusion.file <- unlist(stringr::str_split(sbjBamFile,hd$ProgramName))[1]
fusion.file <- paste0(fusion.file,"Fusions.xlsx")
# fusionTable <- openxlsx::read.xlsx(fusion.file)
fusions <- .ReadArribaFusionTable(fusionsFile = fusion.file)
}else{
fusions <- .ReadArribaFusionTable(fusionsFile = fusions)
}
# read cytoband annotation
cytobands <- .ReadCytobandFile()
# read exon annotation
message("Loading annotation")
exons <- .ReadExonAnnotationFile(fusionsTable = fusions)
# read protein domain annotation
proteinDomains <- .ReadProteinDomainsFile()
##drawing functions
# subjAlignedBam <- "/media/respaldo4t/RNAseq/39738/39738_Aligned.bam"
# Rsamtools::sortBam(file = subjAlignedBam, destination = stringr::str_replace(subjAlignedBam, ".bam","SortedByCoord"))
# subjAlignedBam <- stringr::str_replace(subjAlignedBam, ".bam","SortedByCoord.bam")
# Rsamtools::indexBam(subjAlignedBam)
.FusionsPlot(
fusionTable = fusions,
exons = exons,
cytobands = cytobands,
alignmentsFile = sbjBamFile,
proteinDomains = proteinDomains,
gene1 = gene1,
gene2 = gene2,
savePng = savePlot)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.