#' @title plotgene
#' @name plotgene
#' @description plotgene can be used to plot the gene structure and AS event
#' @importFrom ggbio autoplot
#' @importFrom ggbio theme_null
#' @importFrom ggbio theme_clear
#' @import GenomicFeatures
#' @importFrom GenomicRanges reduce
#' @param object an ICASDataSet
#' @param SJ a charactor of SJ
#' @param flank the distance of flanking extension
#' @param geneModul A TxDb object made from GTF used by BAM aligner or a GRangesList object
#' @param shrinkage the shrinkage for intron region (must be a positive >= 1). default 1 for no shrinkage
#' @param curvature the curvature for splice junction
#' @param reduce To collapse all gene structure annotation features or not
#' @param highlight.region the region to highlight
#' @param highlight.color the color of highlight.region
#' @param highlight.label the label of highlight.region
#' @param highlight.label.size the size of highlight region label
#' @param returnData logical value, indicating return data or plot
#' @param removeTxid logical value, indicating whether to remove the transcript of y axis lab
#'
#' @export
plotgene <- function(object,
SJ,
flank = 200,
geneModul,
shrinkage = 1,
curvature = -0.1,
reduce = FALSE,
highlight.region = NULL,
highlight.color = "#0000FF80",
highlight.label = NULL,
highlight.label.size = 4,
returnData = FALSE,
removeTxid = FALSE,
...) {
if(!is(object, "ICASDataSet"))
stop("The object must be a ICASDataSet data")
if(length(SJ) != 1) {
stop("SJ must be one SJ")
}
if(!SJ %in% row.names(psi(object)))
stop("SJ must be in row.names of psi(object)")
if(!is.numeric(flank)) {
stop("flank must be a positive value")
}
if(!(is(geneModul, "TxDb") | is(geneModul, "GRangesList"))) {
stop("geneModul must be a TxDb form GenomicFeatures or a GRangesList")
}
if(shrinkage < 1) {
stop("shrinkage must be >= 1")
}
if(flank < 0) {
stop("flank must be a positive value")
}
if(!is.numeric(curvature)) {
stop("curvature must be numeric value")
}
if(!is.logical(reduce)) {
stop("reduce must be logical value")
}
if(is.null(highlight.label)) highlight.label <- NA
if(highlight.label.size <= 0) {
stop("highlight.label.size must be a positive value")
}
if(!is.logical(returnData)) {
stop("returnData must be a logical value")
}
if(!is.logical(removeTxid)) {
stop("removeTxid must be a logical value")
}
highlight.label.size <- as.integer(highlight.label.size)
if(!is.null(highlight.region)) {
tryCatch(hlgr <- as(highlight.region, "GRanges"), error = function(e) stop("the highlight.region character vector must contain strings of the form 'chr:start-end' or 'chr:start-end:strand'"))
if(any(width(hlgr) < 3)) {
width(hlgr[width(hlgr) < 3]) <- 3
}
}
event <- SJEvent(object = object, SJ = SJ)
region <- GenomicRanges::reduce(as(strsplit(event$ID, "@")[[1]], "GRanges"))
BiocGenerics::start(region) <- BiocGenerics::start(region) - min(BiocGenerics::start(region), flank)
BiocGenerics::end(region) <- BiocGenerics::end(region) + flank
Mat5 <- data.table::as.data.table(as(strsplit(event$ID, "@")[[1]], "GRanges"))
Mat6 <- data.table::as.data.table(as(SJ, "GRanges"))
if(shrinkage == 1) { # original size
if(is(geneModul, "TxDb")) { # From TxDb
if(reduce) {
p4 <- tryCatch(ggbio::autoplot(geneModul, which = region, stat = "reduce"), error = function(e) stop("maybe your TxDb external pointer is not valid"))
} else {
p4 <- tryCatch(ggbio::autoplot(geneModul, which = region), error = function(e) stop("maybe your TxDb external pointer is not valid"))
}
p4 <- p4 + ggbio::theme_null()
p4 <- p4@ggplot
ypos <- lapply(ggplot_build(p4)$data, function(x) suppressWarnings(max(x$y)))
ypos <- max(unlist(ypos[!mapply(is.infinite, ypos)]))
if(is.null(highlight.region)) { # No highlight.region
p4 +
geom_curve(data = Mat5, aes(x = start, xend = end, y = ypos, yend = ypos), curvature = curvature) +
geom_curve(data = Mat6, aes(x = start, xend = end, y = ypos, yend = ypos), curvature = curvature, color = 2) +
lims(y = c(0, ypos * 1.2)) +
scale_x_continuous(expand = c(0, 0)) -> plast
} else { # With highlight.region
ypos_min <- lapply(ggplot_build(p4)$data, function(x) suppressWarnings(min(x$ymax)))
ypos_min <- min(unlist(ypos_min[!mapply(is.infinite, ypos_min)]))
p4 +
geom_curve(data = Mat5, aes(x = start, xend = end, y = ypos, yend = ypos), curvature = curvature) +
geom_curve(data = Mat6, aes(x = start, xend = end, y = ypos, yend = ypos), curvature = curvature, color = 2) +
lims(y = c(0, ypos * 1.2)) +
scale_x_continuous(expand = c(0, 0)) +
geom_rect(data = as.data.frame(hlgr), aes(xmin = start, xmax = end, ymin = ypos_min, ymax = ypos), fill = highlight.color) +
annotate(geom = "text", x = start(hlgr) + 0.5 * width(hlgr), y = ypos_min * 0.5, label = highlight.label, size = highlight.label.size) -> plast
}
} else { # From GRangesList
if(reduce) geneModul <- GRangesList(reduce(unlist(geneModul)))
lapply(geneModul, function(x) {
GRanges(seqnames = unique(as.character(seqnames(x))),
ranges = IRanges(start = min(start(x)), end = max(end(x))),
strand = unique(as.character(strand(x))))
}) -> tx_range
if(is.null(names(tx_range))) names(tx_range) <- seq_along(tx_range)
tx_range <- mapply(function(x) length(findOverlaps(query = region, subject = x, type = "within")), tx_range)
tx_cover <- names(tx_range)[tx_range == 1]
geneModul <- lapply(geneModul, function(x) x[subjectHits(findOverlaps(region, x))])
geneModul <- geneModul[mapply(length, geneModul) > 0]
if(length(geneModul) == 0) { # No exon in this region
if(length(tx_cover) == 0) tx_cover <- "Ref"
ggplot() +
geom_hline(yintercept = seq_along(tx_cover)) +
scale_y_continuous(breaks = seq_along(tx_cover), labels = tx_cover) +
ggbio::theme_clear() +
theme(axis.title = element_blank(), axis.ticks.y = element_blank()) -> p4
if(is.null(highlight.region)) { # No highlight.region
p4 +
geom_curve(data = Mat5, aes(x = start, xend = end, y = length(tx_cover) * 1.05, yend = length(tx_cover) * 1.05), curvature = curvature) +
geom_curve(data = Mat6, aes(x = start, xend = end, y = length(tx_cover) * 1.05, yend = length(tx_cover) * 1.05), curvature = curvature, color = 2) +
scale_x_continuous(limits = c(start(region), end(region)), expand = c(0, 0)) +
scale_y_continuous(limits = c(0, length(tx_cover) * 1.2), breaks = seq_along(tx_cover), labels = if(reduce) NULL else tx_cover) -> plast
} else { # Add highlight.region
p4 +
geom_curve(data = Mat5, aes(x = start, xend = end, y = length(tx_cover) * 1.05, yend = length(tx_cover) * 1.05), curvature = curvature) +
geom_curve(data = Mat6, aes(x = start, xend = end, y = length(tx_cover) * 1.05, yend = length(tx_cover) * 1.05), curvature = curvature, color = 2) +
scale_x_continuous(limits = c(start(region), end(region)), expand = c(0, 0)) +
geom_rect(data = as.data.frame(hlgr), aes(xmin = start, xmax = end, ymin = 0.9, ymax = length(tx_cover) + 0.1), fill = highlight.color) +
annotate(geom = "text", x = start(hlgr) + 0.5 * width(hlgr), y = 0.5, label = highlight.label, size = highlight.label.size) +
scale_y_continuous(limits = c(0, length(tx_cover) * 1.2), breaks = seq_along(tx_cover), labels = if(reduce) NULL else tx_cover) -> plast
}
} else { # With exon in this region
geneModul <- lapply(geneModul, function(x) {
start(x) <- MinMax(start(x), start(region), end(region))
end(x) <- MinMax(end(x), start(region), end(region))
return(x)
})
geneModul <- GRangesList(geneModul)
p4 <- tryCatch(ggbio::autoplot(geneModul, group.selfish = removeTxid)@ggplot, error = function(e) ggplot())
ypos <- lapply(ggplot_build(p4)$data, function(x) suppressWarnings(max(x$y)))
ypos <- max(unlist(ypos[!mapply(is.infinite, ypos)]))
if(is.null(highlight.region)) { # No highlight.region
p4 +
geom_curve(data = Mat5, aes(x = start, xend = end, y = ypos * 1.05, yend = ypos * 1.05), curvature = curvature) +
geom_curve(data = Mat6, aes(x = start, xend = end, y = ypos * 1.05, yend = ypos * 1.05), curvature = curvature, color = 2) +
scale_x_continuous(expand = c(0, 0)) +
ggbio::theme_clear() +
theme(axis.title = element_blank(), axis.line.y = element_blank(), axis.ticks.y = element_blank()) +
scale_y_continuous(breaks = seq_along(geneModul), labels = names(geneModul), limits = c(0, ypos * 1.2)) -> plast
} else { # Add highlight.region
ypos_min <- lapply(ggplot_build(p4)$data, function(x) suppressWarnings(min(x$ymax)))
ypos_min <- min(unlist(ypos_min[!mapply(is.infinite, ypos_min)]))
p4 +
geom_curve(data = Mat5, aes(x = start, xend = end, y = ypos * 1.05, yend = ypos * 1.05), curvature = curvature) +
geom_curve(data = Mat6, aes(x = start, xend = end, y = ypos * 1.05, yend = ypos * 1.05), curvature = curvature, color = 2) +
ggbio::theme_clear() +
theme(axis.title = element_blank(), axis.line.y = element_blank(), axis.ticks.y = element_blank()) +
geom_rect(data = as.data.frame(hlgr), aes(xmin = start, xmax = end, ymin = ypos_min * 0.9, ymax = ypos * 1.02), fill = highlight.color) +
annotate(geom = "text", x = start(hlgr) + 0.5 * width(hlgr), y = ypos_min * 0.6, label = highlight.label, size = highlight.label.size) +
scale_y_continuous(expand = c(0, 0), breaks = seq_along(geneModul), labels = names(geneModul), limits = c(0, ypos * 1.2)) -> plast
}
}
}
} else { # zoom out the intron
stopifnot(shrinkage > 1)
if(is(geneModul, "TxDb")) { # From TxDb
ebt <- exonsBy(geneModul, by = "tx", use.names = T)
if(is.element("gene_id", colnames(event))) {
if(!is.na(event$gene_id)) {
tbg <- transcriptsBy(x = geneModul, by = "gene")
tx_tu <- tbg[event$gene_id][[1]]$tx_name
geneModul <- ebt[tx_tu]
} else {
tx_tu <- GenomicFeatures::transcriptsByOverlaps(x = geneModul, ranges = region)$tx_name
geneModul <- ebt[tx_tu]
}
} else {
tx_tu <- GenomicFeatures::transcriptsByOverlaps(x = geneModul, ranges = region)$tx_name
geneModul <- ebt[tx_tu]
}
} else { # From GRangesList
geneModul <- geneModul
}
if(reduce) geneModul <- GRangesList(reduce(unlist(geneModul)))
lapply(geneModul, function(x) {
GRanges(seqnames = unique(as.character(seqnames(x))),
ranges = IRanges(start = min(start(x)), end = max(end(x))),
strand = unique(as.character(strand(x))))
}) -> tx_range
if(is.null(names(tx_range))) names(tx_range) <- seq_along(tx_range)
tx_range <- mapply(function(x) length(findOverlaps(query = region, subject = x, type = "within")), tx_range)
tx_cover <- names(tx_range)[tx_range == 1]
geneModul <- lapply(geneModul, function(x) x[subjectHits(findOverlaps(region, x))])
geneModul <- geneModul[mapply(length, geneModul) > 0]
if(length(geneModul) == 0) { # No exon in this region
if(length(tx_cover) == 0) tx_cover <- "Ref"
ggplot() +
geom_hline(yintercept = seq_along(tx_cover)) +
scale_y_continuous(breaks = seq_along(tx_cover), labels = tx_cover) +
ggbio::theme_clear() +
theme(axis.title = element_blank(), axis.ticks.y = element_blank()) -> p4
if(is.null(highlight.region)) { # No highlight.region
p4 +
geom_curve(data = Mat5, aes(x = start, xend = end, y = length(tx_cover) * 1.05, yend = length(tx_cover) * 1.05), curvature = curvature) +
geom_curve(data = Mat6, aes(x = start, xend = end, y = length(tx_cover) * 1.05, yend = length(tx_cover) * 1.05), curvature = curvature, color = 2) +
scale_x_continuous(limits = c(start(region), end(region)), expand = c(0, 0)) +
scale_y_continuous(limits = c(0, length(tx_cover) * 1.2), breaks = seq_along(tx_cover), labels = if(reduce) NULL else tx_cover) -> plast
} else { # Add highlight.region
p4 +
geom_curve(data = Mat5, aes(x = start, xend = end, y = length(tx_cover) * 1.05, yend = length(tx_cover) * 1.05), curvature = curvature) +
geom_curve(data = Mat6, aes(x = start, xend = end, y = length(tx_cover) * 1.05, yend = length(tx_cover) * 1.05), curvature = curvature, color = 2) +
scale_x_continuous(limits = c(start(region), end(region)), expand = c(0, 0)) +
geom_rect(data = as.data.frame(hlgr), aes(xmin = start, xmax = end, ymin = 0.9, ymax = length(tx_cover) + 0.1), fill = highlight.color) +
annotate(geom = "text", x = start(hlgr) + 0.5 * width(hlgr), y = 0.5, label = highlight.label, size = highlight.label.size) +
scale_y_continuous(limits = c(0, length(tx_cover) * 1.2), breaks = seq_along(tx_cover), labels = if(reduce) NULL else tx_cover) -> plast
}
} else { # With exon in this region
## trim overhang exon region from here
geneModul <- lapply(geneModul, function(x) {
start(x) <- MinMax(start(x), start(region), end(region))
end(x) <- MinMax(end(x), start(region), end(region))
return(x)
})
geneModul <- GRangesList(geneModul)
## trim overhang exon region end
## down sampling for shrinkage from here
ex_base <- coverage(reduce(unlist(geneModul)))[[unique(as.character(seqnames(reduce(unlist(geneModul)))))]]
ex_base <- tryCatch(ex_base[start(region):end(region)], error = function(e) ex_base[start(region):length(ex_base)])
if(length(ex_base) < width(region)) ex_base <- c(ex_base, rep(0, width(region) - length(ex_base)))
ex_base_tab <- data.table::data.table(x = start(region):end(region),
isExon = as.numeric(ex_base),
bin = rep(seq_along(runValue(ex_base)), runLength(ex_base)))
set.seed(547498)
ex_base_tab_sub <- tryCatch(rbind(ex_base_tab[isExon == 0, .SD[sample(.N, .N/shrinkage, replace = FALSE), ], by = bin], ex_base_tab[isExon == 1, ]), error = function(e) {
rbind(ex_base_tab[isExon == 0, .SD[sample(.N, .N/shrinkage, replace = TRUE), ], by = bin], ex_base_tab[isExon == 1, ])
})
sites_gtf <- c(start(reduce(unlist(geneModul))) - 1, end(reduce(unlist(geneModul))) + 1) # remaining splice site for SJ
sites_sj <- c(start(as(strsplit(event$ID, "@")[[1]], "GRanges")), end(as(strsplit(event$ID, "@")[[1]], "GRanges")))
sites <- union(sites_gtf, sites_sj)
if(!is.null(highlight.region)) {
sites <- union(sites, c(start(hlgr), end(hlgr)))
}
ex_base_tab_sub <- unique(rbind(ex_base_tab_sub, ex_base_tab[x %in% sites, ]))
data.table::setkey(ex_base_tab_sub, x)
ex_base_tab_sub$x2 <- min(ex_base_tab_sub$x):(min(ex_base_tab_sub$x) + nrow(ex_base_tab_sub) - 1)
data.table::setkey(ex_base_tab_sub, x)
for(i in seq_along(geneModul)) {
start(geneModul[[i]]) <- ex_base_tab_sub[match(start(geneModul[[i]]), x), x2]
end(geneModul[[i]]) <- ex_base_tab_sub[match(end(geneModul[[i]]), x), x2]
}
## down sampling for shrinkage end
p4 <- tryCatch(ggbio::autoplot(geneModul, group.selfish = removeTxid)@ggplot, error = function(e) ggplot())
ypos <- lapply(ggplot_build(p4)$data, function(x) suppressWarnings(max(x$y)))
ypos <- max(unlist(ypos[!mapply(is.infinite, ypos)]))
Mat5$start <- ex_base_tab_sub[match(Mat5$start, x), x2]
Mat6$start <- ex_base_tab_sub[match(Mat6$start, x), x2]
Mat5$end <- ex_base_tab_sub[match(Mat5$end, x), x2]
Mat6$end <- ex_base_tab_sub[match(Mat6$end, x), x2]
x_wrong <- c(ex_base_tab_sub[bin == min(bin), round(mean(x2))], ex_base_tab_sub[bin == max(bin), round(mean(x2))])
x_corect <- ex_base_tab_sub[match(x_wrong, x2), x]
if(is.null(highlight.region)) { # No highlight.region
p4 +
geom_curve(data = Mat5, aes(x = start, xend = end, y = ypos * 1.05, yend = ypos * 1.05), curvature = curvature) +
geom_curve(data = Mat6, aes(x = start, xend = end, y = ypos * 1.05, yend = ypos * 1.05), curvature = curvature, color = 2) +
scale_x_continuous(expand = c(0, 0), breaks = x_wrong, labels = x_corect) +
ggbio::theme_clear() +
theme(axis.title = element_blank(),
axis.line.y = element_blank(),
axis.ticks.y = element_blank(),
axis.line.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.x = element_blank()) +
scale_y_continuous(breaks = seq_along(geneModul), labels = names(geneModul), limits = c(0, ypos * 1.2)) -> plast
} else { # Add highlight.region
start(hlgr) <- ex_base_tab_sub[match(start(hlgr), x), x2]
end(hlgr) <- ex_base_tab_sub[match(end(hlgr), x), x2]
ypos_min <- lapply(ggplot_build(p4)$data, function(x) suppressWarnings(min(x$ymax)))
ypos_min <- min(unlist(ypos_min[!mapply(is.infinite, ypos_min)]))
p4 +
geom_curve(data = Mat5, aes(x = start, xend = end, y = ypos * 1.05, yend = ypos * 1.05), curvature = curvature) +
geom_curve(data = Mat6, aes(x = start, xend = end, y = ypos * 1.05, yend = ypos * 1.05), curvature = curvature, color = 2) +
ggbio::theme_clear() +
theme(axis.title = element_blank(),
axis.line.y = element_blank(),
axis.ticks.y = element_blank(),
axis.line.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.x = element_blank()) +
geom_rect(data = as.data.frame(hlgr), aes(xmin = start, xmax = end, ymin = ypos_min * 0.9, ymax = ypos * 1.02), fill = highlight.color) +
annotate(geom = "text", x = start(hlgr) + 0.5 * width(hlgr), y = ypos_min * 0.6, label = highlight.label, size = highlight.label.size) +
scale_y_continuous(expand = c(0, 0), breaks = seq_along(geneModul), labels = names(geneModul), limits = c(0, ypos * 1.2)) +
scale_x_continuous(expand = c(0, 0), breaks = x_wrong, labels = x_corect) -> plast
}
}
}
if(removeTxid) {
plast <- plast + theme(axis.text.y = element_blank())
}
if(returnData) {
res <- tryCatch(list(plast, ex_base_tab_sub), error = function(e) plast)
} else {
res <- plast
}
return(res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.