Nothing
#' @title Plots the locations of specified splice junction relative to isoforms
#'
#' @description Plots the locations of specified splice junction relative to isoforms. List of isoforms are retrieved from GTF.
#'
#' @param MarvelObject Marvel object. S3 object generated from \code{CheckAlignment.10x} function.
#' @param coord.intron Character string. Coordinates of splice junction whose splice junction will be plotted.
#' @param coord.intron.ext Numeric value. Number of bases to extend the splice junction start and end coordinates into the exons. Helpful to enhance splice junction locations on the plot. Default is \code{50}.
#' @param rescale_introns Logical value. If set to \code{TRUE}, the intron length will be shorten. Helpful when introns are very long and focus visualisation of exons and splice junctions. Default is \code{FALSE}.
#' @param show.protein.coding.only Logical value. If set to \code{TRUE} (default), only protein-coding isoforms will be displayed.
#' @param anno.label.size Numeric value. Font size of isoform ID labels. Default is \code{3}.
#' @param anno.colors Vector of character strings. Colors for non-coding UTRs, coding exons, and splice junctions, respectively. Default is \code{c("black", "gray", "red")}.
#'
#' @return An object of class S3 with new slots \code{MarvelObject$adhocGene$SJPosition$Plot}, \code{MarvelObject$adhocGene$SJPosition$metadata}, \code{MarvelObject$adhocGene$SJPosition$exonfile}, and \code{MarvelObject$adhocGene$SJPosition$cdsfile}.
#'
#' @importFrom plyr join
#' @import Matrix
#'
#' @export
#'
#' @examples
#'
#' marvel.demo.10x <- readRDS(system.file("extdata/data",
#' "marvel.demo.10x.rds",
#' package="MARVEL")
#' )
#'
#' marvel.demo.10x <- adhocGene.PlotSJPosition.10x(
#' MarvelObject=marvel.demo.10x,
#' coord.intron="chr1:100:1001",
#' rescale_introns=FALSE,
#' show.protein.coding.only=TRUE,
#' anno.label.size=1.5
#' )
adhocGene.PlotSJPosition.10x <- function(MarvelObject, coord.intron, coord.intron.ext=50, rescale_introns=FALSE, show.protein.coding.only=TRUE, anno.label.size=3, anno.colors=c("black", "gray", "red")) {
# Define arguments
MarvelObject <- MarvelObject
coord.intron <- coord.intron
sj.metadata <- MarvelObject$sj.metadata
gtf <- MarvelObject$gtf
coord.intron.ext <- coord.intron.ext
rescale_introns <- rescale_introns
show.protein.coding.only <- show.protein.coding.only
anno.label.size <- anno.label.size
anno.colors <- anno.colors
# Example arguments
#MarvelObject <- marvel.demo.10x
#coord.intron <- "chr1:1000:1001"
#sj.metadata <- MarvelObject$sj.metadata
#gtf <- MarvelObject$gtf
#coord.intron.ext <- 10
#rescale_introns <- TRUE
#show.protein.coding.only <- FALSE
#anno.label.size <- 3
#anno.colors <- c("black", "gray", "red")
#################################################################
#################### RETRIEVE TRANSCRIPTS #######################
#################################################################
# Retrieve gene name
gene_short_name <- sj.metadata[which(sj.metadata$coord.intron==coord.intron), "gene_short_name.start"]
if(length(gene_short_name)==0) {
message("No corresponding gene found for coord.intron specified")
return(MarvelObject)
}
# Subset (by approximation) releavnt gene
gtf <- gtf[grep(gene_short_name, gtf$V9, fixed=TRUE), ]
# Report progress
message("Retrieving transcripts from GTF file...")
. <- strsplit(gtf$V9, split=";")
. <- sapply(., function(x) grep("gene_name", x, value=TRUE))
. <- gsub("gene_name", "", .)
. <- gsub(" ", "", .)
. <- gsub("\"", "", .)
gtf$gene_short_name <- .
# Subset relevant gene
gtf <- gtf[which(gtf$gene_short_name==gene_short_name), ]
# Retrieve selected attributes
# gene_id
#. <- strsplit(gtf$V9, split=";")
#. <- sapply(., function(x) grep("gene_id", x, value=TRUE))
#. <- gsub("gene_id", "", .)
#. <- gsub(" ", "", .)
#. <- gsub("\"", "", .)
#gtf$gene_id <- .
# transcript_id
. <- strsplit(gtf$V9, split=";")
. <- sapply(., function(x) grep("transcript_id", x, value=TRUE))
. <- gsub("transcript_id", "", .)
. <- gsub(" ", "", .)
. <- gsub("\"", "", .)
gtf$transcript_id <- .
# transcript_biotype
. <- strsplit(gtf$V9, split=";")
. <- sapply(., function(x) grep("transcript_biotype", x, value=TRUE))
. <- gsub("transcript_biotype", "", .)
. <- gsub(" ", "", .)
. <- gsub("\"", "", .)
if(length(unique(.))==1 & unique(.)[1] == "character(0)") {
. <- strsplit(gtf$V9, split=";")
. <- sapply(., function(x) grep("transcript_type", x, value=TRUE))
. <- gsub("transcript_type", "", .)
. <- gsub(" ", "", .)
. <- gsub("\"", "", .)
gtf$transcript_biotype <- .
} else {
gtf$transcript_biotype <- .
}
# exon_id
. <- strsplit(gtf$V9, split=";")
. <- sapply(., function(x) grep("exon_id", x, value=TRUE))
. <- gsub("exon_id", "", .)
. <- gsub(" ", "", .)
. <- gsub("\"", "", .)
gtf$exon_id <- .
#gtf.backup <- gtf
# Annotate transcripts with ORF status
anno <- unique(gtf[,c("transcript_id", "transcript_biotype")])
anno <- anno[grep("ENST|ENSMUST", anno$transcript_id), ]
# Report progress
message(paste(nrow(anno), " transcripts identified", sep=""))
#################################################################
############# PREPARE WIGGLEPLOTR INPUT: EXON FILE ##############
#################################################################
# Define transcript ids
transcript_ids <- anno$transcript_id
grange.exon.list <- list()
for(i in 1:length(transcript_ids)) {
# Retrieve exons of relevant transcript
gtf.small <- gtf[which(gtf$transcript_id==transcript_ids[i]), ]
gtf.small <- gtf.small[which(gtf.small$V3=="exon"), ]
# Create GRange object
grange <- GenomicRanges::GRanges(seqnames=S4Vectors::Rle(gtf.small$V1),
ranges=IRanges::IRanges(gtf.small$V4,
width=(gtf.small$V5-gtf.small$V4)+1
),
strand=gtf.small$V7[1],
exon_id=gtf.small$exon_id,
exon_name=gtf.small$exon_id,
exon_rank=c(1:length(gtf.small$exon_id))
)
# Save into list
grange.exon.list[[i]] <- grange
}
# Convert list to GenomicRanges::GRanges list
grange.exon.list <- GenomicRanges::GRangesList(grange.exon.list)
names(grange.exon.list) <- transcript_ids
#################################################################
########### PREPARE WIGGLEPLOTR INPUT: EXON FILE + SJ ###########
#################################################################
grange.exon.sj.list <- list()
transcript.ids <- NULL
for(i in 1:length(grange.exon.list)) {
# Retrieve transcript
grange <- grange.exon.list[[i]]
exon <- as.data.frame(grange)
exon$start <- as.numeric(exon$start)
exon$end <- as.numeric(exon$end)
# Retrieve SJ chr, start, end
. <- strsplit(coord.intron, split=":", fixed=TRUE)[[1]]
chr.sj <- .[1]
start.sj <- as.numeric(.[2])
end.sj <- as.numeric(.[3])
# Find start, end exon
exon.sj.start <- exon$end[which(exon$end==start.sj-1)]
exon.sj.end <- exon$start[which(exon$start==end.sj+1)]
# Retrieve IRanges::IRanges to subset
exon.small <- exon[which(exon$end==exon.sj.start | exon$start==exon.sj.end), ]
# Trim exon length
exon.small$start[which(exon.small$end==exon.sj.start)] <- exon.small$end[which(exon.small$end==exon.sj.start)] - coord.intron.ext
exon.small$end[which(exon.small$start==exon.sj.end)] <- exon.small$start[which(exon.small$start==exon.sj.end)] + coord.intron.ext
if(nrow(exon.small) != 0) {
# Create GenomicRanges::GRanges object
grange.sj <- GenomicRanges::GRanges(seqnames=S4Vectors::Rle(exon.small$seqnames),
ranges=IRanges::IRanges(exon.small$start,
width=(exon.small$end-exon.small$start)+1
),
strand=exon.small$strand,
exon_id=exon.small$exon_id,
exon_name=exon.small$exon_id,
exon_rank=c(1:length(exon.small$exon_id))
)
grange.exon.sj.list[[i]] <- grange.sj
transcript.ids[i] <- names(grange.exon.list)[i]
} else {
grange.exon.sj.list[[i]] <- FALSE
transcript.ids[i] <- FALSE
}
}
# Convert list to GenomicRanges::GRanges list
index.keep <- which(transcript.ids != FALSE)
grange.exon.sj.list <- grange.exon.sj.list[index.keep]
transcript.ids <- transcript.ids[index.keep]
if(length(grange.exon.sj.list) != 0) {
grange.exon.sj.list <- GenomicRanges::GRangesList(grange.exon.sj.list)
names(grange.exon.sj.list) <- transcript.ids
}
#################################################################
################ PREPARE WIGGLEPLOTR INPUT: CDS #################
#################################################################
# Define transcript ids
transcript_ids <- anno$transcript_id
grange.cds.list <- list()
transcript.ids <- NULL
for(i in 1:length(transcript_ids)) {
# Retrieve exons of relevant transcript
gtf.small <- gtf[which(gtf$transcript_id==transcript_ids[i]), ]
gtf.small <- gtf.small[which(gtf.small$V3=="CDS"), ]
if(nrow(gtf.small) != 0) {
# Create GRange object
grange <- GenomicRanges::GRanges(seqnames=S4Vectors::Rle(gtf.small$V1),
ranges=IRanges::IRanges(gtf.small$V4,
width=(gtf.small$V5-gtf.small$V4)+1
),
strand=gtf.small$V7[1],
exon_id=gtf.small$exon_id,
exon_name=gtf.small$exon_id,
exon_rank=c(1:length(gtf.small$exon_id))
)
# Save into list
grange.cds.list[[i]] <- grange
transcript.ids[i] <- transcript_ids[i]
} else {
# Save into list
grange.cds.list[[i]] <- FALSE
transcript.ids[i] <- FALSE
}
}
# Convert list to GenomicRanges::GRanges list
index.keep <- which(transcript.ids != FALSE)
grange.cds.list <- grange.cds.list[index.keep]
transcript.ids <- transcript.ids[index.keep]
if(length(grange.cds.list) != 0) {
grange.cds.list <- GenomicRanges::GRangesList(grange.cds.list)
names(grange.cds.list) <- transcript.ids
}
#################################################################
############# PREPARE WIGGLEPLOTR INPUT: METADATA ###############
#################################################################
# Retrieve transcript ids
metadata <- data.frame("transcript_id"=names(grange.exon.list), stringsAsFactors=FALSE)
# Annotate gene id, name
#metadata$gene_id <- gene_id
metadata$gene_short_name <- gene_short_name
# Indicate strand
strand <- gtf$V7[1]
if(strand=="+") {
metadata$strand <- 1
} else {
metadata$strand <- -1
}
# Annotate transcript type
# Retrieve transcript type
metadata <- join(metadata, anno, by="transcript_id", type="left")
metadata$transcript_id.biotype <- paste(metadata$transcript_id, " (", metadata$transcript_biotype, ")", sep="")
# Update GRange exon list
. <- data.frame("transcript_id"=names(grange.exon.list), stringsAsFactors=FALSE)
. <- join(., metadata[,c("transcript_id", "transcript_id.biotype")], by="transcript_id", type="left")
names(grange.exon.list) <- .$transcript_id.biotype
# Update GRange exon-SJ list
if(length(grange.exon.sj.list) != 0) {
. <- data.frame("transcript_id"=names(grange.exon.sj.list), stringsAsFactors=FALSE)
. <- join(., metadata[,c("transcript_id", "transcript_id.biotype")], by="transcript_id", type="left")
names(grange.exon.sj.list) <- paste(.$transcript_id.biotype, "_SJ", sep="")
}
# Update GRange CDS list
. <- data.frame("transcript_id"=names(grange.cds.list), stringsAsFactors=FALSE)
. <- join(., metadata[,c("transcript_id", "transcript_id.biotype")], by="transcript_id", type="left")
names(grange.cds.list) <- .$transcript_id.biotype
# Update metadata
metadata$transcript_id <- metadata$transcript_id.biotype
# Remove intermediate column
metadata$transcript_id.biotype <- NULL
metadata$transcript_biotype <- NULL
# Update exon file, metadata with exon-SJ
# Update exon file
if(length(grange.exon.sj.list) != 0) {
grange.exon.list <- c(grange.exon.list, grange.exon.sj.list)
}
# Update metadata file
if(length(grange.exon.sj.list) != 0) {
#metadata <- data.frame("transcript_id"=names(grange.exon.list),
#"gene_id"=gene_id,
#"gene_short_name"=gene_short_name,
#stringsAsFactors=FALSE
#)
metadata <- data.frame("transcript_id"=names(grange.exon.list),
"gene_short_name"=gene_short_name,
stringsAsFactors=FALSE
)
strand <- gtf$V7[1]
if(strand=="+") {
metadata$strand <- 1
} else {
metadata$strand <- -1
}
}
#################################################################
################# FILTER FOR SPECIFIC BIOTYPE ###################
#################################################################
if(show.protein.coding.only==TRUE) {
transcript_ids <- metadata[grep("protein_coding", metadata$transcript_id, fixed=TRUE), "transcript_id"]
if(length(transcript_ids) != 0) {
metadata <- metadata[grep("protein_coding", metadata$transcript_id, fixed=TRUE), ]
grange.exon.list <- grange.exon.list[metadata$transcript_id]
overlap <- intersect(names(grange.cds.list), transcript_ids)
grange.cds.list <- grange.cds.list[overlap]
} else {
message("No protein-coding transcripts found for this gene")
MarvelObject$adhocGene$SJPosition$metadata <- metadata
return(MarvelObject)
}
}
#################################################################
######################## WIGGLEPLOTR ############################
#################################################################
plot <- wiggleplotr::plotTranscripts(exons=grange.exon.list,
cdss=grange.cds.list,
transcript_annotations=metadata,
rescale_introns=rescale_introns,
new_intron_length=50,
flanking_length=c(50,50),
connect_exons=TRUE,
transcript_label=TRUE,
region_coords = NULL,
anno.colors=anno.colors,
anno.label.size=anno.label.size
)
# Save into new slots
MarvelObject$adhocGene$SJPosition$Plot <- plot
MarvelObject$adhocGene$SJPosition$metadata <- metadata
MarvelObject$adhocGene$SJPosition$exonfile <- grange.exon.list
MarvelObject$adhocGene$SJPosition$cdsfile <- grange.cds.list
# Return final object
return(MarvelObject)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.