#' @title Extract introns coordinates from GENCODE gtf file as a dataframe.
#'
#' @description This function takes a gtf file from GENCODE and returns a dataframe in the R Global Environment containing introns coordinates and their position in the transcript.
#' @usage extract_introns(x)
#' @param x The name of the downloaded gtf file from GENCODE website
#' @export
#' @keywords
#' @seealso
#' @return A dataframe of intron coordinates from the gtf file selected
#' examples \dontrun {
#' # You don't have to run this
#' load_gtf("gencode.v27.lncRNAs.gtf")
#' extract_introns(gencode.v27.lncRNAs.gtf)
#’}
extract_introns <- function(x) {
a <- subset(x, x$type=="exon")
a1 <- classify_exons(a)
b <- subset(a1, select = c("transcript_id", "exon_number"))
c <- as.data.frame(table(b$transcript_id))
d <- subset(c, Freq== '1')
d1 <- nrow(d)
print(paste0("Single exons: ", d1))
e <- dplyr::anti_join(a1,d, by = c("transcript_id" = "Var1"))
colnames(c) <- c("transcript_id", "exon_count")
x <- sort(unique(as.integer(c$exon_count)))
x2 <- max(x)
x3 <- sort(unique(as.integer(e$exon_number)))
x4 <- setdiff(union(x, x3), intersect(x, x3))
y <- NULL
for(i in x) {
if (i==x2) {
next
}
zz <- subset(e, e$EXON_CLASSIFICATION!="last_exons" & e$exon_number==i)
zz$intron_start <- ifelse(zz$strand=="+", zz$end + 1, zz$start - 1)
zz2 <- subset(e,e$exon_number==i+1)
zz2$intron_end <- ifelse(zz2$strand=="+", zz2$start - 1, zz2$end + 1)
zz3 <- subset(zz2, select = "intron_end")
zz4 <- cbind(zz,zz3)
zz4$intron_number <- paste("intron", i, sep = "")
y <- rbind(y,zz4)
}
m <- NULL
for(i in x4) {
zz <- subset(e, e$exon_number==i)
zz$intron_start <- ifelse(zz$strand=="+", zz$end + 1, zz$start - 1)
zz2 <- subset(e,e$exon_number==i+1)
zz2$intron_end <- ifelse(zz2$strand=="+", zz2$start - 1, zz2$end + 1)
zz3 <- subset(zz2, select = "intron_end")
zz4 <- cbind(zz,zz3)
zz4$intron_number <- paste("intron", i, sep = "")
m <- rbind(m,zz4)
}
n <- rbind(y,m)
n$type <- "intron"
n1 <- subset(n, n$strand=="+")
n2 <- subset(n, n$strand=="-")
n3 <- n2$intron_start
n2$intron_start <- n2$intron_end
n2$intron_end <- n3
n4 <- rbind(n1,n2)
n5 <- subset(n4, select = c("seqnames", "intron_start", "intron_end", "width"))
n5$width <- n5$intron_end-n5$intron_start+1
n6 <- n4[,-which(names(n4) %in% c("seqnames", "start", "end", "width", "exon_number", "exon_id", "EXON_CLASSIFICATION", "intron_start", "intron_end"))]
n7 <- cbind(n5,n6)
n8 <- n7 %>% group_by(gene_id) %>% arrange(seqnames, intron_start)
assign(deparse(substitute(introns_df)), n8, envir = .GlobalEnv)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.