#' @title Classify exons as single, first, inner or last exons.
#'
#' @description Takes GENCODE gtf annotation records held in a data frame and
#'   adds a column, \code{EXON_CLASSIFICATION}, stating the position of each
#'   exon within its transcript: single exon, first, inner or last exon.
#'
#' @details The exon count of a transcript is the number of rows with
#'   \code{type == "exon"} sharing its \code{transcript_id}. Exons are then
#'   labelled as follows:
#'   \itemize{
#'     \item \code{"single_exons"}: exons of transcripts with exactly one exon.
#'     \item \code{"first_exons"}: \code{exon_number == 1} in a multi-exon
#'       transcript.
#'     \item \code{"last_exons"}: \code{exon_number} equal to the exon count
#'       of the transcript.
#'     \item \code{"inner_exons"}: every other numbered exon.
#'   }
#'   Rows that are not exons, and exons of multi-exon transcripts without an
#'   \code{exon_number}, get \code{NA}. Row order and row count of \code{x}
#'   are preserved.
#'
#'   For backward compatibility the result is also assigned to
#'   \code{classified_exons_df} in the global environment.
#' @usage classify_exons(x)
#' @param x A data frame of gtf records (e.g. a GENCODE gtf file loaded into
#'   R) with at least the columns \code{type}, \code{transcript_id} and
#'   \code{exon_number}.
#' @return The input data frame with an additional column,
#'   \code{EXON_CLASSIFICATION}, describing exon positions within a
#'   transcript. The value is returned invisibly.
#' @examples
#' \dontrun{
#' # You don't have to run this
#' load_gtf("gencode.v27.lncRNAs.gtf")
#' classify_exons(gencode.v27.lncRNAs.gtf)
#' }
#' @export
classify_exons <- function(x) {
  if (!is.data.frame(x)) {
    stop("`x` must be a data frame of gtf records.", call. = FALSE)
  }
  missing_cols <- setdiff(c("type", "transcript_id", "exon_number"), names(x))
  if (length(missing_cols) > 0) {
    stop(
      "`x` is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  is_exon <- !is.na(x$type) & x$type == "exon"
  transcript <- as.character(x$transcript_id)

  # Only exon rows need a numeric exon number; going through character first
  # keeps factor columns from being turned into their integer codes.
  exon_number <- rep(NA_real_, nrow(x))
  exon_number[is_exon] <- as.numeric(as.character(x$exon_number[is_exon]))

  # Number of exon rows per transcript, looked up for every row of `x`.
  # Rows whose transcript has no exon rows (or no id) get NA.
  counts <- table(transcript[is_exon])
  exon_count <- as.integer(counts)[match(transcript, names(counts))]

  message("Extracting single exons")
  is_single <- is_exon & !is.na(exon_count) & exon_count == 1L
  message("Single exons: ", sum(is_single))

  # Everything below concerns exons that are not in single-exon transcripts.
  is_multi <- is_exon & !is_single
  is_numbered <- !is.na(exon_number)

  message("Extracting first exons")
  is_first <- is_multi & is_numbered & exon_number == 1
  message("First exons: ", sum(is_first))

  message("Extracting last exons")
  is_later <- is_multi & is_numbered & exon_number != 1
  is_last <- is_later & !is.na(exon_count) & exon_number == exon_count
  message("Last exons: ", sum(is_last))

  is_inner <- is_later & !is_last
  message("Inner exons: ", sum(is_inner))

  message(
    "Total exons: ",
    sum(is_single) + sum(is_first) + sum(is_last) + sum(is_inner)
  )

  # The four masks are mutually exclusive, so assignment order is irrelevant.
  classification <- rep(NA_character_, nrow(x))
  classification[is_single] <- "single_exons"
  classification[is_first] <- "first_exons"
  classification[is_last] <- "last_exons"
  classification[is_inner] <- "inner_exons"

  x$EXON_CLASSIFICATION <- classification

  # Kept for backward compatibility: existing callers read the result from
  # `classified_exons_df` in the global environment.
  assign("classified_exons_df", x, envir = .GlobalEnv)
  invisible(x)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.