# scripts/prepareAnnotations.R

# Read command-line options.
#
# Options (all required):
#   --tags  Path to the transcript tags table.
#   --gtf   Path to the reference Ensembl GTF file.
#   --out   Path of the .rds file to write.
#
# The script stops with an error naming every required option that was not
# supplied, before any of the heavier downstream work starts.
library("optparse")
option_list <- list(
  make_option(
    c("--tags"),
    type = "character",
    default = NULL,
    help = "Path to transcript tags table generated by extractTranscriptTags.py.",
    metavar = "path"
  ),
  make_option(
    c("--gtf"),
    type = "character",
    default = NULL,
    help = "Path to the reference Ensembl GTF file.",
    metavar = "path"
  ),
  # The default has to be a real NULL (not the string "NULL"); otherwise a
  # missing --out is indistinguishable from a file literally named "NULL".
  make_option(
    c("--out"),
    type = "character",
    default = NULL,
    help = "Path to the binary output file (.rds format).",
    metavar = "path"
  )
)
opt <- parse_args(OptionParser(option_list = option_list))

# Fail early when a required option is absent. `[[` is used for exact name
# matching (`$` on a list allows partial matching).
required_opts <- c("tags", "gtf", "out")
is_missing <- vapply(
  required_opts,
  function(opt_name) is.null(opt[[opt_name]]),
  logical(1)
)
if (any(is_missing)) {
  stop(
    "Missing required option(s): ",
    paste0("--", required_opts[is_missing], collapse = ", "),
    call. = FALSE
  )
}

tags_file <- opt[["tags"]]
gtf_file <- opt[["gtf"]]
out_file <- opt[["out"]]

#Import other dependencies
library("dplyr")
library("txrevise")
library("GenomicFeatures")

# Read the tab-separated transcript tags table and rename its `transcript_id`
# column to `ensembl_transcript_id` before passing it, together with the GTF
# path, to txrevise's metadata importer.
raw_tags <- readr::read_tsv(tags_file)
transcript_tags <- dplyr::rename(
  raw_tags,
  ensembl_transcript_id = transcript_id
)
transcript_meta <- txrevise::importTranscriptMetadataFromGTF(
  gtf_file,
  transcript_tags
)

# Run the imported metadata through txrevise's metadata filter; only the
# filtered table is used from here on.
filtered_metadata <- txrevise::filterTranscriptMetadata(transcript_meta)

# Build a TxDb from the same GTF file and pull out exon and CDS ranges grouped
# per transcript, with the list elements named (use.names = TRUE).
txdb <- GenomicFeatures::makeTxDbFromGFF(gtf_file)
exons <- GenomicFeatures::exonsBy(txdb, by = "tx", use.names = TRUE)
cdss <- GenomicFeatures::cdsBy(txdb, by = "tx", use.names = TRUE)

# Bundle everything needed for txrevise event construction and write it out.
# Exons are indexed by every filtered transcript ID, whereas CDS entries are
# limited to the IDs that actually occur in `cdss` (presumably because
# transcripts without a coding sequence have no CDS entry -- confirm).
kept_tx_ids <- filtered_metadata$ensembl_transcript_id
kept_cds_ids <- intersect(names(cdss), kept_tx_ids)
txrevise_data <- list(
  transcript_metadata = filtered_metadata,
  exons = exons[kept_tx_ids],
  cdss = cdss[kept_cds_ids]
)
saveRDS(txrevise_data, out_file)
# kauralasoo/txrevise documentation built on March 31, 2022, 12:03 p.m.