GencoDymo2_vignette.R
In GencoDymo2: Comprehensive Analysis of 'GENCODE' Annotations and Splice Site Motifs

## ----setup, include=FALSE-----------------------------------------------------
# Global knitr chunk defaults for this vignette: echo the code, suppress
# warnings and messages, and use an 8 x 6 figure size.
knitr::opts_chunk$set(
  fig.height = 6,
  fig.width = 8,
  message = FALSE,
  warning = FALSE,
  echo = TRUE
)

## ----install, eval=FALSE------------------------------------------------------
# # Install pak if not already installed
# if (!requireNamespace("pak", quietly = TRUE)) install.packages("pak")
# # Install from GitHub
# pak::pkg_install("github::monahton/GencoDymo2")
# 
# # Load the package
# library(GencoDymo2)

## ----get_release, eval=FALSE--------------------------------------------------
# # Fetch the most recent human and mouse GENCODE release identifiers
# human_release <- get_latest_release("human", verbose = TRUE)
# mouse_release <- get_latest_release("mouse", verbose = TRUE)

## ----get_release_ex, echo=FALSE-----------------------------------------------
# Print the latest human and mouse release identifiers as static text; the
# values are hard-coded here because the get_release chunk is not evaluated.
# Each message ends with a newline so the two lines do not run together.
cat("Latest human GENCODE release: release_47\n")
cat("Latest mouse GENCODE release: release_M36\n")

## ----get-files, eval=FALSE----------------------------------------------------
# 
# # Download latest human long noncoding RNAs GTF
# lnc_47_gtf <- get_gtf(
#   species = "human",
#   release_version = human_release,
#   annotation_type = "long_noncoding_RNAs.gtf.gz",
#   dest_folder = tempdir()
# )
# 
# # Download previous human release (release_46) for comparison
# lnc_46_gtf <- get_gtf(
#   species = "human",
#   release_version = "release_46",
#   annotation_type = "long_noncoding_RNAs.gtf.gz",
#   dest_folder = tempdir()
# )
# 
# # Download latest mouse primary assembly annotations (GFF3)
# mouse_36_gff3 <- get_gff3(
#   species = "mouse",
#   release_version = mouse_release,
#   annotation_type = "primary_assembly.annotation.gff3.gz",
#   dest_folder = tempdir()
# )

## ----annotation-types, echo=FALSE---------------------------------------------
# Annotation-type identifiers displayed in the vignette.
valid_annotation_types <- c(
  "annotation",
  "basic.annotation",
  "chr_patch_hapl_scaff.annotation",
  "chr_patch_hapl_scaff.basic.annotation",
  "long_noncoding_RNAs",
  "primary_assembly.annotation",
  "primary_assembly.basic.annotation",
  "tRNAs",
  "polyAs"
)

# Emit the heading first, then auto-print the vector beneath it.
cat("Valid Annotation Types:\n")
valid_annotation_types


## ----load-data, eval=FALSE----------------------------------------------------
# # Loading using the stored paths from previous steps
# lnc_47_df <- load_file(lnc_47_gtf)
# head(lnc_47_df)
# 
# # Alternatively, specify the file path directly
# lnc_46_df <- load_file(file.path(tempdir(), "gencode.v46.long_noncoding_RNAs.gtf.gz"))
# head(lnc_46_df)
# 
# # Load mouse GFF3
# mouse_pri_36 <- load_file(file.path(tempdir(),"gencode.vM36.primary_assembly.annotation.gff3.gz"))
# head(mouse_pri_36)

## ----compare-releases, eval=FALSE---------------------------------------------
# # Compare gene counts between release 47 and 46
# gene_comparison <- compare_release(lnc_47_df, lnc_46_df, type = "gene")
# 
# # Compare exon counts
# exon_comparison <- compare_release(lnc_47_df, lnc_46_df, type = "exon")
# 
# # Compare a specific gene biotype (e.g., TEC) using a custom baseline
# comparison <- compare_release(
#   lnc_47_df,
#   lnc_46_df,
#   type = "gene",
#   gene_type = "TEC",
#   baseline = "count1"
# )

## ----introns, eval=FALSE------------------------------------------------------
# # Human lncRNA introns for release 47
# introns_lnc_47 <- extract_introns(lnc_47_df, verbose = TRUE)
# 
# # Mouse introns (filtering to primary chromosomes first)
# mouse_pri_36 <- mouse_pri_36[grepl("^chr", mouse_pri_36$seqnames), ]
# mouse_introns_pri_36 <- extract_introns(mouse_pri_36, verbose = TRUE)
# 

## ----splice-sites, eval=FALSE-------------------------------------------------
# # Human
# library(BSgenome.Hsapiens.UCSC.hg38)
# lnc_47_ss <- assign_splice_sites(
#   introns_lnc_47,
#   genome = BSgenome.Hsapiens.UCSC.hg38,
#   verbose = TRUE
# )
# 
# # Mouse
# library(BSgenome.Mmusculus.UCSC.mm39)
# mouse_pri_36_ss <- assign_splice_sites(
#   mouse_introns_pri_36,
#   genome = BSgenome.Mmusculus.UCSC.mm39,
#   verbose = TRUE
# )

## ----cryptic, eval=FALSE------------------------------------------------------
# # Identify cryptic (non-canonical) splice sites
# cryptic_ss <- find_cryptic_splice_sites(
#   lnc_47_ss,
#   genome = BSgenome.Hsapiens.UCSC.hg38,
#   canonical_donor = "GT",
#   canonical_acceptor = "AG",
#   verbose = TRUE
# )

## ----motifs, eval=FALSE-------------------------------------------------------
# # Donor motifs (5'ss)
# motifs_donor <- extract_ss_motif(
#   input = lnc_47_ss,
#   genome = BSgenome.Hsapiens.UCSC.hg38,
#   type = "5ss",
#   verbose = TRUE,
#   save_fasta = TRUE,
#   output_file = file.path(tempdir(), "lnc_47_5ss_motifs.fa")
# )
# 
# # Acceptor motifs (3'ss)
# motifs_acc <- extract_ss_motif(
#   input = lnc_47_ss,
#   genome = BSgenome.Hsapiens.UCSC.hg38,
#   type = "3ss",
#   verbose = TRUE,
#   save_fasta = TRUE,
#   output_file = file.path(tempdir(), "lnc_47_3ss_motifs.fa")
# )

## ----unspliced, eval=FALSE----------------------------------------------------
# ## identify single exon genes and transcripts
# single_exon_genes <- extract_single_exon(lnc_47_df, level = "gene")
# single_exon_trans <- extract_single_exon(lnc_47_df, level = "transcript")

## ----exon_class, eval=FALSE---------------------------------------------------
# # Assign the ordinal position of exons
# lnc_47_class_exons <- classify_exons(lnc_47_df, verbose = TRUE)

## ----eval=FALSE---------------------------------------------------------------
# # Length of spliced transcript
# lnc_47_spliced_length <- spliced_trans_length(lnc_47_df)
# head(lnc_47_spliced_length)

## ----stat, eval=FALSE---------------------------------------------------------
# # Exon length statistics
# lnc_47_exon_stats <- stat_summary(lnc_47_class_exons, type = "exon")
# 
# # Intron length statistics
# lnc_47_intron_stats <- stat_summary(introns_lnc_47, type = "intron")

## ----gc-content, eval=FALSE---------------------------------------------------
# # Human
# lnc_47_gc <- calculate_gc_content(
#   lnc_47_df,
#   genome = BSgenome.Hsapiens.UCSC.hg38,
#   verbose = TRUE
# )
# # Mouse
# mouse_pri_36_gc <- calculate_gc_content(
#   mouse_pri_36,
#   genome = BSgenome.Mmusculus.UCSC.mm39,
#   verbose = TRUE
# )

## ----cds, eval=FALSE----------------------------------------------------------
# # Convert to GRanges and extract
# library(GenomicRanges)
# mouse_pri_36_granges <- GRanges(mouse_pri_36)
# mouse_cds_seqs <- extract_cds_sequences(
#   mouse_pri_36_granges,
#   BSgenome.Mmusculus.UCSC.mm39,
#   save_fasta = TRUE,
#   output_file = file.path(tempdir(), "mouse_pri_36_CDS.fa.gz"),
#   verbose = TRUE
# )

## ----eval=TRUE, echo=FALSE----------------------------------------------------
# Report the R session details via devtools.
devtools::session_info()

Any scripts or data that you put into this service are public.

GencoDymo2 documentation built on June 8, 2025, 10:29 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

GencoDymo2
Comprehensive Analysis of 'GENCODE' Annotations and Splice Site Motifs

inst/doc/GencoDymo2_vignette.R
In GencoDymo2: Comprehensive Analysis of 'GENCODE' Annotations and Splice Site Motifs

Try the GencoDymo2 package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

GencoDymo2 Comprehensive Analysis of 'GENCODE' Annotations and Splice Site Motifs

inst/doc/GencoDymo2_vignette.R In GencoDymo2: Comprehensive Analysis of 'GENCODE' Annotations and Splice Site Motifs

Try the GencoDymo2 package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

GencoDymo2
Comprehensive Analysis of 'GENCODE' Annotations and Splice Site Motifs

inst/doc/GencoDymo2_vignette.R
In GencoDymo2: Comprehensive Analysis of 'GENCODE' Annotations and Splice Site Motifs