Evaluate whether GC content calculated on the RNA sequences using the Ensembl Perl API matches the one calculated using R and the genomic DNA sequence.
library(ensembldb) edb <- EnsDb("/Users/jo/Projects/EnsDbs/98/EnsDb.Hsapiens.v98.sqlite") chrs <- c(1:22, "X", "Y") txs <- transcripts(edb, filter = ~ seq_name %in% chrs) any(is.na(mcols(txs)$gc_content))
Get the sequences using R functionality.
library(GenomicFeatures) dna <- getGenomeTwoBitFile(edb) tx_exons <- exonsBy(edb, "tx", filter = ~ seq_name %in% chrs) tx_seqs <- extractTranscriptSeqs(dna, tx_exons) library(Biostrings) gc_prop <- letterFrequency(tx_seqs, letters = "GC", as.prob = TRUE)[, 1] names(gc_prop) <- names(tx_seqs)
Compare GC contents...
gc_prop_perl <- mcols(txs)$gc_content names(gc_prop_perl) <- names(txs) stopifnot(all(names(gc_prop) %in% names(gc_prop_perl))) gc_prop_perl <- gc_prop_perl[names(gc_prop)] head(gc_prop_perl) head(gc_prop) library(testthat) expect_equal(gc_prop_perl, 100 * gc_prop)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.