inst/doc/cubar.R

## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup--------------------------------------------------------------------
suppressPackageStartupMessages(library(Biostrings))
suppressPackageStartupMessages(library(data.table))
library(cubar)
library(ggplot2)

## -----------------------------------------------------------------------------
# example data
yeast_cds

# qc
yeast_cds_qc <- check_cds(yeast_cds)
yeast_cds_qc

## -----------------------------------------------------------------------------
# convert a CDS to codon sequence
seq_to_codons(yeast_cds_qc[['YDR320W-B']])

# convert a CDS to amino acid sequence
Biostrings::translate(yeast_cds_qc[['YDR320W-B']])

## -----------------------------------------------------------------------------
# get codon frequency
yeast_cf <- count_codons(yeast_cds_qc)

## -----------------------------------------------------------------------------
yeast_cf[1:3, 1:3]

## -----------------------------------------------------------------------------
# get codon table for the standard genetic code
ctab <- get_codon_table(gcid = '1')

# plot possible codon and anticodon pairings
plot_ca_pairing(ctab)

## -----------------------------------------------------------------------------
# example of a custom mapping
head(aa2codon)

# create a custom codon table
custom_ctab <- create_codon_table(aa2codon)
head(custom_ctab)

## -----------------------------------------------------------------------------
# get enc
enc <- get_enc(yeast_cf)
head(enc)

plot_dist <- function(x, xlab = 'values'){
    x <- stack(x)
    ggplot(x, aes(x = values)) +
        geom_histogram(bins = 40, fill = '#88CCEE') +
        labs(x = xlab, y = 'Number of genes') +
        theme_classic(base_size = 12) +
        theme(axis.text = element_text(color = 'black'))
}

plot_dist(enc, 'ENC')

## -----------------------------------------------------------------------------
# get fop
fop <- get_fop(yeast_cf)
plot_dist(fop, 'Fop')

## -----------------------------------------------------------------------------
optimal_codons <- est_optimal_codons(yeast_cf, codon_table = ctab)
head(optimal_codons[optimal == TRUE])

## -----------------------------------------------------------------------------
# estimate RSCU of highly expressed genes
yeast_exp <- as.data.table(yeast_exp)
yeast_exp <- yeast_exp[gene_id %in% rownames(yeast_cf), ]
yeast_heg <- head(yeast_exp[order(-fpkm), ], n = 500)
rscu_heg <- est_rscu(yeast_cf[yeast_heg$gene_id, ], codon_table = ctab)
head(rscu_heg) # RSCU values are shown in the column `rscu`

# calculate CAI of all genes
# note: CAI values are usually calculated based RSCU of highly expressed genes.
cai <- get_cai(yeast_cf, rscu = rscu_heg)
plot_dist(cai, xlab = 'CAI')

## -----------------------------------------------------------------------------
# get tRNA gene copy number from GtRNADB
trna_gcn <- table(data.table::tstrsplit(sub(' .*', '', names(yeast_trna)), '-')[[3]])
trna_gcn <- trna_gcn[names(trna_gcn) != 'NNN'] # copy of each anticodon

# calculate tRNA weight for each codon
trna_w <- est_trna_weight(trna_level = trna_gcn, codon_table = ctab)

# get tAI
tai <- get_tai(yeast_cf, trna_w = trna_w)
plot_dist(tai, 'tAI')

## -----------------------------------------------------------------------------
# path_gtrnadb <- 'http://gtrnadb.ucsc.edu/genomes/eukaryota/Scere3/sacCer3-mature-tRNAs.fa'
# yeast_trna <- Biostrings::readRNAStringSet(path_gtrnadb)

## -----------------------------------------------------------------------------
pairs(cbind(CAI = cai, ENC = enc, Fop = fop, TAI = tai),
      cex = 0.5, col = alpha('black', 0.2))

## -----------------------------------------------------------------------------
# get lowly expressed genes
yeast_leg <- head(yeast_exp[order(fpkm), ], n = 500)
yeast_leg <- yeast_leg[gene_id %in% rownames(yeast_cf), ]

# differetial usage test
du_test <- codon_diff(yeast_cds_qc[yeast_heg$gene_id], yeast_cds_qc[yeast_leg$gene_id])
du_test <- du_test[amino_acid != '*', ]

## -----------------------------------------------------------------------------
du_test$codon <- factor(du_test$codon, levels = du_test[order(-global_or), codon])

ggplot(du_test, aes(x = codon, y = log2(global_or))) +
    geom_col() +
    labs(x = NULL, y = 'log2(OR)') +
    theme_classic(base_size = 12) +
    theme(axis.text = element_text(color = 'black'),
          axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

## -----------------------------------------------------------------------------
du_test2 <- du_test[!amino_acid %in% c('Met', 'Trp'), ]
du_test2$codon <- factor(du_test2$codon, levels = du_test2[order(-fam_or), codon])

ggplot(du_test2, aes(x = codon, y = log2(fam_or))) +
    geom_col() +
    labs(x = NULL, y = 'log2(OR)') +
    facet_grid(cols = vars(amino_acid), space = 'free', scales = 'free', drop = TRUE) +
    theme_light() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

## -----------------------------------------------------------------------------
# optimize codon usage to the optimal codon of each amino acid
opc_aa <- est_optimal_codons(yeast_cf, codon_table = ctab, level = 'amino_acid')
seq_optimized <- codon_optimize(yeast_cds_qc[['YFR025C']], optimal_codons, level = 'amino_acid')

# CAI before and after optimization
plot_dist(cai, 'CAI') + 
    geom_vline(xintercept = cai['YFR025C'], linetype = 'dashed', color = 'red') +  # before
    geom_vline(xintercept = get_cai(count_codons(seq_optimized), rscu_heg),
               linetype = 'dashed', color = 'black')  # after

## -----------------------------------------------------------------------------
swa <- slide_apply(yeast_cds_qc[['YHR099W']], .f = get_cai,
                   step = 30, before = 20, after = 20, rscu = rscu_heg)

# plot the results
slide_plot(swa, 'CAI')

Try the cubar package in your browser

Any scripts or data that you put into this service are public.

cubar documentation built on April 3, 2025, 8:58 p.m.