knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
knitr::opts_chunk$set(warning = F, message = F, fig.pos = "center", fig.width = 10, fig.height = 8)

SETUP

Packages

library(brainSTORM)
library(txtools)
library(magrittr)
library(ggplot2)
library(parallel)
library(gridExtra)

These are the file paths needed to start the run:

# Genome and Gene Annotation
fastaGenome <- "/home/labs/schwartzlab/miguelg/BIGDATA/genome_references/ribosomal/Sc_ribosomal_seqs.fa"
bedAnnotation <- "/home/labs/schwartzlab/miguelg/BIGDATA/gene_annotations/ribosomal/Sc_ribosomal_seqs.bed"
r1Files <- grep(list.files("/home/labs/schwartzlab/joeg/data/lib524/Saccharomyces_cerevisiae_SK1/",
                           full.names = TRUE), pattern = "R1", value = TRUE)
OUTDIR <- "/home/labs/schwartzlab/miguelg/WORKSPACE_wexac/storm_seq/lib524/s_cerevisiae"
EXP_NAME <- "S.cerevisiae_SK1-lib524"
NCORES <- 1

Design matrix from R1 Fasta files.

vList <- list(organism = c("PyroAbyss", "TherAcid", "Yeast", "Human"), 
              RTase = c("SSIII", "SSIV", "TGIRT", "RTHIV"),
              libTreat = c("Ac4C", "CMC", "DeacetylatedAc4C", "MocklowdNTPs",
                           "Mock", "Dimroth", "m5C", "AlkBmix", "RBSseqHeatMg", "NaBH4HydBiotin"),
              bioTreat = c("80deg", "95deg", "100deg", "AcidpH1", "AcidpH2", "AcidpH3"))

META <- storm_META(fileNames = r1Files,
                   varsList = vList,
                   setVars = "organism",
                   idVars = c("organism", "RTase", "libTreat"),
                   groupVars = c("libTreat", "RTase"), 
                   outDir = OUTDIR)

STAR genome and mapping reads

# Make transcriptome, make new gene annotation for transcriptome
fastaTxOme <- mkTranscriptome(fastaGenome, bedAnnotation, nCores = NCORES)
bedTxOme <- mkBedFromFastaTxOme(fastaTxOme)

# Creating bisulphite transcriptome
bisTxPath <- bisGenome(fastaTxOme)

# Create STAR genomes
STARGenome <- mkSTARgenome(fastaTxOme, bedTxOme)
STARGenome_bis <- mkSTARgenome(bisTxPath, bedTxOme)

#Loading transcriptome and annotation
GENOME <- txtools::tx_load_genome(fastaTxOme)
TXOME <- txtools::tx_load_bed(bedTxOme)
if(!all(file.exists(META$BAM))){
    # STAR Alignment
    alignSTAR(read1Files = META[libTreat != "m5C" & libTreat != "RBSseqHeatMg", FASTQ], 
              nCores = NCORES, 
              zipped = TRUE,
              STARgenomeDir = STARGenome, 
              alignEndsType = "Local", 
              alignIntronMax = 1, #set to 1 for bacteria/archaea, adjust accordingly for eukaryotes!
              outDir = OUTDIR)
    alignSTAR(read1Files = META[libTreat == "m5C" | libTreat == "RBSseqHeatMg", FASTQ], 
              nCores = NCORES, 
              zipped = TRUE,
              STARgenomeDir = STARGenome_bis, 
              alignEndsType = "Local", 
              alignIntronMax = 1, #set to 1 for bacteria/archaea, adjust accordingly for eukaryotes!
              outDir = OUTDIR)
}

Processing to count data tables using txtools

if(!all(file.exists(META$RDS))){
    rdsFiles <- parallel::mclapply(mc.cores = NCORES, seq_along(META$FASTQ), function(i){
        bam2TxDT(BAMfile = META$BAM[i], 
                 geneAnnot = TXOME, 
                 genome = GENOME, 
                 dtType = "covNuc", 
                 outDir = OUTDIR, 
                 nCores = 1, 
                 remL = 1000, 
                 minR = 0)
    })
}

Reports

libComplexReport(META, maxExtrapolation = 2.01e6, steps = 1e5, verbose = T)
ggLCE <- gg_lce(META, tab_name = file.path(OUTDIR, "libCompReport.txt"), speciesName = EXP_NAME)
plot(ggLCE)
ggNucF <- fastq_nucFreq(META, NCORES, firstN =  1e5) %>% 
    gg_nucFreq(subtitle = EXP_NAME)
plot(ggNucF)
rReport <- reads_report(META, NCORES)
ggReadStats <- gg_readStats(rReport, EXP_NAME)
gridExtra::grid.arrange(ggReadStats[[1]], ggReadStats[[2]], ncol = 2)

brainSTORM

Here we use the storm_STORM() function to create the yeast_STORM object. This object includes all the ribosomal data processed in the above steps, and is also an in-built dataset in the brainSTORM package, to work as an example. It can be called as a normal object after loading the brainSTORM package.

yeast_STORM <- storm_STORM(META, GENOME, TXOME, nCores = 1) 

Different sets of metrics can be assigned to RNA modifications using the helper functions brainSTORM::hlp_start_CALLS() to initialize the CALLS tables and subsequent calls of brainSTORM::hlp_assign_scores() to assign metrics per RNA modification.

STORM <- yeast_STORM %>% 
    add_default_metrics_v2() %>% 
    storm_makeCalls()

For being able to visualize the scores for each metric with respect to known RNA modification we can add known RNA modifications to their positions with addKnownRNAmods(). NOTE: It is important to note that after adding the nuc column for known positions, it is not possible to add new metrics for which this should be postponed as a last step in the process, or take the precaution to keep a backup of the previous object if more metrics need to be added to CALLS.

# Add known RNA modification sites
STORM <- addKnownRNAmods(STORM, rRNAmods_Sc_Taoka)

storm_makeCalls() is a misnomer and has been kept with that name for compatibility issues, but a twin function with a more appropriate name will be added in future versions of the package.

Plotting scores by RNA modification

Here we show boxplots of the metrics divided by their relevant RNA modifications.

storm_metricsBoxPlot_byNuc(STORM, Y_scores, "Yeast - PseudoU scores")
storm_metricsBoxPlot_byNuc(STORM, Nm_scores, "Yeast - 2Ometh scores")
storm_metricsBoxPlot_byNuc(STORM, ac4C_scores, "Yeast - ac4C scores")
storm_metricsBoxPlot_byNuc(STORM, m7G_scores, "Yeast - m7G scores")
storm_metricsBoxPlot_byNuc(STORM, m1A_scores, "Yeast - m1A scores")
storm_metricsBoxPlot_byNuc(STORM, m5C_scores, "Yeast - m5C scores")
storm_metricsBoxPlot_byNuc(STORM, m3U_scores, "Yeast - m3U scores")

Here I show the median value of all the metrics for each different nucleotide and RNA modification. Showing how the different metrics create a profile for predicting each one.

tmp3 <- lapply(names(STORM$CALLS$Yeast), function(RNAm){
    tmp <- STORM$CALLS$Yeast[[RNAm]][,-1:-9]
    tmp2 <- lapply(names(tmp), function(metric){
        tapply(tmp[[metric]], STORM$CALLS$Yeast[[RNAm]]$nuc, function(x) median(x, na.rm = T)) 
    }) %>% do.call(what = cbind) %>% set_colnames(names(tmp))
    tmp2
}) %>% do.call(what = cbind)
colnames(tmp3)
tmp4 <- tmp3[c("Y", "Am", "Um", "Ym", "Gm", "Cm", "m5C", "ac4C", "m1A", "m7G", "m3U", "A", "U", "G", "C"),]
# colnames(tmp4) <- paste("metric", 1:ncol(tmp4), sep = "_")
pheatmap::pheatmap(tmp4, scale = "column", cluster_cols = F, cluster_rows = F)

Session Info

sessionInfo()


SchwartzLab/brainSTORM documentation built on May 14, 2022, 5:14 p.m.