knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
knitr::opts_chunk$set(warning = F, message = F, fig.pos = "center", fig.width = 10, fig.height = 8)
Packages
library(brainSTORM) library(txtools) library(magrittr) library(ggplot2) library(parallel) library(gridExtra)
These are the file paths needed to start the run:
# Genome and Gene Annotation fastaGenome <- "/home/labs/schwartzlab/miguelg/BIGDATA/genome_references/ribosomal/Sc_ribosomal_seqs.fa" bedAnnotation <- "/home/labs/schwartzlab/miguelg/BIGDATA/gene_annotations/ribosomal/Sc_ribosomal_seqs.bed" r1Files <- grep(list.files("/home/labs/schwartzlab/joeg/data/lib524/Saccharomyces_cerevisiae_SK1/", full.names = TRUE), pattern = "R1", value = TRUE) OUTDIR <- "/home/labs/schwartzlab/miguelg/WORKSPACE_wexac/storm_seq/lib524/s_cerevisiae" EXP_NAME <- "S.cerevisiae_SK1-lib524" NCORES <- 1
vList <- list(organism = c("PyroAbyss", "TherAcid", "Yeast", "Human"), RTase = c("SSIII", "SSIV", "TGIRT", "RTHIV"), libTreat = c("Ac4C", "CMC", "DeacetylatedAc4C", "MocklowdNTPs", "Mock", "Dimroth", "m5C", "AlkBmix", "RBSseqHeatMg", "NaBH4HydBiotin"), bioTreat = c("80deg", "95deg", "100deg", "AcidpH1", "AcidpH2", "AcidpH3")) META <- storm_META(fileNames = r1Files, varsList = vList, setVars = "organism", idVars = c("organism", "RTase", "libTreat"), groupVars = c("libTreat", "RTase"), outDir = OUTDIR)
# Make transcriptome, make new gene annotation for transcriptome fastaTxOme <- mkTranscriptome(fastaGenome, bedAnnotation, nCores = NCORES) bedTxOme <- mkBedFromFastaTxOme(fastaTxOme) # Creating bisulphite transcriptome bisTxPath <- bisGenome(fastaTxOme) # Create STAR genomes STARGenome <- mkSTARgenome(fastaTxOme, bedTxOme) STARGenome_bis <- mkSTARgenome(bisTxPath, bedTxOme) #Loading transcriptome and annotation GENOME <- txtools::tx_load_genome(fastaTxOme) TXOME <- txtools::tx_load_bed(bedTxOme)
if(!all(file.exists(META$BAM))){ # STAR Alignment alignSTAR(read1Files = META[libTreat != "m5C" & libTreat != "RBSseqHeatMg", FASTQ], nCores = NCORES, zipped = TRUE, STARgenomeDir = STARGenome, alignEndsType = "Local", alignIntronMax = 1, #set to 1 for bacteria/archaea, adjust accordingly for eukaryotes! outDir = OUTDIR) alignSTAR(read1Files = META[libTreat == "m5C" | libTreat == "RBSseqHeatMg", FASTQ], nCores = NCORES, zipped = TRUE, STARgenomeDir = STARGenome_bis, alignEndsType = "Local", alignIntronMax = 1, #set to 1 for bacteria/archaea, adjust accordingly for eukaryotes! outDir = OUTDIR) }
if(!all(file.exists(META$RDS))){ rdsFiles <- parallel::mclapply(mc.cores = NCORES, seq_along(META$FASTQ), function(i){ bam2TxDT(BAMfile = META$BAM[i], geneAnnot = TXOME, genome = GENOME, dtType = "covNuc", outDir = OUTDIR, nCores = 1, remL = 1000, minR = 0) }) }
libComplexReport(META, maxExtrapolation = 2.01e6, steps = 1e5, verbose = T)
ggLCE <- gg_lce(META, tab_name = file.path(OUTDIR, "libCompReport.txt"), speciesName = EXP_NAME) plot(ggLCE)
ggNucF <- fastq_nucFreq(META, NCORES, firstN = 1e5) %>% gg_nucFreq(subtitle = EXP_NAME) plot(ggNucF)
rReport <- reads_report(META, NCORES) ggReadStats <- gg_readStats(rReport, EXP_NAME) gridExtra::grid.arrange(ggReadStats[[1]], ggReadStats[[2]], ncol = 2)
Here we use the storm_STORM() function to create the yeast_STORM
object.
This object includes all the ribosomal data processed in the above steps, and is
also an in-built dataset in the brainSTORM package, to work as an example. It
can be called as a normal object after loading the brainSTORM package.
yeast_STORM <- storm_STORM(META, GENOME, TXOME, nCores = 1)
Different sets of metrics can be assigned to RNA modifications using the helper functions brainSTORM::hlp_start_CALLS() to initialize the CALLS tables and subsequent calls of brainSTORM::hlp_assign_scores() to assign metrics per RNA modification.
STORM <- yeast_STORM %>% add_default_metrics_v2() %>% storm_makeCalls()
For being able to visualize the scores for each metric with respect to known RNA modification we can add known RNA modifications to their positions with addKnownRNAmods(). NOTE: It is important to note that after adding the nuc column for known positions, it is not possible to add new metrics for which this should be postponed as a last step in the process, or take the precaution to keep a backup of the previous object if more metrics need to be added to CALLS.
# Add known RNA modification sites STORM <- addKnownRNAmods(STORM, rRNAmods_Sc_Taoka)
storm_makeCalls() is a misnomer and has been kept with that name for compatibility issues, but a twin function with a more appropriate name will be added in future versions of the package.
Here we show boxplots of the metrics divided by their relevant RNA modifications.
storm_metricsBoxPlot_byNuc(STORM, Y_scores, "Yeast - PseudoU scores") storm_metricsBoxPlot_byNuc(STORM, Nm_scores, "Yeast - 2Ometh scores") storm_metricsBoxPlot_byNuc(STORM, ac4C_scores, "Yeast - ac4C scores") storm_metricsBoxPlot_byNuc(STORM, m7G_scores, "Yeast - m7G scores") storm_metricsBoxPlot_byNuc(STORM, m1A_scores, "Yeast - m1A scores") storm_metricsBoxPlot_byNuc(STORM, m5C_scores, "Yeast - m5C scores") storm_metricsBoxPlot_byNuc(STORM, m3U_scores, "Yeast - m3U scores")
Here I show the median value of all the metrics for each different nucleotide and RNA modification. Showing how the different metrics create a profile for predicting each one.
tmp3 <- lapply(names(STORM$CALLS$Yeast), function(RNAm){ tmp <- STORM$CALLS$Yeast[[RNAm]][,-1:-9] tmp2 <- lapply(names(tmp), function(metric){ tapply(tmp[[metric]], STORM$CALLS$Yeast[[RNAm]]$nuc, function(x) median(x, na.rm = T)) }) %>% do.call(what = cbind) %>% set_colnames(names(tmp)) tmp2 }) %>% do.call(what = cbind) colnames(tmp3) tmp4 <- tmp3[c("Y", "Am", "Um", "Ym", "Gm", "Cm", "m5C", "ac4C", "m1A", "m7G", "m3U", "A", "U", "G", "C"),] # colnames(tmp4) <- paste("metric", 1:ncol(tmp4), sep = "_") pheatmap::pheatmap(tmp4, scale = "column", cluster_cols = F, cluster_rows = F)
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.