knitr::opts_chunk$set(eval = FALSE) knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
In this tutorial, we are going to utilize 5' scRNA-seq data on epigenetically de-repressed cancer cell lines to quantify transposable element (TE) expression levels at single-cell and locus resolution. Following the workflow, you'll learn the specifics of Repsc to adapt it to your single-cell dataset.
We start the workflow by loading Repsc and the human hg38 BSgenome object into our R environment:
Sys.time() devtools::load_all('/net/mraid14/export/tgdata/users/davidbr/src/Repsc/') devtools::load_all('/net/mraid14/export/tgdata/users/davidbr/src/Reputils/') devtools::load_all('/net/mraid14/export/tgdata/users/davidbr/src/Repdata/') library(BSgenome.Mmusculus.UCSC.mm10)
# path to BAM/SAM files containing mapped reads bam_paths <- dir("~/tgdata/data/nowotschin_2019/aligned", full.names = TRUE, pattern = 'bam.bam$') # split BAM by chromosome for (bam in bam_paths) { Reputils::splitBAM(bam) } # deduplicate split BAMs bam_paths <- dir("~/tgdata/data/nowotschin_2019/aligned", full.names = TRUE, pattern = 'chr[0-9, X, Y, MT]*.bam$') future::plan(future.batchtools::batchtools_sge(resources = list(queue = "all.q", threads = 3, memory = 25), workers = Inf)) res <- listenv::listenv() for (bam in bam_paths) { print(bam) res[[bam]] %<-% Reputils::deduplicateBAM(bam, paired = FALSE, ncores = 3, align_dist = 1e4) %packages% "data.table" } as.list(res)
We then import our gene and TE annotation files as GRanges objects followed by Repsc-specific curation and formatting using the curateGenes
and curateTEs
functions.
# path to Gencode gtf file (provided) gene_path <- system.file(package = 'Repdata', 'extdata', 'mm10', 'genes', 'gencode.vM22.annotation.gtf.gz') # path to RepeatMasker mm9 repeat annotation (provided) rmsk_path <- system.file(package = 'Repdata', 'extdata', 'mm10', 'tes', 'mm10.fa.out.gz') # creating the scSet sc <- createScSet(genome = Mmusculus, protocol = 'threeprime', tes = rmsk_path, genes = gene_path)
# path to bam files containing mapped reads bam_paths <- dir("~/tgdata/data/nowotschin_2019/aligned/", pattern = 'deduplicated.bam$', full.names = TRUE) #hdf5_paths <- dir("~/tgdata/data/nowotschin_2019/cellranger/", pattern = 'filtered_feature_bc_matrix.h5', full.names = TRUE, recursive = TRUE) # create a data.frame specifying import parameters input_df <- data.frame(paths = bam_paths, paired = FALSE, # use FALSE for single-end libraries barcode = 'CB', meta = substring(bam_paths, 52, 65), chunk = chunkFiles(bam_paths, 20), stringsAsFactors = FALSE) checkInput(input_df)
After we have imported and curated the data, we can procede to generate the actual read/UMI count matrix using the addCounts
function.
sc <- addCounts(sc, bams = input_df, use_gcluster = TRUE)
plotCells(sc) sc_f <- selectCells(sc, max_mito = 0.05, min_size = 5000) plotCells(sc_f)
plotMapping(sc_f)
sc_f <- selectPeaks(sc_f) plotPeaks(sc_f)
sc_f <- selectFeatures(sc_f) plotFeatures(sc_f)
[1]
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.