knitr::opts_chunk$set(eval = FALSE) knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
In this tutorial, we are going to utilize 5' scRNA-seq data on epigenetically de-repressed cancer cell lines to quantify transposable element (TE) expression levels at single-cell and locus resolution. Following the workflow, you'll learn the specifics of Repsc to adapt it to your single-cell dataset.
We start the workflow by loading Repsc and the human hg38 BSgenome object into our R environment:
Sys.time() devtools::load_all('/net/mraid14/export/tgdata/users/davidbr/src/Repsc/') devtools::load_all('/net/mraid14/export/tgdata/users/davidbr/src/Reputils/') devtools::load_all('/net/mraid14/export/tgdata/users/davidbr/src/Repdata/') library(BSgenome.Hsapiens.UCSC.hg38)
We then import our gene and TE annotation files as GRanges objects followed by Repsc-specific curation and formatting using the curateGenes
and curateTEs
functions.
# path to Gencode gtf file (provided) # gene_path <- system.file(package = 'Repdata', # 'extdata', # 'gencode.v29.annotation.gtf.gz') gene_path <- '~/davidbr/tools/cellranger/references/refdata-cellranger-GRCh38-1.2.0/genes/genes.gtf' # path to RepeatMasker hg38 repeat annotation (provided) rmsk_path <- system.file(package = 'Repdata', 'extdata', 'hg38.fa.out.gz') # creating the scSet sc <- createScSet(genome = Hsapiens, protocol = 'threeprime', tes = rmsk_path, genes = gene_path)
After we have imported and curated the data, we can procede to generate the actual read/UMI count matrix using the compCounts
function.
# path to bam files containing mapped reads bam_paths <- c( dir('/net/mraid14/export/data/users/davidbr/proj/epitherapy/data/hct116/10x/dmso/aligned/chunked_bams', pattern = '_deduplicated_chr[0-9, X, Y, MT]+.bam', full.names = TRUE), dir('/net/mraid14/export/data/users/davidbr/proj/epitherapy/data/hct116/10x/parental/aligned/chunked_bams', pattern = '_deduplicated_chr[0-9, X, Y, MT]+.bam', full.names = TRUE) ) hdf5_paths <- c( '/net/mraid14/export/data/users/davidbr/proj/epitherapy/data/hct116/10x/dmso/hct116_DMSO/outs/filtered_gene_bc_matrices_h5.h5', '/net/mraid14/export/data/users/davidbr/proj/epitherapy/data/hct116/10x/parental/hct116_PAR/outs/filtered_gene_bc_matrices_h5.h5' ) # create a data.frame specifying import parameters bam_df <- data.frame(paths = bam_paths, paired = rep(c(TRUE, FALSE), each = 25), # use FALSE for single-end libraries mate = rep(c('first', NA), each = 25), # only imports the first mate of properly aligned read pairs, set to NA when using single-end libraries barcode = 'CB', # 10x barcode included in BAM flag chunk = 1:50, hdf5 = rep(hdf5_paths, each = 25), meta = rep(c('fivep', 'threep'), each = 25), stringsAsFactors = FALSE) head(bam_df)
# compute single-cell counts sc <- addCounts(sc, bams = bam_df, bin_size = 25, msa_dir = NULL, use_gcluster = TRUE) plotCells(sc)
sc <- resolveAmbiguity(sc)
sc <- callPeaks(sc, neighb_bins = 10, offset = 4, min_expr = 10, fdr = 0.01)
sc <- addStats(sc, min_expr = 10, min_norm_var = 0.5)
counts <- Repsc::counts(sc) counts_aggr <- counts[, .(n = sum(n_all, na.rm = TRUE), n_uniq = sum(n)), by = c('name', 'barcode') ][, n := round(n) ][which(n_uniq > n), n := n_uniq] counts_ds <- Reputils::downsamp(counts_aggr, full = FALSE, i = 'name') counts_ds <- merge(counts_ds, unique(counts[, c('name', 'barcode', 'meta', 'type')]), all.x = TRUE) summarized <- counts_ds[, .(n = sum(n_ds)), by = c('name', 'type', 'meta')] p_glob_comp <- summarized %>% tidyr::spread(meta, n, fill = 0) %>% ggplot(aes(fivep, threep)) + geom_point(size = 0.1) + scale_x_log10() + scale_y_log10() + facet_wrap(~type) p_ltr12c_cov <- counts[which(name %in% c('LTR12C', 'AluY', 'MER51', 'L1PA2', 'LTR13', 'THE1B', 'LTR7')), .(n = sum(n_all)), by = c('meta', 'pos_con', 'name')][which(meta == 'threep'), n := n * -1] %>% ggplot(aes(pos_con, n)) + geom_bar(stat = 'identity') + facet_wrap(~name, scales = 'free')
[1]
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.