AggregatePeakCounts: Aggregate multiple peak count outputs together

View source: R/count_polyA.R

AggregatePeakCountsR Documentation

Aggregate multiple peak count outputs together

Description

Aggregate the output from multiple runs of CountPeaks together. By default, this function will update the cell barcodes in a format consistent with the CellRanger aggr program. That is, for n experiments to aggregate, cell barcodes will be appended with '-1', '-2',...,'-n'. For downstream analysis, if using the PeakSeuratFromTransfer function, it is important to ensure that these match what is in the gene count matrix. The name of the expected separator character can be updated with the 'exp.sep' parameter and preferred labels can be specified with the 'exp.labels' parameter. The barcode updates can be turned off by setting update.labels = FALSE if manual setting is preferred.

Usage

AggregatePeakCounts(
  peak.sites.file,
  count.dirs,
  output.dir,
  update.labels = TRUE,
  exp.sep = "-",
  exp.labels = NULL
)

Arguments

peak.sites.file

a file containing peak site coordinates

count.dirs

a list of output directories from CountPeaks

output.dir

output directory for aggregate count matrix

update.labels

whether to append an experiment label to the cell barcode (default: TRUE)

exp.sep

a character separating the cell barcode from experiment label (default: '-')

exp.labels

optional labels to append to cell barcodes corresponding to count.dirs

Value

NULL. Writes counts to file.

Examples


library(Sierra)
extdata_path <- system.file("extdata",package = "Sierra")
reference.file <- paste0(extdata_path,"/Vignette_cellranger_genes_subset.gtf")
junctions.file <- paste0(extdata_path,"/Vignette_example_TIP_sham_junctions.bed")
bamfile <- c(paste0(extdata_path,"/Vignette_example_TIP_sham.bam"),
            paste0(extdata_path,"/Vignette_example_TIP_mi.bam") )
whitelist.bc.file <- c(paste0(extdata_path,"/example_TIP_sham_whitelist_barcodes.tsv"),
                      paste0(extdata_path,"/example_TIP_MI_whitelist_barcodes.tsv"))

### Peak calling
peak.output.file <- c("Vignette_example_TIP_sham_peaks.txt",
                     "Vignette_example_TIP_MI_peaks.txt")
FindPeaks(output.file = peak.output.file[1],   # output filename
         gtf.file = reference.file,           # gene model as a GTF file
bamfile = bamfile[1],                # BAM alignment filename.
         junctions.file = junctions.file,     # BED filename of splice junctions exising in BAM file.
         ncores = 1)                          # number of cores to use


FindPeaks(output.file = peak.output.file[2],   # output filename
         gtf.file = reference.file,           # gene model as a GTF file
         bamfile = bamfile[2],                # BAM alignment filename.
         junctions.file = junctions.file,     # BED filename of splice junctions exising in BAM file.
         ncores = 1)

#### Peak merging
peak.dataset.table = data.frame(Peak_file = peak.output.file,
                               Identifier = c("TIP-example-Sham", "TIP-example-MI"),
                               stringsAsFactors = FALSE)

peak.merge.output.file = "TIP_merged_peaks.txt"
MergePeakCoordinates(peak.dataset.table, output.file = peak.merge.output.file, ncores = 1)

count.dirs <- c("example_TIP_sham_counts", "example_TIP_MI_counts")
#sham data set
CountPeaks(peak.sites.file = peak.merge.output.file,  gtf.file = reference.file,
          bamfile = bamfile[1], whitelist.file = whitelist.bc.file[1],
          output.dir = count.dirs[1],  countUMI = TRUE, ncores = 1)
 # MI data set
 CountPeaks(peak.sites.file = peak.merge.output.file,  gtf.file = reference.file,
            bamfile = bamfile[2], whitelist.file = whitelist.bc.file[2],
            output.dir = count.dirs[2],  countUMI = TRUE, ncores = 1)
            
 out.dir <- "example_TIP_aggregate"
 AggregatePeakCounts(peak.sites.file = peak.merge.output.file, count.dirs = count.dirs,
                   exp.labels = c("Sham", "MI"), output.dir = out.dir)      
 

VCCRI/Sierra documentation built on July 3, 2023, 6:39 a.m.