inst/doc/UMI4Cats.R

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>",
    eval = TRUE,
    warning = FALSE,
    message = FALSE,
    fig.align = "center",
    out.width = "80%"
)

## ----logo, echo=FALSE, eval=TRUE, out.width='10%'-----------------------------
knitr::include_graphics("../man/figures/UMI4Cats.png", dpi = 800)

## ----load---------------------------------------------------------------------
library(UMI4Cats)

## ----umi4cats-scheme, echo=FALSE, eval=TRUE, fig.cap="Overview of the different functions included in the UMI4Cats package to analyze UMI-4C data."----
knitr::include_graphics("figures/scheme.png", dpi = 400)

## ----processing-quick-start, eval=FALSE---------------------------------------
#  ## 0) Download example data -------------------------------
#  path <- downloadUMI4CexampleData()
#  
#  ## 1) Generate Digested genome ----------------------------
#  # The selected RE in this case is DpnII (|GATC), so the cut_pos is 0, and the res_enz "GATC".
#  hg19_dpnii <- digestGenome(
#      cut_pos = 0,
#      res_enz = "GATC",
#      name_RE = "DpnII",
#      ref_gen = BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19,
#      out_path = file.path(tempdir(), "digested_genome/")
#  )
#  
#  ## 2) Process UMI-4C fastq files --------------------------
#  raw_dir <- file.path(path, "CIITA", "fastq")
#  
#  contactsUMI4C(
#      fastq_dir = raw_dir,
#      wk_dir = file.path(path, "CIITA"),
#      bait_seq = "GGACAAGCTCCCTGCAACTCA",
#      bait_pad = "GGACTTGCA",
#      res_enz = "GATC",
#      cut_pos = 0,
#      digested_genome = hg19_dpnii,
#      bowtie_index = file.path(path, "ref_genome", "ucsc.hg19.chr16"),
#      ref_gen = BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19,
#      threads = 5
#  )
#  
#  ## 3) Get filtering and alignment stats -------------------
#  statsUMI4C(wk_dir = file.path(path, "CIITA"))
#  
#  ## 4) Analyze the results ---------------------------------
#  # Load sample processed file paths
#  files <- list.files(file.path(path, "CIITA", "count"),
#                      pattern = "*_counts.tsv.gz",
#                      full.names = TRUE
#  )
#  
#  # Create colData including all relevant information
#  colData <- data.frame(
#      sampleID = gsub("_counts.tsv.gz", "", basename(files)),
#      file = files,
#      stringsAsFactors = FALSE
#  )
#  
#  library(tidyr)
#  colData <- colData %>%
#      separate(sampleID,
#          into = c("condition", "replicate", "viewpoint"),
#          remove = FALSE
#      )
#  
#  # Make UMI-4C object including grouping by condition
#  umi <- makeUMI4C(
#      colData = colData,
#      viewpoint_name = "CIITA",
#      grouping = "condition",
#      bait_expansion = 2e6
#  )
#  
#  # Plot replicates
#  plotUMI4C(umi, grouping=NULL)
#  
#  ## 5) Differential testing ----------------------
#  # Fisher test
#  umi_fisher <- fisherUMI4C(umi, filter_low = 30, grouping="condition")
#  plotUMI4C(umi_fisher, ylim = c(0, 10), grouping="condition")
#  
#  # DESeq2 Wald Test
#  umi_wald <- differentialNbinomWaldTestUMI4C(umi4c=umi,
#                                              design=~condition,
#                                              alpha = 20)
#  plotUMI4C(umi_wald, ylim = c(0, 10), grouping="condition")

## ----demultiplex, eval=TRUE---------------------------------------------------
## Input files
path <- downloadUMI4CexampleData(reduced=TRUE)
fastq <- file.path(path, "CIITA", "fastq", "ctrl_hi24_CIITA_R1.fastq.gz")

## Barcode info
barcodes <- data.frame(
    sample = c("CIITA"),
    barcode = c("GGACAAGCTCCCTGCAACTCA")
)

## Output path
out_path <- tempdir()

## Demultiplex baits inside FastQ file
demultiplexFastq(
    fastq = fastq,
    barcodes = barcodes,
    out_path = out_path
)

## ----digest-------------------------------------------------------------------
library(BSgenome.Hsapiens.UCSC.hg19)
refgen <- BSgenome.Hsapiens.UCSC.hg19

hg19_dpnii <- digestGenome(
    res_enz = "GATC",
    cut_pos = 0,
    name_RE = "dpnII",
    ref_gen = refgen,
    sel_chr = "chr16", # Select bait's chr (chr16) to make example faster
    out_path = file.path(tempdir(), "digested_genome/")
)

hg19_dpnii

## ----read-scheme, echo=FALSE, eval=TRUE, fig.cap="Schematic of a UMI-4C read detailing the different elements that need to be used as input for processing the data."----
knitr::include_graphics("figures/read_scheme.png", dpi = 500)

## ----processing, message=TRUE-------------------------------------------------
## Use reduced example to make vignette faster
## If you want to download the full dataset, set reduced = FALSE or remove
## the reduce argument.
## The reduced example is already downloaded in the demultiplexFastq chunk.

# path <- downloadUMI4CexampleData(reduced=TRUE)
raw_dir <- file.path(path, "CIITA", "fastq")
index_path <- file.path(path, "ref_genome", "ucsc.hg19.chr16")

## Run main function to process UMI-4C contacts
contactsUMI4C(
    fastq_dir = raw_dir,
    wk_dir = file.path(path, "CIITA"),
    file_pattern = "ctrl_hi24_CIITA", # Select only one sample to reduce running time
    bait_seq = "GGACAAGCTCCCTGCAACTCA",
    bait_pad = "GGACTTGCA",
    res_enz = "GATC",
    cut_pos = 0,
    digested_genome = hg19_dpnii,
    bowtie_index = index_path,
    ref_gen = BSgenome.Hsapiens.UCSC.hg19::BSgenome.Hsapiens.UCSC.hg19,
    sel_seqname = "chr16", # Input bait chr to reduce running time
    threads = 2
)

## ----stats--------------------------------------------------------------------
# Using the full dataset included in the package
statsUMI4C(wk_dir = system.file("extdata", "CIITA",
    package = "UMI4Cats"
))

# Read stats table
stats <- read.delim(system.file("extdata", "CIITA", "logs", "stats_summary.txt",
    package = "UMI4Cats"
))

knitr::kable(stats)

## ----make-umi4c---------------------------------------------------------------
# Load sample processed file paths
files <- list.files(system.file("extdata", "CIITA", "count", package="UMI4Cats"),
                    pattern = "*_counts.tsv.gz",
                    full.names = TRUE
)

# Create colData including all relevant information
colData <- data.frame(
    sampleID = gsub("_counts.tsv.gz", "", basename(files)),
    file = files,
    stringsAsFactors = FALSE
)

library(tidyr)
colData <- colData %>%
    separate(sampleID,
        into = c("condition", "replicate", "viewpoint"),
        remove = FALSE
    )

# Load UMI-4C data and generate UMI4C object
umi <- makeUMI4C(
    colData = colData,
    viewpoint_name = "CIITA",
    grouping = "condition",
    ref_umi4c = c("condition"="ctrl"),
    bait_expansion = 2e6
)

umi
groupsUMI4C(umi)

## ----methods-umi4c------------------------------------------------------------
groupsUMI4C(umi) # Available grouped UMI-4C objects

head(assay(umi)) # Retrieve raw UMIs
head(assay(groupsUMI4C(umi)$condition)) # Retrieve UMIs grouped by condition

colData(umi) # Retrieve column information

rowRanges(umi) # Retrieve fragment coordinates

dgram(umi) # Retrieve domainograms
dgram(groupsUMI4C(umi)$condition) # Retrieve domainograms

bait(umi) # Retrieve bait coordinates

head(trend(umi)) # Retrieve adaptive smoothing trend

## ----dif-query----------------------------------------------------------------
library(GenomicRanges)

# Provide your own query regions as GRanges objects
enhancers <- GRanges(c(
    "chr16:10925006-10928900",
    "chr16:11102721-11103700"
))

# Perform differential test
umi_dif1 <- fisherUMI4C(umi,
    grouping = "condition",
    query_regions = enhancers,
    filter_low = 20,
    resize = 5e3
)

## ----dif-bins, eval=FALSE-----------------------------------------------------
#  # Perform differential test
#  umi_dif2 <- fisherUMI4C(umi,
#      grouping = "condition",
#      filter_low = 20,
#      window_size = 5e3
#  )

## ----dif-deseq, message=TRUE--------------------------------------------------
umi_dif3 <- differentialNbinomWaldTestUMI4C(umi,
                                           design = ~condition,
                                           alpha=100) # Low alpha to make computation faster

## ----umi4cats-DESeq2, echo=FALSE, eval=TRUE, fig.cap="DESeq2 Differential Analysis flowchart."----
knitr::include_graphics("figures/differentialNbinomWaldTestUMI4C.png", dpi = 500)

## ----show-results-umi4c-------------------------------------------------------
resultsUMI4C(umi_dif1, ordered = TRUE, counts = TRUE, format = "data.frame")

## ----plot-umi4c---------------------------------------------------------------
plotUMI4C(umi,
    grouping = NULL,
    TxDb = TxDb.Hsapiens.UCSC.hg19.knownGene::TxDb.Hsapiens.UCSC.hg19.knownGene,
    dgram_plot = FALSE
)

## ----plot-dif-----------------------------------------------------------------
plotUMI4C(umi_dif1, grouping = "condition")
plotUMI4C(umi_dif3, grouping = "condition")

Try the UMI4Cats package in your browser

Any scripts or data that you put into this service are public.

UMI4Cats documentation built on Dec. 31, 2020, 2:01 a.m.