inst/doc/consensusSeekeR.R

## ----style, echo = FALSE, results = 'asis'------------------------------------
BiocStyle::markdown()
library(knitr)

## ----peakRegion, echo=FALSE, fig.cap = "Two ChIP-Seq peaks. A ChIP-seq peak is defined by a enriched region an a peak position."----
knitr::include_graphics('figures/peak_and_region.jpg')

## ----loadingPackage, warning=FALSE, message=FALSE-----------------------------
library(consensusSeekeR)

## ----input, echo=FALSE--------------------------------------------------------
### Load dataset
data(A549_FOSL2_01_NarrowPeaks_partial)

### Remove dataset metadata field "name"
A549_FOSL2_01_NarrowPeaks_partial$name        <- NULL
A549_FOSL2_01_NarrowPeaks_partial$score       <- NULL
A549_FOSL2_01_NarrowPeaks_partial$qValue      <- NULL
A549_FOSL2_01_NarrowPeaks_partial$pValue      <- NULL
A549_FOSL2_01_NarrowPeaks_partial$signalValue <- NULL
A549_FOSL2_01_NarrowPeaks_partial$peak        <- NULL

## ----setName, collapse=TRUE---------------------------------------------------
### Initial dataset without metadata field
head(A549_FOSL2_01_NarrowPeaks_partial, n = 3)

### Adding a new metadata field "name" unique to each entry
A549_FOSL2_01_NarrowPeaks_partial$name <- paste0("FOSL2_01_entry_", 
                                1:length(A549_FOSL2_01_NarrowPeaks_partial))

### Assign the same row name to each entry
names(A549_FOSL2_01_NarrowPeaks_partial) <- rep("FOSL2_01", 
                                length(A549_FOSL2_01_NarrowPeaks_partial))

### Final dataset with metadata field 'name' and row names
head(A549_FOSL2_01_NarrowPeaks_partial, n = 3)

## ----inputClean, echo=FALSE, message=FALSE------------------------------------
### Remove dataset
rm(A549_FOSL2_01_NarrowPeaks_partial)

## ----knownGenome, collapse=TRUE, eval=FALSE-----------------------------------
#  ### Import library
#  library(GenomeInfoDb)
#  
#  ### Get the information for Human genome version 19
#  hg19Info <- Seqinfo(genome="hg19")
#  
#  ### Subset the object to keep only the analyzed chromosomes
#  hg19Subset <- hg19Info[c("chr1", "chr10", "chrX")]

## ----newGenome, collapse=FALSE------------------------------------------------
### Create an Seqinfo Object
chrInfo <- Seqinfo(seqnames=c("chr1", "chr2", "chr3"),
            seqlengths=c(1000, 2000, 1500), isCircular=c(FALSE, FALSE, FALSE),
            genome="BioconductorAlien")

## ----deleteChr, echo=FALSE, message=FALSE-------------------------------------
if (exists("chrInfo", inherits = FALSE)) rm(chrInfo)
if (exists("hg19Subset", inherits = FALSE)) rm(hg19Subset)
if (exists("hg19Info", inherits = FALSE)) rm(hg19Info)

## ----rtracklayer, collapse=TRUE-----------------------------------------------
### Load the needed packages
library(rtracklayer)
library(GenomicRanges)

### Demo file contained within the consensusSeekeR package
file_narrowPeak <- system.file("extdata",
        "A549_FOSL2_ENCSR000BQO_MZW_part_chr_1_and_12.narrowPeak", package = "consensusSeekeR")

### Information about the extra columns present in the file need
### to be specified
extraCols <- c(signalValue = "numeric", pValue = "numeric", qValue = "numeric", peak = "integer")

### Create genomic ranges for the regions
regions <- import(file_narrowPeak, format = "BED", extraCols = extraCols)

### Create genomic ranges for the peaks
peaks           <-  regions
ranges(peaks)   <-  IRanges(start = (start(regions) + regions$peak), width = rep(1, length(regions$peak)))

### First rows of each GRanges object
head(regions, n = 2)
head(peaks, n = 2)

## ----secretRM, echo=FALSE, collapse=TRUE--------------------------------------
rm(peaks)
rm(regions)

## ----readNarrowPeak_1, collapse=TRUE------------------------------------------
library(consensusSeekeR)

### Demo file contained within the consensusSeekeR package
file_narrowPeak <- system.file("extdata",
    "A549_FOSL2_ENCSR000BQO_MZW_part_chr_1_and_12.narrowPeak", package = "consensusSeekeR")

### Create genomic ranges for both the regions and the peaks
result <- readNarrowPeakFile(file_narrowPeak, extractRegions = TRUE, extractPeaks = TRUE)

### First rows of each GRanges object
head(result$narrowPeak, n = 2)
head(result$peak, n = 2)

## ----secretRM_02, echo=FALSE, collapse=TRUE-----------------------------------
rm(result)

## ----libraryLoadNucl, warning=FALSE, message=FALSE----------------------------
library(consensusSeekeR)

## ----loadingDatasetsNucl------------------------------------------------------
### Loading dataset from NOrMAL
data(NOrMAL_nucleosome_positions) ; data(NOrMAL_nucleosome_ranges)

### Loading dataset from PING
data(PING_nucleosome_positions) ; data(PING_nucleosome_ranges)

### Loading dataset from NucPosSimulator
data(NucPosSimulator_nucleosome_positions) ; data(NucPosSimulator_nucleosome_ranges)

## ----prepareData, echo=FALSE--------------------------------------------------
rownames(NOrMAL_nucleosome_positions) <- NULL
rownames(NOrMAL_nucleosome_ranges)    <- NULL

## ----datasetNuc, collapse=TRUE------------------------------------------------
### Each entry in the positions dataset has an equivalent metadata 
### "name" entry in the ranges dataset
head(NOrMAL_nucleosome_positions, n = 2)

head(NOrMAL_nucleosome_ranges, n = 2)

## ----nuclAssignment, collapse=TRUE--------------------------------------------
### Assigning software name "NOrMAL" 
names(NOrMAL_nucleosome_positions) <- rep("NOrMAL", length(NOrMAL_nucleosome_positions))
names(NOrMAL_nucleosome_ranges) <- rep("NOrMAL", length(NOrMAL_nucleosome_ranges))

### Assigning experiment name "PING"
names(PING_nucleosome_positions) <- rep("PING", length(PING_nucleosome_positions))
names(PING_nucleosome_ranges) <- rep("PING", length(PING_nucleosome_ranges))

### Assigning experiment name "NucPosSimulator"
names(NucPosSimulator_nucleosome_positions) <- rep("NucPosSimulator", 
                                length(NucPosSimulator_nucleosome_positions))
names(NucPosSimulator_nucleosome_ranges) <- rep("NucPosSimulator", 
                                length(NucPosSimulator_nucleosome_ranges))

### Row names are unique to each software
head(NOrMAL_nucleosome_positions, n = 2)

head(PING_nucleosome_positions, n = 2)

head(NucPosSimulator_nucleosome_positions, n = 2)

## ----nuclConsensus, collapse=FALSE--------------------------------------------
### Only choromsome 1 is going to be analyzed
chrList <- Seqinfo("chr1", 135534747, NA)

### Find consensus regions with both replicates inside it
results <- findConsensusPeakRegions(
            narrowPeaks = c(NOrMAL_nucleosome_ranges, PING_nucleosome_ranges,
                                NucPosSimulator_nucleosome_ranges),
            peaks = c(NOrMAL_nucleosome_positions, PING_nucleosome_positions,
                                NucPosSimulator_nucleosome_positions),
            chrInfo = chrList,
            extendingSize = 25,
            expandToFitPeakRegion = TRUE,
            shrinkToFitPeakRegion = TRUE,
            minNbrExp = 2,
            nbrThreads = 1)

## ----nuclResults, collapse=TRUE-----------------------------------------------
### Print the call 
results$call

### Print the 3 first consensus regions
head(results$consensusRanges, n = 3)

## ----nucleosomes, echo=FALSE, fig.cap = "Consensus regions. The consensus regions obtained for peaks called from three software on the same dataset."----
knitr::include_graphics('figures/nucleosomes.jpg')

## ----removeDatasetsNucl, echo=FALSE-------------------------------------------
### Remove dataset from NOrMAL
rm(NOrMAL_nucleosome_positions)
rm(NOrMAL_nucleosome_ranges)

### Remove dataset from PING
rm(PING_nucleosome_positions)
rm(PING_nucleosome_ranges)

### Remove dataset from NucPosSimulator
rm(NucPosSimulator_nucleosome_positions)
rm(NucPosSimulator_nucleosome_ranges)

## ----libraryLoad, warning=FALSE, message=FALSE--------------------------------
library(consensusSeekeR)

## ----loadingDatasets----------------------------------------------------------
### Loading datasets
data(A549_CTCF_MYN_NarrowPeaks_partial) ; data(A549_CTCF_MYN_Peaks_partial)
data(A549_CTCF_MYJ_NarrowPeaks_partial) ; data(A549_CTCF_MYJ_Peaks_partial)

## ----repAssignment------------------------------------------------------------
### Assigning experiment name "rep01" to the first replicate
names(A549_CTCF_MYJ_NarrowPeaks_partial) <- rep("rep01", length(A549_CTCF_MYJ_NarrowPeaks_partial))
names(A549_CTCF_MYJ_Peaks_partial)       <- rep("rep01", length(A549_CTCF_MYJ_Peaks_partial))

### Assigning experiment name "rep02" to the second replicate
names(A549_CTCF_MYN_NarrowPeaks_partial) <- rep("rep02", length(A549_CTCF_MYN_NarrowPeaks_partial))
names(A549_CTCF_MYN_Peaks_partial)       <- rep("rep02", length(A549_CTCF_MYN_Peaks_partial))

## ----replicateConsensus, collapse=FALSE---------------------------------------
### Only choromsome 10 is going to be analyzed
chrList <- Seqinfo("chr10", 135534747, NA)

### Find consensus regions with both replicates inside it
results <- findConsensusPeakRegions(
            narrowPeaks = c(A549_CTCF_MYJ_NarrowPeaks_partial, A549_CTCF_MYN_NarrowPeaks_partial),
            peaks = c(A549_CTCF_MYJ_Peaks_partial, A549_CTCF_MYN_Peaks_partial),
            chrInfo = chrList,
            extendingSize = 100,
            expandToFitPeakRegion = TRUE,
            shrinkToFitPeakRegion = TRUE,
            minNbrExp = 2,
            nbrThreads = 1)

## ----replicateResults, collapse=TRUE------------------------------------------
### Print the call 
results$call

### Print the 3 first consensus regions
head(results$consensusRanges, n = 3)

## ----ctcf, echo=FALSE, fig.cap = "One consensus region for 2 ChIP-Seq replicates."----
knitr::include_graphics('figures/ctcf_consensus.jpg')

## ----rmCurrentDatasets, echo=FALSE, message=FALSE-----------------------------
### Remove datasets
rm(A549_CTCF_MYN_NarrowPeaks_partial)
rm(A549_CTCF_MYN_Peaks_partial)
rm(A549_CTCF_MYJ_NarrowPeaks_partial)
rm(A549_CTCF_MYJ_Peaks_partial)

## ----libLoad, warning=FALSE, message=FALSE------------------------------------
library(consensusSeekeR)

## ----loadingDatasetsExp-------------------------------------------------------
### Loading datasets
data(A549_NR3C1_CFQ_NarrowPeaks_partial) ; data(A549_NR3C1_CFQ_Peaks_partial)
data(A549_NR3C1_CFR_NarrowPeaks_partial) ; data(A549_NR3C1_CFR_Peaks_partial)
data(A549_NR3C1_CFS_NarrowPeaks_partial) ; data(A549_NR3C1_CFS_Peaks_partial)

## ----expAssignment------------------------------------------------------------
### Assign experiment name "ENCFF002CFQ" to the first experiment
names(A549_NR3C1_CFQ_NarrowPeaks_partial) <- rep("ENCFF002CFQ", 
                                length(A549_NR3C1_CFQ_NarrowPeaks_partial))
names(A549_NR3C1_CFQ_Peaks_partial) <- rep("ENCFF002CFQ", 
                                length(A549_NR3C1_CFQ_Peaks_partial))

### Assign experiment name "ENCFF002CFQ" to the second experiment
names(A549_NR3C1_CFR_NarrowPeaks_partial) <- rep("ENCFF002CFR", 
                                length(A549_NR3C1_CFR_NarrowPeaks_partial))
names(A549_NR3C1_CFR_Peaks_partial)       <- rep("ENCFF002CFR", 
                                length(A549_NR3C1_CFR_Peaks_partial))

### Assign experiment name "ENCFF002CFQ" to the third experiment
names(A549_NR3C1_CFS_NarrowPeaks_partial) <- rep("ENCFF002CFS", 
                                length(A549_NR3C1_CFS_NarrowPeaks_partial))
names(A549_NR3C1_CFS_Peaks_partial)       <- rep("ENCFF002CFS", 
                                length(A549_NR3C1_CFS_Peaks_partial))

## ----rowNames-----------------------------------------------------------------
### Assign specific name to each entry of to first experiment
### NarrowPeak name must fit Peaks name for same experiment
A549_NR3C1_CFQ_NarrowPeaks_partial$name <- paste0("NR3C1_CFQ_region_", 
                                1:length(A549_NR3C1_CFQ_NarrowPeaks_partial))
A549_NR3C1_CFQ_Peaks_partial$name       <- paste0("NR3C1_CFQ_region_",
                                1:length(A549_NR3C1_CFQ_NarrowPeaks_partial))

### Assign specific name to each entry of to second experiment
### NarrowPeak name must fit Peaks name for same experiment
A549_NR3C1_CFR_NarrowPeaks_partial$name <- paste0("NR3C1_CFR_region_", 
                                1:length(A549_NR3C1_CFR_NarrowPeaks_partial))
A549_NR3C1_CFR_Peaks_partial$name       <- paste0("NR3C1_CFR_region_", 
                                1:length(A549_NR3C1_CFR_Peaks_partial))

### Assign specific name to each entry of to third experiment
### NarrowPeak name must fit Peaks name for same experiment
A549_NR3C1_CFS_NarrowPeaks_partial$name <- paste0("NR3C1_CFS_region_", 
                                1:length(A549_NR3C1_CFS_NarrowPeaks_partial))
A549_NR3C1_CFS_Peaks_partial$name       <- paste0("NR3C1_CFS_region_", 
                                1:length(A549_NR3C1_CFS_Peaks_partial))

## ----experimentConsensus, collapse=FALSE--------------------------------------
### Only choromsome 2 is going to be analyzed
chrList <- Seqinfo("chr2", 243199373, NA)

### Find consensus regions with both replicates inside it
results <- findConsensusPeakRegions(
            narrowPeaks = c(A549_NR3C1_CFQ_NarrowPeaks_partial, 
                        A549_NR3C1_CFR_NarrowPeaks_partial,
                        A549_NR3C1_CFS_NarrowPeaks_partial),
            peaks = c(A549_NR3C1_CFQ_Peaks_partial, 
                        A549_NR3C1_CFR_Peaks_partial,
                        A549_NR3C1_CFS_Peaks_partial),
            chrInfo = chrList,
            extendingSize = 200,
            expandToFitPeakRegion = FALSE,
            shrinkToFitPeakRegion = TRUE,
            minNbrExp = 2,
            nbrThreads = 1)

## ----experimentResults, collapse=TRUE-----------------------------------------
### Print the call 
results$call

### Print the first 3 consensus regions
head(results$consensusRanges, n = 3)

## ----NR3C1, echo=FALSE, fig.cap = "Consensus regions. The consensus regions obtained for peaks called for 3 NR2C1 ChIP-Seq experiments."----
knitr::include_graphics('figures/NR3C1_consensus.jpg')

## ----rmDatasetsExp, echo=FALSE, message=FALSE---------------------------------
### Remove datasets
rm(A549_NR3C1_CFQ_NarrowPeaks_partial)
rm(A549_NR3C1_CFQ_Peaks_partial)
rm(A549_NR3C1_CFR_NarrowPeaks_partial)
rm(A549_NR3C1_CFR_Peaks_partial)
rm(A549_NR3C1_CFS_NarrowPeaks_partial)
rm(A549_NR3C1_CFS_Peaks_partial)

## ----shrink, echo=FALSE, fig.cap = "Effect of the shrinkToFitPeakRegion parameter."----
knitr::include_graphics('figures/shrink.jpg')

## ----extend, echo=FALSE, fig.cap = "Effect of the expandToFitPeakRegion parameter."----
knitr::include_graphics('figures/extendNew.jpg')

## ----sizeEffect, collapse=TRUE, eval=FALSE------------------------------------
#  ### Set different values for the extendingSize parameter
#  size <- c(1, 10, 50, 100, 300, 500, 750, 1000)
#  
#  ### Only chrompsome 10 is going to be analyzed
#  chrList <- Seqinfo("chr10", 135534747, NA)
#  
#  ### Find consensus regions using all the size values
#  resultsBySize <- lapply(size, FUN = function(size) findConsensusPeakRegions(
#                      narrowPeaks = c(A549_CTCF_MYJ_NarrowPeaks_partial,
#                                      A549_CTCF_MYN_NarrowPeaks_partial),
#                      peaks = c(A549_CTCF_MYJ_Peaks_partial,
#                                      A549_CTCF_MYN_Peaks_partial),
#                      chrInfo = chrList,
#                      extendingSize = size,
#                      expandToFitPeakRegion = TRUE,
#                      shrinkToFitPeakRegion = TRUE,
#                      minNbrExp = 2,
#                      nbrThreads = 1))
#  
#  ### Extract the number of consensus regions obtained for each extendingSize
#  nbrRegions <- mapply(resultsBySize,
#                  FUN = function(x) return(length(x$consensusRanges)))

## ----graphSizeEffect, warning=FALSE, eval=FALSE-------------------------------
#  
#  library(ggplot2)
#  
#  data <- data.frame(extendingSize = size, nbrRegions = nbrRegions)
#  
#  ggplot(data, aes(extendingSize, nbrRegions)) + scale_x_log10("Extending size") +
#          stat_smooth(se = FALSE, method = "loess", size=1.4) +
#          ylab("Number of consensus regions") +
#          ggtitle(paste0("Number of consensus regions in function of the extendingSize"))

## ----sizeEffectG, echo=FALSE, fig.cap = "Effect of the extendingSize parameter."----
knitr::include_graphics('figures/sizeEffect.jpg')

## ----parallelExample, eval=FALSE, collapse=TRUE-------------------------------
#  ### Load data
#  data(A549_FOSL2_01_NarrowPeaks_partial) ; data(A549_FOSL2_01_Peaks_partial)
#  data(A549_FOXA1_01_NarrowPeaks_partial) ; data(A549_FOXA1_01_Peaks_partial)
#  
#  ### Assigning name "FOSL2"
#  names(A549_FOSL2_01_NarrowPeaks_partial) <- rep("FOSL2",
#                                      length(A549_FOSL2_01_NarrowPeaks_partial))
#  names(A549_FOSL2_01_Peaks_partial)       <- rep("FOSL2",
#                                      length(A549_FOSL2_01_Peaks_partial))
#  
#  ### Assigning name "FOXA1"
#  names(A549_FOXA1_01_NarrowPeaks_partial) <- rep("FOXA1",
#                                      length(A549_FOXA1_01_NarrowPeaks_partial))
#  names(A549_FOXA1_01_Peaks_partial)       <- rep("FOXA1",
#                                      length(A549_FOXA1_01_Peaks_partial))
#  
#  ### Two chromosomes to analyse
#  chrList <- Seqinfo(paste0("chr", c(1,10)), c(249250621, 135534747), NA)
#  
#  ### Find consensus regions using 2 threads
#  results <- findConsensusPeakRegions(
#              narrowPeaks = c(A549_FOSL2_01_NarrowPeaks_partial,
#                              A549_FOXA1_01_Peaks_partial),
#              peaks = c(A549_FOSL2_01_Peaks_partial,
#                              A549_FOXA1_01_NarrowPeaks_partial),
#              chrInfo = chrList, extendingSize = 200, minNbrExp = 2,
#              expandToFitPeakRegion = FALSE, shrinkToFitPeakRegion = FALSE,
#              nbrThreads = 4)

## ----sessionInfo, echo=FALSE--------------------------------------------------
sessionInfo()

Try the consensusSeekeR package in your browser

Any scripts or data that you put into this service are public.

consensusSeekeR documentation built on Nov. 8, 2020, 4:54 p.m.