inst/doc/profileplyr.R

## ----setup, echo=FALSE, results="hide"----------------------------------------
knitr::opts_chunk$set(tidy=FALSE, cache=FALSE,
                      dev="png",
                      message=FALSE, error=FALSE, warning=TRUE)

## ---- warning=F---------------------------------------------------------------
library(profileplyr)

example <- system.file("extdata", 
                       "example_deepTools_MAT", 
                       package = "profileplyr") 
proplyrObject <- import_deepToolsMat(example) 


## ---- warning=FALSE, message=FALSE--------------------------------------------

signalFiles <- c(system.file("extdata","Sorted_Hindbrain_day_12_1_filtered.bam",package = "profileplyr"),
                 system.file("extdata","Sorted_Liver_day_12_1_filtered.bam",package = "profileplyr"))

# BAMs must be indexed
require(Rsamtools)
for (i in seq_along(signalFiles)){
  indexBam(signalFiles[i])
}

testRanges <- system.file("extdata", 
                          "newranges_small.bed", 
                          package = "profileplyr")

chipProfile <- BamBigwig_to_chipProfile(signalFiles, 
                         testRanges, 
                         format = "bam",
                         paired=FALSE,
                         style="percentOfRegion",
                         nOfWindows=20,
                         distanceAround=40
                         )
chipProfile

## -----------------------------------------------------------------------------
proplyrObject <- as_profileplyr(chipProfile)
params(proplyrObject)$rowGroupsInUse

## ---- echo=F------------------------------------------------------------------
#vignette compiles much faster if going off of deeptools input
proplyrObject <- import_deepToolsMat(example)

## -----------------------------------------------------------------------------
proplyrObject

## -----------------------------------------------------------------------------
library(SummarizedExperiment)

assays(proplyrObject)
dim(proplyrObject)


## -----------------------------------------------------------------------------
rowRanges(proplyrObject)[1:3]

## -----------------------------------------------------------------------------
sampleData(proplyrObject)

## -----------------------------------------------------------------------------
params(proplyrObject)$rowGroupsInUse

## -----------------------------------------------------------------------------
params(proplyrObject)$mcolToOrderBy

proplyrObject <- orderBy(proplyrObject, "score")
params(proplyrObject)$mcolToOrderBy

## ---- echo=FALSE--------------------------------------------------------------
# change back to NULL for any other functions relying on this object
proplyrObject <- orderBy(proplyrObject,NULL)

## -----------------------------------------------------------------------------
proplyrObject[1:10,1:10]

## -----------------------------------------------------------------------------
proplyrObject[ , , 1:2]

## -----------------------------------------------------------------------------
rownames(sampleData(proplyrObject))

## -----------------------------------------------------------------------------
rownames(sampleData(proplyrObject)) <- c("Hindbrain",
                                         "Liver",
                                         "Kidney")

## ---- eval=T, results='hide'--------------------------------------------------
output_path <- file.path(tempdir(),"ATAC_example.MAT.gz")
export_deepToolsMat(proplyrObject, con = output_path)

## ---- echo = FALSE------------------------------------------------------------
proplyrObject <- proplyrObject[1:2,1:2,1]

## ---- fig3, fig.height = 6, fig.width = 4, fig.keep='none'--------------------
# heatmap not printed, see below for examples of rendered heatmaps from this function
heatmap <- generateEnrichedHeatmap(proplyrObject)
class(heatmap)

## ---- echo=FALSE--------------------------------------------------------------
example <- system.file("extdata", 
                       "example_deepTools_MAT", 
                       package = "profileplyr") 
proplyrObject <- import_deepToolsMat(example) 
rownames(sampleData(proplyrObject)) <- c("Hindbrain",
                                         "Liver",
                                         "Kidney")

## ---- fig2, fig.height=8, fig.width=4, fig.keep='none'------------------------

EH_mat <- convertToEnrichedHeatmapMat(proplyrObject)
EH_mat[[1]]

# the matrices of this list can be inputs for EnrichedHeatmap()

## -----------------------------------------------------------------------------
object_sumMat <- summarize(proplyrObject, 
                           fun = rowMeans, 
                           output = "matrix") 
object_sumMat[1:3, ]

## -----------------------------------------------------------------------------
proplyrObject_long <- summarize(proplyrObject, 
                                fun = rowMeans, 
                                output = "long") 
proplyrObject_long[1:3, ]

## ---- fig15, fig.height=4, fig.width=4, fig.align='center'--------------------
library(ggplot2)
ggplot(proplyrObject_long, aes(x = Sample, y = log(Signal))) + 
       geom_boxplot()


## ---- results='hide'----------------------------------------------------------
proplyrObject_summ <- summarize(proplyrObject, 
                                fun = rowMeans, 
                                output = "object")

## -----------------------------------------------------------------------------
assays(proplyrObject_summ)[[1]][1:3, ]
rowRanges(proplyrObject_summ)[1:3]

## ---- fig5, fig.height=6, fig.width=6, results='hide', fig.keep='none'--------
clusterRanges(proplyrObject, 
              fun = rowMeans)

# this code prints heatmap (does not return), but heatmap not shown here to save space

## ---- fig6, fig.height=6, fig.width=6-----------------------------------------
set.seed(0)
kmeans <- clusterRanges(proplyrObject, 
                        fun = rowMeans, 
                        kmeans_k = 4)

rowRanges(kmeans)[1:3]

params(kmeans)$rowGroupsInUse

## ---- fig7, fig.height=5, fig.width=5.5---------------------------------------
set.seed(0)
kmeans <- clusterRanges(proplyrObject, 
                        fun = rowMeans, 
                        kmeans_k = 4, 
                        silent = FALSE)

## ---- fig8, fig.height=5, fig.width=5-----------------------------------------
hclust <- clusterRanges(proplyrObject, 
                        fun = rowMeans, 
                        cutree_rows = 4, 
                        silent = FALSE)

## ---- results='hide'----------------------------------------------------------
output_path <- file.path(tempdir(),"kmeans_cluster_mat.gz")
export_deepToolsMat(kmeans, con = output_path)

## ---- fig9, fig.height=6, fig.width=4-----------------------------------------
generateEnrichedHeatmap(kmeans)

## ---- results='hide', fig.align="center"--------------------------------------
library(magrittr)

set.seed(0)
import_deepToolsMat(example) %>%
  clusterRanges(fun = rowMeans, 
                kmeans_k = 4, 
                silent = TRUE) %>% 
  summarize(fun = rowMeans, 
            output = "long") %>% 
  ggplot(aes(x = Sample, y = log(Signal))) + 
         geom_boxplot() + 
         facet_grid(~cluster) + 
         theme(axis.text.x = element_text(angle = 90, hjust = 1)) + 
         scale_x_discrete(labels= c("Hindbrain", "Liver", "Kidney"))

## -----------------------------------------------------------------------------
anno <- annotateRanges(proplyrObject, 
                       TxDb = "mm10")
rowRanges(anno)[1:3]

## ---- echo=FALSE--------------------------------------------------------------
# subset object to speed up demonstration
proplyrObject <- proplyrObject[1:5, , ]

## ---- fig.keep="none"---------------------------------------------------------

anno_great <- annotateRanges_great(proplyrObject, 
                                   species = "mm10")
rowRanges(anno_great)[1:3]

## -----------------------------------------------------------------------------
mcols(proplyrObject)$newColumn <- rnorm(n = nrow(proplyrObject))

params(proplyrObject)$rowGroupsInUse
switchGroup <- groupBy(proplyrObject, 
                            group = "newColumn")
params(switchGroup)$rowGroupsInUse

## ---- echo=F------------------------------------------------------------------
# reload full object with all ranges
proplyrObject <- import_deepToolsMat(example)
rownames(sampleData(proplyrObject)) <- c("Hindbrain",
                                         "Liver",
                                         "Kidney")

## ---- results='hide'----------------------------------------------------------
#import pre-made GRangesList 
data("K27ac_GRlist_hind_liver_top5000")
K27ac_GRlist_names <- c("hindbrain_K27ac", 
                        "liver_K27ac")

# subset the profileplyr object to just hindbrain and liver, 
# and group by GRanges
K27ac_groupByGR <- proplyrObject[,,grepl("Hindbrain|Liver", 
                                         names(assays(proplyrObject)))] %>%
                   groupBy(group = K27ac_GRlist_hind_liver_top5000,
                           GRanges_names = K27ac_GRlist_names, 
                           include_nonoverlapping = FALSE,
                           inherit_groups = FALSE)

## -----------------------------------------------------------------------------
rowRanges(K27ac_groupByGR)[1:3]


## ---- results='hide'----------------------------------------------------------
output_path <- file.path(tempdir(),"K27ac_GRoverlap.gz")
export_deepToolsMat(K27ac_groupByGR, con = output_path)


## -----------------------------------------------------------------------------
data("gene_list_character")

names(gene_list_character)
head(gene_list_character[[1]])

## ----message=FALSE------------------------------------------------------------
data("gene_list_dataframe")
names(gene_list_dataframe)
head(gene_list_dataframe[[1]])

signalFiles <- c(system.file("extdata","Sorted_Hindbrain_day_12_1_filtered.bam",
                             package = "profileplyr"),
                 system.file("extdata","Sorted_Liver_day_12_1_filtered.bam",
                             package = "profileplyr"))

# BAMs must be indexed
require(Rsamtools)
for (i in seq_along(signalFiles)){
  indexBam(signalFiles[i])
}

testRanges <- system.file("extdata", 
                          "newranges.bed", 
                          package = "profileplyr")

## ---- fig12, fig.height=7, fig.width=5, results='hide'------------------------

BamBigwig_to_chipProfile(signalFiles, 
                         testRanges, 
                         format = "bam",
                         paired=FALSE,
                         style="percentOfRegion",
                         nOfWindows=20,
                         distanceAround=40) %>% 
  as_profileplyr %>% 
  annotateRanges(TxDb = "mm10")  %>%
  groupBy(group = gene_list_dataframe) %>% #subseting by our gene list
  generateEnrichedHeatmap(extra_annotation_columns = c("hindbrain_liv_stat"),
                          sample_names = c("Hindbrain",
                                           "Liver"))

  

## ---- fig13, fig.height=8, out.width='100%', results='hide', dpi=35-----------
# NOTE: could also use BamBigwig_to_chipProfile() and as_profileplyr()
# within R to start from bigwig/BAM and BED file.

set.seed(0)
annotated_object <- import_deepToolsMat(example) %>%
  annotateRanges(TxDb = "mm10") %>% # annotate with ChIPseeker
  groupBy(group = gene_list_dataframe,
          include_nonoverlapping = TRUE,
          inherit_groups = FALSE) %>% # group by gene overlap
  groupBy(group = K27ac_GRlist_hind_liver_top5000,
          GRanges_names = K27ac_GRlist_names,
          include_nonoverlapping = TRUE,
          inherit_groups = FALSE) %>% # group by GRanges
  clusterRanges(kmeans_k = 3) %>%
  generateEnrichedHeatmap(extra_annotation_columns=c("annotation_short", 
                                                    "GR_overlap_names", 
                                                    "hindbrain_liv_stat"),
                          sample_names = c("Hindbrain",
                                           "Liver",
                                           "Kidney"),
                          extra_anno_width = c(5,5,8),
                          matrices_color = c("white", "red"),
                          extra_anno_color = list(NULL,
                                                c("black",
                                                  "green",
                                                  "red",
                                                  "white"),
                                                c("#e7e1ef", 
                                                  "#c994c7", 
                                                  "#dd1c77")),
                          genes_to_label = c("Myh9", "Sp4", "Rcan2"),
                          gene_label_font_size = 10)


## ----sessionInfo--------------------------------------------------------------
sessionInfo()

Try the profileplyr package in your browser

Any scripts or data that you put into this service are public.

profileplyr documentation built on Nov. 8, 2020, 8:12 p.m.