inst/doc/proActiv.R

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----QuickStart, eval=TRUE, message=FALSE, results='hide'---------------------
library(proActiv)

## List of STAR junction files as input
files <- list.files(system.file('extdata/vignette/junctions', 
                                package = 'proActiv'), full.names = TRUE)
## Vector describing experimental condition
condition <- rep(c('A549','HepG2'), each=3)
## Promoter annotation for human genome GENCODE v34 restricted to a subset of chr1
promoterAnnotation <- promoterAnnotation.gencode.v34.subset

result <- proActiv(files = files, 
                   promoterAnnotation = promoterAnnotation,
                   condition = condition)

## ----ListFiles, eval=FALSE----------------------------------------------------
#  files <- list.files(system.file('extdata/vignette/junctions',
#                                  package = 'proActiv'), full.names = TRUE)

## ----CreateAnnotation, eval=FALSE---------------------------------------------
#  ## From GTF file path
#  gtf.file <- system.file('extdata/vignette/annotation/gencode.v34.annotation.subset.gtf.gz',
#                          package = 'proActiv')
#  promoterAnnotation.gencode.v34.subset <- preparePromoterAnnotation(file = gtf.file,
#                                                                     species = 'Homo_sapiens')
#  ## From TxDb object
#  txdb.file <- system.file('extdata/vignette/annotation/gencode.v34.annotation.subset.sqlite',
#                           package = 'proActiv')
#  txdb <- loadDb(txdb.file)
#  promoterAnnotation.gencode.v34.subset <- preparePromoterAnnotation(txdb = txdb,
#                                                                     species = 'Homo_sapiens')

## ----proActiv, eval=FALSE, message=FALSE, results='hide'----------------------
#  promoterAnnotation <- promoterAnnotation.gencode.v34.subset
#  
#  condition <- rep(c('A549', 'HepG2'), each=3)
#  
#  result <- proActiv(files = files,
#                     promoterAnnotation = promoterAnnotation,
#                     condition = condition)

## ----proActiv result, eval=TRUE-----------------------------------------------
show(result)

## ----Result rowData, eval=TRUE, echo=FALSE------------------------------------
knitr::kable(head(rowData(result)))

## ----filter result, eval=TRUE-------------------------------------------------
## Removes single-exon transcripts / promoters by eliminating promoter counts that are NA 
result <- result[complete.cases(assays(result)$promoterCounts),]

## ----DEXSeq, eval=TRUE, message=FALSE, warning=FALSE--------------------------
library(DEXSeq)

countData <- data.frame(assays(result)$promoterCounts, rowData(result))

## Call DEXSeq - promoter as feature, gene as group
dxd <- DEXSeqDataSet(countData = as.matrix(countData[,seq_len(length(condition))]),
                     sampleData = data.frame(colData(result)),
                     design = formula(~ sample + exon + condition:exon),
                     featureID = as.factor(countData$promoterId),
                     groupID = as.factor(countData$geneId))
dxr1 <- DEXSeq(dxd)

## ----DEXSeq column description, eval=TRUE-------------------------------------
mcols(dxr1)$description

## ----dxr1 wrangling, eval=TRUE------------------------------------------------
## Arrange by minimum padj for each gene
dxr1 <- data.frame(dxr1[,1:10]) %>% 
  group_by(groupID) %>% 
  mutate(minp = min(padj)) %>%
  arrange(minp)

## ----dxr1, eval=TRUE, echo=FALSE, layout='l-body-outset'----------------------
knitr::kable(head(data.frame(dxr1)))

## ----VizAlternativePromoters txdb, eval=TRUE, message=FALSE, warning=FALSE, fig.align='center', fig.height=7, fig.width=7----
## RAP1GAP
gene <- 'ENSG00000076864.19'
txdb <- loadDb(system.file('extdata/vignette/annotations',
                           'gencode.v34.annotation.rap1gap.sqlite', 
                           package = 'proActiv'))
plotPromoters(result = result, gene = gene, txdb = txdb)

## ----VizAlternativePromoters ranges, eval=FALSE-------------------------------
#  ranges <- readRDS(system.file('extdata/vignette/annotations',
#                                'exonsBy.rap1gap.rds',
#                                package = 'proActiv'))
#  plotPromoters(result = result, gene = gene, ranges = ranges)

## ----VizPromoterCategories, eval=TRUE, fig.align='center', fig.height=5, fig.width=5, message=FALSE, warning=FALSE----
library(ggplot2)

rdata <- rowData(result)
## Create a long dataframe summarizing cell line and promoter class
pdata1 <- data.frame(cellLine = rep(c('A549', 'HepG2'), each = nrow(rdata)),
                       promoterClass = as.factor(c(rdata$A549.class, rdata$HepG2.class)))

ggplot(na.omit(pdata1)) +
  geom_bar(aes(x = cellLine, fill = promoterClass)) + 
  xlab('Cell Lines') + ylab('Count') +  labs(fill = 'Promoter Category') +
  ggtitle('Categorization of Promoters')

## ----VizMajorMinorPosition, eval=TRUE, fig.align='center', fig.height=5, fig.width=5, message=FALSE, warning=FALSE----
## Because many genes have many annotated promoters, we collapse promoters 
## from the 5th position and onward into one group for simplicity
pdata2 <- as_tibble(rdata) %>%
  mutate(promoterPosition = ifelse(promoterPosition > 5, 5, promoterPosition)) %>%
  filter(HepG2.class %in% c('Major', 'Minor'))

ggplot(pdata2) +
  geom_bar(aes(x = promoterPosition, fill = as.factor(HepG2.class)), position = 'fill') +
  xlab(expression(Promoter ~ Position ~ "5'" %->% "3'")) + ylab('Percentage') + 
  labs(fill = 'Promoter Category') + ggtitle('Major/Minor Promoter Proportion in HepG2') + 
  scale_y_continuous(breaks = seq(0,1, 0.25), labels = paste0(seq(0,100,25),'%')) +
  scale_x_continuous(breaks = seq(1,5), labels = c('1','2','3','4','>=5'))

## ----VizMajorGeneExp, eval=TRUE, fig.align='center', fig.height=5, fig.width=6.5, message=FALSE, warning=FALSE----
## Get active major promoters of HepG2
majorPromoter <- as_tibble(rdata) %>% group_by(geneId) %>% 
  mutate(promoterCount = n()) %>% filter(HepG2.class == 'Major') 
## Get gene expression corresponding to the genes identified above
geneExpression <- metadata(result)$geneExpression %>% 
  rownames_to_column(var = 'geneId') %>% 
  filter(geneId %in% majorPromoter$geneId)

pdata3 <- data.frame(proActiv = majorPromoter$HepG2.mean,
                     geneExp = geneExpression$HepG2.mean[match(majorPromoter$geneId, 
                                                              geneExpression$geneId)],
                     promoterCount = majorPromoter$promoterCount)

ggplot(pdata3, aes(x = geneExp, y = proActiv)) + 
  geom_point(aes(colour = promoterCount), alpha = 0.5) +
  ggtitle('Major Promoter Activity vs. Gene Expression in HepG2') + 
  xlab('Average Gene Expression') + ylab('Average Major Promoter Activity') +
  labs(colour = 'Number of \n Annotated Promoters') +
  geom_abline(slope = 1, intercept = 0, colour = 'red', linetype = 'dashed')

## ----VizTsne, eval=TRUE, fig.align='center', fig.height=5.2, fig.width=5.2----
library(Rtsne)

## Remove inactive promoters (sparse rows)
data <- assays(result)$absolutePromoterActivity %>% filter(rowSums(.) > 0)
data <- data.frame(t(data))
data$Sample <- as.factor(condition)

set.seed(40) # for reproducibility

tsne.out <- Rtsne(as.matrix(subset(data, select = -c(Sample))), perplexity = 1)
plot(x = tsne.out$Y[,1], y = tsne.out$Y[,2], bg = data$Sample, asp = 1,
     col = 'black', pch = 24, cex = 4,
     main = 't-SNE plot with promoters \n active in at least one sample',
     xlab = 'T-SNE1', ylab = 'T-SNE2',
     xlim = c(-300,300), ylim = c(-300,300))
legend('topright', inset = .02, title = 'Cell Lines',
       unique(condition), pch = c(24,24), pt.bg = 1:length(unique(condition)) , cex = 1.5, bty = 'n')

## ----SessionInfo, eval=TRUE, echo=FALSE---------------------------------------
sessionInfo()

Try the proActiv package in your browser

Any scripts or data that you put into this service are public.

proActiv documentation built on Nov. 8, 2020, 8:14 p.m.