Load in functions

library(tidyverse)
library(ChromatinCompare)

Set up annotations

annotations <- load_annotations(build='mm10', type='gene_body')
mm10_hindbrain_e11.5 <- bed9_importer(system.file('extdata','ENCFF195OWF.chr1.bed.gz',package='ChromatinCompare'), skip=0)
mm10_hindbrain_e15.5 <- bed9_importer(system.file('extdata','ENCFF653QXV.chr1.bed.gz',package='ChromatinCompare'), skip=0)
# Intersect the regions we read in with the annotations
mm10_hindbrain_e11.5_anno = annotate_regions(
  regions = bed9_to_granges(mm10_hindbrain_e11.5),
  annotations = annotations,
  ignore.strand = TRUE,
  quiet = FALSE)

mm10_hindbrain_e15.5_anno = annotate_regions(
  regions = bed9_to_granges(mm10_hindbrain_e15.5),
  annotations = annotations,
  ignore.strand = TRUE,
  quiet = FALSE)

# capture classes
#classes <- retina_early_anno$name %>% unique %>% sort %>% data.frame() %>% magrittr::set_colnames('name')
# reorder classes
#classes$name <- forcats::as_factor(classes$name) %>% forcats::fct_relevel('10_Open_Chromatin', after = Inf)

# order name classes

# gene-wise query
geneWise_hindbrain_e11.5 <- mm10_hindbrain_e11.5_anno %>% 
  data.frame() %>% 
  group_by(annot.symbol, name) %>% 
  summarise(bp_covered = sum(as.numeric(width))) %>% 
  group_by(annot.symbol) %>%
  mutate(`Feature Ratio` = bp_covered/sum(bp_covered)) %>% 
  dplyr::select(-bp_covered) %>% 
  ungroup() %>% 
  group_by(annot.symbol, name) %>% 
  spread(name, `Feature Ratio`) %>%
  ungroup()
gene_vector <- geneWise_hindbrain_e11.5$annot.symbol
geneWise_hindbrain_e11.5 <- geneWise_hindbrain_e11.5 %>% ungroup() %>% dplyr::select(-annot.symbol) %>% as.matrix() %>% magrittr::set_rownames(gene_vector) 
# replace na with zero
geneWise_hindbrain_e11.5[is.na(geneWise_hindbrain_e11.5)] <- 0 
geneWise_hindbrain_e11.5 <- geneWise_hindbrain_e11.5[!is.na(row.names(geneWise_hindbrain_e11.5)),] %>% data.frame()

###
# gene-wise query
geneWise_hindbrain_e15.5 <- mm10_hindbrain_e15.5_anno %>% 
  data.frame() %>% 
  group_by(annot.symbol, name) %>% 
  summarise(bp_covered = sum(as.numeric(width))) %>% 
  group_by(annot.symbol) %>%
  mutate(`Feature Ratio` = bp_covered/sum(bp_covered)) %>% 
  dplyr::select(-bp_covered) %>% 
  ungroup() %>% 
  group_by(annot.symbol, name) %>% 
  spread(name, `Feature Ratio`) %>%
  ungroup()
gene_vector <- geneWise_hindbrain_e15.5$annot.symbol
geneWise_hindbrain_e15.5 <- geneWise_hindbrain_e15.5 %>% ungroup() %>% dplyr::select(-annot.symbol) %>% as.matrix() %>% magrittr::set_rownames(gene_vector) 
# replace na with zero
geneWise_hindbrain_e15.5[is.na(geneWise_hindbrain_e15.5)] <- 0 
geneWise_hindbrain_e15.5 <- geneWise_hindbrain_e15.5[!is.na(row.names(geneWise_hindbrain_e15.5)),] %>% data.frame()

Distance between two genes

dist(rbind(geneWise_hindbrain_e11.5[1,],geneWise_hindbrain_e15.5[1,]))

# calculate all distances
gene_names <- row.names(geneWise_hindbrain_e11.5)[!is.na(row.names(geneWise_hindbrain_e11.5))]
distances <- c(1:length(gene_names))
for (i in 1:length(gene_names)){
  distances[i] <- dist(rbind(geneWise_hindbrain_e11.5[gene_names[i],], geneWise_hindbrain_e15.5[gene_names[i],]))
}
density(distances) %>% plot()

gene_distances <- cbind(gene_names, distances) %>% data.frame()
gene_distances$distances <- as.numeric(as.character(gene_distances$distances))
gene_distances %>% arrange(-distances)

Pictures

genes <- c('Terf1','Elk4','Ush2a','Sept2')
ggplot(geneWise_hindbrain_e11.5[genes,] %>% 
         as.tibble() %>% 
         mutate(Gene = genes) %>% 
         gather(Class, Value, -Gene) %>% 
         mutate(Class= gsub('_',' ', x=Class) %>% forcats::fct_relevel(as.factor(.), '10 Open Chromatin', after = Inf)), 
       aes(x=Gene, fill=Class,y=Value)) + 
  scale_fill_viridis(option='viridis', discrete = TRUE) +
  geom_bar(stat='identity') + 

  theme_bw()

Superheat

superheat(geneWise_hindbrain_e11.5,
          pretty.order.rows = T,
          bottom.label.text.angle = 90,
          bottom.label.size = 0.5)

Differences in genome wide state differences in each gene

geneWise_hindbrain_e11.5$Age <- 'e11.5'
geneWise_hindbrain_e15.5$Age <- 'e15.5'
together <- rbind(geneWise_hindbrain_e11.5 %>% data.frame() %>% gather(Mark, Ratio, -Age), 
                  geneWise_hindbrain_e15.5 %>% data.frame() %>% gather(Mark, Ratio, -Age))
ggplot(together, aes(x=log(Ratio+0.000000001), colour=Age)) + geom_density() + facet_wrap(~Mark, ncol=3) + theme_bw() + scale_colour_viridis(option='viridis', discrete = TRUE)


davemcg/ChromatinCompare documentation built on May 3, 2019, 2:57 p.m.