library(devtools) load_all('../..') library(rmarkdown) render('Tang-parse.Rmd')
library(stringr) library(dplyr) library(reshape2) library(ggplot2)
library(knitr) library(rmarkdown) # # knitr options # opts_chunk$set( fig.path = 'figures/Windram-', stop_on_error = TRUE, fig.width = 12.5, fig.height = 8) # # Widths for saving figures # text.width <- 4.79 # LaTeX width in inches golden.ratio <- 1.618 # Pleasing ratio fig.width <- text.width fig.height <- text.width / golden.ratio # # Stylesheet # options(markdown.HTML.stylesheet = system.file("inst/Rmd/foghorn.css", package="DeLorean")) font.family <- "Verdana" font.theme <- theme_update(text=element_text(family=font.family)) theme_set(font.theme)
Make list of key genes mentioned in the paper.
tang.key.genes <- c( "Adamts9", "Amhr2", "Bcl2l14", "Bex1", "Bmi1", "Bmp1", "Bmp15", "Bmp2", "Bmp4", "Bmp8b", "Bmpr1a", "Brunol4", "Brunol5", "Cbx1", "Cbx5", "Cdh1", "Cdh3", "Cdh5", "Cdx1", "Cdx2", "Clock", "Crebbp", "Dazl", "Ddx3y", "Ddx4", "Dhrs3", "Dnmt3a", "Dnmt3b", "Dnmt3l", "Dppa1", "Dppa3", "Dppa4", "Dppa5a", # Changed from Dppa5 "Ehmt1", "Ehmt2", "Enox2", "Eomes", "Epha7", "Eras", "Esrp2", "Esrrb", "Fam189a1", "Fbxw13", "Fgf10", "Fgf3", "Fgfr3", "Fgfr4", "Fzd10", "Fzd9", "Gata3", "Gata4", "Gata5", "Gata6", "Gdf9", "Gm364", "Gsc", "Hand1", "Has3", "Hdac11", "Hdac5", "Hdac6", "Hdac7", "Hdx", "Hoxb1", "Hoxb3", "Hoxb5", "Hoxd13", "Hoxd8", "Id1", "Id2", "Ifitm1", "Ifitm3", "Ilvbl", "Isl1", "Jak2", "Kat2a", "Kdm4a", "Kdm4d", "Kdm6b", "Kdr", "Khdrbs3", "Kit", "Kitl", "Klf12", "Klf17", "Lhx5", "Lin28", "Mbd2", "Mecp2", "Mesdc1", "Mesdc2", "Mll3", "Myf5", "Myod1", "Myof", "Nanog", "Nanos1", "Nanos3", "Ncoa3", "Nes", "Neurod1", "Nkx2-5", "Nkx6-2", "Nodal", "Notch1", "Notch4", "Nr0b1", "Nr5a2", "Nudt18", "Onecut1", "Otx1", "Pax6", "Pecam1", "Pim1", "Pim2", "Pim3", "Pou5f1", "Pramel5", "Pramel6", "Pramel7", "Prdm1", "Prdm14", "Prdm16", "Prdm5", "Ptbp2", "Pygm", "Rex2", "Rhox6", "Rif1", "Sall4", "Setdb1", "Sirt2", "Smad1", "Smarcb1", "Sox17", "Sox2", "Sox3", "Sox7", "Sox9", "Suz12", # Changed from unknown suv12 "Suv39h2", "Suv420h2", "T", "Tbx2", "Tbx20", "Tbx3", "Tcf15", "Tcf3", "Tet1", "Tet2", "Tgfbr2", "Tgfbr3", "Tia1", "Tmem80", "Tnk1", "Tpbpa", "Trpm3", "Tspan12", "Utf1", "Zfp41", "Zfp42", "Zic3", "Myc") length(tang.key.genes)
Load the RNA-seq expression data CSV.
rna.seq <- read.csv('../../data/Tang-mmc3-RefSeq.csv')
Parse the RNA-seq cell meta data. Which of the columns are cell names?
cell.name.re <- '^([[:alpha:][:digit:]_.]+)_(A[[:digit:]]{1,2}(?:.1)?)$' names(rna.seq) tang.rna.seq.cell.meta <- ( as.data.frame(str_match(names(rna.seq)[37:68], cell.name.re)) %>% filter(! is.na(V2)) %>% select(cell=V1, capture=V2, cell.id=V3) %>% mutate(obstime=ifelse(capture=="ICM", 0, ifelse(capture=="E4.5_Epiblast", 1, ifelse(capture=="Day3_outgrowth_Oct4_Pos", 3, ifelse(capture=="Day5_outgrowth_Oct4neg", 5, ifelse(capture=="Day5_outgrowth_Oct4_Pos", 5, ifelse(capture=="ESC", 7, NA)))))))) sapply(tang.rna.seq.cell.meta, class) stopifnot(all(! is.na(tang.rna.seq.cell.meta$obstime)))
Extract the RNA-seq gene meta data.
tang.rna.seq.gene.meta <- ( rna.seq[,1:36] %>% rename(gene=RefSeqID, name=GeneID) %>% mutate(key=name %in% tang.key.genes)) sapply(tang.rna.seq.gene.meta, class)
Extract the RNA-seq expression data.
# tang.rna.seq <- as.matrix(rna.seq[,as.character(tang.rna.seq.cell.meta$cell)]) # min(tang.rna.seq[tang.rna.seq > 0]) cell.cols <- as.character(tang.rna.seq.cell.meta$cell) tang.rna.seq <- log10(1 + as.matrix(rna.seq[,cell.cols])) rownames(tang.rna.seq) <- tang.rna.seq.gene.meta$gene colnames(tang.rna.seq) <- tang.rna.seq.cell.meta$cell dim(tang.rna.seq) qplot(as.vector(tang.rna.seq))
Check we can create a DeLorean object from the data.
dl.rna.seq <- de.lorean(tang.rna.seq, tang.rna.seq.gene.meta, tang.rna.seq.cell.meta) dl.rna.seq <- estimate.hyper(dl.rna.seq, sigma.tau=1.5)
Investigate how many of the key genes are in the data.
found.key.gene <- ( data.frame(name=tang.key.genes) %>% mutate(found=name %in% tang.rna.seq.gene.meta$name)) found.key.gene %>% filter(! found)
Examine the $p$-values.
(ggplot(tang.rna.seq.gene.meta %>% select(gene, p.value.ESC.ICM., p.value.ESC.E4.5_Epiblast., p.value.ICM.E4.5_Epiblast.) %>% melt(variable.name="comparison", value.name="p.value") %>% filter(p.value < .001), aes(x=p.value+1e-110, fill=comparison)) + geom_histogram() + scale_x_log10())
Load the PCR data CSV and fix the cell names.
pcr <- read.csv('../../data/Tang-mmc2-PCR.csv') names(pcr) names(pcr) <- c( "gene", "probe.id", "assay.id", "name", str_c("ICM.", 1:14), str_c("Day3.Oct4.plus.", 1:14), str_c("Day5.Oct4.plus.", 1:9), str_c("ES.", 1:14), str_c("Day5.Oct4.minus.", 1:13), str_c("E45.", 1:10))
Parse the cell names.
pcr.cell.re <- '^(ICM|Day3|Day5|ES|E45).(?:(Oct4.minus|Oct4.plus).)?([[:digit:]]+)$' # names(pcr) # levels(tang.pcr.cell.meta$capture) tang.pcr.cell.meta <- ( as.data.frame(str_match(names(pcr)[5:78], pcr.cell.re)) %>% rename(cell=V1, capture=V2, condition=V3, cell.id=V4) %>% mutate(obstime=ifelse(capture=="ICM" , 0, ifelse(capture=="E45", 1, ifelse(capture=="Day3", 3, ifelse(capture=="Day5", 5, ifelse(capture=="ES" , 7, NA))))))) # tang.pcr.cell.meta stopifnot(all(! is.na(tang.pcr.cell.meta$obstime))) # names(tang.pcr.cell.meta)
Extract the PCR gene meta data.
tang.pcr.gene.meta <- ( pcr[,1:4] %>% mutate(key=name %in% tang.key.genes))
Extract the PCR expression data.
tang.pcr <- 40-as.matrix(pcr[,as.character(tang.pcr.cell.meta$cell)]) rownames(tang.pcr) <- tang.pcr.gene.meta$gene colnames(tang.pcr) <- tang.pcr.cell.meta$cell class(tang.pcr) qplot(as.vector(tang.pcr))
Check we can create a DeLorean object from the data.
dl.pcr <- de.lorean(tang.pcr, tang.pcr.gene.meta, tang.pcr.cell.meta) dl.pcr <- estimate.hyper( dl.pcr, sigma.tau=1.5, delta=.5)
Save the data in a format suitable for DeLorean package.
save(tang.key.genes, tang.rna.seq, tang.rna.seq.gene.meta, tang.rna.seq.cell.meta, tang.pcr, tang.pcr.gene.meta, tang.pcr.cell.meta, file='../../data/TangDeLorean.rda')
date()
R version and packages used:
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.