#!/usr/bin/env Rscript
# Author: Sean Maden
# Summaries of samples and studies as barplots,
# and fraction of array probes filtered after ANOVA.
library(ggplot2)
#----------
# load data
#----------
# Locate the bundled supplement data. `mustWork = TRUE` makes
# system.file() stop immediately when the package or directory is
# missing; by default it returns "" and the reads below would then
# fail on a meaningless path.
pkgname <- "recountmethylationManuscriptSupplement"
nct7.dir <- system.file("extdata", "nct7", package = pkgname,
  mustWork = TRUE)
# anova barplot data frame (space-delimited table with a header row)
bpdf.path <- file.path(nct7.dir, "anova-filt_nct.table")
bpdf.anovafilt <- read.table(bpdf.path, sep = " ", header = TRUE)
# noncancer tissues ids; load() restores the saved data into the
# workspace and returns the object name, which get() resolves
# (assumes the .rda file holds a single object -- TODO confirm)
nct7.ids.fn <- "gsmid-nct7.rda"
nct7 <- get(load(file.path(nct7.dir, nct7.ids.fn)))
# samples metadata
tables.dir <- system.file("extdata", "tables", package = pkgname,
  mustWork = TRUE)
md.fn <- "table-s1_mdpost_all-gsm-md.csv"
md <- read.csv(file.path(tables.dir, md.fn))
#-----------------------------------
# figS7b -- data summaries, barplots
#-----------------------------------
# Keep only the metadata rows that belong to the noncancer tissue samples.
which.samp <- md$gsm %in% nct7$gsmid
mdf <- md[which.samp, ]

# Fill colors, assigned to tissues in order of first appearance in
# nct7$tissue, so a tissue gets the same color in both barplots.
nct7.fillcols <- c("red", "purple", "gold", "forestgreen",
  "green", "blue", "firebrick")

# Tally distinct ids per tissue and return a plot-ready data frame.
#
# tissue.map: table with `tissue` and `gsmid` columns.
# md.gsm:     sample ids of the metadata rows.
# md.ids:     ids to count, parallel to `md.gsm`.
# count.col:  name given to the count column of the result.
# fillcols:   one color per tissue, in order of first appearance.
#
# Returns a data frame with columns tissue (factor), <count.col>
# (numeric) and fillcol; rows and factor levels are both sorted by
# increasing count, so fillcol lines up with the factor levels.
get.tissue.counts <- function(tissue.map, md.gsm, md.ids, count.col,
                              fillcols) {
  tissues <- as.character(unique(tissue.map$tissue))
  counts <- vapply(tissues, function(tissue.name) {
    tgsmv <- tissue.map[tissue.map$tissue == tissue.name, ]$gsmid
    length(unique(md.ids[md.gsm %in% tgsmv]))
  }, integer(1), USE.NAMES = FALSE)
  # A single ordering drives both the factor levels and the row order.
  ord <- order(counts)
  out <- data.frame(tissue = factor(tissues, levels = tissues[ord]))
  out[[count.col]] <- as.numeric(counts)
  # Colors are attached before sorting so each stays with its tissue.
  out$fillcol <- fillcols
  out[ord, ]
}

# count studies
df.gse <- get.tissue.counts(nct7, mdf$gsm, mdf$gseid, "num.gse",
  nct7.fillcols)
# count samples
df.gsm <- get.tissue.counts(nct7, mdf$gsm, mdf$gsm, "num.gsm",
  nct7.fillcols)
# make plot objects
# Per-tissue barplots: bars are filled using each data frame's fillcol
# column and the count is printed as a text label at each bar.
# NOTE(review): the study-count axis is capped at 6200, close to the
# sample-count cap of 6800 -- confirm this limit is intended for studies.
figS7b.studies <- ggplot(
  df.gse,
  aes(x = tissue, y = num.gse, fill = tissue)
) +
  geom_bar(stat = "identity") +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90)
  ) +
  scale_fill_manual(values = df.gse$fillcol) +
  geom_text(aes(label = num.gse), vjust = -0.2) +
  labs(x = "Tissue", y = "Study count") +
  ylim(0, 6200)
figS7b.samples <- ggplot(
  df.gsm,
  aes(x = tissue, y = num.gsm, fill = tissue)
) +
  geom_bar(stat = "identity") +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90, vjust = 0.1, hjust = 1)
  ) +
  scale_fill_manual(values = df.gsm$fillcol) +
  geom_text(aes(label = num.gsm), vjust = -0.2) +
  labs(x = "Tissue", y = "Sample count") +
  ylim(0, 6800)
#-----------------------------------------
# figS7c -- anova filter, stacked barplots
#-----------------------------------------
# order on removed
# Tissue levels follow the ordering of the first column of the REMOVED
# rows. NOTE(review): presumably column 1 holds the probe counts --
# confirm against the table header.
is.removed <- bpdf.anovafilt$type == "REMOVED"
bff <- bpdf.anovafilt[is.removed, ]
lvl.order <- bff$tissue[order(bff[, 1])]
bpdf.anovafilt$tissue <- factor(bpdf.anovafilt$tissue, levels = lvl.order)
# get plot object: stacked bars of probe counts per tissue, split by type
figS7c <- ggplot(
  bpdf.anovafilt,
  aes(x = tissue, y = nprobes, fill = type)
) +
  geom_bar(stat = "identity", colour = "black") +
  theme_bw() +
  labs(x = "Tissue", y = "Probe count", fill = "Probe type") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
#-------------------------
# print manuscript figures
#-------------------------
# uncomment to write the figures to PDF (arguments: file, width, height in inches)
# pdf("sfig7b_bp-samplecount-nct7.pdf", 2.8, 2.3)
# print(figS7b.samples); dev.off()
# pdf("sfig7c_bp-cg-anovafilt_nct7.pdf", 3.4, 2.4)
# print(figS7c); dev.off()
# (web page residue, not part of the script:)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.