is.error <- function(x) inherits(x, "try-error")
loadGeneInformation<-function(dir="../TablesForExploration",
motifs=T,
WGCNA=F,
meanTpms=T,
non_syntenic_triads=F,
triads=T,
triadMovement=T
){
path<-paste0(dir,"/CanonicalTranscript.rds")
canonicalTranscripts<-readRDS(path)
canonicalTranscripts$intron_length<- canonicalTranscripts$mrna_length - canonicalTranscripts$exon_length
canonicalTranscripts$chr_group <- substr(canonicalTranscripts$Chr,4,4)
canonicalTranscripts$genome <- substr(canonicalTranscripts$Chr,5,5)
expressed_genes <- canonicalTranscripts$Gene
if(meanTpms == T){
path<-paste0(dir, "/MeanTpms.rds")
meanTpms <- readRDS(path)
expressed_genes<-unique(meanTpms$gene)
}
canonicalTranscripts<-canonicalTranscripts[canonicalTranscripts$Gene %in% expressed_genes, ]
canonicalTranscripts$scaled_5per_position <- 5 * ceiling(canonicalTranscripts$scaled_1per_position / 5)
canonicalTranscripts$scaled_5per_position <- ifelse(canonicalTranscripts$scaled_5per_position == 0,
5,
canonicalTranscripts$scaled_5per_position)
path<-paste0(dir, "/region_partition.csv")
partition<-read.csv(path, row.names=1)
partition_percentages<-round(100*partition/partition$Length)
partition_percentages$Chr <- rownames(partition_percentages)
partition$Chr <- rownames(partition)
ct<-canonicalTranscripts
ct_with_partition<-sqldf('SELECT ct.*, CASE
WHEN scaled_1per_position < R1_R2a THEN "R1"
WHEN scaled_1per_position < R2a_C THEN "R2A"
WHEN scaled_1per_position < C_R2b THEN "C"
WHEN scaled_1per_position < R2b_R3 THEN "R2B"
ELSE "R3" END as partition
FROM ct LEFT JOIN partition_percentages ON ct.chr = partition_percentages.chr ')
x<- as.factor(ct_with_partition$partition)
x <- factor(x,levels(x)[c(2,3,1,4,5)])
ct_with_partition$partition <- x
canonicalTranscripts<-ct_with_partition
if(triadMovement == T){
path<-paste0(dir,"/TriadMovement.rds")
triadMovement<-readRDS(path)
}
if(triads == T){
path<-paste0(dir,"/Triads.rds")
triads<-readRDS(path)
}
path<-paste0(dir,"/universe_table.csv")
gene_universe<-read.csv(path)
path<-paste0(dir, "/OntologiesForGenes.rds")
ontologies<-readRDS(path)
path<-paste0(dir, "/id_names_merged.txt")
id_names <- read.csv(path, header=F, sep = "\t")
if(WGCNA == T){
path<-paste0(dir, "/WGCNA_table.csv")
WGCNA <- read.csv(path)
}
path<-paste0(dir, "/ObservedGOTermsWithSlim.csv")
go_slim<-read.csv(path, row.names=1)
if(motifs == T){
path<-paste0(dir, "/motifs.rds")
motifs <- readRDS(path)
motifs<-unique(motifs)
}
path<-paste0(dir, "/SegmentalTriads.csv")
allTriads<-read.csv(path, stringsAsFactors=F)
only_genes<-allTriads[,c("group_id","A", "B", "D")]
allTriads<-melt(only_genes, id.vars<-c("group_id"),
variable.name = "chr_group",
value.name ="gene")
if(non_syntenic_triads){
if(is.data.frame(triads)){
path<-paste0(dir,"/Non_syn_Triads.rds")
tmp<-readRDS(path)
triads <- rbind(triads, tmp)
}
if(is.data.frame(triadMovement)){
path<-paste0(dir,"/Non_syn_TriadMovement.rds")
tmp<-readRDS(path)
triadMovement <- rbind(triadMovement, tmp)
}
path<-paste0(dir, "/NonSyntenicTriads.csv")
tmp_allTriads<-read.csv(path, stringsAsFactors=F)
only_genes<-tmp_allTriads[,c("group_id","A", "B", "D")]
tmp_allTriads<-melt(only_genes, id.vars<-c("group_id"),
variable.name = "chr_group",
value.name ="gene")
allTriads<-rbind(allTriads, tmp_allTriads)
}
list(canonicalTranscripts=canonicalTranscripts,
meanTpms=meanTpms,
triads=triads,
triadMovement=triadMovement,
gene_universe=gene_universe,
ontologies=ontologies,
id_names=id_names,
WGCNA=WGCNA,
GOSlim=go_slim,
partition=partition,
motifs=motifs,
allTriads=allTriads
)
}
prepare_hist_stats<-function(table, column="size_cds"){
table<-table[table[,column]>0,]
probs <- c( 0.1, 0.25, 0.5, 0.75, 0.9, 0.95)
quantiles <- data.frame(quantile(table[,column], prob=probs,na.rm=TRUE, include.lowest=TRUE), stringsAsFactors=FALSE)
quantiles$quant<-rownames(quantiles)
colnames(quantiles)<-c("value", "quant")
values<-quantiles$values
local_mean<-mean(table[,column])
local_sd<-sd(table[,column])
local_max <- max(table[,column])
stats_list<-list(mean=local_mean, sd = local_sd, cv = local_sd/local_mean,
median = median(table[,column],2),
max = local_max,
n = nrow(table)
)
stats_list
}
plotHistogram<-function(table,
column="size_cds",
probs = c( 0.1, 0.25, 0.5, 0.75, 0.9, 0.95)){
table<-table[table[,column]>0,]
quantiles <- data.frame(quantile(table[,column], prob=probs,na.rm=TRUE, include.lowest=TRUE), stringsAsFactors=FALSE)
quantiles$quant<-rownames(quantiles)
colnames(quantiles)<-c("value", "quant")
values<-quantiles$values
local_mean<-mean(table[,column])
local_sd<-sd(table[,column])
local_max <- max(table[,column])
p <- ggplot(table, aes_string(column))
if(nrow(table) > 100){
table <- within(table,
quantile <- as.integer(
cut(table[,column],
unique(quantile(table[,column],
prob=probs,
na.rm=TRUE,
include.lowest=TRUE))
)
))
table$quantile<-ifelse(is.na(table$quantile),0,table$quantile)
table$quantile<-as.factor(table$quantile)
iq <- quantiles$value[4] - quantiles$value[2]
xmax <- quantiles$value[3] + (iq * 2)
xmin <- quantiles$value[3] - (iq * 2)
if(xmin < 0) xmin <- 0
if(xmax > local_max) xmax <- local_max + 1
p <- ggplot(table, aes_string(column, fill="quantile"))
p <- p + geom_vline(data=quantiles,aes(xintercept=quantiles$value) )
for(i in seq(1,nrow(quantiles))){
x_pos<-quantiles$value[i]
gtext <- textGrob(quantiles$quant[i], y=0.02, gp = gpar(fontsize = 6,col = "red"))
p <- p + annotation_custom(gtext, xmin=x_pos, xmax=x_pos)
}
p <- p + xlim(xmin, xmax) + scale_fill_brewer(palette="Dark2")
}
p <- p + geom_histogram(bins=50, position = "identity") + theme_bw()
p <- p + theme(legend.position="none")
p <- p + ggtitle(paste0("Mean: ", round(local_mean,2),
" SD:", round(local_sd,2),
" CV:", round(local_sd/local_mean, 2),
" Median:", round(median(table[,column],2)),
" Max:", round(local_max,2),
" N:", nrow(table)))
p <- p + theme(plot.title = element_text(size=6))
stats_list<-list(mean=local_mean, sd = local_sd, cv = local_sd/local_mean,
median = median(table[,column],2),
max = local_max,
n = nrow(table)
)
p
}
#This function gets the expected number of genes per each 5pc bin.
get_expected_values_per_5pc_bin<-function(gene_table,
numberOfGenes,
group_in_single_chromosome=FALSE){
query<-"SELECT Chr, chr_group, genome,scaled_5per_position, count(*) as count
FROM
gene_table
WHERE geneconf = 'HC' AND Chr != 'chrUn'
GROUP BY Chr, chr_group, genome, scaled_5per_position"
counts<-sqldf(query)
if(group_in_single_chromosome){
query<-"SELECT 'All' as Chr, 'all' as chr_group, 'all' as genome,
scaled_5per_position, count(*) as count
FROM
gene_table
WHERE geneconf = 'HC' AND Chr != 'chrUn'
GROUP BY scaled_5per_position"
}
counts<-sqldf(query)
multiplier <- numberOfGenes / sum(counts$count)
counts$expected <- counts$count * multiplier
counts
}
plot_per_chromosome_5pc_bins_facet<-function(table,expected_per_chr,
title = "Test"){
gs<-list()
local_title = paste0(title, "\n Genes per chromosome 5% bin\nN: ", nrow(table) )
t1 <- table[table$Chr != "chrUn",]
expected_per_chr <- expected_per_chr[expected_per_chr$Chr != "chrUn",]
p <-ggplot(t1,aes(as.factor(scaled_5per_position)))
p <- p + geom_bar(aes(fill=partition))
p <- p + theme_bw()
p <- p + facet_grid(chr_group~genome, drop = FALSE)
p <- p + ylab(" count ") + xlab("")
p <- p + theme(axis.text.x = element_text(angle = 90, hjust = 1))
p <- p + scale_fill_brewer(palette = "Set1")
p <- p + geom_point(data=expected_per_chr,
aes(x=as.factor(scaled_5per_position), y=expected), color="red", size = 0.5)
gs[[length(gs)+1]] <- p
p <-ggplot(table,aes(Chr, fill=geneconf)) + geom_bar() + theme_bw()
p <- p + theme(axis.text.x = element_text(angle = 30, hjust = 1)) + ylab("") + xlab("")
gs[[length(gs)+1]] <- p
g1<-arrangeGrob(grobs=gs, ncol=1, heights=c(0.8,0.2), top=local_title )
g1
}
plot_tpms_summary<-function(tpms, experiment="850_samples", min_tpm=0.5, title="Test"){
local_tpms<-subset(tpms, (subset == experiment) &
( factor != "all" & factor != "all_means" & factor != "all_mean_filter" ) &
value > min_tpm)
local_title <- paste0(title, "\n", experiment)
p <- ggplot(local_tpms, aes(value))
p <- p + geom_histogram(bins=30 ) + theme_bw()
p <- p + theme(axis.text.x = element_text(angle = 90, hjust = 1),
strip.text = element_text(size=6))
p <- p + facet_wrap(~ factor, ncol=4)
p <- p + xlim(0,15)
p <- p + ylab("Count") + xlab("")
p <- p + theme(strip.text.x = element_text(margin = margin(.1, 0, .1, 0, "cm")))
local_tpms<-subset(tpms, (subset == experiment) &
( factor == "all_mean_filter" ) &
value > min_tpm)
p2 <- ggplot(local_tpms, aes(value))
p2 <- p2 + geom_histogram(bins=30 ) + theme_bw()
p2 <- p2 + theme(axis.text.x = element_text(angle = 90, hjust = 1),
strip.text = element_text(size=6, lineheight=0.5))
p2 <- p2 + facet_wrap(~ factor, ncol=1) + xlim(0,15)
p2 <- p2 + ylab("") + xlab("TPM")
mytheme <- gridExtra::ttheme_default(
core = list(fg_params=list(cex = 0.5)),
colhead = list(fg_params=list(cex = 0.5)),
rowhead = list(fg_params=list(cex = 0.5)))
lay <- rbind(c(1),
c(2))
g1<-arrangeGrob(grobs=list(p,p2), heights=c(0.8,0.2), layout_matrix=lay, top = local_title)
g1
}
get_tpms_desc_stats<-function(tpms, min_tpm=0.5, experiment="850_samples"){
local_tpms<-subset(tpms, (subset == experiment) &
factor != "all" &
factor != "all_means" &
value > min_tpm)
factor_means <- aggregate(value ~ factor, data = local_tpms, mean)
factor_max <- aggregate(value ~ factor, data = local_tpms, max)
factor_min <- aggregate(value ~ factor, data = local_tpms, min)
factor_sd <- aggregate(value ~ factor, data = local_tpms, sd)
factor_median<- aggregate(value ~ factor, data = local_tpms, median)
factor_n <- aggregate(value ~ factor, data = local_tpms, length)
rownames(factor_means)<- factor_means$factor
rownames(factor_max)<- factor_max$factor
rownames(factor_min)<- factor_min$factor
rownames(factor_sd)<- factor_sd$factor
rownames(factor_median)<- factor_median$factor
rownames(factor_n)<- factor_n$factor
factor_means$factor<-NULL
factor_max$factor<-NULL
factor_min$factor<-NULL
factor_sd$factor<-NULL
factor_median$factor <- NULL
factor_n$factor <- NULL
factor_means<-cbind(factor_means, factor_median)
factor_means<-cbind(factor_means, factor_max)
factor_means<-cbind(factor_means, factor_min)
factor_means<-cbind(factor_means, factor_sd)
factor_means<-cbind(factor_means, factor_n)
colnames(factor_means)<-c("Mean", "Median", "Max", "Min", "SD","N")
factor_means$CV<-factor_means$SD/factor_means$Median
factor_means<-round(factor_means, 1)
}
plot_tpm_desc_stats<-function(tpms,subset_tpms, experiment="850_samples", min_tpm=0.5, title="Test" ){
local_title <- paste0(title, "\n", experiment)
t_local <-get_tpms_desc_stats(subset_tpms, experiment=experiment, min_tpm=min_tpm)
t_global <-get_tpms_desc_stats(tpms, experiment=experiment, min_tpm=min_tpm)
rownames(t_global) <- NULL
mytheme <- gridExtra::ttheme_default(
core = list(fg_params=list(cex = 0.5)),
colhead = list(fg_params=list(cex = 0.5)),
rowhead = list(fg_params=list(cex = 0.5)))
t1 <- tableGrob(t_local, theme=mytheme)
t2 <- tableGrob(t_global, theme=mytheme)
title <- textGrob("Observed",gp=gpar(fontsize=10))
padding <- unit(5,"mm")
table <- gtable_add_rows(
t1,
heights = grobHeight(title),
pos = 0)
table <- gtable_add_grob(
table, title,
1, 1, 1,
ncol(table))
title2 <- textGrob("Expected",gp=gpar(fontsize=10))
table2 <- gtable_add_rows(
t2,
heights = grobHeight(title2),
pos = 0)
table2 <- gtable_add_grob(
table2, title2,
1, 1, 1,
ncol(table2))
g1<-arrangeGrob(grobs=list(table ,table2), ncol=2, top = local_title)
g1
}
plot_all_means_filteredtpms_summary<-function(tpms, transcripts, experiment="850_samples", min_tpm=0.5, title="Test"){
local_title <- paste0(title, "\n", experiment)
local_tpms<-subset(tpms, (subset == experiment) &
factor == "all_mean_filter" )
breaks <- seq(-1,max(local_tpms$samples) , by = 1)
samples.cut <- cut(local_tpms$samples, breaks, include.lowest = FALSE)
samples.freq <- table(samples.cut)
cumfreq0 = cumsum(samples.freq)
local_tpms<-subset(tpms, subset == experiment &
factor == "all_mean_filter" &
value > min_tpm)
level <- ifelse(local_tpms$value < 0.1, 0.1, local_tpms$value)
level <- ceiling(log10(level))
local_tpms$level <- level
local_tpms$exp_max_value <- as.factor(10**level)
gs<-list()
gs[[length(gs)+1]] <- ggplot(local_tpms, aes(samples, fill = exp_max_value)) +
geom_bar(width = 0.75) + scale_fill_brewer(palette="RdPu") +
ggtitle(paste0("Gene expression level and number of tissues in which genes are expressed")) +
theme_bw() +
labs(fill="AVG TPM", x="") +
theme(legend.position=c(.25,.75))+
guides(color = guide_legend(override.aes = list(size=5)))
freqs_df <- data.frame(samples = seq(0,max(local_tpms$samples) , by = 1), cum_freq=cumfreq0 )
gs[[length(gs)+1]] <- ggplot(freqs_df, aes(samples, cum_freq)) +
geom_line() + geom_point() + theme_bw() +
labs(x="Number of tissues/conditions", y="Cumulative frequency")
gs[[length(gs)+1]] <- plot_expressed_tissues_across_chromosomes(tpms,transcripts, bin_size = 2 )
g1<-arrangeGrob(grobs=gs, ncol=1, top=local_title )
g1
}
get_triads_from_genes<-function(genes, geneInformation, dataset="HC_CS_no_stress" , min_no_genes = 1){
triads<-geneInformation$triads
triadMovement<-geneInformation$triadMovement
triads_with_genes <- triads[triads$gene %in% genes,]
tridas_with_genes <- triads_with_genes[triads_with_genes$dataset == dataset,]
genes_in_triads<-sqldf("SELECT group_id, gene FROM triads_with_genes GROUP BY gene")
triad_gene_count<-sqldf("SELECT group_id, count(*) as count from genes_in_triads GROUP BY group_id")
group_ids <- triad_gene_count[triad_gene_count$count >= min_no_genes, "group_id"]
list(triads=triads[triads$group_id %in% group_ids & triads$dataset==dataset,],
triadMovement=triadMovement[triadMovement$group_id %in% group_ids & triadMovement$dataset==dataset ,])
}
plot_distribution_for_factor<-function(res, unit="central_mean_distance" ,color_by="total_categories"){
#unit<-paste0(from,"_",unit)
local_res<-res[res$factor_count>1,c(unit, color_by)]
local_res[,2]<-as.factor(local_res[,2])
probs <- c( 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99)
quantiles <- data.frame(quantile(local_res[,1], prob=probs,na.rm=TRUE), stringsAsFactors=FALSE)
quantiles$quant<-rownames(quantiles)
colnames(quantiles)<-c("value", "quant")
#print(quantiles)
local_res<-local_res[order(local_res[,color_by], decreasing = F),]
p <- ggplot(local_res,aes_string(unit, fill=color_by)) +
geom_vline(data=quantiles,aes(xintercept=quantiles$value) ) #
for(i in seq(1,nrow(quantiles))){
x_pos<-quantiles$value[i]
gtext <- textGrob(quantiles$quant[i], y=0.02, gp = gpar(fontsize = 6,col = "red"))
p <- p + annotation_custom(gtext, xmin=x_pos, xmax=x_pos)
}
p <- p + theme_bw()
#p <- p + geom_text(data = quantiles, mapping = aes_string(label = "quant", y = 0)) #, mapping = aes(label = quant, y = 0))
p <- p + geom_histogram(position = "identity",alpha=0.5,binwidth=0.01) + theme(legend.position="none")
p
}
get_stats_title<-function(d){
paste0("Mean: ", round(mean(d),2),
" SD:", round(sd(d),2),
" CV:", round(sd(d)/mean(d), 2),
" Median:", round(median(d),2),
" N:", length(d))
}
table_dominance_summary<-function(selected_triads, experiment="HC_CS_no_stress", title="test"){
triads <- selected_triads$triads
triadMovement<-selected_triads$triadMovement
all_means_filter<-triads[triads$factor=="all_mean_filter",]
df <- prepare_hist_stats(all_means_filter, column="value")
df$value_type <- "All mean filter TPM for genes in triad"
df$dataset<-experiment
df$title <- title
tmp<- prepare_hist_stats(triadMovement, column="central_max_distance")
tmp$value_type <- "central_max_distance"
tmp$dataset<-experiment
tmp$title <- title
df<-rbind(df,tmp)
tmp<- prepare_hist_stats(triadMovement, column="central_mean_distance")
tmp$value_type <- "central_mean_distance"
tmp$dataset<-experiment
tmp$title <- title
df<-rbind(df,tmp)
tmp<- prepare_hist_stats(triadMovement, column="sum_mean_tpm")
tmp$value_type <- "sum_mean_tpm"
tmp$dataset<-experiment
tmp$title <- title
df<-rbind(df,tmp)
tmp<- prepare_hist_stats(triadMovement, column="factor_count")
tmp$value_type <- "No. of conditions of genes(count per triad)"
tmp$dataset<-experiment
tmp$title <- title
df<-rbind(df,tmp)
tmp<- prepare_hist_stats(triadMovement, column="total_categories")
tmp$value_type <- "No. of categories"
tmp$dataset<-experiment
tmp$title <- title
df<-rbind(df,tmp)
df
}
plot_dominance_summary<-function(selected_triads, experiment="HC_CS_no_stress", title="test"){
local_title <- paste0(title, "\n", experiment)
triads <- selected_triads$triads
triadMovement<-selected_triads$triadMovement
all_means_filter<-triads[triads$factor=="all_mean_filter",]
#print(unique(triads$factor))
gs<-list()
gs[[length(gs)+1]] <- plotHistogram(all_means_filter, column="value") +
xlab("All mean filter TPM for genes in triad")
gs[[length(gs)+1]] <- ggplot(triadMovement, aes(factor_count)) + geom_bar() + theme_bw() +
xlab("No. of conditions of genes(count per triad)") +
ggtitle(get_stats_title(triadMovement$factor_count)) + theme(plot.title = element_text(size=6))
p <- ggplot(triadMovement, aes(total_categories)) + geom_bar() + theme_bw()
p <- p + labs(fill="Main\ncategory", x="No. of categories") +
ggtitle(get_stats_title(triadMovement$total_categories)) + theme(plot.title = element_text(size=6))
gs[[length(gs)+1]] <- p
gs[[length(gs)+1]] <- plotHistogram(triadMovement, column="central_max_distance")
gs[[length(gs)+1]] <- plotHistogram(triadMovement, column="central_mean_distance")
gs[[length(gs)+1]] <- plotHistogram(triadMovement, column="sum_mean_tpm")
g1<-arrangeGrob(grobs=gs, ncol=2, top=local_title )
g1
}
get_dominance_summary_tables_per_factor<-function(selected_triads,
description = "description",
experiment="HC_CS_no_stress",
factor = NULL,
n=NULL
){
triads <- selected_triads$triads
triads <- triads[triads$dataset==experiment, ]
if(!is.null(factor)){
triads <- triads[triads$factor==factor, ]
}
query <- paste0("SELECT factor, " ,
description ,
" as description, count(*) as count FROM triads GROUP BY factor, " ,
description )
table <- sqldf(query)
casted <- dcast(table, factor ~ description , value.var="count")
rownames(casted) <- casted$factor
casted$factor <- NULL
casted<-as.matrix(casted)
casted <- ifelse(is.na(casted),0, casted)
total_per_factor<-rowSums(casted)
percentage <- as.matrix(100 * casted / total_per_factor)
if(! is.null(n)){
casted <- percentage * n / 100
total_per_factor<-rowSums(casted)
}
pasted<-matrix(paste(as.matrix(round(casted,0)),
as.matrix(round(percentage,2)) , sep=" - "),
nrow=nrow(casted),
dimnames=dimnames(casted))
pasted<-matrix(paste0(pasted, "%"),
nrow=nrow(casted),
dimnames=dimnames(casted))
pasted<-data.frame(pasted)
pasted$total <-total_per_factor
long<-melt(casted)
colnames(long)<-c("factor", "description", "count")
percentage_long<-melt(percentage)
colnames(percentage_long)<-c("factor", "description", "percentage")
long<-merge(x=long,
y=percentage_long,
by.x=c("factor","description"),
by.y=c("factor","description"))
list(long=long, casted=casted, percentage=percentage, pasted=pasted,total=total_per_factor )
}
table_with_title<-function(title, table){
mytheme <- gridExtra::ttheme_default(
core = list(fg_params=list(cex = 0.5)),
colhead = list(fg_params=list(cex = 0.5)),
rowhead = list(fg_params=list(cex = 0.5)))
table2 <- tableGrob(table, theme=mytheme)
g1<-arrangeGrob(grobs=list(table2), ncol=1, top=title )
}
plot_dominance_summary_tables<-function(selected_triads,
expected_desc,
expected_gen_desc,
experiment="HC_CS_no_stress", title="test"){
local_title <- paste0(title, "\n", experiment)
triads <- selected_triads$triads
expected_triads_desc<-expected_desc$long
total_genes<-sum(expected_triads_desc$count)
multiplier<-nrow(triads) / total_genes
expected_triads_desc$exp_count <- expected_triads_desc$count * multiplier
expected_triads_gen_desc<-expected_gen_desc$long
total_genes<-sum(expected_triads_gen_desc$count)
multiplier<-nrow(triads) / total_genes
expected_triads_gen_desc$exp_count <- expected_triads_gen_desc$count * multiplier
expected_triads_gen_desc$general_description <- expected_triads_gen_desc$description
gs <- list()
p <- ggplot(triads, aes(factor)) + geom_bar() + theme_bw()
p <- p + theme(axis.text.x = element_text(angle = 35, hjust = 1, size=4)) + labs(fill="", x="")
p <- p + theme(legend.text=element_text(size=5))+
theme(legend.title=element_text(size=6)) + facet_wrap(~ description, ncol=4)
theme(legend.key.size = unit(0.4,"line"))
p <- p + geom_point(data=expected_triads_desc,
aes(x=factor, y=exp_count), color="red", size = 0.5)
gs[[length(gs)+1]] <- p
p <- ggplot(triads, aes(factor)) + geom_bar() + theme_bw()
p <- p + theme(axis.text.x = element_text(angle = 35, hjust = 1, size=4)) + labs(fill="", x="")
p <- p + theme(legend.text=element_text(size=5))+
theme(legend.title=element_text(size=6))+
theme(legend.key.size = unit(0.4,"line"))+ facet_wrap(~ general_description, ncol=3)
p <- p + geom_point(data=expected_triads_gen_desc,
aes(x=factor, y=exp_count), color="red", size = 0.5)
gs[[length(gs)+1]] <- p
g1<-arrangeGrob(grobs=gs, ncol=1, top=local_title )
g1
}
get_goseq_enrichment<-function(geneInformation, genes_to_plot,
name="Random Samples",
dataset="HC_CS_no_stress",
ontology="GO"
){
id_names <- geneInformation$id_names
universe<-geneInformation$gene_universe
universe<-universe[universe$dataset==dataset,]
#print(nrow(universe))
if(nrow(universe) == 0) {
return (data.frame(category= numeric(0),
over_represented_pvalue= numeric(0),
under_represented_pvalue= numeric(0),
numDEInCat= numeric(0),
numInCat= numeric(0),
ontology= numeric(0),
over_rep_padj= numeric(0),
under_rep_padj= numeric(0),
description= numeric(0),
universe_size= numeric(0)
))
}
ontologies<-geneInformation$ontologies
ontologies<-ontologies[ontologies$ontology==ontology,]
assayed.genes <- as.vector(universe$gene)
gene.vector=as.integer(assayed.genes%in%genes_to_plot)
names(gene.vector)=assayed.genes
transcripts<-geneInformation$canonicalTranscripts
lengths <- transcripts[,c("Gene", "exon_length")]
colnames(lengths) <- c("gene", "length")
t1 <- subset(lengths, gene %in% universe$gene)
gene.lens <- as.numeric(t1$length)
names(gene.lens) = t1$gene
all_go <- subset(ontologies, Gene %in% universe$gene)
all_go<-all_go[,c(1,2)]
pwf = nullp(gene.vector, bias.data = gene.lens, plot.fit = FALSE)
GO.wall = goseq(pwf, gene2cat = all_go)
#this gave table with p-values...now correct for multiple testing using FDR
# add new column with over represented GO terms padj
GO.wall$over_rep_padj=p.adjust(GO.wall$over_represented_pvalue, method="BY")
# add new column with under represented GO terms padj
GO.wall$under_rep_padj=p.adjust(GO.wall$under_represented_pvalue, method="BY")
# now select only GO terms where the padj is <0.05 for either enriched or under represented
sig.GO <- GO.wall[GO.wall$over_rep_padj <0.05 | GO.wall$under_rep_padj <0.05,]
sig.GO <- sig.GO[order(sig.GO$over_rep_padj),]
if(nrow(sig.GO) == 0 ){
return (data.frame(category= numeric(0),
over_represented_pvalue= numeric(0),
under_represented_pvalue= numeric(0),
numDEInCat= numeric(0),
numInCat= numeric(0),
ontology= numeric(0),
over_rep_padj= numeric(0),
under_rep_padj= numeric(0),
description= numeric(0),
universe_size= numeric(0)
))
}
sig.GO2 <- merge(sig.GO, id_names, by.x="category", by.y="V1", all.x =TRUE, all.y =FALSE)
if( nrow(sig.GO) > 0 && (ontology == "PO" || ontology == "TO" )){
sig.GO2$ontology<-ontology
}
sig.GO <-sig.GO2[,c('category','over_represented_pvalue','under_represented_pvalue','numDEInCat','numInCat',
'ontology','over_rep_padj','under_rep_padj','V2')]
colnames(sig.GO)<-c('category','over_represented_pvalue','under_represented_pvalue','numDEInCat','numInCat',
'ontology','over_rep_padj','under_rep_padj','description')
sig.GO$type<-ifelse(sig.GO$under_rep_padj > sig.GO$over_rep_padj,
"Over represented",
"Under represented" )
sig.GO$p_adjust<-ifelse(sig.GO$under_rep_padj > sig.GO$over_rep_padj,
sig.GO$over_rep_padj,
sig.GO$under_rep_padj )
sig.GO$percentage<- round(100 * sig.GO$numDEInCat/sig.GO$numInCat,2)
ret<-sig.GO
slim<-geneInformation$GOSlim
sig.GO <- sqldf("SELECT category,
over_represented_pvalue,
under_represented_pvalue,
numDEInCat,
numInCat,
ontology,
over_rep_padj,
under_rep_padj,
description,
type,
p_adjust,
percentage,
GROUP_CONCAT(DISTINCT slim_acc) as slim_acc,
GROUP_CONCAT(DISTINCT slim_term_type) as slim_term_type,
GROUP_CONCAT(DISTINCT slim_name) as slim_name
FROM ret LEFT JOIN slim ON category = acc
GROUP BY category,
over_represented_pvalue,
under_represented_pvalue,
numDEInCat,
numInCat,
ontology,
over_rep_padj,
under_rep_padj,
description,
type,
p_adjust,
percentage")
sig.GO
}
plot_enrichment<-function(enrichment,experiment="HC_CS_no_stress", title="test" , type="Over represented"){
local_title <- paste0(title, "\n", experiment, "\n", type , "\nPercentage of genes from each ontology\n")
gs<-list()
for(ont in unique(enrichment$ontology)){
current<-enrichment[enrichment$ontology == ont & enrichment$type == type, ]
current$description<-strtrim(current$description, 70)
if(nrow(current) > 0){
gs[[length(gs)+1]] <- ggplot(current, aes(description, percentage)) +
coord_flip() +
geom_col(fill="#a8ddb5") + labs(y="", x="percentage", title=ont) + theme_bw() + #ylim(0,100) +
theme(axis.text.x = element_text(angle = 0, hjust = 1, size=4),
axis.text.y = element_text(angle = 35, hjust = 1, size=4)) +
geom_text(aes(label=numDEInCat), position=position_dodge(width=0.9), size=3)
}
}
if(length(gs) == 0){
gs[[length(gs)+1]] <- textGrob("None")
}
g1<-arrangeGrob(grobs=gs, ncol=2, top=local_title )
}
plot_expressed_tissues_across_chromosomes<-function(tpms, transcripts,
title = "Test", bin_size=5){
#all_means_filter, ct_with_partition,
#transcripts<-geneInformation$canonicalTranscripts
#all_means_filter<-geneInformation$meanTpms
#all_means_filter<-all_means_filter[all_means_filter$factor=='all_mean_filter']
transcripts$scaled_5per_position <- bin_size * ceiling(transcripts$scaled_1per_position / bin_size)
transcripts$scaled_5per_position <- ifelse(transcripts$scaled_5per_position == 0, bin_size, transcripts$scaled_5per_position)
#transcripts$scaled_5per_position <- transcripts$scaled_pc
expected_tissues <- sqldf("SELECT AVG(value) as meanTPM, AVG(samples) as noSamples, scaled_5per_position
FROM tpms
JOIN transcripts ON tpms.gene = transcripts.Gene
WHERE geneconf = 'HC' AND Chr != 'chrUn'
GROUP BY scaled_5per_position")
expected_tissues_mean <- sqldf("SELECT
Chr,
chr_group,
genome,
scaled_5per_position,
AVG(value) as meanTPM,
AVG(samples) as noSamples,
count(*) as count
FROM tpms
JOIN transcripts ON tpms.gene = transcripts.Gene
WHERE geneconf = 'HC' AND Chr != 'chrUn'
GROUP BY Chr, scaled_5per_position, chr_group, genome
ORDER BY Chr, chr_group, genome, scaled_5per_position ")
gs<-list()
local_title = paste0(title, "\n Average expressed per 5% bin\nN: ", nrow(table) )
p <-ggplot(expected_tissues_mean,aes(x=scaled_5per_position, y=noSamples, group=Chr))
samples_reduced<-expected_tissues_mean[, c("scaled_5per_position", "noSamples", "Chr")]
p <- p + geom_line(data=samples_reduced,
aes(x=scaled_5per_position, y=noSamples,group=Chr
),color='black', size=0.4, alpha=0.2 )
p <- p + ylab("No of tissues") + xlab("")
if(!is.null(expected_tissues)){
exp_norm <-expected_tissues[, c("scaled_5per_position", "noSamples")]
exp_norm$Chr<-"All"
p <- p + geom_line(data=exp_norm,
aes(x=scaled_5per_position, y=noSamples),
color="blue", size=1, alpha=1)
}
p <- p + theme_bw() + theme(axis.text=element_text(size=7),
axis.title=element_text(size=7),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
p <- p + scale_x_continuous(expand = c(0, 0))
p <- p + geom_vline(xintercept = c(9, 28,50,80))
p
}
plot_gene_summary<-function(geneInformation, genes_to_plot, name="Random Samples" , output_path="./Test", run_stats=FALSE){
summary_df <- NULL
local_table<-geneInformation$canonicalTranscripts
local_table<-local_table[local_table$Gene %in% genes_to_plot,]
local_mean_tpms<-geneInformation$meanTpms
local_mean_tpms<-local_mean_tpms[local_mean_tpms$gene %in% genes_to_plot, ]
stats_to_plot<-c('size_cds', 'exon_no', 'exon_length','intron_length', 'X3UTR_length', 'X5UTR_length' )
expected_per_chr<-get_expected_values_per_5pc_bin(geneInformation$canonicalTranscripts, nrow(local_table))
gs<-list()
plots<-list()
dir<-paste0(output_path,"/",name)
dir.create(dir, showWarnings = FALSE, recursive = TRUE)
gc()
plots[[length(plots)+1]] <- textGrob(paste0(name, " Gene summary"))
print("Plotting gene summaries")
for(plot in stats_to_plot){
p<-plotHistogram(local_table,column=plot)
gs[[length(gs)+1]] <- p
tmp_df <- prepare_hist_stats(local_table, column=plot)
tmp_df$value_type <- plot
tmp_df$dataset<- "Gene summary"
tmp_df$title <- name
if(is.null(summary_df))
{
summary_df <- tmp_df
}
else
{
summary_df <- rbind(summary_df, tmp_df)
}
}
plots[[length(plots)+1]] <- arrangeGrob(grobs=gs, ncol=2 , top = paste0(name, "\n Gene properties"))
plots[[length(plots)+1]] <- plot_per_chromosome_5pc_bins_facet(local_table,
expected_per_chr=expected_per_chr,
title=name)
gene_density<-get_gene_density(genes_to_plot, geneInformation)
output_gene_density<-paste0(dir, "/", "gene_density_per_region.csv")
plots[[length(plots)+1]] <- plot_per_partition_gene_count(local_table, gene_density, title=name)
write.csv(gene_density, file=output_gene_density)
print("Plotting TPM summaries")
plots[[length(plots)+1]] <- textGrob(paste0(name, " TPM summary"))
for(s in unique(geneInformation$meanTpms$subset)){
plots[[length(plots)+1]] <- plot_tpms_summary(local_mean_tpms, experiment=s, title=name)
plots[[length(plots)+1]] <- plot_density_expression(geneInformation,genes_to_plot, experiment=s, title=name)
plots[[length(plots)+1]] <- plot_tpm_desc_stats(geneInformation$meanTpms, local_mean_tpms, experiment=s, title=name)
plots[[length(plots)+1]] <- plot_all_means_filteredtpms_summary(local_mean_tpms, geneInformation$canonicalTranscripts, experiment=s, title=name)
}
plots[[length(plots)+1]] <- textGrob(paste0(name, " Triad summary"))
triada_movment_df<-NULL
print("Plotting triads")
for(s in unique(geneInformation$triads$dataset)){
tmp_plot<-plot_clust_dist(geneInformation, genes_to_plot, experiment=s, title = paste0(s,"\n",name))
if(is.null(tmp_plot)){
next
}
plots[[length(plots)+1]] <- tmp_plot
plots[[length(plots)+1]] <- plot_triad_movment(geneInformation, genes_to_plot,
experiment=s,
title=paste0(s,"\n",name))
for(i in c(1,2,3) ){
local_triads <- get_triads_from_genes(genes_to_plot, geneInformation, dataset=s, min_no_genes = i)
if(nrow(local_triads$triads)== 0){
next
}
name_tmp <-name
name <- paste0(name, " Min genes in triad: ", i )
plots[[length(plots)+1]] <- plot_dominance_summary(local_triads, experiment=s, title=name)
tmp_df<- table_dominance_summary(local_triads, experiment=s, title=name)
summary_df <- rbind(summary_df, tmp_df)
observed_desc <-get_dominance_summary_tables_per_factor(local_triads, experiment=s)
observed_gen_desc<-get_dominance_summary_tables_per_factor(local_triads,
description="general_description",
experiment=s)
expected_desc <-get_dominance_summary_tables_per_factor(geneInformation,
experiment=s,
n=observed_desc$total)
expected_gen_desc<-get_dominance_summary_tables_per_factor(geneInformation,
description="general_description",
experiment=s,
n=observed_gen_desc$total)
o_l <- observed_desc$long
og_l <- observed_gen_desc$long
e_l <- expected_desc$long
eg_l <- expected_gen_desc$long
o_l$group <- "fine"
e_l$group <- "fine"
og_l$group <- "general"
eg_l$group <- "general"
o_l$sum_for <- "observed"
e_l$sum_for <- "expected"
og_l$sum_for <- "observed"
eg_l$sum_for <- "expected"
o_l$dataset <- name_tmp
og_l$dataset <- name_tmp
e_l$dataset <- name_tmp
eg_l$dataset <- name_tmp
o_l$dataset_for_triads <- s
og_l$dataset_for_triads <- s
e_l$dataset_for_triads <- s
eg_l$dataset_for_triads <- s
o_l$min_expressed_genes_in_triad <- i
og_l$min_expressed_genes_in_triad <- i
e_l$min_expressed_genes_in_triad <- i
eg_l$min_expressed_genes_in_triad <- i
if(is.null(triada_movment_df)){
triada_movment_df <- o_l
}
else{
triada_movment_df <- rbind(triada_movment_df, o_l)
}
triada_movment_df <- rbind(triada_movment_df, og_l)
triada_movment_df <- rbind(triada_movment_df, e_l)
triada_movment_df <- rbind(triada_movment_df, eg_l)
plots[[length(plots)+1]] <-plot_dominance_summary_tables(local_triads,
expected_desc,
expected_gen_desc,
experiment=s, title=name )
plots[[length(plots)+1]] <- table_with_title(paste(name,s, "Observed",sep="\n"),
observed_desc$pasted
)
plots[[length(plots)+1]] <- table_with_title(paste(name, s, "Expected",sep="\n"),
expected_desc$pasted
)
plots[[length(plots)+1]] <- table_with_title(paste(name, s, "Observed",sep="\n"),
observed_gen_desc$pasted
)
plots[[length(plots)+1]] <- table_with_title(paste(name, s, "Expected",sep="\n"),
expected_gen_desc$pasted
)
name<-name_tmp
}
}
output_summary<-paste0(dir, "/", "summary_from_histograms.csv")
write.csv(summary_df, file=output_summary)
output_enrichment<-paste0(dir, "/", "triad_movment_summary.csv")
write.csv(triada_movment_df, file=output_enrichment)
output_pdf<-paste0(dir, "/",name ,".pdf")
g1<-marrangeGrob(plots, ncol=1, nrow=1, top="", bottom = quote(paste("page", g, "of",
pages)))
ggsave(output_pdf, plot=g1 , width = 210, height = 297, units = "mm")
if(run_stats){
path_motifs_t_test<-paste0(dir, "/", "motifs_t_test.csv")
path_motifs_fisher<-paste0(dir, "/", "motifs_fisher.csv")
path_motifs_triads<-paste0(dir, "/", "motifs_triads.csv")
print("Testing motif enrichment")
res<-get_motifs_for_genes(genes_to_plot, geneInformation, name=name)
write.csv(res$t,
file=path_motifs_t_test,
row.names=F)
write.csv(res$fisher,
file=path_motifs_fisher,
row.names=F)
write.csv(get_motifs_for_triad(genes_to_plot, geneInformation, name=name),
file=path_motifs_triads,
row.names=F)
res<-NULL
print("Testing GO enrichment")
all_enrichments <- NULL
for(g_u in unique(geneInformation$gene_universe$dataset)){
for(ont in unique(geneInformation$ontologies$ontology)){
enrichment_test<- get_goseq_enrichment(geneInformation, genes_to_plot, ontology=ont , dataset=g_u)
if(nrow(enrichment_test) == 0){
next
}
enrichment_test$universe<-g_u
enrichment_test$ontology_universe <- ont
if(is.null(all_enrichments)){
all_enrichments <- enrichment_test
}
else{
all_enrichments<-rbind(all_enrichments, enrichment_test)
}
}
}
output_enrichment<-paste0(dir, "/", "enrichment.csv")
write.csv(all_enrichments, file=output_enrichment)
}
g1
}
plot_density_expression<-function(geneInformation,genes_to_plot, experiment="850_samples", min_tpm=0.5, title="Test"){
tpms<-geneInformation$meanTpms
tpms<-tpms[tpms$gene %in% genes_to_plot, ]
local_tpms<-subset(tpms, (subset == experiment) &
( factor != "all" & factor != "all_means" & factor != "all_mean_filter" ) &
value > min_tpm)
ct <- geneInformation$canonicalTranscripts
local_tpms<-sqldf("SELECT local_tpms.*, ct.genome FROM local_tpms JOIN ct ON ct.gene = local_tpms.gene
WHERE ct.genome != 'n' ")
local_tpms$log_value <- log2(local_tpms$value)
local_title <- paste0(title, "\n", experiment)
p <- ggplot(local_tpms, aes(value, colour = genome))
p <- p + geom_density( ) + theme_bw()
p <- p + theme(axis.text.x = element_text(angle = 90, hjust = 1),
strip.text = element_text(size=6))
p <- p + facet_wrap(~ factor, ncol=4)
p <- p + scale_x_continuous(trans='log2',expand = c(0, 0))
p <- p + coord_cartesian(xlim = c(0.5, 128))
p <- p + ylab("Density") + xlab("")
p <- p + theme(strip.text.x = element_text(margin = margin(.1, 0, .1, 0, "cm")))
local_tpms<-subset(tpms, (subset == experiment) &
( factor == "all_mean_filter" ) &
value > min_tpm)
local_tpms<-sqldf("SELECT local_tpms.*, ct.genome FROM local_tpms JOIN ct ON ct.gene = local_tpms.gene
WHERE ct.genome != 'n' ")
local_tpms$log_value <- log2(local_tpms$value)
p2 <- ggplot(local_tpms, aes(value, colour = genome))
p2 <- p2 + geom_density() + theme_bw()
p2 <- p2 + theme(axis.text.x = element_text(angle = 90, hjust = 1),
strip.text = element_text(size=6, lineheight=0.5))
p2 <- p2 + ylab("") + xlab("TPM")
p2 <- p2 + scale_x_continuous(trans='log2',expand = c(0, 0))
p2 <- p2 + coord_cartesian(xlim = c(0.5, 128))
mytheme <- gridExtra::ttheme_default(
core = list(fg_params=list(cex = 0.5)),
colhead = list(fg_params=list(cex = 0.5)),
rowhead = list(fg_params=list(cex = 0.5)))
lay <- rbind(c(1),
c(2))
g1<-arrangeGrob(grobs=list(p,p2), heights=c(0.8,0.2), layout_matrix=lay, top = local_title)
g1
}
get_counts_values_per_5pc_bin<-function(gene_table,group_in_single_chromosome=FALSE){
query<-"SELECT Chr, chr_group, genome,scaled_5per_position, count(*) as count
FROM
gene_table
WHERE Chr != 'chrUn'
GROUP BY Chr, chr_group, genome, scaled_5per_position"
if(group_in_single_chromosome){
query<-"SELECT 'All' as Chr, 'all' as chr_group, 'all' as genome,
scaled_5per_position, count(*)/21 as count
FROM
gene_table
WHERE Chr != 'chrUn'
GROUP BY scaled_5per_position"
}
counts<-sqldf(query)
counts
}
plot_per_chromosome_5pc_bins_overlap_lines<-function(table,expected_per_chr,
expected_all_chromosomes=NULL,
title = "Test"){
chromosomes=c("1A", "1B", "1D",
"2A", "2B", "2D",
"3A", "3B", "3D",
"4A", "4B", "4D",
"5A", "5B", "5D",
"6A", "6B", "6D",
"7A", "7B", "7D")
gs<-list()
local_title = paste0(title, "\n Genes per chromosome 5% bin\nN: ", nrow(table) )
t <- table[table$Chr != "chrUn",]
t1 <- get_counts_values_per_5pc_bin(t)
t2 <- get_counts_values_per_5pc_bin(t, group_in_single_chromosome=TRUE)
print(head(t1))
expected_per_chr <- expected_per_chr[expected_per_chr$Chr != "chrUn",]
p <-ggplot(expected_per_chr,aes(scaled_5per_position, expected, group=Chr))
p <- p + xlim(0,100)
p <- p + geom_line(data=expected_per_chr[, c("scaled_5per_position", "expected", "Chr")],
aes(x=scaled_5per_position, y=expected,group=Chr
),
color='black', size=1, alpha=0.1 )
p <- p + ylab(" count ") + xlab("")
if(!is.null(expected_all_chromosomes)){
expected_all_chromosomes$expected <- expected_all_chromosomes$expected/21
exp_norm <-expected_all_chromosomes[, c("scaled_5per_position", "expected", "Chr")]
p <- p + geom_line(data=exp_norm,
aes(x=scaled_5per_position, y=expected),
color="blue", alpha=0.5)
}
p <- p + theme_bw()
p1 <- p + geom_line(color="red", alpha=0.3)
p1 <- p1 + geom_point(data=t1, aes(x=scaled_5per_position, y=count), color="red")
p <- p + geom_point(data=t2, aes(x=scaled_5per_position, y=count), color="blue")
gs[[length(gs)+1]] <- p1 + facet_grid(chr_group~genome, drop = TRUE)
gs[[length(gs)+1]] <- p
g1<-arrangeGrob(grobs=gs, ncol=1, heights=c(0.8,0.2), top=local_title )
g1
}
plot_triad_movment<-function(geneInformation,
genes_to_plot,
experiment="HC_850_samples",
title="Test",
density = FALSE ,
points = TRUE
){
allTriads<-geneInformation$allTriads
selectedTriads<-unique(allTriads[allTriads$gene %in% genes_to_plot, "group_id"])
tmp_df<-geneInformation$triads[geneInformation$triads$group_id %in% selectedTriads &
geneInformation$triads$dataset == experiment,
c("group_id","factor","clust","description","chr_group","normalised_triad", "Distance")]
clust_df <- dcast(tmp_df,group_id +clust+description+factor+Distance ~ chr_group, value.var = "normalised_triad")
clust_df_all_mean <- clust_df[clust_df$factor == 'all_mean_filter' , ]
clust_df_factors <- clust_df[clust_df$factor != 'all_mean_filter' &
clust_df$factor != 'all_means' &
clust_df$factor != 'all' , ]
tern_mean <- ggtern(clust_df_all_mean,aes(A,B,D,color=description)) + theme_bw() +
theme_legend_position(x = "topleft") +
theme_arrownormal() + guides(colour = guide_legend(override.aes = list(alpha = 1))) +ggtitle("Mean")
tern_all <- ggtern(clust_df_factors,aes(A,B,D,color=description)) + theme_bw() +
guides(colour = guide_legend(override.aes = list(alpha = 1)))
tern_fact <- tern_all + facet_wrap(~factor, ncol=4) +
theme_notitles() + theme(legend.position = "none") + theme_nolabels()
tern_all <- tern_all + theme_legend_position(x = "topleft") + theme_arrownormal() +ggtitle("All factors")
if(density){
tern_mean<- tern_mean + stat_density_tern(
geom='polygon', show.legend = F,
aes(fill=..level..),
bins=10,
color='grey')
tern_all <- tern_all + stat_density_tern(
geom='polygon',show.legend = F,
aes(fill=..level..),
bins=10,
color='grey')
tern_fact<- tern_fact + stat_density_tern(
geom='polygon',show.legend = F,
aes(fill=..level..),
bins=5,
color='grey')
}
if(points){
tern_mean <- tern_mean + geom_point(alpha=0.25)
tern_all <- tern_all + geom_point(alpha=0.25)
tern_fact <- tern_fact + geom_point(alpha=0.25)
}
gs<-list(tern_mean, tern_all, tern_fact)
lay <- rbind(c( 1, 3),
c( 2, 3)
)
g2 <- arrangeGrob(grobs = gs, layout_matrix = lay, top = title)
g2
}
plotExpressedTissuesAcrossChromosomes<-function(geneInformation,
genes_to_plot,
subset="850_samples",
factor="all_mean_filter",
title = "Test"){
#print(head(genes_to_plot))
tpms<-geneInformation$meanTpms
tpms<-tpms[tpms$factor==factor & tpms$subset==subset,]
transcripts<-geneInformation$canonicalTranscripts
genes_to_plot<-data.frame(Gene=genes_to_plot)
transcripts$scaled_5per_position <- 5 * ceiling(transcripts$scaled_1per_position / 5)
transcripts$scaled_5per_position <- ifelse(transcripts$scaled_5per_position == 0, 5, transcripts$scaled_5per_position)
#print(max(tpms$samples))
expected_tissues <- sqldf("SELECT AVG(value) as meanTPM, AVG(samples) as noSamples, scaled_5per_position
FROM tpms
JOIN transcripts ON tpms.gene = transcripts.Gene
WHERE geneconf = 'HC' AND Chr != 'chrUn'
GROUP BY scaled_5per_position")
expected_tissues_mean <- sqldf("SELECT
Chr,
chr_group,
genome,
scaled_5per_position,
AVG(value) as meanTPM,
AVG(samples) as noSamples,
count(*) as count
FROM tpms
JOIN transcripts ON tpms.gene = transcripts.Gene
WHERE geneconf = 'HC' AND Chr != 'chrUn'
GROUP BY Chr, scaled_5per_position, chr_group, genome
ORDER BY Chr, chr_group, genome, scaled_5per_position ")
gs<-list()
local_title = paste0(title, "\n Average expressed per 5% bin\nN: ", nrow(table) )
#t <- table[table$Chr != "chrUn",]
t1 <- sqldf("SELECT AVG(value) as meanTPM, AVG(samples) as noSamples, scaled_5per_position
FROM tpms
JOIN transcripts ON tpms.gene = transcripts.Gene
WHERE transcripts.Gene in genes_to_plot AND Chr != 'chrUn'
GROUP BY scaled_5per_position")
t2 <- sqldf("SELECT
Chr,
chr_group,
genome,
scaled_5per_position,
AVG(value) as meanTPM,
AVG(samples) as noSamples,
count(*) as count
FROM tpms
JOIN transcripts ON tpms.gene = transcripts.Gene
WHERE transcripts.Gene in genes_to_plot AND Chr != 'chrUn'
GROUP BY Chr, scaled_5per_position, chr_group, genome
ORDER BY Chr, chr_group, genome, scaled_5per_position ")
#print("-.-")
t1$Chr<-"All"
#print(head(t1))
#print(head(t2))
expected_per_chr <- expected_per_chr[expected_per_chr$Chr != "chrUn",]
p <-ggplot(expected_tissues_mean,aes(x=scaled_5per_position, y=noSamples, group=Chr))
samples_reduced<-expected_tissues_mean[, c("scaled_5per_position", "noSamples", "Chr")]
p <- p + geom_line(data=samples_reduced,
aes(x=scaled_5per_position, y=noSamples,group=Chr
),color='black', size=1, alpha=0.1 )
p <- p + ylab("No of tissues") + xlab("")
if(!is.null(expected_tissues)){
#print(head(expected_tissues))
exp_norm <-expected_tissues[, c("scaled_5per_position", "noSamples")]
exp_norm$Chr<-"All"
p <- p + geom_line(data=exp_norm,
aes(x=scaled_5per_position, y=noSamples),
color="blue", alpha=0.5)
}
p <- p + theme_bw()
p1 <- p + geom_line(color="red", alpha=0.3)
p1 <- p1 + geom_point(data=t2, aes(x=scaled_5per_position, y=noSamples), color="red")
p <- p + geom_point(data=t1, aes(x=scaled_5per_position, y=noSamples), color="blue")
gs[[length(gs)+1]] <- p1 + facet_grid(chr_group~genome, drop = TRUE)
gs[[length(gs)+1]] <- p
g1<-arrangeGrob(grobs=gs, ncol=1, heights=c(0.8,0.2), top=local_title )
g1
}
plotTPMOfExpressedTissuesAcrossChromosomes<-function(geneInformation,
genes_to_plot,
subset="850_samples",
factor="all_mean_filter",
title = "Test"){
#print(head(genes_to_plot))
tpms<-geneInformation$meanTpms
tpms<-tpms[tpms$factor==factor & tpms$subset==subset,]
transcripts<-geneInformation$canonicalTranscripts
genes_to_plot<-data.frame(Gene=genes_to_plot)
transcripts$scaled_5per_position <- 5 * ceiling(transcripts$scaled_1per_position / 5)
transcripts$scaled_5per_position <- ifelse(transcripts$scaled_5per_position == 0, 5, transcripts$scaled_5per_position)
#print(max(tpms$samples))
expected_tissues <- sqldf("SELECT AVG(value) as meanTPM, AVG(samples) as noSamples, scaled_5per_position
FROM tpms
JOIN transcripts ON tpms.gene = transcripts.Gene
WHERE geneconf = 'HC' AND Chr != 'chrUn'
GROUP BY scaled_5per_position")
expected_tissues_mean <- sqldf("SELECT
Chr,
chr_group,
genome,
scaled_5per_position,
AVG(value) as meanTPM,
AVG(samples) as noSamples,
count(*) as count
FROM tpms
JOIN transcripts ON tpms.gene = transcripts.Gene
WHERE geneconf = 'HC' AND Chr != 'chrUn'
GROUP BY Chr, scaled_5per_position, chr_group, genome
ORDER BY Chr, chr_group, genome, scaled_5per_position ")
gs<-list()
local_title = paste0(title, "\n Mean TPM of expressed tissues expressed per 5% bin\nN: ", nrow(table) )
t1 <- sqldf("SELECT AVG(value) as meanTPM, AVG(samples) as noSamples, scaled_5per_position
FROM tpms
JOIN transcripts ON tpms.gene = transcripts.Gene
WHERE transcripts.Gene in genes_to_plot AND Chr != 'chrUn'
GROUP BY scaled_5per_position")
t2 <- sqldf("SELECT
Chr,
chr_group,
genome,
scaled_5per_position,
AVG(value) as meanTPM,
AVG(samples) as noSamples,
count(*) as count
FROM tpms
JOIN transcripts ON tpms.gene = transcripts.Gene
WHERE transcripts.Gene in genes_to_plot AND Chr != 'chrUn'
GROUP BY Chr, scaled_5per_position, chr_group, genome
ORDER BY Chr, chr_group, genome, scaled_5per_position ")
t1$Chr<-"All"
expected_per_chr <- expected_per_chr[expected_per_chr$Chr != "chrUn",]
p <-ggplot(expected_tissues_mean,aes(x=scaled_5per_position, y=meanTPM, group=Chr))
samples_reduced<-expected_tissues_mean[, c("scaled_5per_position", "meanTPM", "Chr")]
p <- p + geom_line(data=samples_reduced,
aes(x=scaled_5per_position, y=meanTPM,group=Chr
),color='black', size=1, alpha=0.1 )
p <- p + ylab("No of tissues") + xlab("")
if(!is.null(expected_tissues)){
#print(head(expected_tissues))
exp_norm <-expected_tissues[, c("scaled_5per_position", "meanTPM")]
exp_norm$Chr<-"All"
p <- p + geom_line(data=exp_norm,
aes(x=scaled_5per_position, y=meanTPM),
color="blue", alpha=0.5)
}
p <- p + theme_bw() + scale_y_log10()
p1 <- p + geom_line(color="red", alpha=0.3)
p1 <- p1 + geom_point(data=t2, aes(x=scaled_5per_position, y=meanTPM), color="red")
p <- p + geom_point(data=t1, aes(x=scaled_5per_position, y=meanTPM), color="blue")
gs[[length(gs)+1]] <- p1 + facet_grid(chr_group~genome, drop = TRUE)
gs[[length(gs)+1]] <- p
g1<-arrangeGrob(grobs=gs, ncol=1, heights=c(0.8,0.2), top=local_title )
g1
}
get_empty_bins_for_partitions<-function(geneInformation){
partition<-geneInformation$partition
chrs<-NULL
for(i in rownames(partition))
{
chrom<-partition[i,"Chr"]
length<-partition[i,"Length"]
Chr<-rep(chrom, length+1)
bin<-0:length+1
count<-rep(0,length+1)
chr_group<-rep(substr(chrom, 4,4),length+1)
genome<-rep(substr(chrom, 5,5),length+1)
l_partition<-rep(0,length+1)
df<-data.frame(Chr,chr_group, genome, partition=l_partition, bin, count)
if(is.null(chrs))
{
chrs<-df
}else
{
chrs<-rbind(chrs, df)
}
}
chrs_with_partition<-sqldf('SELECT chrs.Chr, chrs.chr_group, genome, CASE
WHEN chrs.bin < R1_R2a THEN "R1"
WHEN chrs.bin < R2a_C THEN "R2A"
WHEN chrs.bin < C_R2b THEN "C"
WHEN chrs.bin < R2b_R3 THEN "R2B"
ELSE "R3" END as new_partition, chrs.bin,
count
FROM chrs LEFT JOIN partition ON partition.Chr = chrs.Chr ')
c("Chr", "chr_group", "genome","partition","bin", "count")->colnames(chrs_with_partition)
chrs_with_partition
}
get_gene_density<-function(genes_to_plot, geneInformation, bin_size=1000000){
local_table<-geneInformation$canonicalTranscripts
local_table<-local_table[local_table$Gene %in% genes_to_plot,]
ct<-local_table
ct$bin <- round(ct$Start / bin_size)
density <- sqldf("SELECT Chr, chr_group, genome, partition, bin, count(*) as count FROM ct
GROUP BY Chr, chr_group, genome, partition, bin
ORDER BY Chr, bin")
density<-rbind(density, get_empty_bins_for_partitions(geneInformation))
sqldf("SELECT Chr, chr_group, genome, partition, bin, sum(count) as count
FROM density
GROUP BY Chr, chr_group, genome, partition, bin
ORDER BY Chr, bin")
}
plot_per_partition_gene_count<-function(table, gene_density, title = "Test"){
gs<-list()
local_title = paste0(title, "\n Genes per 1MBp \nN: ", nrow(table) )
t1 <- gene_density[gene_density$Chr != "chrUn",]
ylim1 = boxplot.stats(t1$count)$stats[c(1, 5)]
p <-ggplot(t1,aes(partition, count))
p <- p + geom_boxplot(aes(fill=partition))
p <- p + theme_bw()
p <- p + coord_cartesian(ylim = ylim1*1.05)
p <- p + scale_fill_brewer(palette = "Set1")
p <- p + theme(legend.position="none")
p1 <- p + facet_grid(chr_group~genome, drop = FALSE)
gs[[length(gs)+1]] <- p1
gs[[length(gs)+1]] <- p
g1<-arrangeGrob(grobs=gs, ncol=1, heights=c(0.8,0.2), top=local_title )
g1
}
get_motifs_for_triad<-function(genes, geneInformation, name=name){
motifs<-geneInformation$motifs
triads<-geneInformation$allTriads
motifs<-motifs[motifs$gene %in% genes,]
genes_df <- data.frame(gene=genes)
triad_counts <- sqldf("
SELECT group_id
FROM triads
JOIN genes_df
WHERE triads.gene = genes_df.gene
GROUP BY group_id
HAVING count(*) = 3")
query<-"SELECT
motif,
motif_set,
chr_group,
count(DISTINCT triads.gene) as total_genes,
sum(count) as sum,
avg(count) as average
FROM triads
JOIN motifs ON triads.gene = motifs.gene
JOIN triad_counts ON triad_counts.group_id = triads.group_id
GROUP BY motif, motif_set, chr_group"
aggregated<-sqldf(query)
sums<-sqldf("SELECT motif, motif_set,
sum(total_genes) as sum_total_genes,
sum(sum) as sum_sum,
sum(average) as sum_average
FROM aggregated
GROUP BY motif, motif_set")
percentages<-sqldf("SELECT aggregated.*,
sum_total_genes, sum_sum, sum_average,
100.0 * total_genes / sum_total_genes as percentage_total_genes,
100.0 * sum / sum_sum as percentage_sum,
100.0 * average / sum_average as percentage_average
FROM aggregated JOIN sums
ON aggregated.motif = sums.motif
AND aggregated.motif_set = sums.motif_set
ORDER BY
motif_set, motif, chr_group ")
percentages$Gene_set <- name
percentages
}
get_motifs_for_genes<-function(genes_to_plot, geneInformation, name="Test"){
datasets<-unique(geneInformation$gene_universe$dataset)
total_sets<-length(datasets)
gene_universe<-geneInformation$gene_universe
motifs<-geneInformation$motifs
motif_sets<-unique(motifs$motif_set)
alternatives<-c("greater","less")
matrix_for_test<-matrix(c(1, 2, 3, 4), nrow = 2, ncol = 2)
rownames(matrix_for_test)<-c("gene_set", "universe")
colnames(matrix_for_test)<-c("motif", "genes")
enrich_all_family <- data.frame(matrix(ncol = 10, nrow = 0))
colnames(enrich_all_family) <- c("Universe",
"Gene_set",
"Motif_set",
"Motif",
"have_gene_set",
"dont_have_gene_set",
"have_universe",
"dont_have_universe",
"fisher_alternative",
"fisher_pvalue")
enrich_t_test <- data.frame(matrix(ncol = 11, nrow = 0))
colnames(enrich_t_test) <- c("Universe",
"Gene_set",
"Motif_set",
"Motif",
"gene_set_gene_count",
"universe_gene_count",
"gene_set_motif_mean",
"universe_motif_mean",
"statistic_t",
"parameter_df",
"p_value")
for(dataset in datasets){
universe<-gene_universe[gene_universe$dataset == dataset,]
genes_in_universe<-genes_to_plot[genes_to_plot %in% universe$gene]
count_gene_set_genes<-length(genes_in_universe)
count_universe_genes<-nrow(universe)
#We advance the iteration if we are comparing the universe to itself. This is because
#The statistical tests fail when both sets are exactly the same.
if(count_gene_set_genes == count_universe_genes) next
#matrix_for_test[1,2] <- count_gene_set_genes
#matrix_for_test[2,2] <- count_universe_genes
for(m_set in motif_sets){
universe_motifs<-motifs[motifs$motif_set == m_set &
motifs$gene %in% universe$gene, ]
gene_set_motifs<-universe_motifs[universe_motifs$motif_set == m_set &
universe_motifs$gene %in% genes_in_universe, ]
motifs_in_gene_set <- unique(gene_set_motifs$motif)
motifs_in_universe <- unique(universe_motifs$motif)
motifs_gene_set_count<-count(gene_set_motifs, "motif")
motifs_universe_count<-count(universe_motifs, "motif")
for(motif in motifs_in_gene_set){
subset_genes <- count_gene_set_genes
all_genes <- count_universe_genes
subset_genes_motif <- motifs_gene_set_count[motifs_gene_set_count$motif==motif,"freq"]
all_genes_motif <- motifs_universe_count[motifs_universe_count$motif==motif,"freq"]
a <- subset_genes_motif
b <- all_genes_motif - subset_genes_motif
c <- subset_genes - subset_genes_motif
d <- all_genes - all_genes_motif - c
#print(motif)
matrix_for_test<-matrix(c(a, b, c, d), nrow = 2, ncol = 2)
rownames(matrix_for_test)<-c("gene_set", "universe")
colnames(matrix_for_test)<-c("have", "dont_have")
#print(matrix_for_test)
for(alternative in alternatives){
p.value<-2
tmp<-try(fisher.test(matrix_for_test, alternative = alternative))
if(!is.error(tmp)){
p.value<- tmp$p.value
}
enrich_all_family[nrow(enrich_all_family) + 1,] = list(dataset,
name,
m_set,
motif,
matrix_for_test[1,1],
matrix_for_test[1,2],
matrix_for_test[2,1],
matrix_for_test[2,2],
alternative,
p.value)
}
#Here the student T starts. We will use a new dataframe.
universe_motif_counts<-universe_motifs[universe_motifs$motif==motif, "count"]
gene_set_motif_counts<-gene_set_motifs[gene_set_motifs$motif==motif, "count"]
t_test<-list("estimate.mean of x"=0,
"estimate.mean of y"=0,
"statistic.t"=0,
"parameter.df"=0,
"p.value"=2
)
if(length(gene_set_motif_counts) > 3 ){
tmp <- try(unlist(t.test(gene_set_motif_counts, universe_motif_counts)))
if(!is.error(tmp)){
t_test<-tmp
enrich_t_test[nrow(enrich_t_test) + 1,] = list(dataset,
name,
m_set,
motif,
length(gene_set_motif_counts),
length(universe_motif_counts),
t_test["estimate.mean of x"],
t_test["estimate.mean of y"],
t_test["statistic.t"],
t_test["parameter.df"],
t_test["p.value"]
)
}else{
enrich_t_test[nrow(enrich_t_test) + 1,] = list(dataset,
name,
m_set,
motif,
length(gene_set_motif_counts),
length(universe_motif_counts),
mean(gene_set_motif_counts),
mean(universe_motif_counts),
0,
0,
2
)
}
}else{
enrich_t_test[nrow(enrich_t_test) + 1,] = list(dataset,
name,
m_set,
motif,
length(gene_set_motif_counts),
length(universe_motif_counts),
0,
0,
0,
0,
1
)
}
}
}
}
enrich_all_family$padj_BH <- p.adjust(enrich_all_family$fisher_pvalue, method="BH")
enrich_t_test$padj_BH <- p.adjust(enrich_t_test$p_value, method="BH")
list(fisher=enrich_all_family, t=enrich_t_test)
}
plot_triad_movment_single<-function(geneInformation,
genes_to_plot,
experiment="HC_850_samples",
title="Test",
density = FALSE ,
points = TRUE
){
allTriads<-geneInformation$allTriads
selectedTriads<-unique(allTriads[allTriads$gene %in% genes_to_plot, "group_id"])
tmp_df<-geneInformation$triads[geneInformation$triads$group_id %in% selectedTriads &
geneInformation$triads$dataset == experiment,
c("group_id","factor","clust","description","general_description","chr_group","normalised_triad", "Distance")]
clust_df <- dcast(tmp_df,group_id + clust + description + general_description + factor + Distance ~
chr_group, value.var = "normalised_triad")
clust_df_all_mean <- clust_df[clust_df$factor == 'all_mean_filter' , ]
original_description <- clust_df_all_mean[,c("group_id", "description", "general_description")]
colnames(original_description)<-c("group_id", "original_description", "original_general_description")
clust_df_factors <- clust_df[clust_df$factor != 'all_mean_filter' &
clust_df$factor != 'all_means' &
clust_df$factor != 'all' , ]
#Central="#808A9F",
group.colors<-c(A.dominant = "#579D1C", B.dominant = "#4B1F6F", D.dominant ="#FF950E",
Central="#AAAAAA",
A.suppressed = "#7FD127", B.suppressed = "#7D31AF", D.suppressed ="#FFAD42")
group.alpha<-c(A.dominant = 1, B.dominant = 1, D.dominant =1,
Central=0.1,
A.suppressed = 0.2, B.suppressed = 0.2, D.suppressed = 0.2)
group.alpha.tern<-c(A.dominant = 0.05, B.dominant = 0.05, D.dominant =0.05,
Central=0.05,
A.suppressed = 0.05, B.suppressed = 0.05, D.suppressed = 0.05)
clust_df_factors<-merge(clust_df_factors, original_description, by="group_id")
clust_df_factors_full<-clust_df_factors
clust_df_factors<-clust_df_factors[clust_df_factors$description !=clust_df_factors$original_description , ]
tern_all <- ggtern(clust_df_factors,aes(A,B,D,color=original_description)) + theme_bw()
tern_all <- tern_all + geom_point(aes(alpha=original_description)) +
scale_alpha_manual(values=group.alpha.tern) +
guides(colour = guide_legend(override.aes = list(alpha = 1))) +
scale_color_manual(values=group.colors)
tern_fact <- tern_all + facet_wrap(~original_general_description, ncol=1) +
theme_notitles() + theme(legend.position = "none") + theme_nolabels()
tern_all <- tern_all + theme_legend_position(x = "topleft") + theme_arrownormal() +
ggtitle("All factors")
tern_fact_2 <- ggtern(clust_df_factors_full,aes(A,B,D,color=original_description)) + theme_bw()
tern_fact_2 <- tern_fact_2 + geom_point(aes(alpha=original_description)) +
scale_alpha_manual(values=group.alpha.tern) +
scale_color_manual(values=group.colors)
tern_fact_2 <- tern_fact_2 + facet_wrap(~original_general_description, ncol=1) +
theme_notitles() + theme(legend.position = "none") + theme_nolabels()
tern_mean <- ggtern(clust_df_all_mean,aes(A,B,D,color=description)) + theme_bw() +
theme_legend_position(x = "topleft") + scale_color_manual(values=group.colors) +
theme_arrownormal() + ggtitle("Mean")
tern_mean <- tern_mean + geom_point(aes(alpha=description)) + scale_alpha_manual(values=group.alpha)
# guides(colour = guide_legend(override.aes = list(alpha = 1)))
tern_all
gs<-list(tern_mean, tern_all, tern_fact, tern_fact_2)
lay <- rbind(c( 1, 3, 4),
c( 2, 3, 4)
)
g2 <- arrangeGrob(grobs = gs, layout_matrix = lay, top = title)
g2
}
plot_normalized_triads<-function(triads){
group.colors <- c(A = "#579D1C", B = "#4B1F6F", D ="#FF950E")
p <- ggplot(triads, aes(chr_group, normalised_triad, fill=chr_group))
p <- p + geom_boxplot(outlier.alpha = 0.05)
p <- p + theme_classic()
p <- p + scale_fill_manual(values=group.colors) + guides(fill=FALSE)
p <- p + ylim(0,1)
p <- p + ylab("") + xlab("")
p
}
plot_clust_dist<-function(geneInformation,
genes_to_plot,
experiment="HC_850_samples",
title="All"
){
allTriads<-geneInformation$allTriads
selectedTriads<-unique(allTriads[allTriads$gene %in% genes_to_plot, "group_id"])
tmp_df<-geneInformation$triads[geneInformation$triads$group_id %in% selectedTriads &
geneInformation$triads$dataset == experiment,
c("group_id","factor","clust","description","general_description","chr_group","normalised_triad")]
if(nrow(tmp_df) == 0){
return(NULL)
}
clust_df <- dcast(tmp_df,group_id +general_description+clust+description+factor ~ chr_group, value.var = "normalised_triad")
clusters<-sort(c("B.suppressed",
"Central",
"A.dominant",
"A.suppressed",
"B.dominant",
"D.suppressed",
"D.dominant"))
group.colors <- c("B.suppressed"="#4B1F6F",
"Central"="#999999",
"A.dominant"="#579D1C",
"A.suppressed"="#579D1C",
"B.dominant"="#4B1F6F",
"D.suppressed"="#FF950E",
"D.dominant"="#FF950E")
group.fills <- c("B.suppressed"="white",
"Central"="#999999",
"A.dominant"="#579D1C",
"A.suppressed"="white",
"B.dominant"="#4B1F6F",
"D.suppressed"="white",
"D.dominant"="#FF950E")
group.shapes <- c("B.suppressed"=25,
"Central"=19,
"A.dominant"=17,
"A.suppressed"=25,
"B.dominant"=17,
"D.suppressed"=25,
"D.dominant"=17)
tern <- ggtern(clust_df,aes(A,B,D)) + theme_classic()
tern <- tern + geom_point(aes(color=description,
shape=description),
alpha=0.15)
tern <- tern + theme_arrownormal()
tern <- tern + theme_legend_position(x = "topleft")
tern <- tern + guides(colour = guide_legend(override.aes = list(alpha = 1)))
tern <- tern + scale_color_manual(values=group.colors)
tern <- tern + scale_shape_manual(values=group.shapes)
#tern <- tern + scale_fill_discrete(values=group.fills)
gs<-list(tern)
dat <- data.frame(
A=numeric(0),B=numeric(0), D=numeric(0), size=numeric(0),stringsAsFactors=FALSE )
rownames(dat)<-rownames(clusters)
for(c in clusters){
tmp_df_clust<-tmp_df[tmp_df$description==c,]
p <- plot_normalized_triads(tmp_df_clust)
p <- p + ggtitle(c)
dat[c,1] <- round(100*mean(tmp_df_clust[tmp_df_clust$chr_group == "A","normalised_triad"]),digits=2)
dat[c,2] <- round(100*mean(tmp_df_clust[tmp_df_clust$chr_group == "B","normalised_triad"]),digits=2)
dat[c,3] <- round(100*mean(tmp_df_clust[tmp_df_clust$chr_group == "D","normalised_triad"]),digits=2)
dat[c,4] <- nrow(tmp_df_clust)
gs[[length(gs)+1]] <- p
}
total_size<-sum(dat$size)
dat$percentage<-round(100*dat$size/total_size,digits=2)
gs[[length(gs)+1]]<-tableGrob(dat)
lay <- rbind(c( 1, 1, 1, 2, 4, 7),
c( 1, 1, 1, 3, 5, 8),
c( 9, 9, 9, NA,6,NA)
)
g2 <- arrangeGrob(grobs = gs, layout_matrix = lay, top = title)
g2
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.