plot_heatmap <- function(m.exp=m.exp
tfs=tfs, tgs=tgs,
title = "", v.name = "",
v.max = 1.0, v.min = 0){
# assumption is that the matrix has already been ordered by dendrogram
library(ggplot2)
library(ggdendro)
library(reshape2)
library(grid)
dendro.plot <- ggdendrogram(data = dd.col, rotate = TRUE)
dendro.plot <- dendro.plot + theme(axis.text.y = element_text(size = 6), axis.text.x = element_blank())
n.specs = dim(m.exp)[2]
# Melt the correlation matrix
melted_cormat <- melt((m.exp))
melted_cormat <- na.omit(melted_cormat)
melted_value <- melt((m.sig))
melted_value <- na.omit(melted_value)
# Create a ggheatmap
ggheatmap <- ggplot(melted_cormat, aes(Var2, Var1, fill = value)) +
geom_tile(color = "black") +
scale_fill_gradient2(low = "green", high = "red", mid = "white",
midpoint = 0, limit = c(v.min,v.max), name=v.name) +
# scale_x_discrete(breaks = rownames(cormat), labels=v.x_axis.ticks) +
# scale_y_discrete(breaks = colnames(cormat), labels=v.y_axis.ticks) +
theme_minimal() + # minimal theme
theme(axis.text.x = element_text(angle = 45, vjust = 1,
size = 6, hjust = 1,
colour="black")) +
theme(axis.text.y = element_text(angle = 0, vjust = 0,
size = 6, hjust = 0,
colour="black")) +
theme(legend.key.size = unit(0.5, 'cm'), #change legend key size
legend.key.height = unit(0.5, 'cm'), #change legend key height
legend.key.width = unit(0.5, 'cm'), #change legend key width
legend.title = element_text(size=6), #change legend title font size
legend.text = element_text(size=6)) + #change legend text font size
# ggtitle(title) +
# theme(plot.title = element_text(lineheight=1.0, size = 11)) +
coord_fixed()
ggheatmap <- (ggheatmap +
# commented out for non significance representation
geom_text(aes(Var2, Var1, label = melted_value$value), color = "black", size = 6, vjust = 0.8) +
theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_blank(), # remove identifies
panel.grid.major = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
axis.ticks = element_blank(),
legend.position = "left",
# legend.justification = c(-1, 1),
#legend.position = c(0.6, 0.7),
#legend.direction = "horizontal")+
#guides(fill = guide_colorbar(barwidth = 7, barheight = 1,title.position = "top", title.hjust = 0.5))
)
# geom_segment(data=my.lines, aes(x,y,xend=xend, yend=yend), size=1, inherit.aes=F)
)
pdf("test.pdf", height = 15, width = 8, paper = "letter")
grid.newpage()
print(ggheatmap, vp = viewport(x = 0.32, y = 0.5, width = 0.2, height = 1.0))
print(dendro.plot, vp = viewport(x = 0.5, y = 0.512, width = 0.2, height = 1.055))
dev.off()
# 8 15
#export figure
ggsave(p,filename="test.png",height=8,width=15,units="in",dpi=200)
return(ggheatmap)
}
#' precompute_and_store_diffExp_and_foldchange_matrices
#'
#'
#' @keywords
#' @export
#' @examples
precompute_and_store_diffExp_and_foldchange_matrices <- function(folder = "/tmp",
filename.annotation = "MERIT_athaliana_PMN_2017/athaliana_gene_expression/experiment_annotation_He_et_al_2015.txt",
filename.geneExpression = "MERIT_athaliana_PMN_2017/athaliana_gene_expression/GSE69995_re-analyzed_data_matrix.txt",
v.ath.Stress.treatment = c("GSE5620", "GSE5621", "GSE5622", "GSE5623", "GSE5624", "GSE5625", "GSE5626", "GSE5627", "GSE5628")
){
df.annotation <- read.csv(filename.annotation, header = TRUE, sep = "\t", fill = TRUE, stringsAsFactors = FALSE)
v.colnames_mandatory = c("sample_id", "series_id", "condition", "condition_optional", "tissue","replicate_group_w_control", "sublevel_series_id", "unique_ID")
if(!all(v.colnames_mandatory %in% names(df.annotation))){
stop(paste("could not find all mandatory columns in file:", paste(v.colnames_mandatory, collapse = ", ")))
}
tb.treatments = table(c(df.annotation$condition, df.annotation$condition_optional))
tb.treatments = tb.treatments[names(tb.treatments) != ""]
# tissue and treatment distributions
tb.tissues <- table(df.annotation$tissue)
v.conditionGroups = names(tb.treatments)
# loading gene expressino matrix
m.expression <- read.table(filename.geneExpression, row.names = 1, header = TRUE, sep = "\t", quote = "", stringsAsFactors = FALSE)
m.expression <- as.matrix(m.expression)
tb.conditions = table(df.annotation$condition)
tb.conditions[names(table(df.annotation$condition_optional))] = tb.conditions[names(table(df.annotation$condition_optional))] + table(df.annotation$condition_optional)
# split the experiments and tissues
df.annotation["key.exp"] <- NA
df.exp.pairs <- unique(df.annotation[,c("tissue","series_id")])
for(i in 1:nrow(df.exp.pairs)){
i.set <- which(df.annotation$tissue == df.exp.pairs[i,1] & df.annotation$series_id == df.exp.pairs[i,2])
df.annotation$key.exp[i.set] <- i
}
m.expression <- m.expression[,df.annotation$sample_id]
v.series_ids <- unique(df.annotation$series_id)
v.series_ids <- v.series_ids[!v.series_ids %in% v.ath.Stress.treatment]
v.series_sets <- c()
####
m.de <- c()
m.fc <- c()
m.directionality <- c()
v.treatment_buildingblocks <- c()
v.repeats <- c()
exp.counter <- 1
ids_differentialExpressionSamples <- c()
message(paste("Compute differential expression based on Welch t-test and prepare treatment gene expression matrix for two-way anova analysis"))
pb <- txtProgressBar(min = 0, max = length(v.series_ids), style = 3)
for(i in 1:length(v.series_ids)){
setTxtProgressBar(pb, i)
df.annotation.i <- subset(df.annotation, df.annotation$series_id == v.series_ids[i])
v.exp.i <- unique(df.annotation.i$replicate_group_w_control)
v.exp.i <- v.exp.i[v.exp.i != 0]
v.set.i <- unique(df.annotation.i$sublevel_series_id)
for(j in 1:length(v.set.i)){
for(k in 1:length(v.exp.i)){
df.annotation.control <- subset(df.annotation.i, df.annotation.i$sublevel_series_id == v.set.i[j] & df.annotation.i$replicate_group_w_control == 0)
df.annotation.j <- subset(df.annotation.i, df.annotation.i$sublevel_series_id == v.set.i[j] & df.annotation.i$replicate_group_w_control == v.exp.i[k])
if(nrow(df.annotation.j) > 0){
## Individual t-test p-values
X.treatment <- t(m.expression[,df.annotation.j$sample_id])
X.control <- t(m.expression[,df.annotation.control$sample_id])
ttpv <- numeric(dim(X.treatment)[2])
names(ttpv) <- colnames(X.treatment)
v.fc <- numeric(dim(X.treatment)[2])
names(v.fc) <- colnames(X.treatment)
v.directionality <- numeric(dim(X.treatment)[2])
names(v.directionality) <- colnames(X.treatment)
v.series_sets <- c(v.series_sets, v.series_ids[i])
v.repeats <- c(v.repeats, seq(1:dim(X.treatment)[1]))
v.treatment_buildingblocks <- c(v.treatment_buildingblocks, rep(exp.counter,dim(X.treatment)[1]))
exp.counter <- exp.counter + 1
for(l in 1:ncol(X.treatment)){ # per gene
if(sd(X.treatment[,l]) != 0 | sd(X.control[,l]) != 0){
tt <- t.test(X.treatment[,l], X.control[,l]) # welch t-test per gene
ttpv[l] = unlist(tt$p.value)
v.fc[l] <- mean(X.treatment[,l]) - mean(X.control[,l])
if(mean(X.treatment[,l]) == mean(X.control[,l])){
v.directionality[l] = 0
}else if(mean(X.treatment[,l]) > mean(X.control[,l])){
v.directionality[l] = 1
}else{
v.directionality[l] = -1
}
}else{
ttpv[l] <- 1
}
}
m.de <- cbind(m.de, ttpv)
names(v.fc) <- v.series_ids[i]
m.fc <- cbind(m.fc, v.fc)
names(v.directionality) <- v.series_ids[i]
m.directionality <- cbind(m.directionality, v.directionality)
ids_differentialExpressionSamples <- c(ids_differentialExpressionSamples, unique(df.annotation.j$unique_ID))
}
}
}
}
close(pb)
##### now the explicit stress conditions
df.annotation.control <- subset(df.annotation, df.annotation$series_id == v.ath.Stress.treatment[1])
pb <- txtProgressBar(min = 0, max = 9, style = 3)
for(i in 2:9){
df.annotation.i <- subset(df.annotation, df.annotation$series_id == v.ath.Stress.treatment[i])
v.exp.i <- unique(df.annotation.i$replicate_group_w_control)
v.exp.i <- v.exp.i[v.exp.i != 0]
v.set.i <- unique(df.annotation.i$sublevel_series_id)
for(j in 1:length(v.set.i)){
for(k in 1:length(v.exp.i)){
df.annotation.control.j <- subset(df.annotation.control, df.annotation.control$sublevel_series_id == v.set.i[j] & df.annotation.control$replicate_group_w_control == v.exp.i[k])
df.annotation.j <- subset(df.annotation.i, df.annotation.i$sublevel_series_id == v.set.i[j] & df.annotation.i$replicate_group_w_control == v.exp.i[k])
if(nrow(df.annotation.j) > 0){
## Individual t-test p-values
X.treatment <- t(m.expression[,df.annotation.j$sample_id])
X.control <- t(m.expression[,df.annotation.control.j$sample_id])
ttpv <- numeric(dim(X.treatment)[2])
names(ttpv) <- colnames(X.treatment)
v.fc <- numeric(dim(X.treatment)[2])
names(v.fc) <- colnames(X.treatment)
# m.fc.t <- matrix(0, dim(X.treatment)[2], dim(X.treatment)[1])
v.series_sets <- c(v.series_sets, v.ath.Stress.treatment[i])
v.repeats <- c(v.repeats, seq(1:dim(X.treatment)[1]))
v.treatment_buildingblocks <- c(v.treatment_buildingblocks, rep(exp.counter,dim(X.treatment)[1]))
exp.counter <- exp.counter + 1
for(l in 1:ncol(X.treatment)){
if(sd(X.treatment[,l]) != 0 | sd(X.control[,l]) != 0){
tt <- t.test(X.treatment[,l],X.control[,l])
ttpv[l] = unlist(tt$p.value)
v.fc[l] <- mean(X.treatment[,l]) - mean(X.control[,l])
if(mean(X.treatment[,l]) == mean(X.control[,l])){
v.directionality[l] = 0
}else if(mean(X.treatment[,l]) > mean(X.control[,l])){
v.directionality[l] = 1
}else{
v.directionality[l] = -1
}
}else{
ttpv[l] <- 1
}
}
m.de <- cbind(m.de, ttpv)
names(v.fc) <- v.ath.Stress.treatment[i]
# colnames(m.fc.t) <- rep(v.series_ids[i], dim(m.fc.t)[2])
m.fc <- cbind(m.fc, v.fc)
names(v.directionality) <- v.ath.Stress.treatment[i]
m.directionality <- cbind(m.directionality, v.directionality)
ids_differentialExpressionSamples <- c(ids_differentialExpressionSamples, unique(df.annotation.j$unique_ID))
}
}
}
}
close(pb)
rownames(m.fc) <- rownames(m.directionality) <- rownames(m.de)
colnames(m.fc) <- colnames(m.directionality) <- colnames(m.de) <- v.series_sets
write.table(m.de, "F:/junkDNA.ai/datasets/athaliana_gene_expression/m.pvalue_differentialExpression.txt", row.names = F, col.names = F, sep = "\t")
write.table(m.fc, "F:/junkDNA.ai/datasets/athaliana_gene_expression/m.foldChange_differentialExpression.txt", row.names = F, col.names = F, sep = "\t")
#df.annotation_series = unique(df.annotation[,c("series_id", "condition", "condition_optional", "tissue")
#write.table(df.annotation_series, "F:/junkDNA.ai/datasets/athaliana_gene_expression/experiment_series_annotation_He_et_al_2015.txt", row.names = F, sep = "\t")
genes = rownames(m.de)
experiment_series_id = colnames(m.de)
write.table(genes, "F:/junkDNA.ai/datasets/athaliana_gene_expression/genes.txt", row.names = F, col.names = F, sep = "\t")
write.table(ids_differentialExpressionSamples, "F:/junkDNA.ai/datasets/athaliana_gene_expression/ids_differentialExpressionSamples.txt", row.names = F, col.names = F, sep = "\t")
}
utils <- function(m.DE,
th.diffexp = 0.05){
m.fc <- l.data$m.foldChange_differentialExpression
m.de <- l.data$m.pvalue_differentialExpression
m.fc.ternary <- m.fc
m.fc.ternary[m.fc.ternary > 0] <- 1
m.fc.ternary[m.fc.ternary < 0] <- -1
m.de.bin <- m.de
m.de.bin[m.de.bin <= th.diffexp] <- 10
m.de.bin[m.de.bin <= 1] <- 0
m.de.bin[m.de.bin > 0] <- 1
m.de.ternary <- m.fc.ternary * m.de.bin
# ..
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.