# using identifications based on iTAK platform to identify regulator families involved
tf.families = read.table("experiments/Arabidopsis_thaliana-all.txt", header = FALSE, fill = TRUE, stringsAsFactors = FALSE)[,1:2]
tf.families$V1 <- gsub("\\..*", "",tf.families$V1)
tf.families <- unique(tf.families)
v.tf_families <- tf.families$V2
names(v.tf_families) <- tf.families$V1
v.regulators <- v.tf_families
v.regulators <- v.regulators[!grepl("RLK", v.regulators) & !grepl("PPC", v.regulators) & !grepl("AGC", v.regulators) & !grepl("Group", v.regulators) & !grepl("mTERF", v.regulators) & !grepl("Tify", v.regulators) & !grepl("Aur", v.regulators) & !grepl("CAMK", v.regulators) & !grepl("TRAF", v.regulators) & !grepl("MED7", v.regulators)]
df.condition_tissue <- unique(df.experiment_condition_annotation[,c("condition", "condition_optional", "tissue", "unique_ID")])
m.fc <- l.data$m.foldChange_differentialExpression
m.de <- l.data$m.pvalue_differentialExpression
m.fc[m.fc > 0] <- 1
m.fc[m.fc < 0] <- -1
m.de[m.de <= th.diffexp] <- 10
m.de[m.de <= 1] <- 0
m.de[m.de > 0] <- 1
m.de <- m.fc * m.de
# subset to growth & defense genes
df = read.csv("datasets/growth_defense_genes.csv")
v.mes = intersect(rownames(m.fc), df$ID)
m.de.gd = m.de[v.gns,]
# TFs
df.tfs = read.table("datasets/tfs_groups.csv", sep = "\t", header = T)
df.net = read.table("datasets/network.csv", sep = "\t", header = T)
# select indices ...
tb.condition_treatments <- l.data$tb.condition_treatments
tb.condition_tissues <- l.data$tb.condition_tissues
ids.exps = unique(l.data$df.experiment_condition_annotation$unique_ID)
# blueprint network (large enough statistics but static)
m.grn <- as.matrix(l.res.grn_tfbs$m.lead_support_w_motif.grn)
hist(m.grn[m.grn > 0])
length(m.grn[m.grn > 0])
dim(m.grn)
m.grn[m.grn > 0] = 1
sum(m.grn)
library(psych)
# get TFs as enriched
v.gns <- vector(mode = "list", length = 2)
v.gns[[1]] <- unique(subset(df, df$Group == "growth")$ID)
v.gns[[2]] <- unique(subset(df, df$Group == "defense")$ID)
names(v.gns) = c("growth", "defense")
groups <- c(rep(names(v.gns)[1], length(v.gns[[1]])), rep(names(v.gns)[2], length(v.gns[[2]])))
names(groups) <- c(v.gns[[1]], v.gns[[2]])
doms <- df$Domain
names(doms) <- df$ID
message("track the epxression pattern of metabolic genes")
expr <- vector(mode = "list", length = 2)
expr[[1]] <- matrix(0, nrow=2, ncol=length(ids.exps))
expr[[2]] <- matrix(0, nrow=2, ncol=length(ids.exps))
names(expr) = c("growth", "defense")
rownames(expr[[1]]) <- rownames(expr[[2]]) <- c("-1", "1")
colnames(expr[[1]]) <- colnames(expr[[2]]) <- colnames(m.de)
for(j in 1:length(ids.exps)){
id <- ids.exps[j]
id <- as.character(id)
for(k in 1:2){
n.gns <- length(v.gns[[k]])
t <- table(m.de[v.gns[[k]],id])
if("-1" %in% names(t)){
expr[[k]][1, id] <- round(t["-1"] / n.gns * 100, 1)
}
if("1" %in% names(t)){
expr[[k]][2, id] <- round(t["1"] / n.gns * 100, 1)
}
}
}
message("correlation across conditions between ratios of up and down regulated metabolic enzumes")
cor(expr[[1]][1,], expr[[1]][2,])
cor(expr[[2]][1,], expr[[2]][2,])
df.data <- data.frame(repressed = expr[[1]][1,], expressed = expr[[1]][2,], col = "blue")
df.data <- rbind(df.data, data.frame(repressed = expr[[2]][1,], expressed = expr[[2]][2,], col = "red"))
plot(df.data$expressed, df.data$repressed, col=df.data$col)
legend(x="topright", legend = c("growth", "defense"), col=c("blue","red"), pch=1)
####
df["repressed"] = 0
df["expressed"] = 0
df["col"] = ifelse(df$Group == "growth", "blue", "red")
for(i in 1:nrow(df)){
gn = df$ID[i]
df$Group[i]
t <- table(m.de[gn,])
if("-1" %in% names(t)){
df$repressed[i] = round(t["-1"] / ncol(m.de) * 100, 1)
}
if("1" %in% names(t)){
df$expressed[i] = round(t["1"] / ncol(m.de) * 100, 1)
}
}
plot(df$expressed, df$repressed, col=df$col)
legend(x="topright", legend = c("growth", "defense"), col=c("blue","red"), pch=1)
## repression and expression
v.tfs <- rownames(m.grn)
df.set <- data.frame(repressed = rep(0, ncol(m.grn)),
expressed = rep(0, ncol(m.grn)),
col = rep(0, ncol(m.grn)))
for(i in 1:ncol(m.grn)){
gn = colnames(m.grn)[i]
t <- table(m.de[gn,])
if("-1" %in% names(t)){
df.set$repressed[i] = round(t["-1"] / ncol(m.de) * 100, 1)
}
if("1" %in% names(t)){
df.set$expressed[i] = round(t["1"] / ncol(m.de) * 100, 1)
}
if(gn %in% v.tfs){
df.set$col[i] = "red"
}else if(gn %in% v.mes){
df.set$col[i] = "blue"
}else{
df.set$col[i] = "gray"
}
}
plot(df.set$expressed, df.set$repressed, col=df.set$col)
legend(x="topright", legend = c("TFs", "MEs", "Other"), col=c("red", "blue", "gray"), pch=1)
message("differential expression of transcription factors ")
hist(round(rowSums(abs(m.de[v.tfs, ])) / dim(m.de)[2]*100,1), 50)
hist(round(rowSums(abs(m.de[v.mes, ])) / dim(m.de)[2]*100,1), 50)
hist(round(rowSums(abs(m.de)) / dim(m.de)[2]*100,1), 50)
median(rowSums(abs(m.de[v.tfs, ])) )
median(rowSums(abs(m.de[v.mes, ])))
median(rowSums(abs(m.de)))
m.crosstalk <- matrix(0, nrow=length(v.tfs), ncol=2)
rownames(m.crosstalk) = v.tfs
for(i in 1:length(v.tfs)){
v.tgs <- names(which(m.grn[v.tfs[i],] != 0))
n.tgs <- length(v.tgs)
n.genome <- dim(m.grn)[2]
th.min.samples = 2
for(k in 1:2){
n.gns <- length(v.gns[[k]])
n.set <- length(intersect(v.gns[[k]], v.tgs))
hitInSample <- n.set
sampleSize <- n.gns
hitInPop <- n.tgs
popSize <- n.genome
failInPop <- popSize - hitInPop
fc <- ((hitInSample / sampleSize) / (hitInPop / popSize))
if(fc > 1 & hitInSample >= th.min.samples){
p.val <- phyper(hitInSample-1, hitInPop, failInPop, sampleSize, lower.tail= FALSE)
if(p.val <= 0.05)
m.crosstalk[i,k] = round(n.set / n.gns * 100, 1)
}
}
}
v.tfs.sel <- names(which(rowSums(m.crosstalk) > 0))
plot(m.crosstalk[v.tfs.sel,1], m.crosstalk[v.tfs.sel,2], xlab = "growth", ylab = "defense")
message("percentage of growth / defense enriched regulators ")
round(length(v.tfs.sel) / length(v.tfs) * 100, 1)
#####
m.conditions <- matrix(0, nrow=length(v.tfs), ncol=length(tb.condition_treatments))
rownames(m.conditions) = v.tfs
colnames(m.conditions) = names(tb.condition_treatments)
m.tissues <- matrix(0, nrow=length(v.tfs), ncol=length(tb.condition_tissues))
rownames(m.tissues) = v.tfs
colnames(m.tissues) = names(tb.condition_tissues)
for(i in 1:length(v.tfs)){
i.conds <- names(which(m.de[v.tfs[i],] != 0))
# v.tgs <- names(which(m.grn[v.tfs[i],] != 0))
id <- ids.exps[j]
df.j <- subset(df.condition_tissue, df.condition_tissue$unique_ID %in% as.numeric(i.conds))
cond <- c(df.j$condition, df.j$condition_optional)
cond <- cond[cond != ""]
tis <- df.j$tissue
cond <- table(cond)
tis <- table(tis)
m.conditions[v.tfs[i], names(cond)] = round(cond / tb.condition_treatments[names(cond)] * 100, 1)
m.tissues[v.tfs[i], names(tis)] = round(tis / tb.condition_tissues[names(tis)] * 100, 1)
}
library(reshape2)
library(ggplot2)
df.heatmap <- melt(t(m.tissues))
df.MR_vs_conditions <- df.heatmap
df.MR_vs_conditions <- subset(df.MR_vs_conditions, !is.na(df.MR_vs_conditions$value))
df.heatmap$Var2 <- paste(df.heatmap$Var2, "(", v.tf_families[df.heatmap$Var2], ")", sep = "")
v.breaks = c(0, 5,25,50,75,100)
v.lables <- c("0", "5","25","50", "75", "100")
ggheatmap <- ggplot(df.heatmap, aes(Var1, Var2, fill = value)) + geom_tile(color = "black") + scale_fill_gradient2(low = "yellow", high = "red", mid = "white", midpoint = 0, name = "percentage", breaks=v.breaks, labels=v.lables) + theme_minimal() + theme(axis.title.x=element_blank(), axis.text.x = element_text(angle = 45, vjust = 1, size = 10, hjust = 1, color = "black")) + theme(axis.title.y=element_blank(), axis.text.y = element_text(angle = 0, vjust = , size = 10, hjust = 1, color = "black"))
filename <- paste("tmp/", "transcription_factors_vs_tissues.pdf", sep = "")
pdf(filename, width=8, height=120)
plot(ggheatmap)
dev.off()
v.tgs <- c(v.gns[[1]], v.gns[[2]])
idx.tgs <-seq(1, length(v.tgs), 1)
names(idx.tgs) <- v.tgs
idx.map <- seq(1, length(ids.exps), 1)
names(idx.map) <- as.character(ids.exps)
master_regulator <- array(rep(0, length(v.tfs.sel)*2*length(idx.map)), dim=c(length(v.tfs.sel), 2, length(idx.map)))
master_targets <- array(rep(0, length(v.tfs.sel)*length(idx.map)*length(idx.tgs)), dim=c(length(v.tfs.sel), length(idx.map), length(idx.tgs)))
pb <- txtProgressBar(min = 0, max = length(v.tfs.sel), style = 3)
for(i in 1:length(v.tfs.sel)){
setTxtProgressBar(pb, i)
tf <- v.tfs.sel[i]
i.conds <- names(which(m.de[tf,] != 0))
v.tgs <- names(which(m.grn[tf,] != 0))
n.tgs <- length(v.tgs)
df.j <- subset(df.condition_tissue, df.condition_tissue$unique_ID %in% as.numeric(i.conds))
cond <- c(df.j$condition, df.j$condition_optional)
cond <- cond[cond != ""]
tis <- df.j$tissue
cond <- table(cond)
tis <- table(tis)
for(k in 1:2){ # less than or more than? -> putative "master" regulators => cross talk? -> general blueprint -> activated per condition?
n.pop = length(v.gns[[k]])
if(m.crosstalk[tf,k] > 0){
# n.gns <- length(v.gns[[k]])
v.tgs.k <- intersect(v.gns[[k]], v.tgs)
n.tgs.k <- length(v.tgs.k)
for(j in 1:length(i.conds)){
c = i.conds[j]
v.tgs.kc <- names(which(m.de[v.tgs.k, c] != 0)) # all targets with active conditions
n.tgs.kc <- length(v.tgs.kc)
hitInSample <- n.tgs.kc
sampleSize <- n.tgs.k
hitInPop <- length(which(m.de[v.gns[[k]], c] != 0))
popSize <- n.pop
failInPop <- popSize - hitInPop
fc <- ((hitInSample / sampleSize) / (hitInPop / popSize))
if(fc > 1 & hitInSample >= th.min.samples){
p.val <- phyper(hitInSample-1, hitInPop, failInPop, sampleSize, lower.tail= FALSE)
if(p.val <= 0.05){
master_regulator[i,k,idx.map[c]] = round(n.tgs.kc / n.tgs.k * 100, 1)
master_targets[i, idx.map[c], idx.tgs[v.tgs.kc]] = m.de[v.tgs.kc, c]
}
}
}
}
}
}
close(pb)
# plot 98 patterns
folder = "tmp/roles/"
for(i in 1:length(v.tfs.sel)){
filename <- paste(folder, v.tfs.sel[i], ".pdf", sep = "")
pdf(filename, width, height)
plot(master_regulator[i,1,], master_regulator[i,2,], xlab = "growth", ylab = "defense")
dev.off()
}
library(textshape)
folder = "tmp/targets/"
for(i in 1:length(v.tfs.sel)){
m <- master_targets[i,,]
vals <- paste(df.condition_tissue$condition, df.condition_tissue$tissue, sep = ", ")
names(vals) <- df.condition_tissue$unique_ID
rownames(m) <- vals[idx.map]
colnames(m) <- names(idx.tgs)
m.heatmap <- t(m)
m.heatmap <- m.heatmap[rowSums(abs(m.heatmap)) > 0, ,drop = F]
m.heatmap <- m.heatmap[, colSums(abs(m.heatmap)) > 0 ,drop = F]
if(dim(m.heatmap)[1] > 1 & dim(m.heatmap)[2] > 1)
m.heatmap <- cluster_matrix(m.heatmap)
df.heatmap <- melt(m.heatmap)
if(nrow(df.heatmap) > 0){
df.heatmap$Var1 <- paste(df.heatmap$Var1, " (", groups[as.character(df.heatmap$Var1)], " - ", doms[as.character(df.heatmap$Var1)], " )", sep = "")
v.breaks = c(-1, 0, 1)
v.lables <- c("-1", "0", "1")
ggheatmap <- ggplot(df.heatmap, aes(Var1, Var2, fill = value)) + geom_tile(color = "black") + scale_fill_gradient2(low = "green", high = "red", mid = "black", midpoint = 0, name = "expression", breaks=v.breaks, labels=v.lables, limits = c(-1,1)) + theme_minimal() + theme(axis.title.x=element_blank(), axis.text.x = element_text(angle = 45, vjust = 1, size = 10, hjust = 1, color = "black")) + theme(axis.title.y=element_blank(), axis.text.y = element_text(angle = 0, vjust = , size = 10, hjust = 1, color = "black"))
width = max(5, length(unique(df.heatmap$Var1)) / 2)
height = max(5, length(unique(df.heatmap$Var2)) / 3)
filename <- paste(folder, v.tfs.sel[i], ".pdf", sep = "")
pdf(filename, width, height)
plot(ggheatmap)
dev.off()
}
}
## require similar behavior? - maybe to stringent
## separate growth and defense
# requirement - co-diffexpression in the same condition
n.tgs <- length(v.tgs)
n.genome <- dim(m.grn)[2]
#for(j in 1:length(ids.exps)){
#id <- ids.exps[j]
#id <- as.character(id)
# TF behaviour across conditions or other TFs
# consistent conditions?
# isolated events - or
th.min.samples = 2
for(k in 1:2){ # less than or more than? -> putative "master" regulators => cross talk? -> general blueprint -> activated per condition?
n.gns <- length(v.gns[[k]])
n.set <- length(intersect(v.gns[[k]], v.tgs))
hitInSample <- n.set
sampleSize <- n.gns
hitInPop <- n.tgs
popSize <- n.genome
failInPop <- popSize - hitInPop
fc <- ((hitInSample / sampleSize) / (hitInPop / popSize))
if(fc > 1 & hitInSample >= th.min.samples){
p.val <- phyper(hitInSample-1, hitInPop, failInPop, sampleSize, lower.tail= FALSE)
if(p.val <= 0.05)
m.crosstalk[i,k] = round(n.set / n.gns * 100, 1)
}
}
}
v.tfs.sel <- names(which(rowSums(m.crosstalk) > 0))
plot(m.crosstalk[v.tfs.sel,1], m.crosstalk[v.tfs.sel,2], xlab = "growth", ylab = "defense")
# m.crosstalk[v.tfs.sel,] # in blue print
message("percentage of growth / defense enriched regulators ")
round(length(v.tfs.sel) / length(v.tfs) * 100, 1)
# unsupervised clustering - with labels?
# identify multiple TFs to explain combined behaviour?
# TODO: check with transcription factors and general genes?
# TODO: transcription factor regulation of booth genes
for(k in 1:2){
n.gns <- length(v.gns[[k]])
for(l in 1:n.gns){
gn = v.gns[[k]][l]
t <- table(m.de[gn,])
}
t <- table(m.de[v.gns[[k]],id])
}
# identify regulators that may function on their own
# TODO: crosstalk per condition - as function
# AT2G01760 - https://www.uniprot.org/uniprot/Q8L9Y3
# characterize all 90 TFs
# also pairwise ...
message("regulators of crosstalk?")
plot(m.crosstalk[v.tfs.sel,1], m.crosstalk[v.tfs.sel,2]) #, col=df$col)
# legend(x="topright", legend = c("growth", "defense")) #, col=c("blue","red"), pch=1)
# filename = paste("tmp/", "growth_expression_ratios.pdf", sep = "")
# df.data <- data.frame(cond = names(tb.condition_tissue_differentialExpression), nr = as.numeric(tb.condition_tissue_differentialExpression))
# p <- ggplot(df.data, aes(cond, nr))
# p <- p + geom_bar(stat = "identity") + theme_bw() + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + ylab("cumulative number of active (expressed and repressed) genes at differential expression p-value < 0.05")
# pdf(filename, 18,8)
# p
# dev.off()
#
#
#
# library(ggplot2)
#
# ggplot(df.data, aes(x=repressed, y=expressed)) + geom_point()
#
#
#
#
#
# filename = paste("tmp/", "growth_expression_ratios.pdf", sep = "")
# df.data <- data.frame(cond = names(tb.condition_tissue_differentialExpression), nr = as.numeric(tb.condition_tissue_differentialExpression))
# p <- ggplot(df.data, aes(cond, nr))
# p <- p + geom_bar(stat = "identity") + theme_bw() + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + ylab("cumulative number of active (expressed and repressed) genes at differential expression p-value < 0.05")
# pdf(filename, 18,8)
# p
# dev.off()
for(j in 1:length(ids.exps)){
# get meta information about the condition? (heat, etc.. time course )
id <- ids.exps[j]
df.j <- subset(df.experiment_condition_annotation, df.experiment_condition_annotation$unique_ID == id)
cond <- c(df.j$condition[1], df.j$condition_optional[1])
cond <- cond[cond != ""]
tis <- df.j$tissue[1]
id <- as.character(id)
print(cond)
print(tis)
# print(paste(cond, " - ", tis))
print(table(m.de[v.gns[[1]],id]))
print(table(m.de[v.gns[[2]],id]))
# TODO: requirement => identify co-differentially expressed
# general gene expression behavior in
print(table(m.de.gd[,id])) # selection growth and defense subgroup
print(table(m.de[,id])) # genome
# get the blueprint ?
# get the differential expression of the TFs
for(k in 1:2){
print(names(v.gns)[k])
idx <- which(m.grn[,v.gns[[k]]] > 0, arr.ind = T) # get growth regulators
v.tfs <- names(which(rowSums(m.grn[,v.gns[[k]]]) > 0)) # from blueprint
n.tfs = length(v.tfs)
table(m.de[v.tfs,id]) # TODO: motivation to isolate specific TFs that are "active" in a given condition
v.tfs <- names(which(m.de[v.tfs,id] != 0)) # instantiate in condition? - condition active TFs
table(m.de[v.tfs,id])
print(length(v.tfs) / n.tfs)
v.mes <- names(which(m.de[v.gns[[k]],id] != 0))
print(length(v.mes) / length(v.gns[[k]]))
# active regulation
rowSums(m.grn[v.tfs, v.mes])
# make strong statistical case to link regulator to targets to condition
# TODO: temporal case for the exceptions in the dataset (temporal up and down-regulation of growth and defense)
# TODO: separate into similar condtion groups (box plots) - TFs, MEs
# TODO: boxplot - regular links? - links per TF?
print(table(rowSums(m.grn[v.tfs, v.mes])))
# distribution - growth vs defense
# TODO: making percentage of regulator extrapolation ...?
}
for(i in 1:length(doms)){
# annotated growth?
df.i <- subset(df, df$Domain == doms[i])
v.gns.i <- unique(df.i$ID)
m.de.i <- m.de[v.gns.i,]
}
# TF identity
# regulation identity
}
# gene pair - statistics across conditions (groups)
# single condition - statistics across genes (groups)
tb.condition_treatments <- l.data$tb.condition_treatments
tb.condition_tissues <- l.data$tb.condition_tissues
df.experiment_condition_annotation <- l.data$df.experiment_condition_annotation
doms <- unique(df$Domain)
for(i in 1:length(doms)){
# annotated growth?
df.i <- subset(df, df$Domain == doms[i])
v.gns.i <- unique(df.i$ID)
m.de.i <- m.de[v.gns.i,]
# general bayesian statistics on the total number of hits ...
# l.data$tb.condition_treatments
# l.data$tb.condition_tissues
for(j in 1:length(ids.exps)){
id <- ids.exps[j]
df <- subset(df.experiment_condition_annotation, df.experiment_condition_annotation$unique_ID == id)
cond <- c(df$condition[1], df$condition_optional[1])
cond <- cond[cond != ""]
tis <- df$tissue[1]
id <- as.character(id)
cond
tis
print(table(m.de.i[,id]))
}
}
### turn the argument around ##
### work with the experimentally validated defense / growth genes
### having enough (not necessary complete) set of conditions to showcase defense vs. growth and regulation behaviour
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.