R/utils.R

Defines functions utils precompute_and_store_diffExp_and_foldchange_matrices plot_heatmap

plot_heatmap <- function(m.exp=m.exp
                          tfs=tfs, tgs=tgs,
                          title = "", v.name = "",
                          v.max = 1.0, v.min = 0){

  # assumption is that the matrix has already been ordered by dendrogram

  library(ggplot2)
  library(ggdendro)
  library(reshape2)
  library(grid)

  dendro.plot <- ggdendrogram(data = dd.col, rotate = TRUE)
  dendro.plot <- dendro.plot + theme(axis.text.y = element_text(size = 6), axis.text.x = element_blank())

  n.specs = dim(m.exp)[2]

  # Melt the correlation matrix
  melted_cormat <- melt((m.exp))
  melted_cormat <- na.omit(melted_cormat)

  melted_value <- melt((m.sig))
  melted_value <- na.omit(melted_value)

  # Create a ggheatmap
  ggheatmap <- ggplot(melted_cormat, aes(Var2, Var1, fill = value)) +
    geom_tile(color = "black") +
    scale_fill_gradient2(low = "green", high = "red", mid = "white",
                         midpoint = 0, limit = c(v.min,v.max), name=v.name) +

    #               scale_x_discrete(breaks = rownames(cormat), labels=v.x_axis.ticks) +
    #               scale_y_discrete(breaks = colnames(cormat), labels=v.y_axis.ticks) +

    theme_minimal() + # minimal theme
    theme(axis.text.x = element_text(angle = 45, vjust = 1,
                                     size = 6, hjust = 1,
                                     colour="black")) +
    theme(axis.text.y = element_text(angle = 0, vjust = 0,
                                     size = 6, hjust = 0,
                                     colour="black")) +

    theme(legend.key.size = unit(0.5, 'cm'), #change legend key size
          legend.key.height = unit(0.5, 'cm'), #change legend key height
          legend.key.width = unit(0.5, 'cm'), #change legend key width
          legend.title = element_text(size=6), #change legend title font size
          legend.text = element_text(size=6)) +  #change legend text font size

    # ggtitle(title) +
    # theme(plot.title = element_text(lineheight=1.0,  size = 11)) +
    coord_fixed()


  ggheatmap <- (ggheatmap +

                  # commented out for non significance representation
                  geom_text(aes(Var2, Var1, label = melted_value$value), color = "black", size = 6, vjust = 0.8) +

                  theme(
                    axis.title.x = element_blank(),
                    axis.title.y = element_blank(),
                    axis.text.y = element_blank(), # remove identifies
                    panel.grid.major = element_blank(),
                    panel.border = element_blank(),
                    panel.background = element_blank(),
                    axis.ticks = element_blank(),
                    legend.position = "left",
                    # legend.justification = c(-1, 1),
                    #legend.position = c(0.6, 0.7),
                    #legend.direction = "horizontal")+
                    #guides(fill = guide_colorbar(barwidth = 7, barheight = 1,title.position = "top", title.hjust = 0.5))
                  )
                # geom_segment(data=my.lines, aes(x,y,xend=xend, yend=yend), size=1, inherit.aes=F)

  )

  pdf("test.pdf", height = 15, width = 8, paper = "letter")

  grid.newpage()
  print(ggheatmap, vp = viewport(x = 0.32, y = 0.5, width = 0.2, height = 1.0))
  print(dendro.plot, vp = viewport(x = 0.5, y = 0.512, width = 0.2, height = 1.055))

  dev.off()

  # 8 15

  #export figure
  ggsave(p,filename="test.png",height=8,width=15,units="in",dpi=200)

  return(ggheatmap)
}





#' precompute_and_store_diffExp_and_foldchange_matrices
#'
#'
#' @keywords
#' @export
#' @examples
precompute_and_store_diffExp_and_foldchange_matrices <- function(folder = "/tmp",
                                                                 filename.annotation = "MERIT_athaliana_PMN_2017/athaliana_gene_expression/experiment_annotation_He_et_al_2015.txt",
                                                                 filename.geneExpression = "MERIT_athaliana_PMN_2017/athaliana_gene_expression/GSE69995_re-analyzed_data_matrix.txt",
                                                                 v.ath.Stress.treatment = c("GSE5620", "GSE5621", "GSE5622", "GSE5623", "GSE5624", "GSE5625", "GSE5626", "GSE5627", "GSE5628")
                                                                 ){

  df.annotation <- read.csv(filename.annotation,  header = TRUE, sep = "\t", fill = TRUE, stringsAsFactors = FALSE)
  v.colnames_mandatory = c("sample_id", "series_id", "condition", "condition_optional", "tissue","replicate_group_w_control", "sublevel_series_id", "unique_ID")
  if(!all(v.colnames_mandatory %in% names(df.annotation))){
    stop(paste("could not find all mandatory columns in file:", paste(v.colnames_mandatory, collapse = ", ")))
  }

  tb.treatments = table(c(df.annotation$condition, df.annotation$condition_optional))
  tb.treatments = tb.treatments[names(tb.treatments) != ""]

  # tissue and treatment distributions
  tb.tissues <- table(df.annotation$tissue)
  v.conditionGroups = names(tb.treatments)

  # loading gene expressino matrix
  m.expression <- read.table(filename.geneExpression, row.names = 1, header = TRUE, sep = "\t", quote = "", stringsAsFactors = FALSE)
  m.expression <- as.matrix(m.expression)

  tb.conditions = table(df.annotation$condition)
  tb.conditions[names(table(df.annotation$condition_optional))] = tb.conditions[names(table(df.annotation$condition_optional))] + table(df.annotation$condition_optional)

  # split the experiments and tissues
  df.annotation["key.exp"] <- NA
  df.exp.pairs <- unique(df.annotation[,c("tissue","series_id")])
  for(i in 1:nrow(df.exp.pairs)){
    i.set <- which(df.annotation$tissue == df.exp.pairs[i,1] & df.annotation$series_id == df.exp.pairs[i,2])
    df.annotation$key.exp[i.set] <- i
  }

  m.expression <- m.expression[,df.annotation$sample_id]
  v.series_ids <- unique(df.annotation$series_id)


  v.series_ids <- v.series_ids[!v.series_ids %in% v.ath.Stress.treatment]
  v.series_sets <- c()

  ####

  m.de <- c()
  m.fc <- c()
  m.directionality <- c()
  v.treatment_buildingblocks <- c()
  v.repeats <- c()

  exp.counter <- 1

  ids_differentialExpressionSamples <- c()

  message(paste("Compute differential expression based on Welch t-test and prepare treatment gene expression matrix for two-way anova analysis"))

  pb <- txtProgressBar(min = 0, max = length(v.series_ids), style = 3)
  for(i in 1:length(v.series_ids)){

    setTxtProgressBar(pb, i)

    df.annotation.i <- subset(df.annotation, df.annotation$series_id == v.series_ids[i])

    v.exp.i <- unique(df.annotation.i$replicate_group_w_control)
    v.exp.i <- v.exp.i[v.exp.i != 0]
    v.set.i <- unique(df.annotation.i$sublevel_series_id)

    for(j in 1:length(v.set.i)){

      for(k in 1:length(v.exp.i)){

        df.annotation.control <- subset(df.annotation.i, df.annotation.i$sublevel_series_id ==  v.set.i[j] & df.annotation.i$replicate_group_w_control == 0)
        df.annotation.j <- subset(df.annotation.i, df.annotation.i$sublevel_series_id ==  v.set.i[j] & df.annotation.i$replicate_group_w_control == v.exp.i[k])

        if(nrow(df.annotation.j) > 0){

          ## Individual t-test p-values
          X.treatment <- t(m.expression[,df.annotation.j$sample_id])
          X.control <- t(m.expression[,df.annotation.control$sample_id])

          ttpv <- numeric(dim(X.treatment)[2])
          names(ttpv) <- colnames(X.treatment)

          v.fc <- numeric(dim(X.treatment)[2])
          names(v.fc) <- colnames(X.treatment)

          v.directionality <- numeric(dim(X.treatment)[2])
          names(v.directionality) <- colnames(X.treatment)

          v.series_sets <- c(v.series_sets, v.series_ids[i])
          v.repeats <- c(v.repeats, seq(1:dim(X.treatment)[1]))

          v.treatment_buildingblocks <- c(v.treatment_buildingblocks, rep(exp.counter,dim(X.treatment)[1]))
          exp.counter <- exp.counter + 1

          for(l in 1:ncol(X.treatment)){ # per gene
            if(sd(X.treatment[,l]) != 0 | sd(X.control[,l]) != 0){
              tt <- t.test(X.treatment[,l], X.control[,l]) # welch t-test per gene
              ttpv[l] = unlist(tt$p.value)
              v.fc[l] <- mean(X.treatment[,l]) - mean(X.control[,l])
              if(mean(X.treatment[,l]) == mean(X.control[,l])){
                v.directionality[l] = 0
              }else if(mean(X.treatment[,l]) > mean(X.control[,l])){
                v.directionality[l] = 1
              }else{
                v.directionality[l] = -1
              }
            }else{
              ttpv[l] <- 1
            }
          }
          m.de <- cbind(m.de, ttpv)
          names(v.fc) <- v.series_ids[i]
          m.fc <- cbind(m.fc, v.fc)
          names(v.directionality) <- v.series_ids[i]
          m.directionality <- cbind(m.directionality, v.directionality)

          ids_differentialExpressionSamples <- c(ids_differentialExpressionSamples, unique(df.annotation.j$unique_ID))
        }
      }
    }
  }
  close(pb)

  ##### now the explicit stress conditions

  df.annotation.control <- subset(df.annotation, df.annotation$series_id == v.ath.Stress.treatment[1])

  pb <- txtProgressBar(min = 0, max = 9, style = 3)
  for(i in 2:9){

    df.annotation.i <- subset(df.annotation, df.annotation$series_id == v.ath.Stress.treatment[i])

    v.exp.i <- unique(df.annotation.i$replicate_group_w_control)
    v.exp.i <- v.exp.i[v.exp.i != 0]
    v.set.i <- unique(df.annotation.i$sublevel_series_id)

    for(j in 1:length(v.set.i)){

      for(k in 1:length(v.exp.i)){

        df.annotation.control.j <- subset(df.annotation.control, df.annotation.control$sublevel_series_id ==  v.set.i[j] & df.annotation.control$replicate_group_w_control == v.exp.i[k])
        df.annotation.j <- subset(df.annotation.i, df.annotation.i$sublevel_series_id ==  v.set.i[j] & df.annotation.i$replicate_group_w_control == v.exp.i[k])

        if(nrow(df.annotation.j) > 0){

          ## Individual t-test p-values
          X.treatment <- t(m.expression[,df.annotation.j$sample_id])
          X.control <- t(m.expression[,df.annotation.control.j$sample_id])

          ttpv <- numeric(dim(X.treatment)[2])
          names(ttpv) <- colnames(X.treatment)

          v.fc <- numeric(dim(X.treatment)[2])
          names(v.fc) <- colnames(X.treatment)

          # m.fc.t <- matrix(0, dim(X.treatment)[2], dim(X.treatment)[1])

          v.series_sets <- c(v.series_sets, v.ath.Stress.treatment[i])
          v.repeats <- c(v.repeats, seq(1:dim(X.treatment)[1]))
          v.treatment_buildingblocks <- c(v.treatment_buildingblocks, rep(exp.counter,dim(X.treatment)[1]))
          exp.counter <- exp.counter + 1

          for(l in 1:ncol(X.treatment)){

            if(sd(X.treatment[,l]) != 0 | sd(X.control[,l]) != 0){

              tt <- t.test(X.treatment[,l],X.control[,l])
              ttpv[l] = unlist(tt$p.value)

              v.fc[l] <- mean(X.treatment[,l]) - mean(X.control[,l])

              if(mean(X.treatment[,l]) == mean(X.control[,l])){
                v.directionality[l] = 0
              }else if(mean(X.treatment[,l]) > mean(X.control[,l])){
                v.directionality[l] = 1
              }else{
                v.directionality[l] = -1
              }

            }else{
              ttpv[l] <- 1
            }
          }

          m.de <- cbind(m.de, ttpv)

          names(v.fc) <- v.ath.Stress.treatment[i]
          # colnames(m.fc.t) <- rep(v.series_ids[i], dim(m.fc.t)[2])

          m.fc <- cbind(m.fc, v.fc)

          names(v.directionality) <- v.ath.Stress.treatment[i]
          m.directionality <- cbind(m.directionality, v.directionality)

          ids_differentialExpressionSamples <- c(ids_differentialExpressionSamples, unique(df.annotation.j$unique_ID))

        }

      }
    }

  }
  close(pb)

  rownames(m.fc) <- rownames(m.directionality) <- rownames(m.de)
  colnames(m.fc) <- colnames(m.directionality) <- colnames(m.de) <- v.series_sets


  write.table(m.de, "F:/junkDNA.ai/datasets/athaliana_gene_expression/m.pvalue_differentialExpression.txt", row.names = F, col.names = F, sep = "\t")
  write.table(m.fc, "F:/junkDNA.ai/datasets/athaliana_gene_expression/m.foldChange_differentialExpression.txt", row.names = F, col.names = F, sep = "\t")

  #df.annotation_series = unique(df.annotation[,c("series_id", "condition", "condition_optional", "tissue")
  #write.table(df.annotation_series, "F:/junkDNA.ai/datasets/athaliana_gene_expression/experiment_series_annotation_He_et_al_2015.txt", row.names = F, sep = "\t")

  genes = rownames(m.de)
  experiment_series_id = colnames(m.de)
  write.table(genes, "F:/junkDNA.ai/datasets/athaliana_gene_expression/genes.txt", row.names = F, col.names = F, sep = "\t")
  write.table(ids_differentialExpressionSamples, "F:/junkDNA.ai/datasets/athaliana_gene_expression/ids_differentialExpressionSamples.txt", row.names = F, col.names = F, sep = "\t")


}






utils <- function(m.DE,
                  th.diffexp = 0.05){

  m.fc <- l.data$m.foldChange_differentialExpression
  m.de <- l.data$m.pvalue_differentialExpression

  m.fc.ternary <- m.fc
  m.fc.ternary[m.fc.ternary > 0] <- 1
  m.fc.ternary[m.fc.ternary < 0] <- -1

  m.de.bin <- m.de
  m.de.bin[m.de.bin <= th.diffexp] <- 10
  m.de.bin[m.de.bin <= 1] <- 0
  m.de.bin[m.de.bin > 0] <- 1

  m.de.ternary <- m.fc.ternary * m.de.bin

  # ..


}
mbanf/MERIT documentation built on June 16, 2021, 1:07 p.m.