R/plot_varianceDendrogram.R

Defines functions plot_varianceDendrogram

Documented in plot_varianceDendrogram

#' Plots variances of clusters
#'
#' @param data cor.object
#' @param n number of clusters
#'
#' @return
#' @export
#'
#' @import ggplot2
#' @importFrom reshape2 melt
#'
#'
plot_varianceDendrogram <- function(data, n = 10, shorten = TRUE, true.xaxis = TRUE) {

  if(!hasArg(data)) {
    data <- getData()
  }

  cluster.list <- list()
  cluster.names <- c()

  for(i in 1:n) {

    cluster.list[[n]] <- list()

    for(j in 1:i) {

      cluster.list[[i]][[j]] <- ct(data[["dendrogram"]], k = i, l = j)

      if(i == 1) {
        names(cluster.list[[i]])[j] <- paste0(LETTERS[i], j)
        cluster.names <- c(cluster.names, names(cluster.list[[i]])[j])
      }

      else {

        for(k in 1:(i-1)) {
          if(cluster.list[[i]][[j]][1] %in% cluster.list[[i-1]][[k]]) {
            parent <- names(cluster.list[[i-1]])[k]
          }
        }

        names(cluster.list[[i]])[j] <- paste0(parent, "-", LETTERS[i], j)
        cluster.names <- c(cluster.names, names(cluster.list[[i]])[j])
      }

    }

  }



  paths <- cluster.names[(length(cluster.names) - n + 1):length(cluster.names)]

  cv <- data.frame(matrix(ncol = n, nrow = n))

  colnames(cv) <- paths
  rownames(cv) <- 1:n



  #mean coefficient of variation
  for(i in 1:n) {

    for(j in 1:i) {

      if(length(cluster.list[[i]][[j]]) > 1) {

        cv[i, regexpr(names(cluster.list[[i]])[j], colnames(cv)) != -1] <- mean(cv(n(data[["expr"]][, cluster.list[[i]][[j]]])))

      }
      else {

        cv[i, regexpr(names(cluster.list[[i]])[j], colnames(cv)) != -1] <- cv(as.matrix(data[["expr"]][, cluster.list[[i]][[j]]]))

      }

    }

  }









  #shorten
  if(shorten) {
    for(i in 1:ncol(cv)) {
      j <- nrow(cv)
      stop <- FALSE
      while(!stop) {
        if(cv[j, i] == cv[j-1, i]) {
          cv[j, i] <- NA
          j <- j - 1
        }
        else {
          stop <- TRUE
        }
      }
    }
  }


  if(true.xaxis) {
    rownames(cv) <- abs(data[["dendrogram"]]$height[length(data[["dendrogram"]]$height):(length(data[["dendrogram"]]$height) - n + 1)] - data[["dendrogram"]]$height[length(data[["dendrogram"]]$height)])

  }



  cv.melt <- reshape2::melt(cbind(rownames(cv), cv))
  #cv.melt$col <- "red"
  #cv.melt[4:9, "col"] <- "black"

  colnames(cv.melt) <- c("X", "Y", "value")
  #colnames(cv.melt) <- c("X", "Y", "value", "col")

  cv.melt$X <- as.numeric(cv.melt$X)


  ggplot(cv.melt, aes(x=X, y = value, group = Y)) +
    geom_line(aes(), size = .5) +
    #annotate("text",x=max(cv.melt$value), y=apply(cv, 2, FUN = function(x) x[sum(!is.na(x))]), label=colnames(cv)) +
    #geom_point() +
    theme_classic()

}
nicohuttmann/htmnanalysis documentation built on Dec. 6, 2020, 3:02 a.m.