clustermap: clustermap: a Versatile Toolbox in R for Clustering and Heatmap Visualisation

#' clustermap
#' @description Clustermap is an R package for heatmap and hierarchical clustering visualization
#' @docType package
#' @name clustermap
#' @author Ole Christian Lingjærde & Chloé B. Steen
devtools::use_package("graphics")
devtools::use_package("gclus")
#devtools::use_package("survival")

#' blocks
#' @description This function creates a small example gene expression data set with 110 genes as
#'   rows and 40 samples as columns.
#' @keywords example
#' @export
#' @examples
#' \dontrun{
#' tmp = blocks() # Contains the 110x40-matrix X and 40-vectors y1,y2,y3
#' X = tmp$X
#' y1 = tmp$y1 # Additional information track for the columns
#' y2 = tmp$y2 # Additional information track for the columns
#' y3 = tmp$y3 # Additional information track for the columns
#' }
blocks = function() {
  # Example data set with 110 rows and 40 columns
  A = cbind(matrix(rnorm(70*20),nrow=70), matrix(3+rnorm(70*20),nrow=70))
  B = cbind(-1 + matrix(rnorm(40*30),nrow=40), matrix(1+rnorm(40*10),nrow=40))
  X = rbind(A,B)

  # Additional information tracks for the columns
  y1 = c(rep("Luminal A",14), rep("Luminal B",6), rep("HER2",8), rep("Normal-like",4), rep("Basal-like",8))
  y2 = c(rep(1,10), rep(2,20), rep(3,10))
  y3 = runif(40, 1, 16)

  # Example text descriptor for rows and cols
  rownames(X) = paste("Gene", 1:110, sep="-")
  colnames(X) = paste("Patient", 1:40, sep="-")

  # Randomly permute rows and columns
  k.row = sample(110); k.col = sample(40)
  X = X[k.row, k.col]
  y1 = y1[k.col]; y2 = y2[k.col]; y3 = y3[k.col]

  # Collect it all in a list
  res = list(X=X, y1=y1, y2=y2, y3=y3)
  return(res)
}

#' bellagio
#' @description Function to make a larger example data set. This dataset represents a picture
#' @keywords example
#' @export
bellagio = function() {
  # Read and downsample image of Lago Como
  pict = image_read("Bellagio.jpg")
  pict2 = as.raster(pict)
  pict3 = pict2[seq(1,2448,by=5),seq(1,3264,by=5)]

  # Randomly permute rows and/or columns
  res = list(X=pict3, Xr=pict3[sample(1:nrow(pict3))],
             Xc=pict3[,sample(1:ncol(pict3))],
             Xrc=pict3[sample(1:nrow(pict3)),sample(1:ncol(pict3))])
  return(res)
}

#' col2num
#' @description Function to transform raster to numerical matrix.
#' @param X matrix
#' @param clust String. Direction for clustering, default is "col" (column-wise)
#' @export
col2num = function(X, clust="col") {
  if (clust=="col") {
    # From Nxp to (3N)xp
    Z = matrix(NA, 3*nrow(X), ncol(X))
    for (j in 1:ncol(X)) {
      Z[,j]  = as.vector(col2rgb(X[,j]))
    }
  } else if (clust=="row") {
    # From Nxp to Nx(3p)
    Z = matrix(NA, nrow(X), 3*ncol(X))
    for (i in 1:nrow(X)) {
      Z[i,] = as.vector(col2rgb(X[i,]))
    }
  }
  return(Z)
}

#' num2col
#' @description Function to transform numerical matrix to raster.
#' @param Z matrix
#' @param clust String. Default is "col". Specifies column-wise ("col") or row-rise clustering ("row").
#' @export
num2col = function(Z, clust="col") {
  if (clust=="col") {
    # From (3N)xp to Nxp
    N = nrow(Z)/3
    X = matrix(NA, N, ncol(Z))
    k = 1:3
    for (i in 1:N) {
      X[i,] = rgb(t(Z[k+(i-1)*3,])/255)
    }
  } else if (clust=="row") {
    # From Nx(3p) to Nxp
    N = ncol(Z)/3
    X = matrix(NA, nrow(Z), N)
    k = 1:3
    for (j in 1:N) {
      X[,j] = rgb(t(Z[,k+(j-1)*3])/255)
    }
  }
  return(as.raster(X))
}

#' pca.approx
#' @description Approximates PCA.
#' @param X matrix
#' @param ncomp number of principal components to use.
#' @export
pca.approx = function(X, ncomp) {
  if (ncomp > min(ncol(X),nrow(X))) {
    ncomp = min(ncol(X),nrow(X))
    message(paste("Note: only the first", ncomp, "principal components are used"))
  }
  z = svd(X)
  if (ncomp == 1) {
    d = matrix(z$d[1],1,1)
  } else {
    d = diag(z$d[1:ncomp])
  }
  X = z$u[,1:ncomp,drop=F] %*% d %*% t(z$v[,1:ncomp,drop=F])
  X
}

#' hcluster
#' @description Performs hierarchical clustering of a data matrix X. To cluster both rows and columns,
#' call it twice with clust="col"and clust="row" respectively. The option reorder=TRUE leads to a nice
#' arrangement of the clusters from left to right to ensure neighbors are similar to each other.
#' @param X matrix
#' @param clust String. Perform clustering of rows or columns, can be by column: "col" (default) or by row: "row".
#' @param distance String. Distance measure for hierarchical clustering, can be"euclidean" (default), "pearson", "spearman".
#' @param linkage String. Method for clusterting, can be "complete" (default), "average", "single", "ward".
#' @param reorder Boolean. When TRUE, arranges the clustered items in optimized order.
#' @param ncomp Integer. Default NA. If set to integer k, only the first k principal components are used in clustering
#' @export
hcluster = function(X, clust="col", distance="euclidean", linkage="complete", reorder=T, ncomp=NA) {
  if (!is.na(ncomp)) {
    if (distance %in% c("euclidean","pearson","spearman")) {
      X = pca.approx(X, ncomp)
    } else {
      warning("Argument ncomp is ignored for this distance type", call.=F)
    }
  }
  if (distance=="euclidean" || distance=="binary") {
    if (clust=="col") {
      d = dist(t(X), method=distance)
    } else {
      d = dist(X, method=distance)
    }
  } else if (distance=="pearson" || distance=="spearman") {
    if (clust=="col") {
      d = as.dist((1-cor(X, method=distance))/2)
    } else {
      d = as.dist((1-cor(t(X), method=distance))/2)
    }
  }
  hc = stats::hclust(d, method=linkage)
  if (reorder) {
    hc = gclus::reorder.hclust(hc, d)
  }
  tmp = .CLUSTERMAP
  if (clust=="row") {
    tmp$row.X = X
    tmp$row.ncomp = ncomp
    tmp$rowclust = hc
    tmp$row.linkage = linkage
    tmp$row.distance = distance
  } else if (clust=="col") {
    tmp$col.X = X
    tmp$col.ncomp = ncomp
    tmp$colclust = hc
    tmp$col.linkage = linkage
    tmp$col.distance = distance
  }
  assign(".CLUSTERMAP", tmp, envir=.GlobalEnv)
}

#' subclust
#' @description The function identifies subclusters in a dendrogram.
#' The effect is that later calls to draw.tree will show the subclusters
#' in different colors. You can either provide the desired number of
#' clusters using the argument k, or you can set k=NA to automatically
#' estimate the number of cluster with one of two algorithms:
#' gap (Tibshirani et al. (2001), J Roy Statist Soc B, 63: 411-423) or
#' PART  (Nilsen et al (2012), Stat Appl Genet Mol Biol, 12: 637-652).
#' @param k Integer. Default is NA. Desired number of clusters. If default, k is estimated automatically.
#' @param clust String. Identify clusters column-wise if set to "col" (default), or row-wise if set to "row".
#' @param method String. Default is "gap". If k is NA, the number of clusters that automatically be estimated
#' by one of two algorithms: gap (default) (Tibshirani et al. (2001), J Roy Statist Soc B, 63: 411-423) or PART
#' (Nilsen et al (2012), Stat Appl Genet Mol Biol, 12: 637-652).
#' @param B Integer. Number of permutations. Used if method set to 'PART'.
#' @param min.size Integer. Least number of element in a single cluster. Used if method set to 'PART'.
#' @param max.level Integer. The level in the dendogram counting from the root to stop searching for clusters.
#' @export
subclust = function(k=NA, clust="col", method="gap", B=50, min.size=5, max.level=3) {
  tmp = .CLUSTERMAP
  if (clust=="col" && !is.na(k)) {
    tmp$colgroup = cutree(.CLUSTERMAP$colclust, k)
  } else if (clust=="col" && is.na(k) && method=="gap") {
    tmp$colgroup = gap.optimal(.CLUSTERMAP$col.X, clust, .CLUSTERMAP$col.distance, .CLUSTERMAP$col.linkage, B)
  } else if (clust=="col" && is.na(k) && method=="part") {
    tmp$colgroup = part.new(.CLUSTERMAP$col.X, clust, .CLUSTERMAP$col.distance, .CLUSTERMAP$col.linkage, B, min.size, max.level)
  } else if (clust=="row" && !is.na(k)) {
    tmp$rowgroup = cutree(.CLUSTERMAP$rowclust, k)
  } else if (clust=="row" && is.na(k) && method=="gap") {
    tmp$rowgroup = gap.optimal(.CLUSTERMAP$row.X, clust, .CLUSTERMAP$row.distance, .CLUSTERMAP$row.linkage, B)
  } else if (clust=="row" && is.na(k) && method=="part") {
    tmp$rowgroup = part.new(.CLUSTERMAP$row.X, clust, .CLUSTERMAP$row.distance, .CLUSTERMAP$row.linkage, B, min.size, max.level)
  }
  assign(".CLUSTERMAP", tmp, envir=.GlobalEnv)
}

#' get.subclust
#' @description Retrieves a vector of cluster labels after identification of subclusters with subclust().
#' @param clust String. Retrieves cluster labels for column-wise clusters ("col", default) or
#' row-wise clusters ("row").
#' @param labels String vector. If you supply row/column labels using the argument 'labels' the function
#' returns a data frame with two columns, the first being the cluster labels and the second being the
#' supplied row/column labels.
#' @param order String. Get subclusters in same order as dendogram.
#' @export
get.subclust = function(clust="col", labels=c(), order="tree") {
  if (clust=="col") {
    group = .CLUSTERMAP$colgroup
    if (order=="tree") {
      group = group[.CLUSTERMAP$colclust$order]
      if (length(labels)>0) labels = labels[.CLUSTERMAP$colclust$order]
    }
  } else if (clust=="row") {
    group = .CLUSTERMAP$rowgroup
    if (order=="tree") {
      group = group[.CLUSTERMAP$rowclust$order]
      if (length(labels)>0) labels = labels[.CLUSTERMAP$rowclust$order]
    }
  }
  if (length(labels)>0) {
    res = data.frame(cluster=group, label=labels)
  } else {
    res = group
  }
  res
}

#' get.order
#' @description Retrieves order of samples after clustering.
#' @param clust String. Default is "col". Specifies column-wise ("col") or row-rise clustering ("row").
#' @export
get.order = function(clust="col") {
  if (clust=="col") {
    return(.CLUSTERMAP$colclust$order)
  } else if (clust=="row") {
    return(.CLUSTERMAP$rowclust$order)
  }
}

#' draw.init()
#' @description Initiates a clustermap draw. The purpose is to make
#' room on the four sides of the heatmap for additional elements such
#' as cluster dendrograms, color bars with additional info, and labels.
#' Supply as arguments the sides where you want such elements, with the
#' coding side=1 (below), side=2 (left), side=3 (top), side=4 (right).
#' @param tree Integer. Adds a dendogram to the plot on the sides specified by the argument. Default is c(2,3),
#' which adds a dendogram on the left and top sides of the draw.
#' @param text Integer. Adds text on specified sides.
#' @param cbar Integer. Adds color bar(s) on specified sides.
#' @param ckey Boolean. Adds color bar key(s) (legends). Default is TRUE.
#' @param legend Boolean. Default is FALSE. If set to TRUE, color bar keys are plotted on same plot as heatmap.
#' Otherwise, it is plotted on a separate page.
#' @param inner Integer vector of 4 elements. Adds (or subtracts) a specific amount of space in the inner margins
#' on a specified side of the heatmap.
#' @param outer Integer vector of 4 elements. Adds (or subtracts) a specific amount of space in the outer margins
#' on a specified side of the heatmap.
#' @export
#' @examples
#' \dontrun{
#' draw.init() # Make no room for additional elements
#' draw.init(tree=c()) # Dendrogram on the left and on the top
#' draw.init(tree=3, text=4)  # Dendrogram on the top, labels on the right
#' }
draw.init = function(tree=c(), text=c(), cbar=c(), ckey=T, legend=F, inner=c(), outer=c()){
  if (ckey) {
    # layout(matrix(c(2,3,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1),6,3,byrow=TRUE))
    layout(matrix(c(2,3,3,1,1,1),2,3,byrow=T),heights=c(1,9))
    par(mar=c(2,2,2,2))
  }
  marg.outer = rep(0, 4)
  marg.inner = rep(0.01, 4)
  marg.outer[tree] = 0.3
  marg.outer[text] = 0.3
  if (legend) marg.outer[4] = 0.3
  marg.inner[cbar] = 0.1
  if (length(inner)==4) marg.inner = marg.inner + inner
  if (length(outer)==4) marg.outer = marg.outer + outer
  marg = list(inner=marg.inner, outer=marg.outer)
  tmp = list(margin=marg, rowclust=c(), colclust=c(), rowgroup=c(), legend=legend,
             colgroup=c(), row.ncomp=NA, col.ncomp=NA, col.X=c(), row.X=c())
  assign(".CLUSTERMAP", tmp, envir = .GlobalEnv)
}

#' panel.init
#' @description Initiates panel for plotting heatmaps.
#' @param mfrow List of 2 integers. Number of rows and columns for panel. Default "mfrow=c(2,2)".
#' @export
panel.init = function(mfrow=c(2,2)) {
  A = matrix(c(2,3,3,1,1,1),2,3,byrow=TRUE)
  B = matrix(NA, 2*mfrow[1], 3*mfrow[2])
  for (i in 1:mfrow[1]) {
    for (j in 1:mfrow[2]) {
      B[(1+(i-1)*2):(i*2),(1+(j-1)*3):(j*3)] = A + 3*(j-1) + mfrow[2]*3*(i-1)
    }
  }
  layout(B, heights=c(1,7,1,7))
  par(mar=c(1,2,1,2))
}

set.colgroup = function(group) {
  tmp = .CLUSTERMAP
  tmp$colgroup = group
  assign(".CLUSTERMAP", tmp, envir = .GlobalEnv)
}

set.colclust = function(clust) {
  tmp = .CLUSTERMAP
  tmp$colclust = list()
  tmp$colclust$order = clust
  assign(".CLUSTERMAP", tmp, envir = .GlobalEnv)
}

# For additional information that you want to put into color bars
# around the heatmap, you may either specify directly a color for
# each element, or if the information is numerical you may use one
# of the two functions below to convert to colors. See top section
# of Examplecode_clustermap.R for three examples of use.

#' set.color
#' @description Converts numerical information into colors
#' @param x matrix
#' @param type String. Default is "discrete".
#' @param label String. Default is "".
#' @param color String. Default is "".
#' @param na.color String. Set colors to missing data. Default is "grey".
#' @param clust String. Default is "col". Specifies column-wise ("col") or row-rise clustering ("row").
#' @export
set.color = function(x, type="discrete", label, color, na.color, clust) {
  if (type=="continuous") {
    if (missing(label)) label = ""
    if (missing(color)) color = "green-white-red" # color=palette()
    if (missing(na.color)) na.color="grey"
    res = color.cont(x, label=label, color=color, na.color=na.color)
  } else if (type=="discrete") {
    if (missing(label)) label = ""
    if (missing(color)) color = palette()
    if (missing(na.color)) na.color= "grey"
    res = color.disc(x, label=label, color=color, na.color=na.color)
  } else if (type=="survival") {
    if (missing(label)) label = ""
    if (missing(color)) color = palette()
    if (missing(na.color)) na.color= "grey"
    if (missing(clust)) clust = "col"
    res = color.surv(x, label=label, color=color, na.color=na.color, clust=clust)
  }
  return(res)
}

#' color.cont
#' @description Sets colors for continuous variables. Called by set.color().
#' @param x numeric matrix
#' @param label String
#' @param color String. Color string separated by "-".
#' @param na.color String. Set colors to missing data. Default is "grey".
#' @export
color.cont = function(x, label, color, na.color) {
  M = max(abs(x),na.rm=T)
  z = rbind(as.numeric(x)/M)
  z.mis = is.na(z)
  z[z.mis] = mean(z[!z.mis])
  x.grid = seq(min(x,na.rm=T), max(x,na.rm=T), length=100)
  z.grid = rbind(as.numeric(x.grid)/M)
  c0 = col2rgb(unlist(strsplit(color,"-")))/255
  if (ncol(c0)==1) {
    c0 = cbind(c(1,1,1), c0)
  }
  if (ncol(c0)==2) {
    z2 = (z-min(z))/(max(z)-min(z))
    c1 = cbind(c0[,1]) %*% (1-z2) + cbind(c0[,2]) %*% z2
    c2 = rgb(c1[1,], c1[2,], c1[3,])
    z2.grid = as.numeric((z.grid-min(z.grid))/(max(z.grid)-min(z.grid)))
    c1.grid = cbind(c0[,1]) %*% (1-z2.grid) + cbind(c0[,2]) %*% z2.grid
    c2.grid = rgb(c1.grid[1,], c1.grid[2,], c1.grid[3,])
  } else if (ncol(c0)==3) {
    c1 = matrix(c0[,2], 3, length(z), byrow=F) +
      cbind(c0[,1]-c0[,2]) %*% ifelse(z<0,-z,0) +
      cbind(c0[,3]-c0[,2]) %*% ifelse(z>0,z,0)
    c2 = rgb(c1[1,], c1[2,], c1[3,])
    z2.grid = as.numeric((z.grid-min(z.grid))/(max(z.grid)-min(z.grid)))
    c1.grid = matrix(c0[,2], 3, length(z.grid), byrow=F) +
      cbind(c0[,1]-c0[,2]) %*% ifelse(z.grid<0,-z.grid,0) +
      cbind(c0[,3]-c0[,2]) %*% ifelse(z.grid>0,z.grid,0)
    c2.grid = rgb(c1.grid[1,], c1.grid[2,], c1.grid[3,])
  }
  c2[z.mis] = "grey"
  key.x = x.grid #x[!z.mis]
  key.z = z2.grid #z2[!z.mis]
  key.c = c2.grid #c2[!z.mis]
  list(label=label, type="cont", x=x, color=c2, na.color=na.color,
       key=data.frame(x=key.x, z=key.z, color=key.c))
}

#interpo = function(t0, survobj) {
#	tvec = c(0, survobj$time, 10^9)
#	ind = 1:(length(tvec)-1)
#	for (i in 1:length(t0)) {
#		kstar = which((tvec[k] <= t0[i]) & (tvec[k+1] >= t0[i]))
#		if (
#	}
#}

## #' color.surv
## #' @description Sets colors for survival variables. Called by set.color().
## #' @param x numeric matrix
## #' @param label String
## #' @param color String. Color string separated by "-".
## #' @param na.color String. Set colors to missing data. Default is "grey".
## #' @param clust String. Direction for clustering.
## color.surv = function(x, label, color, na.color, clust) {
##     clu = get.subclust(clust=clust, order="orig")
##     uclu = unique(clu)
##     for (ii in 1:length(uclu)) {
##     	time.ii = x[clu==uclu[ii],1]
##     	cens.ii = x[clu==uclu[ii],2]
##     	res = survfit(Surv(time.ii,cens.ii) ~ 1)
##     	pred.ii = interpo(time.ii, res)
##     }
##
## 	M = max(abs(x),na.rm=T)
##     z = rbind(as.numeric(x)/M)
##     z.mis = is.na(z)
##     z[z.mis] = mean(z[!z.mis])
##     x.grid = seq(min(x,na.rm=T), max(x,na.rm=T), length=100)
##     z.grid = rbind(as.numeric(x.grid)/M)
##     c0 = col2rgb(unlist(strsplit(color,"-")))/255
##     if (ncol(c0)==1) {
##     	c0 = cbind(c(1,1,1), c0)
##     }
##    	if (ncol(c0)==2) {
##     	z2 = (z-min(z))/(max(z)-min(z))
##     	c1 = cbind(c0[,1]) %*% (1-z2) + cbind(c0[,2]) %*% z2
##     	c2 = rgb(c1[1,], c1[2,], c1[3,])
##     	z2.grid = as.numeric((z.grid-min(z.grid))/(max(z.grid)-min(z.grid)))
##     	c1.grid = cbind(c0[,1]) %*% (1-z2.grid) + cbind(c0[,2]) %*% z2.grid
##     	c2.grid = rgb(c1.grid[1,], c1.grid[2,], c1.grid[3,])
##     } else if (ncol(c0)==3) {
##     	c1 = matrix(c0[,2], 3, length(z), byrow=F) +
##     	cbind(c0[,1]-c0[,2]) %*% ifelse(z<0,-z,0) +
##     	cbind(c0[,3]-c0[,2]) %*% ifelse(z>0,z,0)
##     	c2 = rgb(c1[1,], c1[2,], c1[3,])
##     	z2.grid = as.numeric((z.grid-min(z.grid))/(max(z.grid)-min(z.grid)))
##     	c1.grid = matrix(c0[,2], 3, length(z.grid), byrow=F) +
##     	cbind(c0[,1]-c0[,2]) %*% ifelse(z.grid<0,-z.grid,0) +
##     	cbind(c0[,3]-c0[,2]) %*% ifelse(z.grid>0,z.grid,0)
##     	c2.grid = rgb(c1.grid[1,], c1.grid[2,], c1.grid[3,])
##     }
##     c2[z.mis] = "grey"
##     key.x = x.grid #x[!z.mis]
##     key.z = z2.grid #z2[!z.mis]
##     key.c = c2.grid #c2[!z.mis]
##     list(label=label, type="cont", x=x, color=c2, na.color=na.color,
##     	key=data.frame(x=key.x, z=key.z, color=key.c))
## }

#' color.disc
#' @description Sets colors for discrete variables. Called by set.color().
#' @param x numeric matrix
#' @param label String
#' @param color String. Color string separated by "-".
#' @param na.color String. Set colors to missing data. Default is "grey".
#' @export
color.disc = function(x, label, color, na.color) {
  val = sort(unique(x))
  z = rep(NA, length(x))
  ncol = length(color)
  for (i in 1:length(val)) {
    z[x==val[i]] = color[1+(i-1) %% ncol]
  }
  z[is.na(x)] = na.color
  list(label=label, type="disc", x=as.factor(x), color=z, na.color=na.color,
       key=data.frame(x=val, color=color[1+(0:(length(val)-1)) %% ncol]))
}

#' draw.hmap.key
#' @description The function draw.hmap.key() creates a color key, i.e. a graphical overview
#' of what numerical values the different colors found in the heatmap correspond to.
#' @export
draw.hmap.key = function() {
  colrange = COLOR_SCALE
  xmin = -max(abs(colrange$valuerange))
  xmax = max(abs(colrange$valuerange))
  plot(0, 0, type = "n", xlim = c(xmin, xmax), ylim = c(0,1), xlab = "",
       ylab = "", xaxt = "n", yaxt = "n", bty="n")
  axis(1, c(xmin,xmax), round(c(xmin,xmax) * 100)/100)
  axis(1, 0, 0)
  z1.plot = rbind(seq(-max(abs(colrange$valuerange)),
                      max(abs(colrange$valuerange)), length=100))
  z1 = rbind(seq(-1, 1, length=100))
  c0 = col2rgb(unlist(strsplit(colrange$colorscale,"-")))/255
  c1 = matrix(c0[,2],3,length(z1),byrow=F) +
    cbind(c0[,1]-c0[,2])%*%ifelse(z1<0,-z1,0) +
    cbind(c0[,3]-c0[,2])%*%ifelse(z1>0,z1,0)
  c2 = rgb(c1[1,],c1[2,],c1[3,])
  for (i in 1:(length(c2)-1)) {
    polygon(c(z1.plot[i], z1.plot[i], z1.plot[i+1], z1.plot[i+1]), c(0,1,1,0),
            col = c2[i], border = FALSE)
  }
  if (sum(c0)>2) {abline(v=0,col="black")} else {abline(v=0,col="white")}
  #frame()
}

#' draw.cbar.key
#' @description Adds key for color bar.
#' @param border Boolean. Default set to FALSE.
#' @param vsize float. Height of the key. Default set to 0.1
#' @param hsize float. Width of the key. Default set to 0.2
#' @param hsep float. Horizontal separator of the colorbar keys, default set to 0.05.
#' @param vlsep float. Vertical distance between lower part of the heatmap, and the bottom of the color bar.
#' Default is set to 0.1
#' @param vusep float. Vertical distance between upper part of the heatmap, and the bottom of the color bar.
#' Default is set to 0.1
#' @param vsize.title float. Space for adding color bar title. Default is 0.08
#' @param cex.title integer. Size of title font. Default is 1.
#' @param font.title integer. Font type for title, default is set to 2 (bold)
#' @param cex float. Text size for legend. Default is set to 0.8.
#' @param mfrow integer vector of length 2, default set to NA.
#' @export
draw.cbar.key = function(border=F, vsize=0.1, hsize=0.05, hsep=0.05, vlsep=0.1, vusep=0.1,
                         vsize.title=0.08, cex.title=1, font.title=2, cex=0.8, mfrow=NA) {
  if (.CLUSTERMAP$legend) {
    draw.cbar.key.samepage(border, hsize, hsep, vlsep,
                           vusep, vsize.title, cex.title, font.title, cex)
  } else {
    draw.cbar.key.separatepage(border, vsize, hsize,
                               hsep, mfrow, cex.title, font.title, cex)
  }
}

#' draw.cbar.key.sampage
#' @description If legend set to TRUE in draw.init(), plots color bar legend on the same page as heatmap.
#' @param border Boolean. Default set to FALSE.
#' @param vsize float. Height of the key. Default set to 0.1
#' @param hsize float. Width of the key. Default set to 0.2
#' @param hsep float. Horizontal separator of the colorbar keys, default set to 0.05.
#' @param vlsep float. Vertical distance between lower part of the heatmap, and the bottom of the color bar
#' @param vusep float. Vertical distance between upper part of the heatmap, and the bottom of the color bar.
#' @param vsize.title float. Space for adding color bar title. Default is 0.08
#' @param cex.title integer. Size of title font. Default is 1.
#' @param font.title integer. Font type for title, default is set to 2 (bold)
#' @param cex float. Text size for legend. Default is set to 0.8.
#' @export
draw.cbar.key.samepage = function(border, hsize, hsep, vlsep, vusep,
                                  vsize.title, cex.title, font.title, cex) {
  legend = .CLUSTERMAP$legend
  margin = .CLUSTERMAP$margin
  keys = .CLUSTERMAP$cbar.key
  type = .CLUSTERMAP$cbar.type
  nbar = length(keys)
  bty = ifelse(border, "o", "n")
  h.offset = 1 + margin$inner[4] + hsep
  v.range = 1 - vusep - vlsep
  v.boxrange = v.range/nbar
  maxlevels = 3
  for (i in 1:nbar) {
    if (type[i]=="disc") maxlevels = max(maxlevels,nrow(keys[[i]]))
  }
  dy = (v.boxrange - vsize.title)/maxlevels
  for (i in 1:nbar) {
    if (type[i]=="disc") {
      nlevels = nrow(keys[[i]])
      # Title
      text(h.offset, vlsep+i*v.boxrange-0.7*vsize.title,
           names(keys)[i], adj=c(0,0.5), cex=cex.title, font=font.title)
      # Key
      ytop = vlsep + i*v.boxrange - vsize.title
      for (j in 1:nlevels) {
        polygon(c(h.offset,h.offset,h.offset+hsize,h.offset+hsize),
                c(ytop, ytop-dy, ytop-dy, ytop),
                col=as.character(keys[[i]][j,2]), border="black")
        text(h.offset+1.2*hsize, ytop-0.5*dy,
             as.character(keys[[i]][j,1]), adj=c(0,0.5), cex=cex)
        ytop = ytop - dy
      }
    } else if (type[i]=="cont") {
      xval = as.character(keys[[i]][,1])
      keep = !duplicated(xval)
      xval.unique = xval[keep]
      zval.unique = keys[[i]][keep,2]
      col.unique = as.character(keys[[i]][keep,3])
      nlevels = length(xval.unique)
      # Title
      text(h.offset, vlsep+i*v.boxrange-0.7*vsize.title,
           names(keys)[i], adj=c(0,0.5), cex=cex.title, font=font.title)
      # Key
      ytop = vlsep + i*v.boxrange - vsize.title
      ybot = vlsep + (i-1)*v.boxrange
      delta = (ytop-ybot)/nlevels
      ytop.j = ytop
      for (j in 1:nlevels) {
        polygon(c(h.offset,h.offset,h.offset+hsize,h.offset+hsize),
                c(ytop.j, ytop.j-0.8*delta, ytop.j-0.8*delta, ytop.j),
                col=col.unique[j], border=FALSE)
        ytop.j = ytop.j - delta
      }
      xval.unique = as.numeric(xval.unique)
      text(h.offset+1.2*hsize, ytop-0.5*delta,
           round(xval.unique[1],2), adj=c(0,1), cex=cex)
      text(h.offset+1.2*hsize, ybot+0.5*delta,
           round(xval.unique[length(xval.unique)],2), adj=c(0,0), cex=cex)
    }
  }
}

#' draw.cbar.key.separatepage
#' @description If legend set to FALSE in draw.init(), plots color bar legend on a separate page than the heatmap.
#' @param border Boolean. Default set to FALSE.
#' @param vsize float. Height of the key. Default set to 0.1
#' @param hsize float. Width of the key. Default set to 0.2
#' @param hsep float. Horizontal separator of the colorbar keys, default set to 0.05.
#' @param cex.title integer. Size of title font. Default is 1.
#' @param font.title integer. Font type for title, default is set to 2 (bold)
#' @param cex float. Text size for legend. Default is set to 0.8.
#' @param mfrow notinuse
#' @export
draw.cbar.key.separatepage = function(border, vsize, hsize, hsep, mfrow,
                                      cex.title, font.title, cex) {
  keys = .CLUSTERMAP$cbar.key
  type = .CLUSTERMAP$cbar.type
  nbar = length(keys)
  frame()
  if (is.na(mfrow[1])) {
    mfrow = c(2,5)
  }
  par(mfrow=mfrow)
  par(mar=c(2,2,2,2))
  bty = ifelse(border, "o", "n")
  for (i in 1:nbar) {
    plot(0,0,xlim=c(0,1),ylim=c(0,1),type="n",xaxt="n",yaxt="n",bty=bty)
    if (type[i]=="disc") {
      nlevels = nrow(keys[[i]])
      text(0.1, 1, names(keys)[i], adj=c(0,1), cex=cex.title, font=font.title)
      for (j in 1:nlevels) {
        polygon(c(0.1,0.1,0.1+hsize,0.1+hsize),
                c(1-vsize-(j-1)*vsize, 1-vsize-j*vsize,
                  1-vsize-j*vsize, 1-vsize-(j-1)*vsize),
                col=as.character(keys[[i]][j,2]), border="black")
        text(0.1+2*hsize, 1-vsize-j*vsize+0.5*vsize,
             as.character(keys[[i]][j,1]), adj=c(0,0.5), cex=cex)
      }
    } else if (type[i]=="cont") {
      xval = as.character(keys[[i]][,1])
      keep = !duplicated(xval)
      xval.unique = xval[keep]
      zval.unique = keys[[i]][keep,2]
      col.unique = as.character(keys[[i]][keep,3])
      nlevels = length(xval.unique)
      text(0.1, 1, names(keys)[i], adj=c(0,1), cex=cex.title, font=font.title)
      for (j in 1:nlevels) {
        polygon(c(0.1,0.1,0.1+hsize,0.1+hsize),
                c(1-vsize-0.8*(j-1)/nlevels, 1-vsize-0.8*j/nlevels,
                  1-vsize-0.8*j/nlevels, 1-vsize-0.8*(j-1)/nlevels),
                col=col.unique[j], border=FALSE)
      }
      xval.unique = as.numeric(xval.unique)
      text(0.1+2*hsize, 1-vsize-0.5*0.8/nlevels,
           round(xval.unique[1],2), adj=c(0,1), cex=cex)
      text(0.1+2*hsize, 1-vsize-0.8+0.8*0.5/nlevels,
           round(xval.unique[length(xval.unique)],2), adj=c(0,0), cex=cex)
    }
  }
}

#' draw.text
#' @description Add labels to the sides of the heatmap.
#' @param txt String. Text to be added to the heatmap.
#' @param side Integer. Default is 4 (right side). Defines the side of the heatmap onto which the text will be added.
#' @param sep.outer Integer. Default is 0.
#' @param cex Integer. Set text size.
#' @param maxchar Integer. Default is NA. Truncates the text labels to a maximum number of characters.
#' @export
draw.text = function(txt, side=4, sep.outer=0, cex=0.5, maxchar=NA) {
  if (side==1 || side==3) {
    if (length(.CLUSTERMAP$colclust)>0) {
      txt = txt[.CLUSTERMAP$colclust$order]
    }
  } else if (side==2 || side==4) {
    if (length(.CLUSTERMAP$rowclust)>0) {
      txt = txt[.CLUSTERMAP$rowclust$order]
    }
  }
  margin = .CLUSTERMAP$margin
  N = length(txt)
  if (!is.na(maxchar)) {
    for (i in 1:N) txt[i] = substr(txt[i],1,maxchar)
  }
  if (side==1) {
    for (k in 1:N) {
      text((2*k-1)/(2*N), -margin$inner[1]-sep.outer, txt[k],
           adj=c(0,0.5), srt=-90, cex=cex)
    }
  } else if (side==2) {
    for (k in 1:N) {
      text(-margin$inner[2]-sep.outer, (2*k-1)/(2*N), txt[k],
           adj=c(1,0.5), cex=cex)
    }
  } else if (side==3) {
    for (k in 1:N) {
      text((2*k-1)/(2*N), 1+margin$inner[1]+sep.outer, txt[k],
           adj=c(0,0.5), srt=90, cex=cex)
    }
  } else if (side==4) {
    for (k in 1:N) {
      text(1+margin$inner[4]+sep.outer, (2*k-1)/(2*N), txt[k],
           adj=c(0,0.5), cex=cex)
    }
  }
}

#' draw.cbar
#' @description Add a color bar with additional information to the sides of the heatmap.
#' @param ... A list of set.color or color.cont objects
#' @param side Integer. Default is 3. Add the color bar to a specific side of the heatmap.
#' @param labels Boolean. Default is TRUE. Add labels to the colorbar.
#' @param pvalue Boolean. Default is FALSE. If TRUE, adds a p-value calculated using the method specificied in the
#' pvalue.method parameter.
#' @param pvalue.method String. Default is "chisq". Tests association of information added to identifed clusters using this method.
#' @param cex Integer. Text size. Default set to 0.8
#' @param sep.outer Integer. Default is 0.2.
#' @param sep.inner Integer. Default is 0.2.
#' @param border String. Color border. Default is NA. Can be set to desired color.
#' @param lwd Integer. Default is 0.5. Thickness of border.
#' @export
draw.cbar = function (..., side=3, labels=T, pvalue=F, pvalue.method="chisq",
                      cex=0.8, sep.outer=0.02, sep.inner=0.2, border=NA, lwd=0.5) {
  datatype = sapply(list(...), function(x) x$type)
  groups.x = sapply(list(...), function(x) x$x)
  groups = sapply(list(...), function(x) x$color)
  keys = lapply(list(...), function(x) x$key)
  labs = sapply(list(...), function(x) x$label)
  margin = .CLUSTERMAP$margin

  # For the jth color bar, we have a label (labels[j]), a set of values
  # (groups.x[,j]) and a set of colors (groups[,j]). The link between
  # values and colors is given by keys[[j]].

  tmp = .CLUSTERMAP
  tmp$cbar.key = keys
  tmp$cbar.type = datatype
  if (labels) {
    names(tmp$cbar.key) = labs
  } else {
    names(tmp$cbar.key) = rep("", length(keys))
  }
  assign(".CLUSTERMAP", tmp, envir = .GlobalEnv)

  if (side==1) {
    if (!is.null(.CLUSTERMAP$colclust)) {
      groups = groups[.CLUSTERMAP$colclust$order,,drop=F]
      groups.x = groups.x[.CLUSTERMAP$colclust$order,,drop=F]
    }
    for (j in 1:ncol(groups)) { # Number of bars
      delta = margin$inner[1]
      for (i in 1:nrow(groups)) {
        coli = groups[i,j]
        a0 = c(i-1, i-1, i, i)/nrow(groups)
        b0 = c(-sep.outer-delta*(j-1)/ncol(groups),
               -sep.outer-delta*(j-sep.inner)/ncol(groups),
               -sep.outer-delta*(j-sep.inner)/ncol(groups),
               -sep.outer-delta*(j-1)/ncol(groups))
        polygon(a0, b0, col = coli, border = border, lwd=lwd)
      }
      pval = NA
      if (pvalue & length(.CLUSTERMAP$colgroup)>0 & datatype[j]=="disc") {
        yj = .CLUSTERMAP$colgroup[.CLUSTERMAP$colclust$order]
        xj = groups.x[,j]
        if (length(unique(yj)) < 2 || length(unique(xj)) < 2) {
          pval = 1
        } else if (pvalue.method == "chisq") {
          pval = chisq.test(xj,yj)$p.value
        } else if (pvalue.method == "chisq.sim") {
          pval = chisq.test(xj,yj,simulate.p.value=T,B=5000)$p.value
        } else if (pvalue.method == "fisher") {
          pval = fisher.test(xj,yj,simulate.p.value=T,B=5000)$p.value
        }
      } else if (pvalue & length(.CLUSTERMAP$colgroup)>0 & datatype[j]=="cont") {
        yj = .CLUSTERMAP$colgroup[.CLUSTERMAP$colclust$order]
        xj = groups.x[,j]
        if (length(unique(yj)) < 2 || length(unique(xj)) < 2) {
          pval = 1
        } else {
          datj = data.frame(xj=xj, yj=yj)
          pval = summary(aov(xj ~ yj, data=datj))[[1]][1,5]
        }
      }
      info.txt = ""
      if (labels & !is.na(pval)) {
        info.txt = paste(labs[j], " (", paste("P =",
                                              format(pval, digits=4, nsmall=4)), ")", sep="")
      } else if (labels & is.na(pval)) {
        info.txt = labs[j]
      } else if (!labels & !is.na(pval)) {
        info.txt = paste("P =", format(pval, digits=4, nsmall=4))
      }
      text(1.03, -sep.outer-delta*(j-0.5)/ncol(groups), info.txt,
           adj=c(0,0.5), cex=cex)
    }
  } else if (side==2) {
    if (!is.null(.CLUSTERMAP$rowclust)) {
      groups = groups[.CLUSTERMAP$rowclust$order,,drop=F]
    }
    for (j in 1:ncol(groups)) { # Number of bars
      delta = margin$inner[2]
      for (i in 1:nrow(groups)) {
        coli = groups[i,j]
        a0 = c(-sep.outer-delta*(j-1)/ncol(groups),
               -sep.outer-delta*(j-sep.inner)/ncol(groups),
               -sep.outer-delta*(j-sep.inner)/ncol(groups),
               -sep.outer-delta*(j-1)/ncol(groups))
        b0 = c(i-1, i-1, i, i)/nrow(groups)
        polygon(a0, b0, col = coli, border = border, lwd=lwd)
      }
    }
  } else if (side==3) {
    if (!is.null(.CLUSTERMAP$colclust)) {
      groups = groups[.CLUSTERMAP$colclust$order,,drop=F]
      groups.x = groups.x[.CLUSTERMAP$colclust$order,,drop=F]
    }
    for (j in 1:ncol(groups)) { # Number of bars
      delta = margin$inner[3]
      for (i in 1:nrow(groups)) {
        coli = groups[i,j]
        a0 = c(i-1, i-1, i, i)/nrow(groups)
        b0 = c(1+sep.outer+delta*(j-1)/ncol(groups),
               1+sep.outer+delta*(j-sep.inner)/ncol(groups),
               1+sep.outer+delta*(j-sep.inner)/ncol(groups),
               1+sep.outer+delta*(j-1)/ncol(groups))
        polygon(a0, b0, col = coli, border = border, lwd=lwd)
      }
      pval = NA
      if (pvalue & length(.CLUSTERMAP$colgroup)>0 & datatype[j]=="disc") {
        yj = .CLUSTERMAP$colgroup[.CLUSTERMAP$colclust$order]
        xj = groups.x[,j]
        if (length(unique(yj)) < 2 || length(unique(xj)) < 2) {
          pval = 1
        } else if (pvalue.method == "chisq") {
          pval = chisq.test(xj,yj)$p.value
        } else if (pvalue.method == "chisq.sim") {
          pval = chisq.test(xj,yj,simulate.p.value=T,B=5000)$p.value
        } else if (pvalue.method == "fisher") {
          pval = fisher.test(xj,yj,simulate.p.value=T,B=5000)$p.value
        }
      } else if (pvalue & length(.CLUSTERMAP$colgroup)>0 & datatype[j]=="cont") {
        yj = .CLUSTERMAP$colgroup[.CLUSTERMAP$colclust$order]
        xj = groups.x[,j]
        if (length(unique(yj)) < 2 || length(unique(xj)) < 2) {
          pval = 1
        } else {
          datj = data.frame(xj=xj, yj=yj)
          pval = summary(aov(xj ~ yj, data=datj))[[1]][1,5]
        }
      }
      info.txt = ""
      if (labels & !is.na(pval)) {
        info.txt = paste(labs[j], " (", paste("P =",
                                              format(pval, digits=4, nsmall=4)), ")", sep="")
      } else if (labels & is.na(pval)) {
        info.txt = labs[j]
      } else if (!labels & !is.na(pval)) {
        info.txt = paste("P =", format(pval, digits=4, nsmall=4))
      }
      text(1.03, 1+sep.outer+delta*(j-0.5)/ncol(groups), info.txt,
           adj=c(0,0.5), cex=cex)
    }
  } else if (side==4) {
    if (!is.null(.CLUSTERMAP$rowclust)) {
      groups = groups[.CLUSTERMAP$rowclust$order,,drop=F]
    }
    for (j in 1:ncol(groups)) { # Number of bars
      delta = margin$inner[4]
      for (i in 1:nrow(groups)) {
        coli = groups[i,j]
        a0 = c(1+sep.outer+delta*(j-1)/ncol(groups),
               1+sep.outer+delta*(j-sep.inner)/ncol(groups),
               1+sep.outer+delta*(j-sep.inner)/ncol(groups),
               1+sep.outer+delta*(j-1)/ncol(groups))
        b0 = c(i-1, i-1, i, i)/nrow(groups)
        polygon(a0, b0, col = coli, border = border, lwd=lwd)
      }
    }
  }
}

#' compute.hmap
#' @description Helper function to be used by draw.hmap
#' @param X matrix
#' @param colorscale String. Color string separated by "-".
#' @param xmax Integer Default is set to maximum value in input matrix X
#' @param col.na String. Specifies color of missing data. Default set to "grey".
#' @export
compute.hmap = function (X, colorscale, xmax, col.na) {
  if (length(.CLUSTERMAP$colclust)>0) {
    X = X[,.CLUSTERMAP$colclust$order]
  }
  if (length(.CLUSTERMAP$rowclust)>0) {
    X = X[.CLUSTERMAP$rowclust$order,]
  }
  # margin = .CLUSTERMAP$margin
  Nrow = nrow(X)
  Ncol = ncol(X)
  x0 = seq(0, 1, length = Ncol)
  y0 = seq(0, 1, length = Nrow)
  x1 = rep(x0, Nrow)
  y1 = rep(y0, rep(Ncol, Nrow))
  if (is.na(xmax)) {
    xmax = max(abs(X), na.rm=T)
  }
  z1 = rbind(as.numeric(t(X))/xmax)
  z1[z1 > 1] = 1
  z1[z1 < -1] = -1
  c0 = col2rgb(unlist(strsplit(colorscale,"-")))/255
  c1 = matrix(c0[,2],3,length(z1),byrow=F) +
    cbind(c0[,1]-c0[,2]) %*% ifelse(is.na(z1),NA,ifelse(z1<0,-z1,0)) +
    cbind(c0[,3]-c0[,2]) %*% ifelse(is.na(z1),NA,ifelse(z1>0,z1,0))
  c2 = rep(col.na, length(z1))
  ok = !is.na(c1[1,]) & !is.na(c1[2,]) & !is.na(c1[3,])
  c2[ok] = rgb(c1[1,ok], c1[2,ok], c1[3,ok])
  pict = matrix(c2, Nrow, Ncol, byrow=T)
  valuerange = c(max(min(X,na.rm=T),-xmax), min(max(X,na.rm=T),xmax))
  cscale = list(colorscale = colorscale, valuerange = valuerange)
  assign("COLOR_SCALE", cscale, envir=.GlobalEnv)
  return(pict)
}

#' draw.hmap
#' @description plots heatmaps of a numerical data matrix X.
#' @param X matrix
#' @param colorscale String. Three (valid R) colors separated by hyphens used to represent the values as colors
#' in the heatmap. Default is "blue-white-red".
#' @param xmax Integer Default is set to maximum value of input matrix X.
#' @param col.na String. Default is set to "grey". Specifies color of missing data.
#' @param as.image Boolean. Default is set to T
#' @param interpolate Boolean. Default is set to F.
#' @export
draw.hmap = function(X, colorscale="blue-white-red", xmax=NA, col.na="grey", as.image=T, interpolate=F) {
  m = .CLUSTERMAP$margin
  plot(0, 0, type="n", xaxt="n", yaxt="n", xlab="", ylab="",
       xlim=c(-m$inner[2]-m$outer[2],1+m$inner[4]+m$outer[4]),
       ylim=c(-m$inner[1]-m$outer[1],1+m$inner[3]+m$outer[3]))
  if (class(X) == "raster") {
    if (length(.CLUSTERMAP$colclust)>0) {
      X = X[,.CLUSTERMAP$colclust$order]
    }
    if (length(.CLUSTERMAP$rowclust)>0) {
      X = X[.CLUSTERMAP$rowclust$order,]
    }
  } else {
    X = compute.hmap(X, colorscale, xmax, col.na)
  }
  if (as.image) {
    ind = seq(nrow(X),1,-1)
    rasterImage(as.raster(X[ind,]), 0, 0, 1, 1, interpolate=interpolate)
  } else {
    Ncol = ncol(X); Nrow = nrow(X)
    for (i in 1:Nrow) {
      for (j in 1:Ncol) {
        polygon(c(j-1,j-1,j,j)/Ncol, c(i-1,i,i,i-1)/Nrow, col=X[i,j], border=F)
      }
    }
  }
}

#' draw.tree
#' @description Adds a dendogram to heatmap.
#' @param groups Identified groups
#' @param side Integer. Side onto which the tree is plotted on the heatmap. Default is 3 (top)
#' @param sep Float.
#' @param lwd Float. Line width of tree branches.
#' @param trim Boolean. Default set to TRUE.
#' @param colors String. Default set to NA. Colors the clusters.
#' @export
draw.tree = function (groups, side=3, sep=0.03, lwd=0.5, trim=T, colors=NA) {
  if (side==1 || side==3) {
    clust = .CLUSTERMAP$colclust
    if (missing(groups)) {
      if (length(.CLUSTERMAP$colgroup)>0) {
        groups = .CLUSTERMAP$colgroup[.CLUSTERMAP$colclust$order]
      } else {
        groups = rep(0, length(clust$order))
      }
    }
  } else if (side==2 || side==4) {
    clust = .CLUSTERMAP$rowclust
    if (missing(groups)) {
      if (length(.CLUSTERMAP$rowgroup)>0) {
        groups = .CLUSTERMAP$rowgroup[.CLUSTERMAP$rowclust$order]
      } else {
        groups = rep(0, length(clust$order))
      }
    }
  }
  margin = .CLUSTERMAP$margin
  d = margin$inner[side]; d2 = margin$outer[side]; g = as.numeric(groups)
  if (side == 1) {
    invisible(draw.fork.below(length(clust$height), clust, trim, g, d, d2, sep, lwd))
  } else if (side == 2) {
    invisible(draw.fork.left(length(clust$height), clust, trim, g, d, d2, sep, lwd))
  } else if (side == 3) {
    invisible(draw.fork.above(length(clust$height), clust, trim, g, d, d2, sep, lwd))
  } else if (side == 4) {
    invisible(draw.fork.right(length(clust$height), clust, trim, g, d, d2, sep, lwd))
  }
}

#' draw.for.left
#' @description Plots on the left side of the heatmap. Function called by draw.tree()
#' @param i argument
#' @param clust argument
#' @param trim argument
#' @param groups argument
#' @param delta argument
#' @param delta2 argument
#' @param sep argument
#' @param lwd Float. Line width.
#' @export
draw.fork.left = function (i, clust, trim, groups, delta, delta2, sep, lwd) {
  N = length(clust$height)
  mrg = clust$merge
  if (trim) {
    hgt = (clust$height[i]-min(clust$height))/(max(clust$height)-min(clust$height))
  } else {
    hgt = clust$height[i]/max(clust$height)
  }
  a = rep(0, 2)
  group = rep(0, 2)
  b1 = -delta - sep - delta2 * hgt
  for (j in 1:2) {
    if (mrg[i, j] < 0) {
      k = which(clust$order == -mrg[i, j])
      a[j] = (2*k-1)/(2*(N+1))
      b0 = -delta - sep
      group[j] = groups[k]
    }
    else {
      tmp = draw.fork.left(mrg[i, j], clust, trim, groups, delta, delta2, sep, lwd)
      a[j] = tmp[1]
      b0 = -delta - sep - delta2 * tmp[2]
      group[j] = tmp[3]
    }
    if (group[j] != -1) {
      lines(c(b0, b1), c(a[j], a[j]), col = group[j] + 1, lend=1, lwd=lwd)
    }
    else {
      lines(c(b0, b1), c(a[j], a[j]), lend=1, lwd=lwd)
    }
  }
  if (diff(group) == 0 & group[1] != -1) {
    lines(c(b1, b1), c(a[1], a[2]), col = group[1] + 1, lend=2, lwd=lwd)
    invisible(return(c(mean(a), hgt, group[1])))
  }
  else {
    lines(c(b1, b1), c(a[1], a[2]), lend=2, lwd=lwd)
    invisible(return(c(mean(a), hgt, -1)))
  }
}

#' draw.for.right
#' @description Plots on the right side of the heatmap. Function called by draw.tree()
#' @param i argument
#' @param clust argument
#' @param trim argument
#' @param groups argument
#' @param delta argument
#' @param delta2 argument
#' @param sep argument
#' @param lwd Float. Line width.
#' @export
draw.fork.right = function (i, clust, trim, groups, delta, delta2, sep, lwd) {
  N = length(clust$height)
  mrg = clust$merge
  if (trim) {
    hgt = (clust$height[i]-min(clust$height))/(max(clust$height)-min(clust$height))
  } else {
    hgt = clust$height[i]/max(clust$height)
  }
  a = rep(0, 2)
  group = rep(0, 2)
  b1 = 1+delta+sep+delta2*hgt
  for (j in 1:2) {
    if (mrg[i, j] < 0) {
      k = which(clust$order == -mrg[i, j])
      a[j] = (2*k-1)/(2*(N+1))
      b0 = 1+delta+sep
      group[j] = groups[k]
    }
    else {
      tmp = draw.fork.right(mrg[i, j], clust, trim, groups, delta, delta2, sep, lwd)
      a[j] = tmp[1]
      b0 = 1+delta+sep+delta2*tmp[2]
      group[j] = tmp[3]
    }
    if (group[j] != -1) {
      lines(c(b0, b1), c(a[j], a[j]), col = group[j] + 1, lend=1, lwd=lwd)
    }
    else {
      lines(c(b0, b1), c(a[j], a[j]), lend=1, lwd=lwd)
    }
  }
  if (diff(group) == 0 & group[1] != -1) {
    lines(c(b1, b1), c(a[1], a[2]), col = group[1] + 1, lend=2, lwd=lwd)
    invisible(return(c(mean(a), hgt, group[1])))
  }
  else {
    lines(c(b1, b1), c(a[1], a[2]), lend=2, lwd=lwd)
    invisible(return(c(mean(a), hgt, -1)))
  }
}

#' draw.for.above
#' @description Plots above the heatmap. Function called by draw.tree()
#' @param i argument
#' @param clust argument
#' @param trim argument
#' @param groups argument
#' @param delta argument
#' @param delta2 argument
#' @param sep argument
#' @param lwd Float. Line width.
#' @export
draw.fork.above = function (i, clust, trim, groups, delta, delta2, sep, lwd) {
  N = length(clust$height)
  mrg = clust$merge
  if (trim) {
    hgt = (clust$height[i]-min(clust$height))/(max(clust$height)-min(clust$height))
  } else {
    hgt = clust$height[i]/max(clust$height)
  }
  a = rep(0, 2)
  group = rep(0, 2)
  b1 = 1 + delta + sep + delta2 * hgt
  for (j in 1:2) {
    if (mrg[i,j] < 0) { # Leaf node
      k = which(clust$order == -mrg[i,j])
      a[j] = (2*k-1)/(2*(N+1))
      b0 = 1 + delta + sep
      group[j] = groups[k]
    } else { # Inner node
      tmp = draw.fork.above(mrg[i,j], clust, trim, groups, delta, delta2, sep, lwd)
      a[j] = tmp[1]
      b0 = 1 + delta + sep + delta2 * tmp[2]
      group[j] = tmp[3]
    }
    if (group[j] != -1) {
      lines(c(a[j], a[j]), c(b0, b1), col = group[j] + 1, lend=1, lwd=lwd)
    } else {
      lines(c(a[j], a[j]), c(b0, b1), lend=1, lwd=lwd)
    }
  }
  if (diff(group) == 0 & group[1] != -1) {
    lines(c(a[1], a[2]), c(b1, b1), col = group[1] + 1, lend=2, lwd=lwd)
    invisible(return(c(mean(a), hgt, group[1])))
  } else {
    lines(c(a[1], a[2]), c(b1, b1), lend=2, lwd=lwd)
    invisible(return(c(mean(a), hgt, -1)))
  }
}

#' draw.for.below
#' @description Plots below the heatmap. Function called by draw.tree()
#' @param i argument
#' @param clust argument
#' @param trim argument
#' @param groups argument
#' @param delta argument
#' @param delta2 argument
#' @param sep argument
#' @param lwd Float. Line width.
#' @export
draw.fork.below = function (i, clust, trim, groups, delta, delta2, sep, lwd) {
  N = length(clust$height)
  mrg = clust$merge
  if (trim) {
    hgt = (clust$height[i]-min(clust$height))/(max(clust$height)-min(clust$height))
  } else {
    hgt = clust$height[i]/max(clust$height)
  }
  a = rep(0, 2)
  group = rep(0, 2)
  b1 = -delta-sep-delta2*hgt
  for (j in 1:2) {
    if (mrg[i, j] < 0) {
      k = which(clust$order == -mrg[i, j])
      a[j] = (2*k-1)/(2*(N+1))
      b0 = -delta-sep
      group[j] = groups[k]
    }
    else {
      tmp = draw.fork.below(mrg[i, j], clust, trim, groups, delta, delta2, sep, lwd)
      a[j] = tmp[1]
      b0 = -delta-sep-delta2*tmp[2]
      group[j] = tmp[3]
    }
    if (group[j] != -1) {
      lines(c(a[j], a[j]), c(b0, b1), col = group[j] + 1, lend=1, lwd=lwd)
    }
    else {
      lines(c(a[j], a[j]), c(b0, b1), lend=1, lwd=lwd)
    }
  }
  if (diff(group) == 0 & group[1] != -1) {
    lines(c(a[1], a[2]), c(b1, b1), col = group[1] + 1, lend=2, lwd=lwd)
    invisible(return(c(mean(a), hgt, group[1])))
  }
  else {
    lines(c(a[1], a[2]), c(b1, b1), lend=2, lwd=lwd)
    invisible(return(c(mean(a), hgt, -1)))
  }
}

#' gap.optimal()
#' @description gap.optimal function
#' @param X matrix
#' @param clust String. Direction of clustering.
#' @param distance Default is set to "eucledian". Distance measure used for hierarchical clustering
#' @param linkage Default is "complete". Linkage for hierarchical clustering.
#' @param B integer. Default is 100.
#' @export
gap.optimal = function(X, clust="col", distance="euclidean", linkage="complete", B=100) {
  if (clust=="col") {X = t(X)}
  if (distance=="euclidean") {
    res = gap.euclidean(X, linkage, B)$group
  } else if (distance=="binary") {
    res = gap.binary(X, linkage, B)$group
  } else if (distance=="pearson") {
    res = gap.pearson(X, linkage, B)$group
  } else if (distance=="spearman") {
    res = gap.spearman(X, linkage, B)$group
  }
  return(res)
}

#' gap.curve()
#' @description gap.curve() function
#' @param X matrix
#' @param clust String.
#' @param distance Default is set to "eucledian". Distance measure used for hierarchical clustering
#' @param linkage Default is "complete". Linkage for hierarchical clustering.
#' @param B integer. Default is 100.
#' @param K integer. Default is 6.
#' @export
gap.curve = function(X, clust="col", distance="euclidean", linkage="complete", B=100, K=6) {
  if (clust=="col") {X = t(X)}
  if (distance=="euclidean") {
    res = gap.euclidean(X, linkage, B, K)
  } else if (distance=="binary") {
    res = gap.binary(X, linkage, B, K)
  } else if (distance=="pearson") {
    res = gap.pearson(X, linkage, B, K)
  } else if (distance=="spearman") {
    res = gap.spearman(X, linkage, B, K)
  }
  return(res)
}

#' part.new
#' @description Identifies number of clusters using method "part".
#' @param X matrix
#' @param clust String. Direction of clustering.
#' @param distance Default is set to "eucledian". Distance measure used for hierarchical clustering
#' @param linkage Default is "complete". Linkage for hierarchical clustering.
#' @param B integer. Default is 100.
#' @param minSize Integer Minimum size of cluster. Default set to 10.
#' @param maxLevel Integer. Maximum depth/level for clustering, default set to 3.
#' @export
part.new = function(X, clust="col", distance="euclidean", linkage="complete", B=100, minSize=10, maxLevel=3) {
  if (clust=="col") {X = t(X)}
  if (distance=="euclidean") {
    res = part.euclidean(X, linkage, B, minSize, maxLevel)
  } else if (distance=="binary") {
    res = part.binary(X, linkage, B, minSize, maxLevel)
  } else if (distance=="pearson") {
    res = part.pearson(X, linkage, B, minSize, maxLevel)
  } else if (distance=="spearman") {
    res = part.spearman(X, linkage, B, minSize, maxLevel)
  }
  return(res)
}

#' part.euclidean
#' @description Identifies number of clusters using method "part" with Euclidean distance.
#' @param X matrix
#' @param linkage Default is "complete". Linkage for hierarchical clustering.
#' @param B integer.
#' @param minSize Integer Minimum size of cluster.
#' @param maxLevel Integer. Maximum depth/level for clustering.
#' @param K integer. Default set to 6.
#' @export
part.euclidean = function(X, linkage, B, minSize, maxLevel, K=6) {
  if (nrow(X) < 2*minSize) {
    return(rep(1,nrow(X)))
  }
  if (maxLevel == 0) {
    return(rep(1,nrow(X)))
  }
  K = min(nrow(X),K)
  k.opt = gap.euclidean(X, linkage, B, K)$k
  Dobs = dist(X, method="euclidean")
  Popt = cutree(hclust(Dobs, method=linkage), k=k.opt) # N x 1 matrix
  if (sum(table(Popt)>=minSize) < 2) {
    Popt = rep(1,nrow(X))
    k.opt = 1
  }
  group = rep(NA,nrow(X))
  if (k.opt > 1) {
    group.max = 0
    for (i in 1:k.opt) {
      group[Popt==i] = group.max + part.euclidean(X[Popt==i,,drop=F], linkage, B, minSize, maxLevel-1, K)
      group.max = max(group.max, group, na.rm=T)
    }
  } else {
    P2 = cutree(hclust(Dobs, method=linkage), k=2)
    P2.1 = part.euclidean(X[P2==1,,drop=F], linkage, B, minSize, maxLevel-1, min(sum(P2==1), K))
    P2.2 = part.euclidean(X[P2==2,,drop=F], linkage, B, minSize, maxLevel-1, min(sum(P2==2), K))
    if (all(c(P2.1,P2.2) == 1)) {
      group[1:nrow(X)] = 1
    } else {
      group[P2==1] = P2.1
      group[P2==2] = max(P2.1,na.rm=T) + P2.2
    }
  }
  return(group)
}

#' part.binary
#' @description Identifies number of clusters using method "part".
#' @param X matrix
#' @param linkage Default is "complete". Linkage for hierarchical clustering.
#' @param B integer. Default is 100.
#' @param minSize Integer Minimum size of cluster. Default set to 10.
#' @param maxLevel Integer. Maximum depth/level for clustering, default set to 3.
#' @param K Integer. Default is set to 6.
#' @export
part.binary = function(X, linkage, B, minSize, maxLevel, K=6) {
  if (nrow(X) < 2*minSize) {
    return(rep(1,nrow(X)))
  }
  if (maxLevel == 0) {
    return(rep(1,nrow(X)))
  }
  K = min(nrow(X),K)
  k.opt = gap.binary(X, linkage, B, K)$k
  Dobs = dist(X, method="binary")
  Popt = cutree(hclust(Dobs, method=linkage), k=k.opt) # N x 1 matrix
  if (sum(table(Popt)>=minSize) < 2) {
    Popt = rep(1,nrow(X))
    k.opt = 1
  }
  group = rep(NA,nrow(X))
  if (k.opt > 1) {
    group.max = 0
    for (i in 1:k.opt) {
      group[Popt==i] = group.max + part.binary(X[Popt==i,,drop=F], linkage, B, minSize, maxLevel-1, K)
      group.max = max(group.max, group, na.rm=T)
    }
  } else {
    P2 = cutree(hclust(Dobs, method=linkage), k=2)
    P2.1 = part.binary(X[P2==1,,drop=F], linkage, B, minSize, maxLevel-1, min(sum(P2==1), K))
    P2.2 = part.binary(X[P2==2,,drop=F], linkage, B, minSize, maxLevel-1, min(sum(P2==2), K))
    if (all(c(P2.1,P2.2) == 1)) {
      group[1:nrow(X)] = 1
    } else {
      group[P2==1] = P2.1
      group[P2==2] = max(P2.1,na.rm=T) + P2.2
    }
  }
  return(group)
}

#' part.pearson
#' @description Identifies number of clusters using method "part" with pearson correlation as distance measure.
#' @param X matrix
#' @param linkage Linkage for clustering.
#' @param B integer.
#' @param minSize Integer Minimum size of cluster.
#' @param maxLevel Integer. Maximum depth/level for clustering.
#' @param K integer. Default set to 6.
#' @export
part.pearson = function(X, linkage, B, minSize, maxLevel, K=6) {
  if (nrow(X) < 2*minSize) {
    return(rep(1,nrow(X)))
  }
  if (maxLevel == 0) {
    return(rep(1,nrow(X)))
  }
  K = min(nrow(X),K)
  k.opt = gap.pearson(X, linkage, B, K)$k
  Dobs = as.dist((1-cor(t(X), method="pearson"))/2)
  Popt = cutree(hclust(Dobs, method=linkage), k=k.opt) # N x 1 matrix
  if (sum(table(Popt)>=minSize) < 2) {
    Popt = rep(1,nrow(X))
    k.opt = 1
  }
  group = rep(NA,nrow(X))
  if (k.opt > 1) {
    group.max = 0
    for (i in 1:k.opt) {
      group[Popt==i] = group.max + part.pearson(X[Popt==i,,drop=F], linkage, B, minSize, maxLevel-1, K)
      group.max = max(group.max, group, na.rm=T)
    }
  } else {
    P2 = cutree(hclust(Dobs, method=linkage), k=2)
    P2.1 = part.pearson(X[P2==1,,drop=F], linkage, B, minSize, maxLevel-1, min(sum(P2==1), K))
    P2.2 = part.pearson(X[P2==2,,drop=F], linkage, B, minSize, maxLevel-1, min(sum(P2==1), K))
    if (all(c(P2.1,P2.2) == 1)) {
      group[1:nrow(X)] = 1
    } else {
      group[P2==1] = P2.1
      group[P2==2] = max(P2.1,na.rm=T) + P2.2
    }
  }
  return(group)
}

#' part.spearman
#' @description Identifies number of clusters using method "part" with spearman correlation as distance measure.
#' @param X matrix
#' @param linkage Linkage for clustering.
#' @param B integer.
#' @param minSize Integer Minimum size of cluster.
#' @param maxLevel Integer. Maximum depth/level for clustering.
#' @param K integer. Default set to 6.
#' @export
part.spearman = function(X, linkage, B, minSize, maxLevel, K=6) {
  if (nrow(X) < 2*minSize) {
    return(rep(1,nrow(X)))
  }
  if (maxLevel == 0) {
    return(rep(1,nrow(X)))
  }
  K = min(nrow(X),K)
  k.opt = gap.spearman(X, linkage, B, K)$k
  Dobs = as.dist((1-cor(t(X), method="spearman"))/2)
  Popt = cutree(hclust(Dobs, method=linkage), k=k.opt) # N x 1 matrix
  if (sum(table(Popt)>=minSize) < 2) {
    Popt = rep(1,nrow(X))
    k.opt = 1
  }
  group = rep(NA,nrow(X))
  if (k.opt > 1) {
    group.max = 0
    for (i in 1:k.opt) {
      group[Popt==i] = group.max + part.spearman(X[Popt==i,,drop=F], linkage, B, minSize, maxLevel-1, K)
      group.max = max(group.max, group, na.rm=T)
    }
  } else {
    P2 = cutree(hclust(Dobs, method=linkage), k=2)
    P2.1 = part.spearman(X[P2==1,,drop=F], linkage, B, minSize, maxLevel-1, min(sum(P2==1), K))
    P2.2 = part.spearman(X[P2==2,,drop=F], linkage, B, minSize, maxLevel-1, min(sum(P2==1), K))
    if (all(c(P2.1,P2.2) == 1)) {
      group[1:nrow(X)] = 1
    } else {
      group[P2==1] = P2.1
      group[P2==2] = max(P2.1,na.rm=T) + P2.2
    }
  }
  return(group)
}

#' gap.euclidean
#' @description Identifies number of clusters using method "gap" with Eucledian distance as distance measure.
#' @param X matrix
#' @param linkage Linkage for clustering.
#' @param B integer.
#' @param K integer. Default set to 6.
#' @export
gap.euclidean = function(X, linkage, B, K=6) {
  N = nrow(X)
  # Xobs = scale(X, scale=F)  ## REMOVED

  # Find W1,...,WK
  Wobs = rep(0, K)
  Dobs = dist(X, method="euclidean")
  Pobs = cutree(hclust(Dobs, method=linkage), 1:K) # N x K matrix
  for (k in 1:K) {
    Dk = as.matrix(Dobs)
    for (r in 1:k) {
      keep = (Pobs[,k]==r)
      Wobs[k] = Wobs[k] + sum(Dk[keep,keep])/(2*sum(keep))
    }
  }

  # Find W1*,...,WK* for B simulated data sets
  Xmean = outer(rep(1,nrow(X)), apply(X, 2, mean))
  Xobs = X - Xmean
  V = svd(Xobs)$v       # eigen(t(Xobs)%*%Xobs)$vectors
  R = apply(Xobs %*% V, 2, range)
  Wsim = matrix(0, B, K)
  for (b in 1:B) {
    print(paste("Permutation", b))
    # Simulate data and find partitions into 1,2,...,K clusters
    Xsim = Xmean + apply(R,2,function(x)runif(N,x[1],x[2])) %*% t(V)
    Dsim = dist(Xsim, method="euclidean") # Uses the most time
    Psim = cutree(hclust(Dsim, method=linkage), 1:K)
    wsim = rep(0,K)
    for (k in 1:K) {
      Dk = as.matrix(Dsim)
      for (r in 1:k) {
        keep = (Psim[,k]==r)
        wsim[k] = wsim[k] + sum(Dk[keep,keep])/(2*sum(keep))
      }
    }
    Wsim[b,] = wsim
  }

  # Calculate gap statistic
  res = apply(log(Wsim),2,mean) - log(Wobs)

  # Find optimal k
  sk = sqrt(1+1/B) * apply(log(Wsim),2,sd)
  k.opt = which(diff(res)-sk[-1] <= 0)[1]
  if (is.na(k.opt) || length(k.opt)==0) k.opt=1

  # Return result
  list(k=k.opt, gap=res, sk=sk, group=Pobs[,k.opt])
}

#' gap.binary
#' @description Identifies number of clusters using method "gap".
#' @param X matrix
#' @param linkage Linkage for clustering.
#' @param B integer.
#' @param K integer. Default set to 6.
#' @export
gap.binary = function(X, linkage, B, K=6) {
  N = nrow(X)
  # Xobs = scale(X, scale=F)  ## REMOVED

  # Find W1,...,WK
  Wobs = rep(0, K)
  Dobs = dist(X, method="binary")
  Pobs = cutree(hclust(Dobs, method=linkage), 1:K) # N x K matrix
  for (k in 1:K) {
    Dk = as.matrix(Dobs)
    for (r in 1:k) {
      keep = (Pobs[,k]==r)
      Wobs[k] = Wobs[k] + sum(Dk[keep,keep])/(2*sum(keep))
    }
  }

  # Find W1*,...,WK* for B simulated data sets
  Xmean = outer(rep(1,nrow(X)), apply(X, 2, mean))
  Xobs = X - Xmean
  V = svd(Xobs)$v       # eigen(t(Xobs)%*%Xobs)$vectors
  R = apply(Xobs %*% V, 2, range)
  Wsim = matrix(0, B, K)
  for (b in 1:B) {
    print(paste("Permutation", b))
    # Simulate data and find partitions into 1,2,...,K clusters
    Xsim = Xmean + apply(R,2,function(x)runif(N,x[1],x[2])) %*% t(V)
    Dsim = dist(Xsim, method="binary") # Uses the most time
    Psim = cutree(hclust(Dsim, method=linkage), 1:K)
    wsim = rep(0,K)
    for (k in 1:K) {
      Dk = as.matrix(Dsim)
      for (r in 1:k) {
        keep = (Psim[,k]==r)
        wsim[k] = wsim[k] + sum(Dk[keep,keep])/(2*sum(keep))
      }
    }
    Wsim[b,] = wsim
  }

  # Calculate gap statistic
  res = apply(log(Wsim),2,mean) - log(Wobs)

  # Find optimal k
  sk = sqrt(1+1/B) * apply(log(Wsim),2,sd)
  k.opt = which(diff(res)-sk[-1] <= 0)[1]
  if (is.na(k.opt) || length(k.opt)==0) k.opt=1

  # Return result
  list(k=k.opt, gap=res, sk=sk, group=Pobs[,k.opt])
}

#' gap.pearson
#' @description Identifies number of clusters using method "part" with Pearson correlation as distance measure.
#' @param X matrix
#' @param linkage Linkage for clustering.
#' @param B integer.
#' @param K integer. Default set to 6.
#' @export
gap.pearson = function(X, linkage, B, K=6)  {
  N = nrow(X)
  # Xobs = scale(X, scale=F)

  # Find W1,...,WK
  Wobs = rep(0, K)
  Dobs = as.dist((1-cor(t(X), method="pearson"))/2)
  Pobs = cutree(hclust(Dobs, method=linkage), 1:K) # N x K matrix
  for (k in 1:K) {
    Dk = as.matrix(Dobs)
    for (r in 1:k) {
      keep = (Pobs[,k]==r)
      Wobs[k] = Wobs[k] + sum(Dk[keep,keep])/(2*sum(keep))
    }
  }

  # Find W1*,...,WK* for B simulated data sets
  Xmean = outer(rep(1,nrow(X)), apply(X, 2, mean))  ## NEW
  Xobs = X - Xmean  ## NEW
  V = svd(Xobs)$v    # eigen(t(Xobs)%*%Xobs)$vectors
  R = apply(Xobs %*% V, 2, range)
  Wsim = matrix(0, B, K)
  for (b in 1:B) {
    print(paste("Permutation", b))
    # Simulate data and find partitions into 1,2,...,K clusters
    Xsim = Xmean + apply(R,2,function(x)runif(N,x[1],x[2])) %*% t(V)
    Dsim = as.dist((1-cor(t(Xsim), method="pearson"))/2)
    Psim = cutree(hclust(Dsim, method=linkage), 1:K)
    wsim = rep(0,K)
    for (k in 1:K) {
      Dk = as.matrix(Dsim)
      for (r in 1:k) {
        keep = (Psim[,k]==r)
        wsim[k] = wsim[k] + sum(Dk[keep,keep])/(2*sum(keep))
      }
    }
    Wsim[b,] = wsim
  }

  # Calculate gap statistic
  res = apply(log(Wsim),2,mean) - log(Wobs)

  # Find optimal k
  sk = sqrt(1+1/B) * apply(log(Wsim),2,sd)
  k.opt = which(diff(res)-sk[-1] <= 0)[1]

  # Return result
  list(k=k.opt, gap=res, sk=sk, group=Pobs[,k.opt])
}

#' gap.spearman
#' @description Identifies number of clusters using method "part" with Spearman correlation as distance measure.
#' @param X matrix
#' @param linkage Linkage for clustering.
#' @param B integer.
#' @param K integer. Default set to 6.
#' @export
gap.spearman = function(X, linkage, B, K=6)  {
  N = nrow(X)
  # Xobs = scale(X, scale=F)

  # Find W1,...,WK
  Wobs = rep(0, K)
  Dobs = as.dist((1-cor(t(X), method="spearman"))/2)
  Pobs = cutree(hclust(Dobs, method=linkage), 1:K) # N x K matrix
  for (k in 1:K) {
    Dk = as.matrix(Dobs)
    for (r in 1:k) {
      keep = (Pobs[,k]==r)
      Wobs[k] = Wobs[k] + sum(Dk[keep,keep])/(2*sum(keep))
    }
  }

  # Find W1*,...,WK* for B simulated data sets
  Xmean = outer(rep(1,nrow(X)), apply(X, 2, mean))  ## NEW
  Xobs = X - Xmean  ## NEW
  V = svd(Xobs)$v    # eigen(t(Xobs)%*%Xobs)$vectors
  R = apply(Xobs %*% V, 2, range)
  Wsim = matrix(0, B, K)
  for (b in 1:B) {
    print(paste("Permutation", b))
    # Simulate data and find partitions into 1,2,...,K clusters
    Xsim = Xmean + apply(R,2,function(x)runif(N,x[1],x[2])) %*% t(V)
    Dsim = as.dist((1-cor(t(Xsim), method="spearman"))/2)
    Psim = cutree(hclust(Dsim, method=linkage), 1:K)
    wsim = rep(0,K)
    for (k in 1:K) {
      Dk = as.matrix(Dsim)
      for (r in 1:k) {
        keep = (Psim[,k]==r)
        wsim[k] = wsim[k] + sum(Dk[keep,keep])/(2*sum(keep))
      }
    }
    Wsim[b,] = wsim
  }

  # Calculate gap statistic
  res = apply(log(Wsim),2,mean) - log(Wobs)

  # Find optimal k
  sk = sqrt(1+1/B) * apply(log(Wsim),2,sd)
  k.opt = which(diff(res)-sk[-1] <= 0)[1]

  # Return result
  list(k=k.opt, gap=res, sk=sk, group=Pobs[,k.opt])
}

#' draw.gap
#' @description Plots gap statistic
#' @param clust String. Sets direction of clustering, default is "col" (column-wise)
#' @param B Integer.
#' @param K Integer
#' @export
draw.gap = function(clust="col", B=100, K=6) {
  if (clust=="col") {
    res = gap.curve(.CLUSTERMAP$col.X, clust,
                    .CLUSTERMAP$col.distance, .CLUSTERMAP$col.linkage, B, K)
  } else if (clust=="row") {
    res = gap.curve(.CLUSTERMAP$row.X, clust,
                    .CLUSTERMAP$row.distance, .CLUSTERMAP$row.linkage, B, K)
  }
  plot(1:K, res$gap, type="b", pch=19, col="blue", xlab="clusters",
       ylab="Gap statistic", ylim=c(min(res$gap-res$sk),max(res$gap+res$sk)))
  segments(1:K, res$gap-res$sk, 1:K, res$gap+res$sk)
  points(res$k, res$gap[res$k], pch=19, col="red")
}

#' draw.silhouette
#' @description Get silhouettes
#' @param clust String. Sets direction of clustering, default is "col" (column-wise)
#' @param order String. Default is set to "tree"
#' @export
get.silhouette = function(clust="col", order="tree") {
  if (clust=="col") {
    if (order=="tree") {
      Y = t(.CLUSTERMAP$col.X[,.CLUSTERMAP$colclust$order])
    } else {
      Y = t(.CLUSTERMAP$col.X)
    }
  } else if (clust=="row") {
    if (order=="tree") {
      Y = .CLUSTERMAP$row.X[.CLUSTERMAP$rowclust$order,]
    } else {
      Y = .CLUSTERMAP$col.X
    }
  }
  cl = get.subclust(clust=clust, order=order)
  dY = as.matrix(dist(Y))
  nclust = length(unique(cl))
  D0 = matrix(NA, nrow(Y), nclust)
  for (i in 1:nrow(Y)) {
    for (j in 1:nclust) {
      D0[i,j] = mean(dY[i,cl==j])
    }
  }
  S0 = rep(NA,nrow(Y))
  for (i in 1:nrow(Y)) {
    a0 = D0[i,cl[i]]; b0 = min(D0[i,-cl[i]])
    S0[i] = (b0-a0)/max(a0,b0)
  }
  for (i in 1:nclust) {
    S0[cl==i] = rev(sort(S0[cl==i]))
  }
  tab = data.frame(cluster=cl, S=S0)
  return(tab)
}

#' draw.mean.silhouette
#' @description Plot mean silhouettes
#' @param clust String. Sets direction of clustering, default is "col" (column-wise)
#' @param K Integer, Default is set to 10.
#' @export
draw.mean.silhouette = function (clust="col", K=10) {
  nclust = 2:K
  S0 = rep(NA, length(nclust))
  for (k in 1:length(nclust)) {
    subclust(nclust[k], clust=clust)
    silh = get.silhouette(clust=clust, order="tree")
    S0[k] = mean(silh$S)
  }
  k.opt = which(S0==max(S0))[1]
  plot(nclust, S0, type="b", pch=19, col="blue", ylim=c(0,max(S0)), xlab="clusters", ylab="Silhouette score")
  points(nclust[k.opt], S0[k.opt], pch=19, col="red")
}

#' draw.silhouette
#' @description Plot Silhouettes for cluster diagnostics.
#' @param clust String. Sets direction of clustering, default is "col" (column-wise)
#' @param ... list
#' @export
draw.silhouette = function (clust="col", ...) {
  silh = get.silhouette(clust=clust, order="tree")
  nclust = length(unique(silh$cluster))
  N = length(silh$S)
  ymin = min(-0.02, min(silh$S)); ymax = 1.02
  xmin = -0.01; xmax = 1.01
  plot(2, 0, xlab="", ylab="", xaxt="n", yaxt="n", xlim=c(-0.22,1.3), ylim=c(ymin,ymax), bty="n")
  # Grey background with white grid lines
  rect(0, ymin, 1, ymax, border=NA, col="lightgrey")
  segments(seq(1/N,1,by=1/N),ymin,seq(1/N,1,by=1/N),ymax,col="white",lwd=0.8)
  for (yval in seq(0.25, 1, by=0.25)) {
    segments(0, yval, 1, yval, lwd=1.5, col="white")
    segments(0, yval-0.125, 1, yval-0.125, lwd=0.7, col="white")
  }
  # Label y-axis
  text(-0.07, 0, "0.00", cex=1.2)
  text(-0.07, 0.25, "0.25", cex=1.2)
  text(-0.07, 0.50, "0.50", cex=1.2)
  text(-0.07, 0.75, "0.75", cex=1.2)
  text(-0.07, 1.00, "1.00", cex=1.2)
  text(-0.2, 0.5, "Silhouette score", adj=0.5, srt=90, cex=1.5)
  # Silhouette
  for (i in 1:N) {
    rect((i-1)/N, 0, i/N, silh$S[i], col=silh$cluster[i], border=NA)
  }
  # Legend
  text(1.05, 0.62, "cluster", adj=0)
  for (i in 1:nclust) {
    rect(1.05, 0.6-(i-1)*0.07, 1.12, 0.6-i*0.07+0.005, col=i, border=NA)
    text(1.15, 0.6-i*0.07+0.035, i)
  }
}
cbsteen/clustermap documentation built on May 24, 2019, 1:35 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
cbsteen/clustermap
clustermap: a Versatile Toolbox in R for Clustering and Heatmap Visualisation

R/clustermap.R
In cbsteen/clustermap: clustermap: a Versatile Toolbox in R for Clustering and Heatmap Visualisation

R Package Documentation

Browse R Packages

We want your feedback!

cbsteen/clustermap clustermap: a Versatile Toolbox in R for Clustering and Heatmap Visualisation

R/clustermap.R In cbsteen/clustermap: clustermap: a Versatile Toolbox in R for Clustering and Heatmap Visualisation

R Package Documentation

Browse R Packages

We want your feedback!

cbsteen/clustermap
clustermap: a Versatile Toolbox in R for Clustering and Heatmap Visualisation

R/clustermap.R
In cbsteen/clustermap: clustermap: a Versatile Toolbox in R for Clustering and Heatmap Visualisation