R/consensus.R
In maanova: Tools for analyzing Micro Array experiments

Documented in buildtree cluster2num consensus consensus.hc consensus.kmean makeAB plot.consensus.hc plot.consensus.kmean

######################################################################
#
# consensus.R
#
# copyright (c) 2001-2002, Hao Wu and Gary A. Churchill, The Jackson Lab.
# written May, 2002
# Licensed under the GNU General Public License version 2 (June, 1991)
#
# Part of the R/maanova package
#
#
######################################################################

consensus <- function(macluster, level=0.8, draw=TRUE)
{
  
  if(class(macluster) != "macluster")
    stop("The input variable is not an object of class macluster.")

  if( (level<0.5) | (level>1) )
    stop("Level must be between 0.5 and 1.")

 # if(macluster$n.perm == 1) {
 #   msg <- "You don't have permutation result in the input object."
 #   msg <- paste(msg, "Set n.perm to be an integer bigger than 1")
 #   msg <- paste(msg, "in macluster function in order to do permutation.")
 #   stop(msg)
 # }
 # local variable
  n.perm <- macluster$n.perm

  # result
  consensus <- NULL

  if( macluster$method == "hc")
    #stop("Consensus for HC has not implemented yet.")
    consensus <- consensus.hc(macluster, level, draw)
  else if( macluster$method == "kmean") {
    consensus <- consensus.kmean(macluster, level, draw)
  }
  invisible(consensus)
}

consensus.hc <- function(macluster, level, draw)
{
  # convert the observed hierarchical cluster and store it
  # each clade is represented by three integers:
  # number of nodes, sum of node indices and product of node indices
  obstree <- cluster2num(macluster$cluster.obs$merge)

  # how many nodes in each tree?
  n.node <- length(obstree$tree)
  # make the result to be the observed tree
  result <- obstree
  # total number of nodes in consensus tree
  n.totalnode <- length(result$tree)
  result$count <- rep(1, n.totalnode)
  # loop for the permutation trees
  if(macluster$n.perm > 1) {
    for(i in 2:macluster$n.perm) {
      permtree <- cluster2num(macluster$cluster.perm[[i]])
      # compare the permutation tree with the nodes already in result
      for(j in 1:n.node) {
        # number of leaves in this tree
        n.leaves.tmp <- permtree$num[j,1]
        # sum of leaves numbers
        sum.leaves.tmp <- permtree$num[j,2]
        # find the matches for number of nodes and sum of indices
        # for this perm tree in the result
        idx.match <- which( (result$num[,1]==n.leaves.tmp)&
                           (result$num[,2]==sum.leaves.tmp) )

        flag <- FALSE

        # if there' some match, check if they are really the result we want
        if(length(idx.match) != 0) {
          for(k in 1:length(idx.match)) {
            if(length(setdiff(permtree$tree[[j]], result$tree[[idx.match[k]]])) == 0) {
              # no difference in this two sets. BINGO!
              result$count[idx.match[k]] <- result$count[idx.match[k]] + 1
              flag <- TRUE
              break
            }
          }
        }
        
        # if there's no real match, add this node to the result
        if( (length(idx.match) == 0) | (flag==FALSE) ) {
          n.totalnode <- n.totalnode + 1
          result$tree[[n.totalnode]] <- permtree$tree[[j]]
          result$num <- rbind(result$num, permtree$num[j,])
          result$count <- c(result$count, 1)
        }
      } # finish loop for nodes in this permutation tree
    } # finish loop for all trees
  }
  # total number of leaves in the tree
  result$nleaves <- n.node+2
  result$count <- result$count/macluster$n.perm

  # the leave names
  result$leaves.name <- macluster$leave.names
  #  if(missing(leaves.name)) {
  #    if(macluster$what == "gene")
  #      idx.tmp <- macluster$idx.gene
  #    else
  #      idx.tmp <- 1:result$nleaves
  #    leaves.name <- paste(macluster$what, idx.tmp, sep="")
  #  }
  #  result$leaves.name <- leaves.name
  # visualize the result consensus tree

  idx.node <- which(result$count>level)
  # number of inner nodes
  n.nodes <- length(idx.node)

  # make the consensus tree object as list
  ct <- vector("list", n.nodes+1)
  # make the root
  ct[[1]]$depth <- 0
  ct[[1]]$children <- NULL
  ct[[1]]$nleaves <- result$nleaves
  ct[[1]]$occurance <- 1

  if(n.nodes==0) {
    msg <- "There's no inner nodes in the tree."
    msg <- paste(msg, "All leaves are direct children of the root.")
    msg <- paste(msg, "You may need to lower the significant level.")
    stop(msg)
  }
  else { # there are some inner nodes
    # update occurance
    for(i in 1:n.nodes)
      ct[[i+1]]$occurance <- result$count[idx.node[i]]
        
    # take out the nodes with occurance bigger than level
    tree <- result$tree[idx.node]
    
    # convert the selected nodes to a matrix
    # the number of rows is the number of nodes exclude the root
    # the number of columns is the number of total leaves
    binstr <- matrix(rep(0, n.nodes*result$nleaves), n.nodes,
                     result$nleaves)
    # turn on certain positions
    for(i in 1:n.nodes)
      binstr[i, tree[[i]]] <- 1

    # call build tree recursively
    ct <- buildtree(ct, binstr, 0, 1, 1:n.nodes, 1:result$nleaves)
  }

  result$ct <- ct
  result$level <- level
  result$tree <- NULL
  result$count <- NULL
  result$num <- NULL
  class(result) <- "consensus.hc"

  # plot the result
  if(draw)
    plot(result, title=macluster$term)
  
  invisible(result)

}


plot.consensus.hc <- function(x, title, ...)
{
  con <- x
  # visualize the tree
  ###########################
  # calculate the coordinates
  ###########################
  # start from the root
  coord <- NULL
  coord$A <- NULL; coord$B <- NULL;
  coord$xycoord <- NULL; coord$nodecoord <- vector("list", length(con$ct))
  coord$leavesidx <- NULL
  maxdepth <- 0
    
  # find the maximum depth in the tree
  for(i in 1:length(con$ct))
    maxdepth <- max(maxdepth, con$ct[[i]]$depth)
  
  coord <- makeAB(con$ct, coord, 1, 0, maxdepth)
    
  # draw the figure
  if(missing(title)) title <- ""
  title <- paste(title, " - ", con$level*100, "% Consensus tree", sep="")
  plot(coord$A, coord$B, type="n", main=title,
       xlab="", ylab="depth", xaxt="n")
  for(i in 1:dim(coord$A)[2]) 
    lines(coord$A[,i], coord$B[,i])
  # add texts for leaves
  for(i in 1:length(coord$leavesidx))
    text(i-1, 0, con$leaves.name[coord$leavesidx[i]],srt=90, pos=4,
         offset=0.7)
  # add texts for significant levels
  for(i in 1:(length(con$ct)-1)) {
    x <- mean(range(coord$nodecoord[[i+1]][,1]))
    y <- coord$nodecoord[[i+1]][1,2]
    text(x, y, formatC(con$ct[[i+1]]$occurance, format="f", digits=2),
         pos=3)
  }
}

makeAB <- function(ct, coord, treeidx, startx, maxdepth)
{
  # local variable
  A <- coord$A
  B <- coord$B
  xycoord <- coord$xycoord
  nodecoord <- coord$nodecoord
  leavesidx <- coord$leavesidx
  
  subct <- ct[[treeidx]]
  # current x coordinate
  currentx <- startx
  
  # find the direct leaves in children
  directleaves <- subct$children[subct$children<0]
  # find the inner nodes in children 
  innernode <- subct$children[subct$children>0]

  # create the verticle lines for each leaf
  if(length(directleaves) != 0){
    leavesidx <- c(leavesidx, -directleaves)
    for(i in 1:length(directleaves)) {
      x0 <- currentx
      x1 <- currentx
      y0 <- 0
      y1 <- maxdepth - subct$depth + 1
      # append to A and B
      A <- cbind(A, c(x0,x1))
      B <- cbind(B, c(y0,y1))
      # increase startx
      currentx <- currentx + 1
      # update nodecoord for this node
      nodecoord[[treeidx]] <- rbind(nodecoord[[treeidx]], c(x1, y1))
    }
  }
  # if there's no inner nodes, create the horizontal and
  # vertical lines and return
  if(length(innernode) == 0) {
    x0 <- startx
    y0 <- maxdepth - subct$depth + 1
    x1 <- currentx - 1
    y1 <- y0
    A <- cbind(A, c(x0,x1))
    B <- cbind(B, c(y0,y1))
    # create a verticle line to connect the children and parent
    A <- cbind(A, c((x0+x1)/2,(x0+x1)/2))
    B <- cbind(B, c(y1, y1+1))
    # return the midpoint of the horizontal line
    xycoord <-  c((x0+x1)/2, y1+1)
  }
  else {
    # if there are any inner nodes,
    # call makeAB recursively for all inner nodes
    for(i in 1:length(innernode)) {
      if( is.null(A) )
        substartx <- 0
      else
        substartx <- max(A) + 1
      # update input coord object
      coord$A <- A;  coord$B <- B;
      coord$xycoord <- xycoord;
      coord$nodecoord <- nodecoord
      coord$leavesidx <- leavesidx
      # recursively call makeAB
      coord.tmp <- NULL
      coord.tmp <- makeAB(ct, coord, innernode[i], substartx, maxdepth)
      A <- cbind(coord.tmp$A, c(coord.tmp$xycoord[1], coord.tmp$xycoord[1]))
      B <- cbind(coord.tmp$B, c(coord.tmp$xycoord[2]-1, coord.tmp$xycoord[2]))
      nodecoord <- coord.tmp$nodecoord
      nodecoord[[treeidx]] <- rbind(nodecoord[[treeidx]], coord.tmp$xycoord)
      leavesidx <- coord.tmp$leavesidx
    }
    # create a horizontal line to connect all inner nodes and direct leaves 
    x0 <- min(nodecoord[[treeidx]][,1])
    x1 <- max(nodecoord[[treeidx]][,1])
    y0 <- nodecoord[[treeidx]][1,2]
    y1 <- y0
    A <- cbind(A, c(x0,x1))
    B <- cbind(B, c(y0,y1))
    # create a verticle line to connect the children and parent
    # if this node is not root
    if(treeidx != 1) {
      A <- cbind(A, c((x0+x1)/2, (x0+x1)/2))
      B <- cbind(B, c(y1,y1+1))
    }

    # return the midpoint of the horizontal line
    xycoord <- c((x0+x1)/2, y1+1)
  }

  # make the return variable
  result <- NULL
  result$A <- A
  result$B <- B
  result$xycoord <- xycoord
  result$nodecoord <- nodecoord
  result$leavesidx <- leavesidx
  result
}  

buildtree <- function(ct, binstr, depth, parent, idx.node, idx.leave)
{
  # local variable
  n <- dim(binstr)[1]
  l <- dim(binstr)[2]

  # find the missing leaves in binstr and make them the direct
  # children of parent
  missleave <- NULL
  for(i in 1:l) {
    if(all(binstr[,i]==0)) # direct children
      missleave <- c(missleave, i)
  }
  if(!is.null(missleave)) {
    ct[[parent]]$children <- c(ct[[parent]]$children, -idx.leave[missleave])
    binstr <- binstr[,-missleave]
    idx.leave <- idx.leave[-missleave]
  }
  # if there are only one node, we are done
  if(n==1) {
    ct[[idx.node+1]]$children <- -idx.leave
    ct[[idx.node+1]]$depth <- depth + 1
    ct[[idx.node+1]]$nleaves <- length(idx.leave)
    ct[[parent]]$children <- c(ct[[parent]]$children, idx.node+1)
    return(ct)
  }

  # find the node with maximum elements
  nele <- NULL
  for(i in 1:n)
    nele[i] <- sum(binstr[i,]==1)
  nleave <- max(nele)
  maxidx <- which(nele==nleave)[1]

  # update the node object for maxidx
  ct[[idx.node[maxidx]+1]]$depth <- depth + 1
  ct[[idx.node[maxidx]+1]]$nleaves <- nleave
  ct[[parent]]$children <- c(ct[[parent]]$children, idx.node[maxidx]+1)
  
  # use binstr(maxidx) to divide the inner nodes into two groups, e.g.,
  # set1 is the nodes to contain the leaves in binstr(maxidx) but doesn't
  # include maxidx itselt. set2 is the nodes don't contain that.
  leaves <- which(binstr[maxidx,]==1)
  noleaves <- which(binstr[maxidx,]==0)
  set1 <- NULL
  set2 <- NULL
  for(i in 1:dim(binstr)[1]) {
    if(all(binstr[i,leaves]==0))
      set2 <- c(set2,i)
    else
      set1 <- c(set1, i)
  }
  if(length(set1)==1)
    # only one node in set1
    ct[[idx.node[maxidx]+1]]$children <- -idx.leave[leaves]
  
  # exclude maxidx from set1
  set1 <- setdiff(set1, maxidx)

  # create two matrices
  binstr1 <- binstr[set1,leaves]
  binstr2 <- binstr[set2, noleaves]

  # call buildtree recursively
  if(length(binstr1)!=0)
    ct <- buildtree(ct, matrix(binstr1, nrow=length(set1)), ct[[idx.node[maxidx]+1]]$depth,
              idx.node[maxidx]+1, idx.node[set1], idx.leave[leaves])
  if(length(binstr2) != 0)
    ct <- buildtree(ct, matrix(binstr2, nrow=length(set2)), depth, parent, idx.node[set2],
              idx.leave[noleaves])

  ct

}

cluster2num <- function(clust){
  # number of leaves, exclude the last one(root)
  n.leaves <- dim(clust)[1] - 1
  result <- NULL
  result$tree <- list(NULL)
  result$num <- matrix(rep(0, n.leaves*2), n.leaves, 2)
  result$tree[[1]] <- -clust[1,]
  result$num[1,] <- c(2, sum(-clust[1,]))
  if(n.leaves > 1){
    for(i in 2:n.leaves) {
      tmp1 <- -clust[i,1]
      tmp2 <- -clust[i,2]
      if(tmp1 < 0)
        tmp1 <- result$tree[[-tmp1]]
      if(tmp2 < 0)
        tmp2 <- result$tree[[-tmp2]]
      result$tree[[i]] <- c(tmp1, tmp2)
      result$num[i,] <- c(length(result$tree[[i]]), sum(result$tree[[i]]))
    }
  }
  result
}

#############################################################
# The following functions are using for Kmeans clustering
#############################################################

consensus.kmean <- function(macluster, level, draw)
{
  # local variable
  what <- macluster$what
  n.perm <- macluster$n.perm
  term <- macluster$term
  n.leaves <- length(macluster$cluster.obs)
  n.grp <- macluster$kmean.ngroups
  idx.gene <- macluster$idx.gene
  idxname = macluster$leave.names 
  groups <- matrix(rep(0, n.perm*n.leaves), n.perm, n.leaves)
  
  # fill groups
  groups[1,] <- macluster$cluster.obs
  if(n.perm > 1)
    for(i in 2:n.perm)
      groups[i,] <- macluster$cluster.perm[[i]]

  # initialize output
  result <- list(NULL)
  for(i in 1:(n.grp+1))
    result[[i]] <- numeric(0)
  
  tmp <- rep(0, n.grp)
  # given level, calculate consensus result
  for(i in 1:n.leaves) {
    for(j in 1:n.grp) 
      tmp[j] <- sum(groups[,i]==j)/n.perm

    # find the consensus group for this leaf
    # if level is not 1, use ">" (that's because
    # if level is 0.5, the leaf can be in two groups, each
    # has 50% occurance)
    # if level is 1, use ">="
    if(level != 1)
      idx <- which(tmp>level)
    else 
      idx <- which(tmp>=level)
    
    if(length(idx) != 0) # there's a group for this leaf
      result[[idx]] <- c(result[[idx]], i)
    else
      result[[n.grp+1]] <- c(result[[n.grp+1]], i)
  }
  resultname <- list(NULL)
  for(i in 1:(n.grp+1))
    resultname[[i]] <- idxname[result[[i]]]
  
  # return variable
  output <- NULL
  output$group <- result
  output$groupname <- resultname
  output$what <- what
  output$data.draw <- macluster$VG
  output$condition.names <- macluster$condition.names
  class(output) <- "consensus.kmean"

  # draw the VG profiles
  if(draw)
    plot(output)
  
  invisible(output)
  
}

plot.consensus.kmean <- function(x, ...)
{
  con <- x
  # local variables
  data.draw <- con$data.draw
  ncondition <- dim(data.draw)[2]
  what <- con$what
  group <- con$group
  n.grp <- length(group) - 1

  # number of figures per plot
  if(n.grp > 8)
    nfig.plot <- 9
  else
    nfig.plot <- n.grp + 1
  
  # determin how many plots will be generated
  # maximum figures on one plot is 9
  # don't forget the not-in-any-group group
  # calculate the number of plots and figures per plot for a pretty layout
  
  if(n.grp > 8)
    n.plot <- ceiling((n.grp+1)/9)
  else
    n.plot <- 1
  
  # determine how many figures horizontally
  if(n.grp<=6)
    n.col <- 2
  else
    n.col <- 3
  # number of cols in figure is n.col, calculate number of rows
  n.row <- ceiling(nfig.plot/n.col)
  
  # save old par
  #old.mar <- par("mar")
  #old.las <- par("las")
  #old.mfrow <- par("mfrow")
  #old.mfcol <- par("mfcol")
  #on.exit(par(las = old.las, mar = old.mar, mfrow = old.mfrow,
  #            mfcol=old.mfcol))

  for(iplot in 1:n.plot) {
    # draw gene profiles
    ylabel <- "Expression level"
    if(what=="gene")
      xlabel <- "Sample"
    else 
      xlabel <- "Gene"

    # starting and ending group in this plot
    istart <- nfig.plot*(iplot-1) + 1
    iend <- nfig.plot * iplot
    if(iend > n.grp)
      iend <-  n.grp + 1
    # number of figures on this plot
    nfig.thisplot <- iend - istart + 1
    
    # generate a new figure for each plot
    dev.new()
#    if(.Platform$GUI == "AQUA")
#      quartz()
#    else
#      x11()

    # build a matrix for figure layout
    mat <- matrix(c(1:nfig.thisplot, rep(0, n.col*n.row-nfig.thisplot)),
                  n.row, n.col, byrow=TRUE)
    layout(mat)

    # y-axis limits. It'll be the same for all plots
    ylim <- range(data.draw)
    
    for(i in istart:iend) {
      idx.grp <- group[[i]]
      if(i==(n.grp+1))
        title <- paste("Not in any group \n\n(",length(idx.grp)," ",
                       what, "s)", sep="")
      else
        title <- paste("Expression profile for group ", i,
                       "\n\n (",length(idx.grp)," ", what,"s)", sep="")
      
      # data to be drawn for this group
      data.grp <- data.draw[idx.grp,]
      # create an empty plot
      plot(1:ncondition, ylim=ylim, type="n", xaxt="n",
           xlab=xlabel, ylab=ylabel, main=title)
      
      if(length(idx.grp) == 1)
        # only one gene in this group
        # draw a line
        lines(data.grp, col="blue", type="b")
      else if(length(idx.grp) > 0)
         # draw the lines
        apply(data.grp, 1, function(x) lines(x, col="blue", type="b"))
      
      # draw the axis
      axis(1, at=1:ncondition, labels=con$condition.names)
    }
  }
}
Any scripts or data that you put into this service are public.
maanova documentation built on Nov. 8, 2020, 8:21 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
maanova
Tools for analyzing Micro Array experiments

R/consensus.R
In maanova: Tools for analyzing Micro Array experiments

Defines functions plot.consensus.kmean consensus.kmean cluster2num buildtree makeAB plot.consensus.hc consensus.hc consensus

Documented in buildtree cluster2num consensus consensus.hc consensus.kmean makeAB plot.consensus.hc plot.consensus.kmean

Try the maanova package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

maanova Tools for analyzing Micro Array experiments

R/consensus.R In maanova: Tools for analyzing Micro Array experiments

Defines functions plot.consensus.kmean consensus.kmean cluster2num buildtree makeAB plot.consensus.hc consensus.hc consensus

Documented in buildtree cluster2num consensus consensus.hc consensus.kmean makeAB plot.consensus.hc plot.consensus.kmean

Try the maanova package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

maanova
Tools for analyzing Micro Array experiments

R/consensus.R
In maanova: Tools for analyzing Micro Array experiments