R/findClones.R

Defines functions find_clone

Documented in find_clone

#'Identify clones in hierarchical tree.
#'
#'Based on hierarchical clustering, identify the hard/soft clones.
#'@param hc The hclust objects with new items added generated by \code{hclust_tree}.
#'@param fdr_thresh FDR criterion for clone nodes. Default: -2.
#'@param share_min A feature is considered shared if present in share_min fraction of leaves in a node.Default: 0.90.
#'@param n_share Minimal number of shared features in a clone node. Default: 3.
#'@param bymax Logical. If TRUE (Default), use maximal of mean FDR for the node to find clones.
#'@param climb_from_size An integer.
#'@param climb_to_share An integer.
#'@param graphic Logical. If TRUE (Default), generate the hierarchical tree plot with hard/soft clones labeled.
#'@return A hclust object.
#'@export



find_clone <- function(hc, fdr_thresh = -2, share_min = 0.90, n_share = 3, bymax = TRUE,
                       climb_from_size = 2, climb_to_share = 3, graphic = TRUE){

  # fdr_thresh: FDR criterion for clone nodes
  # share_min: A feature is considered shared if present in share_min fraction of leaves in a node
  # n_share: Minimal number of shared features in a clone node
  # bymax: TRUE, Use maximal of mean FDR for the node to find clones?
  # climb_from_size
  # climb_to_share



  ## Number of features (approximately) shared across the node
  #leaves of the node together with the shared features form a bicluster
  count_pins_share <- colSums(hc$sharing > share_min)



  ## A node is considered compliant if FDR is below and its sharing across above threshod for the node
  #and all its descendants
  if(bymax){
    node_compliant <- (hc$maxfdr < fdr_thresh & (count_pins_share - count_pins_share[nrow(hc$merge)]) > n_share)}

  if(!bymax){
    node_compliant <- (hc$meanfdr < fdr_thresh & (count_pins_share - count_pins_share[nrow(hc$merge)]) > n_share)}




  leftchild <- (hc$merge[,1] < 0)
  leftchild[hc$merge[,1] > 0] <- node_compliant[hc$merge[hc$merge[,1] > 0, 1]]

  rightchild <- (hc$merge[,2] < 0)
  rightchild[hc$merge[,2] > 0] <- node_compliant[hc$merge[hc$merge[,2] > 0, 2]]

  new_node_compliant <- node_compliant & leftchild & rightchild

  while(!all(new_node_compliant == node_compliant)){
    node_compliant <- new_node_compliant

    leftchild <- (hc$merge[,1] < 0)
    leftchild[hc$merge[,1] > 0] <- node_compliant[hc$merge[hc$merge[,1] > 0, 1]]

    rightchild <- (hc$merge[,2] < 0)
    rightchild[hc$merge[,2] > 0] <- node_compliant[hc$merge[hc$merge[,2] > 0, 2]]

    new_node_compliant <- node_compliant & leftchild & rightchild
  }


  #Clone nodes are maximum compliant nodes
  clone_nodes <- setdiff((1:nrow(hc$merge))[node_compliant],
                         c(hc$merge[node_compliant, 1], hc$merge[node_compliant, 2]))
  hc$fdr_thresh <- fdr_thresh
  hc$clone_nodes <- clone_nodes
  hc$bymax <- bymax
  hc$count_pins_share <- count_pins_share
  hc$share_min <- share_min
  hc$n_share <- n_share

  if(!is.null(hc$clone_nodes)){
    hc$softclones <- hcClimb(hc, minsize = climb_from_size, minshare = climb_to_share + hc$count_pins_share[nrow(hc$merge)])}

  if(graphic){
    plot(hc, labels = FALSE)
    abline(h = hc$height[hc$softclones["hard",]], lty = 2)
    abline(h = hc$height[hc$softclones["soft",]], lty = 2, col = "red")
  }

  return(hc)

}
JunyanSong/SCclust documentation built on April 16, 2022, 8:44 p.m.