#' Identify nodes in a hierarchical tree which qualify as clones.
#' Identify 'hard' clones first, then expand them to 'soft' clones. Expansion may
#' result in clone mergers.
#' Based on hierarchical clustering, identify the hard/soft clones.
#' @param hc An hclust object with additional items generated by \code{hclust_tree}.
#' @param fdrthresh maximal allowed value for log10(FDR) for any pair of leaves
#' in a clone node. Default: -2.
#' @param sharemin A feature is considered 'widely shared' if present in sharemin
#' fraction of leaves in a node.Default: 0.90.
#' @param nshare Minimal number of 'widely shared' features in a hard clone.
#' Default: 3.
#' @param bymax Logical. If TRUE (default), use maximal, and otherwise mean,
#' FDR for the node as a criterion for a hard clone.
#' @param climbfromsize An integer: minimal size of a hard clone allowed to be
#' expanded
#' @param climbtoshare An integer: expand the clone as long as the number of
#' widely shared features is at least this value
#' @return An hclust object, with hard/soft clones indicated
#' @export
find_clones <- function(hc,
fdrthresh = -2,
sharemin = 0.85,
nshare = 3,
bymax = T,
climbfromsize = 2,
climbtoshare = 3){
# fdrthresh: FDR criterion for clone nodes
# share_min: A feature is considered shared if present in share_min fraction
# of leaves in a node
# nshare: Minimal number of shared features in a clone node
# bymax: TRUE, Use maximal of mean FDR for the node to find clones?
# climbfromsize
# climbtoshare
# Number of features (approximately) shared across the node
# leaves of the node together with the shared features form a bicluster
count_pins_share <- colSums(hc$sharing > sharemin)
# A node is considered compliant if FDR is below and its sharing across above
# threshod for the node and all its descendants
# if(bymax){
# node_compliant <- (hc$maxfdr < fdrthresh &
# (count_pins_share - count_pins_share[nrow(hc$merge)]) > nshare)
# } else {
# node_compliant <- (hc$meanfdr < fdrthresh &
# (count_pins_share - count_pins_share[nrow(hc$merge)]) > nshare)
# }
if (bymax) {
node_compliant <- (hc$mergefdr < fdrthresh & count_pins_share > nshare)
} else {
node_compliant <- (hc$meanfdr < fdrthresh & count_pins_share > nshare)
}
leftchild <- (hc$merge[,1] < 0)
leftchild[hc$merge[,1] > 0] <- node_compliant[hc$merge[hc$merge[,1] > 0, 1]]
rightchild <- (hc$merge[,2] < 0)
rightchild[hc$merge[,2] > 0] <- node_compliant[hc$merge[hc$merge[,2] > 0, 2]]
new_node_compliant <- node_compliant & leftchild & rightchild
while(!is.na(new_node_compliant) && !is.na(node_compliant) &&
!all(new_node_compliant == node_compliant)){
node_compliant <- new_node_compliant
leftchild <- (hc$merge[,1] < 0)
leftchild[hc$merge[,1] > 0] <- node_compliant[hc$merge[hc$merge[,1] > 0, 1]]
rightchild <- (hc$merge[,2] < 0)
rightchild[hc$merge[,2] > 0] <- node_compliant[hc$merge[hc$merge[,2] > 0, 2]]
new_node_compliant <- node_compliant & leftchild & rightchild
}
#Clone nodes are maximum compliant nodes
clone_nodes <- setdiff((1:nrow(hc$merge))[node_compliant],
c(hc$merge[node_compliant, 1], hc$merge[node_compliant, 2]))
hc$fdrthresh <- fdrthresh
hc$clonenodes <- clone_nodes
hc$bymax <- bymax
hc$count_pins_share <- count_pins_share
hc$sharemin <- sharemin
hc$nshare <- nshare
if(!is.null(hc$clonenodes)){
hc$softclones <- hc_climb(
hc, minsize = climbfromsize,
minshare = climbtoshare + hc$count_pins_share[nrow(hc$merge)])
}
return(hc)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.