#' plot shc object
#'
#' Visualize the results of SHC analysis as an annotated
#' dendrogram with significant branches highlighted
#'
#' @param x a \code{shc} object to plot produced by a call to \code{shc}
#' @param groups a vector specifying group labels for the clustered objects.
#' The vector should be in the same order as the rows of the original
#' data matrix. If specified, color blocks will be placed along the
#' bottom of the dendrogram. Useful when the samples have a priori known
#' groupi behavior. (default = \code{NULL})
#' @param use_labs a boolean specifyin whether rowlabels should be added as
#' text along the bottom of the dendrogram (default = \code{TRUE})
#' @param fwer a boolean specifying whether the FWER control procedure of
#' Meinshausen et al. 2010 should be used. (default = \code{TRUE})
#' @param alpha a double between 0 and 1 specifying the significance cutoff. If
#' \code{fwer} is TRUE, the FWER of the entire dendrogram is controlled at
#' \code{alpha}, else, each branch is tested at \code{alpha}. Only has
#' effect if \code{alpha(shc)} = 1. (default = 0.05 or
#' \code{alpha} used for \code{x} if original \code{shc} called
#' with \code{alpha} < 1)
#' @param ci_idx a numeric value between 1 and \code{length(ci)}
#' specifiying which CI to use for the FWER stopping rule.
#' This only has an effect if \code{alpha} < 1. (default = 1 or
#' \code{ci_idx} used for \code{x} if original \code{shc} called with
#' \code{alpha} < 1 or \code{fwer} = TRUE)
#' @param ci_emp a logical value specifying whether to use the empirical
#' p-value from the CI based on \code{ci_idx} for the FWER stopping rule.
#' As with \code{ci_idx} this only has an effect if \code{alpha} < 1.
#' (default = FALSE or \code{ci_emp} used for \code{x} if original \code{shc}
#' called with \code{alpha} < 1 or \code{fwer} = TRUE)
#' @param hang a double value corresponding to the \code{hang} parameter for
#' the typical call to \code{plot} for an object of class
#' \code{hsigclust} (default = -1)
#' @param ... other parameters to be used by the function
#'
#' @return
#' \code{ggplot} object containing a dendrogram annotated by the results of the
#' corresponding \code{shc} analysis
#'
#' @details
#' This function makes use of dendrogram plotting functions made
#' available through the \pkg{ggdendro} package which provides a
#' \pkg{ggplot2}-like grammer for working with dendrograms.
#'
#' @import ggplot2 ggdendro dplyr
#' @importFrom stats as.dendrogram hclust is.leaf
#' @name plot-shc
#' @export
#' @method plot shc
#' @author Patrick Kimes
plot.shc <- function(x, groups = NULL, use_labs = TRUE, hang = -1,
fwer = TRUE, alpha = NULL, ci_idx = NULL, ci_emp = NULL,
...) {
shc <- x
## determine number of samples
n <- nrow(shc$p_emp) + 1
## colors to be used in figure
col_tx_sig <- "#A60A3C"
col_nd_null <- "gray"
## check validity of ci_idx
if (!is.null(ci_idx)) {
if (ci_idx > ncol(shc$p_emp)) {
stop("invalid choice for ci_idx; ci_idx must be < length(ci)")
}
}
## check validity of alpha
if (!is.null(alpha)) {
if (alpha > 1 || alpha < 0) {
stop("invalid choice for alpha; alpha must be 0 < alpha < 1")
}
}
## check fwer, alpha, ci_idx, ci_emp feasibility w/ original alpha value
if (shc$in_args$alpha < 1) {
if (is.null(alpha)) {
alpha <- shc$in_args$alpha
} else if (alpha > shc$in_args$alpha) {
alpha <- shc$in_args$alpha
warning(paste0("shc constructed using smaller alpha, using alpha = ", alpha))
}
if (is.null(ci_emp)) {
ci_emp <- shc$in_args$ci_emp
} else if (shc$in_args$ci_emp != ci_emp) {
ci_emp <- shc$in_args$ci_emp
warning("shc constructed using alpha < 1, using ci_emp from x$in_args")
}
if (is.null(ci_idx)) {
ci_idx <- shc$in_args$ci_idx
} else if (shc$in_args$ci_idx != ci_idx) {
ci_idx <- shc$in_args$ci_idx
warning("shc constructed using alpha < 1, using ci_idx from x$in_args")
}
if (!fwer) {
fwer <- TRUE
warning("shc constructed using alpha < 1, using fwer = TRUE")
}
} else {
## set default values if shc called with alpha = 1
if (is.null(alpha)) {
alpha <- 0.05
}
if (is.null(ci_idx)) {
ci_idx <- 1
}
if (is.null(ci_emp)) {
ci_emp <- FALSE
}
}
## for easier calling
if (ci_emp) {
p_use <- shc$p_emp[, ci_idx]
} else {
p_use <- shc$p_norm[, ci_idx]
}
## determine sig/non-sig nodes based on cutoffs
if (fwer) {
## use FWER controlling hierarchical testing procedure
cutoff <- fwer_cutoff(shc, alpha)
pd_map <- .pd_map(shc$hc_dat, n)
nd_type <- rep("", n-1)
for (k in 1:(n-1)) {
## check if subtree is large enough
if (length(unlist(shc$idx_hc[k, ])) < shc$in_args$n_min) {
nd_type[k] <- "n_small"
next
}
## check if parent was significant
if ((k > 1) && (nd_type[pd_map[k]] != "sig")) {
nd_type[k] <- "no_test"
next
}
## compare against p-value cutoff
if (alpha < 1) {
nd_type[k] <- ifelse(p_use[k] < cutoff[k], "sig", "not_sig")
}
}
} else {
##if not using FWER control, use alpha as flat cutoff
cutoff <- rep(alpha, n-1)
nd_type <- ifelse(p_use < alpha, "sig", "not_sig")
nd_type[shc$nd_type == "n_small"] <- "n_small"
}
##using ggdendro package
shc_dend <- as.dendrogram(shc$hc_dat, hang=hang)
shc_dendat <- ggdendro::dendro_data(shc_dend)
shc_segs <- ggdendro::segment(shc_dendat)
shc_labs <- ggdendro::label(shc_dendat)
##change label hight to be correct
shc_labs$y <- .lab_height(shc_dend)
if (!is.null(groups) & length(groups) == n) {
shc_labs$clusters <- groups[shc$hc_dat$order]
}
## flip index, hclust/dend and shc use revered ordering
rev_nd_type <- rev(nd_type)
rev_p_use <- rev(p_use)
## significant nodes
sig_linkvals <- as.factor(shc$hc_dat$height[rev_nd_type == "sig"])
sig_segs <- filter(shc_segs, as.factor(y) %in% sig_linkvals)
## non-signficant nodes
notsig_linkvals <- as.factor(shc$hc_dat$height[rev_nd_type == "not_sig"])
notsig_segs <- filter(shc_segs, as.factor(y) %in% notsig_linkvals)
## tested nodes
test_linkvals <- as.factor(shc$hc_dat$height[which(rev_nd_type == "sig")])
test_segs <- filter(shc_segs, as.factor(y) %in% test_linkvals)
test_segtops <- filter(test_segs, (y == yend) & (x < xend))
test_segtops <- test_segtops[order(test_segtops[, 2]), ]
test_segtops <- cbind(test_segtops,
"pval"=format(rev_p_use[which(rev_nd_type == "sig")],
digits=3, scientific=TRUE))
## small sample nodes
skip_linkvals <- as.factor(shc$hc_dat$height[rev_nd_type == "n_small"])
skip_segs <- filter(shc_segs, as.factor(y) %in% skip_linkvals)
## un-tested nodes (non-significant parent node)
if (fwer) {
fwer_linkvals <- as.factor(shc$hc_dat$height[rev_nd_type == "no_test"])
fwer_segs <- filter(shc_segs, as.factor(y) %in% fwer_linkvals)
}
## calculate various plotting dimensions prior to actual ggplot call
ax_x_ref <- max(shc$hc_dat$height)
ax_x_top <- max(ax_x_ref)*1.25
ax_x_bot <- -max(ax_x_ref)/4
ax_x_scale <- floor(log10(ax_x_top))
ax_y_range <- max(shc_segs$y) - min(shc_segs$y)
## make initial ggdendro with null color
plot_dend <- ggplot() +
geom_segment(data=shc_segs,
aes(x=x, y=y, xend=xend, yend=yend),
color=col_nd_null) +
theme_bw()
## add appropriate title
plot_dend <- plot_dend +
ggtitle(paste("showing all p-values below",
alpha, "cutoff",
ifelse(fwer, "(FWER corrected)", "")))
##add color group labels for objects if specified
if (!is.null(groups)) {
shc_labs$color_y <- shc_labs$y - ax_y_range/30
plot_dend <- plot_dend +
geom_tile(data=shc_labs,
aes(x=x, y=color_y, fill=as.factor(clusters)),
height=ax_y_range/30, alpha=1) +
scale_fill_discrete('Labels')
}
##add text labels for each object if desired
if (use_labs) {
shc_labs$txt_y <- shc_labs$y - (2-is.null(groups))*ax_y_range/30
plot_dend <- plot_dend +
geom_text(data=shc_labs,
aes(x=x, y=txt_y, label=label,
hjust=1, vjust=.5, angle=90),
size=3)
}
##add colored segments to plot if any branches were significant
if (sum(rev_nd_type == "sig") > 0) {
plot_dend <- plot_dend +
geom_segment(data=sig_segs,
aes(x=x, y=y, xend=xend, yend=yend, color="sig"),
size=1)
}
##add p-values for segments if they were tested
if (sum(rev_nd_type == "sig") > 0) {
plot_dend <- plot_dend +
geom_text(data=test_segtops,
aes(x=x, y=y, label=pval, hjust=-0.2, vjust=-0.5),
col=col_tx_sig, size=4)
}
##add FWER controlled segments if any branches were controlled
if (fwer & sum(rev_nd_type == "no_test") > 0) {
plot_dend <- plot_dend +
geom_segment(data=fwer_segs,
aes(x=x, y=y, xend=xend, yend=yend, color="no_test"),
size=1)
}
##add colored segments if any branches had size < n_min
if (sum(rev_nd_type == "n_small") > 0) {
plot_dend <- plot_dend +
geom_segment(data=skip_segs,
aes(x=x, y=y, xend=xend, yend=yend, color="n_small"),
size=1)
}
## add colored segments if any branches had non-significant calls
if (sum(rev_nd_type == "not_sig") > 0) {
plot_dend <- plot_dend +
geom_segment(data=notsig_segs,
aes(x=x, y=y, xend=xend, yend=yend, color="not_sig"),
size=1)
}
plot_dend <- plot_dend +
scale_y_continuous(name="linkage", expand=c(.25, 0),
breaks=seq(0, ax_x_top, by=10^ax_x_scale)) +
scale_x_continuous(name="", expand=c(.1, 0),
breaks=c(),
labels=c())
##attach appropriate labels for colored branches along dendrogram
plot_dend + scale_color_manual(name='Nodes',
values=c('sig'='#FF1E66',
'not_sig'='orange',
'n_small'='#53A60A',
'no_test'='#096272'),
breaks=c('sig',
'not_sig',
'n_small',
'no_test'),
labels=c('significant',
'not significant',
'cluster too small',
'untested by FWER'),
drop=TRUE)
}
## #############################################################################
## #############################################################################
## helper functions
##pull label information for when hang > 0
## necessary since ggdendro package won't provide this output
.lab_height <- function(tree, heights = c()) {
for (k in seq(length(tree))) {
if (is.leaf(tree[[k]])) {
heights <- c(heights, attr(tree[[k]], "height"))
} else {
heights <- .lab_height(tree[[k]], heights)
}
}
heights
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.