#' Trend plot of outlier clusteres
#'
#' Plotting the number of tiny clusters and unclassified cases
#' as the subregional size (hex_len) and different number of clusters (k) are
#' used.
#'
#' @param root_dir A directory path pointing to the parent directory of the
#' ouptput folders containing TIPC metric counts generated by
#' \code{\link[TIPC]{count_TIPC_cat}} at various hexagonal lengths.
#' @param output_dir A directory path for saving output plot; if NULL, plots will
#' be saved in root_dir
#' @param hex_len_range A vector of 2 integer number indicating the range of
#' hexagonal length. If unspecified (NULL), all found TIPC result folders of
#' different hexagonal lengths will be processed.
#' @param hex_len_stepsize An integer indicating the step size of hexagonal
#' length. If unspecified (NULL), all found TIPC result folders of
#' different hexagonal lengths will be processed.
#' @param min_cluster_size An integer indicating the minimum size of TIPC
#' cluster to be tested for survival analysis; default 10.
#' @param clustering_subfolder_nm A character string indicating the subfolder
#' name containing the TIPC clustering results within each hex_len.
#' @param pdf_width,pdf_height the width and height of the graphics region in inches.
#' The default values are 18 and 12; inherited from \code{\link[grDevices]{pdf}}
#' @export
#' @importFrom grDevices pdf dev.off
#' @import ggplot2
#' @import dplyr
#' @importFrom tidyr gather
trend_plot_tinyClusters <- function(root_dir = NULL, hex_len_range = NULL, hex_len_stepsize = NULL,
output_dir=NULL,
pdf_width=12, pdf_height=8,min_cluster_size = 30,
clustering_subfolder_nm='ConsensusClusterPlus_test'){
cluster_no <- k <- total_outlier_cases <- no_effective_cluster <- no_outlier_cluster <- NULL
## ======================
## check argument root_dir
## ======================
if(is.null(root_dir)) stop('No root_dir is provided!\n')
if(is.null(output_dir))output_dir <- root_dir
## ======================
## identify all result folders generated by tessellation
## ======================
subfolder_nms <- list.dirs(path = root_dir, full.names = TRUE, recursive = FALSE)
## subsetting for output folders from tessellation
subfolder_nms <- grep(x= subfolder_nms, pattern = 'TIPC_hexLen', value = TRUE)
## ======================
## subsettting for result folders specified by users
## ======================
if(!is.null(hex_len_range)){
hex_lens <- seq(from = min(hex_len_range), to = max(hex_len_range), by = hex_len_stepsize)
subfolder_nms <- grep(x=subfolder_nms, pattern = paste0(hex_lens, collapse = '|'), value = TRUE)
}
## ======================
## ======================
## consolidate all hex_len and k
## ======================
## ======================
res_df <- list()
ll <- 1
for (f_nm in subfolder_nms){
## ---------------
## extract hex_len
## ---------------
hex_lenX <- sapply(strsplit(f_nm, split = 'hexLen'), "[[", 2)
hex_lenX <- as.integer(hex_lenX)
## ---------------
## loop over each k
## ---------------
k_dirs <- list.dirs(path = file.path(f_nm,clustering_subfolder_nm), recursive = FALSE)
k_dirs <- basename(k_dirs)
k_dirs <- k_dirs[grep(x=k_dirs, 'k[0-9]')]
k_dirs <- file.path(f_nm,clustering_subfolder_nm, k_dirs)
for (kk in k_dirs){
setwd(kk)
cluster_kk <- read.csv(file = list.files(pattern = 'cluster_no_k'), as.is = TRUE)
res_kk <- cluster_kk %>%
group_by(cluster_no) %>%
count(cluster_no)
res_kk$hex_len <- hex_lenX
res_kk$k <- max(res_kk$cluster_no)
res_kk$no_outlier_cluster <- sum(res_kk$n<min_cluster_size)
res_kk$no_effective_cluster <- sum(res_kk$n>=min_cluster_size)
res_kk$total_outlier_cases <- sum(res_kk$n[res_kk$n<min_cluster_size])
res_df[[ll]] <- res_kk
ll <- ll+1
}
}# end all subfolder_nms i.e. hex_len
all_hexLen_k <- do.call(rbind, res_df)
## ======================
## ======================
## plotting: effect of hex_len
## ======================
## ======================
## ---------------
## total. no. of outlier/unclassified cases
## ---------------
plot_fnm <- file.path(output_dir, 'total_no_unclassifed_cases_by_hexlen_N_k.pdf')
pl <- ggplot(data=all_hexLen_k, aes(x = k, y=total_outlier_cases)) +
geom_point(size=4) +
scale_x_discrete(limits=c(range(all_hexLen_k$k)))+
ylab('total no. of unclassified tumors')+
facet_wrap(~ hex_len) + theme_bw()+
theme(axis.title = element_text(size = 26, face = 'bold'), axis.text = element_text(size = 24),
strip.text.x = element_text(size = 26, face='bold'))
ggsave(plot = pl, filename = plot_fnm, width=pdf_width, height=pdf_height)
## ---------------
## no. of outlier clusters & effective clusters
## ---------------
plot_fnm <- file.path(output_dir, 'No_effective_vs_outlier_clusters_by_hexlen_N_k.pdf')
pl <- ggplot(data=all_hexLen_k, aes(x = k, y=no_effective_cluster, color='no. of effective clusters')) +
geom_point(size=4) +
geom_point(size=4, aes(x = k, y=no_outlier_cluster, color='no. of outlier clusters')) +
scale_x_discrete(limits=c(range(all_hexLen_k$k)))+
ylab('No. of clusters')+
facet_wrap(~ hex_len) + theme_bw()+
theme(legend.position='bottom' ,
legend.text = element_text(size = 26),legend.title = element_text(size = 26),
axis.title = element_text(size = 26, face = 'bold'), axis.text = element_text(size = 24),
strip.text.x = element_text(size = 26, face='bold'))+
labs(color="Cluster type")+
guides(color=guide_legend(ncol=1, override.aes = list(size = 7)))
ggsave(plot = pl, filename = plot_fnm, width=pdf_width, height=pdf_height)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.