##############################################
# The deveroper and the maintainer: #
# Lang Ho Lee (lhlee@bwh.harvard.edu) #
# Sasha A. Singh (sasingh@bwh.harvard.edu) #
##############################################
# Mute warnings
options(warn=1)
#' @title get_theme_blank
#' @description Predefined ggplot theme for removing ticks, titles and labels of X and Y axis
#' @import ggplot2
#' @return A ggplot theme
get_theme_blank <- function(){
return(theme(
axis.title.x = element_blank(),
axis.title.y = element_blank(),
axis.text.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks.x=element_blank(),
axis.ticks.y=element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "white")
))
}
#' @title length2
#' @description Customized function for vector length calculation
#' @param x A vector
#' @param na.rm If it is FALSE, no exclusion of NA values.
#' @return A vector length
length2 <- function (x, na.rm=FALSE) {
if (na.rm) sum(!is.na(x))
else length(x)
}
#' @title get_stats
#' @description Calculate statistics of the given data for XINA network analysis
#' @param centrality_results Network centrality score data frame calculated by XINA network module
#' @param na.rm If it is FALSE, no exclusion of NA values.
#' @import plyr
#' @return A data frame containing statistics of XINA network centrality scores
get_stats <- function(centrality_results, na.rm=FALSE) {
tgc <- ddply(data.frame(d=centrality_results$Score, r=centrality_results$Rank), c("r"), .drop=TRUE,
.fun = function(xx, col) {
c(N = length2(xx[[col]], na.rm=na.rm),
max = max(xx[[col]], na.rm=na.rm),
min = min(xx[[col]], na.rm=na.rm),
mean = mean (xx[[col]], na.rm=na.rm)
)
}, "d")
return(tgc)
}
#' @title mutate_colors
#' @description 'mutate_colors' generates new color scheme for XINA clustering plot based on condition composition results (\link[XINA]{plot_condition_compositions}).
#' If any clusters have higher percentage than the 'threshold_percent', XINA will assign new colors in accordance to 'color_for_condition'.
#' If not, XINA will give 'gray' color or user-defined color via 'null_color' parameter.
#' @param condition_composition A data frame generated by \link[XINA]{plot_condition_compositions}
#' @param color_for_condition A vector like 'color_for_condition' of \link[XINA]{xina_clustering}
#' @param threshold_percent Default is 50. The percentage threshold for giving new colors
#' @param null_color Default is 'gray'. This color is for clusters that are not biased to any of experimental conditions
#' @return A data frame containing statistics of XINA network centrality scores
#' @export
#' @examples
#' # load XINA example data
#' data(xina_example)
#'
#' # Plot condition composition pie-chart with default option
#' condition_composition <- plot_condition_compositions(example_clusters)
#' example_clusters$color_for_clusters <- mutate_colors(condition_composition,
#' example_clusters$color_for_condition)
#' plot_clusters(example_clusters, xval=c(0,2,6,12,24,48,72), xylab=FALSE)
#'
mutate_colors <- function(condition_composition, color_for_condition, null_color='gray', threshold_percent=50) {
Cluster <- Condition <- NULL
color_for_clusters <- c()
conditions <- unique(condition_composition$Condition)
cl=1
for (cl in seq_len(max(condition_composition$Cluster))) {
sub_df <- subset(condition_composition, Cluster==cl)
color_for_cluster <- NULL
for (condition in conditions) {
sub_df2 <- subset(sub_df, Condition==condition)
if (nrow(sub_df2) > 0)
if (sub_df2$Percent_ratio >= threshold_percent)
color_for_cluster <- as.character(color_for_condition[condition])
}
if (is.null(color_for_cluster)) {
color_for_cluster <- null_color
}
color_for_clusters <- c(color_for_clusters, color_for_cluster)
}
return(color_for_clusters)
}
#' @title get_color_for_nodes
#' @description Pre-defined 30 colors
#' @return A vector for color code of XINA graphics
get_color_for_nodes <- function(){
# http://colorbrewer2.org/#type=qualitative&scheme=Set3&n=12
return(c('#66c2a5', '#fc8d62', '#8da0cb', '#e78ac3', '#a6d854',
'#ffd92f', '#e5c494', '#b3b3b3',
'#a6cee3', '#1f78b4', '#b2df8a', '#33a02c', '#fb9a99',
'#e31a1c', '#fdbf6f', '#ff7f00', '#cab2d6', '#6a3d9a',
'#ffff99', '#b15928',
'#8dd3c7', '#ffffb3', '#bebada',
'#fb8072', '#80b1d3', '#fdb462', '#b3de69', '#fccde5',
'#d9d9d9', '#bc80bd', '#ccebc5', '#ffed6f'))
}
#' @title plot_NA
#' @description Draw NULL plot
#' @import graphics
#' @return a empty plot
plot_NA <- function() {
plot(1, type="n", axes=FALSE, xlab="", ylab="", main="Not Available")
}
#' @title get_colors
#' @description Generate color series for XINA graphics
#' @param nClusters The number of clusters
#' @param set Pre-defined color series set
#' @param colorset manually defined color codes
#' @return A vector for color code of XINA graphics
get_colors <- function(nClusters, set='', colorset=NULL)
{
if (is.null(colorset)) {
if (set == '') {
# Pastel tone
colorset <- c("#FFB5E8", "#FF9CEE", "#FFCCF9", "#FCC2FF", "#F6A6FF",
"#B28DFF", "#C5A3FF", "#D5AAFF", "#ECD4FF", "#FBE4FF",
"#DCD3FF", "#A79AFF", "#B5B9FF", "#97A2FF", "#AFCBFF",
"#AFF8DB", "#C4FAF8", "#85E3FF", "#ACE7FF", "#6EB5FF",
"#BFFBCB", "#DBFFD6", "#BEBEBE", "#E7FFAC", "#FFFFD1",
"#FFC9DE", "#FFABAB", "#FFBEBC", "#FFCBC1", "#FFF5BA")
# #FFFFD1 --> #d3d300
N <- 30
} else {
# Original color
colorset <- c("#8B008B", "#F1C40F", "#00008B","#1E90FF", "#104E8B",
"#FF0000", "#8B0000", "#9ACD32", "#008B00", "#00FFFF",
"#008B8B","#0000FF", "#8B0A50", "#FFFF00", "#8B7500",
"#00FF00", "#FF69B4", "#A020F0", "#FFA500", "#FF00FF")
N <- 20
}
}
if (nClusters>N){
# color_for_graph <- colorRampPalette(colorset)(nClusters)
color_for_graph <- c(rep(colorset, nClusters/length(colorset)),
colorset[seq_len(nClusters%%N)])
} else {
color_for_graph <- colorset
}
return(color_for_graph)
}
#' @title plot_condition_compositions
#' @description
#' computes condition composition of the XINA clustering results and draws pie-charts.
#' @param clustering_result A list containing XINA clustering results.
#' See \link[XINA]{xina_clustering}
#' @param bullseye If it is TRUE, draw bullseye plot instead of the pie-chart. Default is FALSE
#' @param ggplot_theme This is ggplot theme to modify condition composition pie-chart and bulles eye plots.
#' @return A condition composition plot and a data frame containing condition compositions of the clusters
#' @import ggplot2
#' @import plyr
#' @import gridExtra
#' @export
#' @examples
#'
#' # load XINA example data
#' data(xina_example)
#'
#' # Plot condition composition pie-chart with default option
#' plot_condition_compositions(example_clusters)
#'
#' # Make a new color code for conditions
#' condition_colors <- c("tomato","steelblue1","gold")
#' names(condition_colors) <- example_clusters$condition
#' example_clusters$color_for_condition <- condition_colors
#'
#' # Draw condition composition pie-chart with the new color code
#' plot_condition_compositions(example_clusters)
#'
#' # Draw condition composition bullseye plot
#' plot_condition_compositions(example_clusters, bullseye = TRUE)
#'
plot_condition_compositions <- function(clustering_result, bullseye=FALSE, ggplot_theme=NULL)
{
Cluster <- Condition <- Percent_ratio <- NULL
# Collect cluster information by proteins
clustering_dir <- clustering_result$out_dir
nClusters <- clustering_result$nClusters
max_cluster <- clustering_result$max_cluster
data_column <- clustering_result$data_column
column_numbers <- seq_len(length(data_column))
super_ds <- clustering_result$clusters
num_conditions <- length(unique(super_ds$Condition))
# Color setting
color_for_nodes <- clustering_result$color_for_condition
if (length(color_for_nodes)==length(clustering_result$condition)) {
names(color_for_nodes) <- clustering_result$condition
} else {
stop(paste("Length of color_for_nodes vector is not same to the number of conditions",
length(color_for_nodes), length(clustering_result$condition)))
}
# Draw plots
list_output <- list()
out_summary <- data.frame()
for (cn in seq_len(max_cluster))
{
cl_subset <- subset(super_ds, Cluster == cn)
plot_title <- paste("#",cn, " (n=",nrow(cl_subset),")", sep='')
if (nrow(cl_subset)==0) {
g_summary <- data.frame(Condition=as.vector(unique(super_ds$Condition)),N=NA,Percent_ratio=NA)
list_output[[cn]] <- ggplot(g_summary, aes(x="", y=Percent_ratio, fill=Condition)) +
labs(title = plot_title) + geom_blank() + theme_bw() + get_theme_blank()
} else {
g_summary <- ddply(cl_subset, c("Condition"), .drop=TRUE,
.fun = function(xx, col) {
c(N = length2(xx[[col]], na.rm=TRUE))},
c("Condition"))
g_summary$Percent_ratio <- round((g_summary$N/sum(g_summary$N))*100, 2)
if (bullseye) {
# Bullseye plot
# http://ggplot2.tidyverse.org/reference/coord_polar.html
list_output[[cn]] <- ggplot(cl_subset, aes(x = factor(1), fill = factor(Condition))) +
geom_bar(width = 1) + coord_polar() +
labs(title = plot_title) +
scale_fill_manual(values=color_for_nodes) +
get_theme_blank() +
guides(fill=guide_legend(title=NULL))
} else {
# PieChart
# http://www.sthda.com/english/wiki/ggplot2-pie-chart-quick-start-guide-r-software-and-data-visualization
list_output[[cn]] <- ggplot(g_summary, aes(x="", y=Percent_ratio, fill=Condition)) +
geom_bar(width = 1, stat = "identity") +
coord_polar("y", start=0) +
labs(title = plot_title) +
scale_fill_manual(values=color_for_nodes) +
get_theme_blank() +
guides(fill=guide_legend(title=NULL))
}
}
if (!is.null(ggplot_theme)) {
list_output[[cn]] <- list_output[[cn]] + ggplot_theme
}
out_summary <- rbind(out_summary,cbind(Cluster=cn, g_summary))
}
do.call(grid.arrange,list_output)
return(out_summary)
}
#' @title plot_clusters
#' @description Draw all the clustering results.
#' 'plot_clusters' draws two plots, scaled and unscaled line graphs.
#' Scaled graphs have same y limits that are 0 to 1 by default,
#' but can be changed via 'y_lim' parameter.
#' @param clustering_result A list containing XINA clustering results.
#' See \link[XINA]{xina_clustering}
#' @param y_lim Y axis limit. If you set y_lim=c(0,1),
#' 'plot_clusters' will plot line graphs scaled from 0 to 1 in y-axis
#' Default is NULL, which means unscaled line graphs.
#' @param xval XINA basically considers time points as a ordinary variable, like 1,2,3,4...n.
#' You can make the time points as a continuous variable using xval.
#' @param xtickmark Change X axis tick marks.
#' Default is data_column of the clustering result list.
#' @param xylab If it is FALSE, x and y labels will be blank.
#' If it is TRUE (defualt), x and y labels will be shown.
#' @param ggplot_theme This is ggplot theme to modify XINA clustering plot.
#' @return Line graphs of all the clusters
#' @import ggplot2
#' @export
#' @examples
#' library(ggplot2)
#'
#' # load XINA example data
#' data(xina_example)
#'
#' # Draw clustering plots
#' plot_clusters(example_clusters)
#'
#' # Apply theme to the clustering plot
#' theme1 <- theme(title=element_text(size=8, face='bold'),
#' axis.text.x = element_text(size=7),
#' axis.text.y = element_blank(),
#' axis.ticks.x = element_blank(),
#' axis.ticks.y = element_blank(),
#' axis.title.x = element_blank(),
#' axis.title.y = element_blank())
#' plot_clusters(example_clusters, ggplot_theme=theme1)
#'
plot_clusters <- function(clustering_result, y_lim=NULL, xval=NULL,
xtickmark=NULL, xylab=TRUE, ggplot_theme=NULL)
{
Cluster <- indviduals <- x <- y <- NULL
clustering_dir <- clustering_result$out_dir
nClusters <- clustering_result$nClusters
max_cluster <- clustering_result$max_cluster
data_column <- clustering_result$data_column
data <- clustering_result$clusters
# X axis set up
if (is.null(xval)) {
column_numbers <- seq_len(length(data_column))
} else {
column_numbers <- xval
}
if (is.null(xtickmark)) {
xtickmark <- data_column
}
# Plot parameter setting
color_for_graph <- clustering_result$color_for_clusters
if (length(color_for_graph)<max_cluster) {
stop(paste("Length of color_for_clusters vector should be bigger than max_cluster",
length(clustering_result$color_for_clusters), max_cluster))
}
# Draw line graphs and save it as a PNG image
scaled_plots <- list()
plot_out <- list()
for (i in seq_len(max_cluster)) {
sub_data<-as.matrix(subset(data, Cluster %in% i, select = data_column))
nlines <- nrow(sub_data)
if (nlines > 0) {
data_Sds<-cbind(data.frame(expand.grid(x = column_numbers, indviduals = seq_len(nlines))),
y=as.numeric(t(sub_data[, data_column])))
num_prots <- nrow(data_Sds)/length(data_column)
plt_title <- paste("#",i, " (n=",num_prots,")", sep='')
plot_out[[i]]<-ggplot(data_Sds, aes(x=x, y=y, group=indviduals)) +
geom_line(colour = color_for_graph[i]) +
geom_hline(yintercept=1/length(data_column), color="red", linetype="dashed") +
labs(x="", y="", title=plt_title) +
scale_x_continuous(breaks=column_numbers, labels=xtickmark)
if(!xylab){
plot_out[[i]] <- plot_out[[i]] + get_theme_blank()
}
if (!is.null(y_lim)) {
plot_out[[i]] <- plot_out[[i]] + ylim(y_lim)
}
if (!is.null(ggplot_theme)) {
plot_out[[i]] <- plot_out[[i]] + ggplot_theme
}
} else {
plot_out[[i]] <- plot_NA()
print(paste("Cluster #",i," doesn't have any members", sep=''))
}
}
do.call(gridExtra::grid.arrange,plot_out)
# return(plot_out)
}
#' @title plot_clusters_all
#' @description Draw line graphs of all the proteins in the given dataset
#' @param clustering_result A list containing XINA clustering results.
#' See \link[XINA]{xina_clustering}
#' @param selected_condition A condition name to draw the kinetics plot
#' @return a list containing clustering results and pdf file
#' containing a BIC plot in current working directory.
#' @import ggplot2
#' @export
#' @examples
#'
#' # load XINA example data
#' data(xina_example)
#'
#' # Plot kinetics of all the proteins in Control
#' plot_clusters_all(example_clusters, selected_condition="Control")
#'
#' # Plot kinetics of all the proteins in Stimulus1
#' plot_clusters_all(example_clusters, selected_condition="Stimulus1")
#'
#' # Plot kinetics of all the proteins in Stimulus2
#' plot_clusters_all(example_clusters, selected_condition="Stimulus2")
#'
#' # Plot kinetics of all the proteins in three data
#' plot_clusters_all(example_clusters)
#'
plot_clusters_all <- function(clustering_result, selected_condition=NULL)
{
condition <- x <- y <- z_levels_Sds <- NULL
data_column <- clustering_result$data_column
data <- clustering_result$clusters
column_numbers <- seq_len(length(data_column))
color_for_condition <- clustering_result$color_for_condition
all_prots <- data.frame(expand.grid(x = seq_len(length(data_column)), y=seq_len(nrow(data))),
condition = as.vector(data[,"Condition"]),
z_levels_Sds=as.numeric(t(data[data_column])))
if (!is.null(selected_condition)) {
all_prots <- subset(all_prots, condition==selected_condition)
title2show <- paste("Unclustered profiles of",selected_condition)
} else {
title2show <- "Unclustered profiles of all"
}
ggplot(all_prots, aes(x=x, y=z_levels_Sds, group=y, colour=condition)) +
geom_line() +
scale_color_manual(values=color_for_condition) +
geom_hline(yintercept=1/max(column_numbers), color="red", linetype="dashed") +
labs(x = "", y = paste("Normalized intensity by",clustering_result$norm_method)) +
labs(title = title2show) +
scale_x_continuous(breaks=column_numbers, labels=data_column)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.