#' Make Balloon Plot Data from Run Comparison Function
#'
#' @param comparison_result a data structure generated by run_comparison() function
#' @param melt_data boolean, whether to apply reshape2::melt() to data.frame
#'
#' @description Code to convert the output of TC for a single cancer type to balloon plots. We
#' don't use ranks here, since some cell lines will get low ranks even if none have particularly
#' low scores (similarly some will get high ranks even if all scores are relatively low). Instead,
#' we convert the distance matrices for all tumor-cell line comparisons to 0-1 using min-max
#' scaling, and then compute the mean similarity to tumor for each cell line. The overall score is
#' just a simple mean of the data type specific scores right now - could use different weights.
#'
#' @note TODO Add example; run analysis save comparison and use here
#'
#' @return a ggplot object
#'
#' @importFrom ggplot2 ggplot geom_point geom_text ggtitle xlab ylab element_blank element_line
#' @importFrom dplyr mutate
#' @importFrom reshape2 melt
#' @importFrom stats reorder
#' @importFrom magrittr %>%
#'
#' @export
make_balloon_plot_data_from_comparison_result <- function(comparison_result, melt_data=TRUE) {
#mean_similarity_to_tumors_after_0to1_scaling_MUT <- round(1 - rowMeans(apply(comparison_result$dist_mat_by_data_type$mut,2,convert_to_0_to_1_using_xminusmin_by_maxminusmin)[comparison_result$cell_line_ids,comparison_result$tumor_ids],na.rm=T),digits=2)
#mean_similarity_to_tumors_after_0to1_scaling_EXP <- round(1 - rowMeans(apply(comparison_result$dist_mat_by_data_type$exp,2,convert_to_0_to_1_using_xminusmin_by_maxminusmin)[comparison_result$cell_line_ids,comparison_result$tumor_ids],na.rm=T),digits=2)
#mean_similarity_to_tumors_after_0to1_scaling_CNA <- round(1 - rowMeans(apply(comparison_result$dist_mat_by_data_type$cna,2,convert_to_0_to_1_using_xminusmin_by_maxminusmin)[comparison_result$cell_line_ids,comparison_result$tumor_ids],na.rm=T),digits=2)
# These will only be the samples that had all the input data types
# These vectors may be equal to or smaller (but never bigger) than the length of the column number for the distance matrices
cell_line_ids <- comparison_result$cell_line_ids
tumor_ids <- comparison_result$tumor_ids
available_data_types <- names(comparison_result$dist_mat_by_data_type)
mean_similarity_to_tumors_scaling_mat <- matrix(rep(NA, length(cell_line_ids)*length(available_data_types)),
ncol=length(available_data_types),
dimnames = list(cell_line_ids, paste0(available_data_types, "_score")))
for(i in 1:length(available_data_types)) {
#i <- 1
data_type <- available_data_types[i]
#data_type <- "mut"
# This may have more columns than length(cell_line_ids) + length(tumor_ids)
cur_data <- comparison_result$dist_mat_by_data_type[[data_type]]
# mean_similarity_to_tumors_scaling_mat is a vector
#mean_similarity_to_tumors_scaling_mat[,i] <- round(1 - rowMeans(apply(cur_data[cell_line_ids,tumor_ids],2,convert_to_0_to_1_using_xminusmin_by_maxminusmin), na.rm=T), digits=2)
## Min-Max scaling on the cell line - tumor distance/similarity matrix, so all values are in 0-1
cur_max <- max(as.vector(cur_data[cell_line_ids, tumor_ids])) # max distance over all cell line - tumor pairs
cur_min <- min(as.vector(cur_data[cell_line_ids, tumor_ids])) # min distance over all cell line - tumor pairs
mean_similarity_to_tumors_scaling_mat[,i] <- round(1 - rowMeans((cur_data[cell_line_ids,tumor_ids] - cur_min)/(cur_max - cur_min + 1e-6)), digits=2)
#mean_similarity_to_tumors_scaling_mat[,i] <- round(1 - apply((cur_data[cell_line_ids,tumor_ids] - cur_min)/(cur_max - cur_min),1,function(x){mean(sort(x)[1:10])}),digits=2)
}
#mean_similarity_to_tumors_after_0to1_scaling_MUT <- round(1 - rowMeans(apply(comparison_result$dist_mat_by_data_type$mut[comparison_result$cell_line_ids,comparison_result$tumor_ids],2,convert_to_0_to_1_using_xminusmin_by_maxminusmin),na.rm=T),digits=2)
#mean_similarity_to_tumors_after_0to1_scaling_CNA <- round(1 - rowMeans(apply(comparison_result$dist_mat_by_data_type$cna[comparison_result$cell_line_ids,comparison_result$tumor_ids],2,convert_to_0_to_1_using_xminusmin_by_maxminusmin),na.rm=T),digits=2)
#mean_similarity_to_tumors_after_0to1_scaling_EXP <- round(1 - rowMeans(apply(comparison_result$dist_mat_by_data_type$exp[comparison_result$cell_line_ids,comparison_result$tumor_ids],2,convert_to_0_to_1_using_xminusmin_by_maxminusmin),na.rm=T),digits=2)
average_mean_similarity_scaling <- round(rowMeans(mean_similarity_to_tumors_scaling_mat),digits=2)
#average_mean_similarity_after_0to1_scaling <- round((mean_similarity_to_tumors_after_0to1_scaling_MUT + mean_similarity_to_tumors_after_0to1_scaling_CNA + mean_similarity_to_tumors_after_0to1_scaling_EXP)/3, digits=2)
heatmap_mat <- as.data.frame(mean_similarity_to_tumors_scaling_mat)
heatmap_mat$Cell_Line_Name <- cell_line_ids
heatmap_mat$combined_score <- average_mean_similarity_scaling
# heatmap_mat <- as.data.frame(
# cbind(names(average_mean_similarity_after_0to1_scaling), mean_similarity_to_tumors_after_0to1_scaling_MUT, mean_similarity_to_tumors_after_0to1_scaling_CNA, mean_similarity_to_tumors_after_0to1_scaling_EXP, average_mean_similarity_after_0to1_scaling))
#heatmap_mat[,-1] <- apply(heatmap_mat[,-1],2,as.numeric)
#heatmap_mat[,-1] <- round(heatmap_mat[,-1],digits=2)
#colnames(heatmap_mat) <- c("Cell_Line_Name","MUT_score","CNA_score","EXP_score","Combined_score")
if(melt_data) {
df <- melt(heatmap_mat, id.vars = "Cell_Line_Name")
df <- transform(df, Cell_Line_Name=reorder(df$Cell_Line_Name, df$value))
} else {
df <- heatmap_mat
df <- df[order(-df$combined_score), ]
}
return(df)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.