R/make_balloon_plot_data_from_comparison_result.R

Defines functions make_balloon_plot_data_from_comparison_result

Documented in make_balloon_plot_data_from_comparison_result

#' Make Balloon Plot Data from Run Comparison Function
#' 
#' @param comparison_result a data structure generated by run_comparison() function
#' @param melt_data boolean, whether to apply reshape2::melt() to data.frame
#' 
#' @description   Code to convert the output of TC for a single cancer type to balloon plots. We
#'   don't use ranks here, since some cell lines will get low ranks even if none have particularly
#'   low scores (similarly some will get high ranks even if all scores are relatively low). Instead,
#'   we convert the distance matrices for all tumor-cell line comparisons to 0-1 using min-max
#'   scaling, and then compute the mean similarity to tumor for each cell line. The overall score is
#'   just a simple mean of the data type specific scores right now - could use different weights.
#'   
#' @note TODO Add example; run analysis save comparison and use here 
#' 
#' @return a ggplot object
#' 
#' @importFrom ggplot2 ggplot geom_point geom_text ggtitle xlab ylab element_blank element_line
#' @importFrom dplyr mutate 
#' @importFrom reshape2 melt
#' @importFrom stats reorder
#' @importFrom magrittr %>%
#' 
#' @export 
make_balloon_plot_data_from_comparison_result <- function(comparison_result, melt_data=TRUE) {
  #mean_similarity_to_tumors_after_0to1_scaling_MUT <- round(1 - rowMeans(apply(comparison_result$dist_mat_by_data_type$mut,2,convert_to_0_to_1_using_xminusmin_by_maxminusmin)[comparison_result$cell_line_ids,comparison_result$tumor_ids],na.rm=T),digits=2)
  #mean_similarity_to_tumors_after_0to1_scaling_EXP <- round(1 - rowMeans(apply(comparison_result$dist_mat_by_data_type$exp,2,convert_to_0_to_1_using_xminusmin_by_maxminusmin)[comparison_result$cell_line_ids,comparison_result$tumor_ids],na.rm=T),digits=2)
  #mean_similarity_to_tumors_after_0to1_scaling_CNA <- round(1 - rowMeans(apply(comparison_result$dist_mat_by_data_type$cna,2,convert_to_0_to_1_using_xminusmin_by_maxminusmin)[comparison_result$cell_line_ids,comparison_result$tumor_ids],na.rm=T),digits=2)
  
  # These will only be the samples that had all the input data types
  # These vectors may be equal to or smaller (but never bigger) than the length of the column number for the distance matrices 
  cell_line_ids <- comparison_result$cell_line_ids 
  tumor_ids <- comparison_result$tumor_ids
  
  available_data_types <- names(comparison_result$dist_mat_by_data_type)
  
  mean_similarity_to_tumors_scaling_mat <- matrix(rep(NA, length(cell_line_ids)*length(available_data_types)), 
                                                  ncol=length(available_data_types), 
                                                  dimnames = list(cell_line_ids, paste0(available_data_types, "_score")))
  
  for(i in 1:length(available_data_types)) {
    #i <- 1 
    
    data_type <- available_data_types[i]
    #data_type <- "mut"
    # This may have more columns than length(cell_line_ids) + length(tumor_ids)
    cur_data <- comparison_result$dist_mat_by_data_type[[data_type]] 
      
    # mean_similarity_to_tumors_scaling_mat is a vector 
    #mean_similarity_to_tumors_scaling_mat[,i] <- round(1 - rowMeans(apply(cur_data[cell_line_ids,tumor_ids],2,convert_to_0_to_1_using_xminusmin_by_maxminusmin), na.rm=T), digits=2)
    
    ## Min-Max scaling on the cell line - tumor distance/similarity matrix, so all values are in 0-1
    cur_max <- max(as.vector(cur_data[cell_line_ids, tumor_ids])) # max distance over all cell line - tumor pairs
    cur_min <- min(as.vector(cur_data[cell_line_ids, tumor_ids])) # min distance over all cell line - tumor pairs
    mean_similarity_to_tumors_scaling_mat[,i] <- round(1 - rowMeans((cur_data[cell_line_ids,tumor_ids] - cur_min)/(cur_max - cur_min + 1e-6)), digits=2)   
    #mean_similarity_to_tumors_scaling_mat[,i] <- round(1 - apply((cur_data[cell_line_ids,tumor_ids] - cur_min)/(cur_max - cur_min),1,function(x){mean(sort(x)[1:10])}),digits=2)   
  }
  
  #mean_similarity_to_tumors_after_0to1_scaling_MUT <- round(1 - rowMeans(apply(comparison_result$dist_mat_by_data_type$mut[comparison_result$cell_line_ids,comparison_result$tumor_ids],2,convert_to_0_to_1_using_xminusmin_by_maxminusmin),na.rm=T),digits=2)
  #mean_similarity_to_tumors_after_0to1_scaling_CNA <- round(1 - rowMeans(apply(comparison_result$dist_mat_by_data_type$cna[comparison_result$cell_line_ids,comparison_result$tumor_ids],2,convert_to_0_to_1_using_xminusmin_by_maxminusmin),na.rm=T),digits=2)
  #mean_similarity_to_tumors_after_0to1_scaling_EXP <- round(1 - rowMeans(apply(comparison_result$dist_mat_by_data_type$exp[comparison_result$cell_line_ids,comparison_result$tumor_ids],2,convert_to_0_to_1_using_xminusmin_by_maxminusmin),na.rm=T),digits=2)

  average_mean_similarity_scaling <- round(rowMeans(mean_similarity_to_tumors_scaling_mat),digits=2)
  #average_mean_similarity_after_0to1_scaling <- round((mean_similarity_to_tumors_after_0to1_scaling_MUT + mean_similarity_to_tumors_after_0to1_scaling_CNA + mean_similarity_to_tumors_after_0to1_scaling_EXP)/3, digits=2)
  
  heatmap_mat <- as.data.frame(mean_similarity_to_tumors_scaling_mat)
  heatmap_mat$Cell_Line_Name <- cell_line_ids
  heatmap_mat$combined_score <- average_mean_similarity_scaling
  
  # heatmap_mat <- as.data.frame(
  #   cbind(names(average_mean_similarity_after_0to1_scaling), mean_similarity_to_tumors_after_0to1_scaling_MUT, mean_similarity_to_tumors_after_0to1_scaling_CNA, mean_similarity_to_tumors_after_0to1_scaling_EXP, average_mean_similarity_after_0to1_scaling))
  #heatmap_mat[,-1] <- apply(heatmap_mat[,-1],2,as.numeric)
  #heatmap_mat[,-1] <- round(heatmap_mat[,-1],digits=2)
  #colnames(heatmap_mat) <- c("Cell_Line_Name","MUT_score","CNA_score","EXP_score","Combined_score")

  if(melt_data) {
    df <- melt(heatmap_mat, id.vars = "Cell_Line_Name")
    df <- transform(df, Cell_Line_Name=reorder(df$Cell_Line_Name, df$value))     
  } else {
    df <- heatmap_mat
    df <- df[order(-df$combined_score), ]
  }
  
  return(df)
}
cannin/tumorcomparer documentation built on Feb. 7, 2023, 3:13 p.m.