# R/get_contributing_signatures.R

#' Find the main contributing signatures of each cluster
#'
#' For every cluster that contains more than one sample, the signatures are
#' ranked by their mean cosine similarity to the samples of that cluster. The
#' mutational profiles of the cluster are then refitted with the top 2, 3, ...
#' signatures until the mean cosine similarity between the original and the
#' reconstructed profiles reaches `threshold`.
#'
#' Clusters of size 1 cannot be handled and are removed. The search starts
#' with the two top-ranked signatures, so at least two signatures are reported
#' for a cluster whenever the threshold is reached.
#'
#' `cosmic_signatures` is not an argument: it is looked up as a free variable
#' and must be a matrix with one named column per signature. The helpers
#' `fit_to_signatures()` and `cos_sim_matrix()` are likewise taken from the
#' surrounding scope (presumably the MutationalPatterns package - confirm that
#' it is attached or imported by the package).
#'
#' @param ClusterDF Data frame with cluster information. The columns `cluster`
#'   and `sample` are used.
#' @param cos_sim_samples_cosmic Cosine similarity matrix with samples as row
#'   names and signatures as column names.
#' @param mutational_matrix Mutational matrix from mut_mat(), with samples as
#'   column names.
#' @param threshold Threshold for the needed mean cosine similarity between
#'   the reconstructed and the original mutational matrix.
#'
#' @return A list with one element per cluster of size > 1, named
#'   `"cluster <id>"`. Each element is a character vector with the names of the
#'   main contributing signatures, or `NULL` when the threshold was not reached
#'   with any number of signatures.
#' @export
#' @examples
#' \dontrun{
#' get_contributing_signatures(Cluster, cos_sim_samples_cosmic,
#'                             mutational_matrix, threshold = 0.95)
#' }
#' @import dplyr
get_contributing_signatures <- function(ClusterDF, cos_sim_samples_cosmic,
                                        mutational_matrix, threshold) {

  # Returns the smallest set of top-ranked signatures (at least two) that
  # reconstructs the profiles of one cluster well enough, or NULL if none does.
  contributing_signatures <- function(cluster_id) {
    # %in% never yields NA, so rows with a missing cluster label are skipped.
    clust_samples <- ClusterDF$sample[ClusterDF$cluster %in% cluster_id]

    # drop = FALSE keeps a matrix even if only one sample of the cluster is
    # present in the similarity matrix.
    clust_cossim <- cos_sim_samples_cosmic[
      rownames(cos_sim_samples_cosmic) %in% clust_samples, , drop = FALSE
    ]

    # Signatures ranked from most to least similar to the cluster on average.
    top_sign <- names(sort(colMeans(clust_cossim), decreasing = TRUE))

    clust_mut_mat <- mutational_matrix[
      , colnames(mutational_matrix) %in% clust_samples, drop = FALSE
    ]

    # seq_along(.)[-1L] is empty when fewer than two signatures are ranked,
    # whereas 2:length(.) would count downwards.
    for (i in seq_along(top_sign)[-1L]) {
      candidate_names <- top_sign[seq_len(i)]
      candidate_signatures <- cosmic_signatures[
        , colnames(cosmic_signatures) %in% candidate_names, drop = FALSE
      ]
      fit_sign <- fit_to_signatures(clust_mut_mat, candidate_signatures)

      # How well are the mutational profiles reconstructed using the top i
      # signatures? The diagonal pairs each sample with its own reconstruction.
      cos_sim_original_reconstructed <- cos_sim_matrix(
        clust_mut_mat, fit_sign$reconstructed
      )
      mean_cossim_reconstruct <- mean(diag(cos_sim_original_reconstructed))

      if (mean_cossim_reconstruct >= threshold) {
        return(candidate_names)
      }
    }

    # Threshold never reached.
    NULL
  }

  # Remove clusters with only one sample.
  cluster_sizes <- table(ClusterDF$cluster)
  cluster_ids <- names(cluster_sizes)[cluster_sizes > 1]

  prominent_signatures <- lapply(cluster_ids, contributing_signatures)
  # sprintf() returns character(0) for zero clusters; paste() would return a
  # single "cluster " and make the names<- assignment fail.
  names(prominent_signatures) <- sprintf("cluster %s", cluster_ids)
  prominent_signatures
}
# NilsWeng/nilsPaket documentation built on June 5, 2019, 7:50 a.m.