# R/00_check_distribution.R
#
# Defines functions: plot_distribution, generate_data_for_plot, generate_data_byGene, separate_data
#
# Documented in: generate_data_byGene, generate_data_for_plot, plot_distribution, separate_data

#' separate_data() Function
#'
#' Selects, from a count matrix, the samples that belong to one chip.
#'
#' Rows of `samples_metadata` whose `Chip` equals `selected_group` are kept,
#' and the columns of `counts` named by their `BulkName` are returned.
#'
#' @param counts Count matrix (or data frame) whose column names contain the
#'   `BulkName` values of the samples.
#' @param samples_metadata Sample metadata; must contain the `Chip` and
#'   `BulkName` variables.
#' @param selected_group Value of the `Chip` variable identifying the samples
#'   to keep.
#' @return `counts` restricted to the selected samples. The result keeps its
#'   two dimensions even when a single sample (or none) is selected.
#' @export
#' @examples
#' counts <- matrix(1:6, nrow = 2,
#'                  dimnames = list(c("g1", "g2"), c("s1", "s2", "s3")))
#' samples_metadata <- data.frame(BulkName = c("s1", "s2", "s3"),
#'                                Chip = c("A", "B", "A"))
#' separate_data(counts, samples_metadata, "A")
#'
separate_data <- function(counts, samples_metadata, selected_group) {

  # which() ignores NA comparisons, so samples with a missing Chip are dropped
  keep <- which(samples_metadata$Chip == selected_group)
  samples_metadata <- samples_metadata[keep, , drop = FALSE]

  # `[` uses the integer codes of a factor index, not its labels, so convert
  # to character to be sure columns are selected by name
  sample_names <- as.character(samples_metadata$BulkName)

  # drop = FALSE keeps a matrix/data frame when only one sample is selected
  counts <- counts[, sample_names, drop = FALSE]

  message(paste0("Selected chip : ", unique(samples_metadata$Chip)))
  # collapse the dimensions into one string; pasting the raw dim() vector
  # would produce one message fragment per dimension
  message(paste0("Dimension of matrix : ", paste(dim(counts), collapse = " x ")))

  return(counts)
}

#' generate_data_byGene() Function
#'
#' Builds a long-format table (one row per sample and gene) used to plot the
#' distribution of each selected gene.
#'
#' @param datasetname Name of the dataset; stored in the `dataset` column.
#' @param counts Count matrix with genes in rows and samples in columns.
#' @param samples_metadata Sample metadata; must contain the `Chip` and
#'   `BulkName` variables.
#' @param selected_group Value of the `Chip` variable identifying the samples
#'   to keep; stored in the `Chip` column.
#' @param selected.gene.table Gene table composed of the selected genes, with
#'   `ensembl` and `symbols` variables.
#' @param genetype Type of gene names used as row names of `counts`:
#'   `"ensembl"` uses the `ensembl` variable, any other value uses `symbols`.
#' @return A data frame with the columns `value`, `BulkName`, `Gene`, `Chip`
#'   and `dataset`.
#' @export
#' @examples
#' \dontrun{
#' generate_data_byGene(datasetname, counts, samples_metadata, selected_group,
#'                      selected.gene.table, genetype)
#' }
#'
generate_data_byGene <- function(datasetname, counts, samples_metadata, selected_group,
                                 selected.gene.table, genetype) {

  message("*** Data processing")
  tmp.data <- separate_data(counts, samples_metadata, selected_group)

  if (genetype == "ensembl") {
    message("  Gene type : ENSEMBL ID")
    gene_ids <- selected.gene.table$ensembl
  } else {
    message("  Gene type : Symbols")
    gene_ids <- selected.gene.table$symbols
  }

  # A factor used as an index is interpreted through its integer codes, which
  # would silently pick the wrong rows; always index by name.
  gene_ids <- as.character(gene_ids)
  if (length(gene_ids) == 0L) {
    stop("selected.gene.table contains no genes to select", call. = FALSE)
  }

  # drop = FALSE keeps the gene dimension when a single gene is selected, so
  # the transpose is always samples (rows) x genes (columns)
  tmp.data <- as.data.frame(t(tmp.data[gene_ids, , drop = FALSE]))

  # Add BulkName variable
  tmp.data$BulkName <- rownames(tmp.data)

  # One (value, BulkName, Gene) table per gene column; BulkName is the last
  # column, every column before it is a gene.
  bulk_col <- ncol(tmp.data)
  data.plt <- lapply(seq_len(bulk_col - 1L), function(i) {
    d <- tmp.data[, c(i, bulk_col)]
    d$Gene <- colnames(d)[1]
    colnames(d) <- c("value", "BulkName", "Gene")
    d
  })

  # Bind all per-gene tables in a single call instead of pairwise
  final.data <- do.call(rbind, data.plt)
  final.data$Chip <- selected_group
  final.data$dataset <- datasetname

  message("*** Data processing done : data ready to be used")
  return(final.data)
}


#' generate_data_for_plot() Function
#'
#' Generates the data used by the distribution plots: the long-format table
#' from `generate_data_byGene()` plus a `symbols` factor and a `target` factor.
#'
#' @param datasetname Name of the dataset; stored in the `dataset` column.
#' @param counts Count matrix with genes in rows and samples in columns.
#' @param samples_metadata Sample metadata; must contain the `Chip` and
#'   `BulkName` variables.
#' @param selected_group Value of the `Chip` variable identifying the samples
#'   to keep.
#' @param selected.gene.table Gene table composed of the selected genes, with
#'   `ensembl` and `symbols` variables. The order of `symbols` (its factor
#'   levels, or order of first appearance for a character column) is reversed
#'   to define the levels of the returned `symbols` factor.
#' @param genetype Type of gene names used as row names of `counts`:
#'   `"ensembl"` or `"symbols"`.
#' @param master_gene_table Gene table with `ensembl` and `symbols` variables,
#'   used to translate Ensembl IDs into symbols when `genetype` is `"ensembl"`.
#' @param target_genes Optional vector of gene symbols to flag as `"target"`.
#'   When `NULL` (default), the object `random_genes` visible from this
#'   function's environment is used, as in previous versions.
#' @return The data frame returned by `generate_data_byGene()` with two extra
#'   columns: `symbols` (factor) and `target` (factor with values `"target"`
#'   and/or `"no"`).
#' @export
#' @examples
#' \dontrun{
#' generate_data_for_plot(datasetname, counts, samples_metadata, selected_group,
#'                        selected.gene.table, genetype, master_gene_table)
#' }
#'
generate_data_for_plot <- function(datasetname, counts, samples_metadata, selected_group,
                                   selected.gene.table, genetype, master_gene_table,
                                   target_genes = NULL) {
  message("** Generating data for plots")
  data <- generate_data_byGene(datasetname, counts, samples_metadata, selected_group,
                               selected.gene.table, genetype)

  if (genetype == "ensembl") {
    # Gene holds Ensembl IDs: translate them through the master table
    gene_symbols <- master_gene_table$symbols[match(data$Gene, master_gene_table$ensembl)]
  } else {
    # Gene already holds symbols; looking them up among Ensembl IDs would
    # only produce NA
    gene_symbols <- data$Gene
  }

  # levels() is NULL for a character column, which would turn every symbol
  # into NA; fall back to the order of first appearance in that case
  symbol_levels <- selected.gene.table$symbols
  if (is.factor(symbol_levels)) {
    symbol_levels <- levels(symbol_levels)
  } else {
    symbol_levels <- unique(as.character(symbol_levels))
  }
  data$symbols <- factor(as.character(gene_symbols), levels = rev(symbol_levels))

  message("** Define targeted genes")
  if (is.null(target_genes)) {
    # Legacy behaviour: rely on a `random_genes` object defined by the caller's
    # session or the package; prefer passing `target_genes` explicitly.
    target_genes <- random_genes
  }
  data$target <- ifelse(data$symbols %in% target_genes, "target", "no")
  data$target <- as.factor(data$target)

  return(data)
}

#' plot_distribution() Function
#'
#' Draws one ridgeline density of `value` per gene symbol, filled by `target`,
#' saves the figure to `saveName` and returns the plot object.
#'
#' @param data Data obtained from `generate_data_for_plot()`; must contain the
#'   `value`, `symbols` and `target` variables.
#' @param title Title of the plot.
#' @param saveName File name the plot is saved to.
#' @return The ggplot object of the distribution plot.
#' @export
#' @examples
#' \dontrun{
#' plot_distribution(data, title, saveName)
#' }
#'
plot_distribution <- function(data, title, saveName) {
  message(paste0("Plotting : ", title))
  message("   data needs to contain value, symbols and target variables")

  # Fail early with an explicit message instead of an error at render time
  missing_cols <- setdiff(c("value", "symbols", "target"), names(data))
  if (length(missing_cols) > 0) {
    stop("data is missing required variable(s): ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }

  # Every ggplot2/ggridges function is namespace-qualified so the function
  # works without either package being attached. The ridgeline layer inherits
  # its aesthetics from ggplot().
  plt <- ggplot2::ggplot(data, ggplot2::aes(value, symbols, fill = target)) +
    ggridges::geom_density_ridges() +
    ggridges::theme_ridges() +
    ggplot2::ggtitle(title) +
    ggplot2::ylab("Gene") +
    ggplot2::theme(axis.text.y = ggplot2::element_text(size = 10))

  ggplot2::ggsave(filename = saveName, plot = plt, height = 35, width = 15, bg = "white")
  # Report success only once the file has actually been written
  message("Saved plots")
  return(plt)
}
# jyoh1248/MyoSignature documentation built on May 18, 2022, 12:37 a.m.