R/03_preprocessingR.R

Defines functions DESeq2_process set_scenario_parameters

Documented in DESeq2_process set_scenario_parameters

#' set_scenario_parameters() Function
#'
#' This function select parameters to be used in analysis. Need pediatric_counts and adult_counts in global environement.
#' @param scenario_groups : selected groups for analysis
#' @param anno_colori : list of colors in all groups
#' @param Metadata : Metadata of samples, need to involce "group", "BulkName" variable
#' @param type : "pediatric" or "adult"
#' @param ranking_groups : design formula of DESeq2
#' @return list 
#' @import ggplot2
#' @export
#' @examples
#' set_scenario_parameters()
#' 
set_scenario_parameters <- function(scenario_groups,anno_colori,Metadata, type, ranking_groups){
   
   # colors used for heatmap
   heatmap_anno_colori <-  list(
      group = anno_colori$group[which(names(anno_colori$group) %in% scenario_groups)])
   
   # colors used for fill option
   colScale <- ggplot2::scale_fill_manual(name="samples",values=heatmap_anno_colori$group)
   
   # colors used in color option
   colScale2 <- ggplot2::scale_color_manual(name="samples",values=heatmap_anno_colori$group)
   colorvalue <- anno_colori$group[ranking_groups]

   # Selecting samples from given groups
   scenario_metadata <- Metadata[which(Metadata$group %in% scenario_groups),]
   
   
   if(type == "pediatric"){
      scenario_counts <- pediatric_counts[, which(names(pediatric_counts) %in% scenario_metadata$BulkName)]
      
   } else {
      scenario_counts <- adults_counts[, which(names(adults_counts) %in% scenario_metadata$BulkName)]
      
   }
   
   # need to check if names of matrix is same with metadata
   names(scenario_counts) == scenario_metadata$BulkName            
   
   heatmap_anno <- as.data.frame(scenario_metadata$group)
   rownames(heatmap_anno) <- scenario_metadata$BulkName
   names(heatmap_anno) <- "group"
   
   return(list(heatmap_anno_colori,heatmap_anno, scenario_metadata, scenario_counts, colScale, colScale2, colorvalue))
   
}


#' DESeq2_process() Function
#'
#' This function proceed with DESeq2 to get vst matrix
#' @param raw_counts : raw counts matrix
#' @param samples_metadata : metadata where names of raw counts match with metadata
#' @param design_formula : formula used for DESeq2 process
#' @param batch : batch = NULL if not, run batch correction with limma packages (Chip variable)
#' @param blindRun : no messages during DESeq2 process
#' @param outDIR : directory to save plots
#' @return normalised matrix 
#' @export
#' @examples
#' DESeq2_process()
DESeq2_process <- function(raw_counts, samples_metadata, design_formula, batch, blindRun, outDIR){
  
  # change so there is no code repetition
  if(blindRun){
    
    tmpDESeqObject <- suppressMessages(
      DESeq2::DESeqDataSetFromMatrix(countData = raw_counts,
                             colData = samples_metadata, 
                             design =  as.formula(design_formula))
    )
    #tmpDESeqObject$group <- relevel(tmpDESeqObject$group, ref='Control')
    dds <- suppressMessages(DESeq2::DESeq(tmpDESeqObject))
    vsd <- suppressMessages(DESeq2::vst(dds,blind =  FALSE, nsub=500))
    # nsub= 500 to run in Agilent data
    
  } else {
    tmpDESeqObject <- DESeq2::DESeqDataSetFromMatrix(countData = raw_counts,
                                             colData = samples_metadata, 
                                             design =  as.formula(design_formula))
    #tmpDESeqObject$group <- relevel(tmpDESeqObject$group, ref='Control')
    dds <- DESeq2::DESeq(tmpDESeqObject)
    vsd <- DESeq2::vst(dds,blind = FALSE, nsub=500) 
    # nsub= 500 to run in Agilent data
    
  }
  

  if(!is.null(batch)){
    
    # correct for batch effect (form of vsd$group)
    message("** Correcting effect of chips")
    mat <- SummarizedExperiment::assay(vsd)
    mat <- limma::removeBatchEffect(mat, batch =samples_metadata$Batch)
    SummarizedExperiment::assay(vsd) <- mat
    PCAplot(vsd, c("BulkName","Batch","group"), batch, outDIR)
    
  }else{
    PCAplot(vsd, c("BulkName", "group"), batch, outDIR)
  }
  
  message("** VST process")
  # need other code to extract only normalised counts
  # normalized_counts <- counts(dds, normalized=TRUE)
  # normalized_counts <- as.data.frame(normalized_counts)
  norm_vst <- SummarizedExperiment::assay(vsd)
  
  return(norm_vst)
}
jyoh1248/MyoSignature documentation built on May 18, 2022, 12:37 a.m.