R/SummarizeMultiToolsMultiDatasetsAttr.R

Defines functions SummarizeMultiToolsMultiDatasetsAttr

Documented in SummarizeMultiToolsMultiDatasetsAttr

#' Summarize exposure inference measures across tools and datasets
#'
#' Internal function. Combines the results of mutational exposure inference
#' produced by multiple computational approaches on multiple datasets.
#' Before running this function, make sure that the summary file
#' \code{OneToolSummary.RDa} exists in every folder listed in
#' \code{toolSummaryPaths}.
#'
#' \code{OneToolSummary.RDa} is generated by
#' \code{\link{SummarizeOneToolMultiDatasets}}.
#'
#' @details
#' Each summary file is loaded once into an isolated environment, so objects
#' stored in the file cannot overwrite variables of this function.
#' The names of the ground-truth signatures are taken from
#' \code{names(OneToolSummary$cosSim)} of the first summary file (dropping
#' \code{"combined"}) and ordered with \code{gtools::mixedsort}. A variable
#' called \code{gtSigNames} in the calling workspace is never consulted.
#'
#' The arguments \code{out.dir}, \code{display.datasetName},
#' \code{sort.by.composite.extraction.measure} and \code{overwrite} are
#' accepted for interface compatibility but are not used by this function:
#' no summary tables are written to disk.
#'
#' @inheritParams SummarizeMultiToolsMultiDatasets
#'
#' @return A list with three elements, \code{AggManhattanDist},
#'   \code{meanSepMD} and \code{sdSepMD}. Each element is a list holding one
#'   table per ground-truth signature name, obtained by row-binding the
#'   corresponding tables of all summary files in the order given by
#'   \code{toolSummaryPaths}. If \code{toolSummaryPaths} is empty, the three
#'   elements are empty lists.
#'
#' @importFrom rlang .data
#'
#' @importFrom utils write.csv
#'
#' @keywords internal
#'
SummarizeMultiToolsMultiDatasetsAttr <- function(
  toolSummaryPaths,
  out.dir,
  display.datasetName = FALSE,
  sort.by.composite.extraction.measure = "descending",
  overwrite = FALSE) {

  # Check whether package gtools is installed -------------------------------
  if (!requireNamespace("gtools", quietly = TRUE)) {
    stop("Package gtools needs to be installed\n")
  }

  # Measures to combine: aggregated scaled Manhattan distance, and the mean
  # and standard deviation of the per-tumor (separated) Manhattan distances.
  measures <- c("AggManhattanDist", "meanSepMD", "sdSepMD")

  FinalAttr <- list()
  for (measure in measures) {
    FinalAttr[[measure]] <- list()
  }

  # Load every per-tool summary exactly once ---------------------------------
  # load() is pointed at a throw-away environment so that whatever objects
  # the file contains cannot clobber local variables of this function.
  toolSummaries <- lapply(toolSummaryPaths, function(toolSummaryPath) {
    summaryFile <- paste0(toolSummaryPath, "/OneToolSummary.RDa")
    if (!file.exists(summaryFile)) {
      stop("Summary file does not exist: ", summaryFile, call. = FALSE)
    }
    loadEnv <- new.env(parent = emptyenv())
    load(summaryFile, envir = loadEnv)
    if (!exists("OneToolSummary", envir = loadEnv, inherits = FALSE)) {
      stop("Object OneToolSummary not found in ", summaryFile, call. = FALSE)
    }
    get("OneToolSummary", envir = loadEnv, inherits = FALSE)
  })

  # Nothing to combine: return the empty skeleton.
  if (length(toolSummaries) == 0) {
    return(FinalAttr)
  }

  # Names of ground-truth signatures, read from the first summary only.
  # "combined" is an extra entry of cosSim and not a signature name.
  gtSigNames <- gtools::mixedsort(
    setdiff(names(toolSummaries[[1]][["cosSim"]]), "combined"))

  # Combine the tables of all tools ------------------------------------------
  # For each measure and signature, row-bind the tables one after another,
  # starting from an empty data frame. Reduce() keeps the same left-to-right
  # accumulation as a loop that repeatedly calls rbind().
  for (measure in measures) {
    for (gtSigName in gtSigNames) {
      perToolTables <- lapply(
        toolSummaries,
        function(oneToolSummary) oneToolSummary[[measure]][[gtSigName]])
      FinalAttr[[measure]][[gtSigName]] <-
        Reduce(rbind, perToolTables, data.frame())
    }
  }

  # For the purpose of the SBS1-SBS5 paper, summary tables for the scaled
  # Manhattan distances are deliberately NOT written to out.dir.

  FinalAttr
}
WuyangFF95/SynSigEval documentation built on Sept. 18, 2022, 11:41 a.m.