# R/SummarizeResultsSP.R
#
# Defines functions SummarizeSigOneSigProSSSubdir SummarizeSigProExtractor SummarizeSigOneSigProExtractorSubdir
#
# Documented in SummarizeSigOneSigProExtractorSubdir SummarizeSigOneSigProSSSubdir SummarizeSigProExtractor

#' Assess/evaluate results from SigProExtractor (v0.0.5.45+)
#'
#' SigProExtractor is the SigProfiler-python de novo extraction and
#' attribution package.
#' Assessment is restricted to v0.0.5.43 ~ v0.0.5.77,
#' because different versions have different folder structures.
#'
#' This function cannot be used on the new SigProfilerExtractor
#' (v1+), as the folder structure has been changed markedly.
#'
#' The mutation type is taken from the first of the sub-directories
#' \code{SBS96}, \code{DBS78} and \code{ID83} that exists under
#' \code{run.dir}. The extracted signatures and inferred exposures found
#' there are converted and written to
#' \code{<run.dir>/extracted.signatures.csv} and
#' \code{<run.dir>/inferred.exposures.csv}, which are then passed to
#' \code{SummarizeSigOneSubdir}.
#'
#' @inheritParams SummarizeSigOneSubdir
#'
#' @param run.dir A directory which contains output of SigProExtractor in one
#' run on a specific dataset, possibly with a specified seed. E.g.
#' \code{2b.Full_output_K_as_2/SigProExtractor.results/S.0.1.Rsq.0.1/seed.1/}.
#'
#' This code depends on a conventional directory structure documented
#' in \code{NEWS.md}.
#'
#' @param hierarchy Whether the user enabled \code{hierarchy = True} when
#' running SigProExtractor. Specifying True or False in SigProExtractor causes
#' the program to generate a different folder structure.
#' This argument is currently not used by this function.
#'
#' @return Invisibly, the value returned by \code{SummarizeSigOneSubdir}.
#'
#' @export
#'
#' @importFrom ICAMS WriteCatalog ReadCatalog
#' @importFrom utils capture.output sessionInfo
#' @importFrom grDevices dev.off
#' @importFrom graphics par
#'
SummarizeSigOneSigProExtractorSubdir <-
  function(run.dir,
           ground.truth.exposure.dir = paste0(run.dir,"/../../../"),
           summarize.exp = TRUE,
           overwrite = FALSE,
           hierarchy = FALSE,
           summary.folder.name = "summary",
           export.Manhattan.each.spectrum = FALSE) {

    # File-name suffixes used by SigProExtractor, keyed by the name of the
    # mutation-type sub-directory. The order of the names is the order in
    # which the sub-directories are probed.
    # NOTE(review): the ID83 activities suffix ("ID") is inferred by analogy
    # with the ID signatures file name -- confirm against real output.
    sig.suffix <- c(SBS96 = "SBS96", DBS78 = "SBSDINUC", ID83 = "ID")
    exp.suffix <- c(SBS96 = "SBS96", DBS78 = "DINUC",    ID83 = "ID")
    mut.types  <- names(sig.suffix)

    # Location of SigProfiler output, which is our input.
    # inputPath may change if SigProExtractor updates!
    found <- dir.exists(paste0(run.dir, "/", mut.types, "/"))
    if (!any(found)) {
      stop("Cannot find any of the sub-directories ",
           paste(mut.types, collapse = ", "),
           " under run.dir: ", run.dir, call. = FALSE)
    }
    mut.type <- mut.types[found][1]

    inputPath <- paste0(run.dir, "/", mut.type,
                        "/Suggested_Solution/De_Novo_Solution")
    if (!dir.exists(inputPath)) {
      stop("Cannot find SigProExtractor output directory: ", inputPath,
           call. = FALSE)
    }

    # Read in extracted signatures in SigProExtractor txt format,
    # and convert them to ICAMS csv format.
    # The converted signatures are written directly under run.dir.
    sigs.path.SP.format <-
      paste0(inputPath, "/De_Novo_Solution_Signatures_",
             sig.suffix[[mut.type]], ".txt")
    extractedSigs <- ICAMS::ReadCatalog(sigs.path.SP.format)
    extractedSigs <- ICAMS::as.catalog(object = extractedSigs,
                                       region = "unknown",
                                       catalog.type = "counts.signature")
    extracted.sigs.path <- paste0(run.dir, "/extracted.signatures.csv")
    ICAMS::WriteCatalog(extractedSigs, extracted.sigs.path)

    # Read in inferred exposures in SP format,
    # and convert them into our internal format.
    inferred.exp.path.SP.format <-
      paste0(inputPath, "/De_Novo_Solution_Activities_",
             exp.suffix[[mut.type]], ".txt")
    if (!file.exists(inferred.exp.path.SP.format)) {
      stop("Cannot find SigProExtractor activities file: ",
           inferred.exp.path.SP.format, call. = FALSE)
    }
    inferredExposures <- ReadSigProfilerExposure(inferred.exp.path.SP.format)
    inferred.exp.path <- paste0(run.dir, "/inferred.exposures.csv")
    mSigTools::write_exposure(inferredExposures, inferred.exp.path)

    # SummarizeSigOneSubdir generates a summary folder under run.dir.
    # Summarized results are dumped into this folder.
    retval <-
      SummarizeSigOneSubdir(
        run.dir = run.dir,
        ground.truth.exposure.dir = ground.truth.exposure.dir,
        extracted.sigs.path = extracted.sigs.path,
        inferred.exp.path = inferred.exp.path,
        summarize.exp = summarize.exp,
        overwrite = overwrite,
        summary.folder.name = summary.folder.name,
        export.Manhattan.each.spectrum = export.Manhattan.each.spectrum)

    # Copy the selection plot and the solution statistics generated by
    # SigProfiler to the summary folder. Missing files are skipped with a
    # warning rather than treated as an error.
    # NOTE(review): for DBS78 / ID83 the file names are assumed to follow the
    # SBS96 pattern (<type>/<type>_selection_plot.pdf) -- confirm.
    copy.from.files <-
      paste0(run.dir, "/", mut.type, "/",
             c(paste0(mut.type, "_selection_plot.pdf"),
               "All_solutions_stat.csv"))
    # Presumably SummarizeSigOneSubdir writes to
    # <run.dir>/<summary.folder.name>; with the default name this is the
    # same destination ("<run.dir>/summary/") as before.
    summary.dir <- paste0(run.dir, "/", summary.folder.name, "/")
    for (copy.from in copy.from.files) {
      if (!file.exists(copy.from)) {
        warning("Cannot find ", copy.from, "\n\nSkipping\n\n")
      } else {
        # file.copy returns a logical "okay" flag; it is deliberately
        # not printed.
        file.copy(from = copy.from,
                  to = summary.dir,
                  overwrite = TRUE)
      }
    }

    invisible(retval) # So we can test without looking at a file.
}

#' Summarize SigProfiler results in the sa.sa.96 and/or sp.sp subdirectories.
#'
#' @param top.dir The top directory of a conventional data structure containing
#' at least one of the subdirectories: sa.sa.96/sp.results and sp.sp/sp.results;
#' see further documentation elsewhere.
#'
#' @param sub.dir The subdirectory (or subdirectories) under \code{top.dir},
#' each containing a folder named sp.results. By default, it contains both
#' \code{c("sa.sa.96","sp.sp")}.
#' But you should specify \code{sub.dir = "sp.sp"} for \code{top.dir} with only
#' the \code{sp.sp} subdirectory
#' (as is the case for the correlated SBS1-and-SBS5-containing data sets).
#' Any other value raises an error.
#'
#' @inheritParams SummarizeSigOneSubdir
#'
#' @export
#'
#' @details Results are put in standardized subdirectories of \code{top.dir}.
#' \code{sa.sa.96} is always processed before \code{sp.sp}, regardless of
#' the order in which they are given in \code{sub.dir}.
SummarizeSigProExtractor <-
  function(
    top.dir,
    sub.dir = c("sa.sa.96","sp.sp"),
    overwrite = FALSE) {

    ## If sub.dir contains anything other than sa.sa.96 and sp.sp,
    ## throw an error.
    expected.sub.dir <- c("sa.sa.96", "sp.sp")
    if (!all(sub.dir %in% expected.sub.dir)) {
      stop("sub.dir can only be one or two of c(\"sa.sa.96\",\"sp.sp\")!\n")
    }

    if ("sa.sa.96" %in% sub.dir) {
      SummarizeSigOneSigProExtractorSubdir(
        run.dir = paste0(top.dir, "/sa.sa.96/sp.results"),
        ground.truth.exposure.dir = paste0(top.dir, "/sa.sa.96/"),
        overwrite = overwrite)
    }

    if ("sp.sp" %in% sub.dir) {
      SummarizeSigOneSigProExtractorSubdir(
        run.dir = paste0(top.dir, "/sp.sp/sp.results"),
        ground.truth.exposure.dir = paste0(top.dir, "/sp.sp/"),
        overwrite = overwrite)
    }
  }


#' Assess/evaluate results from sigproSS
#' (a.k.a. SigProfiler Python attribution package)
#'
#' The exposures in \code{<run.dir>/sig_activities.txt} are converted and
#' written to \code{<run.dir>/inferred.exposures.csv}, and then summarized by
#' \code{SummarizeSigOneSubdir} against the reference signatures in
#' \code{<ground.truth.exposure.dir>/ground.truth.syn.sigs.csv}.
#'
#' @inheritParams SummarizeSigOneSubdir
#'
#' @param run.dir Lowest level path to results, e.g.
#' \code{<top.dir>}\code{/sa.sa.96/ExtrAttr/SigProExtractor.results/seed.1/}
#' Here, \code{<top.dir>} refers to a top-level directory which contains the
#' full information of a synthetic dataset. (e.g. \code{syn.2.7a.7b.abst.v8})
#' This code depends on a conventional directory structure documented
#' elsewhere. The file \code{sig_activities.txt} must exist directly under
#' \code{<run.dir>}.
#'
#' @param ground.truth.exposure.dir Directory from which the reference
#' signatures file \code{ground.truth.syn.sigs.csv} is read; it is also
#' passed on to \code{SummarizeSigOneSubdir}.
#' Defaults to \code{<run.dir>/../../../}.
#'
#' @param overwrite If TRUE overwrite existing directories and files.
#'
#' @return Invisibly, the value returned by \code{SummarizeSigOneSubdir}.
#'
#' @export
#'
#' @importFrom ICAMS WriteCatalog ReadCatalog
#' @importFrom utils capture.output sessionInfo
#' @importFrom grDevices dev.off
#' @importFrom graphics par
#'
SummarizeSigOneSigProSSSubdir <-
  function(run.dir,
           ground.truth.exposure.dir = paste0(run.dir,"/../../../"),
           overwrite = FALSE,
           summary.folder.name = "summary",
           export.Manhattan.each.spectrum = FALSE) {

  # Reference signatures in ICAMS csv format. sigproSS only attributes
  # exposures, so these take the place of "extracted" signatures below.
  ref.sigs.path <- paste0(ground.truth.exposure.dir,"/ground.truth.syn.sigs.csv")

  # Read in inferred exposures in SP format,
  # and convert them into our internal format.
  inferred.exp.path.SP.format <- paste0(run.dir,"/sig_activities.txt")
  if (!file.exists(inferred.exp.path.SP.format)) {
    stop("Cannot find sigproSS activities file: ",
         inferred.exp.path.SP.format, call. = FALSE)
  }
  inferredExposures <- ReadSigProfilerExposure(inferred.exp.path.SP.format)
  ## Remove the first "Similarity" row.
  inferredExposures <- inferredExposures[-1, , drop = FALSE]
  inferred.exp.path <- paste0(run.dir,"/inferred.exposures.csv")
  mSigTools::write_exposure(inferredExposures, inferred.exp.path)

  # SummarizeSigOneSubdir generates a summary folder under run.dir.
  # Summarized results are dumped into this folder.
  retval <-
    SummarizeSigOneSubdir(
      run.dir = run.dir,
      ground.truth.exposure.dir = ground.truth.exposure.dir,
      extracted.sigs.path = ref.sigs.path,
      inferred.exp.path = inferred.exp.path,
      overwrite = overwrite,
      summary.folder.name = summary.folder.name,
      export.Manhattan.each.spectrum = export.Manhattan.each.spectrum)

  # Copy the decomposition profile and the dendrogram generated by
  # SigProfiler to the summary folder. The explicit "/" makes this work
  # whether or not run.dir ends with a path separator.
  copy.from.files <- paste0(run.dir,
                            c("/decomposition profile.csv",
                              "/dendrogram.pdf"))
  # Presumably SummarizeSigOneSubdir writes to
  # <run.dir>/<summary.folder.name>; with the default name this is the
  # same destination ("<run.dir>/summary/") as before.
  summary.dir <- paste0(run.dir, "/", summary.folder.name, "/")
  for (copy.from in copy.from.files) {
    if (!file.exists(copy.from)) {
      warning("Cannot find ", copy.from, "\n\nSkipping\n\n")
    } else {
      # file.copy returns a logical "okay" flag; it is deliberately
      # not printed.
      file.copy(
        from = copy.from,
        to = summary.dir,
        overwrite = TRUE)
    }
  }

  invisible(retval) # So we can test without looking at a file.

}
# WuyangFF95/SynSigEval documentation built on Sept. 18, 2022, 11:41 a.m.