#' Assess/evaluate results from SigProExtractor (v0.0.5.45+)
#'
#' SigProfiler-python de novo extraction and attribution package.
#' Assessment is restricted to v0.0.5.43 ~ v0.0.5.77,
#' because different versions have different folder structures.
#'
#' This function cannot be used on new SigProfilerExtractor
#' (v1+) as the folder structure has been changed markedly.
#'
#' The mutation type is taken from the first of the sub-directories
#' \code{SBS96}, \code{DBS78} and \code{ID83} that exists under
#' \code{run.dir}. The extracted signatures and inferred exposures found
#' there are converted and written to
#' \code{<run.dir>/extracted.signatures.csv} and
#' \code{<run.dir>/inferred.exposures.csv}, and then passed on to
#' \code{SummarizeSigOneSubdir}.
#'
#' @inheritParams SummarizeSigOneSubdir
#'
#' @param run.dir A directory which contains output of SigProExtractor in one
#' run on a specific dataset, possibly with a specified seed. E.g.
#' \code{2b.Full_output_K_as_2/SigProExtractor.results/S.0.1.Rsq.0.1/seed.1/}.
#'
#' This code depends on a conventional directory structure documented
#' in \code{NEWS.md}.
#'
#' @param hierarchy Whether the user has enabled hierarchy = True when running
#' SigProExtractor. Specifying True or False in SigProExtractor will cause the
#' program to generate a different folder structure.
#' The argument is currently not used by this function.
#'
#' @return Invisibly, the value returned by \code{SummarizeSigOneSubdir}.
#'
#' @export
#'
#' @importFrom ICAMS WriteCatalog ReadCatalog
#' @importFrom utils capture.output sessionInfo
#' @importFrom grDevices dev.off
#' @importFrom graphics par
#'
SummarizeSigOneSigProExtractorSubdir <-
  function(run.dir,
           ground.truth.exposure.dir = paste0(run.dir,"/../../../"),
           summarize.exp = TRUE,
           overwrite = FALSE,
           hierarchy = FALSE,
           summary.folder.name = "summary",
           export.Manhattan.each.spectrum = FALSE) {

    # Suffixes of the SigProExtractor output files, keyed by the name of the
    # mutation-type sub-directory. The order of the names is the order in
    # which the sub-directories are probed.
    sigs.suffix <- c(SBS96 = "SBS96", DBS78 = "SBSDINUC", ID83 = "ID")
    # NOTE(review): the "ID" activities suffix is inferred by analogy with
    # the signatures file name -- confirm against real ID83 output.
    exp.suffix  <- c(SBS96 = "SBS96", DBS78 = "DINUC", ID83 = "ID")
    mut.types <- names(sigs.suffix)

    # Location of SigProfiler output, which is our input.
    # inputPath may change if SigProExtractor updates!
    type.present <- dir.exists(paste0(run.dir, "/", mut.types, "/"))
    if (!any(type.present)) {
      stop("Cannot find any of the sub-directories ",
           paste(mut.types, collapse = ", "),
           " under run.dir: ", run.dir, call. = FALSE)
    }
    mut.type <- mut.types[which(type.present)[1]]
    inputPath <-
      paste0(run.dir, "/", mut.type, "/Suggested_Solution/De_Novo_Solution")
    if (!dir.exists(inputPath)) {
      stop("Expected SigProExtractor output directory does not exist: ",
           inputPath, call. = FALSE)
    }

    # Read in extracted signatures in SigProExtractor txt format,
    # and convert them to ICAMS csv format.
    extractedSigs <- ICAMS::ReadCatalog(
      paste0(inputPath, "/De_Novo_Solution_Signatures_",
             sigs.suffix[[mut.type]], ".txt"))
    extractedSigs <- ICAMS::as.catalog(object = extractedSigs,
                                       region = "unknown",
                                       catalog.type = "counts.signature")
    extracted.sigs.path <- paste0(run.dir, "/extracted.signatures.csv")
    ICAMS::WriteCatalog(extractedSigs, extracted.sigs.path)

    # Read in inferred exposures in SP format,
    # and convert them into our internal format.
    inferred.exp.path.SP.format <-
      paste0(inputPath, "/De_Novo_Solution_Activities_",
             exp.suffix[[mut.type]], ".txt")
    inferredExposures <- ReadSigProfilerExposure(inferred.exp.path.SP.format)
    inferred.exp.path <- paste0(run.dir, "/inferred.exposures.csv")
    mSigTools::write_exposure(inferredExposures, inferred.exp.path)

    # SummarizeSigOneSubdir is given summary.folder.name; summarized results
    # are expected to be dumped into that folder under run.dir.
    retval <-
      SummarizeSigOneSubdir(
        run.dir = run.dir,
        ground.truth.exposure.dir = ground.truth.exposure.dir,
        extracted.sigs.path = extracted.sigs.path,
        inferred.exp.path = inferred.exp.path,
        summarize.exp = summarize.exp,
        overwrite = overwrite,
        summary.folder.name = summary.folder.name,
        export.Manhattan.each.spectrum = export.Manhattan.each.spectrum)

    # Copy the selection plot and the all-solutions statistics generated by
    # SigProfiler to the summary folder. For SBS96 these are exactly the
    # files used before; for the other mutation types the same naming
    # pattern is assumed (TODO confirm) -- a missing file only triggers a
    # warning.
    copy.from.files <-
      paste0(run.dir, "/", mut.type, "/",
             c(paste0(mut.type, "_selection_plot.pdf"),
               "All_solutions_stat.csv"))
    # The trailing slash makes file.copy treat the target as a directory.
    summary.dir <- paste0(run.dir, "/", summary.folder.name, "/")
    for (copy.from in copy.from.files) {
      if (!file.exists(copy.from)) {
        warning("Cannot find ", copy.from, "\n\nSkipping\n\n")
      } else {
        file.copy(from = copy.from,
                  to = summary.dir,
                  overwrite = TRUE)
      }
    }

    invisible(retval) # So we can test without looking at a file.
  }
#' Summarize SigProfiler results in the sa.sa.96 and/or sp.sp subdirectories.
#'
#' @param top.dir The top directory of a conventional data structure containing
#' at least one of the subdirectories: sa.sa.96/sp.results and sp.sp/sp.results;
#' see further documentation elsewhere.
#'
#' @param sub.dir The subdirectory under \code{top.dir}, and containing a folder
#' named sp.results. By default, it contains both \code{c("sa.sa.96","sp.sp")}.
#' But you should specify \code{sub.dir = "sp.sp"} for \code{top.dir} with only
#' the \code{sp.sp} subdirectory
#' (as is the case for the correlated SBS1-and-SBS5-containing data sets).
#' Any other value raises an error.
#'
#' @inheritParams SummarizeSigOneSubdir
#'
#' @export
#'
#' @details Results are put in standardized subdirectories of \code{top.dir}.
#' For each requested sub-directory,
#' \code{SummarizeSigOneSigProExtractorSubdir} is called with
#' \code{run.dir = <top.dir>/<sub.dir>/sp.results} and
#' \code{ground.truth.exposure.dir = <top.dir>/<sub.dir>/}.
SummarizeSigProExtractor <-
  function(
    top.dir,
    sub.dir = c("sa.sa.96","sp.sp"),
    overwrite = FALSE) {

    ## If sub.dir contains anything unexpected, throw an error.
    expected.sub.dir <- c("sa.sa.96", "sp.sp")
    if (!all(sub.dir %in% expected.sub.dir)) {
      ## There are other sub-dirs than sa.sa.96 and sp.sp
      stop("sub.dir can only be one or two of c(\"sa.sa.96\",\"sp.sp\")!\n")
    }

    if ("sa.sa.96" %in% sub.dir) {
      SummarizeSigOneSigProExtractorSubdir(
        run.dir = paste0(top.dir, "/sa.sa.96/sp.results"),
        ground.truth.exposure.dir = paste0(top.dir, "/sa.sa.96/"),
        overwrite = overwrite)
    }

    # Kept as the last expression so the function's value is unchanged.
    if ("sp.sp" %in% sub.dir) {
      SummarizeSigOneSigProExtractorSubdir(
        run.dir = paste0(top.dir, "/sp.sp/sp.results"),
        ground.truth.exposure.dir = paste0(top.dir, "/sp.sp/"),
        overwrite = overwrite)
    }
  }
#' Assess/evaluate results from sigproSS
#' (a.k.a. SigProfiler Python attribution package)
#'
#' Reads the exposures in \code{<run.dir>/sig_activities.txt}, drops their
#' first row, writes them to \code{<run.dir>/inferred.exposures.csv}, and
#' calls \code{SummarizeSigOneSubdir} with the ground-truth signatures
#' supplied as the "extracted" signatures.
#'
#' @inheritParams SummarizeSigOneSubdir
#'
#' @param run.dir Lowest level path to results, e.g.
#' \code{<top.dir>}\code{/sa.sa.96/ExtrAttr/SigProExtractor.results/seed.1/}
#' Here, \code{<top.dir>} refers to a top-level directory which contains the
#' full information of a synthetic dataset. (e.g. \code{syn.2.7a.7b.abst.v8})
#' This code depends on a conventional directory structure documented
#' elsewhere. The file \code{<run.dir>/sig_activities.txt} must exist.
#'
#' @param ground.truth.exposure.dir Directory which contains the
#' reference signatures file \code{ground.truth.syn.sigs.csv}; it is also
#' passed on to \code{SummarizeSigOneSubdir}.
#' Defaults to \code{<run.dir>/../../../}.
#'
#' @param overwrite If TRUE overwrite existing directories and files.
#'
#' @return Invisibly, the value returned by \code{SummarizeSigOneSubdir}.
#'
#' @export
#'
#' @importFrom ICAMS WriteCatalog ReadCatalog
#' @importFrom utils capture.output sessionInfo
#' @importFrom grDevices dev.off
#' @importFrom graphics par
#'
SummarizeSigOneSigProSSSubdir <-
  function(run.dir,
           ground.truth.exposure.dir = paste0(run.dir,"/../../../"),
           overwrite = FALSE,
           summary.folder.name = "summary",
           export.Manhattan.each.spectrum = FALSE) {

    # Reference signatures in ICAMS csv format; they take the place of
    # extracted signatures in the call to SummarizeSigOneSubdir below.
    ref.sigs.path <-
      paste0(ground.truth.exposure.dir, "/ground.truth.syn.sigs.csv")

    # Read in inferred exposures in SP format,
    # and convert them into our internal format.
    inferred.exp.path.SP.format <- paste0(run.dir, "/sig_activities.txt")
    inferredExposures <- ReadSigProfilerExposure(inferred.exp.path.SP.format)
    ## Remove the first "Similarity" row.
    inferredExposures <- inferredExposures[-1, , drop = FALSE]
    inferred.exp.path <- paste0(run.dir, "/inferred.exposures.csv")
    mSigTools::write_exposure(inferredExposures, inferred.exp.path)

    # SummarizeSigOneSubdir is given summary.folder.name; summarized results
    # are expected to be dumped into that folder under run.dir.
    retval <-
      SummarizeSigOneSubdir(
        run.dir = run.dir,
        ground.truth.exposure.dir = ground.truth.exposure.dir,
        extracted.sigs.path = ref.sigs.path,
        inferred.exp.path = inferred.exp.path,
        overwrite = overwrite,
        summary.folder.name = summary.folder.name,
        export.Manhattan.each.spectrum = export.Manhattan.each.spectrum)

    # Copy the decomposition profile and the dendrogram found in run.dir
    # to the summary folder. The explicit "/" keeps the paths correct when
    # run.dir is given without a trailing slash.
    copy.from.files <- paste0(run.dir, "/",
                              c("decomposition profile.csv",
                                "dendrogram.pdf"))
    # The trailing slash makes file.copy treat the target as a directory.
    summary.dir <- paste0(run.dir, "/", summary.folder.name, "/")
    for (copy.from in copy.from.files) {
      if (!file.exists(copy.from)) {
        warning("Cannot find ", copy.from, "\n\nSkipping\n\n")
      } else {
        file.copy(
          from = copy.from,
          to = summary.dir,
          overwrite = TRUE)
      }
    }

    invisible(retval) # So we can test without looking at a file.
  }
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.