CopyWithChecks <- function(from, to.dir, overwrite = FALSE) {
if (!file.exists(from)) {
warning("Cannot find", from, "\n\nSkipping\n\n")
} else {
copy.res <- file.copy(
from = from, to = to.dir, overwrite = overwrite)
if (!copy.res)
cat("Copy from", from, "to directory", to.dir, "failed\n\n")
}
}
#' Assess/evaluate one result sub-folder from a computational approach
#'
#' Note: For summarizing SigProExtractor or SignatureAnalyzer,
#' users should use SigProExtractor(SigProfiler-Python) v0.0.5.43+
#' and SignatureAnalyzer 2018-Apr-18.
#'
#' @param run.dir A directory which contains output of computational approach
#' in one run on a specific dataset, possibly with a specified seed. E.g.
#' \code{2b.Full_output_K_as_2/hdp.results/S.0.1.Rsq.0.1/seed.1/}.
#'
#' This code depends on a conventional directory structure documented
#' in \code{NEWS.md}.
#'
#' @param ground.truth.exposure.dir Folder which stores ground-truth exposures.
#' Should contain a file named \code{ground.truth.syn.exposures.csv}.
#' In PCAWG paper: \code{run.dir/../../}
#' In SBS1-SBS5 paper: \code{0.Input_datasets/S.0.1.Rsq.0.1/}
#'
#' @param extracted.sigs.path Path to extracted sigs file.
#' For most computational approaches, it's in:
#' \code{<run.dir>/extracted.signatures.csv}.
#'
#' For SigProExtractor, it's in:
#' \code{<run.dir>/<catalog.type>/Selected_Solution/De_Novo_Solution/De_Novo_Solution_Signatures_<catalog.type>.txt}
#' , and is converted to
#' \code{<run.dir>/extracted.signatures.csv} by
#' wrapper function \code{\link{SummarizeSigProExtractor}}.
#'
#' For SignatureAnalyzer, it's in:
#' \code{<run.dir>/sa.output.sigs.csv}
#'
#' For EMu, helmsman.NMF, MultiModalMuSig.LDA and MultiModalMuSig.CTM,
#' their extracted signature files are not standard \code{ICAMS} catalog format.
#' These non-standard extracted signatures are converted to
#' \code{extracted.signatures.csv}, a standard \code{ICAMS} catalog
#' by their respective wrapper functions: \itemize{
#' \item \code{\link{ReadEMuCatalog}},
#' \item \code{\link{helmsmanCatalog2ICAMS}}, and
#' \item \code{\link{MMCatalog2ICAMS}}
#' }
#' They are dumped into \code{extracted.sigs.csv} same as most approaches.
#'
#' @param inferred.exp.path Path to inferred exposures file.
#' For most computational approaches, it's in:
#' \code{<run.dir>/inferred.exposures.csv}.
#'
#' For SigProExtractor, it's in
#' \code{<run.dir>/<catalog.type>/Selected_Solution/De_Novo_Solution/De_Novo_Solution_Activities_<catalog.type>.txt}
#' , and is converted to
#' \code{<run.dir>/inferred.exposures.csv} by
#' wrapper function \code{\link{SummarizeSigProExtractor}}.
#'
#' For SignatureAnalyzer, it's in:
#' \code{<run.dir>/sa.output.exp.csv}
#'
#' For EMu, helmsman.NMF, MultiModalMuSig.LDA and MultiModalMuSig.CTM,
#' their extracted signature files are not standard \code{ICAMS} catalog format.
#' These non-standard extracted signatures are converted to
#' \code{extracted.signatures.csv}, a standard \code{mSigTools} exposures
#' by their respective wrapper functions: \itemize{
#' \item \code{\link{ReadEMuExposureFile}}
#' \item \code{\link{ReadhelmsmanExposure}}, and
#' \item \code{\link{ReadExposureMM}}
#' }
#' They are dumped into \code{inferred.exposures.csv} same as most approaches.
#'
#'
#' @param summarize.exp Whether to summarize exposures when the file specified
#' by \code{inferred.exp.path} exists.
#'
#' @param overwrite If TRUE overwrite and files in existing \code{run.dir/summary} folder.
#'
#' @param summary.folder.name The name of the folder containing summary results.
#' Usually, it equals to "summary".
#'
#' @param export.Manhattan.each.spectrum Whether to export csv files for Manhattan
#' distance of each mutational spectrum.
#'
#' @param verbose If > 0, print progress / checking messages.
#'
#' @importFrom utils write.csv capture.output sessionInfo
#'
#' @keywords internal
SummarizeSigOneSubdir <-
function(run.dir,
ground.truth.exposure.dir,
extracted.sigs.path,
inferred.exp.path = NULL,
summarize.exp = TRUE,
# TODO(Steve): copy this to the summary and do analysis on how much
# extracted signature contributes to exposures.
overwrite = FALSE,
summary.folder.name = "summary",
export.Manhattan.each.spectrum = FALSE,
verbose = 0) {
# Check whether package gtools is installed -------------------------------
if (!requireNamespace("gtools", quietly=TRUE)) {
stop("Package gtools needs to be installed\n")
}
# Specify and create output directory -------------------------------------
outputPath <- paste0(run.dir, "/", summary.folder.name)
if (dir.exists(outputPath)) {
if (!overwrite) stop(outputPath, " already exists")
}
suppressWarnings(dir.create(outputPath))
# Calculate signature extraction measures ---------------------------------
sigAnalysis <- # calls mSigTools::TP_FP_FN_avg_sim
ReadAndAnalyzeSigs(
extracted.sigs = extracted.sigs.path,
reference.sigs =
paste0(ground.truth.exposure.dir,"/ground.truth.syn.sigs.csv"),
ground.truth.exposures =
paste0(ground.truth.exposure.dir,"/ground.truth.syn.exposures.csv")
)
sigAnalysis$TP.ex.names <- sigAnalysis$table$ex.sig
sigAnalysis$TP.ref.names <- sigAnalysis$table$ref.sig
sigAnalysis$FP.names <-
setdiff(colnames(sigAnalysis$ex.sigs), sigAnalysis$table$ex.sig)
stopifnot(sigAnalysis$FP.names == sigAnalysis[["unmatched.ex.sigs"]])
if (verbose > 0) {
message("old FP names: ", paste(sigAnalysis$FP.names, collapse = " "))
message("new FP names: ",
paste(sigAnalysis[["unmatched.ex.sigs"]], collapse = " "))
}
sigAnalysis$FN.names <-
setdiff(colnames(sigAnalysis$ref.sigs), sigAnalysis$table$ref.sig)
stopifnot(sigAnalysis$FN.names == sigAnalysis[["unmatched.ref.sigs"]])
if (verbose > 0) {
message("old FN names: ", paste(sigAnalysis$FN.names, collapse = " "))
message("new FN names: ",
paste(sigAnalysis[["unmatched.ref.sigs"]], collapse = " "))
}
sigAnalysis$averCosSim <- sigAnalysis$avg.cos.sim
sigAnalysis$avg.cos.sim <- NULL
sigAnalysis$PPV <- sigAnalysis$TP / (sigAnalysis$TP + sigAnalysis$FP)
sigAnalysis$TPR <- sigAnalysis$TP / (sigAnalysis$TP + sigAnalysis$FN)
# Calculate best cosine similarity for each reference sig ----------------
# Reference sigs in sigAnalysis$table are reference sigs with best match.
# For these sigs, we directly record the cosSim values in $table.
cosSim_TP_ref <- numeric(0)
if(nrow(sigAnalysis$table) > 0) {
cosSim_TP_ref <- sigAnalysis$table$sim
names(cosSim_TP_ref) <- sigAnalysis$table$ref.sig
}
# For reference sigs which do not have a best match (and thus FN sigs),
# their best cosine similarity values are calculated in cosSim_FN
cosSim_FN <- numeric(0)
if(sigAnalysis$FN > 0) {
cosSim_FN <- numeric(sigAnalysis$FN)
names(cosSim_FN) <- sigAnalysis$FN.names
for(ref_sig_name in sigAnalysis$FN.names) {
cosSim_FN[ref_sig_name] <- max(sigAnalysis$sim.matrix[, ref_sig_name])
}
}
sigAnalysis$cosSim <- c(cosSim_TP_ref, cosSim_FN)
# Export results summary from sigAnalysis ---------------------------------
# Copy ground.truth exposures from second.level.dir
# to outputPath == run.dir/<summary.folder.name>.
CopyWithChecks(
from = paste0(ground.truth.exposure.dir, "/ground.truth.syn.exposures.csv"),
to.dir = outputPath,
overwrite = TRUE)
# Export bi-lateral matching table between extracted and reference signatures
write.csv(sigAnalysis$table,
file = paste(outputPath,"match.ex.to.ref.csv",sep = "/"),
row.names = F)
# Export full cosine similarity table
write.csv(sigAnalysis$sim.matrix,
file = paste(outputPath,"full.cossims.ex.to.ref.csv",sep = "/"),
row.names = T)
# Export reference signatures with non-zero ground-truth exposures
ICAMS::WriteCatalog(
sigAnalysis$ref.sigs,
paste(outputPath,"ref.sigs.csv",sep = "/"),
)
# Export extracted signatures with new names
ex.sigs.renamed <- RelabelExSigs(sigAnalysis)
sigAnalysis$ex.sigs <- ex.sigs.renamed
ICAMS::WriteCatalog(
sigAnalysis$ex.sigs,
paste(outputPath,"extracted.sigs.csv",sep = "/"))
# Exports other outputs into "other.results.txt"
capture.output(
cat("Average cosine similarity, only best matches are considered\n"),
sigAnalysis$averCosSim,
cat("\nNumber of reference signatures\n"),
ncol(sigAnalysis$ref.sigs),
cat("\nNumber of extracted signatures\n"),
ncol(sigAnalysis$ex.sigs),
cat("\nTrue positive reference signatures\n"),
gtools::mixedsort(sigAnalysis$TP.ref.names),
cat("\nTrue positive extracted signatures\n"),
gtools::mixedsort(sigAnalysis$TP.ex.names),
cat("\nFalse positive signatures\n"),
gtools::mixedsort(sigAnalysis$FP.names),
cat("\nFalse negative signatures\n"),
gtools::mixedsort(sigAnalysis$FN.names),
file = paste0(outputPath,"/other.results.txt"))
# Plot signatures to PDF files --------------------------------------------
# Currently, ICAMS cannot plot COMPOSITE catalog.
# TODO(Wuyang): To add a ICAMS:::PlotCatalog.COMPOSITECatalog function
# Plot reference sigs with non-zero ground-truth exposures
if("COMPOSITECatalog" %in% class(sigAnalysis$ref.sigs) == FALSE){
ICAMS::PlotCatalogToPdf(sigAnalysis$ref.sigs,
paste0(outputPath,"/ref.sigs.pdf"))
}
# Plot renamed and sorted extracted sigs
if("COMPOSITECatalog" %in% class(sigAnalysis$ex.sigs) == FALSE){
ICAMS::PlotCatalogToPdf(sigAnalysis$ex.sigs,
paste0(outputPath,"/extracted.sigs.pdf"))
}
# Summarize signature attribution (a.k.a. exposure inference) -------------
# Only when inferred.exp.path is not empty
# AND flag "summarize.exp" is set as TRUE
if(!is.null(inferred.exp.path) && summarize.exp == TRUE) {
if(file.exists(inferred.exp.path)) {
exposureDiff <- ReadAndAnalyzeExposures(
extracted.sigs = extracted.sigs.path,
reference.sigs =
paste0(ground.truth.exposure.dir,"/ground.truth.syn.sigs.csv"),
inferred.exp.path = inferred.exp.path,
ground.truth.exposures =
paste0(ground.truth.exposure.dir,"/ground.truth.syn.exposures.csv"))
# Write results of sum of Manhattan distance
write.csv(exposureDiff$SumOfManhattan,
file = paste0(outputPath,"/SumOfManhattan.csv"),
quote = T)
# Write TPR, PPV, and F1 score
# for exposure inference measures.
write.csv(exposureDiff$F1.measures,
file = paste0(outputPath,"/F1.measures.csv"),
quote = T)
# Write results of exposure inference measures,
# in aggregated format for each tumor and each
# reference signature.
if(export.Manhattan.each.spectrum){
for(spectrumName in names(exposureDiff$Manhattan)){
## Replace all characters unsuitable for filenames.
cleanedName <- fs::path_sanitize(spectrumName, replacement = ".")
write.csv(exposureDiff$Manhattan[[spectrumName]],
file = paste0(outputPath,"/Manhattan.",cleanedName,".csv"),
quote = T)
}
}
# Copy inferred exposures to summary folder.
CopyWithChecks(inferred.exp.path,
paste0(outputPath,"/inferred.exposures.csv"),
overwrite = overwrite)
}
else {
warning("Cannot find", inferred.exp.path, "\n\nSkipping\n\n")
}
}
# Save and return result summary lists, and log file ----------------------
# Log of system time and session info
capture.output(Sys.time(), sessionInfo(),
file = paste0(outputPath,"/assessment.sessionInfo.txt"))
# Save Signature extraction summary in an RDa file,
# for reuse in SummarizeMultiRuns().
save(sigAnalysis,
file = paste0(outputPath,"/sigAnalysis.RDa"))
# Save exposure inference summary in an RDa file,
# for reuse in SummarizeMultiRuns().
if(file.exists(inferred.exp.path) & summarize.exp){
save(exposureDiff,
file = paste0(outputPath,"/exposureDiff.RDa"))
}
# Returned for checking in tests
invisible(sigAnalysis)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.