#' Combine results for a single dataset, from different computational approaches.
#'
#' Load the summary \code{multiRun.RDa} of each computational approach from
#' \code{third.level.dir}/\code{tool.dirnames}
#' (generated by \code{\link{SummarizeMultiRuns}}), and
#' combine them into one summary stored under \code{third.level.dir}.
#'
#' @param third.level.dir Third level path distinguishing de novo extraction
#' + attribution packages from attribution-only packages.
#' Examples:
#' \code{top.dir}/sp.sp/ExtrAttr/
#' \code{top.dir}/sa.sa/Attr/
#'
#' Here, \code{top.dir} refers to a top-level directory which contains the
#' full information of a synthetic dataset. (e.g. \code{syn.2.7a.7b.abst.v8})
#' This code depends on a conventional directory structure documented
#' elsewhere.
#'
#' @param toolNames Names of the computational approaches
#' (e.g. "SigProExtractor"). Must have the same length and order as
#' \code{tool.dirnames}.
#'
#' @param tool.dirnames Names of the directories under \code{third.level.dir},
#' one per computational approach. Each is expected to contain the
#' summarized results generated by \code{\link{SummarizeMultiRuns}}
#' (multiRun.RDa, ManhattanDist.csv, meanSD.csv, meanSD.Manhattan.dist.csv);
#' only \code{multiRun.RDa} is read by this function.
#' Examples:
#' \code{"signeR.results"} (Under \code{third.level.dir} "ExtrAttr")
#' \code{"deconstructSigs.results"} (Under \code{third.level.dir} "Attr")
#'
#' @param datasetGroup Numeric or character value specifying the group
#' this dataset belongs to. Required; it is stored in the
#' \code{datasetGroup} column of every combined table.
#' E.g. For SBS1-SBS5 correlated datasets, we can consider slope as the group:
#' c("slope=0.1","slope=0.5","slope=1","slope=2","slope=5","slope=10")
#'
#' @param datasetGroupName Meaning or label of all datasetGroup.
#' E.g. For SBS1-SBS5 correlated datasets, we can consider \code{"SBS1:SBS5 mutation count ratio"}
#' as the label of the \code{datasetGroup} slope.
#'
#' @param datasetSubGroup Optional. Numeric or character value differentiating
#' datasets within each group.
#' E.g. For SBS1-SBS5 correlated datasets, we can consider Pearson's R^2
#' as the subgroup:
#' c("Rsq=0.1","Rsq=0.2","Rsq=0.3","Rsq=0.6")
#' If \code{NULL} (the default), the combined tables have no
#' \code{datasetSubGroup} column.
#'
#' @param datasetSubGroupName Optional. Meaning or label of all datasetSubGroup.
#' E.g. For SBS1-SBS5 correlated datasets, we can consider \code{"Pearson's R squared"}
#' as the label of the \code{datasetSubGroup} Pearson's R^2.
#'
#' @return Invisibly, the list \code{multiTools}. Its elements are:
#' \itemize{
#' \item \code{averCosSim}, \code{falseNeg}, \code{falsePos}, \code{truePos},
#'   \code{TPR}, \code{PPV}: one data frame per measure with one row per
#'   run and computational approach (columns \code{seed}, \code{value},
#'   \code{toolName}, \code{datasetName}, \code{datasetGroup} and, if
#'   provided, \code{datasetSubGroup}).
#' \item \code{gtSigNames}: names of the ground-truth signatures.
#' \item \code{cosSim}: list with one such data frame per ground-truth
#'   signature.
#' \item \code{AggManhattanDist}, \code{meanSepMD}, \code{sdSepMD}: same
#'   layout as \code{cosSim}; present only if the loaded \code{multiRun}
#'   objects contain them.
#' \item \code{combMeanSD}, \code{combMeanSDAggMD}: the \code{meanSD} and
#'   \code{meanSDAggMD} tables of all approaches bound column-wise, with
#'   column names prefixed by the approach's directory name.
#' \item \code{datasetName}, \code{datasetGroupName},
#'   \code{datasetSubGroupName}.
#' }
#' The measures are:
#' Cosine similarity
#' True Positives(TP): Ground-truth signatures which are active in
#' the spectra, and extracted.
#' False Negatives(FN): Ground-truth signatures not extracted.
#' False Positives(FP): Signatures wrongly extracted, not resembling
#' any ground-truth signatures.
#' True positive rate (TPR, Sensitivity): TP / (TP + FN)
#' Positive predictive value (PPV, Precision): TP / (FP + TP)
#'
#' @details This function generates \code{multiTools.RDa} and
#' \code{combined.meanSD.csv} under \code{third.level.dir}, plus
#' \code{combined.meanSD.Aggregated.Manhattan.dist.csv},
#' \code{mean.of.sep.Scaled.Manhattan.dist.csv} and
#' \code{stdev.of.sep.Scaled.Manhattan.dist.csv} when the corresponding
#' results are available. It stops with an error if the approaches were
#' not run on the same dataset.
#'
#' @importFrom utils write.csv capture.output sessionInfo
#'
#' @export
#'
SummarizeMultiToolsOneDataset <- function(
  third.level.dir,
  toolNames,
  tool.dirnames,
  datasetGroup,
  datasetGroupName,
  datasetSubGroup = NULL,
  datasetSubGroupName = NULL) {

  if (length(toolNames) == 0L) {
    stop("Must provide at least one computational approach in toolNames.\n")
  }
  if (length(toolNames) != length(tool.dirnames)) {
    stop("toolNames and tool.dirnames must have the same length.\n")
  }

  ## Per-run measures stored in multiRun as one named vector each
  ## (names are the seeds of the runs).
  indexes <- c("averCosSim", "falseNeg", "falsePos",
               "truePos", "TPR", "PPV")
  ## Measures stored in multiRun as a matrix with one row per
  ## ground-truth signature and one column per run.
  matrixMeasures <- c("AggManhattanDist", "meanSepMD", "sdSepMD")

  ## Build the long-format table for one measure of one approach.
  ## The datasetSubGroup column is only added when datasetSubGroup
  ## was supplied by the caller.
  LongFormat <- function(seed, value, toolName, runDatasetName) {
    cols <- list(seed = seed,
                 value = value,
                 toolName = toolName,
                 datasetName = runDatasetName,
                 datasetGroup = datasetGroup)
    if (!is.null(datasetSubGroup)) {
      cols$datasetSubGroup <- datasetSubGroup
    }
    cols$stringsAsFactors <- FALSE
    long <- do.call(data.frame, cols)
    rownames(long) <- NULL
    long
  }

  ## Append the rows of one approach to the table accumulated so far.
  AppendRows <- function(accumulated, newRows) {
    if (is.null(accumulated)) {
      accumulated <- data.frame()
    }
    rbind(accumulated, newRows)
  }

  multiTools <- list()
  combMeanSD <- NULL
  combMeanSDAggMD <- NULL
  ## Must be initialized OUTSIDE the loop: it remembers the dataset of the
  ## previous approach so that the consistency check below can work.
  datasetName <- NULL

  for (toolNumber in seq_along(toolNames)) {
    toolName <- toolNames[toolNumber]
    toolDirName <- tool.dirnames[toolNumber]
    toolPath <- file.path(third.level.dir, toolDirName)

    ## Load into a private environment so that objects stored in the
    ## file cannot overwrite local variables of this function.
    runEnv <- new.env(parent = emptyenv())
    load(file.path(toolPath, "multiRun.RDa"), envir = runEnv)
    multiRun <- runEnv$multiRun
    if (is.null(multiRun)) {
      stop("No object named multiRun found in ",
           file.path(toolPath, "multiRun.RDa"), "\n")
    }

    if (!is.null(datasetName) &&
        !identical(datasetName, multiRun$datasetName)) {
      stop("Must provide results of different approaches on the SAME dataset.\n")
    }
    datasetName <- multiRun$datasetName

    ## Combine multi-runs and multi-tools for each measure
    for (index in indexes) {
      rows <- LongFormat(seed = names(multiRun[[index]]),
                         value = multiRun[[index]],
                         toolName = toolName,
                         runDatasetName = multiRun$datasetName)
      multiTools[[index]] <- AppendRows(multiTools[[index]], rows)
    }

    ## meanSD contains mean and standard deviation
    ## for each extraction measure.
    meanSD <- multiRun$meanSD
    if (!is.null(meanSD)) {
      colnames(meanSD) <- paste0(toolDirName, ".", colnames(meanSD))
      combMeanSD <- cbind(combMeanSD, meanSD)
    }

    ## Combine multi-runs and multi-tools for
    ## $cosSim - cosine similarity to each ground-truth signature
    gtSigNames <- names(multiRun$cosSim)
    multiTools$gtSigNames <- gtSigNames
    if (is.null(multiTools[["cosSim"]])) {
      multiTools[["cosSim"]] <- list()
    }
    for (gtSigName in gtSigNames) {
      rows <- LongFormat(seed = names(multiRun$cosSim[[gtSigName]]),
                         value = multiRun$cosSim[[gtSigName]],
                         toolName = toolName,
                         runDatasetName = multiRun$datasetName)
      multiTools[["cosSim"]][[gtSigName]] <-
        AppendRows(multiTools[["cosSim"]][[gtSigName]], rows)
    }

    ## Combine multi-runs and multi-tools for the measures based on
    ## scaled Manhattan distance (aggregated; mean and standard
    ## deviation over tumors). Each is optional in multiRun.
    ## One data frame is created per ground-truth signature.
    for (measure in matrixMeasures) {
      measureMatrix <- multiRun[[measure]]
      if (is.null(measureMatrix)) {
        next
      }
      if (is.null(multiTools[[measure]])) {
        multiTools[[measure]] <- list()
      }
      for (gtSigName in gtSigNames) {
        rows <- LongFormat(seed = colnames(measureMatrix),
                           value = measureMatrix[gtSigName, ],
                           toolName = toolName,
                           runDatasetName = multiRun$datasetName)
        multiTools[[measure]][[gtSigName]] <-
          AppendRows(multiTools[[measure]][[gtSigName]], rows)
      }
    }

    ## meanSDAggMD contains mean and standard deviation
    ## for aggregated Scaled Manhattan distance between ground-truth exposures
    ## and inferred exposures for each ground-truth signature.
    ## As before, it is only combined when AggManhattanDist is available.
    if (!is.null(multiRun$AggManhattanDist)) {
      meanSDAggMD <- multiRun$meanSDAggMD
      if (!is.null(meanSDAggMD)) {
        colnames(meanSDAggMD) <-
          paste0(toolDirName, ".", colnames(meanSDAggMD))
        combMeanSDAggMD <- cbind(combMeanSDAggMD, meanSDAggMD)
      }
    }
  }

  ## Assigning NULL leaves the list unchanged, so optional elements
  ## simply stay absent.
  multiTools$combMeanSD <- combMeanSD
  multiTools$combMeanSDAggMD <- combMeanSDAggMD
  multiTools$datasetName <- datasetName
  multiTools$datasetGroupName <- datasetGroupName
  multiTools$datasetSubGroupName <- datasetSubGroupName

  save(multiTools, file = file.path(third.level.dir, "multiTools.RDa"))
  write.csv(x = multiTools$combMeanSD,
            file = file.path(third.level.dir, "combined.meanSD.csv"))
  if (!is.null(multiTools$combMeanSDAggMD)) {
    write.csv(x = multiTools$combMeanSDAggMD,
              file = file.path(third.level.dir,
                               "combined.meanSD.Aggregated.Manhattan.dist.csv"))
  }
  if (!is.null(multiTools[["meanSepMD"]])) {
    write.csv(x = multiTools[["meanSepMD"]],
              file = file.path(third.level.dir,
                               "mean.of.sep.Scaled.Manhattan.dist.csv"))
  }
  if (!is.null(multiTools[["sdSepMD"]])) {
    write.csv(x = multiTools[["sdSepMD"]],
              file = file.path(third.level.dir,
                               "stdev.of.sep.Scaled.Manhattan.dist.csv"))
  }
  invisible(multiTools)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.