# helmsmanMuSigInteraction.R
# Functions for exchanging catalogs and exposures with the helmsman Python package
#' Convert catalogs from ICAMS format to helmsman format
#'
#' @param catalog A catalog matrix (or data.frame) in ICAMS format
#'   (SNS only!): mutation types are the row names and each column is one
#'   sample or signature. Every row name must be either 4 characters long
#'   (trinucleotide context followed by the variant base, e.g.
#'   \code{"ACAG"}) or 6 characters long (pentanucleotide context followed
#'   by the variant base).
#'
#' @param type Whether it is a spectra catalog ("spectra") or
#'   a signature catalog ("signature").
#'
#' @return A catalog in helmsman format: one row per column of
#'   \code{catalog} and one column per mutation type, named
#'   \code{<Ref>_<Var>.<Context>} (e.g. \code{"C_G.ACA"}). For
#'   \code{type = "spectra"} the result is a data.frame with a leading
#'   \code{ID} column; for \code{type = "signature"} the leading column is
#'   called \code{Sig}. For any other \code{type} the transposed, renamed
#'   matrix is returned without a leading name column.
#'
#' @export
ICAMSCatalog2helmsman <- function(catalog, type = "spectra") {
  stopifnot(is.data.frame(catalog) || is.matrix(catalog))

  # helmsman keeps one sample per row, so mutation types become columns.
  catalog <- t(catalog)
  ICAMSBaseContext <- colnames(catalog)

  # All mutation types must share one supported length; otherwise the
  # substr() positions below would silently produce wrong names.
  contextLength <- unique(nchar(ICAMSBaseContext))
  if (length(contextLength) != 1L || !(contextLength %in% c(4L, 6L))) {
    stop("Unsupported ICAMS mutation type names: every row name of ",
         "`catalog` must have 4 (trinucleotide) or 6 (pentanucleotide) ",
         "characters",
         call. = FALSE)
  }

  # The last character is the variant base; everything before it is the
  # sequence context, whose middle base is the reference base
  # (position 2 of 3, or position 3 of 5).
  Var <- substr(ICAMSBaseContext, contextLength, contextLength)
  BeforeRefAfter <- substr(ICAMSBaseContext, 1L, contextLength - 1L)
  refPosition <- contextLength %/% 2L
  Ref <- substr(ICAMSBaseContext, refPosition, refPosition)
  colnames(catalog) <- paste0(Ref, "_", Var, ".", BeforeRefAfter)

  # Prepend the sample / signature names as the first column.
  if (type == "spectra") {
    catalog <- data.frame("ID" = rownames(catalog),
                          catalog)
  } else if (type == "signature") {
    catalog <- data.frame("Sig" = rownames(catalog),
                          catalog)
  }
  catalog
}
#' Read exposure files in helmsman format
#'
#' @param exposure Path to the exposure file generated by helmsman.
#'   Usually, it is called "W_components.txt". The file must be
#'   tab-delimited with a header line; its first column ("ID") holds the
#'   sample names.
#'
#' @param check.names logical. If \code{TRUE} then the names of the
#'   variables in the data frame are checked to ensure that they are
#'   syntactically valid variable names. If necessary they are adjusted
#'   (by \code{\link[base]{make.names}}) so that they are, and also to
#'   ensure that there are no duplicates.
#'
#' @return ICAMS/SynSigEval formatted exposure matrix: the transpose of
#'   the file contents, i.e. one row per file column after the first and
#'   one column per sample, with the sample names as column names.
#'
#' @export
ReadhelmsmanExposure <- function(exposure, check.names = TRUE) {
  # Spell out TRUE: `T` is an ordinary variable that callers can reassign.
  exposure <- utils::read.table(
    file = exposure, header = TRUE,
    sep = "\t", as.is = TRUE,
    check.names = check.names)

  # The first column ("ID") carries the sample names; move it into the
  # row names so that only exposure values remain in the table.
  rownames(exposure) <- exposure[, 1]

  # drop = FALSE keeps a one-signature table two-dimensional.
  t(exposure[, -1, drop = FALSE])
}
#' Read catalog files or matrices in helmsman format
#'
#' @param cat Input catalog, can be a tab-delimited text
#'   file in helmsman format, or a matrix/data.frame object.
#'   In every case the first column holds the sample / signature names
#'   and the remaining columns are mutation types named
#'   \code{<Ref>_<Var>.<Context>} with a tri- or pentanucleotide context
#'   (7 or 9 characters in total, e.g. \code{"C_G.ACA"}).
#'
#' @param region Catalog region. Can be a specific genomic
#'   or exomic region, or "unknown".
#'   Default: "unknown"
#'
#' @param catalog.type Is the catalog a signature catalog,
#'   or a spectrum catalog?
#'   Default: "counts.signature"
#'
#' @return a catalog matrix in ICAMS format, as returned by
#'   \code{ICAMS::as.catalog}.
#'
#' @export
helmsmanCatalog2ICAMS <- function(
    cat,
    region = "unknown",
    catalog.type = "counts.signature") {
  stopifnot(is.character(cat) || is.data.frame(cat) || is.matrix(cat))

  # Only a plain character vector is a file path; a character matrix is
  # an in-memory catalog and must not be handed to read.table().
  if (is.character(cat) && is.null(dim(cat))) {
    catMatrix <- utils::read.table(
      file = cat, header = TRUE,
      sep = "\t", as.is = TRUE)
  } else {
    catMatrix <- cat
  }

  # Move the name column into the row names, then transpose so that
  # mutation types become rows. drop = FALSE keeps the table
  # two-dimensional even if only one mutation-type column remains.
  rownames(catMatrix) <- catMatrix[, 1]
  catMatrix <- t(catMatrix[, -1, drop = FALSE])

  # A matrix input that carried a name column is necessarily character;
  # restore numeric storage so the catalog holds numbers, not strings.
  if (is.character(catMatrix)) {
    storage.mode(catMatrix) <- "numeric"
  }

  helmsmanBaseContext <- rownames(catMatrix)

  # "R_V.NNN" has 7 characters (trinucleotide), "R_V.NNNNN" has 9
  # (pentanucleotide). Anything else cannot be converted.
  contextLength <- unique(nchar(helmsmanBaseContext))
  if (length(contextLength) != 1L || !(contextLength %in% c(7L, 9L))) {
    stop("Unsupported helmsman mutation type names: every name must have ",
         "7 (trinucleotide) or 9 (pentanucleotide) characters",
         call. = FALSE)
  }

  # The sequence context starts at character 5; the variant base is
  # character 3. ICAMS names are the context followed by the variant base.
  BeforeRefAfter <- substr(helmsmanBaseContext, 5L, contextLength)
  Var <- substr(helmsmanBaseContext, 3L, 3L)
  rownames(catMatrix) <- paste0(BeforeRefAfter, Var)

  ICAMS::as.catalog(object = catMatrix,
                    region = region,
                    catalog.type = catalog.type)
}
#' Prepare input file for helmsman from an
#' ICAMS formatted catalog.
#'
#' @param catalog a catalog in ICAMS format. It can be
#'   a .csv file, or a matrix or data.frame.
#'   Usually, it refers to \code{"ground.truth.syn.catalog.csv"}.
#'
#' @param out.dir Directory that will be created for the output;
#'   abort if it already exists and \code{overwrite} is not \code{TRUE}.
#'   By default it is \code{ExtrAttr/helmsman.results} under the
#'   folder storing \code{catalog}; this default is only available when
#'   \code{catalog} is a file path, so \code{out.dir} must be given
#'   explicitly for a matrix or data.frame.
#'
#' @param overwrite If TRUE, overwrite existing output
#'
#' @return \code{invisible(catMatrix)},
#'   original catalog in helmsman format
#'
#' @details Creates the folder \code{out.dir} and writes the
#'   helmsman-formatted catalog into it as a tab-delimited \code{.tsv} file
#'   named after the input file (or \code{ground.truth.syn.catalog.tsv}
#'   when \code{catalog} is a matrix or data.frame). In this file each row
#'   is one sample (tumor); the first column, \code{ID}, holds the sample
#'   name, while the remaining columns are mutation types.
#'   This helmsman-formatted catalog will be the input when running the
#'   helmsman program later on the Python platform.
#'
#' @export
#'
#' @importFrom utils capture.output
CreatehelmsmanOutput <-
  function(catalog,
           out.dir = paste0(dirname(catalog), "/ExtrAttr/helmsman.results"),
           overwrite = FALSE) {
    isFile <- is.character(catalog)
    isTable <- is.data.frame(catalog) || is.matrix(catalog)
    if (!isFile && !isTable) {
      stop("`catalog` must be a file path, a matrix or a data.frame",
           call. = FALSE)
    }

    # The default out.dir is derived from the catalog's file path, so it
    # cannot be evaluated for an in-memory catalog.
    if (!isFile && missing(out.dir)) {
      stop("`out.dir` must be supplied when `catalog` is not a file path",
           call. = FALSE)
    }

    # Honour the documented contract: never write into an existing
    # directory unless the caller explicitly asked to overwrite.
    if (dir.exists(out.dir) && !isTRUE(overwrite)) {
      stop(out.dir, " already exists; use overwrite = TRUE to write into it",
           call. = FALSE)
    }

    if (isFile) {
      ## Read in catalog matrix using ICAMS::ReadCatalog,
      ## then convert it to helmsman format.
      catMatrix <- ICAMS::ReadCatalog(catalog, strict = FALSE)
      catMatrix <- ICAMSCatalog2helmsman(catMatrix)
      ## Name the output after the catalog file, without its extension.
      oldFileName <- tools::file_path_sans_ext(basename(catalog))
    } else {
      ## Assume `catalog` is a legal ICAMS catalog object.
      catMatrix <- ICAMSCatalog2helmsman(catalog)
      oldFileName <- "ground.truth.syn.catalog"
    }

    ## Create out.dir only after the conversion succeeded, so a failed
    ## conversion leaves no empty directory behind.
    if (!dir.exists(out.dir)) {
      dir.create(out.dir, recursive = TRUE)
    }

    ## Dump catMatrix into out.dir
    newFileName <- file.path(out.dir, paste0(oldFileName, ".tsv"))
    utils::write.table(
      catMatrix, file = newFileName,
      sep = "\t", quote = FALSE, row.names = FALSE)
    invisible(catMatrix)
  }
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.