R/helmsmanInteraction.R

Defines functions CreatehelmsmanOutput helmsmanCatalog2ICAMS ReadhelmsmanExposure ICAMSCatalog2helmsman

Documented in CreatehelmsmanOutput helmsmanCatalog2ICAMS ICAMSCatalog2helmsman ReadhelmsmanExposure

# helmsmanInteraction.R
# Functions for exchanging catalogs and exposures with the helmsman Python package


#' Convert catalogs from ICAMS format to helmsman format
#'
#' @param catalog A catalog in ICAMS format, given as a matrix or data.frame
#' (single-base substitution catalogs only). Row names must be the ICAMS
#' mutation types: either 4 characters (trinucleotide context followed by
#' the variant base, e.g. \code{"ACAA"}) or 6 characters (pentanucleotide
#' context followed by the variant base). All row names must have the same
#' length.
#'
#' @param type Whether it is a spectra catalog (\code{"spectra"}) or
#' a signature catalog (\code{"signature"}).
#'
#' @return A data.frame in helmsman format. It is the transpose of
#' \code{catalog}: the first column (\code{ID} for spectra, \code{Sig} for
#' signatures) holds the column names of \code{catalog}, and each remaining
#' column is one mutation type named \code{<Ref>_<Var>.<context>},
#' e.g. \code{"C_A.ACA"}.
#'
#' @export
ICAMSCatalog2helmsman <- function(catalog, type = "spectra") {
  stopifnot(is.data.frame(catalog) || is.matrix(catalog))
  # Reject unknown types up front instead of silently returning a matrix
  # without the ID/Sig column.
  type <- match.arg(type, choices = c("spectra", "signature"))

  # helmsman stores one sample (or signature) per row and one mutation
  # type per column, i.e. the transpose of the ICAMS layout.
  catalog <- t(catalog)
  ICAMSBaseContext <- colnames(catalog)

  # Validate every mutation-type name, not only the first one, so that a
  # malformed catalog fails with a clear message.
  nameLengths <- nchar(ICAMSBaseContext)
  if (length(nameLengths) == 0L || anyNA(nameLengths) ||
      !all(nameLengths == nameLengths[1]) ||
      !(nameLengths[1] %in% c(4L, 6L))) {
    stop("Row names of `catalog` must all be ICAMS mutation types of ",
         "length 4 or 6 (e.g. \"ACAA\" or \"AACAAA\")",
         call. = FALSE)
  }
  baseContextLength <- nameLengths[1]

  # The last character is the variant base; everything before it is the
  # sequence context, whose middle character is the reference base
  # (position 2 of 3, or position 3 of 5).
  contextLength <- baseContextLength - 1L
  refPosition <- (contextLength + 1L) %/% 2L
  BeforeRefAfter <- substr(ICAMSBaseContext, 1L, contextLength)
  Ref <- substr(ICAMSBaseContext, refPosition, refPosition)
  Var <- substr(ICAMSBaseContext, baseContextLength, baseContextLength)
  colnames(catalog) <- paste0(Ref, "_", Var, ".", BeforeRefAfter)

  if (type == "spectra") {
    catalog <- data.frame("ID" = rownames(catalog),
                          catalog)
  } else {
    catalog <- data.frame("Sig" = rownames(catalog),
                          catalog)
  }
  catalog
}

#' Read exposure files in helmsman format
#'
#' @param exposure Path to a tab-delimited exposure file generated by
#' helmsman, with a header line. Usually, it is called
#' "W_components.txt". Its first column is used as the sample names.
#'
#' @param check.names logical. If \code{TRUE} then the names of the
#' variables in the data frame are checked to ensure that they are
#' syntactically valid variable names. If necessary they are adjusted
#' (by \code{\link[base]{make.names}}) so that they are, and also to
#' ensure that there are no duplicates.
#'
#' @return An exposure matrix in ICAMS/SynSigEval format: the transpose of
#' the table in the file, so that each column is one sample (named by the
#' first column of the file) and each row is one of the remaining columns
#' of the file.
#'
#' @export
ReadhelmsmanExposure <- function(exposure, check.names = TRUE) {

  # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
  exposureTable <- utils::read.table(
    file = exposure, header = TRUE,
    sep = "\t", as.is = TRUE,
    check.names = check.names)

  ## Use the contents of the first column ("ID") as the sample names.
  rownames(exposureTable) <- exposureTable[, 1]

  ## Drop the ID column and transpose so that samples become columns;
  ## drop = FALSE keeps the table shape when only one column remains.
  t(exposureTable[, -1, drop = FALSE])
}

#' Read catalog files or matrices in helmsman format
#'
#' @param cat Input catalog, can be the path to a tab-delimited text
#' file in helmsman format, or a matrix/data.frame object. In either case
#' the first column holds the sample (or signature) names and each
#' remaining column is one mutation type named
#' \code{<Ref>_<Var>.<context>} with a tri- or pentanucleotide context
#' (7 or 9 characters, e.g. \code{"C_A.ACA"}).
#'
#' @param region Catalog region. Can be a specific genomic
#' or exomic region, or "unknown".
#' Default: "unknown"
#'
#' @param catalog.type Is the catalog a signature catalog,
#' or a spectrum catalog?
#' Default: "counts.signature"
#'
#' @return a catalog matrix in ICAMS format, as returned by
#' \code{ICAMS::as.catalog}.
#'
#' @export
helmsmanCatalog2ICAMS <- function(
  cat,
  region = "unknown",
  catalog.type = "counts.signature"){

  stopifnot(is.character(cat) || is.data.frame(cat) || is.matrix(cat))

  ## A plain character vector is a file path. A character matrix is an
  ## in-memory catalog and must not be handed to read.table().
  if (is.character(cat) && !is.matrix(cat)) {
    catMatrix <- utils::read.table(
      file = cat, header = TRUE,
      sep = "\t", as.is = TRUE)
  } else {
    catMatrix <- cat
  }

  ## First column holds the sample names; the rest are mutation types.
  ## drop = FALSE keeps a single-sample matrix from collapsing to a vector.
  rownames(catMatrix) <- catMatrix[, 1]
  catMatrix <- t(catMatrix[, -1, drop = FALSE])

  ## A matrix input that carries an ID column is necessarily character;
  ## convert the remaining values back to numbers.
  ## NOTE(review): assumes ICAMS::as.catalog expects numeric values - confirm.
  if (is.character(catMatrix)) {
    storage.mode(catMatrix) <- "numeric"
  }

  helmsmanBaseContext <- rownames(catMatrix)
  nameLengths <- nchar(helmsmanBaseContext)
  if (length(nameLengths) == 0L || anyNA(nameLengths) ||
      !all(nameLengths == nameLengths[1]) ||
      !(nameLengths[1] %in% c(7L, 9L))) {
    stop("Mutation type names in `cat` must all have length 7 or 9 ",
         "(e.g. \"C_A.ACA\" or \"C_A.AACAA\")",
         call. = FALSE)
  }
  baseContextLength <- nameLengths[1]

  ## Name layout is "<Ref>_<Var>.<context>": the variant base is at
  ## position 3 and the tri-/pentanucleotide context starts at position 5.
  BeforeRefAfter <- substr(helmsmanBaseContext, 5L, baseContextLength)
  Var <- substr(helmsmanBaseContext, 3L, 3L)

  ## ICAMS mutation type = sequence context followed by the variant base.
  rownames(catMatrix) <- paste0(BeforeRefAfter, Var)

  ICAMS::as.catalog(object = catMatrix,
                    region = region,
                    catalog.type = catalog.type)
}

#' Prepare input file for helmsman from an
#' ICAMS formatted catalog.
#'
#' @param catalog a catalog in ICAMS format. It can be
#' a .csv file, or a matrix or data.frame.
#' Usually, it refers to \code{"ground.truth.syn.catalog.csv"}.
#'
#' @param out.dir Directory that will be created for the output;
#' abort if it already exists, unless \code{overwrite} is \code{TRUE}.
#' Usually, the \code{out.dir} will
#' be a \code{helmsman.results} folder directly under the
#' folder storing \code{catalog}. The default is derived from the path in
#' \code{catalog}, so \code{out.dir} must be given explicitly when
#' \code{catalog} is a matrix or data.frame.
#'
#' @param overwrite If TRUE, overwrite existing output
#'
#' @return \code{invisible(catMatrix)},
#' original catalog in helmsman format
#'
#' @details Creates the folder \code{out.dir} containing the catalog
#' in helmsman format, written as a tab-separated \code{.tsv} file:
#' Rows are samples (tumors);
#' the first column is the sample ID, while the remaining
#' columns are mutation types.
#' This helmsman-formatted catalog will be the input when running the
#' helmsman program later on the Python platform.
#'
#' @export
#'
#' @importFrom utils capture.output
CreatehelmsmanOutput <-
  function(catalog,
           out.dir = paste0(dirname(catalog),"/ExtrAttr/helmsman.results"),
           overwrite = FALSE) {

  if (is.character(catalog)) {
    ## `catalog` is a file path:
    ## read in the catalog matrix using ICAMS::ReadCatalog.
    catMatrix <- ICAMS::ReadCatalog(catalog, strict = FALSE)
    ## Convert catalog to helmsman format
    catMatrix <- ICAMSCatalog2helmsman(catMatrix)
    ## Fetch the name of catalog file without extension
    oldFileName <- tools::file_path_sans_ext(basename(catalog))
  } else if (is.data.frame(catalog) || is.matrix(catalog)) {
    ## The default `out.dir` calls dirname(catalog), which only works for
    ## a file path; fail with a clear message instead.
    if (missing(out.dir)) {
      stop("`out.dir` must be supplied when `catalog` is a matrix ",
           "or data.frame",
           call. = FALSE)
    }
    ## Assume `catalog` is a legal ICAMS catalog object.
    ## Convert catalog to helmsman format
    catMatrix <- ICAMSCatalog2helmsman(catalog)
    ## No source file to take a name from; use the conventional one.
    oldFileName <- "ground.truth.syn.catalog"
  } else {
    ## Stop before touching the file system.
    stop("`catalog` must be a file path, a matrix or a data.frame",
         call. = FALSE)
  }

  ## Create out.dir, honouring the documented `overwrite` contract.
  if (dir.exists(out.dir)) {
    if (!overwrite) {
      stop(out.dir, " already exists; use overwrite = TRUE to reuse it",
           call. = FALSE)
    }
  } else if (!dir.create(out.dir, recursive = TRUE)) {
    stop("Could not create directory ", out.dir, call. = FALSE)
  }

  ## Dump catMatrix into out.dir
  newFileName <- file.path(out.dir, paste0(oldFileName, ".tsv"))
  utils::write.table(
    catMatrix, file = newFileName,
    sep = "\t", quote = FALSE, row.names = FALSE)

  invisible(catMatrix)
}
WuyangFF95/SynSigEval documentation built on Sept. 18, 2022, 11:41 a.m.