# R/EMuinteraction.R
#
# Defines functions CreateEMuOutput ReadEMuCatalog ReadEMuExposureFile ICAMSCatalog2EMu
#
# Documented in CreateEMuOutput ICAMSCatalog2EMu ReadEMuCatalog ReadEMuExposureFile

# EMuMuSigInteraction.R
# Interacting functions for running EMu Python package


#' Convert a catalog from ICAMS layout to EMu layout
#'
#' The input is transposed and every row and column name is removed, because
#' EMu does not accept input that carries mutation-type or sample labels.
#'
#' @param catalog A catalog matrix (or data.frame) in ICAMS format.
#' (SNS only!)
#'
#' @return A matrix without any dimnames whose values are the
#' transposition of the values in \code{catalog}.
#'
#' @export
ICAMSCatalog2EMu <- function(catalog) {
  # Only matrix-like input is supported.
  stopifnot(is.data.frame(catalog) | is.matrix(catalog))

  # Flip rows and columns, then strip both sets of labels in one step.
  emu.layout <- t(catalog)
  dimnames(emu.layout) <- NULL

  emu.layout
}

#' Read Exposure files in EMu format.
#'
#' Reads a header-less, space-separated table, removes the artificial
#' trailing column that appears when every line ends with a separator,
#' transposes the table and labels its rows and columns.
#'
#' @param exposureFile Exposure file generated by EMu.
#' Usually, it is called "W_components.txt".
#'
#' @param sigNames Names of signatures. These will be
#' served as the rownames of the exposure matrix. If \code{NULL}
#' (the default), the names \code{"EMu.1"}, \code{"EMu.2"}, ... are used.
#'
#' @param sampleNames Names of samples in exposure file. These become
#' the colnames of the exposure matrix.
#'
#' @return ICAMS/SynSigEval formatted exposure matrix: the transposition
#' of the table in \code{exposureFile}, with \code{sigNames} as rownames
#' and \code{sampleNames} as colnames.
#'
#' @export
ReadEMuExposureFile <-
  function(exposureFile,
           sigNames = NULL,
           sampleNames) {
  exposure <- utils::read.table(
    file = exposureFile, sep = " ",
    as.is = TRUE)
  last <- ncol(exposure)
  ## Since the exposure is space-separated, a trailing space on every
  ## line yields an extra last column that is entirely NA. Only such an
  ## all-NA column is dropped; a last column holding real values next to
  ## some NA is kept, and a table with a single column is never emptied.
  if (last > 1L && all(is.na(exposure[, last]))) {
    exposure <- exposure[, seq_len(last - 1L), drop = FALSE]
  }

  ## Columns of the file become rows of the returned matrix.
  exposure <- t(exposure)
  ## Fall back to generic signature names when none are given.
  if (is.null(sigNames)) {
    sigNames <- paste0("EMu.", seq_len(nrow(exposure)))
  }
  rownames(exposure) <- sigNames
  colnames(exposure) <- sampleNames

  exposure
}

#' Read Catalog files in EMu format.
#'
#' @param cat A space-delimited, header-less catalog text file in EMu
#' format; or a EMu formatted matrix or data.frame.
#'
#' @param mutTypes Types of mutations. They are usually from an
#' \code{ICAMS:::catalog.row.header} object. They become the rownames
#' of the returned catalog.
#'
#' @param sigOrSampleNames If input file is a counts signature file
#' \code{(catalog.type == "counts.signature")},
#' signature names should be provided.
#'
#' If input file is a counts spectra file
#' \code{(catalog.type == "counts")},
#' names of samples should be provided.
#'
#' If \code{NULL} (the default), the names \code{"EMu.1"},
#' \code{"EMu.2"}, ... are used.
#'
#' @param region Catalog region. Can be a specific genomic
#' or exomic region, or "unknown".
#' Default: "unknown"
#'
#' @param catalog.type Is the catalog a signature catalog,
#' or a spectrum catalog?
#' Default: "counts.signature"
#'
#' @return a catalog matrix in ICAMS format.
#'
#' @export
#'
ReadEMuCatalog <-
  function(cat,
           mutTypes,
           sigOrSampleNames = NULL,
           region = "unknown",
           catalog.type = "counts.signature") {

    stopifnot(is.character(cat) || is.data.frame(cat) || is.matrix(cat))
    if (methods::is(cat, "character")) {
      catalog <- utils::read.table(
        file = cat, sep = " ",
        as.is = TRUE)
      last <- ncol(catalog)
      ## Since the catalog is space-separated, a trailing space on every
      ## line yields an extra last column that is entirely NA. Only such
      ## an all-NA column is dropped, so a real last column that merely
      ## contains some NA is not lost.
      if (last > 1L && all(is.na(catalog[, last]))) {
        catalog <- catalog[, seq_len(last - 1L), drop = FALSE]
      }
    } else {
      ## Already a matrix or data.frame in EMu layout.
      catalog <- cat
    }

    ## EMu layout is the transposition of ICAMS layout:
    ## after t(), rows are mutation types.
    catalog <- t(catalog)
    rownames(catalog) <- mutTypes
    ## Fall back to generic column names when none are given.
    if (is.null(sigOrSampleNames)) {
      sigOrSampleNames <- paste0("EMu.", seq_len(ncol(catalog)))
    }
    colnames(catalog) <- sigOrSampleNames
    ## Convert to ICAMS-formatted catalog
    ICAMS::as.catalog(object = catalog,
                      region = region,
                      catalog.type = catalog.type)
  }


#' Prepare input file for EMu from an
#' ICAMS formatted catalog.
#'
#' @param catalog a catalog in ICAMS format. It can be
#' a .csv file, or a matrix or data.frame.
#' Usually, it refers to \code{"ground.truth.syn.catalog.csv"}.
#'
#' @param out.dir Directory that will be created for the output;
#' abort if it already exists and \code{overwrite} is not \code{TRUE}.
#' Usually, the \code{out.dir} will
#' be a \code{EMu.results} folder directly under the folder
#' storing \code{catalog}. The default is derived from the path in
#' \code{catalog}, so \code{out.dir} must be given explicitly when
#' \code{catalog} is a matrix or data.frame.
#'
#' @param overwrite If TRUE, write into an already existing
#' \code{out.dir}, overwriting existing output.
#'
#' @return \code{invisible(catalog)},
#' the catalog converted to EMu format
#'
#' @details Writes one tab-separated file named after the input catalog
#' (extension \code{.tsv}; \code{ground.truth.syn.catalog.tsv} for matrix
#' or data.frame input) into \code{out.dir}. The file contains the
#' transposition of \code{catalog} without header line and without row
#' names. This EMu-formatted catalog will be the input when running the
#' EMu program later on.
#'
#' @importFrom utils capture.output
#'
#' @export
CreateEMuOutput <-
  function(catalog,
           out.dir = paste0(dirname(catalog),"/ExtrAttr/EMu.results"),
           overwrite = FALSE) {

  ## Validate the input before touching the file system.
  from.file <- is.character(catalog)
  if (from.file) {
    if (length(catalog) != 1L) {
      stop("catalog must be a single file path", call. = FALSE)
    }
    ## The default out.dir is computed from `catalog`. Arguments are
    ## evaluated lazily, so it has to be evaluated now, while `catalog`
    ## still holds the file path and not yet the catalog matrix.
    force(out.dir)
  } else if (is.data.frame(catalog) || is.matrix(catalog)) {
    if (missing(out.dir)) {
      stop("out.dir must be supplied when catalog is a matrix or data.frame",
           call. = FALSE)
    }
  } else {
    stop("catalog must be a file path, a matrix or a data.frame",
         call. = FALSE)
  }

  ## Refuse to reuse an existing output directory unless asked to.
  if (dir.exists(out.dir) && !isTRUE(overwrite)) {
    stop(out.dir, " already exists", call. = FALSE)
  }

  if (from.file) {
    ## Fetch the name of catalog file without extension
    oldFileName <- tools::file_path_sans_ext(basename(catalog))
    catalog <- ICAMS::ReadCatalog(catalog, strict = FALSE)
  } else {
    ## No file name available; assume `catalog` is a legal ICAMS catalog.
    oldFileName <- "ground.truth.syn.catalog"
  }
  ## Convert catalog to EMu format
  catalog <- ICAMSCatalog2EMu(catalog)

  ## Create out.dir (only when it does not exist yet).
  if (!dir.exists(out.dir) &&
      !dir.create(out.dir, recursive = TRUE)) {
    stop("cannot create directory ", out.dir, call. = FALSE)
  }
  ## Dump catalog into out.dir/{oldFileName}.tsv
  newFileName <- file.path(out.dir, paste0(oldFileName, ".tsv"))
  utils::write.table(
    catalog, file = newFileName,
    sep = "\t", quote = FALSE,
    row.names = FALSE, col.names = FALSE)

  ## Return catalog, invisibly.
  invisible(catalog)
}
# WuyangFF95/SynSigEval documentation built on Sept. 18, 2022, 11:41 a.m.