data-raw/old.code.and.notes/code.using.nicola.roberts.hdp/Runhdp5.R

#' Run hdp extraction and attribution on a spectra catalog file using the original hdp package.
#'
#' @inheritParams RunhdpInternal5
#'
#' @param input.catalog.file File containing a spectra catalog
#' in \code{\link[ICAMS]{ICAMS}} format.
#'
#' @param out.dir Directory that will be created for the output;
#'   if \code{overwrite} is \code{FALSE} then
#'   abort if \code{out.dir} already exits.
#'
#' @param remove.noise Deprecated; ignored
#'
#' @param test.only If > 0, only analyze the first \code{test.only} columns
#'  in \code{input.catalog.file}.
#'
#' @param overwrite If \code{TRUE} overwrite \code{out.dir} if it exists, otherwise
#'  raise an error.
#'
#' @return The same list as returned by \code{\link{RunhdpInternal5}}.
#'
#' @details Creates several files in \code{out.dir}. These are:
#'  TODO(Steve): list the files
#'
#' @export

Runhdp5 <-
  function(input.catalog.file,
           out.dir,
           CPU.cores           = 1,
           seedNumber          = 1,
           K.guess,
           multi.types         = FALSE,
           remove.noise        = FALSE,
           test.only           = 0,
           overwrite           = FALSE,
           verbose             = TRUE,
           num.posterior       = 4,
           post.burnin         = 4000,
           post.n              = 50,
           post.space          = 50,
           post.cpiter         = 3,
           post.verbosity      = 0,
           cos.merge           = 0.9,
           min.sample          = 1) {

    if (verbose) message("Reading input catalog file ", input.catalog.file)
    spectra <- ICAMS::ReadCatalog(input.catalog.file, strict = FALSE)
    if (test.only > 0) spectra <- spectra[ , 1:test.only]

    ## Create output directory
    if (dir.exists(out.dir)) {
      if (!overwrite) stop(out.dir, " already exits")
      if (verbose) message("Using existing out.dir ", out.dir)
    } else {
      dir.create(out.dir, recursive = T)
      if (verbose) message("Created new out.dir", out.dir)
    }

    utils::capture.output(date(), cat("\n\n"),
                          match.call(), cat("\n\n"),
                          utils::sessionInfo(),
                          file = file.path(out.dir, "call.and.session.info.txt"))

    retval <- mSigHdp::RunhdpInternal5(
      input.catalog   = spectra,
      CPU.cores       = CPU.cores,
      seedNumber      = seedNumber,
      K.guess         = K.guess,
      multi.types     = multi.types,
      num.posterior   = num.posterior,
      verbose         = verbose,
      post.burnin     = post.burnin,
      post.n          = post.n,
      post.space      = post.space,
      post.cpiter     = post.cpiter,
      post.verbosity  = post.verbosity,
      cos.merge       = cos.merge,
      min.sample      = min.sample
    ) # 14 Arguments

    save(retval, file = file.path(out.dir, "Runhdp5.retval.Rdata"))

    multi <- retval[["multi.chains"]] # class hdpSampleMulti
    chains <- hdp::chains(multi)      # list of hdpSampleChain

    # Plot the diagnostics of sampling chains.
    if (verbose) message("Writing hdp.diagnostics.pdf")
    grDevices::pdf(file = paste0(out.dir,"/hdp.diagnostics.pdf"))
    graphics::par(mfrow=c(2,2), mar=c(4, 4, 2, 1))

    # This is the likelihood plot along each chain
    lapply(chains, hdp::plot_lik, bty = "L")

    # This is the number of raw clusters sampled along each chain
    lapply(chains, hdp::plot_numcluster, bty = "L")

    # This is the number of mutations assigned as a function of
    # the number of raw clusters
    lapply(chains, hdp::plot_data_assigned, bty = "L")

    graphics::par(mfrow=c(1,1), mar=c(5, 4, 4, 2))
    # Components were already extracted, so this call will work
    hdp::plot_comp_size(multi, bty="L")

    graphics::par(mfrow=c(8, 1), mar = c(1, 1, 1, 1))
    # This plots the component (signature) profiles with
    # 95% credibility intervals
    hdp::plot_comp_distn(multi)

    # TODO, need argument dpinices and col_comp;
    # Need to return the hdp object (perhaps) from RunhdpInternal
    # to get the required values.
    if (FALSE) { # Not finished
    num.dpindices <- length(chains[[1]]@hdp@ppindex)
    hdp::plot_dp_comp_exposure(
      multi, dpindices = 3:num.dpindices,
      col_comp = myCol[1:ncol(retval$signature)],
      dpnames = colnames(retval$exposure))
    }

    grDevices::dev.off()

    if (verbose) message("Writing signatures")
    extractedSignatures <- ICAMS::as.catalog(retval$signature,
                                             region       = "unknown",
                                             catalog.type = "counts.signature")
    ICAMS::WriteCatalog(extractedSignatures,
                        paste0(out.dir,"/extracted.signatures.csv"))

    if (verbose) message("Writing exposures")
    SynSigGen::WriteExposure(retval$exposure.p,
                  paste0(out.dir,"/exposure.probs.csv"))
    SynSigGen::WriteExposure(retval$exposure,
                  paste0(out.dir,"/inferred.exposures.csv"))

    invisible(retval)
  }
steverozen/mSigHdp documentation built on Feb. 6, 2023, 1:36 a.m.