#' Runs Parent-Specific Copy-Number Segmentation (PSCBS) on a Dataset
#'
#' @inheritParams pscnseq
#'
#' @return (character vector) Pathnames to \file{*,PairedPSCBS.rds} files
#' holding [PSCBS::segmentByPairedPSCBS] results.
#'
#' @seealso
#' This function uses [PSCBS::segmentByPairedPSCBS()].
#'
#' @importFrom future %<-% %label% resolve
#' @importFrom listenv listenv
#' @importFrom R.utils Arguments isFile saveObject printf mprintf mstr hpaste seqToHumanReadable cat enter enterf exit
#' @importFrom utils file_test
#' @importFrom R.filesets readDataFrame sortBy
#' @importFrom aroma.seq SeqzFileSet
#' @importFrom PSCBS segmentByPairedPSCBS normalizeTotalCNs getChromosomes
#'
#' @export
pscnseq_pscbs <- function(dataset, organism, chrs, samples, binsize, verbose = FALSE) {
verbose <- Arguments$getVerbose(verbose)
## - - - - - - - - - - - - - - - - - - - - - - - - - - -
## Sample data
## - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (samples != "*") {
## Tumor-normal pairs from sample data
data <- readDataFrame(samples, fill=TRUE)
pairs <- pairs_from_samples(data)
mstr(pairs)
}
## - - - - - - - - - - - - - - - - - - - - - - - - - - -
## Overview
## - - - - - - - - - - - - - - - - - - - - - - - - - - -
chrs[chrs == "X"] <- 23
chrs[chrs == "Y"] <- 24
chrs[chrs == "M"] <- 25
chrsTag <- sprintf("chrs=%s", seqToHumanReadable(chrs, collapse = ","))
binSizeTag <- sprintf("%gkb", binsize/1000)
mprintf("Dataset: %s\n", dataset)
mprintf("Organism: %s\n", organism)
mprintf("Chromosomes: %s\n", seqToHumanReadable(chrs))
mprintf("Bin size: %s (%d bp)\n", binSizeTag, binsize)
## - - - - - - - - - - - - - - - - - - - - - - - - - - -
## Identify files to be processed
## - - - - - - - - - - - - - - - - - - - - - - - - - - -
## Identify availble Sequenza files
datasetS <- paste(c(dataset, "seqz"), collapse = ",")
pathS <- file.path("seqzData", datasetS, organism)
pathS <- Arguments$getReadablePath(pathS)
filenamesS <- dir(path=pathS, pattern="[.]seqz(|[.]gz)$")
## Sanity check
if (length(filenamesS) == 0) {
stop("Failed to locate any *.seqz(.gz) files: ", sQuote(pathS))
}
pathnamesS <- file.path(pathS, filenamesS)
fullnamesS <- gsub("[.]seqz(|[.]gz)$", "", filenamesS)
datasetD <- fullname(datasetS, binSizeTag, "tcn=2")
pathD <- file.path("pscbsData", datasetD, organism)
pathD <- Arguments$getWritablePath(pathD)
fullnamesS2 <- gsub("chrX", "chr23", fullnamesS)
fullnamesS2 <- gsub("chrY", "chr24", fullnamesS2)
fullnamesS2 <- gsub("chrM", "chr25", fullnamesS2)
fullnamesD <- gsub(",chr=chr", ",chr=", fullnamesS2)
filenamesD <- sprintf("%s,PairedPSCBS.xdr", fullnamesD)
pathnamesD <- file.path(pathD, filenamesD)
todo <- which(!file_test("-f", pathnamesD))
if (length(todo) == 0) {
mprintf("All %d Sequenza files have been PSCBS segmented: %s", length(pathnamesS), pathD)
}
## Keep non-processed files
if (length(todo) != length(pathnamesD)) {
pathnamesS <- pathnamesS[todo]
pathnamesD <- pathnamesD[todo]
fullnamesS <- fullnamesS[todo]
}
mprintf("Sequenza files located: [n=%d] %s\n", length(fullnamesS), hpaste(sQuote(fullnamesS)))
all_samples <- unique(gsub(",chr=.*", "", fullnamesS))
if (FALSE && samples != "*") {
stop("Not yet implemented.")
sample_pairs <- sprintf("%s,%s_vs_%s", pairs$Patient_ID, pairs$T.A0, pairs$N.A0)
keep <- (all_samples %in% sample_pairs)
all_samples <- all_samples[keep]
pathnamesS <- pathnamesS[keep]
pathnamesD <- pathnamesD[keep]
fullnamesS <- fullnamesS[keep]
todo <- !(sample_pairs %in% unique(all_samples))
}
mprintf("Sequenza files to process: [n=%d] %s\n", length(fullnamesS), hpaste(sQuote(fullnamesS)))
mprintf("Tumor-normal samples: [n=%d] %s\n", length(all_samples), hpaste(sQuote(all_samples)))
## - - - - - - - - - - - - - - - - - - - - - - - - - - -
## Process
## - - - - - - - - - - - - - - - - - - - - - - - - - - -
if (interactive()) readline("Press ENTER to start processing of data: ")
fitList <- listenv()
for (ii in seq_along(all_samples)) {
sample <- all_samples[ii]
verbose && enterf(verbose, "Sample %d ('%s') of %d ...\n", ii, sample, length(all_samples))
## Future label
label <- sprintf("sample_%s", ii)
filenameF <- sprintf("%s,PairedPSCBS.rds", fullname(sample, chrsTag))
pathnameF <- file.path(pathD, filenameF)
if (isFile(pathnameF)) {
verbose && cat(verbose, "Already processed. Skipping")
fitList[[ii]] <- pathnameF
} else {
fitList[[ii]] %<-% {
pattern <- sprintf("%s,chr=.*[.]seqz(|[.]gz)$", sample)
seqz <- SeqzFileSet$byPath(pathS, pattern=pattern, depth=1L)
seqz <- sortBy(seqz, by="mixeddecimal") # chr1, chr2, ..., chr10, ...
verbose && print(verbose, seqz)
verbose && printf(verbose, "Tumor-normal samples: [n=%d] %s\n",
length(seqz), hpaste(sQuote(getFullNames(seqz))))
fits <- segmentByPairedPSCBS(seqz, binSize=binsize, verbose=TRUE)
verbose && cat(verbose, "PSCBS segmentation:")
verbose && print(verbose, fits)
## Since 'fits' consists of futures, we'll collect here
fits <- resolve(fits, result = TRUE)
## Coerce to list
fits <- as.list(fits)
fit <- do.call(c, fits)
verbose && print(verbose, fit)
fits <- NULL ## Not needed anymore
## Assert all chromosomes have been processed
missing <- setdiff(chrs, getChromosomes(fit))
if (length(missing) > 0) {
stop(sprintf("Sanity check failure: Some chromosomes were not processed for sample %s: %s", sQuote(sample), paste(missing, collapse=", ")))
}
verbose && cat(verbose, "PSCBS total copy-number normalization:")
fit <- normalizeTotalCNs(fit)
verbose && print(verbose, fit)
## Save to file (atomically)
saveObject(fit, pathnameF)
pathnameF
} %label% label
} ## if (isFile(pathnameF))
## Have at most 10 jobs on the cluster at any time
if (ii %% 10 == 0) resolve(fitList, result = TRUE)
verbose && exit(verbose)
} ## for (ii ...)
## Resolve futures and gather their values
fitList <- resolve(fitList, result = TRUE)
## Vectorize
fitList <- unlist(fitList)
fitList
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.