inst/extdata/Dx.R

suppressPackageStartupMessages(library(optparse))
suppressPackageStartupMessages(library(futile.logger))

### Parsing command line ------------------------------------------------------

option_list <- list(
    make_option(c("--rds"), action = "store", type = "character",
        default = NULL, help = "PureCN output RDS file"),
    make_option(c("--callable"), action = "store", type = "character",
        default = NULL,
        help = "TMB: File parsable by rtracklayer specifying all callable regions, e.g. generated by GATK CallableLoci"),
    make_option(c("--exclude"), action = "store", type = "character",
        default = NULL,
        help = paste("TMB: File parsable by rtracklayer specifying regions to exclude",
         "from mutation burden calculation, e.g. intronic regions")),
    make_option(c("--maxpriorsomatic"), action = "store", type = "double",
        default = formals(PureCN::callMutationBurden)$max.prior.somatic,
        help = "TMB: Can be used to exclude hotspot mutations with high somatic prior probability [default %default]"),
    make_option(c("--keepindels"), action = "store_true", default = FALSE, 
        help = "TMB: count indels"),
    make_option(c("--signatures"), action = "store_true", default = FALSE, 
        help="Attempt the deconstruction of COSMIC signatures (requires deconstructSigs package)"),
    make_option(c("--signature_databases"), action = "store", type = "character", 
        default = "signatures.exome.cosmic.v3.may2019", 
        help = "Use the specified signature databases provided by deconstrucSigs. To test multiple databases, provide them : separated [%default]."),
    make_option(c("--out"), action = "store", type = "character",
        default = NULL,
        help = "File name prefix to which results should be written"),
    make_option(c("-v", "--version"), action = "store_true", default = FALSE, 
        help = "Print PureCN version"),
    make_option(c("-f", "--force"), action = "store_true", default = FALSE, 
        help="Overwrite existing files")
)

opt <- parse_args(OptionParser(option_list=option_list))

if (opt$version) {
    message(as.character(packageVersion("PureCN")))
    q(status=1)
}    

# Parse input rds
infileRds <- opt$rds
if (is.null(infileRds)) stop("Need --rds")
infileRds <- normalizePath(infileRds, mustWork = TRUE)

# Parse outdir

# Parse both BED files restricting covered region
callableFile <- opt$callable
callable <- NULL
if (!is.null(callableFile)) {
    suppressPackageStartupMessages(library(rtracklayer))
    callableFile <- normalizePath(callableFile, mustWork = TRUE)
    flog.info("Reading %s...", callableFile)
    callable <- GRanges(import(callableFile))
}

excludeFile <- opt$exclude
exclude <- NULL
if (!is.null(excludeFile)) {
    suppressPackageStartupMessages(library(rtracklayer))
    excludeFile <- normalizePath(excludeFile, mustWork = TRUE)
    flog.info("Reading %s...", excludeFile)
    exclude <- GRanges(import(excludeFile))
}


### Run PureCN ----------------------------------------------------------------

flog.info("Loading PureCN...")
suppressPackageStartupMessages(library(PureCN))

res <- readCurationFile(infileRds)
sampleid <- res$input$sampleid

.getOutPrefix <- function(opt, infile, sampleid) {
    out <- opt[["out"]]
    if (is.null(out)) {
        if (!is.null(infile) && file.exists(infile)) {
            outdir <- dirname(infile)
            prefix <- sampleid
        } else {
            stop("Need --out")
        }    
    } else {
        outdir <- dirname(out)
        prefix <- basename(out)
    }    
    outdir <- normalizePath(outdir, mustWork = TRUE)
    outPrefix <- file.path(outdir, prefix)
}
outPrefix <- .getOutPrefix(opt, infileRds, sampleid)
     
outfileMb <- paste0(outPrefix, "_mutation_burden.csv")
if (!opt$force && file.exists(outfileMb)) {
    stop(outfileMb, " exists. Use --force to overwrite.")
}    

flog.info("Calling mutation burden...")

fun.countMutation <- eval(formals(callMutationBurden)$fun.countMutation)
if (opt$keepindels) fun.countMutation <- function(vcf) width(vcf) >= 1
    
mb <- callMutationBurden(res, callable = callable, exclude = exclude,
        max.prior.somatic = opt$maxpriorsomatic,
        fun.countMutation = fun.countMutation)

write.csv(cbind(Sampleid=sampleid, mb), file = outfileMb, row.names = FALSE,
          quote = FALSE)

flog.info("Calling chromosomal instability...")

cin <- data.frame(
    cin = callCIN(res, allele.specific = FALSE, reference.state = "normal"),
    cin.allele.specific = callCIN(res, reference.state = "normal"),
    cin.ploidy.robust = callCIN(res, allele.specific = FALSE),
    cin.allele.specific.ploidy.robust = callCIN(res)
)
outfileCin <- paste0(outPrefix, "_cin.csv")
if (!opt$force && file.exists(outfileCin)) {
    flog.warn("%s exists. Use --force to overwrite.", outfileCin)
} else {
    write.csv(cbind(Sampleid=sampleid, round(cin, digits = 4)), 
              file = outfileCin, row.names = FALSE, quote = FALSE)
}

if (opt$signatures && require(deconstructSigs)) {

    x <- predictSomatic(res)
    x$Sampleid <- res$input$sampleid

    flog.info("deconstructSigs package found.")

    if (compareVersion(package.version("deconstructSigs"), "1.9.0") < 0) {
        flog.fatal("deconstructSigs package is outdated. >= 1.9.0 required")
        q(status = 0)
    }    

    s <- x[ x$ML.SOMATIC & x$prior.somatic > 0.1 & !x$FLAGGED ,]

    genome <- genome(res$input$vcf)[1]
    bsg <- NULL
    if (genome != "hg19") { 
        bsgPkg <- paste0("BSgenome.Hsapiens.UCSC.", genome)
        if (!require(bsgPkg, character.only = TRUE)) { 
            flog.fatal("%s not found.", bsgPkg)
            q(status = 0)
        }
        bsg <- get(bsgPkg)
    }
    if (nrow(s) >= 10) {
        databases <- strsplit(opt$signature_databases, ":")[[1]]
        for (database in databases) {
            sig.type <- if (grepl("^signatures.dbs", database)[1]) "DBS" else "SBS"
            s[["REF"]] <- as.character(s[["REF"]])
            s[["ALT"]] <- as.character(s[["ALT"]])
            sigs.input <- mut.to.sigs.input(s, 
                            sample.id = "Sampleid", chr = "chr", pos = "start", 
                            ref = "REF", alt = "ALT", bsg = bsg, sig.type = sig.type,
                            dbs_table = dbs_possible)
            signatures.ref <- get(database)
            flog.info("Using %s signature database.", database)
            sigs <- whichSignatures(tumor.ref = sigs.input, 
                                    signatures.ref = signatures.ref, 
                                    contexts.needed = TRUE, 
                                    tri.counts.method = "default")
            database_name <- if (length(databases) > 1) gsub("signatures.", "_", database) else ""
            outfile <- paste0(outPrefix, database_name, "_signatures.pdf")
            pdf(outfile)
            plotSignatures(sigs, sig.type = sig.type)
            makePie(sigs)
            invisible(dev.off())
            outfile <- paste0(outPrefix, database_name, "_signatures.csv")
            write.csv(sigs$weights, file = outfile)
        }
    } else {
        flog.warn("Not enough somatic calls to deconstruct signatures.")
    }
}

Try the PureCN package in your browser

Any scripts or data that you put into this service are public.

PureCN documentation built on Nov. 8, 2020, 5:37 p.m.